"""a basic stoichiometric model with thermodynamics."""
# The MIT License (MIT)
#
# Copyright (c) 2013 Weizmann Institute of Science
# Copyright (c) 2018-2020 Institute for Molecular Systems Biology,
# ETH Zurich
# Copyright (c) 2018-2020 Novo Nordisk Foundation Center for Biosustainability,
# Technical University of Denmark
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import logging
import sys
import warnings
from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union
import numpy as np
import pandas as pd
from equilibrator_cache import Compound
from slugify import slugify
from equilibrator_api import Q_, ComponentContribution, R, Reaction, default_T
from . import Bounds, SBtabDocument, SBtabError, SBtabTable, open_sbtabdoc
[docs]class StoichiometricModel(object):
"""A basic stoichiometric model with thermodynamics.
Designed as a base model for 'Pathway' which also includes flux directions
and magnitudes.
"""
[docs] CONFIDENCE_INTERVAL = 1.96 # TODO: move to params dict
[docs] MINIMAL_STDEV = 1e-3 # TODO: move to params dict
def __init__(
self,
reactions: List[Reaction],
comp_contrib: Optional[ComponentContribution] = None,
standard_dg_primes: Optional[Q_] = None,
dg_sigma: Optional[Q_] = None,
bounds: Optional[Bounds] = None,
config_dict: Optional[Dict[str, str]] = None,
compound_id_mapping: Callable[[Compound], str] = None,
) -> None:
"""Initialize a StoichiometricModel object.
Parameters
----------
reactions : List[Reaction]
a list of Reaction objects
comp_contrib : ComponentContribution
a ComponentContribution object
standard_dg_primes : Quantity, optional
reaction energies (in kJ/mol)
dg_sigma : Quantity, optional
square root of the uncertainty covariance matrix (in kJ/mol)
bounds : Bounds, optional
bounds on metabolite concentrations (by default uses the
"data/cofactors.csv" file in `equilibrator-api`)
config_dict : dict, optional
configuration parameters for Pathway analysis
compound_id_mapping : callable, optional
a function mapping compounds to their names in the model
"""
self.comp_contrib = comp_contrib or ComponentContribution()
self.water = self.comp_contrib.ccache.water
self.is_proton = self.comp_contrib.ccache.is_proton
self.is_water = self.comp_contrib.ccache.is_water
self.reactions = reactions
if bounds is None:
self._bounds = Bounds.get_default_bounds(self.comp_contrib).copy()
else:
self._bounds = bounds.copy()
self.config_dict = config_dict or dict()
self.stdev_factor = 0.0
self.configure()
# make sure the bounds on Water are (1, 1)
self._bounds.set_bounds(self.water, Q_(1.0, "M"), Q_(1.0, "M"))
self.S = self.comp_contrib.create_stoichiometric_matrix(reactions)
# we keep the index to H2O handy, since we often need to remove
# water from the list of compounds
self.idx_water = self.S.index.tolist().index(self.water)
self.Nc, self.Nr = self.S.shape
if standard_dg_primes is None:
assert dg_sigma is None, (
"If standard_dg_primes are not "
"provided, dg_sigma must also be None"
)
self.update_standard_dgs()
else:
assert standard_dg_primes.check("[energy]/[substance]")
assert standard_dg_primes.shape == (self.Nr,)
if dg_sigma is not None:
assert dg_sigma.check("[energy]/[substance]")
# note that the number of columns in the sqrt
# of the covariance can be any number, so we don't check it.
assert dg_sigma.shape[0] == self.Nr
# dGr should be orthogonal to nullspace of S
# If not, dGr is not contained in image(S) and then there
# is no consistent set of dGfs that generates dGr and the
# first law of thermo is violated by the model.
S_T = self.S.T.values
S_inv = np.linalg.pinv(S_T)
null_proj = np.eye(self.S.shape[1]) - S_T @ S_inv
projected = null_proj @ standard_dg_primes.T
assert (projected < Q_("1e-8 kJ/mol")).all(), (
"Supplied reaction standard deltaG values are inconsistent "
"with the stoichiometric matrix."
)
self.standard_dg_primes = standard_dg_primes
self.dg_sigma = dg_sigma
# Set the compound names, these names can
# be changed later using the same function (set_compound_ids)
self.set_compound_ids(compound_id_mapping)
# Some analyses use Z-score to describe thee distribution of metabolites
# so we need to convert the "hard" bounds into a Gaussian distribution
# we assume that the given bounds represent the 95% confidence interval
# a the Gaussian distribution (i.e. [mu - 1.96*sigma, mu + 1.96*sigma])
# Therefore:
# mu = (ub + lb) / 2
# sigma = (ub - lb) / 3.92
self.ln_conc_mu = (self.ln_conc_ub + self.ln_conc_lb) / 2.0
self.ln_conc_sigma = (self.ln_conc_ub - self.ln_conc_lb) / (
self.CONFIDENCE_INTERVAL * 2.0
)
[docs] def update_standard_dgs(self) -> None:
"""Calculate the standard ΔG' values and uncertainties.
Use the Component Contribution method.
"""
(
self.standard_dg_primes,
self.dg_sigma,
) = self.comp_contrib.standard_dg_prime_multi(
self.reactions, uncertainty_representation="sqrt"
)
[docs] def set_compound_ids(
self, mapping: Callable[[Compound], str] = None
) -> None:
"""Use alternative compound names for outputs such as plots.
Parameters
----------
mapping : callable, optional
a function mapping compounds to their names in the model
"""
if mapping is None:
self.compound_ids = [
slugify(cpd.get_common_name(), separator="_", lowercase=False)
for cpd in self.S.index
]
else:
self.compound_ids = list(map(mapping, self.S.index))
@property
[docs] def bounds(self) -> Tuple[Iterable[float], Iterable[float]]:
"""Get the concentration bounds.
The order of compounds is according to the stoichiometric matrix index.
Returns
-------
tuple of (lower bounds, upper bounds)
"""
return self._bounds.get_bounds(self.S.index)
@property
[docs] def ln_conc_lb(self) -> np.array:
"""Get the log lower bounds on the concentrations.
The order of compounds is according to the stoichiometric matrix index.
Returns
-------
a NumPy array of the log lower bounds
"""
return np.array(
list(map(float, self._bounds.get_ln_lower_bounds(self.S.index)))
)
@property
[docs] def ln_conc_ub(self) -> np.ndarray:
"""Get the log upper bounds on the concentrations.
The order of compounds is according to the stoichiometric matrix index.
Returns
-------
a NumPy array of the log upper bounds
"""
return np.array(
list(map(float, self._bounds.get_ln_upper_bounds(self.S.index)))
)
@staticmethod
[docs] def get_compounds(reactions: Iterable[Reaction]) -> List[Compound]:
"""Get a unique list of all compounds in all reactions.
:param reactions: an iterator of reactions
:return: a list of unique compounds
"""
compounds = set()
for r in reactions:
compounds.update(r.keys())
return sorted(compounds)
@property
[docs] def reaction_ids(self) -> Iterable[str]:
"""Iterate through all the reaction IDs.
:return: the reaction IDs
"""
return map(lambda rxn: rxn.rid, self.reactions)
@property
@staticmethod
[docs] def read_thermodynamics(
thermo_sbtab: SBtabTable, config_dict: Dict[str, str]
) -> Dict[str, Q_]:
"""Read the 'thermodynamics' table from an SBtab.
Parameters
----------
thermo_sbtab : SBtabTable
A SBtabTable containing the thermodynamic data
config_dict : dict
A dictionary containing the configuration arguments
Returns
-------
A dictionary mapping reaction IDs to standard ΔG' values.
"""
try:
std_conc = thermo_sbtab.get_attribute("StandardConcentration")
assert Q_(std_conc) == Q_(
"1 M"
), "We only support a standard concentration of 1 M."
except SBtabError:
pass
if "temperature" in config_dict:
temperature = Q_(config_dict["temperature"])
assert temperature.check("[temperature]")
else:
temperature = default_T
thermo_df = thermo_sbtab.to_data_frame()
cols = ["QuantityType", "Value", "Compound", "Reaction", "Unit"]
rid2dg0 = dict()
for i, row in thermo_df.iterrows():
try:
typ, val, cid, rid, unit = [row[c] for c in cols]
val = Q_(float(val), unit)
if typ == "equilibrium constant":
assert val.check(
""
), "equilibrium constants must have no units"
rid2dg0[rid] = -np.log(val.m_as("")) * R * temperature
elif typ == "reaction gibbs energy":
assert val.check(
"[energy]/[substance]"
), "Gibbs energies must be in kJ/mol (or equivalent)"
val.ito("kJ/mol")
rid2dg0[rid] = val
else:
raise AssertionError(
"unrecognized Rate Constant Type: " + typ
)
except AssertionError:
raise ValueError(
"Syntax error in Parameter table, row %d - %s" % (i, row)
)
return rid2dg0
def _compound_to_id(self, compound: Compound) -> str:
d = dict(zip(self.S.index, self.compound_ids))
try:
return d[compound]
except KeyError:
warnings.warn(
f"Cannot find {compound.get_common_name()} in the model"
)
return compound.get_common_name()
def _write_compound_and_coeff(
self, compound: Compound, coeff: float
) -> str:
if np.abs(coeff - 1.0) < sys.float_info.epsilon:
return self._compound_to_id(compound)
else:
return "%g %s" % (coeff, self._compound_to_id(compound))
def _reaction_to_formula(self, reaction: Reaction) -> str:
l_tokens = []
r_tokens = []
for compound, coeff in sorted(reaction.sparse.items()):
if self.is_proton(compound):
continue
elif coeff < 0:
l_tokens.append(
self._write_compound_and_coeff(compound, -coeff)
)
elif coeff > 0:
r_tokens.append(self._write_compound_and_coeff(compound, coeff))
l_str = " + ".join(l_tokens)
r_str = " + ".join(r_tokens)
return f"{l_str} {reaction.arrow} {r_str}"
@staticmethod
def _read_network_sbtab(
filename: Union[str, SBtabDocument],
comp_contrib: Optional[ComponentContribution] = None,
freetext: bool = True,
) -> Tuple[pd.DataFrame, ComponentContribution, Dict[str, str]]:
"""Initialize a Pathway object using a 'network'-only SBtab.
Parameters
----------
filename : str, SBtabDocument
a filename containing an SBtabDocument (or the SBtabDocument
object itself) defining the network (topology) only
comp_contrib : ComponentContribution, optional
a ComponentContribution object needed for parsing and searching
the reactions.
also used to set the aqueous parameters (pH, I, etc.)
freetext : bool, optional
a flag indicating whether the reactions are given as free-text
(i.e. common names for compounds) or by standard database
accessions. (Default value: `True`)
Returns
-------
reaction_df
comp_contrib
config_dict
"""
def formula_to_obj(rxn_formula: str) -> Reaction:
"""Convert the reaction formulas to Reaction objects."""
logging.debug("formula = %f", rxn_formula)
if freetext:
rxn = comp_contrib.search_reaction(rxn_formula)
else:
rxn = comp_contrib.parse_reaction_formula(rxn_formula)
if not rxn.is_balanced():
raise Exception(f"This reaction is not balanced: {rxn_formula}")
return rxn
comp_contrib = comp_contrib or ComponentContribution()
sbtabdoc = open_sbtabdoc(filename)
reaction_df = sbtabdoc.get_sbtab_by_id("Reaction").to_data_frame()
reaction_df["Reaction"] = reaction_df.ReactionFormula.apply(
formula_to_obj
)
for i, rxn in enumerate(reaction_df["Reaction"]):
rxn.rid = f"R{i:05d}"
config_dict = {
"p_h": str(comp_contrib.p_h),
"p_mg": str(comp_contrib.p_mg),
"ionic_strength": str(comp_contrib.ionic_strength),
"temperature": str(comp_contrib.temperature),
}
return reaction_df, comp_contrib, config_dict
@staticmethod
def _read_model_sbtab(
filename: Union[str, SBtabDocument],
comp_contrib: Optional[ComponentContribution] = None,
):
"""Parse and SBtabDocument and return a StoichiometricModel.
Parameters
----------
filename : str or SBtabDocument
a filename containing an SBtabDocument (or the SBtabDocument
object itself) defining the pathway
comp_contrib : ComponentContribution, optional
a ComponentContribution object needed for parsing and searching
the reactions. also used to set the aqueous parameters (pH, I, etc.)
Returns
-------
stoich_model: StoichiometricModel
A StoichiometricModel object based on the configuration SBtab
"""
sbtabdoc = open_sbtabdoc(filename)
# Read the configuration table if it exists
config_sbtab = sbtabdoc.get_sbtab_by_id("Configuration")
if config_sbtab:
config_df = config_sbtab.to_data_frame()
assert (
"Option" in config_df.columns
), "Configuration table must have an Option column"
assert (
"Value" in config_df.columns
), "Configuration table must have an Value column"
config_dict = config_df.set_index("Option").Value.to_dict()
else:
config_dict = dict()
table_ids = ["Compound", "ConcentrationConstraint", "Reaction"]
dfs = []
for table_id in table_ids:
sbtab = sbtabdoc.get_sbtab_by_id(table_id)
if sbtab is None:
tables = ", ".join(map(lambda s: s.table_id, sbtabdoc.sbtabs))
raise ValueError(
f"The SBtabDocument must have a table "
f"with the following ID: {table_id}, "
f"however, only these tables were "
f"found: {tables}"
)
dfs.append(sbtab.to_data_frame())
compound_df, bounds_df, reaction_df = dfs
# Read the Compound table
# -----------------------
# use equilibrator-cache to build a dictionary of compound IDs to
# Compound objects
compound_df.set_index("ID", inplace=True)
if "Identifiers" not in compound_df.columns:
# Fallback legacy option, KEGG IDs could be provided without the
# namespace when the column title explicitly indicates that they
# were from KEGG
if "Identifiers:kegg.compound" in compound_df.columns:
compound_df["Identifiers"] = (
"kegg:" + compound_df["Identifiers:kegg.compound"]
)
else:
raise KeyError(
"There is no column of Identifiers in the Compound table"
)
comp_contrib = comp_contrib or ComponentContribution()
compounds = []
for i, row in enumerate(compound_df.itertuples()):
if row.Identifiers:
compounds.append(comp_contrib.get_compound(row.Identifiers))
else:
# generate a novel Compound object (with a unique negative ID)
# just to use it later in this specific pathway
compounds.append(Compound(id=-i))
compound_df["Compound"] = compounds
if pd.isnull(compound_df.Compound).any():
accessions_not_found = compound_df.loc[
pd.isnull(compound_df.Compound), "Identifiers"
]
error_msg = str(accessions_not_found.to_dict())
raise KeyError(
f"Some compounds not found in equilibrator-cache: "
f"{error_msg}"
)
# Read the ConcentrationConstraints table
# ---------------------------------------
# convert compound IDs in the bounds table to Compound objects
id_to_compound = compound_df.Compound.to_dict()
bounds_df["Compound"] = bounds_df.Compound.apply(id_to_compound.get)
bounds_df.set_index("Compound", inplace=True)
bounds_sbtab = sbtabdoc.get_sbtab_by_id("ConcentrationConstraint")
try:
bounds_unit = bounds_sbtab.get_attribute("Unit")
lbs = bounds_df["Min"].apply(lambda x: Q_(float(x), bounds_unit))
ubs = bounds_df["Max"].apply(lambda x: Q_(float(x), bounds_unit))
except SBtabError:
# if the unit is not defined in the header, we assume it is given
# next to each bound individually
lbs = bounds_df["Min"].apply(Q_)
ubs = bounds_df["Max"].apply(Q_)
bounds = Bounds(lbs.to_dict(), ubs.to_dict())
bounds.check_bounds()
# Read the Reaction table
# -----------------------
reactions = []
reaction_ids = []
for row in reaction_df.itertuples():
rxn = Reaction.parse_formula(
id_to_compound.get, row.ReactionFormula
)
rxn.rid = row.ID
if not rxn.is_balanced(ignore_atoms=("H",)):
warnings.warn(f"Reaction {rxn.rid} is not balanced")
reactions.append(rxn)
if row.ID in reaction_ids:
raise KeyError(
f"Reaction IDs must be unique, but you have "
f"{row.ID} twice"
)
reaction_ids.append(row.ID)
thermo_sbtab = sbtabdoc.get_sbtab_by_id("Thermodynamics")
if thermo_sbtab:
rid2dg0 = StoichiometricModel.read_thermodynamics(
thermo_sbtab, config_dict
)
# make sure all reactions have a standard Gibbs energy
assert set(reaction_ids).issubset(rid2dg0.keys()), (
"Not all reactions have a equilibrium constant or gibbs energy "
"in the 'Thermodynamics' table."
)
standard_dg_primes = np.array(
[rid2dg0[rid].m_as("kJ/mol") for rid in reaction_ids]
) * Q_("1 kJ/mol")
dg_sigma = None
else:
standard_dg_primes = None
dg_sigma = None
compound_to_id = dict(map(reversed, id_to_compound.items()))
return (
sbtabdoc,
reactions,
reaction_ids,
standard_dg_primes,
dg_sigma,
bounds,
config_dict,
compound_to_id,
)
@classmethod
[docs] def from_network_sbtab(
cls,
filename: Union[str, SBtabDocument],
comp_contrib: Optional[ComponentContribution] = None,
freetext: bool = True,
bounds: Optional[Bounds] = None,
) -> object:
"""Initialize a Pathway object using a 'network'-only SBtab.
Parameters
----------
filename : str, SBtabDocument
a filename containing an SBtabDocument (or the SBtabDocument
object itself) defining the network (topology) only
comp_contrib : ComponentContribution, optional
a ComponentContribution object needed for parsing and searching
the reactions. also used to set the aqueous parameters (pH, I, etc.)
freetext : bool, optional
a flag indicating whether the reactions are given as free-text (i.e.
common names for compounds) or by standard database accessions
(Default value: `True`)
bounds : Bounds, optional
bounds on metabolite concentrations (by default uses the
"data/cofactors.csv" file in `equilibrator-api`)
Returns
-------
a Pathway object
"""
(
reaction_df,
comp_contrib,
config_dict,
) = StoichiometricModel._read_network_sbtab(
filename, comp_contrib, freetext
)
stoich_model = StoichiometricModel(
reactions=reaction_df.Reaction.tolist(),
comp_contrib=comp_contrib,
bounds=bounds,
config_dict=config_dict,
)
return stoich_model
@classmethod
[docs] def from_sbtab(
cls,
filename: Union[str, SBtabDocument],
comp_contrib: Optional[ComponentContribution] = None,
) -> "StoichiometricModel":
"""Parse and SBtabDocument and return a StoichiometricModel.
Parameters
----------
filename : str or SBtabDocument
a filename containing an SBtabDocument (or the SBtabDocument
object itself) defining the pathway
comp_contrib : ComponentContribution, optional
a ComponentContribution object needed for parsing and searching
the reactions. also used to set the aqueous parameters (pH, I, etc.)
Returns
-------
stoich_model: StoichiometricModel
A StoichiometricModel object based on the configuration SBtab
"""
(
sbtabdoc,
reactions,
reaction_ids,
standard_dg_primes,
dg_sigma,
bounds,
config_dict,
compound_to_id,
) = cls._read_model_sbtab(filename, comp_contrib)
return StoichiometricModel(
reactions=reactions,
comp_contrib=comp_contrib,
standard_dg_primes=standard_dg_primes,
dg_sigma=None,
bounds=bounds,
config_dict=config_dict,
compound_id_mapping=compound_to_id.get,
)
[docs] def to_sbtab(self) -> SBtabDocument:
"""Export the model to an SBtabDocument."""
sbtabdoc = SBtabDocument("pathway", filename="pathway.tsv")
config_dict = dict(self.config_dict)
config_df = pd.DataFrame(
data=[(k, v, "") for k, v in config_dict.items()],
columns=["!Option", "!Value", "!Comment"],
)
config_sbtab = SBtabTable.from_data_frame(
df=config_df, table_id="Configuration", table_type="Config"
)
reaction_df = pd.DataFrame(
data=[
(rxn.rid, self._reaction_to_formula(rxn))
for rxn in self.reactions
],
columns=["!ID", "!ReactionFormula"],
)
reaction_sbtab = SBtabTable.from_data_frame(
df=reaction_df, table_id="Reaction", table_type="Reaction"
)
compound_df = pd.DataFrame(
data=[
(
self._compound_to_id(cpd),
cpd.get_common_name(),
cpd.get_accession(),
)
for cpd in self.S.index
],
columns=["!ID", "!Name", "!Identifiers"],
)
compound_sbtab = SBtabTable.from_data_frame(
df=compound_df, table_id="Compound", table_type="Compound"
)
thermo_df = pd.DataFrame(
data=[
(
"reaction gibbs energy",
rxn.rid,
"",
f"{dg.m_as('kJ/mol'):.2f}",
"kJ/mol",
)
for rxn, dg in zip(self.reactions, self.standard_dg_primes)
],
columns=[
"!QuantityType",
"!Reaction",
"!Compound",
"!Value",
"!Unit",
],
)
thermo_sbtab = SBtabTable.from_data_frame(
df=thermo_df, table_id="Thermodynamics", table_type="Quantity"
)
thermo_sbtab.change_attribute("StandardConcentration", "M")
lbs, ubs = self.bounds
lbs = map(lambda x: x.m_as("mM"), lbs)
ubs = map(lambda x: x.m_as("mM"), ubs)
conc_df = pd.DataFrame(
data=[
(
"concentration",
self._compound_to_id(cpd),
f"{lb:.3g}",
f"{ub:.3g}",
)
for cpd, lb, ub in zip(self.S.index, lbs, ubs)
],
columns=["!QuantityType", "!Compound", "!Min", "!Max"],
)
conc_sbtab = SBtabTable.from_data_frame(
df=conc_df,
table_id="ConcentrationConstraint",
table_type="Quantity",
)
conc_sbtab.change_attribute("Unit", "mM")
sbtabdoc.add_sbtab(config_sbtab)
sbtabdoc.add_sbtab(reaction_sbtab)
sbtabdoc.add_sbtab(compound_sbtab)
sbtabdoc.add_sbtab(thermo_sbtab)
sbtabdoc.add_sbtab(conc_sbtab)
return sbtabdoc
[docs] def write_sbtab(self, filename: str) -> None:
"""Write the pathway to an SBtab file."""
self.to_sbtab().write(filename)