In [ ]:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
import pylab as p
from scipy import integrate

Modelling A System of Auxotrophic Bacteria and Fitting the Model to Real Data

Synthetic Biology Society Computational Workshop 2

Tutorial by: Georgeos Hardo

Model

Parameters: 0: $[S_a]$: Strain $a$ concentration 1: $[S_b]$: Strain $b$ concentration 2: $[a]$: Amino acid $a$ concentration 3: $[b]$: Amino acid $b$ concentration 4: $[m]$: Media concentration

0: $$ \frac{d[S_a]}{dt} = k_{s_a} \cdot [a] \cdot [S_a] \cdot [m]$$

1: $$ \frac{d[S_b]}{dt} = k_{s_b} \cdot [b] \cdot [S_b] \cdot [m] $$

2: $$ \frac{d[a]}{dt} = \alpha_b \cdot [S_b] \cdot [m] - \beta_a \cdot \frac{d[S_a]}{dt} $$

3: $$ \frac{d[b]}{dt} = \alpha_a \cdot [S_a] \cdot [m] - \beta_b \cdot \frac{d[S_b]}{dt} $$

4: $$ \frac{d[m]}{dt} = -\bigg(\frac{d[S_a]}{dt} + \frac{d[S_b]}{dt}\bigg) $$

Where: $k_{s_a}$ is the growth rate constant of strain $a$

$k_{s_b}$ is the growth rate constant of strain $a$

$\alpha$ is the excretion rate constant amino acid $a$ or $b$

$\beta$ is the amino acid requirement for $a$ or $b$

Discritising this system as we saw in the previous notebook:

$$ \frac{[S_a]_{i+1} - [S_a]_i}{\Delta t} = k_{s_a} \cdot [a] \cdot [S_a]_i \cdot [m]_i $$$$ \frac{[S_b]_{i+1} - [S_b]_i}{\Delta t} = k_{s_b} \cdot [b] \cdot [S_b]_i \cdot [m]_i $$$$ \frac{[a]_{i+1} - [a]_i}{\Delta t} = \alpha_b \cdot [S_b]_i \cdot [m]_i - \beta_a \cdot k_{s_a} \cdot [a] \cdot [S_a]_i \cdot [m]_i $$$$ \frac{[b]_{i+1} - [b]_i}{\Delta t} = \alpha_a \cdot [S_a]_i \cdot [m]_i - \beta_b \cdot k_{s_b} \cdot [b] \cdot [S_b]_i \cdot [m]_i $$$$\frac{[m]_{i+1} - [m]_i}{\Delta t} = -(k_{s_a} \cdot [a] \cdot [S_a]_i \cdot [m]_i + k_{s_b} \cdot [b] \cdot [S_b]_i \cdot [m]_i) $$

Rearranging to an incrementation function:

$$ [S_a]_{i+1} = [S_a]_i + \Delta t(k_{s_a} \cdot [a]_i \cdot [S_a]_i \cdot [m]_i) $$$$ [S_b]_{i+1} = [S_b]_i + \Delta t(k_{s_b} \cdot [b]_i \cdot [S_b]_i \cdot [m]_i) $$$$ [a]_{i+1} = [a]_i + \Delta t \cdot [m]_i (\alpha_b \cdot [S_b]_i - \beta_a \cdot k_{s_a} \cdot [a]_i \cdot [S_a]_i) $$$$ [b]_{i+1} = [b]_i + \Delta t\cdot [m]_i(\alpha_a \cdot [S_a]_i - \beta_b \cdot k_{s_b} \cdot [b]_i \cdot [S_b]_i) $$$$ [m]_{i+1} = [m]_i -\Delta t(k_{s_a} \cdot [a] \cdot [S_a]_i \cdot [m]_i + k_{s_b} \cdot [b] \cdot [S_b]_i \cdot [m]_i)$$

In [ ]:
#Define constant coefficients
k_Sa = 15; k_Sb = 1; alpha_a = 0.005; alpha_b = 0.002; beta_a = 0.04; beta_b = 0.05

#Define initial conditions
Sa_0 = 0.1; Sb_0 = 0.1; a_0 = 0; b_0 = 0 ; m_0 = 1

#Initialise the variable initial conditions
Sa_i = Sa_0; Sb_i = Sb_0; a_i = a_0; b_i = b_0; m_i = m_0

dt = 0.1

Sa = [Sa_i]; Sb = [Sb_i]; a = [a_i]; b = [b_i]; m = [m_i]; time = [0]

integration_time = 192
t_steps = integration_time/dt


for x in range(int(t_steps)):

    #Strain a concentration
    Sa_i1 = Sa_i + dt*(k_Sa * a_i * Sa_i * m_i)
    Sa.append(Sa_i1)
    
    #Strain b concentration
    Sb_i1 = Sb_i + dt*(k_Sb * b_i * Sb_i * m_i)
    Sb.append(Sb_i1)
    
    #Amino acid a concentration
    a_i1 = a_i + dt*m_i*(alpha_b * Sb_i - beta_a * k_Sa * a_i * Sa_i)
    a.append(a_i1)
    
    #Amino acid b concentration
    b_i1 = b_i + dt*m_i*(alpha_a * Sa_i - beta_b * k_Sb * b_i * Sb_i)
    b.append(b_i1)
    
    #Media concentration
    m_i1 = m_i - dt*(k_Sa * a_i * Sa_i * m_i + k_Sb * b_i * Sb_i * m_i)
    m.append(m_i1)
    
    Sa_i = Sa_i1
    Sb_i = Sb_i1
    a_i = a_i1
    b_i = b_i1
    m_i = m_i1
    time.append(dt*x)
In [ ]:
# Plot the species, amino acid and media concentrations

plt.plot(time, Sa)
plt.plot(time, Sb)
plt.legend(["Strain a", "Strain b"])
plt.xlabel("Time")
plt.ylabel("Concentration")
plt.show()

plt.plot(time, a)
plt.plot(time, b)
plt.legend(["a", "b"])
plt.xlabel("Time")
plt.ylabel("Concentration")
plt.show()

plt.plot(time, m)
plt.legend(["media"])
plt.xlabel("Time")
plt.ylabel("Concentration")
plt.show()

We're going to download the fluorescence datasets from some previous experiments we did.

In [ ]:
import pandas as pd
mCherry = pd.read_csv("http://cusbs.soc.srcf.net/datasets/mCherry.csv", sep="\t") #mCherry URL
eGFP =  pd.read_csv("http://cusbs.soc.srcf.net/datasets/eGFP.csv", sep="\t") # eGFP URL
In [ ]:
def normalise(data):
  z = []
  for x in range(len(data)):
    z_i = (data[x] - min(data))/(max(data) - min(data))
    z.append(z_i)
  return z 
In [ ]:
expt_time = []
for x in range(len(mCherry)):
  expt_time.append(x)
In [ ]:
## Play with the constants to try and get a good fit of the model to the data!

plt.scatter(expt_time, normalise(mCherry["F1"]), c="r", s=5, alpha=0.5)
plt.scatter(expt_time, normalise(eGFP["F1"]), c="g", s=5, alpha=0.4)
plt.plot(time, normalise(Sa), c="g")
plt.plot(time, normalise(Sb), c="r")
plt.xlabel("Time")
plt.ylabel("Normalised fluorescence")
plt.legend(["Expt Met Aux", "Expt Tryp Aux", "Model Tryp Aux", "Model Met Aux"])
plt.title("ODE model vs Experimental Data")
plt.show()

If you've made it this far, we're now going to try and use an optimiser to fit rate constants for us.

For this we're going to use SciPy's numerical integrator and optimiser functions.

In [ ]:
## Let's turn ODE numerical integration into a function and use SciPy's ODE integration 
## toolbox for fast solving

def dX_dt(X,t, k_Sa, k_Sb, alpha_a, alpha_b, beta_a, beta_b):
  rxn = np.zeros((5,))
  
  rxn[0] = k_Sa*X[0]*X[2]*X[4]
  rxn[1] = k_Sb*X[1]*X[3]*X[4]
  rxn[2] = alpha_b*X[1]*X[4] - beta_a*k_Sa*X[0]*X[2]*X[4]
  rxn[3] = alpha_a*X[0]*X[4] - beta_b*k_Sb*X[1]*X[3]*X[4]
  rxn[4] = -(k_Sa*X[0]*X[2]*X[4] + k_Sb*X[1]*X[3]*X[4])

  return rxn
In [ ]:
k_Sa = 15; k_Sb = 1; alpha_a = 0.005; alpha_b = 0.002; beta_a = 0.04; beta_b = 0.05
X0 = [0.1, 0.1, 0, 0, 1]
tf = integration_time
t_step = tf*100
t = np.linspace(0, tf, t_step)

X, infodict = integrate.odeint(dX_dt, X0, t, args = (k_Sa, k_Sb, alpha_a, alpha_b, beta_a, beta_b), full_output=True)

plt.plot(t, X.T[0])
plt.plot(t, X.T[1])
plt.show()
plt.plot(t, X.T[2])
plt.plot(t, X.T[3])
plt.show()
plt.plot(t, X.T[4])
plt.show()

del k_Sa, k_Sb, alpha_a, alpha_b, beta_a, beta_b ## we delete these because we want to fit them in the next code-cell
In [ ]:
def system_solution(constants, t, **kwargs):  ## We wrap the whole integration inside another funciton
  def dX_dt(X, t, k_Sa, k_Sb, alpha_a, alpha_b, beta_a, beta_b): 
    rxn = np.zeros((5,))
    
    rxn[0] = k_Sa*X[0]*X[2]*X[4]
    rxn[1] = k_Sb*X[1]*X[3]*X[4]
    rxn[2] = alpha_b*X[1]*X[4] - beta_a*k_Sa*X[0]*X[2]*X[4]
    rxn[3] = alpha_a*X[0]*X[4] - beta_b*k_Sb*X[1]*X[3]*X[4]
    rxn[4] = -(k_Sa*X[0]*X[2]*X[4] + k_Sb*X[1]*X[3]*X[4])

    return rxn
  k_Sa, k_Sb, alpha_a, alpha_b, beta_a, beta_b = constants
  X0 = [0.1, 0.1, 0, 0, 1]
  tf = integration_time
  t_step = tf*100
  t = np.linspace(0, tf, t_step)
  X, infodict = integrate.odeint(dX_dt, X0, t, args = (k_Sa, k_Sb, alpha_a, alpha_b, beta_a, beta_b), full_output=True)
  return X.T

tf = integration_time
t_step = tf*100
t = np.linspace(0, len(mCherry),  t_step)
indices = [int(x) for x in np.linspace(0, t_step-1, len(mCherry))] ## The timeseries of the ODE and the dataset are different, so sample the ODE timeseries when calculating the MSE
In [ ]:
## Define a function which calculates the mean squared error between the ODE and the real data:

def mse(obs, pred):
    n = len(obs)
    diff = []
    for x in range(n):
        diff.append( (obs[x] - pred[x])**2 )
    mean_squared_error = np.sum(diff) * 1/n
    return mean_squared_error

## Now define an objective function to minimise: This will be the sum of the MSE 

def objective(constants):
  outputs = system_solution(constants, 0)
  minimise1 = mse(normalise(mCherry["F1"]), normalise(outputs[1][indices]))
  minimise2 = mse(normalise(eGFP["F1"]), normalise(outputs[0][indices]))
  minimise = minimise1 + minimise2
  return minimise
In [12]:
from copy import deepcopy
from scipy.optimize import basinhopping
guess = [15, 1, 0.005, 0.002, 0.04, 0.05] # this is a good initial guess I found by trial and error
#guess = ret.x
parameters = deepcopy(guess)
ret = basinhopping(objective, guess, niter=50,   stepsize=10) #Fitting 6 constants to a system of 5 equations can be slow
parameters = ret.x

This is quite a good fit!

In [14]:
outputs = system_solution(ret.x, 0)
print(ret.x)
plt.scatter(expt_time, normalise(mCherry["F1"]), c="r", s=5, alpha=0.5, label="mCherry data")
plt.scatter(expt_time, normalise(eGFP["F1"]), c="g", s=5, alpha=0.4, label="eGFP data")
plt.plot(t[indices], normalise(outputs[1][indices]), c="r", label="mCherry model")
plt.plot(t[indices], normalise(outputs[0][indices]), c="g", label ="eGFP model")
plt.xlabel("Time (min)")
plt.ylabel("Normalised fluorescence")
plt.legend(loc="best")
plt.show()

plt.plot(t[indices], normalise(outputs[2][indices]), c="g", label = "Methionine")
plt.plot(t[indices], normalise(outputs[3][indices]), c="r", label = "Tryptophan")
plt.plot(t[indices], normalise(outputs[4][indices]), label = "Media")
plt.xlabel("Time (min)")
plt.ylabel("Normalised concentration")
plt.legend(loc = "best")
plt.show()

print("MSE is "+str(objective(ret.x)))
[1.49985075e+01 1.04578702e+00 2.89907798e-03 2.95278098e-03
 3.03076902e-02 2.79116824e-01]
MSE is 0.010015771875594874
In [14]: