Hakkel Tamás - 2018.06.16.
The SIRC (Susceptible-Infected-Recovered-Carrier) epidemiological model has four state variables: $S$ (susceptible), $I$ (infected), $R$ (recovered) and $C$ (carrier).
The model expresses that susceptible individuals become infected through contact with infected individuals or with carriers, infected individuals either recover or become carriers, and carriers eventually recover.
The dynamical behaviour of the system is described by the following set of differential equations: $$ \begin{cases} \frac{dS}{dt} = \nu - (\beta I + \epsilon \beta C)S - \mu S \\ \frac{dI}{dt} = (\beta I + \epsilon \beta C)S - \gamma I - \mu I \\ \frac{dC}{dt} = \gamma q I − \Gamma C − \mu C \\ \frac{dR}{dt} = \gamma (1 − q)I + \Gamma C − \mu R \end{cases} $$ $\mu$ is the natural mortality of the individuals, $\nu$ corresponds to the birth rate, $\epsilon$ denotes the transmission rate representing the infections related to carriers. $q$ is the proportion of infectious individuals turning to carriers and $(1 − q)$ is the proportion turning to recovered. $\Gamma$ represents the rate at which carriers become recovered.

import numpy as np
from numpy.linalg import inv
from numpy.linalg import det
from scipy.integrate import odeint
import random
from IPython.core.display import display, HTML
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm as cm
from mpl_toolkits.mplot3d import Axes3D
# Render figures inline in the notebook (IPython magic).
%matplotlib inline
# Global font settings applied to every matplotlib figure below.
font = {'family' : 'sans serif',
'size' : 14}
matplotlib.rc('font', **font)
# I will use that function later to plot simulation and estimation results
def plot(y,labels,total=True):
fig = plt.figure(figsize=(8, 6), dpi=80, facecolor='w', edgecolor='w')
ax = fig.add_subplot(111, axisbelow=True)
if total:
ax.plot(np.sum(y,0), 'c', alpha=0.5, lw=2, label='Total population')
for i,label in enumerate(labels):
ax.plot(y[i], label[0], alpha=0.5, lw=2, label=label[1])
ax.set_xlabel('Time (days)')
ax.set_ylabel('Number')
ax.set_facecolor('#dddddd')
ax.yaxis.set_tick_params(length=0)
ax.xaxis.set_tick_params(length=0)
ax.grid(b=True, which='major', c='w', lw=2, ls='-')
legend = ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
for spine in ('top', 'right', 'bottom', 'left'):
ax.spines[spine].set_visible(False)
def double_ax_plot(Lambda, theta):
    """Plot RLS parameter trajectories on twin y-axes.

    Left axis: the first parameter trajectory (labelled 'v').
    Right axis: the remaining parameter trajectories and the forgetting
    factor `Lambda`.
    """
    fig, ax1 = plt.subplots(figsize=(8, 6), dpi=80, facecolor='w', edgecolor='w')
    ax1.plot(theta[0].T, 'b', label='v')
    ax1.set_xlabel('time (s)')
    ax1.set_ylabel('v', color='b')
    ax1.tick_params('y', colors='b')
    # Second y-axis sharing the same x-axis.
    ax2 = ax1.twinx()
    # FIX: raw strings -- '\g', '\m' and '\l' are invalid escape sequences
    # in plain string literals (DeprecationWarning, an error in future Python).
    ax2.plot(theta[1].T, 'm', label=r'$\gamma$')
    ax2.plot(theta[2].T, 'tab:orange', label=r'$\mu$')
    ax2.plot(Lambda, 'k', label=r'$\lambda$')
    ax2.set_ylabel(r'$\gamma$ and $\mu$', color='r')
    ax2.tick_params('y', colors='r')
    ax1.yaxis.set_tick_params(length=0)
    ax2.yaxis.set_tick_params(length=0)
    ax1.xaxis.set_tick_params(length=0)
    for spine in ('top', 'right', 'bottom', 'left'):
        ax1.spines[spine].set_visible(False)
    fig.legend(bbox_to_anchor=(1.01, .65), loc=2, borderaxespad=0.)
    fig.tight_layout()
def plot_3D(theta_1, theta_2, posterior_density):
    """Surface plot of `posterior_density` over the (theta_1, theta_2) grid."""
    fig = plt.figure(figsize=(8, 6), dpi=80, facecolor='w', edgecolor='w')
    ax = fig.add_subplot(111, projection='3d')
    X, Y = np.meshgrid(theta_1, theta_2)
    ax.set_xlabel(r'$q$', linespacing=4)
    # FIX: raw string -- '\g' is an invalid escape sequence in a plain literal.
    ax.set_ylabel(r'$\gamma$', linespacing=6)
    ax.set_zlabel('probability')
    ax.plot_surface(X, Y, posterior_density)
def table(tuples, width=200):
    """Render an Original-vs-Estimated comparison table as notebook HTML.

    `tuples` is an iterable of (name, original_value, estimated_value)
    triples; numeric values are shown with five decimals.
    """
    base = '''
<table class="table table-striped" style="width: {width}px;margin-left: 30px;">
<thead> <tr> <th></th> <th>Original</th> <th>Estimated</th> </tr> </thead>
<tbody> {rows} </tbody>
</table>
'''
    row_template = '<tr> <th scope="row">{}</th> <td>{:.5f}</td> <td>{:.5f}</td> </tr>'
    body = ''.join(row_template.format(*entry) for entry in tuples)
    display(HTML(base.format(width=width, rows=body)))
# Total population
N = 1000
# Initial number of infected, recovered and carrier individuals, I0, R0, C0.
I0, R0, C0 = 10, 0, 0
# Everyone else, S0, is susceptible to infection initially.
S0 = N - I0 - R0 - C0
# Parameters (in 1/days).
mu = 0.015 # death rate
nu = 14 # number of births per day (absolute inflow, not a per-capita rate)
beta = 0.0002 # infection rate (possibility of infection of individual if exposed to infected individual)
epsilon = 1.2 # transfer rate (infection rate of infected people / infection rate of carriers)
gamma = 0.05 # healing rate (possibility of infected individual turns to carrier or recovered state)
q = 0.8 # carrier-recovered rate of healed people
Gamma = 0.03 # recovery rate of carriers
# A grid of time points (in days)
t = np.arange(200)
def deriv(y, t):
    """Right-hand side of the SIRC ODE system, in scipy.integrate.odeint form.

    `y` is the current state (S, I, R, C); `t` is unused because the system
    is autonomous. The rate parameters are read from the notebook scope.
    """
    S, I, R, C = y
    # Force of infection: direct transmission plus carrier-mediated transmission.
    new_infections = (beta*I + epsilon*beta*C)*S
    dSdt = nu - new_infections - mu*S
    dIdt = new_infections - gamma*I - mu*I
    dRdt = gamma*(1-q)*I + Gamma*C - mu*R
    dCdt = gamma*q*I - Gamma*C - mu*C
    return dSdt, dIdt, dRdt, dCdt
# Initial state vector (order matches deriv's unpacking).
y0 = S0, I0, R0, C0
# Integrate the SIRC ODEs over the time grid; transpose so each row is one compartment.
y = odeint(deriv, y0, t).T
plot(y,(('b','Susceptible'),('r','Infected'),('g','Recovered with immunity'),('k','Carriers')))
# Euler-discretised simulation with multiplicative noise: every rate term is
# scaled by an independent Gaussian factor drawn from N(1, 0.3).
y_noisy = [[S0, I0, R0, C0]]
for _ in t:
    S, I, R, C = tuple(y_noisy[-1])
    n = [random.gauss(1,0.3) for i in range(9)]
    # NOTE(review): n[4] and n[6] are shared between equations so that matching
    # outflow/inflow terms get the same noise realisation -- presumably
    # intentional; confirm.
    dSdt = nu*n[0] - (beta*I*n[1] + epsilon*beta*C*n[2])*S - mu*S*n[3]
    dIdt = (beta*I*n[1] + epsilon*beta*C*n[2])*S - gamma*I*n[4] - mu*I*n[5]
    dRdt = gamma*(1-q)*I*n[4] + Gamma*C*n[6] - mu*R*n[7]
    dCdt = gamma*q*I*n[4] - Gamma*C*n[6] - mu*C*n[8]
    y_noisy.append([S+dSdt, I+dIdt, R+dRdt, C+dCdt])
# Drop the extra trailing state and transpose: rows are S, I, R, C.
y_noisy_multiplicative = np.array(y_noisy)[:-1].T
plot(y_noisy_multiplicative,(('b','Susceptible'),('r','Infected'),('g','Recovered with immunity'),('k','Carriers')))
# Euler-discretised simulation with additive noise: each equation receives an
# independent N(0, 3) disturbance per step.
y_noisy = [(S0, I0, R0, C0)]
for _ in t:
    S, I, R, C = tuple(y_noisy[-1])
    n = [random.gauss(0,3) for i in range(4)]
    dSdt = nu - (beta*I + epsilon*beta*C)*S - mu*S + n[0]
    dIdt = (beta*I + epsilon*beta*C)*S - gamma*I - mu*I + n[1]
    dRdt = gamma*(1-q)*I + Gamma*C - mu*R + n[2]
    dCdt = gamma*q*I - Gamma*C - mu*C + n[3]
    y_noisy.append([S+dSdt, I+dIdt, R+dRdt, C+dCdt])
# Drop the extra trailing state and transpose: rows are S, I, R, C.
y_noisy_additive = np.array(y_noisy)[:-1].T
plot(y_noisy_additive,(('b','Susceptible'),('r','Infected'),('g','Recovered with immunity'),('k','Carriers')))
# Simple additive measurement noise applied directly to the clean ODE solution.
noise = np.random.normal(0, 7, y.shape)
y_noisy_simple = y + noise
plot(y_noisy_simple,(('b','Susceptible'),('r','Infected'),('g','Recovered with immunity'),('k','Carriers')))
There are multiple options to add noise to the simulation, but I have chosen to use multiplicative white noise because it is the most realistic.
# Use the multiplicative-noise data set for all of the estimations below.
y_noisy = y_noisy_multiplicative
$$ \begin{cases} \frac{dS}{dt} = \nu - (\beta I + \epsilon \beta C)S - \mu S \\ \frac{dI}{dt} = (\beta I + \epsilon \beta C)S - \gamma I - \mu I \\ \frac{dC}{dt} = \gamma q I − \Gamma C − \mu C \\ \frac{dR}{dt} = \gamma (1 − q)I + \Gamma C − \mu R \end{cases} $$
First we want to estimate the following parameters: $\nu, \mu, \gamma$
Let's rewrite our equations to discrete time: $$ \begin{cases} S(k) = S(k-1) + \nu - (\beta I(k-1) + \epsilon \beta C(k-1))S(k-1) - \mu S(k-1) \\ I(k) = I(k-1) + (\beta I(k-1) + \epsilon \beta C(k-1))S(k-1) - \gamma I(k-1) - \mu I(k-1) \\ C(k) = C(k-1) + \gamma q I(k-1) − \Gamma C(k-1) − \mu C(k-1) \\ R(k) = R(k-1) + \gamma (1 − q)I(k-1) + \Gamma C(k-1) − \mu R(k-1) \end{cases} $$
We can add up the first two equations, and get the following equation: $$S(k) + I(k) = S(k-1) + I(k-1) + \nu - \gamma I(k-1) - \mu (S(k-1) + I(k-1))$$
Let us introduce some auxiliary variables: $$SI_{diff}(k) = S(k) - S(k-1) + I(k) - I(k-1)$$ $$SI_{sum}(k-1) = S(k-1) + I(k-1)$$
That way, we have got a linear equation with the desired parameters: $$SI_{diff}(k) = \nu - \gamma I(k-1) - \mu (SI_{sum}(k-1))$$
Let $n$ denote the number of time steps.
So the regressor is: $$X = \begin{bmatrix} 1 && 1 && ... && 1 \\ I(0) && I(1) && ... && I(n-1) \\ S(0) + I(0) && S(1) + I(1) && ... && S(n-1) + I(n-1)\end{bmatrix}$$
And the output vector is: $$y = \begin{bmatrix}(S(1) - S(0) + I(1) - I(0)) \\ (S(2) - S(1) + I(2) - I(1)) \\ (S(3) - S(2) + I(3) - I(2)) \\ \vdots \\ (S(n) - S(n-1) + I(n) - I(n-1))\end{bmatrix}^T$$
We get the estimation in the following way: $$\hat{\theta} = \bigg[X*X^T\bigg]^{-1} * \bigg[X*y^T\bigg]$$
# OLS estimation of (nu, gamma, mu) from SI_diff = nu - gamma*I - mu*SI.
ones = np.ones(y_noisy.shape[1]-1)
# BUG FIX: the infected compartment is row 1 of y_noisy (row 0 is S).
I = y_noisy[1,:-1]
SI = y_noisy[0,:-1] + y_noisy[1,:-1]
dSdI = y_noisy[0,1:]-y_noisy[0,:-1] + y_noisy[1,1:]-y_noisy[1,:-1]
# The I and SI regressor rows enter the model with minus signs, so they are
# negated here to make theta estimate (+nu, +gamma, +mu) directly, matching
# the labels of the table below.
X = np.matrix(np.vstack((ones,-I,-SI)))
Y = np.matrix(dSdI)
theta = inv(X*X.T) * (X*Y.T)
table((('ν',nu,theta[0,0]),('$\gamma$',gamma,theta[1,0]),('$\mu$',mu,theta[2,0])))
Though $\nu$ and $\gamma$ values are correct, estimation of $\mu$ was unsuccessful.
To overcome that problem, I try another linearization option: I add up all four equations, to get only $\nu$ and $\mu$ as parameters. $$\begin{aligned} S(k) + I(k) + R(k) + C(k) = {} & S(k-1) + I(k-1) + R(k-1) + C(k-1) \\ & + \nu - \mu (S(k-1) + I(k-1) + R(k-1) + C(k-1))\end{aligned}$$
Auxiliary variables are the following in that case: $$SI_{diff}(k) = S(k) - S(k-1) + I(k) - I(k-1) + R(k) - R(k-1) + C(k) - C(k-1)$$
$$SI_{sum}(k-1) = S(k-1) + I(k-1) + R(k-1) + C(k-1)$$
And the linearized equation: $$SI_{diff}(k) = \nu - \mu (SI_{sum}(k-1))$$
So the regressor is: $$X = \begin{bmatrix} 1 && S(0) + I(0) + R(0) + C(0) \\ 1 && S(1) + I(1) + R(1) + C(1) \\ 1 && S(2) + I(2) + R(2) + C(2) \\ \vdots && \vdots \\ 1 && S(n-1) + I(n-1) + R(n-1) + C(n-1)\end{bmatrix}^T$$
And the output vector is: $$y = \begin{bmatrix}S(1) - S(0) + I(1) - I(0) + R(1) - R(0) + C(1) - C(0) \\ S(2) - S(1) + I(2) - I(1) + R(2) - R(1) + C(2) - C(1) \\ S(3) - S(2) + I(3) - I(2) + R(3) - R(2) + C(3) - C(2) \\ \vdots \\ S(n) - S(n-1) + I(n) - I(n-1) + R(n) - R(n-1) + C(n) - C(n-1)\end{bmatrix}^T$$
# OLS estimation of (nu, mu) from the summed equation dSIRC = nu - mu*SIRC.
ones = np.ones(y_noisy.shape[1]-1)
SIRC = y_noisy[0,:-1]+y_noisy[1,:-1]+y_noisy[2,:-1]+y_noisy[3,:-1]
dSdIdRdC = y_noisy[0,1:]-y_noisy[0,:-1] \
    +y_noisy[1,1:]-y_noisy[1,:-1] \
    +y_noisy[2,1:]-y_noisy[2,:-1] \
    +y_noisy[3,1:]-y_noisy[3,:-1]
# BUG FIX: the model is dSIRC = nu - mu*SIRC, so the SIRC row is negated to
# make theta[1] estimate +mu (as labelled in the table) instead of -mu.
X = np.matrix(np.vstack((ones, -SIRC)))
Y = np.matrix(dSdIdRdC)
theta = inv(X*X.T) * X * Y.T
table((('ν',nu,theta[0,0]),('$\mu$',mu,theta[1,0])))
Unfortunately that modification did not yield the desired output; moreover, it even made the results worse. That might be the effect of the autoregressive noise added separately to the groups, so when I added these groups together, I might have amplified that noise.
Even though the parameters did not change during the simulation, in a real case they definitely would change, so I implemented the recursive form as well because it is capable of handling changing parameters. Moreover, the recursive least squares method has the advantages of online estimation and lower computational cost. For the estimation I used the first linearized form above (OLS version #1).
Initial value for $P(k)$: $$P(t_0) = \bigg[\sum_{k=0}^{t_0}\alpha_k*X(k)*X^T(k)\bigg]^{-1}$$ where $X(k)$ is the $k^{th}$ column of matrix $X$.
Initial value for $\theta$: $$\hat{\theta}(t_0) = P(t_0) \sum_{k=0}^{t_0}\alpha_k*X(k)*y^T(k)$$
# Forgetting factor, increasing asymptotically towards 1.
Lambda = 1-np.exp(-t/20 - 1)
alpha = 1
# Number of initial samples used for the batch initialisation of P and theta.
t0 = 30
ones = np.ones(y_noisy.shape[1]-1)
# BUG FIX: the first linearized form regresses on I (row 1 of y_noisy),
# not on S (row 0) as the original code did.
I = y_noisy[1,:-1]
dSdI = y_noisy[0,1:]-y_noisy[0,:-1] + y_noisy[1,1:]-y_noisy[1,:-1]
SI = y_noisy[0,:-1]+y_noisy[1,:-1]
# Rows negated so theta estimates (+nu, +gamma, +mu) directly (cf. OLS #1).
X = np.matrix(np.vstack((ones,-I,-SI)))
Y = np.matrix(dSdI)
theta = np.matrix(np.zeros((3,X.shape[1]-t0+1)))
P = np.linalg.inv(alpha*X[:,:t0]*X[:,:t0].T)
theta[:,0] = P * alpha*X[:,:t0]*Y[:,:t0].T
print('Initial P matrix:\n',P)
# FIX: dropped a no-op .format(theta.shape) call on a placeholder-free string.
print('\nInitial theta vector:\n',theta[:,0])
$$P(t) = \frac{1}{\lambda(t)} \bigg[ P(t-1) - \frac{P(t-1) X(t) X^T(t) P(t-1)}{\frac{\lambda(t)}{\alpha_t} + X^T(t) P(t-1) X(t)} \bigg]$$ $$\hat{\theta}(t) = \hat{\theta}(t-1) + \alpha_t P(t) X(t) \Big[y(t) - X^T \hat{\theta}(t-1) \Big] $$
In order to make convergence faster without losing much accuracy, I decided to increase the forgetting parameter ($\lambda$) asymptotically to 1.
# Recursive least squares with a time-varying forgetting factor Lambda[i].
for i in range(t0,X.shape[1]):
    # Covariance update (matrix-inversion-lemma form of the RLS recursion).
    P = 1 / Lambda[i] * (P - (P * X[:,i] * X[:,i].T * P) / (Lambda[i]/alpha + X[:,i].T * P * X[:,i]))
    # Parameter update driven by the one-step prediction error.
    theta[:,i-t0+1] = theta[:,i-t0] + alpha * P * X[:,i] * (Y[:,i] - X[:,i].T * theta[:,i-t0])
table((('ν',nu,theta[0,-1]),('$\gamma$',gamma,theta[1,-1]),('$\mu$',mu,theta[2,-1])))
double_ax_plot(Lambda,theta)
Although, the estimation of the parameter $\mu$ was unsuccessful, estimations for $\nu$ and $\gamma$ values are fairly accurate, so I will use them in later estimations.
# Keep the reliable RLS estimates for use in the later estimation steps.
nu_est = theta[0,-1]
gamma_est = theta[1,-1]
Now let us examine the number of people getting infected, and estimate $\beta$ and $\epsilon$ parameters.
From the system of equations $$ \begin{cases} \frac{dS}{dt} = \nu - (\beta I + \epsilon \beta C)S - \mu S \\ \frac{dI}{dt} = (\beta I + \epsilon \beta C)S - \gamma I - \mu I \\ \frac{dC}{dt} = \gamma q I − \Gamma C − \mu C \\ \frac{dR}{dt} = \gamma (1 − q)I + \Gamma C − \mu R \end{cases} $$
we get an estimation for the number of people getting infected $\big((\beta I + \epsilon \beta C)S\big)$ by subtracting the first equation from the second: $$\frac{dI}{dt} - \frac{dS}{dt} = - \nu + 2 (\beta I + \epsilon \beta C)S - \gamma I - \mu (I - S)$$ and then we rearrange the resulting equation: $$\frac{\frac{dI}{dt} - \frac{dS}{dt} + \nu + \gamma I + \mu (I - S)}{2} = (\beta I + \epsilon \beta C)S$$
Let's write it in discrete time form, and also add an error term: $$\frac{(I(k)-I(k-1)) - (S(k)-S(k-1)) + \nu + \gamma I(k-1) + \mu (I(k-1) - S(k-1))}{2} = (\beta I(k-1) + \epsilon \beta C(k-1))S(k-1) + \varepsilon(k)$$
Introducing auxiliary variables $$\beta_2 = \epsilon \beta$$
$$\Delta I_{infected} = \frac{(I(k)-I(k-1)) - (S(k)-S(k-1)) + \nu + \gamma I(k-1) + \mu (I(k-1) - S(k-1))}{2}$$ we get the following form: $$\Delta I_{infected} = \beta I(k-1) S(k-1) + \beta_2 C(k-1)S(k-1) + \varepsilon(k)$$
Because it is an ARMAX process, we know that $\varepsilon$ is not independent of $I$, $S$ and $C$. However, knowing the original parameters, we can also prove it, by calculating the covariance:
# Build the "newly infected" signal and its residual against the true model.
dI = y_noisy[1,1:]-y_noisy[1,:-1]
dS = y_noisy[0,1:]-y_noisy[0,:-1]
S = y_noisy[0,:-1]
I = y_noisy[1,:-1]
C = y_noisy[3,:-1]
# BUG FIX: per the derivation above, the number of new infections is
# (dI - dS + nu + gamma*I + mu*(I - S)) / 2 -- the original code used
# mu*(I + S) and omitted the division by 2.
delta_I_infected = (dI - dS + nu + gamma*I + mu*(I - S)) / 2
e = delta_I_infected - beta*I*S - epsilon*beta*C*S
np.set_printoptions(precision=3)
print('Covariance between I*S and the error:\n', np.cov(I*S, e))
print('\nCovariance between C*S and the error:\n', np.cov(C*S, e))
From these covariance matrices we can clearly see that the variances of the $I*S$ and $C*S$ variables are very large, and that the correlations between $I*S$ and $\varepsilon$ and between $C*S$ and $\varepsilon$ are strong, while the variance of the error term itself is relatively low compared to the other entries.
The desired output is: $y = \Delta I_{infected}$
And the input vector is: $u = \begin{bmatrix}I(k)*S(k) \\ C(k)*S(k)\end{bmatrix}$
Then the new equation using that annotation: $$y(k) = \begin{bmatrix}\beta \\ \beta_2\end{bmatrix} u(k-1) + \varepsilon(k)$$
Then we calculate the LS estimation for the parameters: $$\hat{\theta}_N^{(1)} = \hat{\theta}_N^{LS} = (X * X^T)^{-1} * (X * Y^T)$$
# OLS fit of delta_I_infected = beta*(I*S) + beta_2*(C*S).
Y = np.matrix(delta_I_infected)
X = np.matrix(np.vstack((I*S,C*S)))
theta_ls = inv(X*X.T) * (X*Y.T)
np.set_printoptions(precision=6)
print(theta_ls)
From that result, we can see that the least squares approximation overestimates the parameters.
The equation using another notation: $$\hat{A}^{(1)}_N(q)y(k) = \hat{B}^{(1)}_N(q)u(k)$$
Then we introduce a new instrumental variable as $$z^{(1)}(k) = \hat{G}^{(1)}_N u(k)$$ where $$\hat{G}^{(1)}_N = \frac{\hat{A}^{(1)}_N(q)}{\hat{B}^{(1)}_N(q)}$$
# First instrumental variable, generated recursively from the inputs.
# NOTE(review): theta_ls.T*X[:,i] is a scalar that is broadcast to both rows
# of z, so the two instrument rows come out identical -- this is what the
# "identical instrumental variables" remark below refers to.
z = np.zeros(X.shape)
for i in range(1,X.shape[1]):
    z[:,i] = -1*z[:,i-1] + theta_ls.T*X[:,i]
def reshape(array):
    """Return `array` viewed as a single row, i.e. with shape (1, len(array))."""
    row_shape = (1, array.shape[0])
    return array.reshape(row_shape)
# Stack output, instrument rows and residual into one array for plotting.
values = np.concatenate((reshape(delta_I_infected),z,reshape(e)),axis=0)
plot(values,total=False,
    labels=(('r','$\Delta I_{infected}$'),('b','First instrumental variable for $IS$'),
    ('m','First instrumental variable for $CS$'),('k','Noise/Error')))
The plot shows that the instrumental variables for $I S$ and $C S$ are identical (which is rather bad news) and are mostly independent of the noise.
The regressor for the new instrumental variables is: $$\xi^{(1)}_N = \begin{bmatrix}z^{(1)}_0(0) && z^{(1)}_0(1) && z^{(1)}_0(2) && ... && z^{(1)}_0(n-1) \\ z^{(1)}_1(0) && z^{(1)}_1(1) && z^{(1)}_1(2) && ... && z^{(1)}_1(n-1)\end{bmatrix}$$
Then we calculate the first IV estimation for the parameters: $$\hat{\theta}_N^{(2)} = \hat{\theta}_N^{IV} = (\xi^{(1)}_N * X^T)^{-1} * (\xi^{(1)}_N * Y^T)$$
# generate a matrix composed of the transpose of instrument vectors
xi_1 = np.matrix(np.vstack((z[0,:],z[1,:])))
# first IV estimate -- left commented out because xi_1 * X.T is (near-)singular:
# both rows of z are produced by the same scalar recursion, so they are equal.
#theta_iv_1 = np.linalg.inv(xi_1 * X.T) * (xi_1 * Y.T)
#print(theta_iv_1)
Either the xi_1 * X.T matrix is singular, or the estimated theta is zero, so it turned out that the method I tried here was not good enough to estimate the value of $\beta$ correctly. $\beta$ is a particularly small number, so it might need a more advanced method to estimate it. I suspect that one of my assumptions during the linearization step might have been wrong.
Now let us examine the number of people getting infected, and estimate $\beta$, $\epsilon$ and $\mu$ parameters.
From the system of equations $$ \begin{cases} \frac{dS}{dt} = \nu - (\beta I + \epsilon \beta C)S - \mu S \\ \frac{dI}{dt} = (\beta I + \epsilon \beta C)S - \gamma I - \mu I \\ \frac{dC}{dt} = \gamma q I − \Gamma C − \mu C \\ \frac{dR}{dt} = \gamma (1 − q)I + \Gamma C − \mu R \end{cases} $$ we pick the second equation to estimate the parameters above.
Let's write it to discrete time form, and also add error term: $$I(k) - I(k-1) = (\beta I(k-1) + \epsilon \beta C(k-1))S(k-1) - \gamma I(k-1) - \mu I(k-1) + \varepsilon(k)$$
After some rearrangement: $$I(k) = I(k-1)(1 - \gamma - \mu) + \beta I(k-1)*S(k-1) + \epsilon \beta C(k-1) S(k-1) + \varepsilon(k)$$
Introducing auxiliary variables $$\beta_2 = \epsilon \beta$$
$$\beta_3 = 1 - \gamma - \mu$$ we get the following form: $$I(k) = \beta_3 I(k-1) + \beta I(k-1)S(k-1) + \beta_2 C(k-1) S(k-1) + \varepsilon(k)$$
Because it is an ARMAX process, we know that $\varepsilon$ is not independent of $I$, $S$ and $C$. However, knowing the original parameters, we can also prove it, by calculating the covariance:
# Data for the ARMAX form I(k) = beta_3*I(k-1) + beta*IS + beta_2*CS + e(k).
I_k = y_noisy[1,1:]      # I(k)
I_k_1 = y_noisy[1,:-1]   # I(k-1)
IS = y_noisy[1,:-1]*y_noisy[0,:-1]   # I(k-1)*S(k-1)
CS = y_noisy[3,:-1]*y_noisy[0,:-1]   # C(k-1)*S(k-1)
# Residual when the true parameter values are plugged in.
e = I_k - (1-gamma-mu)*I_k_1 - beta*IS - epsilon*beta*CS
np.set_printoptions(precision=3)
print('Covariance between I and the error:\n', np.cov(I_k_1, e))
print('\nCovariance between I*S and the error:\n', np.cov(IS, e))
print('\nCovariance between C*S and the error:\n', np.cov(CS, e))
From these covariance matrices we can clearly see that the variances of the $I*S$ and $C*S$ variables are very large, and that the correlations between $I*S$ and $\varepsilon$ and between $C*S$ and $\varepsilon$ are strong, while the variance of the error term itself is relatively low compared to the other entries.
The desired output is: $y = I(k)$
And the input vector is: $u = \begin{bmatrix}I(k-1)*S(k-1) \\ C(k-1)*S(k-1)\end{bmatrix}$
Then the new equation using that annotation: $$y(k) = \beta_3 y(k-1) + \begin{bmatrix}\beta \\ \beta_2\end{bmatrix} u(k-1) + \varepsilon(k)$$
The regressor is: $$X = \begin{bmatrix}y(0) && y(1) && y(2) && ... && y(n-1) \\ u(0) && u(1) && u(2) && ... && u(n-1)\end{bmatrix}$$
Then we calculate the LS estimation for the parameters: $$\hat{\theta}_N^{(1)} = \hat{\theta}_N^{LS} = (X * X^T)^{-1} * (X * Y^T)$$
# OLS fit: output I(k); regressor rows I(k-1), I(k-1)S(k-1), C(k-1)S(k-1).
Y = np.matrix(y_noisy[1,1:])
u = np.matrix(np.vstack((IS,CS)))
X = np.matrix(np.vstack((y_noisy[1,:-1],u)))
theta_ls = inv(X*X.T) * (X*Y.T)
table((('β$_3 = 1-\gamma-\mu$',1-gamma-mu,theta_ls[0,0]),
    ('β',beta,theta_ls[1,0]),
    ('β$_2 = \epsilon$ β',epsilon*beta,theta_ls[2,0])),width=400)
From that result, we can see that the least squares approximation generally overestimates the parameters.
The equation using another notation: $$\hat{A}^{(1)}_N(q)y(k) = \hat{B}^{(1)}_N(q)u(k)$$
Then we introduce a new instrumental variable as $$z^{(1)}(k) = \hat{G}^{(1)}_N u(k)$$ where $$\hat{G}^{(1)}_N = \frac{\hat{A}^{(1)}_N(q)}{\hat{B}^{(1)}_N(q)}$$
# Instrumental variable: filter the inputs through the estimated model.
z = np.zeros(Y.shape)
# BUG FIX: the CS input must be weighted by theta_ls[2] (beta_2), not by
# theta_ls[1] (beta) a second time. Scalar [k,0] indexing avoids assigning
# 1x1 matrices into the ndarray.
for i in range(1,X.shape[1]):
    z[:,i] = theta_ls[0,0]*z[:,i-1] + theta_ls[1,0]*IS[i-1] + theta_ls[2,0]*CS[i-1]
def reshape(array):
    """View the 1-D input as a single-row 2-D array of shape (1, n)."""
    n = array.shape[0]
    return array.reshape(1, n)
np.set_printoptions(precision=3)
# Check that the instrument is (mostly) uncorrelated with the residual.
print('Covariance between IV and the error:\n', np.cov(z, e))
values = np.concatenate((reshape(I_k),z,reshape(e)),axis=0)
plot(values,total=False,
    labels=(('r','$I(k)$'),('b','First instrumental variable'),('k','Noise/Error')))
The covariance matrix and the plot shows that first instrumental variable is mostly independent from noise.
The regressor for the new instrumental variables is: $$\xi^{(1)}_N = \begin{bmatrix}z^{(1)}(0) && z^{(1)}(1) && z^{(1)}(2) && ... && z^{(1)}(n-1) \\ u_1(0) && u_1(1) && u_1(2) && ... && u_1(n-1) \\ u_2(0) && u_2(1) && u_2(2) && ... && u_2(n-1) \end{bmatrix}$$
Then we calculate the first IV estimation for the parameters: $$\hat{\theta}_N^{(2)} = \hat{\theta}_N^{IV} = (\xi^{(1)}_N * X^T)^{-1} * (\xi^{(1)}_N * Y^T)$$
# generate a matrix composed of the transpose of instrument vectors
# NOTE(review): z enters with a negative sign; a sign flip on an instrument
# row does not invalidate the IV estimate, but it flips the sign of the
# corresponding coefficient -- confirm it matches the A(q) convention used
# when theta_iv_1[0] is consumed below.
xi_1 = np.matrix(np.vstack((-z,IS,CS)))
# first IV estimate
theta_iv_1 = np.linalg.inv(xi_1 * X.T) * (xi_1 * Y.T)
print(theta_iv_1)
Compute the equation error based on the previous IV estimate: $$\hat{w}^2_N(k) = \hat{A}^{(2)}_N(q)y(k) - \hat{B}^{(2)}_N(q)u(k)$$ where $\hat{A}^{(2)}_N(q)$ and $\hat{B}^{(2)}_N(q)$ calculated in the previous step.
Postulate an AR model of order $n_a + n_b = 2$ based on $w_2$ and estimate its parameters. The regressor is: $$X_L = \begin{bmatrix}\hat{w}^2_N(0) && \hat{w}^2_N(1) && \hat{w}^2_N(2) && ... && \hat{w}^2_N(n-1) \\ \hat{w}^2_N(1) && \hat{w}^2_N(2) && \hat{w}^2_N(3) && ... && \hat{w}^2_N(n) \end{bmatrix}$$
# Equation error w2(k) = A_hat(q)y(k) - B_hat(q)u(k) from the first IV estimate.
w2 = np.zeros(I_k.shape)
# BUG FIX: the CS term must use theta_iv_1[2] (beta_2), not theta_iv_1[1]
# (beta) a second time. Scalar [k,0] indexing avoids 1x1-matrix assignments.
for i in range(1,I_k.shape[0]):
    w2[i] = I_k[i] + theta_iv_1[0,0]*I_k[i-1] - theta_iv_1[1,0]*IS[i-1] - theta_iv_1[2,0]*CS[i-1]
# AR(2) model of the equation error.
Y_L = np.matrix(w2[2:])
X_L = np.matrix(np.vstack((w2[1:-1],w2[:-2])))
L_est = np.linalg.inv(X_L* X_L.T) * (X_L * Y_L.T)
plt.plot(w2)
print(L_est)
We introduce again a new instrumental variable as $$z^{(2)}(k) = \hat{G}^{(2)}_N u(k)$$ where $$\hat{G}^{(2)}_N = \frac{\hat{A}^{(2)}_N(q)}{\hat{B}^{(2)}_N(q)}$$
So the regressor is: $$\xi^{(2)}_N = \begin{bmatrix}z^{(2)}(0) && z^{(2)}(1) && z^{(2)}(2) && ... && z^{(2)}(n-1) \\ u_1(0) && u_1(1) && u_1(2) && ... && u_1(n-1) \\ u_2(0) && u_2(1) && u_2(2) && ... && u_2(n-1)\end{bmatrix}$$
Then we calculate the first IV estimation for the parameters: $$\hat{\theta}_N^{(3)} = \hat{\theta}_N^{IV} = (\xi^{(2)}_N * X^T)^{-1} * (\xi^{(2)}_N * Y^T)$$
# Second instrumental variable, filtered with the first IV estimate.
z2 = np.zeros(Y.shape)
# BUG FIX: the CS input must be weighted by theta_iv_1[2] (beta_2), not
# theta_iv_1[1] (beta) a second time.
for i in range(1,X.shape[1]):
    z2[:,i] = theta_iv_1[0,0]*z2[:,i-1] + theta_iv_1[1,0]*IS[i-1] + theta_iv_1[2,0]*CS[i-1]
def reshape(array):
    """Turn the 1-D input into a 2-D row vector of shape (1, n)."""
    length = array.shape[0]
    return array.reshape((1, length))
# Compare the model output with the second instrumental variable.
values = np.concatenate((reshape(I_k),z2),axis=0)
plot(values,total=False,labels=(('r','$I(k)$'),('b','Second instrumental variable')))
# generate a matrix composed of the transpose of instrument vectors
xi_2 = np.matrix(np.vstack((-z2,IS,CS)))
# second (refined) IV estimate
theta_iv_2 = np.linalg.inv(xi_2 * X.T) * (xi_2 * Y.T)
table((('β$_3 = 1-\gamma-\mu$',1-gamma-mu,theta_iv_2[0,0]),
    ('β',beta,theta_iv_2[1,0]),
    ('β$_2 = \epsilon$ β',epsilon*beta,theta_iv_2[2,0])),width=400)
The result of the IV method is almost the same as that of the LS method. However, these results are reasonably accurate, so I can use them in the further tasks.
# Recover the physical parameters from the final IV estimates.
mu_est = 1 - theta_iv_2[0,0] - gamma_est   # from beta_3 = 1 - gamma - mu
beta_est = theta_iv_2[1,0]
epsilon_est = theta_iv_2[2,0] / beta_est   # from beta_2 = epsilon * beta
Now I would like to estimate $\Gamma$ and $q$ parameters. To do that I will use the third equation from the equation system: $$\frac{dC}{dt} = \gamma q I − \Gamma C − \mu C$$ And in dicrete time form: $$C(k) - C(k-1) = \gamma q I(k-1) − \Gamma C(k-1) − \mu C(k-1)$$
I already have a reasonable estimate for $\mu$, so I exclude it from investigation: $$C(k) - C(k-1) + \mu C(k-1) = \gamma q I(k-1) − \Gamma C(k-1)$$
Intoducing auxiliary variables: $$\theta: \begin{bmatrix}\gamma q \\ \Gamma\end{bmatrix}$$
$$y(k) = C(k) - C(k-1) + \mu C(k-1)$$
$$u_1(k) = \gamma I(k)$$
$$u_2(k) = C(k)$$
So I can rewrite the equation: $$y(k) = \theta_1 u_1(k-1) − \theta_2 u_2(k-1)$$
Data set of previous values: $$D^{k} = \begin{bmatrix}y(1) && y(2) && y(3) && ... && y(k) \\ -u_1(0) && -u_1(1) && -u_1(2) && ... && -u_1(k-1) \\ -u_2(0) && -u_2(1) && -u_2(2) && ... && -u_2(k-1) \end{bmatrix}$$
Arbitrary values for $\theta_{initial}$: $\begin{bmatrix} 0.3 \\ 0.3 \end{bmatrix}$
$\sigma_e = 0.3$
$\Sigma = \begin{bmatrix}\sigma_e && 0 \\ 0 && \sigma_e\end{bmatrix}$
Initial probability for a given $\theta$ vector (multivariate normal distribution): $$p(\theta \mid D^0)=\frac{1}{\sqrt{(2 \pi)^2 det(\Sigma)}}exp\bigg({-\frac{1}{2}(\theta-\theta_{initial})\Sigma^{-1}(\theta-\theta_{initial})^T}\bigg)$$
Recursive formula to compute the probability of a given $\theta$ vector (a 1D normal distribution is assumed for $y(k)$): $$p(\theta \mid D^{k}) \propto p(\theta \mid D^{k-1})*p(y(k) \mid D^{k}) = \\ p(\theta \mid D^{k-1})*\frac{1}{\sqrt{2 \pi \sigma_e^2}}exp\bigg({\frac{-(y(k) - \theta_1 u_1(k-1) + \theta_2 u_2(k-1))^2}{2\sigma_e^2}}\bigg)$$
def calc_prob(D, theta, sigma):
    """Unnormalised posterior density of the parameter vector `theta`.

    Parameters
    ----------
    D : data matrix; each column k >= 1 holds (y(k), -u1(k-1), u2(k-1)) so
        that D[:,k]*theta.T is the model residual at step k (column 0 unused).
    theta : 1x3 np.matrix with the first entry fixed to 1.
    sigma : scalar standard deviation used both for the Gaussian prior and
        for the measurement noise.

    Returns the prior density at `theta` multiplied by the Gaussian
    likelihood of every residual (a 1x1 matrix).
    """
    Sigma = sigma * np.eye(3)
    theta_init = np.matrix([1,0.5,0.03])
    diff = theta - theta_init
    # BUG FIX: the Gaussian exponent must be negative, and the normalisation
    # constant is 1/sqrt((2*pi)^3 * det(Sigma)) for the 3-component prior.
    prob = 1/np.sqrt((2*np.pi)**3 * np.linalg.det(Sigma)) * np.exp(-.5*diff*np.linalg.inv(Sigma)*diff.T)
    for i in range(1,D.shape[1]):
        # BUG FIX: parenthesise the normalisation (the original computed
        # sigma/sqrt(2*pi)) and use the standard exponent -r^2/(2*sigma^2).
        prob = prob * 1/(np.sqrt(2*np.pi)*sigma) * np.exp(-(D[:,i]*theta.T)**2 / (2*sigma**2))
    return prob
# Grid search of the posterior over theta_1 (= gamma*q) and theta_2 (= Gamma).
theta_1 = np.linspace(0,1*gamma_est,20)
theta_2 = np.linspace(0,1,20)
posterior_density = np.zeros((len(theta_1),len(theta_2)))
sigma = 0.3
dC_muC = y_noisy[2,1:]-(1-mu_est)*y_noisy[2,:-1]
gammaI = gamma_est*y_noisy[1,:-1]
C = y_noisy[3,:-1]
# Model: y(k) = t1*u1(k-1) - t2*u2(k-1), so the residual is y - t1*u1 + t2*u2.
# BUG FIX: the C row must enter with +C (not -C) so that D[:,k]*[1,t1,t2]^T
# equals that residual. Divided by N to keep the exponents in range.
D = np.vstack((dC_muC,-gammaI,C))/N
for i,t_1 in enumerate(theta_1):
    for j,t_2 in enumerate(theta_2):
        posterior_density[i,j] = calc_prob(D,np.matrix([1,t_1,t_2]),sigma)
# BUG FIX: axis 0 of posterior_density indexes theta_1 (= gamma*q) and axis 1
# indexes theta_2 (= Gamma); the original unpacking swapped the two indices
# and the table compared the Gamma estimate against gamma.
max_q, max_Gamma = np.unravel_index(np.argmax(posterior_density, axis=None), posterior_density.shape)
table((('$q$',q,theta_1[max_q]/gamma_est),('$\Gamma$',Gamma,theta_2[max_Gamma])))
plot_3D(theta_1/gamma_est,theta_2,posterior_density)
Obviously, something went wrong here, but I cannot find the reason of it, so I run a simple LS estimation to figure out whether the code or the mathematical model is wrong.
# Sanity-check LS fit of y(k) = q*(gamma*I(k-1)) - Gamma*C(k-1).
Y = np.matrix(dC_muC)
# BUG FIX: the C regressor enters the model with a minus sign, so it is
# negated here to make theta estimate (+q, +Gamma) directly.
X = np.matrix(np.vstack((gammaI,-C)))
theta = inv(X*X.T) * (X*Y.T)
print(theta)
It also gave false result, so I certainly made some mistake at the linearization.