Multivariate Normal in Python
Contents
Multivariate Normal in Python¶
import numpy as np
Generating random variables¶
Univariate Normal¶
from numpy.random import default_rng
gen = default_rng(100) ## manually setting the seed
type(gen)
numpy.random._generator.Generator
gen = default_rng() # random seed
state = gen.bit_generator.state ## generating the state
state
{'bit_generator': 'PCG64',
'state': {'state': 212435109451986325195384366542246802488,
'inc': 37039534531782789552079828644192678419},
'has_uint32': 0,
'uinteger': 0}
We can save that state to a file and then use it again as follows
gen.bit_generator.state = state
x = gen.standard_normal((100_000, 5))
x.shape
(100000, 5)
x[:,0].mean(), x[:,0].std()
(-0.00024556466153710394, 0.998059330983146)
x.mean(axis=0)
array([-0.00024556, 0.00477713, -0.00098417, -0.00168984, -0.001569 ])
x.std(axis=0)
array([0.99805933, 1.00000398, 1.00143235, 0.99977127, 0.99781166])
x = gen.normal(1.5, 2, (100_000, 5))
x.mean(axis=0)
array([1.49763718, 1.5039621 , 1.49414967, 1.49893494, 1.50338947])
x.std(axis=0)
array([2.00115569, 2.00216282, 2.00429388, 2.0007595 , 2.00494332])
Multivariate Normal¶
mu = np.zeros((5))
# generate a random A, and compute Sigma as the outer product
A = gen.normal(1, 1, (5,5))
Sigma = A@A.T
Sigma
array([[ 4.03180951, 4.73313063, 3.61754384, 3.73787311, 5.6188108 ],
[ 4.73313063, 6.95157449, 3.57366635, 5.17490821, 4.86268933],
[ 3.61754384, 3.57366635, 4.33687998, 5.2157186 , 5.41228928],
[ 3.73787311, 5.17490821, 5.2157186 , 10.58411225, 3.28068836],
[ 5.6188108 , 4.86268933, 5.41228928, 3.28068836, 12.31481605]])
z = gen.multivariate_normal(mean=mu, cov=Sigma, size=1_000_000)
z.shape
(1000000, 5)
z.mean(axis=0)
array([ 1.19777802e-03, 9.50322240e-04, -4.97930161e-05, -3.01139528e-03,
2.90507331e-03])
z.var(axis=0)
array([ 4.02321396, 6.93401097, 4.34047652, 10.57431192, 12.3131452 ])
Sigma.diagonal()
array([ 4.03180951, 6.95157449, 4.33687998, 10.58411225, 12.31481605])
np.diag(Sigma.diagonal())
array([[ 4.03180951, 0. , 0. , 0. , 0. ],
[ 0. , 6.95157449, 0. , 0. , 0. ],
[ 0. , 0. , 4.33687998, 0. , 0. ],
[ 0. , 0. , 0. , 10.58411225, 0. ],
[ 0. , 0. , 0. , 0. , 12.31481605]])
Legacy approach to setting state¶
np.random.seed(42)
st = np.random.get_state()
np.random.set_state(st)
z2 = np.random.multivariate_normal(mean=mu, cov=Sigma, size=10000)
z2[0]
array([-0.75269505, -2.24617757, 0.11399728, -1.41760333, -1.21951135])
z2[0]
array([-0.75269505, -2.24617757, 0.11399728, -1.41760333, -1.21951135])
z2.mean(axis=0)
array([-0.00010955, 0.02067258, -0.00574703, 0.02405158, -0.03230395])
z2.var(axis=0)
array([ 4.14665415, 7.17015529, 4.40328244, 10.603619 , 12.65814749])
Task 1¶
Set gen = default_rng(100)
, and generate 10000 samples from bivariate normal with mean (10, 10) and variances 3 and 1 and covariance 1. Compute the sample covariance matrix without using the np.cov()
method. Compare the result with the obtained using np.cov
.
gen = default_rng(100)
N = 10_000
mean = np.array([10,10])
cov = np.array([[3,1],[1,5]])
x = gen.multivariate_normal(mean=mean, cov=cov, size=N)
u = x - x.mean(0)
cov = u.T@u/(N-1)
print(cov)
print('*'*30)
print(np.cov(x, rowvar=False))
print('*'*30)
np.testing.assert_almost_equal(cov, np.cov(x, rowvar=False))
[[2.96065507 1.00828446]
[1.00828446 5.03174172]]
******************************
[[2.96065507 1.00828446]
[1.00828446 5.03174172]]
******************************
Task 2¶
Let \(z_1\) and \(z_2\) be bivariate normal with means 0, variances 1, and covariance .9. Compute the conditional mean and variance of \(z_1\), given that \(z_2=1\).
rho=.9
sigma1 = 1
sigma2 = 1
mu1=0
mu2=0
z2 = 1
(
mu1 + (z2 - mu2)*(rho*sigma1)/(sigma2),
(1 - rho**2)*sigma1**2
)
(0.9, 0.18999999999999995)
General case¶
Task 3¶
Compute the conditional mean and variance of \(z_1\), given that \(z_2=1\) Using
Sigma12 = np.array([[0.1]])
Sigma22 = np.array([[1.0]])
Sigma21 = np.array([[0.1]])
Sigma11 = np.array([[1.0]])
mu1 = np.array([0])
mu2 = np.array([0])
z2 = np.array([1])
cond_mu = mu1 + (Sigma12 / np.linalg.inv(Sigma22)) * (z2 - mu2)
cond_mu
array([[0.1]])
cond_cov = Sigma11 - Sigma12 @ np.linalg.inv(Sigma22) @ Sigma21
cond_cov
array([[0.99]])
Instead of computing the inverse \(\mathbf{\Sigma}^{-1}_{2 2}\), it is recommended to get \(\mathbf{\Sigma}^{-1}_{2 2} (\mathbf{z}_2 - \boldsymbol \mu_2)\), by solving the matrix equation
using the linalg.solve()
function. Similarly, for \(\mathbf{\Sigma}^{-1}_{2 2} \mathbf{\Sigma}_{2 1}\)
cond_mu = mu1 + Sigma12 @ np.linalg.solve(Sigma22, z2 - mu2)
cond_cov = Sigma11 - Sigma12 @ np.linalg.solve(Sigma22, Sigma21)
cond_mu, cond_cov
(array([0.1]), array([[0.99]]))
Task 4¶
Compute the conditional mean and variance of \(z_1\), given that \(z_2=1\) Using
Sigma12 = np.array([[0.9]])
Sigma22 = np.array([[1.0]])
Sigma21 = np.array([[0.9]])
mu1 = np.array([0])
mu1 = np.array([0])
z2 = np.array([1])
cond_mu = mu1 + Sigma12 @ np.linalg.solve(Sigma22, z2 - mu2)
cond_cov = Sigma11 - Sigma12 @ np.linalg.solve(Sigma22, Sigma21)
cond_mu, cond_cov
(array([0.9]), array([[0.19]]))
Task 5¶
Use A = gen.normal(1, 1, (4,4))
to compute Sigma = A@A.T
Let \(z\) be 4D Normal with covariance Sigma
and partition it into \(z_1\) and \(z_2\) both of which are 2D.
Compute the conditional variance of \(z_1\) given \(z_2\)
A = gen.normal(1, 1, (4,4))
Sigma = A@A.T
indices_1 = [0, 1]
indices_2 = [2, 3]
z2indices = np.zeros(4, bool)
z1indices = z2indices.copy()
z2indices[indices_2] = 1
z1indices[indices_1] = 1
Sigma11 = Sigma[z1indices, :][:, z1indices]
Sigma12 = Sigma[z1indices, :][:, z2indices]
Sigma21 = Sigma[z2indices, :][:, z1indices]
Sigma22 = Sigma[z2indices, :][:, z2indices]
cond_cov = Sigma11 - Sigma12 @ np.linalg.solve(Sigma22, Sigma21)
cond_cov
array([[ 3.95231817, -1.35320667],
[-1.35320667, 0.62295154]])