Introduction to AR and MA models

Time series models

  • aim to capture complex temporal dependence

  • build by combining processes with simple dependence structure - innovations

Innovations

ε={εt},    E(εt)=0,    var(εt)=σ2
  • white noise

  • martingale difference

  • iid

Linear time series models

  • The time series z and the innovation ε are linearly related

z=Aε
  • or more generally, the transformation εz is affine

z=μ+Aε
  • with Gaussian innovations p(ε)N(0,σ2I)

z=μ+AεN(μ,Σ),Σ=σ2AA

Moving Average (MA) models

zt=i=0qθiεti

MA(1)

zt=εt+θεt1
z1=θε0+ε1z2=θε1+ε2zT=θεT1+εT
[z1z2z3z4zT]z=[θ1000000θ1000000θ1000000θ00000000θ1]A[ε0ε1ε2ε3ε4εT]ε

Stationarity conditions

  • constant mean?

Ezt=0
  • constant variance?

γ(0)=σ2(1+θ2)
  • constant covariances?

for lag h=1

γ(1)=cov(zt,zt1)=E(ztzt1)=E((εt+θεt1)(εt1+θεt2))=E(εtεt1+θεt12+θεtεt2+θ2εt1εt2)=θσ2

for lag h2

γ(h)=cov(zt,zth)=E((εt+θεt1)(εth+θεth1))=0
import warnings
warnings.simplefilter("ignore", FutureWarning)

import matplotlib.pyplot as plt
import numpy as np
from statsmodels.tsa.arima_process import arma_acovf
from scipy.linalg import toeplitz

from matplotlib import rc
rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True)


plt.rcParams.update({'ytick.left' : True,
                     "xtick.bottom" : True,
                     "ytick.major.size": 0,
                     "ytick.major.width": 0,
                     "xtick.major.size": 0,
                     "xtick.major.width": 0,
                     "ytick.direction": 'in',
                     "xtick.direction": 'in',
                     'ytick.major.right': False,
                     'xtick.major.top': True,
                     'xtick.top': True,
                     'ytick.right': True,
                     'ytick.labelsize': 18,
                     'xtick.labelsize': 18
                    })

np.random.seed(12345)

def correlation_from_covariance(covariance):
    v = np.sqrt(np.diag(covariance))
    outer_v = np.outer(v, v)
    correlation = covariance / outer_v
    correlation[covariance == 0] = 0
    return correlation


def show_covariance(cov_mat, process_name):
    K = cov_mat.shape[0]
    
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20,8))

    
    c = ax.imshow(cov_mat,
                       cmap="magma_r",
                      )
    
    #c = ax.pcolormesh(Sigma, edgecolors='k', linewidth=.01, cmap='inferno_r')
    #ax = plt.gca()
    #ax.set_aspect('equal')
    #ax.invert_yaxis()


    plt.colorbar(c);


    ax.set_xticks(range(0,K))
    ax.set_xticklabels(range(1, K+1));
    ax.set_yticks(range(0,K))
    ax.set_yticklabels(range(1, K+1));  
    ax.set_title(f'Covariance {process_name}', fontsize=24)

    plt.subplots_adjust(wspace=-0.7)
n_obs = 20
arparams = np.array([0])
maparams = np.array([.4])
ar = np.r_[1, -arparams] # add zero-lag and negate
ma = np.r_[1, maparams] # add zero-lag
sigma2=np.array([1])

acov_ma1 = arma_acovf(ar=ar, ma=ma, nobs=n_obs, sigma2=sigma2)

Sigma_ma1 = toeplitz(acov_ma1)

corr_ma1 = correlation_from_covariance(Sigma_ma1)
show_covariance(cov_mat=Sigma_ma1, process_name='MA(1)')
../../_images/01-ARMA-part-1_11_0.png
n_obs = 20
arparams = np.array([0])
maparams = np.array([.4, .2])
ar = np.r_[1, -arparams] # add zero-lag and negate
ma = np.r_[1, maparams] # add zero-lag
sigma2=np.array([1])

acov_ma2 = arma_acovf(ar=ar, ma=ma, nobs=n_obs, sigma2=sigma2)

Sigma_ma2 = toeplitz(acov_ma2)

corr_ma2 = correlation_from_covariance(Sigma_ma2)
show_covariance(cov_mat=Sigma_ma2, process_name='MA(2)')
../../_images/01-ARMA-part-1_13_0.png
  • need (at least) MA(q) to have dependence between zt and zt+q

zt=εt+θ1εt1+θ2εt2+++θqεtq,θq0
  • not parsimonious (q+1 parameters to estimate)

Autoregressive (AR) models

zt=i=1qαizt1+εt

AR(1)

zt=αzt1+εt
z1=αz0+ε1z2=αz1+ε2=α2z0+αε1+ε2zT=αTz0+k=0T1αkεTk
[z1z2z3z4zT]=[α100000α2α10000α3α2α1000α4α3α2α000αTαT1αT2αT3α2α1][z0ε1ε2ε3εT]

initial values z0

  • known constant, e.g. z0=0

  • unknown constant, to be estimated

  • unknown random variable, independent from εt, e.g. z0N(0,σ02), σ0 to be estimated

  • unknown random variable, independent from εt e.g. z0N(0,σ21α2)

    • equivalent to z0=ε01α2, with ε0N(0,σ2)

[z1z2z3z4zT]z=[α1α2100000α21α2α10000α31α2α2α1000α41α2α3α2α000αT1α2αT1αT2αT3α2α1]A[ε0ε1ε2ε3εT]ε

Stationarity conditions

  • constant variance (σzt2=σzt+k2=γ(0)):

σzt2=α2σzt12+σ2
γ(0)=α2γ(0)+σ2

since σ2>0, α±1

γ(0)=σ21α2
  • constant mean (Ezt=Ezt+k=μz)

Ezt=αEzt1+Eεt=αEzt1

since α1

Ezt=Ezt1=μz=0
  • constant covariances (cov(zt,zt+h)=E(ztzt+h)=γ(h))

zt+h=αzt+h1+εt+h=α2zt+h2+αεt+h1+εt+h=αhzt+αh1εt+1++αεt+h1+εt+h
cov(zt,zt+h)=E(αhztzt+αh1εt+1zt++αεt+h1zt+εt+hzt)=αhE(ztzt)=αhvar(zt)=αhσ21α2
  • covariance is time-invariant

  • correlation is time-invariant

corr(zt,zt+h)=cov(zt,zt+h)var(zt)var(zt)=α|h|
  • (stability condition) for corr(zt,zt+h)0 as h

|α|<1
n_obs = 20
arparams = np.array([0.9])
maparams = np.array([0])
ar = np.r_[1, -arparams] # add zero-lag and negate
ma = np.r_[1, maparams] # add zero-lag
sigma2=np.array([1])

acov_ar1 = arma_acovf(ar=ar, ma=ma, nobs=n_obs, sigma2=sigma2)

Sigma_ar1 = toeplitz(acov_ar1)

corr_ar1 = correlation_from_covariance(Sigma_ar1)
show_covariance(cov_mat=Sigma_ar1, process_name='AR(1)')
../../_images/01-ARMA-part-1_19_0.png

MA and AR processes in Python

from statsmodels.tsa.arima_process import ArmaProcess
# create a MA(1) model
theta = np.array([.4])
ar = np.r_[1] # the coefficient on $z_t$
ma = np.r_[1, theta] 
ma1_process = ArmaProcess(ar, ma)

print(ma1_process)
ArmaProcess
AR: [1.0]
MA: [1.0, 0.4]

autocovariances and autocorrelations (MA models)

ma1_process.acovf(3)
array([1.16, 0.4 , 0.  ])
#compare to analytical result
print([1*(1+theta**2), theta])
[array([1.16]), array([0.4])]
def plot_autocov(process, nlags, process_name):
    lags = np.arange(nlags+1)
    acovf_x = process.acovf(nlags+1)
    acf_x = process.acf(nlags+1)

    fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15, 4))
    
    ax1.vlines(lags, [0], acovf_x, color='b')
    ax1.scatter(lags, acovf_x, marker='o', c='b')
    ax1.axhline(color='black', linewidth=.3)
    ax1.set_xticks(lags)
    ax1.set_xlabel('lags', fontsize=16)
    ax1.set_ylabel('ACOVF', fontsize=16)
    ax1.set_title(f'Theoretical autocovariances {process_name}', fontsize=20)
    

    ax2.vlines(lags, [0], acf_x, color='b')
    ax2.scatter(lags, acf_x, marker='o', c='b')
    ax2.axhline(color='black', linewidth=.3)
    ax2.set_xticks(lags)
    ax2.set_xlabel('lags', fontsize=16)
    ax2.set_ylabel('ACF', fontsize=16)
    ax2.set_title(f'Theoretical autocorrelations {process_name}', fontsize=18)

def plot_sample(z, process_name):

    fig, ax = plt.subplots(figsize=(12,4))
    ax.plot(z, c='black')
    ax.set_xlabel('time', fontsize=16)
    ax.set_title(f'{process_name}', fontsize=18)
plot_autocov(process=ma1_process, nlags=5, process_name='MA(1)')
../../_images/01-ARMA-part-1_27_0.png
# define a MA(2) model

theta = np.array([.4, .9])
ar = np.r_[1] # the coefficient on $z_t$
ma = np.r_[1, theta] 
ma2_process = ArmaProcess(ar, ma)
plot_autocov(process=ma2_process, nlags=5, process_name='MA(2)')
../../_images/01-ARMA-part-1_28_0.png
# define a MA(5) model
theta = np.array([.4, -.7, .3, -.1, -.6])
ar = np.r_[1] # the coefficient on $z_t$
ma = np.r_[1, theta] 
ma5_process = ArmaProcess(ar, ma)
plot_autocov(process=ma5_process, nlags=5, process_name='MA(5)')
../../_images/01-ARMA-part-1_29_0.png
# generate a sample from the MA(1) model and plot it
z_ma1 = ma1_process.generate_sample(100)
plot_sample(z_ma1, process_name='MA(1)')
../../_images/01-ARMA-part-1_30_0.png
# generate a sample from the MA(2) model and plot it
z_ma2 = ma2_process.generate_sample(100)
plot_sample(z_ma2, process_name='MA(2)')
../../_images/01-ARMA-part-1_31_0.png
# generate a sample from the MA(5) model and plot it
z_ma5 = ma5_process.generate_sample(100)
plot_sample(z_ma5, process_name='MA(5)')
../../_images/01-ARMA-part-1_32_0.png

autocovariances and autocorrelations (AR models)

alpha = .8 # coefficient on z(t-1)
ar = np.r_[1, -alpha] 
ma = np.r_[1] # coefficient on e(t)
ar1_process = ArmaProcess(ar, ma)

print(ar1_process)
ArmaProcess
AR: [1.0, -0.8]
MA: [1.0]
ar1_process.acovf(3)
array([2.77777778, 2.22222222, 1.77777778])
#compare to analytical result
sigma=1
print([sigma**2 / (1-alpha**2), \
       alpha*sigma**2/(1-alpha**2), \
       alpha**2*sigma**2/(1-alpha**2)
      ])
[2.7777777777777786, 2.222222222222223, 1.7777777777777788]
plot_autocov(process=ar1_process, nlags=5, process_name='AR(1)')
../../_images/01-ARMA-part-1_37_0.png
alpha = np.array([.4, .1]) 
ar = np.r_[1, -alpha] # the coefficient on $z_t$
ma = np.r_[1] 
ar2_process = ArmaProcess(ar, ma)
plot_autocov(process=ar2_process, nlags=5, process_name='AR(2)')
../../_images/01-ARMA-part-1_38_0.png
alpha = np.array([.4, .2, -.3, .1, -.1])
ar = np.r_[1, -alpha] # the coefficient on $z_t$
ma = np.r_[1] 
ar5_process = ArmaProcess(ar, ma)
plot_autocov(process=ar5_process, nlags=5, process_name='AR(5)')
../../_images/01-ARMA-part-1_39_0.png
z_ar1 = ar1_process.generate_sample(100)
plot_sample(z_ar1, process_name='AR(1)')
../../_images/01-ARMA-part-1_40_0.png
z_ar2 = ar2_process.generate_sample(100)
plot_sample(z_ar2, process_name='AR(2)')
../../_images/01-ARMA-part-1_41_0.png
z_ar5 = ar5_process.generate_sample(100)
plot_sample(z_ar5, process_name='AR(5)')
../../_images/01-ARMA-part-1_42_0.png

Non-zero mean

MA(1)

zt=μ+εt+θεt1
E(zt)=μ

AR(1)

zt=α0+α1zt1+εt
E(zt)=μ
μ=α0+α1μ=α01α1
ztμ=α1(zt1μ)+εt