import pandas as pd
import numpy as np

import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

from sklearn.metrics import mean_squared_error

%matplotlib inline
%config InlineBackend.figure_format='retina'
import matplotlib.pyplot as plt
import seaborn as sns

np.random.seed(42)
idx = pd.IndexSlice


!head -n2 data/h2weekly.csv

Date,IsCanceled
2015-06-21,97


ts = pd.read_csv('data/h2weekly.csv', parse_dates=['Date'], index_col='Date', squeeze=True)

type(ts)
ts.head(2)
ts.tail(2)

pandas.core.series.Series

Date
2015-06-21     97
2015-06-28    153
Name: IsCanceled, dtype: int64

Date
2017-08-20    201
2017-08-27    129
Name: IsCanceled, dtype: int64


fig = plt.figure(constrained_layout=True, figsize=(14, 7))
gs = fig.add_gridspec(2, 2)

ax_top = fig.add_subplot(gs[0, :])
ax_bot_0 = fig.add_subplot(gs[1, 0])
ax_bot_1 = fig.add_subplot(gs[1, 1])

_ = ts.plot(ax=ax_top, title='Number of cancellations per week')
_ = plot_acf(ts, ax=ax_bot_0, title='ACF', lags=60)
_ = plot_pacf(ts, ax=ax_bot_1, title='PACF')


_ = plot_acf(ts, lags=60)


from pydlm import dlm, trend, seasonality


linear_trend = trend(degree=1, discount=0.9, name='linear_trend', w=10)
seasonal28 = seasonality(period=28, discount=0.9, name='seasonal28', w=10)

simple_dlm = dlm(ts) + linear_trend + seasonal28
simple_dlm.ls()

The static components are
linear_trend (degree = 2)
seasonal28 (degree = 28)
 
There is no dynamic component.
 
There is no automatic component.


simple_dlm.fit()

Initializing models...
Initialization finished.
Starting forward filtering...
Forward filtering completed.
Starting backward smoothing...
Backward smoothing completed.


# Plot the fitted results
simple_dlm.turnOff('data points')
simple_dlm.plot()

/Users/dimitri/opt/miniconda3/envs/ts-forecasting/lib/python3.8/site-packages/pydlm/plot/dlmPlot.py:519: MatplotlibDeprecationWarning: Passing non-integers as three-element position specification is deprecated since 3.3 and will be removed two minor releases later.
  plt.subplot(str(size[0]) + str(size[1]) + str(location))


from pydlm import modelTuner


%%time

myTuner = modelTuner(method='gradient_descent', loss='mse')
tuned_sDLM = myTuner.tune(simple_dlm, maxit=100)

The algorithm stops without converging.
Possible reason: some discount is too close to 1 or 0.7 (0.7 is smallest discount that is permissible.
CPU times: user 28.4 s, sys: 352 ms, total: 28.7 s
Wall time: 29.1 s


tuned_sDLM.fit()


round(np.sqrt(simple_dlm.getMSE()), 4)
round(np.sqrt(tuned_sDLM.getMSE()), 4)

124.2865

129.6941


# Plot the linear and seasonal trends.
simple_dlm.turnOff('predict plot')
simple_dlm.turnOff('filtered plot')

simple_dlm.plot('linear_trend')
simple_dlm.plot('seasonal28')


len(ts)

115


# Plot prediction for the first 100 weeks and a forecast for the following 15 weeks.
simple_dlm.plotPredictN(date=100, N=15)

# Plot prediction for the first 60 weeks and a forecast for the following 55 weeks.
simple_dlm.plotPredictN(date=60, N=55)


test = ts[-15:]
len(test)

test.head(2)
test.tail(2)

15

Date
2017-05-21    407
2017-05-28    463
Name: IsCanceled, dtype: int64

Date
2017-08-20    201
2017-08-27    129
Name: IsCanceled, dtype: int64


forecast = simple_dlm.predictN(date=100, N=15)[0]

round(np.sqrt(mean_squared_error(test, forecast)), 4)

141.4244


round(np.mean(test), 4)

327.5333


round(141.4244 / 327.5333 * 100, 0)

43.0


from pydlm import autoReg, dynamic


# Construct the base
DLM_2 = dlm(ts)
DLM_2 = DLM_2 + trend(1, name='linear_trend', w=1.0)
DLM_2 = DLM_2 + seasonality(28, name='28wks', w=1.0)
DLM_2 = DLM_2 + autoReg(degree=2, name='autoreg2', w=1.0)

# Introduce dynamic component
features = [[1.0, 2.0] for i in range(115)]
ctrend = dynamic(features=features, name='random', discount=0.9)
DLM_2 = DLM_2 + ctrend

# Show the added components
DLM_2.ls()

The static components are
linear_trend (degree = 2)
28wks (degree = 28)
 
The dynamic components are
random (dimension = 2)
 
The automatic components are
autoreg2 (dimension = 2)


DLM_2.fit()

Initializing models...
Initialization finished.
Starting forward filtering...
Forward filtering completed.
Starting backward smoothing...
Backward smoothing completed.


# Plot the fitted results
DLM_2.turnOff('data points')
DLM_2.plot()


%%time

myTuner = modelTuner(method='gradient_descent', loss='mse')
tuned_DLM2 = myTuner.tune(DLM_2, maxit=100)

The algorithm stops without converging.
Possible reason: some discount is too close to 1 or 0.7 (0.7 is smallest discount that is permissible.
CPU times: user 37.4 s, sys: 424 ms, total: 37.8 s
Wall time: 38.2 s


tuned_DLM2.fit()


round(np.sqrt(DLM_2.getMSE()), 4)
round(np.sqrt(tuned_DLM2.getMSE()), 4)

110.0252

107.4558


round(np.sqrt(simple_dlm.getMSE()), 4)
round(np.sqrt(tuned_DLM2.getMSE()), 4)

124.2865

107.4558


# Plot prediction for the first 100 weeks and a forecast for the following 14 weeks.
tuned_DLM2.plotPredictN(date=100, N=14)

# Plot prediction for the first 60 weeks and a forecast for the following 54 weeks.
tuned_DLM2.plotPredictN(date=60, N=54)


test = ts[-15:-1]
len(test)

test.head(2)
test.tail(2)

14

Date
2017-05-21    407
2017-05-28    463
Name: IsCanceled, dtype: int64

Date
2017-08-13    341
2017-08-20    201
Name: IsCanceled, dtype: int64


forecast = tuned_DLM2.predictN(date=100, N=14)[0]

round(np.sqrt(mean_squared_error(test, forecast)), 4)

89.8731


round(np.mean(test), 4)

341.7143


round(89.8731 / 341.7143 * 100, 0)

26.0

数学建模

编程知识点

模型第三方库-statsmodels

In this notebook¶

Milestone 1: Simple Dynamic Linear Model¶

Tunning¶

Forecast Interpretation¶

RMSE¶

Milestone 2: DLM with Dynamic & Autoregressive Components¶

Tunning¶

Forecast Interpretation¶

RMSE¶