Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 4311a1b

Browse files
author
Algorithmica
authored
Add files via upload
1 parent ebf3c26 commit 4311a1b

7 files changed

+335
-0
lines changed
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import math
2+
import pandas as pd
3+
import numpy as np
4+
import os
5+
from sklearn import metrics
6+
from sklearn.model_selection import TimeSeriesSplit
7+
from statsmodels.tsa import ar_model
8+
import matplotlib.pyplot as plt
9+
10+
def grid_search_best_model_timeseries_ar(df, grid, cv):
    """Pick the AutoReg lag order with the lowest cross-validated RMSE.

    Each candidate in ``grid['lags']`` is fitted on every training fold
    produced by ``TimeSeriesSplit(n_splits=cv)`` and scored by the RMSE of
    its predictions over the matching test fold.  The candidate with the
    smallest mean RMSE is refitted on all of ``df`` and returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Series to model.  Presumably a single-column frame with a regular
        datetime index, which is what the caller in this file builds.
    grid : dict
        Search space; ``grid['lags']`` is an iterable of lag orders.  A
        missing or empty entry yields ``(None, None)``.
    cv : int
        Number of ``TimeSeriesSplit`` folds.

    Returns
    -------
    tuple
        ``(estimator, res)``: the ``AutoReg`` model built on ``df`` with the
        best lag order and its fit result, or ``(None, None)`` when no
        candidate produced a score.
    """
    best_param = None
    best_score = np.inf  # np.infty was removed in NumPy 2.0
    tsp = TimeSeriesSplit(n_splits=cv)

    for param in grid.get('lags') or ():
        scores = []
        failures = []
        for train_ind, test_ind in tsp.split(df):
            train_data = df.iloc[train_ind]
            test_data = df.iloc[test_ind]
            try:
                estimator = ar_model.AutoReg(train_data, lags=param)
                res = estimator.fit()
                # out-of-sample predictions spanning the test fold
                pred = estimator.predict(
                    res.params, test_data.index[0], test_data.index[-1]
                )
                y_pred = pred.values.reshape(-1)
                y_test = test_data.values.reshape(-1)
                scores.append(
                    math.sqrt(metrics.mean_squared_error(y_test, y_pred))
                )
            except Exception as exc:
                # Best effort: a fold that cannot be fitted or scored is
                # skipped, but it is reported instead of silently ignored
                # (and KeyboardInterrupt/SystemExit are no longer caught).
                failures.append(exc)

        if failures:
            print("lags=" + str(param) + ": " + str(len(failures))
                  + " fold(s) skipped, last error: " + repr(failures[-1]))

        if scores:
            mean_score = np.mean(scores)
            if mean_score < best_score:
                best_score = mean_score
                best_param = param

    if best_param is None:
        return None, None

    # Refit the winning lag order on the complete series.
    estimator = ar_model.AutoReg(df, lags=best_param)
    res = estimator.fit()
    print("best parameters:" + str(best_param))
    print("validation rmse:" + str(best_score))

    # In-sample predictions over the whole series.
    predictions = estimator.predict(res.params, start=0, end=df.shape[0] - 1)
    # The first `lags` observations have no usable prediction.  Align the
    # actuals by index instead of by a positional slice so the comparison
    # holds whether those rows are omitted or returned as NaN.
    fitted = predictions.dropna()
    y_pred = fitted.values.reshape(-1)
    y_train = df.loc[fitted.index].values.reshape(-1)
    train_rmse = math.sqrt(metrics.mean_squared_error(y_train, y_pred))
    print("train rmse:" + str(train_rmse))
    return estimator, res
53+
54+
# Load the monthly series from disk.
path = 'F:/'
df = pd.read_csv(os.path.join(path, 'uk-deaths-from-bronchitis-emphys.csv'))
df.info()

# Use the parsed timestamp column as a month-start ('MS') datetime index and
# keep only the value column.
df.columns = ['timestamp', 'y']
df.index = pd.to_datetime(df['timestamp'], format='%Y-%m')
df.index.freq = 'MS'
df.drop('timestamp', axis=1, inplace=True)

# grid search and get final model with best parameters
ar_grid = {'lags': [2, 3, 4, 5]}
estimator, res = grid_search_best_model_timeseries_ar(df, ar_grid, 3)
if res is None:
    # The grid search returns (None, None) when no candidate could be scored.
    raise SystemExit("grid search failed: no lag order could be fitted and scored")
print(res.params)
print(res.summary())
plt.plot(df)
plt.figure()
res.resid.plot()

# get predictions for future (implicit intervals based on freq of train data)
# pd.datetime was removed in pandas 2.0; pd.Timestamp is the replacement.
start_index = pd.Timestamp(1980, 1, 1)
end_index = pd.Timestamp(1990, 12, 1)
pred = estimator.predict(res.params, start_index, end_index)
print(pred)

# get predictions for future (explicit intervals)
index = pd.date_range('1-1-1980', '12-1-1990', freq='MS')
pred = estimator.predict(res.params, index[0], index[-1])
print(pred)

plt.figure()
plt.plot(pred)
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import math
2+
import pandas as pd
3+
import numpy as np
4+
import os
5+
from sklearn import metrics
6+
from sklearn.model_selection import TimeSeriesSplit
7+
from itertools import product
8+
from statsmodels.tsa import arima_model
9+
import matplotlib.pyplot as plt
10+
11+
def grid_search_best_model_timeseries_arima(df, grid, cv):
    """Pick the ARIMA ``(p, d, q)`` order with the lowest cross-validated RMSE.

    Every combination of ``grid['p']``, ``grid['d']`` and ``grid['q']`` is
    fitted on each training fold produced by ``TimeSeriesSplit(n_splits=cv)``
    and scored by the RMSE of its predictions over the matching test fold.
    The order with the smallest mean RMSE is refitted on all of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Series to model.  Presumably a single-column frame with a regular
        datetime index, which is what the caller in this file builds.
    grid : dict
        Search space with keys ``'p'``, ``'d'`` and ``'q'``, each mapping to
        an iterable of candidate values.
    cv : int
        Number of ``TimeSeriesSplit`` folds.

    Returns
    -------
    tuple
        ``(estimator, res)`` for the best order refitted on ``df``, or
        ``(None, None)`` when no order produced a score.

    Raises
    ------
    KeyError
        If ``grid`` lacks one of ``'p'``, ``'d'``, ``'q'``.
    """
    # NOTE(review): `statsmodels.tsa.arima_model` is the legacy ARIMA
    # implementation and appears to be deprecated/removed in newer statsmodels
    # releases -- confirm the pinned version or plan a migration.
    # Build the order tuples by key so they are always (p, d, q), regardless
    # of the order in which the caller inserted the keys into `grid`.
    params = list(product(grid['p'], grid['d'], grid['q']))

    print(params)
    best_param = None
    best_score = np.inf  # np.infty was removed in NumPy 2.0
    tsp = TimeSeriesSplit(n_splits=cv)

    for param in params:
        scores = []
        failures = []
        for train_ind, test_ind in tsp.split(df):
            train_data = df.iloc[train_ind]
            test_data = df.iloc[test_ind]
            try:
                estimator = arima_model.ARIMA(train_data, order=param)
                res = estimator.fit()
                # out-of-sample predictions spanning the test fold
                # NOTE(review): for d > 0 the legacy predict() presumably
                # defaults to the differenced scale (typ='linear') while
                # y_test is in levels -- confirm whether typ='levels' is
                # intended here.
                y_pred = estimator.predict(
                    res.params, test_data.index[0], test_data.index[-1]
                )
                y_test = test_data.values.reshape(-1)
                scores.append(
                    math.sqrt(metrics.mean_squared_error(y_test, y_pred))
                )
            except Exception as exc:
                # Best effort: orders that cannot be fitted or scored on a
                # fold are skipped, but reported instead of silently ignored
                # (and KeyboardInterrupt/SystemExit are no longer caught).
                failures.append(exc)

        if failures:
            print("order=" + str(param) + ": " + str(len(failures))
                  + " fold(s) skipped, last error: " + repr(failures[-1]))

        if scores:
            mean_score = np.mean(scores)
            if mean_score < best_score:
                best_score = mean_score
                best_param = param

    if best_param is None:
        return None, None

    # Refit the winning order on the complete series.
    estimator = arima_model.ARIMA(df, order=best_param)
    res = estimator.fit()
    print("best parameters:" + str(best_param))
    print("validation rmse:" + str(best_score))
    # get in-sample predictions with start and end indices
    y_pred = estimator.predict(res.params, start=0, end=df.shape[0] - 1)
    y_train = df.values.reshape(-1)
    train_rmse = math.sqrt(metrics.mean_squared_error(y_train, y_pred))
    print("train rmse:" + str(train_rmse))
    return estimator, res
58+
59+
# Load the monthly series from disk.
path = 'F:/'
df = pd.read_csv(os.path.join(path, 'uk-deaths-from-bronchitis-emphys.csv'))
df.info()

# Use the parsed timestamp column as a month-start ('MS') datetime index and
# keep only the value column.
df.columns = ['timestamp', 'y']
df.index = pd.to_datetime(df['timestamp'], format='%Y-%m')
df.index.freq = 'MS'
df.drop('timestamp', axis=1, inplace=True)

# grid search and get final model with best parameters
arima_grid = {'p': [0, 1, 5, 7], 'd': [0, 1, 2], 'q': [0, 1, 2, 5]}
estimator, res = grid_search_best_model_timeseries_arima(df, arima_grid, 3)
if res is None:
    # The grid search returns (None, None) when no candidate could be scored.
    raise SystemExit("grid search failed: no ARIMA order could be fitted and scored")
print(res.params)
print(res.summary())
plt.plot(df)
plt.figure()
res.resid.plot()

# get predictions for future (implicit intervals based on freq of train data)
# pd.datetime was removed in pandas 2.0; pd.Timestamp is the replacement.
start_index = pd.Timestamp(1980, 1, 1)
end_index = pd.Timestamp(1990, 12, 1)
pred = estimator.predict(res.params, start_index, end_index)
print(pred)

# get predictions for future (explicit intervals)
index = pd.date_range('1-1-1980', '12-1-1990', freq='MS')
pred = estimator.predict(res.params, index[0], index[-1])
# attach the explicit date index to the returned predictions
pred = pd.Series(pred, index=index)
print(pred)

plt.figure()
plt.plot(pred)
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import math
2+
import pandas as pd
3+
import numpy as np
4+
import os
5+
from sklearn import metrics
6+
from sklearn.model_selection import TimeSeriesSplit
7+
from itertools import product
8+
from statsmodels.tsa import arima_model
9+
import matplotlib.pyplot as plt
10+
11+
def grid_search_best_model_timeseries_arma(df, grid, cv):
    """Pick the ARMA ``(p, q)`` order with the lowest cross-validated RMSE.

    Every combination of ``grid['p']`` and ``grid['q']`` is fitted on each
    training fold produced by ``TimeSeriesSplit(n_splits=cv)`` and scored by
    the RMSE of its predictions over the matching test fold.  The order with
    the smallest mean RMSE is refitted on all of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Series to model.  Presumably a single-column frame with a regular
        datetime index, which is what the caller in this file builds.
    grid : dict
        Search space with keys ``'p'`` and ``'q'``, each mapping to an
        iterable of candidate values.
    cv : int
        Number of ``TimeSeriesSplit`` folds.

    Returns
    -------
    tuple
        ``(estimator, res)`` for the best order refitted on ``df``, or
        ``(None, None)`` when no order produced a score.

    Raises
    ------
    KeyError
        If ``grid`` lacks ``'p'`` or ``'q'``.
    """
    # NOTE(review): `statsmodels.tsa.arima_model` is the legacy ARMA
    # implementation and appears to be deprecated/removed in newer statsmodels
    # releases -- confirm the pinned version or plan a migration.
    # Build the order tuples by key so they are always (p, q), regardless of
    # the order in which the caller inserted the keys into `grid`.
    params = list(product(grid['p'], grid['q']))

    print(params)
    best_param = None
    best_score = np.inf  # np.infty was removed in NumPy 2.0
    tsp = TimeSeriesSplit(n_splits=cv)

    for param in params:
        scores = []
        failures = []
        for train_ind, test_ind in tsp.split(df):
            train_data = df.iloc[train_ind]
            test_data = df.iloc[test_ind]
            try:
                estimator = arima_model.ARMA(train_data, order=param)
                res = estimator.fit()
                # out-of-sample predictions spanning the test fold
                y_pred = estimator.predict(
                    res.params, test_data.index[0], test_data.index[-1]
                )
                y_test = test_data.values.reshape(-1)
                scores.append(
                    math.sqrt(metrics.mean_squared_error(y_test, y_pred))
                )
            except Exception as exc:
                # Best effort: orders that cannot be fitted or scored on a
                # fold are skipped, but reported instead of silently ignored
                # (and KeyboardInterrupt/SystemExit are no longer caught).
                failures.append(exc)

        if failures:
            print("order=" + str(param) + ": " + str(len(failures))
                  + " fold(s) skipped, last error: " + repr(failures[-1]))

        if scores:
            mean_score = np.mean(scores)
            if mean_score < best_score:
                best_score = mean_score
                best_param = param

    if best_param is None:
        return None, None

    # Refit the winning order on the complete series.
    estimator = arima_model.ARMA(df, order=best_param)
    res = estimator.fit()
    print("best parameters:" + str(best_param))
    print("validation rmse:" + str(best_score))
    # get in-sample predictions with start and end indices
    y_pred = estimator.predict(res.params, start=0, end=df.shape[0] - 1)
    y_train = df.values.reshape(-1)
    train_rmse = math.sqrt(metrics.mean_squared_error(y_train, y_pred))
    print("train rmse:" + str(train_rmse))
    return estimator, res
58+
59+
# Load the monthly series from disk.
path = 'F:/'
df = pd.read_csv(os.path.join(path, 'uk-deaths-from-bronchitis-emphys.csv'))
df.info()

# Use the parsed timestamp column as a month-start ('MS') datetime index and
# keep only the value column.
df.columns = ['timestamp', 'y']
df.index = pd.to_datetime(df['timestamp'], format='%Y-%m')
df.index.freq = 'MS'
df.drop('timestamp', axis=1, inplace=True)

# grid search and get final model with best parameters
arma_grid = {'p': [0, 1, 2, 3, 5, 7], 'q': [1, 2, 3, 5, 7]}
estimator, res = grid_search_best_model_timeseries_arma(df, arma_grid, 3)
if res is None:
    # The grid search returns (None, None) when no candidate could be scored.
    raise SystemExit("grid search failed: no ARMA order could be fitted and scored")
print(res.params)
print(res.summary())
plt.plot(df)
plt.figure()
res.resid.plot()

# get predictions for future (implicit intervals based on freq of train data)
# pd.datetime was removed in pandas 2.0; pd.Timestamp is the replacement.
start_index = pd.Timestamp(1980, 1, 1)
end_index = pd.Timestamp(1990, 12, 1)
pred = estimator.predict(res.params, start_index, end_index)
print(pred)

# get predictions for future (explicit intervals)
index = pd.date_range('1-1-1980', '12-1-1990', freq='MS')
pred = estimator.predict(res.params, index[0], index[-1])
# attach the explicit date index to the returned predictions
pred = pd.Series(pred, index=index)
print(pred)

plt.figure()
plt.plot(pred)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import pandas as pd
2+
import os
3+
from sklearn.model_selection import TimeSeriesSplit
4+
import statsmodels
5+
# Report which statsmodels release is installed.
print(statsmodels.__version__)

# Read the raw CSV and show its column summary.
path = 'F:/'
csv_path = os.path.join(path, 'uk-deaths-from-bronchitis-emphys.csv')
df = pd.read_csv(csv_path)
df.info()

# Rename the two columns, index the frame by the parsed timestamp, then drop
# the now-redundant timestamp column.
df.columns = ['timestamp', 'y']
df.index = pd.to_datetime(df['timestamp'], format='%Y-%m')
df.drop('timestamp', axis=1, inplace=True)

tsp = TimeSeriesSplit(n_splits=3)

# First pass: show the positional indices assigned to each fold.
for fold in tsp.split(df):
    train_ind, test_ind = fold
    print(train_ind, test_ind)

# Second pass: show the rows those positions select.
for train_ind, test_ind in tsp.split(df):
    train_data, test_data = df.iloc[train_ind], df.iloc[test_ind]
    print(train_data, test_data)
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import math
2+
import pandas as pd
3+
import numpy as np
4+
import os
5+
from sklearn import metrics
6+
from sklearn.model_selection import TimeSeriesSplit
7+
from statsmodels.tsa import ar_model
8+
9+
# Load the monthly series from disk.
path = 'F:/'
df = pd.read_csv(os.path.join(path, 'uk-deaths-from-bronchitis-emphys.csv'))
df.info()

# Use the parsed timestamp column as a month-start ('MS') datetime index and
# keep only the value column.
df.columns = ['timestamp', 'y']
df.index = pd.to_datetime(df['timestamp'], format='%Y-%m').copy()
df.index.freq = 'MS'
df.drop('timestamp', axis=1, inplace=True)

# Lag order shared by the full-data fit and the cross-validation below.
lags = 5

# build model
estimator = ar_model.AutoReg(df, lags=lags)
res = estimator.fit()
print(res.params)
print(res.model)
print(res.summary())

# using model
predictions = estimator.predict(res.params, start=0, end=df.shape[0] - 1)
print(predictions)
# The first `lags` observations have no usable prediction.  Align the actuals
# by index instead of by a positional slice so the comparison holds whether
# those rows are omitted or returned as NaN.
fitted = predictions.dropna()
y_pred = fitted.values.reshape(-1)
y_train = df.loc[fitted.index].values.reshape(-1)
train_rmse = math.sqrt(metrics.mean_squared_error(y_train, y_pred))
print(train_rmse)

# evaluate model
tsp = TimeSeriesSplit(n_splits=3)
scores = []

for train_ind, test_ind in tsp.split(df):
    train_data = df.iloc[train_ind]
    test_data = df.iloc[test_ind]
    # Fit on the training fold only.  Fitting on the full `df` would let the
    # model see the test observations and make the reported RMSE optimistic.
    estimator = ar_model.AutoReg(train_data, lags=lags)
    res = estimator.fit()
    pred = estimator.predict(res.params, test_data.index[0], test_data.index[-1])
    y_pred = pred.values.reshape(-1)
    y_test = test_data.values.reshape(-1)
    score = math.sqrt(metrics.mean_squared_error(y_test, y_pred))
    scores.append(score)
print(scores)
print(np.mean(scores))
1.66 KB
Binary file not shown.
2.45 KB
Binary file not shown.

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /