|
| 1 | +# importing libraries |
| 2 | +import numpy as np |
| 3 | +import pandas as pd |
| 4 | +import matplotlib.pyplot as plt |
| 5 | +from statsmodels.tsa.arima_model import ARIMA |
| 6 | +import datetime |
| 7 | +from datetime import date |
| 8 | +import warnings |
| 9 | +warnings.filterwarnings('ignore') |
| 10 | +plt.style.use('fivethirtyeight') |
| 11 | +from pmdarima import auto_arima |
| 12 | + |
# Load the JHU CSSE global time series (one row per region, one column per
# reporting date) and the daily report for 07-15-2020.
confirmed_cases = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
deaths_reported = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
recovered_cases = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
latest_data = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-15-2020.csv')

## attributes
# All column labels of the confirmed dataset; the date columns start at
# position 4 (everything before that is region metadata).
cols = confirmed_cases.keys()
# Keep only the date columns of each dataset, using the confirmed dataset's
# first and last date labels as the slice bounds for all three.
confirmed = confirmed_cases.loc[:, cols[4]:cols[-1]]
deaths = deaths_reported.loc[:, cols[4]:cols[-1]]
recoveries = recovered_cases.loc[:, cols[4]:cols[-1]]
# Date labels, in column order
dates = confirmed.keys()

# Summary containers (declared here; not filled in this section)
world_cases = []
total_deaths = []
mortality_rate = []
recovery_rate = []
total_recovered = []
total_active = []

# Cumulative India totals, one value per date in `dates`.
# The country filter is evaluated once per dataset and all date columns are
# summed in a single pass, instead of re-filtering the frame for every date.
india_cases = confirmed_cases.loc[
    confirmed_cases['Country/Region'] == 'India', dates
].sum().tolist()
india_deaths = deaths_reported.loc[
    deaths_reported['Country/Region'] == 'India', dates
].sum().tolist()
india_recoveries = recovered_cases.loc[
    recovered_cases['Country/Region'] == 'India', dates
].sum().tolist()
| 44 | + |
def daily_increase(data):
    """Convert a cumulative series into per-step increments.

    The first element is carried over unchanged; every later element is the
    difference between that entry and the one before it. An empty input
    yields an empty list.
    """
    if len(data) == 0:
        return []
    increments = [data[0]]
    increments.extend(data[k] - data[k - 1] for k in range(1, len(data)))
    return increments
| 53 | + |
def _trace_auto_arima_search(series, train_fraction=0.7):
    """Run pmdarima's order search on the leading part of *series*.

    The search is run with ``trace=True`` so the candidate models are printed;
    the fitted model is returned. Only the first ``train_fraction`` of the
    observations is used, matching the original 70/30 train/validation split.
    """
    split = int(train_fraction * len(series))
    # auto_arima already returns a fitted model, so no extra fit() is needed.
    return auto_arima(series[:split], trace=True, error_action='ignore',
                      suppress_warnings=True)


def _arima_forecast(series, order, days=21):
    """Fit an ARIMA model of the given *order* and forecast *days* steps.

    Returns ``(rounded_forecast, confidence_intervals)`` for alpha = 0.05.

    NOTE: this uses the legacy ``statsmodels.tsa.arima_model.ARIMA`` interface
    imported at the top of the file (``fit(disp=...)`` and a ``forecast`` that
    returns a 3-tuple); newer statsmodels releases deprecate that module.
    """
    model_fit = ARIMA(list(series), order=order).fit(disp=0)
    forecast, _std_err, conf_int = model_fit.forecast(steps=days, alpha=0.05)
    return forecast.round(), conf_int


def _forecast_and_plot(cumulative, order, title, output_path, days=21,
                       first_day='1/22/2020'):
    """Forecast daily increments of a cumulative series and plot them.

    Parameters
    ----------
    cumulative : sequence of numbers
        Cumulative totals, one per day, assumed to start on *first_day*.
    order : tuple
        ARIMA order passed to the statsmodels model.
    title : str
        Title of the plot.
    output_path : str
        Where the figure is saved; missing parent directories are created.
    days : int
        Number of days to forecast and plot.
    first_day : str
        Date of the first observation, in ``%m/%d/%Y`` format.
    """
    import os  # local import: the file's import block does not provide os

    daily = np.asarray(daily_increase(cumulative))

    # Stepwise order search on the first 70% of the data; kept for its trace
    # output, the hand-picked *order* below is what drives the forecast.
    _trace_auto_arima_search(daily)

    predicted, _conf_int = _arima_forecast(daily, order, days)

    # Date the forecast from the day after the last observation, so the axis
    # labels always line up with the data rather than with the wall clock.
    first_observed = datetime.datetime.strptime(first_day, '%m/%d/%Y')
    forecast_start = first_observed + datetime.timedelta(days=len(daily))
    predictions = pd.DataFrame(
        {'Forecast': predicted},
        index=pd.date_range(start=forecast_start, periods=days, freq='D'),
    )

    labels = [day.strftime('%m/%d/%Y') for day in predictions.index]
    values = predictions['Forecast'].tolist()

    # Plot results for the forecasted dates only
    plt.plot(labels, values, color='red')
    plt.title(title)
    plt.xticks(rotation=90)
    plt.autoscale()

    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    plt.savefig(output_path, bbox_inches='tight')
    plt.show()


def fresh_cases_daily():
    """Forecast and plot India's new confirmed cases for the next 21 days."""
    _forecast_and_plot(
        india_cases,
        order=(2, 1, 5),
        title='New active Cases Forecast',
        output_path="./corona cases forecasting/Results/plot1.png",
    )


def death_cases_daily():
    """Forecast and plot India's new reported deaths for the next 21 days."""
    _forecast_and_plot(
        india_deaths,
        order=(0, 1, 1),
        title='New death Cases Forecast',
        output_path="./corona cases forecasting/Results/plot2.png",
    )


def recovered_cases_daily():
    """Forecast and plot India's new recoveries for the next 21 days."""
    _forecast_and_plot(
        india_recoveries,
        order=(1, 1, 2),
        title='New recovered Cases Forecast',
        output_path="./corona cases forecasting/Results/plot3.png",
    )
| 230 | + |
# Ask the user which prediction to run and dispatch to the matching function.
# The answer is matched case-insensitively and ignoring surrounding
# whitespace, so "f" or " F " are accepted as well as "F".
choice = input("F for fresh cases,D for death cases,R for recovered cases prediction : ")
_forecast_by_choice = {
    'F': fresh_cases_daily,
    'D': death_cases_daily,
    'R': recovered_cases_daily,
}
_selected_forecast = _forecast_by_choice.get(choice.strip().upper())
if _selected_forecast is None:
    print("Enter a valid choice")
else:
    _selected_forecast()
0 commit comments