Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 7403a63

Browse files
add hw12, hw13 solutions
add solution files, update readme
1 parent bae7b0d commit 7403a63

File tree

3 files changed

+154
-2
lines changed

3 files changed

+154
-2
lines changed

‎homework_solutions/hw12_solution.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
from __future__ import print_function
2+
3+
import numpy as np
4+
import matplotlib.pyplot as plt
5+
6+
from sklearn.linear_model import LinearRegression
7+
from sklearn.preprocessing import PolynomialFeatures
8+
from sklearn.model_selection import cross_validate
9+
from sklearn.model_selection import KFold
10+
11+
# ========== HW12 SOLUTION [Python2/3] ========== #
12+
13+
np.random.seed(1)
14+
x = np.random.random(20) * 2.0
15+
noise = np.random.normal(size=20)
16+
y = 2.0 * x - 3.2 + noise
17+
# plt.figure()
18+
# plt.plot(x, y, 'o')
19+
# plt.show()
20+
X = x.reshape(-1, 1)
21+
22+
# linear model
23+
linreg_fit = LinearRegression(fit_intercept=True)
24+
25+
# polynomial model (degree=2)
26+
poly2 = PolynomialFeatures(degree=2)
27+
X_poly2 = poly2.fit_transform(X)
28+
poly2_fit = LinearRegression(fit_intercept=False)
29+
30+
# polynomial model (degree=10)
31+
poly10 = PolynomialFeatures(degree=10)
32+
X_poly10 = poly10.fit_transform(X)
33+
poly10_fit = LinearRegression(fit_intercept=False)
34+
35+
36+
# option 1: one loop for everything (faster)
37+
38+
# for plotting purposes
39+
x_linspace = np.linspace(np.min(X), np.max(X), 100)
40+
X_linspace = x_linspace.reshape(-1, 1)
41+
X_linspace_poly2 = poly2.transform(X_linspace)
42+
X_linspace_poly10 = poly10.transform(X_linspace)
43+
44+
scores_linreg_fit = []
45+
scores_poly2_fit = []
46+
scores_poly10_fit = []
47+
kf = KFold(n_splits=5)
48+
for train, test in kf.split(X):
49+
X_test, X_train = X[test], X[train]
50+
y_test, y_train = y[test], y[train]
51+
52+
# subset training data
53+
X_lin_train = X[train]
54+
X_poly2_train = X_poly2[train]
55+
X_poly10_train = X_poly10[train]
56+
57+
# subset testing data
58+
X_lin_test = X[test]
59+
X_poly2_test = X_poly2[test]
60+
X_poly10_test = X_poly10[test]
61+
62+
# fit models
63+
linreg_fit.fit(X_lin_train, y_train)
64+
poly2_fit.fit(X_poly2_train, y_train)
65+
poly10_fit.fit(X_poly10_train, y_train)
66+
67+
# predict models for plots
68+
y_hat_lin = linreg_fit.predict(X_linspace)
69+
y_hat_poly2 = poly2_fit.predict(X_linspace_poly2)
70+
y_hat_poly10 = poly10_fit.predict(X_linspace_poly10)
71+
72+
# compute R^2 scores and append to lists
73+
lin_score = linreg_fit.score(X_lin_test, y_test)
74+
scores_linreg_fit.append(lin_score)
75+
76+
poly2_score = poly2_fit.score(X_poly2_test, y_test)
77+
scores_poly2_fit.append(poly2_score)
78+
79+
poly10_score = poly10_fit.score(X_poly10_test, y_test)
80+
scores_poly10_fit.append(poly10_score)
81+
82+
# uncomment to show visualization for each cross-validation step
83+
# plt.figure()
84+
# plt.plot(X_train, y_train, 'ok', label='train')
85+
# plt.plot(X_test, y_test, 'xb', label='test')
86+
# plt.plot(X_linspace, y_hat_lin, '.-', label='Linear model')
87+
# plt.plot(X_linspace, y_hat_poly2, '.-', label='Quadratic model')
88+
# plt.plot(X_linspace, y_hat_poly10, '.-', label='10 degree model')
89+
# plt.ylim((-10, 10))
90+
# plt.legend()
91+
# plt.show()
92+
93+
print(np.mean(scores_linreg_fit))
94+
print(np.mean(scores_poly2_fit))
95+
print(np.mean(scores_poly10_fit))
96+
97+
# option 2: one-liner for each model (more readable)
98+
99+
scores_linreg_fit = cross_validate(linreg_fit, X, y, cv=5,
100+
return_train_score=False)
101+
scores_poly2_fit = cross_validate(poly2_fit, X_poly2, y, cv=5,
102+
return_train_score=False)
103+
scores_poly10_fit = cross_validate(poly10_fit, X_poly10, y, cv=5,
104+
return_train_score=False)
105+
106+
print(np.mean(scores_linreg_fit['test_score']))
107+
print(np.mean(scores_poly2_fit['test_score']))
108+
print(np.mean(scores_poly10_fit['test_score']))

‎homework_solutions/hw13_solution.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from __future__ import print_function
2+
3+
import pandas as pd
4+
import matplotlib.pyplot as plt
5+
6+
# ========== HW13 SOLUTION [Python2/3] ========== #
7+
8+
# read in data
9+
df_aapl = pd.read_csv('AAPL.csv', na_values='null', index_col='Date')
10+
df_msft = pd.read_csv('MSFT.csv', na_values='null', index_col='Date')
11+
df_pg = pd.read_csv('PG.csv', na_values='null', index_col='Date')
12+
# convert index to datetime type (for plotting)
13+
df_aapl.index = df_aapl.index.astype('datetime64')
14+
df_msft.index = df_msft.index.astype('datetime64')
15+
df_pg.index = df_pg.index.astype('datetime64')
16+
17+
# calculate Range (abs not necessary since High >= Low can be assumed)
18+
df_aapl['Range'] = abs(df_aapl['High'] - df_aapl['Low'])
19+
df_msft['Range'] = abs(df_msft['High'] - df_msft['Low'])
20+
df_pg['Range'] = abs(df_pg['High'] - df_pg['Low'])
21+
22+
# write output files
23+
df_aapl.to_csv('AAPL_range.csv')
24+
df_msft.to_csv('MSFT_range.csv')
25+
df_pg.to_csv('PG_range.csv')
26+
27+
# print summary statistics
28+
print(df_aapl.Range.describe())
29+
print(df_msft.Range.describe())
30+
print(df_pg.Range.describe())
31+
32+
# subset Close prices between 2008-2009 (for year 2008)
33+
close_aapl = df_aapl['Close'].loc['2008']
34+
close_msft = df_msft['Close'].loc['2008']
35+
close_pg = df_pg['Close'].loc['2008']
36+
37+
# plot
38+
plt.figure()
39+
plt.plot(close_aapl.index, close_aapl, label='AAPL')
40+
plt.plot(close_msft.index, close_msft, label='MSFT')
41+
plt.plot(close_pg.index, close_pg, label='PG')
42+
plt.legend()
43+
plt.xticks(rotation=30)
44+
plt.show()

‎readme.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ Lectures were performed using the presentation slides along with a Jupyter Noteb
3939
| Python read and write: opening and modifying text/csv files | [lecture09.pdf](lectures/lecture09/lecture09.pdf) | [lecture09.ipynb](lectures/lecture09/lecture09.ipynb) | [hw09.pdf](lectures/lecture09/hw09.pdf) | [HW09 Solution](homework_solutions/hw09_solution.py) |
4040
| Symbolic math with SymPy , DOE with pyDOE (Second quiz 15 mins before end of class) | [lecture10.pdf](lectures/lecture10/lecture10.pdf) | [lecture10.ipynb](lectures/lecture10/lecture10.ipynb) | [hw10.pdf](lectures/lecture10/hw10.pdf) | [HW10 Solution](homework_solutions/hw10_solution.py) |
4141
| Scikit-learn: surrogate modeling | [lecture11.pdf](lectures/lecture11/lecture11.pdf) | [lecture11.ipynb](lectures/lecture11/lecture11.ipynb) | [hw11.pdf](lectures/lecture11/hw11.pdf) | [HW11 Solution](homework_solutions/hw11_solution.py) |
42-
| Scikit-learn: surrogate modeling and machine learning | [lecture12.pdf](lectures/lecture12/lecture12.pdf) | [lecture12.ipynb](lectures/lecture12/lecture12.ipynb) | [hw12.pdf](lectures/lecture12/hw12.pdf) | soon |
43-
| Pandas and DataFrames / Review for final | [lecture13.pdf](lectures/lecture13/lecture13.pdf) | [lecture13.ipynb](lectures/lecture13/lecture13.ipynb) | [hw13.pdf](lectures/lecture13/hw13.pdf) | soon |
42+
| Scikit-learn: surrogate modeling and machine learning | [lecture12.pdf](lectures/lecture12/lecture12.pdf) | [lecture12.ipynb](lectures/lecture12/lecture12.ipynb) | [hw12.pdf](lectures/lecture12/hw12.pdf) | [HW12 Solution](homework_solutions/hw12_solution.py) |
43+
| Pandas and DataFrames / Review for final | [lecture13.pdf](lectures/lecture13/lecture13.pdf) | [lecture13.ipynb](lectures/lecture13/lecture13.ipynb) | [hw13.pdf](lectures/lecture13/hw13.pdf) | [HW13 Solution](homework_solutions/hw13_solution.py) |
4444

4545
[Quiz](/quiz)
4646

0 commit comments

Comments
(0)

Page converted by AltStyle (-> original) /