Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 25676a2

Browse files
author
Algorithmica
authored
Add files via upload
1 parent 076bbb0 commit 25676a2

File tree

2 files changed

+126
-0
lines changed

2 files changed

+126
-0
lines changed
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import sys
2+
path = 'F://New Folder/utils'
3+
sys.path.append(path)
4+
5+
import common_utils as utils
6+
from sklearn import metrics, model_selection, ensemble, neighbors, linear_model, decomposition, manifold, feature_selection, preprocessing, pipeline, impute, compose, svm
7+
import math
8+
import pandas as pd
9+
import os
10+
import numpy as np
11+
from sklearn.externals import joblib
12+
from sklearn2pmml import sklearn2pmml
13+
from sklearn2pmml.pipeline import PMMLPipeline
14+
15+
16+
def log_rmse(y_orig, y_pred):
    """Return the root mean squared logarithmic error of the predictions.

    Takes the square root of ``metrics.mean_squared_log_error`` evaluated on
    the true targets ``y_orig`` and the predicted targets ``y_pred``.
    """
    msle = metrics.mean_squared_log_error(y_orig, y_pred)
    return math.sqrt(msle)
18+
19+
def rmse(y_orig, y_pred):
    """Return the root mean squared error of the predictions.

    Takes the square root of ``metrics.mean_squared_error`` evaluated on the
    true targets ``y_orig`` and the predicted targets ``y_pred``.
    """
    mse = metrics.mean_squared_error(y_orig, y_pred)
    return math.sqrt(mse)
21+
22+
# Directory holding the house-prices data set (train.csv is read from here).
path = 'F://house-prices'
house_train = pd.read_csv(os.path.join(path, "train.csv"))
# Bare expression: only displays something in an interactive session.
house_train.shape
house_train.info()

# Type cast features: MSSubClass is handed to the project helper that casts
# the listed columns to categorical.
features_to_cast = ['MSSubClass']
utils.cast_to_cat(house_train, features_to_cast)

# Manual feature selection: drop the row id, the target column, and every
# column the helper reports for the 0.25 missing-data threshold.
missing_features_above_th = utils.get_features_to_drop_on_missingdata(house_train, 0.25)
features_to_drop = ['Id', 'SalePrice', *missing_features_above_th]
house_train1 = utils.drop_features(house_train, features_to_drop)
# Bare expression: only displays something in an interactive session.
house_train1.shape
37+
38+
# Pipeline for categorical features: fill missing values with the most
# frequent category, then one-hot encode. Categories unseen during fit are
# ignored at transform time (handle_unknown='ignore').
categorical_pipeline = pipeline.Pipeline(steps=[
    ('imputer', impute.SimpleImputer(strategy="most_frequent")),
    ('ohe', preprocessing.OneHotEncoder(sparse=False, handle_unknown='ignore')),
])
43+
44+
45+
# Pipeline for numerical features: impute missing values with SimpleImputer's
# default strategy, then standardize with StandardScaler.
numerical_pipeline = pipeline.Pipeline(steps=[
    ('imputer', impute.SimpleImputer()),
    ('scaler', preprocessing.StandardScaler()),
])
50+
51+
# Preprocessing for all features: the project helpers split the remaining
# columns into non-continuous and continuous groups, and each group is routed
# through its own sub-pipeline.
cat_features = utils.get_non_continuous_features(house_train1)
num_features = utils.get_continuous_features(house_train1)

preprocess_pipeline = compose.ColumnTransformer(transformers=[
    ('cat', categorical_pipeline, cat_features),
    ('num', numerical_pipeline, num_features),
])
59+
60+
# Feature selection/extraction stage: concatenate PCA components with the
# columns kept by a tree-based importance filter.
#
# The target of this script is SalePrice, a regression target, so the
# importance model must be a regressor. A classifier here would either reject
# a continuous target or treat every distinct price as its own class, which
# yields meaningless importances.
features_pipeline = pipeline.FeatureUnion([
    ('pca_selector', decomposition.PCA()),
    ('et_selector', feature_selection.SelectFromModel(ensemble.ExtraTreesRegressor())),
])
65+
66+
67+
68+
# Final estimator: a linear support vector regressor.
regressor = svm.LinearSVR()

# Complete pipeline: preprocessing, variance filter (default threshold),
# feature selection/extraction, then the regressor. The regressor is fitted on
# log1p(target) and its predictions are mapped back with expm1.
complete_pipeline = pipeline.Pipeline(steps=[
    ('preprocess', preprocess_pipeline),
    ('zv_filter', feature_selection.VarianceThreshold()),
    ('features', features_pipeline),
    (
        'tregressor',
        compose.TransformedTargetRegressor(
            regressor=regressor,
            func=np.log1p,
            inverse_func=np.expm1,
        ),
    ),
])

# Hyper-parameter grid, addressed through the pipeline step names: number of
# PCA components and the C parameter of the wrapped LinearSVR.
pipeline_grid = {
    'features__pca_selector__n_components': [2, 3],
    'tregressor__regressor__C': [0.01, 0.1, 5, 10],
}
82+
83+
# Build the model with the pipeline. log_rmse is an error measure, so
# greater_is_better=False tells the scorer that lower values are better.
scoring = metrics.make_scorer(log_rmse, greater_is_better=False)
pipeline_generated = utils.grid_search_best_model(
    complete_pipeline,
    pipeline_grid,
    house_train1,
    house_train['SalePrice'],
    scoring=scoring,
)
print(pipeline_generated)
87+
88+
# Persist the model returned by the grid search together with the
# column-handling lists built earlier in this script, all in one pickle file.
objects_to_dump = {
    'features_to_cast': features_to_cast,
    'features_to_drop': features_to_drop,
    'pipeline': pipeline_generated,
}
model_file = os.path.join(path, 'house_price_model_v1.pkl')
joblib.dump(objects_to_dump, model_file)
94+
95+
# Build the same sequence of steps as a PMMLPipeline so the fitted model can
# be exported in PMML format.
complete_pipeline_pmml = PMMLPipeline([
    ('preprocess', preprocess_pipeline),
    ('zv_filter', feature_selection.VarianceThreshold()),
    ('features', features_pipeline),
    (
        'tregressor',
        compose.TransformedTargetRegressor(
            regressor=regressor,
            func=np.log1p,
            inverse_func=np.expm1,
        ),
    ),
])

# Hyper-parameter grid for the PMML pipeline, addressed through step names.
pipeline_grid = {
    'features__pca_selector__n_components': [2, 3],
    'tregressor__regressor__C': [0.01, 0.1, 5, 10],
}

pipeline_generated_pmml = utils.grid_search_best_model(
    complete_pipeline_pmml,
    pipeline_grid,
    house_train1,
    house_train['SalePrice'],
    scoring=scoring,
)
# NOTE(review): the .pmml file is written relative to the current working
# directory, while the pickle is written under `path` — confirm this is intended.
sklearn2pmml(pipeline_generated_pmml, 'house_price_model_v1.pmml', with_repr=True)
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import requests
2+
import pandas as pd
3+
import os
4+
5+
# Directory holding the house-prices data set (test.csv is read from here).
path = 'F://house-prices'
house_test = pd.read_csv(os.path.join(path, "test.csv"))
# Bare expression: only displays something in an interactive session.
house_test.shape
house_test.info()
# Add an empty SalePrice column to the test frame before building the payload.
house_test['SalePrice'] = None

# Take only the first row and serialize it as a JSON array of records.
house_test1 = house_test.iloc[:1]
data = house_test1.to_json(orient='records')
13+
14+
# Endpoint of the locally running price-prediction service.
url = 'http://localhost:8080/price/predict/'
# NOTE(review): `data` is already a JSON string, so json=data encodes it a
# second time. Left unchanged because the server side is not visible here —
# confirm the service expects a JSON-encoded string.
# The timeout keeps the script from hanging forever if the service is down.
r = requests.post(url, json=data, timeout=30)
# Fail with a clear HTTP error instead of trying to decode an error page as JSON.
r.raise_for_status()
print(r.json())

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /