Commit 25676a2

author

Algorithmica

authored

Add files via upload

1 parent 076bbb0 · commit 25676a2 (Copy full SHA for 25676a2)

File tree

2 files changed

+126

-0

lines changed

2019-october/21.ml as service
- house price model building and deployment.py
- webclient.py

2 files changed

+126

-0

lines changed

`‎2019-october/21.ml as service/house price model building and deployment.py‎`

Lines changed: 110 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,110 @@`
	`1`	`+import sys`
	`2`	`+path = 'F://New Folder/utils'`
	`3`	`+sys.path.append(path)`
	`4`	`+`
	`5`	`+import common_utils as utils`
	`6`	`+from sklearn import metrics, model_selection, ensemble, neighbors, linear_model, decomposition, manifold, feature_selection, preprocessing, pipeline, impute, compose, svm`
	`7`	`+import math`
	`8`	`+import pandas as pd`
	`9`	`+import os`
	`10`	`+import numpy as np`
	`11`	`+from sklearn.externals import joblib`
	`12`	`+from sklearn2pmml import sklearn2pmml`
	`13`	`+from sklearn2pmml.pipeline import PMMLPipeline`
	`14`	`+`
	`15`	`+`
	`16`	`+def log_rmse(y_orig, y_pred):`
	`17`	`+ return math.sqrt(metrics.mean_squared_log_error(y_orig,y_pred) )`
	`18`	`+`
	`19`	`+def rmse(y_orig, y_pred):`
	`20`	`+ return math.sqrt(metrics.mean_squared_error(y_orig,y_pred) )`
	`21`	`+`
	`22`	`+path = 'F://house-prices'`
	`23`	`+house_train = pd.read_csv(os.path.join(path,"train.csv"))`
	`24`	`+house_train.shape`
	`25`	`+house_train.info()`
	`26`	`+`
	`27`	`+#type cast features`
	`28`	`+features_to_cast = ['MSSubClass']`
	`29`	`+utils.cast_to_cat(house_train, features_to_cast)`
	`30`	`+`
	`31`	`+#manual feature selection`
	`32`	`+features_to_drop = ['Id', 'SalePrice']`
	`33`	`+missing_features_above_th = utils.get_features_to_drop_on_missingdata(house_train, 0.25)`
	`34`	`+features_to_drop.extend(missing_features_above_th)`
	`35`	`+house_train1 = utils.drop_features(house_train, features_to_drop)`
	`36`	`+house_train1.shape`
	`37`	`+`
	`38`	`+#build pipeline for categorical features`
	`39`	`+categorical_pipeline = pipeline.Pipeline([`
	`40`	`+ ('imputer', impute.SimpleImputer(strategy="most_frequent") ),`
	`41`	`+ ('ohe', preprocessing.OneHotEncoder(sparse=False, handle_unknown='ignore') )`
	`42`	`+ ])`
	`43`	`+`
	`44`	`+`
	`45`	`+#build pipeline for numerical features`
	`46`	`+numerical_pipeline = pipeline.Pipeline([`
	`47`	`+ ('imputer', impute.SimpleImputer() ),`
	`48`	`+ ('scaler', preprocessing.StandardScaler() )`
	`49`	`+ ])`
	`50`	`+`
	`51`	`+#build preprocessing pipeline for all features`
	`52`	`+cat_features = utils.get_non_continuous_features(house_train1)`
	`53`	`+num_features = utils.get_continuous_features(house_train1)`
	`54`	`+`
	`55`	`+preprocess_pipeline = compose.ColumnTransformer([`
	`56`	`+ ('cat', categorical_pipeline, cat_features),`
	`57`	`+ ('num', numerical_pipeline, num_features)`
	`58`	`+ ])`
	`59`	`+`
	`60`	`+ #build feature selection pipeline`
	`61`	`+features_pipeline = pipeline.FeatureUnion([`
	`62`	`+ ('pca_selector', decomposition.PCA() ),`
	`63`	`+ ('et_selector', feature_selection.SelectFromModel(ensemble.ExtraTreesClassifier()) )`
	`64`	`+ ])`
	`65`	`+`
	`66`	`+`
	`67`	`+`
	`68`	`+regressor = svm.LinearSVR()`
	`69`	`+#build complete pipeline with feature selection and ml algorithms`
	`70`	`+complete_pipeline = pipeline.Pipeline([`
	`71`	`+ ('preprocess', preprocess_pipeline),`
	`72`	`+ ('zv_filter', feature_selection.VarianceThreshold() ),`
	`73`	`+ ('features', features_pipeline ),`
	`74`	`+ ('tregressor', compose.TransformedTargetRegressor(`
	`75`	`+ regressor= regressor,`
	`76`	`+ func=np.log1p, inverse_func=np.expm1))`
	`77`	`+ ])`
	`78`	`+`
	`79`	`+pipeline_grid = {'features__pca_selector__n_components':[2, 3],`
	`80`	`+ 'tregressor__regressor__C':[0.01, 0.1, 5, 10]`
	`81`	`+ }`
	`82`	`+`
	`83`	`+#build model with pipeline`
	`84`	`+scoring = metrics.make_scorer(log_rmse, greater_is_better=False)`
	`85`	`+pipeline_generated = utils.grid_search_best_model(complete_pipeline, pipeline_grid, house_train1, house_train['SalePrice'], scoring=scoring)`
	`86`	`+print(pipeline_generated)`
	`87`	`+`
	`88`	`+objects_to_dump = {`
	`89`	`+ 'features_to_cast': features_to_cast,`
	`90`	`+ 'features_to_drop': features_to_drop,`
	`91`	`+ 'pipeline': pipeline_generated`
	`92`	`+ }`
	`93`	`+joblib.dump(objects_to_dump, os.path.join(path, 'house_price_model_v1.pkl'))`
	`94`	`+`
	`95`	`+#build pipeline in pmml format`
	`96`	`+complete_pipeline_pmml = PMMLPipeline([`
	`97`	`+ ('preprocess', preprocess_pipeline),`
	`98`	`+ ('zv_filter', feature_selection.VarianceThreshold() ),`
	`99`	`+ ('features', features_pipeline ),`
	`100`	`+ ('tregressor', compose.TransformedTargetRegressor(`
	`101`	`+ regressor= regressor,`
	`102`	`+ func=np.log1p, inverse_func=np.expm1))`
	`103`	`+ ])`
	`104`	`+`
	`105`	`+pipeline_grid = {'features__pca_selector__n_components':[2, 3],`
	`106`	`+ 'tregressor__regressor__C':[0.01, 0.1, 5, 10]`
	`107`	`+ }`
	`108`	`+`
	`109`	`+pipeline_generated_pmml = utils.grid_search_best_model(complete_pipeline_pmml, pipeline_grid, house_train1, house_train['SalePrice'], scoring=scoring)`
	`110`	`+sklearn2pmml(pipeline_generated_pmml, 'house_price_model_v1.pmml', with_repr = True)`

`‎2019-october/21.ml as service/webclient.py‎`

Lines changed: 16 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,16 @@`
	`1`	`+import requests`
	`2`	`+import pandas as pd`
	`3`	`+import os`
	`4`	`+`
	`5`	`+path = 'F://house-prices'`
	`6`	`+house_test = pd.read_csv(os.path.join(path,"test.csv"))`
	`7`	`+house_test.shape`
	`8`	`+house_test.info()`
	`9`	`+house_test['SalePrice'] = None`
	`10`	`+`
	`11`	`+house_test1 = house_test.iloc[0:1,]`
	`12`	`+data = house_test1.to_json(orient='records')`
	`13`	`+`
	`14`	`+url = 'http://localhost:8080/price/predict/'`
	`15`	`+r = requests.post(url, json=data)`
	`16`	`+print(r.json())`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 25676a2

File tree

2 files changed

2 files changed

`‎2019-october/21.ml as service/house price model building and deployment.py‎`

`‎2019-october/21.ml as service/webclient.py‎`

0 commit comments