Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 08fbdec

Browse files
Merge pull request avinashkranjan#556 from shreyasZ10/avinashkranjan#507
Salary predictor, issue no.-avinashkranjan#507
2 parents 44baae0 + c2f7432 commit 08fbdec

File tree

6 files changed

+32486
-0
lines changed

6 files changed

+32486
-0
lines changed

‎Salary Predictor/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Salary Predictor
2+
3+
4+
This is a Python script that predicts the average annual salary of a person.
5+
6+
Given the sector he/she wants to work in, the person's skills, and the job post he/she is applying for, this model predicts what his/her average annual salary could be.
7+

‎Salary Predictor/dataset/cleaned_dataset.csv

Lines changed: 32350 additions & 0 deletions
Large diffs are not rendered by default.

‎Salary Predictor/model.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# import libraries
import pickle

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split

# Train a random-forest salary regressor on the cleaned dataset, tune it with
# a grid search, and persist both the best estimator and the list of feature
# columns so the prediction script can rebuild the same feature layout.

# Load dataset (path is relative to the repository root)
df = pd.read_csv('./Salary Predictor/dataset/cleaned_dataset.csv')

# Keep only the target and the features the prediction script asks for
df_model = df[['avg_salary','Sector','python_yn','job_sim','R_yn','tableau','power bi','ml','dl']]

# Categorical encoding
df_dum = pd.get_dummies(df_model)

# division into training and test set
X = df_dum.drop('avg_salary', axis = 1)
y = df_dum.avg_salary.values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

# Model Generation
regressor = RandomForestRegressor()

# Baseline: mean 5-fold score of the untuned model (negated MAE, so closer
# to zero is better). Printed because a bare expression is discarded when
# this file runs as a script.
baseline_score = np.mean(cross_val_score(regressor, X_train, y_train, scoring = 'neg_mean_absolute_error', cv= 5))
print(f'Baseline cross-validated neg MAE: {baseline_score}')

#Hyperparameter tuning
# NOTE(review): newer scikit-learn releases renamed 'mse'/'mae' to
# 'squared_error'/'absolute_error' and dropped max_features='auto' --
# confirm against the installed version before running.
parameters = {
    "n_estimators": range(10, 400, 10),
    "criterion": ['mse','mae'],
    "max_features": ['auto','sqrt','log2']
}

gs = GridSearchCV(regressor, param_grid = parameters, scoring = 'neg_mean_absolute_error', cv = 5)
gs.fit(X_train, y_train)

print(f'Best grid-search neg MAE: {gs.best_score_}')
y_pred = gs.best_estimator_.predict(X_test)

# Accuracy measurement
test_mae = mean_absolute_error(y_test, y_pred)
print(f'Test MAE: {test_mae}')

# Save the model
filename = './Salary Predictor/models/random_forest2_model.sav'
# Use a context manager so the file handle is flushed and closed
with open(filename, 'wb') as model_file:
    pickle.dump(gs.best_estimator_, model_file)

# saving the columns
model_columns = list(X.columns)
with open('./Salary Predictor/models/model_columns1.pkl','wb') as file:
    pickle.dump(model_columns, file)
849 Bytes
Binary file not shown.
5.36 MB
Binary file not shown.

‎Salary Predictor/script.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#import the libraries
2+
import pandas as pd
3+
import pickle
4+
5+
# Function to let user choose from various options
6+
def let_user_pick(options, str):
    """Prompt the user to choose one entry from a numbered menu.

    Prints ``Enter the <str>`` followed by every option numbered from 1,
    then reads the selection from standard input.

    Args:
        options: Sequence of choices to display.
        str: Label describing what is being chosen. The name shadows the
            builtin but is kept so existing callers keep working.

    Returns:
        The selected element of ``options``, or ``None`` when the input is
        not an integer in the range ``1..len(options)``.
    """
    print(f"Enter the {str}")
    for idx, element in enumerate(options, start=1):
        print("{}) {}".format(idx, element))
    choice = input("Enter number: ")
    try:
        number = int(choice)
    except ValueError:
        # Non-numeric input counts as "no selection"; other errors propagate
        return None
    if 0 < number <= len(options):
        return options[number - 1]
    return None
17+
18+
# Function to convert yes or no to boolean
19+
def know_language(yes_no):
    """Convert a yes/no answer into a 0/1 flag.

    Args:
        yes_no: Raw answer text typed by the user.

    Returns:
        ``1`` when the answer is ``y``, ``yes`` (any letter case) or ``1``,
        ignoring surrounding whitespace; ``0`` for anything else.
    """
    # Normalise once so answers such as " Y" or "yes " are not read as "no"
    answer = yes_no.strip().lower()
    return 1 if answer in ('y', 'yes', '1') else 0
24+
25+
# Interactive flow: collect the sector, job role and skills from the user,
# build a one-row feature frame, and query the saved random-forest model.

# Take Sector as an input from user
sectors = ['Information Technology','Business Services','Education','Business Services','Finance', 'Government','Travel & Tourism']
sect = let_user_pick(sectors, 'Sector')

# Take job role as an input from user
# 'Software Engineer' must be spelled exactly as in the comparison below,
# otherwise the mapping to 'na' never fires.
job_role = ['Software Engineer', 'data scientist', 'data engineer', 'analyst', 'Machine Learnig Engineer', 'director', 'manager']
job_simp = let_user_pick(job_role, 'Job Role')
# Two menu labels are stored under shorter codes in the job_sim feature
# (presumably matching the training dataset's categories -- verify there).
if job_simp == 'Software Engineer':
    job_simp = 'na'
elif job_simp == 'Machine Learnig Engineer':
    job_simp = 'mle'

# Asking for skills from user
python = know_language(input('Do you know python?(Y/N)'))
r = know_language(input('Do you know R?(Y/N)'))
tableau = know_language(input('Do you know Tableau?(Y/N)'))
Power_Bi = know_language(input('Do you know Power Bi?(Y/N)'))
Machine_Learning = know_language(input('Do you know Machine Learning?(Y/N)'))
Deep_Learning = know_language(input('Do you know Deep Learning?(Y/N)'))

# Build the single-row dataframe directly from the answers. This replaces
# the chained assignments (df[col][0] = value), which pandas does not
# guarantee to write through to the original frame.
df = pd.DataFrame(
    [[sect, python, job_simp, r, tableau, Power_Bi, Machine_Learning, Deep_Learning]],
    columns = ['Sector','python_yn','job_sim','R_yn','tableau','power bi','ml','dl'],
)

#Load the model
# NOTE: pickle executes arbitrary code on load; only use model files that
# were produced by this project's own training script.
with open('./Salary Predictor/models/random_forest2_model.sav', 'rb') as f:
    random_forest_model = pickle.load(f)

# load the columns file
with open('./Salary Predictor/models/model_columns1.pkl', 'rb') as f:
    model_columns = pickle.load(f)

# Query into the model to get results
query_ = pd.get_dummies(df, prefix=['Sector','job_sim'], columns=['Sector','job_sim'])
# Align with the columns saved at training time; unseen dummies become 0
query = query_.reindex(columns = model_columns, fill_value= 0)
prediction = list(random_forest_model.predict(query))
final_val = round(prediction[0],2)

print(f'Your Estimated Annual Salary will be {final_val}K Dollars')

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /