|
 import streamlit as st
 import joblib
 import pandas as pd
-import matplotlib.pyplot as plt
-import seaborn as sns
-from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc
+from sklearn.preprocessing import LabelEncoder
 
|
 # Load the model
 model = joblib.load('model.pkl')
|
 features = ['Hobby', 'OpenSource', 'Country', 'Student', 'Employment', 'FormalEducation',
             'UndergradMajor', 'CompanySize', 'DevType', 'YearsCoding', 'YearsCodingProf']
 
|
+# Initialize label encoders for categorical features
+encoders = {
+    'Hobby': LabelEncoder().fit(['Yes', 'No']),
+    'OpenSource': LabelEncoder().fit(['Yes', 'No']),
+    'Country': LabelEncoder().fit(['United States', 'India', 'Germany']),
+    'Student': LabelEncoder().fit(['Yes', 'No']),
+    'Employment': LabelEncoder().fit(['Employed full-time', 'Employed part-time', 'Self-employed', 'Unemployed']),
+    'FormalEducation': LabelEncoder().fit(["Bachelor’s degree (BA, BS, B.Eng., etc.)",
+                                           "Master’s degree (MA, MS, M.Eng., MBA, etc.)",
+                                           "Doctoral degree (PhD)"]),
+    'UndergradMajor': LabelEncoder().fit(["Computer science, computer engineering, or software engineering",
+                                          "Information technology, networking, or system administration",
+                                          "Other engineering discipline"]),
+    'CompanySize': LabelEncoder().fit(['Fewer than 10 employees', '10 to 19 employees', '20 to 99 employees',
+                                       '100 to 499 employees', '500 to 999 employees', '1,000 to 4,999 employees']),
+    'DevType': LabelEncoder().fit(['Developer, back-end', 'Developer, front-end', 'Developer, full-stack']),
+    'YearsCoding': LabelEncoder().fit(['0-2 years', '3-5 years', '6-8 years', '9-11 years']),
+    'YearsCodingProf': LabelEncoder().fit(['0-2 years', '3-5 years', '6-8 years', '9-11 years']),
+}
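+# Note: these encoders are refit on hard-coded category lists, so the lists (and the
+# alphabetical codes LabelEncoder assigns) are assumed to match the encoding used when
+# model.pkl was trained; loading encoders persisted alongside the model (e.g. with
+# joblib) would avoid that assumption.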
+
 st.title('Job Satisfaction Prediction')
 
|
 # Create a form for user input
|
 # Convert user input to DataFrame
 input_df = pd.DataFrame([input_data])
 
|
+# Encode categorical features
+for feature in features:
+    input_df[feature] = encoders[feature].transform(input_df[feature])
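+# Note: LabelEncoder.transform() raises a ValueError for values it was not fit on, so the
+# form's options should be limited to the categories listed above (or this loop wrapped in
+# a try/except that reports the problem with st.error).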
+
 # Ensure the input has the same columns as the training data
 input_df = input_df[features]
 
|
|
 prediction = model.predict(input_df)
 
|
 # Display the prediction
 st.write(f'Predicted Job Satisfaction: {prediction[0]}')
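+# Note: if JobSatisfaction was label-encoded during training, prediction[0] is a numeric
+# code; keeping the target's fitted LabelEncoder and calling inverse_transform on the
+# prediction would recover the original category text.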
-
-# Evaluate the model on test data (assuming y_test and y_pred are available)
-# This part would typically be done during model development, not in the prediction app
-# However, for demonstration purposes, we can create some dummy data
-y_test = [1, 0, 1, 1, 0]  # Example true labels
-y_pred = model.predict(input_df)  # Example predicted labels
-
-# Print accuracy
-accuracy = accuracy_score(y_test, y_pred)
-st.write(f'Accuracy: {accuracy:.2f}')
-
-# Print classification report
-report = classification_report(y_test, y_pred, output_dict=True)
-st.write('Classification Report:')
-st.write(report)
-
-# Convert classification report to a DataFrame for better readability
-report_df = pd.DataFrame(report).transpose()
-st.write(report_df)
-
-# Plot confusion matrix
-cm = confusion_matrix(y_test, y_pred)
-plt.figure(figsize=(10, 6))
-sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
-plt.title('Confusion Matrix')
-plt.xlabel('Predicted')
-plt.ylabel('Actual')
-st.pyplot(plt)
-
-# If the model is a binary classifier, plot the ROC curve
-if len(set(y_test)) == 2:
-    fpr, tpr, _ = roc_curve(y_test, y_pred)
-    roc_auc = auc(fpr, tpr)
-
-    plt.figure(figsize=(10, 6))
-    plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
-    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
-    plt.xlim([0.0, 1.0])
-    plt.ylim([0.0, 1.05])
-    plt.xlabel('False Positive Rate')
-    plt.ylabel('True Positive Rate')
-    plt.title('Receiver Operating Characteristic (ROC) Curve')
-    plt.legend(loc='lower right')
-    st.pyplot(plt)