|
 import streamlit as st
 import joblib
 import pandas as pd
-import matplotlib.pyplot as plt
-import seaborn as sns
-from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc
+from sklearn.preprocessing import LabelEncoder
 
|
 # Load the model
 model = joblib.load('model.pkl')
|
 features = ['Hobby', 'OpenSource', 'Country', 'Student', 'Employment', 'FormalEducation',
             'UndergradMajor', 'CompanySize', 'DevType', 'YearsCoding', 'YearsCodingProf']
 
|
+# Initialize label encoders for categorical features
+encoders = {
+    'Hobby': LabelEncoder().fit(['Yes', 'No']),
+    'OpenSource': LabelEncoder().fit(['Yes', 'No']),
+    'Country': LabelEncoder().fit(['United States', 'India', 'Germany']),
+    'Student': LabelEncoder().fit(['Yes', 'No']),
+    'Employment': LabelEncoder().fit(['Employed full-time', 'Employed part-time', 'Self-employed', 'Unemployed']),
+    'FormalEducation': LabelEncoder().fit(["Bachelor’s degree (BA, BS, B.Eng., etc.)",
+                                           "Master’s degree (MA, MS, M.Eng., MBA, etc.)",
+                                           "Doctoral degree (PhD)"]),
+    'UndergradMajor': LabelEncoder().fit(["Computer science, computer engineering, or software engineering",
+                                          "Information technology, networking, or system administration",
+                                          "Other engineering discipline"]),
+    'CompanySize': LabelEncoder().fit(['Fewer than 10 employees', '10 to 19 employees', '20 to 99 employees',
+                                       '100 to 499 employees', '500 to 999 employees', '1,000 to 4,999 employees']),
+    'DevType': LabelEncoder().fit(['Developer, back-end', 'Developer, front-end', 'Developer, full-stack']),
+    'YearsCoding': LabelEncoder().fit(['0-2 years', '3-5 years', '6-8 years', '9-11 years']),
+    'YearsCodingProf': LabelEncoder().fit(['0-2 years', '3-5 years', '6-8 years', '9-11 years']),
+}
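+# Note: these encoders are refit on hard-coded category lists, so the lists (and the
+# alphabetical codes LabelEncoder assigns) are assumed to match the encoding used when
+# model.pkl was trained; loading encoders persisted alongside the model (e.g. with
+# joblib) would avoid that assumption.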
+
 st.title('Job Satisfaction Prediction')
 
|
 # Create a form for user input
|
 # Convert user input to DataFrame
 input_df = pd.DataFrame([input_data])
 
|
+# Encode categorical features
+for feature in features:
+    input_df[feature] = encoders[feature].transform(input_df[feature])
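+# Note: LabelEncoder.transform() raises a ValueError for values it was not fit on, so the
+# form's options should be limited to the categories listed above (or this loop wrapped in
+# a try/except that reports the problem with st.error).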
+
 # Ensure the input has the same columns as the training data
 input_df = input_df[features]
 
|
|
 prediction = model.predict(input_df)
 
|
 # Display the prediction
 st.write(f'Predicted Job Satisfaction: {prediction[0]}')
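+# Note: if JobSatisfaction was label-encoded during training, prediction[0] is a numeric
+# code; keeping the target's fitted LabelEncoder and calling inverse_transform on the
+# prediction would recover the original category text.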
-
-# Evaluate the model on test data (assuming y_test and y_pred are available)
-# This part would typically be done during model development, not in the prediction app
-# However, for demonstration purposes, we can create some dummy data
-y_test = [1, 0, 1, 1, 0]  # Example true labels
-y_pred = model.predict(input_df)  # Example predicted labels
-
-# Print accuracy
-accuracy = accuracy_score(y_test, y_pred)
-st.write(f'Accuracy: {accuracy:.2f}')
-
-# Print classification report
-report = classification_report(y_test, y_pred, output_dict=True)
-st.write('Classification Report:')
-st.write(report)
-
-# Convert classification report to a DataFrame for better readability
-report_df = pd.DataFrame(report).transpose()
-st.write(report_df)
-
-# Plot confusion matrix
-cm = confusion_matrix(y_test, y_pred)
-plt.figure(figsize=(10, 6))
-sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
-plt.title('Confusion Matrix')
-plt.xlabel('Predicted')
-plt.ylabel('Actual')
-st.pyplot(plt)
-
-# If the model is a binary classifier, plot the ROC curve
-if len(set(y_test)) == 2:
-    fpr, tpr, _ = roc_curve(y_test, y_pred)
-    roc_auc = auc(fpr, tpr)
-
-    plt.figure(figsize=(10, 6))
-    plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
-    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
-    plt.xlim([0.0, 1.0])
-    plt.ylim([0.0, 1.05])
-    plt.xlabel('False Positive Rate')
-    plt.ylabel('True Positive Rate')
-    plt.title('Receiver Operating Characteristic (ROC) Curve')
-    plt.legend(loc='lower right')
-    st.pyplot(plt)