@@ -292,8 +292,8 @@ def plot_correlation_clustermap(input_file, output_dir, drop_column, user_info):
    plt.close()

    corr_csv = f"{BASE_URL}/files/{user_info['user_id']}/Highly_Correlated_Features.csv"
-   corr_pdf = f"{BASE_URL}/files/{user_info['user_id']}/Pearson_Correlation_Clustermap.pdf"
-   corr_png = f"{BASE_URL}/files/{user_info['user_id']}/Pearson_Correlation_Clustermap.png"
+   corr_pdf = f"{BASE_URL}/files/{user_info['user_id']}/Pearson_Correlation_Clustermap_of_All_Features.pdf"
+   corr_png = f"{BASE_URL}/files/{user_info['user_id']}/Pearson_Correlation_Clustermap_of_All_Features.png"
    return {
        "message": "Correlation clustermap created successfully.",
        "output_files": {
@@ -592,7 +592,7 @@ def benchmark_models(input_file, output_dir, user_info):
    fig.savefig(pdf_path, dpi=300, bbox_inches='tight')

    metrics_csv = f"{BASE_URL}/files/{user_info['user_id']}/ML_classifiers_benchmarking_results.csv"
-   png_path = f"{BASE_URL}/files/{user_info['user_id']}/model_benchmarking_curves.png"
+   png_path = f"{BASE_URL}/files/{user_info['user_id']}/ML_classifiers_benchmarking_curves.png"
    return {
        "metrics": metrics_df.to_dict(orient="records"),
        "metrics_path": metrics_csv,
@@ -652,15 +652,12 @@ def get_model_and_importance_with_top10(metrics_df, best_models, reduced_df, sel
    top10_df = reduced_df[columns_to_include].copy()

    # File paths
-   user_id = user_info['user_id']
-   user_folder = os.path.join(output_dir, 'files', str(user_id))
-   os.makedirs(user_folder, exist_ok=True)

    base_fname = selected_model_name.replace(' ', '_').lower()
-   full_csv_path = os.path.join(user_folder, f"{base_fname}_feature_importance.csv")
-   top10_csv_path = os.path.join(user_folder, f"top10_features_{base_fname}.csv")
-   plot_png_path = os.path.join(user_folder, f"top10_feature_importance_{base_fname}.png")
-   plot_pdf_path = os.path.join(user_folder, f"top10_feature_importance_{base_fname}.pdf")
+   full_csv_path = os.path.join(output_dir, f"{base_fname}_feature_importance.csv")
+   top10_csv_path = os.path.join(output_dir, f"top10_features_{base_fname}.csv")
+   plot_png_path = os.path.join(output_dir, f"top10_feature_importance_{base_fname}.png")
+   plot_pdf_path = os.path.join(output_dir, f"top10_feature_importance_{base_fname}.pdf")

    # Save full importance CSV
    importance_df.to_csv(full_csv_path, index=False)
@@ -681,7 +678,7 @@ def get_model_and_importance_with_top10(metrics_df, best_models, reduced_df, sel
    plt.close()

    # Return as API-ready paths
-   base_url = f"{BASE_URL}/files/{user_id}"
+   base_url = f"{BASE_URL}/files/{user_info['user_id']}"
    return {
        "top10_features_path": f"{base_url}/top10_features_{base_fname}.csv",
        "top10_plot_path": f"{base_url}/top10_feature_importance_{base_fname}.png",
@@ -929,12 +926,10 @@ def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir
    # --- Save metrics ---
    metrics_df = pd.DataFrame(metrics_scores).sort_values(by='AUPRC', ascending=False)

-   # Save paths
-   user_id = user_info['user_id']
-   user_folder = os.path.join(output_dir, 'files', str(user_id))
-   os.makedirs(user_folder, exist_ok=True)
+
+

-   csv_path = os.path.join(user_folder, 'single_feature_metrics_ranking.csv')
+   csv_path = os.path.join(output_dir, 'single_feature_metrics_ranking.csv')
    metrics_df.to_csv(csv_path, index=False)

    # --- Plotting ---
@@ -978,14 +973,14 @@ def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir
    plt.tight_layout(rect=[0, 0, 1, 0.96])

    # Save figures
-   plot_png = os.path.join(user_folder, 'single_feature_model_performance_landscape.png')
-   plot_pdf = os.path.join(user_folder, 'single_feature_model_performance_landscape.pdf')
+   plot_png = os.path.join(output_dir, 'single_feature_model_performance_landscape.png')
+   plot_pdf = os.path.join(output_dir, 'single_feature_model_performance_landscape.pdf')
    fig.savefig(plot_png, dpi=300, bbox_inches='tight')
    fig.savefig(plot_pdf, dpi=300, bbox_inches='tight')
    plt.close()

    # Return URLs
-   base_url = f"{BASE_URL}/files/{user_id}"
+   base_url = f"{BASE_URL}/files/{user_info['user_id']}"
    return json.dumps({
        "message": "Feature ranking and plotting completed successfully.",
        "ranking_file": f"{base_url}/single_feature_metrics_ranking.csv",
@@ -1087,11 +1082,9 @@ def evaluate_model_with_features(top10_df, top10_df_array, selected_model, param
    metrics_df = pd.DataFrame(performance_metrics)
    metrics_df.sort_values(by='AUPRC', ascending=False, inplace=True)

-   user_id = user_info['user_id']
-   user_folder = os.path.join(output_dir, 'files', str(user_id))
-   os.makedirs(user_folder, exist_ok=True)

-   metrics_csv_path = os.path.join(user_folder, 'biomarker_algorithms_performance.csv')
+
+   metrics_csv_path = os.path.join(output_dir, 'biomarker_algorithms_performance.csv')
    metrics_df.to_csv(metrics_csv_path, index=False)

    # Plotting
@@ -1119,8 +1112,8 @@ def evaluate_model_with_features(top10_df, top10_df_array, selected_model, param
    fig.suptitle('AUPRC and AUROC Plots of Gene-Models', fontsize=16, y=1.02)
    plt.tight_layout(rect=[0, 0, 1, 0.96])

-   png_path = os.path.join(user_folder, 'biomarker_algorithms_performance_metrics.png')
-   pdf_path = os.path.join(user_folder, 'biomarker_algorithms_performance_metrics.pdf')
+   png_path = os.path.join(output_dir, 'biomarker_algorithms_performance_metrics.png')
+   pdf_path = os.path.join(output_dir, 'biomarker_algorithms_performance_metrics.pdf')
    fig.savefig(png_path, dpi=300, bbox_inches='tight')
    fig.savefig(pdf_path, bbox_inches='tight')
    plt.close()
@@ -1134,11 +1127,11 @@ def evaluate_model_with_features(top10_df, top10_df_array, selected_model, param
        raise ValueError("Some selected features are missing in top10_df.")

    final_df = top10_df[selected_features + ['condition']]
-   final_df_path = os.path.join(user_folder, 'final_selected_biomarker_algorithms_df.csv')
+   final_df_path = os.path.join(output_dir, 'final_selected_biomarker_algorithms_df.csv')
    final_df.to_csv(final_df_path, index=False)

    # Return paths and results
-   base_url = f"{BASE_URL}/files/{user_id}"
+   base_url = f"{BASE_URL}/files/{user_info['user_id']}"
    return {
        "message": "Evaluation completed successfully.",
        "metrics_file": f"{base_url}/biomarker_algorithms_performance.csv",
@@ -1375,17 +1368,14 @@ def evaluate_final_model(final_df_path, selected_model, param_grids, classifiers

    metrics_df = pd.DataFrame([train_metrics, test_metrics])

-   # Create user folder
-   user_id = user_info['user_id']
-   user_folder = os.path.join(output_dir, 'files', str(user_id))
-   os.makedirs(user_folder, exist_ok=True)
+

    # Save metrics CSV
-   metrics_csv_path = os.path.join(user_folder, 'final_model_metrics_summary.csv')
+   metrics_csv_path = os.path.join(output_dir, 'final_model_metrics_summary.csv')
    metrics_df.drop(columns=['Confusion Matrix']).to_csv(metrics_csv_path, index=False)

    # Save model
-   model_path = os.path.join(user_folder, 'final_model.joblib')
+   model_path = os.path.join(output_dir, 'final_model.joblib')
    dump(tuned_model, model_path)

    # Plot PR and ROC curves
@@ -1415,7 +1405,7 @@ def evaluate_final_model(final_df_path, selected_model, param_grids, classifiers
    fig.suptitle('Performance of the Final Model (Train vs Test)', fontsize=15, y=1.02)
    plt.tight_layout(rect=[0, 0, 1, 0.95])

-   pr_roc_png = os.path.join(user_folder, 'final_model_performance.png')
+   pr_roc_png = os.path.join(output_dir, 'final_model_performance.png')
    plt.savefig(pr_roc_png, dpi=300, bbox_inches='tight')
    plt.close()

@@ -1429,12 +1419,12 @@ def evaluate_final_model(final_df_path, selected_model, param_grids, classifiers
    axes[1].set_title("Test Confusion Matrix")
    fig.suptitle('Confusion Matrices of Final Model: Train vs Test', fontsize=16)
    plt.tight_layout(rect=[0, 0, 1, 0.95])
-   cm_png = os.path.join(user_folder, 'final_model_confusion_matrix.png')
+   cm_png = os.path.join(output_dir, 'final_model_confusion_matrix.png')
    plt.savefig(cm_png, dpi=300, bbox_inches='tight')
    plt.close()

    # Return URLs
-   base_url = f"{BASE_URL}/files/{user_id}"
+   base_url = f"{BASE_URL}/files/{user_info['user_id']}"
    return {
        "message": "Final model evaluation completed successfully.",
        "train_metrics": train_metrics,