@@ -292,8 +292,8 @@ def plot_correlation_clustermap(input_file, output_dir, drop_column, user_info):
    plt.close()

    corr_csv = f"{BASE_URL}/files/{user_info['user_id']}/Highly_Correlated_Features.csv"
-   corr_pdf = f"{BASE_URL}/files/{user_info['user_id']}/Pearson_Correlation_Clustermap.pdf"
-   corr_png = f"{BASE_URL}/files/{user_info['user_id']}/Pearson_Correlation_Clustermap.png"
+   corr_pdf = f"{BASE_URL}/files/{user_info['user_id']}/Pearson_Correlation_Clustermap_of_All_Features.pdf"
+   corr_png = f"{BASE_URL}/files/{user_info['user_id']}/Pearson_Correlation_Clustermap_of_All_Features.png"
    return {
        "message": "Correlation clustermap created successfully.",
        "output_files": {
@@ -592,7 +592,7 @@ def benchmark_models(input_file, output_dir, user_info):
    fig.savefig(pdf_path, dpi=300, bbox_inches='tight')

    metrics_csv = f"{BASE_URL}/files/{user_info['user_id']}/ML_classifiers_benchmarking_results.csv"
-   png_path = f"{BASE_URL}/files/{user_info['user_id']}/model_benchmarking_curves.png"
+   png_path = f"{BASE_URL}/files/{user_info['user_id']}/ML_classifiers_benchmarking_curves.png"
    return {
        "metrics": metrics_df.to_dict(orient="records"),
        "metrics_path": metrics_csv,
@@ -652,15 +652,12 @@ def get_model_and_importance_with_top10(metrics_df, best_models, reduced_df, sel
    top10_df = reduced_df[columns_to_include].copy()

    # File paths
-   user_id = user_info['user_id']
-   user_folder = os.path.join(output_dir, 'files', str(user_id))
-   os.makedirs(user_folder, exist_ok=True)

    base_fname = selected_model_name.replace(' ', '_').lower()
-   full_csv_path = os.path.join(user_folder, f"{base_fname}_feature_importance.csv")
-   top10_csv_path = os.path.join(user_folder, f"top10_features_{base_fname}.csv")
-   plot_png_path = os.path.join(user_folder, f"top10_feature_importance_{base_fname}.png")
-   plot_pdf_path = os.path.join(user_folder, f"top10_feature_importance_{base_fname}.pdf")
+   full_csv_path = os.path.join(output_dir, f"{base_fname}_feature_importance.csv")
+   top10_csv_path = os.path.join(output_dir, f"top10_features_{base_fname}.csv")
+   plot_png_path = os.path.join(output_dir, f"top10_feature_importance_{base_fname}.png")
+   plot_pdf_path = os.path.join(output_dir, f"top10_feature_importance_{base_fname}.pdf")

    # Save full importance CSV
    importance_df.to_csv(full_csv_path, index=False)
@@ -681,7 +678,7 @@ def get_model_and_importance_with_top10(metrics_df, best_models, reduced_df, sel
    plt.close()

    # Return as API-ready paths
-   base_url = f"{BASE_URL}/files/{user_id}"
+   base_url = f"{BASE_URL}/files/{user_info['user_id']}"
    return {
        "top10_features_path": f"{base_url}/top10_features_{base_fname}.csv",
        "top10_plot_path": f"{base_url}/top10_feature_importance_{base_fname}.png",
@@ -929,12 +926,10 @@ def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir
    # --- Save metrics ---
    metrics_df = pd.DataFrame(metrics_scores).sort_values(by='AUPRC', ascending=False)

-   # Save paths
-   user_id = user_info['user_id']
-   user_folder = os.path.join(output_dir, 'files', str(user_id))
-   os.makedirs(user_folder, exist_ok=True)
+
+

-   csv_path = os.path.join(user_folder, 'single_feature_metrics_ranking.csv')
+   csv_path = os.path.join(output_dir, 'single_feature_metrics_ranking.csv')
    metrics_df.to_csv(csv_path, index=False)

    # --- Plotting ---
@@ -978,14 +973,14 @@ def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir
    plt.tight_layout(rect=[0, 0, 1, 0.96])

    # Save figures
-   plot_png = os.path.join(user_folder, 'single_feature_model_performance_landscape.png')
-   plot_pdf = os.path.join(user_folder, 'single_feature_model_performance_landscape.pdf')
+   plot_png = os.path.join(output_dir, 'single_feature_model_performance_landscape.png')
+   plot_pdf = os.path.join(output_dir, 'single_feature_model_performance_landscape.pdf')
    fig.savefig(plot_png, dpi=300, bbox_inches='tight')
    fig.savefig(plot_pdf, dpi=300, bbox_inches='tight')
    plt.close()

    # Return URLs
-   base_url = f"{BASE_URL}/files/{user_id}"
+   base_url = f"{BASE_URL}/files/{user_info['user_id']}"
    return json.dumps({
        "message": "Feature ranking and plotting completed successfully.",
        "ranking_file": f"{base_url}/single_feature_metrics_ranking.csv",
@@ -1087,11 +1082,9 @@ def evaluate_model_with_features(top10_df, top10_df_array, selected_model, param
    metrics_df = pd.DataFrame(performance_metrics)
    metrics_df.sort_values(by='AUPRC', ascending=False, inplace=True)

-   user_id = user_info['user_id']
-   user_folder = os.path.join(output_dir, 'files', str(user_id))
-   os.makedirs(user_folder, exist_ok=True)

-   metrics_csv_path = os.path.join(user_folder, 'biomarker_algorithms_performance.csv')
+
+   metrics_csv_path = os.path.join(output_dir, 'biomarker_algorithms_performance.csv')
    metrics_df.to_csv(metrics_csv_path, index=False)

    # Plotting
@@ -1119,8 +1112,8 @@ def evaluate_model_with_features(top10_df, top10_df_array, selected_model, param
    fig.suptitle('AUPRC and AUROC Plots of Gene-Models', fontsize=16, y=1.02)
    plt.tight_layout(rect=[0, 0, 1, 0.96])

-   png_path = os.path.join(user_folder, 'biomarker_algorithms_performance_metrics.png')
-   pdf_path = os.path.join(user_folder, 'biomarker_algorithms_performance_metrics.pdf')
+   png_path = os.path.join(output_dir, 'biomarker_algorithms_performance_metrics.png')
+   pdf_path = os.path.join(output_dir, 'biomarker_algorithms_performance_metrics.pdf')
    fig.savefig(png_path, dpi=300, bbox_inches='tight')
    fig.savefig(pdf_path, bbox_inches='tight')
    plt.close()
@@ -1134,11 +1127,11 @@ def evaluate_model_with_features(top10_df, top10_df_array, selected_model, param
        raise ValueError("Some selected features are missing in top10_df.")

    final_df = top10_df[selected_features + ['condition']]
-   final_df_path = os.path.join(user_folder, 'final_selected_biomarker_algorithms_df.csv')
+   final_df_path = os.path.join(output_dir, 'final_selected_biomarker_algorithms_df.csv')
    final_df.to_csv(final_df_path, index=False)

    # Return paths and results
-   base_url = f"{BASE_URL}/files/{user_id}"
+   base_url = f"{BASE_URL}/files/{user_info['user_id']}"
    return {
        "message": "Evaluation completed successfully.",
        "metrics_file": f"{base_url}/biomarker_algorithms_performance.csv",
@@ -1375,17 +1368,14 @@ def evaluate_final_model(final_df_path, selected_model, param_grids, classifiers

    metrics_df = pd.DataFrame([train_metrics, test_metrics])

-   # Create user folder
-   user_id = user_info['user_id']
-   user_folder = os.path.join(output_dir, 'files', str(user_id))
-   os.makedirs(user_folder, exist_ok=True)
+

    # Save metrics CSV
-   metrics_csv_path = os.path.join(user_folder, 'final_model_metrics_summary.csv')
+   metrics_csv_path = os.path.join(output_dir, 'final_model_metrics_summary.csv')
    metrics_df.drop(columns=['Confusion Matrix']).to_csv(metrics_csv_path, index=False)

    # Save model
-   model_path = os.path.join(user_folder, 'final_model.joblib')
+   model_path = os.path.join(output_dir, 'final_model.joblib')
    dump(tuned_model, model_path)

    # Plot PR and ROC curves
@@ -1415,7 +1405,7 @@ def evaluate_final_model(final_df_path, selected_model, param_grids, classifiers
    fig.suptitle('Performance of the Final Model (Train vs Test)', fontsize=15, y=1.02)
    plt.tight_layout(rect=[0, 0, 1, 0.95])

-   pr_roc_png = os.path.join(user_folder, 'final_model_performance.png')
+   pr_roc_png = os.path.join(output_dir, 'final_model_performance.png')
    plt.savefig(pr_roc_png, dpi=300, bbox_inches='tight')
    plt.close()

@@ -1429,12 +1419,12 @@ def evaluate_final_model(final_df_path, selected_model, param_grids, classifiers
    axes[1].set_title("Test Confusion Matrix")
    fig.suptitle('Confusion Matrices of Final Model: Train vs Test', fontsize=16)
    plt.tight_layout(rect=[0, 0, 1, 0.95])
-   cm_png = os.path.join(user_folder, 'final_model_confusion_matrix.png')
+   cm_png = os.path.join(output_dir, 'final_model_confusion_matrix.png')
    plt.savefig(cm_png, dpi=300, bbox_inches='tight')
    plt.close()

    # Return URLs
-   base_url = f"{BASE_URL}/files/{user_id}"
+   base_url = f"{BASE_URL}/files/{user_info['user_id']}"
    return {
        "message": "Final model evaluation completed successfully.",
        "train_metrics": train_metrics,