Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 3bfd5f4

Browse files
fix(ml): sync
1 parent ca66a27 commit 3bfd5f4

File tree

2 files changed

+41
-45
lines changed

2 files changed

+41
-45
lines changed

‎api/code/code.py‎

Lines changed: 26 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -292,8 +292,8 @@ def plot_correlation_clustermap(input_file, output_dir, drop_column, user_info):
292292
plt.close()
293293

294294
corr_csv = f"{BASE_URL}/files/{user_info['user_id']}/Highly_Correlated_Features.csv"
295-
corr_pdf = f"{BASE_URL}/files/{user_info['user_id']}/Pearson_Correlation_Clustermap.pdf"
296-
corr_png = f"{BASE_URL}/files/{user_info['user_id']}/Pearson_Correlation_Clustermap.png"
295+
corr_pdf = f"{BASE_URL}/files/{user_info['user_id']}/Pearson_Correlation_Clustermap_of_All_Features.pdf"
296+
corr_png = f"{BASE_URL}/files/{user_info['user_id']}/Pearson_Correlation_Clustermap_of_All_Features.png"
297297
return {
298298
"message": "Correlation clustermap created successfully.",
299299
"output_files": {
@@ -592,7 +592,7 @@ def benchmark_models(input_file, output_dir, user_info):
592592
fig.savefig(pdf_path, dpi=300, bbox_inches='tight')
593593

594594
metrics_csv = f"{BASE_URL}/files/{user_info['user_id']}/ML_classifiers_benchmarking_results.csv"
595-
png_path = f"{BASE_URL}/files/{user_info['user_id']}/model_benchmarking_curves.png"
595+
png_path = f"{BASE_URL}/files/{user_info['user_id']}/ML_classifiers_benchmarking_curves.png"
596596
return {
597597
"metrics": metrics_df.to_dict(orient="records"),
598598
"metrics_path": metrics_csv,
@@ -652,15 +652,12 @@ def get_model_and_importance_with_top10(metrics_df, best_models, reduced_df, sel
652652
top10_df = reduced_df[columns_to_include].copy()
653653

654654
# File paths
655-
user_id = user_info['user_id']
656-
user_folder = os.path.join(output_dir, 'files', str(user_id))
657-
os.makedirs(user_folder, exist_ok=True)
658655

659656
base_fname = selected_model_name.replace(' ', '_').lower()
660-
full_csv_path = os.path.join(user_folder, f"{base_fname}_feature_importance.csv")
661-
top10_csv_path = os.path.join(user_folder, f"top10_features_{base_fname}.csv")
662-
plot_png_path = os.path.join(user_folder, f"top10_feature_importance_{base_fname}.png")
663-
plot_pdf_path = os.path.join(user_folder, f"top10_feature_importance_{base_fname}.pdf")
657+
full_csv_path = os.path.join(output_dir, f"{base_fname}_feature_importance.csv")
658+
top10_csv_path = os.path.join(output_dir, f"top10_features_{base_fname}.csv")
659+
plot_png_path = os.path.join(output_dir, f"top10_feature_importance_{base_fname}.png")
660+
plot_pdf_path = os.path.join(output_dir, f"top10_feature_importance_{base_fname}.pdf")
664661

665662
# Save full importance CSV
666663
importance_df.to_csv(full_csv_path, index=False)
@@ -681,7 +678,7 @@ def get_model_and_importance_with_top10(metrics_df, best_models, reduced_df, sel
681678
plt.close()
682679

683680
# Return as API-ready paths
684-
base_url = f"{BASE_URL}/files/{user_id}"
681+
base_url = f"{BASE_URL}/files/{user_info['user_id']}"
685682
return {
686683
"top10_features_path": f"{base_url}/top10_features_{base_fname}.csv",
687684
"top10_plot_path": f"{base_url}/top10_feature_importance_{base_fname}.png",
@@ -929,12 +926,10 @@ def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir
929926
# --- Save metrics ---
930927
metrics_df = pd.DataFrame(metrics_scores).sort_values(by='AUPRC', ascending=False)
931928

932-
# Save paths
933-
user_id = user_info['user_id']
934-
user_folder = os.path.join(output_dir, 'files', str(user_id))
935-
os.makedirs(user_folder, exist_ok=True)
929+
930+
936931

937-
csv_path = os.path.join(user_folder, 'single_feature_metrics_ranking.csv')
932+
csv_path = os.path.join(output_dir, 'single_feature_metrics_ranking.csv')
938933
metrics_df.to_csv(csv_path, index=False)
939934

940935
# --- Plotting ---
@@ -978,14 +973,14 @@ def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir
978973
plt.tight_layout(rect=[0, 0, 1, 0.96])
979974

980975
# Save figures
981-
plot_png = os.path.join(user_folder, 'single_feature_model_performance_landscape.png')
982-
plot_pdf = os.path.join(user_folder, 'single_feature_model_performance_landscape.pdf')
976+
plot_png = os.path.join(output_dir, 'single_feature_model_performance_landscape.png')
977+
plot_pdf = os.path.join(output_dir, 'single_feature_model_performance_landscape.pdf')
983978
fig.savefig(plot_png, dpi=300, bbox_inches='tight')
984979
fig.savefig(plot_pdf, dpi=300, bbox_inches='tight')
985980
plt.close()
986981

987982
# Return URLs
988-
base_url = f"{BASE_URL}/files/{user_id}"
983+
base_url = f"{BASE_URL}/files/{user_info['user_id']}"
989984
return json.dumps({
990985
"message": "Feature ranking and plotting completed successfully.",
991986
"ranking_file": f"{base_url}/single_feature_metrics_ranking.csv",
@@ -1087,11 +1082,9 @@ def evaluate_model_with_features(top10_df, top10_df_array, selected_model, param
10871082
metrics_df = pd.DataFrame(performance_metrics)
10881083
metrics_df.sort_values(by='AUPRC', ascending=False, inplace=True)
10891084

1090-
user_id = user_info['user_id']
1091-
user_folder = os.path.join(output_dir, 'files', str(user_id))
1092-
os.makedirs(user_folder, exist_ok=True)
10931085

1094-
metrics_csv_path = os.path.join(user_folder, 'biomarker_algorithms_performance.csv')
1086+
1087+
metrics_csv_path = os.path.join(output_dir, 'biomarker_algorithms_performance.csv')
10951088
metrics_df.to_csv(metrics_csv_path, index=False)
10961089

10971090
# Plotting
@@ -1119,8 +1112,8 @@ def evaluate_model_with_features(top10_df, top10_df_array, selected_model, param
11191112
fig.suptitle('AUPRC and AUROC Plots of Gene-Models', fontsize=16, y=1.02)
11201113
plt.tight_layout(rect=[0, 0, 1, 0.96])
11211114

1122-
png_path = os.path.join(user_folder, 'biomarker_algorithms_performance_metrics.png')
1123-
pdf_path = os.path.join(user_folder, 'biomarker_algorithms_performance_metrics.pdf')
1115+
png_path = os.path.join(output_dir, 'biomarker_algorithms_performance_metrics.png')
1116+
pdf_path = os.path.join(output_dir, 'biomarker_algorithms_performance_metrics.pdf')
11241117
fig.savefig(png_path, dpi=300, bbox_inches='tight')
11251118
fig.savefig(pdf_path, bbox_inches='tight')
11261119
plt.close()
@@ -1134,11 +1127,11 @@ def evaluate_model_with_features(top10_df, top10_df_array, selected_model, param
11341127
raise ValueError("Some selected features are missing in top10_df.")
11351128

11361129
final_df = top10_df[selected_features + ['condition']]
1137-
final_df_path = os.path.join(user_folder, 'final_selected_biomarker_algorithms_df.csv')
1130+
final_df_path = os.path.join(output_dir, 'final_selected_biomarker_algorithms_df.csv')
11381131
final_df.to_csv(final_df_path, index=False)
11391132

11401133
# Return paths and results
1141-
base_url = f"{BASE_URL}/files/{user_id}"
1134+
base_url = f"{BASE_URL}/files/{user_info['user_id']}"
11421135
return {
11431136
"message": "Evaluation completed successfully.",
11441137
"metrics_file": f"{base_url}/biomarker_algorithms_performance.csv",
@@ -1375,17 +1368,14 @@ def evaluate_final_model(final_df_path, selected_model, param_grids, classifiers
13751368

13761369
metrics_df = pd.DataFrame([train_metrics, test_metrics])
13771370

1378-
# Create user folder
1379-
user_id = user_info['user_id']
1380-
user_folder = os.path.join(output_dir, 'files', str(user_id))
1381-
os.makedirs(user_folder, exist_ok=True)
1371+
13821372

13831373
# Save metrics CSV
1384-
metrics_csv_path = os.path.join(user_folder, 'final_model_metrics_summary.csv')
1374+
metrics_csv_path = os.path.join(output_dir, 'final_model_metrics_summary.csv')
13851375
metrics_df.drop(columns=['Confusion Matrix']).to_csv(metrics_csv_path, index=False)
13861376

13871377
# Save model
1388-
model_path = os.path.join(user_folder, 'final_model.joblib')
1378+
model_path = os.path.join(output_dir, 'final_model.joblib')
13891379
dump(tuned_model, model_path)
13901380

13911381
# Plot PR and ROC curves
@@ -1415,7 +1405,7 @@ def evaluate_final_model(final_df_path, selected_model, param_grids, classifiers
14151405
fig.suptitle('Performance of the Final Model (Train vs Test)', fontsize=15, y=1.02)
14161406
plt.tight_layout(rect=[0, 0, 1, 0.95])
14171407

1418-
pr_roc_png = os.path.join(user_folder, 'final_model_performance.png')
1408+
pr_roc_png = os.path.join(output_dir, 'final_model_performance.png')
14191409
plt.savefig(pr_roc_png, dpi=300, bbox_inches='tight')
14201410
plt.close()
14211411

@@ -1429,12 +1419,12 @@ def evaluate_final_model(final_df_path, selected_model, param_grids, classifiers
14291419
axes[1].set_title("Test Confusion Matrix")
14301420
fig.suptitle('Confusion Matrices of Final Model: Train vs Test', fontsize=16)
14311421
plt.tight_layout(rect=[0, 0, 1, 0.95])
1432-
cm_png = os.path.join(user_folder, 'final_model_confusion_matrix.png')
1422+
cm_png = os.path.join(output_dir, 'final_model_confusion_matrix.png')
14331423
plt.savefig(cm_png, dpi=300, bbox_inches='tight')
14341424
plt.close()
14351425

14361426
# Return URLs
1437-
base_url = f"{BASE_URL}/files/{user_id}"
1427+
base_url = f"{BASE_URL}/files/{user_info['user_id']}"
14381428
return {
14391429
"message": "Final model evaluation completed successfully.",
14401430
"train_metrics": train_metrics,

‎api/routers/operation_router.py‎

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,7 @@ async def upload_merge(merged_df_data_normalized_t: UploadFile = File(...), user
131131
API endpoint to upload and save a merge file (merged_df_data_normalized_t.csv) directly to the files directory.
132132
"""
133133
try:
134+
os.chdir(R_CODE_DIRECTORY + "/..")
134135
# Define user-specific directories
135136
user_id = str(user_info['user_id'])
136137
files_dir = os.path.join(R_CODE_DIRECTORY, user_id, "files")
@@ -173,6 +174,7 @@ async def batch_effect_correction(user_info: dict = Depends(verify_token)):
173174

174175

175176
print("before")
177+
print(os.getcwd())
176178

177179
# Check if input file exists
178180
if not os.path.exists(input_file):
@@ -257,7 +259,7 @@ async def z_score_normalize(user_info: dict = Depends(verify_token)):
257259
return {
258260
"message": result["message"],
259261

260-
"normalized_file": f"{BASE_URL}/files/{user_info['user_id']}/z_score_normalized_data.csv"
262+
"normalized_file": f"{BASE_URL}/files/{user_info['user_id']}/z_score_normalized_data_of_ML_DF.csv"
261263
}
262264
else:
263265
return {
@@ -286,7 +288,7 @@ async def dimensionality_reduction(user_info: dict = Depends(verify_token)):
286288
# Define input file and output directory paths
287289
user_id = str(user_info['user_id'])
288290
input_file = os.path.join(
289-
"code", user_id, "files", "z_score_normalized_data.csv"
291+
"code", user_id, "files", "z_score_normalized_data_of_ML_DF.csv"
290292
)
291293
output_dir = os.path.join("code", user_id, "files")
292294

@@ -328,7 +330,7 @@ async def correlation_clustermap(user_info: dict = Depends(verify_token)):
328330
try:
329331
# Define input and output paths
330332
user_id = str(user_info['user_id'])
331-
input_file = os.path.join("code", user_id, "files", "z_score_normalized_data.csv")
333+
input_file = os.path.join("code", user_id, "files", "z_score_normalized_data_of_ML_DF.csv")
332334
output_dir = os.path.join("code", user_id, "files")
333335
drop_column = "condition"
334336

@@ -374,7 +376,7 @@ async def feature_selection_model(
374376
try:
375377
# Define file paths
376378
user_id = str(user_info['user_id'])
377-
input_file = os.path.join("code", user_id, "files", "z_score_normalized_data.csv")
379+
input_file = os.path.join("code", user_id, "files", "z_score_normalized_data_of_ML_DF.csv")
378380
output_dir = os.path.join("code", user_id, "files")
379381

380382
# Verify input file exists
@@ -413,7 +415,7 @@ async def feature_selection_model(
413415
async def benchmark_models_api(user_info: dict = Depends(verify_token)):
414416
try:
415417
user_id = str(user_info['user_id'])
416-
input_file = os.path.join("code", user_id, "files", "selected_features.csv")
418+
input_file = os.path.join("code", user_id, "files", "selected_features_RFE_RF.csv")
417419
output_dir = os.path.join("code", user_id, "files")
418420
os.makedirs(output_dir, exist_ok=True)
419421

@@ -464,7 +466,7 @@ async def top10_features(model_name: str = Form(...), user_info: dict = Depends(
464466
try:
465467
# Define file paths
466468
user_id = str(user_info['user_id'])
467-
reduced_df_path = os.path.join("code", user_id, "files", "selected_features.csv")
469+
reduced_df_path = os.path.join("code", user_id, "files", "selected_features_RFE_RF.csv")
468470
output_dir = os.path.join("code", user_id, "files")
469471

470472

@@ -514,7 +516,7 @@ async def visualize_dimensions_api(
514516
try:
515517
# Define file paths
516518
user_id = str(user_info['user_id'])
517-
input_file = os.path.join("code", user_id, "files", "top10_features_Extra Trees.csv")
519+
input_file = os.path.join("code", user_id, "files", "top10_features_extra_trees.csv")
518520
output_dir = os.path.join("code", user_id, "files")
519521

520522
# Ensure the input file exists
@@ -553,7 +555,7 @@ async def rank_features_api(
553555
try:
554556
# Define file paths
555557
user_id = str(user_info['user_id'])
556-
input_file = os.path.join("code", user_id, "files", "top10_features_Extra Trees.csv")
558+
input_file = os.path.join("code", user_id, "files", "top10_features_extra_trees.csv")
557559
output_dir = os.path.join("code", user_id, "files")
558560

559561
# Ensure the input file exists
@@ -597,7 +599,7 @@ async def evaluate_model_features_api(
597599
try:
598600
# Define file paths
599601
user_id = str(user_info['user_id'])
600-
input_file = os.path.join("code", user_id, "files", "top10_features_Extra Trees.csv")
602+
input_file = os.path.join("code", user_id, "files", "top10_features_extra_trees.csv")
601603
output_dir = os.path.join("code", user_id, "files")
602604

603605
# Ensure the input file exists
@@ -1215,6 +1217,10 @@ async def run_mapping_plotting(request: MappingPlottingRequest, user_info: dict
12151217
"""
12161218
try:
12171219
# Define file paths
1220+
1221+
os.chdir(R_CODE_DIRECTORY + "/..")
1222+
print(os.getcwd())
1223+
12181224
user_id = str(user_info['user_id'])
12191225
r_script_path = "string/String_Workflow_Update_v3_Feb19.R"
12201226
output_dir = os.path.join("code", user_id, "files", "string")

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /