Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 436ab62

Browse files
fix merge issues of ml
1 parent 3bfd5f4 commit 436ab62

File tree

5 files changed

+67
-43
lines changed

5 files changed

+67
-43
lines changed

‎.DS_Store

4 KB
Binary file not shown.

‎api/.DS_Store

4 KB
Binary file not shown.

‎api/code/batch_effect_correction.R

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,84 +1,86 @@
1-
21
batch_effect_correction <- function(input_file, output_dir, user_id) {
32
library(jsonlite)
43
library(sva) # For batch effect correction
5-
4+
65
tryCatch(
76
{
87
# Read and preprocess data - preserve exact feature names
98
merged_df_data <- read.csv(input_file, header = TRUE, row.names = 1, check.names = FALSE)
109
merged_df_data <- na.omit(merged_df_data)
11-
10+
1211
# Ensure unique column names
1312
colnames(merged_df_data) <- make.unique(colnames(merged_df_data))
14-
13+
1514
# Extract condition and expression matrix
1615
condition_info <- merged_df_data$condition
1716
data_t <- t(merged_df_data[, !(colnames(merged_df_data) %in% c("condition", "batch"))])
18-
17+
1918
# Save original feature names
2019
feature_names <- rownames(data_t)
2120
sample_names <- colnames(data_t)
22-
21+
2322
# Batch effect correction with ComBat
2423
batch_info <- merged_df_data$batch
2524
data_combat <- ComBat(dat = as.matrix(data_t), batch = batch_info, par.prior = TRUE, prior.plots = FALSE)
26-
25+
2726
# Restore original feature names
2827
rownames(data_combat) <- feature_names
29-
28+
3029
# Save corrected data
3130
output_file <- file.path(output_dir, paste0("batch_", basename(input_file)))
3231
data_corrected <- t(data_combat)
3332
data_corrected_with_condition <- cbind(condition = condition_info, data_corrected)
34-
33+
3534
# Write CSV with proper quoting to preserve commas/spaces in feature names
3635
write.csv(
3736
data_corrected_with_condition,
3837
output_file,
3938
row.names = TRUE,
4039
quote = TRUE,
4140
na = "",
42-
fileEncoding = "UTF-8")
43-
41+
fileEncoding = "UTF-8"
42+
)
43+
4444
# Create boxplots in PDF and PNG formats only
4545
plot_formats <- c("pdf", "png")
4646
for (fmt in plot_formats) {
4747
file_name <- file.path(output_dir, paste0("batch_correction_boxplots.", fmt))
48-
48+
4949
# Set up the plotting device
5050
if (fmt == "png") {
51-
png(file_name, width = 1200, height = 600, res = 300)
51+
png(file_name, width = 2400, height = 1200, res = 300)
5252
} else {
5353
pdf(file_name, width = 12, height = 6)
5454
}
55-
55+
5656
# Create the plots
5757
par(mfrow = c(1, 2), mar = c(10, 5, 4, 2))
58-
58+
5959
# Pre-correction plot
6060
boxplot(data_t,
61-
main = "Before Batch Correction",
62-
las = 2,
63-
col = "lightblue",
64-
outline = FALSE,
65-
ylab = "Expression Levels",
66-
cex.axis = 0.7,
67-
names = sample_names)
68-
61+
main = "Before Batch Correction",
62+
las = 2,
63+
col = "lightblue",
64+
outline = FALSE,
65+
ylab = "Expression Levels",
66+
cex.axis = 0.7,
67+
names = sample_names
68+
)
69+
6970
# Post-correction plot
7071
boxplot(data_combat,
71-
main = "After Batch Correction",
72-
las = 2,
73-
col = "lightgreen",
74-
outline = FALSE,
75-
ylab = "Expression Levels",
76-
cex.axis = 0.7,
77-
names = sample_names)
78-
72+
main = "After Batch Correction",
73+
las = 2,
74+
col = "lightgreen",
75+
outline = FALSE,
76+
ylab = "Expression Levels",
77+
cex.axis = 0.7,
78+
names = sample_names
79+
)
80+
7981
dev.off()
8082
}
81-
83+
8284
# Output completion message
8385
cat("Batch effect correction completed. Corrected data saved to:", output_file, "\n")
8486
cat("Boxplots saved in PDF and PNG formats.\n")

‎api/code/code.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -856,12 +856,16 @@ def set_perplexity(n_samples):
856856
)
857857
from sklearn.base import clone
858858

859-
def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir, user_info):
859+
def rank_features(top10_df_path, selected_model, param_grids, classifiers, output_dir, user_info):
860860
"""
861861
Rank top features based on single-feature model performance (AUPRC, AUROC, etc.).
862862
Saves CSV and plots ROC/PR curves for each.
863863
"""
864864

865+
top10_df = pd.read_csv(top10_df_path)
866+
867+
print('top10_df:', top10_df.head())
868+
865869
try:
866870
# --- Validate inputs ---
867871
if selected_model not in param_grids:
@@ -932,6 +936,9 @@ def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir
932936
csv_path = os.path.join(output_dir, 'single_feature_metrics_ranking.csv')
933937
metrics_df.to_csv(csv_path, index=False)
934938

939+
940+
print("okay till plotting")
941+
935942
# --- Plotting ---
936943
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
937944

@@ -981,19 +988,20 @@ def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir
981988

982989
# Return URLs
983990
base_url = f"{BASE_URL}/files/{user_info['user_id']}"
984-
return json.dumps({
991+
return {
985992
"message": "Feature ranking and plotting completed successfully.",
986993
"ranking_file": f"{base_url}/single_feature_metrics_ranking.csv",
987994
"plot_png": f"{base_url}/single_feature_model_performance_landscape.png",
988995
"plot_pdf": f"{base_url}/single_feature_model_performance_landscape.pdf",
989996
"metrics": metrics_df.to_dict(orient="records")
990-
})
997+
}
991998

992999
except Exception as e:
993-
return json.dumps({
1000+
print(e)
1001+
return {
9941002
"message": "Error during feature ranking and plotting.",
9951003
"error": str(e)
996-
})
1004+
}
9971005

9981006

9991007

@@ -1009,7 +1017,7 @@ def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir
10091017
matthews_corrcoef, log_loss
10101018
)
10111019

1012-
def evaluate_model_with_features(top10_df, top10_df_array, selected_model, param_grids, classifiers, output_dir, user_info):
1020+
def evaluate_model_with_features(top10_df_path, selected_model, param_grids, classifiers, output_dir, user_info):
10131021
"""
10141022
Evaluate the performance of models using top-N features (10 to 1), save plots and metrics, and select the best feature subset.
10151023
"""
@@ -1018,6 +1026,10 @@ def evaluate_model_with_features(top10_df, top10_df_array, selected_model, param
10181026
outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)
10191027
inner_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
10201028

1029+
top10_df = pd.read_csv(top10_df_path)
1030+
1031+
top10_df_array = top10_df.drop(columns='condition').columns.to_numpy()
1032+
10211033
# Storage
10221034
roc_curves = []
10231035
pr_curves = []

‎api/routers/operation_router.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,7 @@ async def benchmark_models_api(user_info: dict = Depends(verify_token)):
455455
from code.code import get_model_and_importance_with_top10, best_models
456456
from fastapi import Form
457457
global_model_name = "Extra Trees"
458+
global_basef_name = "top10_features_extra_trees.csv"
458459

459460
@router.post('/top10-features')
460461
async def top10_features(model_name: str = Form(...), user_info: dict = Depends(verify_token)):
@@ -491,6 +492,8 @@ async def top10_features(model_name: str = Form(...), user_info: dict = Depends(
491492
user_info=user_info
492493
)
493494

495+
global_basef_name = result['top10_features_path']
496+
494497
return {
495498
"message": "Top 10 features extracted successfully.",
496499
"top10_features": result["top10_features"],
@@ -516,7 +519,7 @@ async def visualize_dimensions_api(
516519
try:
517520
# Define file paths
518521
user_id = str(user_info['user_id'])
519-
input_file = os.path.join("code", user_id, "files", "top10_features_extra_trees.csv")
522+
input_file = os.path.join("code", user_id, "files", global_basef_name)
520523
output_dir = os.path.join("code", user_id, "files")
521524

522525
# Ensure the input file exists
@@ -546,7 +549,7 @@ async def visualize_dimensions_api(
546549
from code.code import rank_features, param_grids, classifiers
547550

548551
@router.get('/evaluate-single-features')
549-
async def rank_features_api(
552+
async def evaluate_single_features(
550553
user_info: dict = Depends(verify_token)
551554
):
552555
"""
@@ -555,7 +558,7 @@ async def rank_features_api(
555558
try:
556559
# Define file paths
557560
user_id = str(user_info['user_id'])
558-
input_file = os.path.join("code", user_id, "files", "top10_features_extra_trees.csv")
561+
input_file = os.path.join("code", user_id, "files", global_basef_name)
559562
output_dir = os.path.join("code", user_id, "files")
560563

561564
# Ensure the input file exists
@@ -568,6 +571,8 @@ async def rank_features_api(
568571
# Call the feature ranking function
569572
result = rank_features(input_file, global_model_name, param_grids, classifiers, output_dir, user_info)
570573

574+
print('result: ', result)
575+
571576
# Check for errors in the result
572577
if "error" in result:
573578
return {"message": "Feature ranking failed.", "error": result["error"]}
@@ -609,10 +614,15 @@ async def evaluate_model_features_api(
609614
# Call the function
610615
result = evaluate_model_with_features(input_file, global_model_name, param_grids, classifiers, output_dir, user_info)
611616

617+
print('result: ', result)
618+
612619
# Handle errors
613620
if "error" in result:
614621
return {"message": "Evaluation failed.", "error": result["error"]}
615622

623+
624+
625+
616626
return {
617627
"message": result["message"],
618628
"metrics_file": result["metrics_file"],
@@ -637,7 +647,7 @@ async def visualize_dimensions_api(
637647
try:
638648
# Define file paths
639649
user_id = str(user_info['user_id'])
640-
input_file = os.path.join("code", user_id, "files", "final_selected_features_auprc.csv")
650+
input_file = os.path.join("code", user_id, "files", "final_selected_biomarker_algorithms_df.csv")
641651
output_dir = os.path.join("code", user_id, "files")
642652

643653
# Ensure the input file exists
@@ -677,7 +687,7 @@ async def evaluate_final_model_api(
677687
try:
678688
# Define file paths
679689
user_id = str(user_info['user_id'])
680-
final_df_path = os.path.join("code", user_id, "files", "final_selected_features_auprc.csv")
690+
final_df_path = os.path.join("code", user_id, "files", "final_selected_biomarker_algorithms_df.csv")
681691
output_dir = os.path.join("code", user_id, "files")
682692

683693
# Ensure the input file exists

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /