Commit 436ab62

committed

fix merge issues of ml

1 parent 3bfd5f4 · commit 436ab62

File tree

5 files changed

+67

-43

lines changed

.DS_Store
api
- .DS_Store
- code
  - batch_effect_correction.R
  - code.py
- routers
  - operation_router.py

5 files changed

+67

-43

lines changed

`.DS_Store`

4 KB

Binary file not shown.

`api/.DS_Store`

4 KB

Binary file not shown.

`api/code/batch_effect_correction.R`

Lines changed: 34 additions & 32 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,84 +1,86 @@`
`1`		`-`
`2`	`1`	`batch_effect_correction <- function(input_file, output_dir, user_id) {`
`3`	`2`	`library(jsonlite)`
`4`	`3`	`library(sva) # For batch effect correction`
`5`		`-`
	`4`	`+`
`6`	`5`	`tryCatch(`
`7`	`6`	`{`
`8`	`7`	`# Read and preprocess data - preserve exact feature names`
`9`	`8`	`merged_df_data <- read.csv(input_file, header = TRUE, row.names = 1, check.names = FALSE)`
`10`	`9`	`merged_df_data <- na.omit(merged_df_data)`
`11`		`-`
	`10`	`+`
`12`	`11`	`# Ensure unique column names`
`13`	`12`	`colnames(merged_df_data) <- make.unique(colnames(merged_df_data))`
`14`		`-`
	`13`	`+`
`15`	`14`	`# Extract condition and expression matrix`
`16`	`15`	`condition_info <- merged_df_data$condition`
`17`	`16`	`data_t <- t(merged_df_data[, !(colnames(merged_df_data) %in% c("condition", "batch"))])`
`18`		`-`
	`17`	`+`
`19`	`18`	`# Save original feature names`
`20`	`19`	`feature_names <- rownames(data_t)`
`21`	`20`	`sample_names <- colnames(data_t)`
`22`		`-`
	`21`	`+`
`23`	`22`	`# Batch effect correction with ComBat`
`24`	`23`	`batch_info <- merged_df_data$batch`
`25`	`24`	`data_combat <- ComBat(dat = as.matrix(data_t), batch = batch_info, par.prior = TRUE, prior.plots = FALSE)`
`26`		`-`
	`25`	`+`
`27`	`26`	`# Restore original feature names`
`28`	`27`	`rownames(data_combat) <- feature_names`
`29`		`-`
	`28`	`+`
`30`	`29`	`# Save corrected data`
`31`	`30`	`output_file <- file.path(output_dir, paste0("batch_", basename(input_file)))`
`32`	`31`	`data_corrected <- t(data_combat)`
`33`	`32`	`data_corrected_with_condition <- cbind(condition = condition_info, data_corrected)`
`34`		`-`
	`33`	`+`
`35`	`34`	`# Write CSV with proper quoting to preserve commas/spaces in feature names`
`36`	`35`	`write.csv(`
`37`	`36`	`data_corrected_with_condition,`
`38`	`37`	`output_file,`
`39`	`38`	`row.names = TRUE,`
`40`	`39`	`quote = TRUE,`
`41`	`40`	`na = "",`
`42`		`- fileEncoding = "UTF-8")`
`43`		`-`
	`41`	`+ fileEncoding = "UTF-8"`
	`42`	`+ )`
	`43`	`+`
`44`	`44`	`# Create boxplots in PDF and PNG formats only`
`45`	`45`	`plot_formats <- c("pdf", "png")`
`46`	`46`	`for (fmt in plot_formats) {`
`47`	`47`	`file_name <- file.path(output_dir, paste0("batch_correction_boxplots.", fmt))`
`48`		`-`
	`48`	`+`
`49`	`49`	`# Set up the plotting device`
`50`	`50`	`if (fmt == "png") {`
`51`		`- png(file_name, width = 1200, height = 600, res = 300)`
	`51`	`+ png(file_name, width = 2400, height = 1200, res = 300)`
`52`	`52`	`} else {`
`53`	`53`	`pdf(file_name, width = 12, height = 6)`
`54`	`54`	`}`
`55`		`-`
	`55`	`+`
`56`	`56`	`# Create the plots`
`57`	`57`	`par(mfrow = c(1, 2), mar = c(10, 5, 4, 2))`
`58`		`-`
	`58`	`+`
`59`	`59`	`# Pre-correction plot`
`60`	`60`	`boxplot(data_t,`
`61`		`- main = "Before Batch Correction",`
`62`		`- las = 2,`
`63`		`- col = "lightblue",`
`64`		`- outline = FALSE,`
`65`		`- ylab = "Expression Levels",`
`66`		`- cex.axis = 0.7,`
`67`		`- names = sample_names)`
`68`		`-`
	`61`	`+ main = "Before Batch Correction",`
	`62`	`+ las = 2,`
	`63`	`+ col = "lightblue",`
	`64`	`+ outline = FALSE,`
	`65`	`+ ylab = "Expression Levels",`
	`66`	`+ cex.axis = 0.7,`
	`67`	`+ names = sample_names`
	`68`	`+ )`
	`69`	`+`
`69`	`70`	`# Post-correction plot`
`70`	`71`	`boxplot(data_combat,`
`71`		`- main = "After Batch Correction",`
`72`		`- las = 2,`
`73`		`- col = "lightgreen",`
`74`		`- outline = FALSE,`
`75`		`- ylab = "Expression Levels",`
`76`		`- cex.axis = 0.7,`
`77`		`- names = sample_names)`
`78`		`-`
	`72`	`+ main = "After Batch Correction",`
	`73`	`+ las = 2,`
	`74`	`+ col = "lightgreen",`
	`75`	`+ outline = FALSE,`
	`76`	`+ ylab = "Expression Levels",`
	`77`	`+ cex.axis = 0.7,`
	`78`	`+ names = sample_names`
	`79`	`+ )`
	`80`	`+`
`79`	`81`	`dev.off()`
`80`	`82`	`}`
`81`		`-`
	`83`	`+`
`82`	`84`	`# Output completion message`
`83`	`85`	`cat("Batch effect correction completed. Corrected data saved to:", output_file, "\n")`
`84`	`86`	`cat("Boxplots saved in PDF and PNG formats.\n")`

`api/code/code.py`

Lines changed: 18 additions & 6 deletions

Original file line number	Diff line number	Diff line change
`@@ -856,12 +856,16 @@ def set_perplexity(n_samples):`
`856`	`856`	`)`
`857`	`857`	`from sklearn.base import clone`
`858`	`858`
`859`		`-def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir, user_info):`
	`859`	`+def rank_features(top10_df_path, selected_model, param_grids, classifiers, output_dir, user_info):`
`860`	`860`	`"""`
`861`	`861`	`Rank top features based on single-feature model performance (AUPRC, AUROC, etc.).`
`862`	`862`	`Saves CSV and plots ROC/PR curves for each.`
`863`	`863`	`"""`
`864`	`864`
	`865`	`+ top10_df = pd.read_csv(top10_df_path)`
	`866`	`+`
	`867`	`+ print('top10_df:', top10_df.head())`
	`868`	`+`
`865`	`869`	`try:`
`866`	`870`	`# --- Validate inputs ---`
`867`	`871`	`if selected_model not in param_grids:`
`@@ -932,6 +936,9 @@ def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir`
`932`	`936`	`csv_path = os.path.join(output_dir, 'single_feature_metrics_ranking.csv')`
`933`	`937`	`metrics_df.to_csv(csv_path, index=False)`
`934`	`938`
	`939`	`+`
	`940`	`+ print("okay till plotting")`
	`941`	`+`
`935`	`942`	`# --- Plotting ---`
`936`	`943`	`fig, axes = plt.subplots(1, 2, figsize=(15, 6))`
`937`	`944`
`@@ -981,19 +988,20 @@ def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir`
`981`	`988`
`982`	`989`	`# Return URLs`
`983`	`990`	`base_url = f"{BASE_URL}/files/{user_info['user_id']}"`
`984`		`- return json.dumps({`
	`991`	`+ return {`
`985`	`992`	`"message": "Feature ranking and plotting completed successfully.",`
`986`	`993`	`"ranking_file": f"{base_url}/single_feature_metrics_ranking.csv",`
`987`	`994`	`"plot_png": f"{base_url}/single_feature_model_performance_landscape.png",`
`988`	`995`	`"plot_pdf": f"{base_url}/single_feature_model_performance_landscape.pdf",`
`989`	`996`	`"metrics": metrics_df.to_dict(orient="records")`
`990`		`- })`
	`997`	`+ }`
`991`	`998`
`992`	`999`	`except Exception as e:`
`993`		`- return json.dumps({`
	`1000`	`+ print(e)`
	`1001`	`+ return {`
`994`	`1002`	`"message": "Error during feature ranking and plotting.",`
`995`	`1003`	`"error": str(e)`
`996`		`- })`
	`1004`	`+ }`
`997`	`1005`
`998`	`1006`
`999`	`1007`
`@@ -1009,7 +1017,7 @@ def rank_features(top10_df, selected_model, param_grids, classifiers, output_dir`
`1009`	`1017`	`matthews_corrcoef, log_loss`
`1010`	`1018`	`)`
`1011`	`1019`
`1012`		`-def evaluate_model_with_features(top10_df, top10_df_array, selected_model, param_grids, classifiers, output_dir, user_info):`
	`1020`	`+def evaluate_model_with_features(top10_df_path, selected_model, param_grids, classifiers, output_dir, user_info):`
`1013`	`1021`	`"""`
`1014`	`1022`	`Evaluate the performance of models using top-N features (10 to 1), save plots and metrics, and select the best feature subset.`
`1015`	`1023`	`"""`
`@@ -1018,6 +1026,10 @@ def evaluate_model_with_features(top10_df, top10_df_array, selected_model, param`
`1018`	`1026`	`outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)`
`1019`	`1027`	`inner_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)`
`1020`	`1028`
	`1029`	`+ top10_df = pd.read_csv(top10_df_path)`
	`1030`	`+`
	`1031`	`+ top10_df_array = top10_df.drop(columns='condition').columns.to_numpy()`
	`1032`	`+`
`1021`	`1033`	`# Storage`
`1022`	`1034`	`roc_curves = []`
`1023`	`1035`	`pr_curves = []`

`api/routers/operation_router.py`

Lines changed: 15 additions & 5 deletions

Original file line number	Diff line number	Diff line change
`@@ -455,6 +455,7 @@ async def benchmark_models_api(user_info: dict = Depends(verify_token)):`
`455`	`455`	`from code.code import get_model_and_importance_with_top10, best_models`
`456`	`456`	`from fastapi import Form`
`457`	`457`	`global_model_name = "Extra Trees"`
	`458`	`+global_basef_name = "top10_features_extra_trees.csv"`
`458`	`459`
`459`	`460`	`@router.post('/top10-features')`
`460`	`461`	`async def top10_features(model_name: str = Form(...), user_info: dict = Depends(verify_token)):`
`@@ -491,6 +492,8 @@ async def top10_features(model_name: str = Form(...), user_info: dict = Depends(`
`491`	`492`	`user_info=user_info`
`492`	`493`	`)`
`493`	`494`
	`495`	`+ global_basef_name = result['top10_features_path']`
	`496`	`+`
`494`	`497`	`return {`
`495`	`498`	`"message": "Top 10 features extracted successfully.",`
`496`	`499`	`"top10_features": result["top10_features"],`
`@@ -516,7 +519,7 @@ async def visualize_dimensions_api(`
`516`	`519`	`try:`
`517`	`520`	`# Define file paths`
`518`	`521`	`user_id = str(user_info['user_id'])`
`519`		`- input_file = os.path.join("code", user_id, "files", "top10_features_extra_trees.csv")`
	`522`	`+ input_file = os.path.join("code", user_id, "files", global_basef_name)`
`520`	`523`	`output_dir = os.path.join("code", user_id, "files")`
`521`	`524`
`522`	`525`	`# Ensure the input file exists`
`@@ -546,7 +549,7 @@ async def visualize_dimensions_api(`
`546`	`549`	`from code.code import rank_features, param_grids, classifiers`
`547`	`550`
`548`	`551`	`@router.get('/evaluate-single-features')`
`549`		`-async def rank_features_api(`
	`552`	`+async def evaluate_single_features(`
`550`	`553`	`user_info: dict = Depends(verify_token)`
`551`	`554`	`):`
`552`	`555`	`"""`
`@@ -555,7 +558,7 @@ async def rank_features_api(`
`555`	`558`	`try:`
`556`	`559`	`# Define file paths`
`557`	`560`	`user_id = str(user_info['user_id'])`
`558`		`- input_file = os.path.join("code", user_id, "files", "top10_features_extra_trees.csv")`
	`561`	`+ input_file = os.path.join("code", user_id, "files", global_basef_name)`
`559`	`562`	`output_dir = os.path.join("code", user_id, "files")`
`560`	`563`
`561`	`564`	`# Ensure the input file exists`
`@@ -568,6 +571,8 @@ async def rank_features_api(`
`568`	`571`	`# Call the feature ranking function`
`569`	`572`	`result = rank_features(input_file, global_model_name, param_grids, classifiers, output_dir, user_info)`
`570`	`573`
	`574`	`+ print('result: ', result)`
	`575`	`+`
`571`	`576`	`# Check for errors in the result`
`572`	`577`	`if "error" in result:`
`573`	`578`	`return {"message": "Feature ranking failed.", "error": result["error"]}`
`@@ -609,10 +614,15 @@ async def evaluate_model_features_api(`
`609`	`614`	`# Call the function`
`610`	`615`	`result = evaluate_model_with_features(input_file, global_model_name, param_grids, classifiers, output_dir, user_info)`
`611`	`616`
	`617`	`+ print('result: ', result)`
	`618`	`+`
`612`	`619`	`# Handle errors`
`613`	`620`	`if "error" in result:`
`614`	`621`	`return {"message": "Evaluation failed.", "error": result["error"]}`
`615`	`622`
	`623`	`+`
	`624`	`+`
	`625`	`+`
`616`	`626`	`return {`
`617`	`627`	`"message": result["message"],`
`618`	`628`	`"metrics_file": result["metrics_file"],`
`@@ -637,7 +647,7 @@ async def visualize_dimensions_api(`
`637`	`647`	`try:`
`638`	`648`	`# Define file paths`
`639`	`649`	`user_id = str(user_info['user_id'])`
`640`		`- input_file = os.path.join("code", user_id, "files", "final_selected_features_auprc.csv")`
	`650`	`+ input_file = os.path.join("code", user_id, "files", "final_selected_biomarker_algorithms_df.csv")`
`641`	`651`	`output_dir = os.path.join("code", user_id, "files")`
`642`	`652`
`643`	`653`	`# Ensure the input file exists`
`@@ -677,7 +687,7 @@ async def evaluate_final_model_api(`
`677`	`687`	`try:`
`678`	`688`	`# Define file paths`
`679`	`689`	`user_id = str(user_info['user_id'])`
`680`		`- final_df_path = os.path.join("code", user_id, "files", "final_selected_features_auprc.csv")`
	`690`	`+ final_df_path = os.path.join("code", user_id, "files", "final_selected_biomarker_algorithms_df.csv")`
`681`	`691`	`output_dir = os.path.join("code", user_id, "files")`
`682`	`692`
`683`	`693`	`# Ensure the input file exists`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 436ab62

File tree

5 files changed

5 files changed

`‎.DS_Store`

`‎api/.DS_Store`

`‎api/code/batch_effect_correction.R`

`‎api/code/code.py`

`‎api/routers/operation_router.py`

0 commit comments