|
| 1 | + |
1 | 2 | batch_effect_correction <- function(input_file, output_dir, user_id) {
|
2 | 3 | library(jsonlite)
|
3 | 4 | library(sva) # For batch effect correction
|
4 | | - |
| 5 | + |
5 | 6 | tryCatch(
|
6 | 7 | {
|
7 | | - # Read and preprocess data |
8 | | - merged_df_data <- read.csv(input_file, header = TRUE, row.names = 1) |
| 8 | + # Read and preprocess data - preserve exact feature names |
| 9 | + merged_df_data <- read.csv(input_file, header = TRUE, row.names = 1, check.names=FALSE) |
9 | 10 | merged_df_data <- na.omit(merged_df_data)
|
| 11 | + |
| 12 | + # Ensure unique column names |
| 13 | + colnames(merged_df_data) <- make.unique(colnames(merged_df_data)) |
| 14 | + |
| 15 | + # Extract condition and expression matrix |
10 | 16 | condition_info <- merged_df_data$condition
|
11 | 17 | data_t <- t(merged_df_data[, !(colnames(merged_df_data) %in% c("condition", "batch"))])
|
12 | | - sample_names <- colnames(data_t) # Save sample names for labeling |
13 | | - |
| 18 | + |
| 19 | + # Save original feature names |
| 20 | + feature_names <- rownames(data_t) |
| 21 | + sample_names <- colnames(data_t) |
| 22 | + |
14 | 23 | # Batch effect correction with ComBat
|
15 | 24 | batch_info <- merged_df_data$batch
|
16 | 25 | data_combat <- ComBat(dat = as.matrix(data_t), batch = batch_info, par.prior = TRUE, prior.plots = FALSE)
|
17 | | - |
| 26 | + |
| 27 | + # Restore original feature names |
| 28 | + rownames(data_combat) <- feature_names |
| 29 | + |
18 | 30 | # Save corrected data
|
19 | 31 | output_file <- file.path(output_dir, paste0("batch_", basename(input_file)))
|
20 | 32 | data_corrected <- t(data_combat)
|
21 | 33 | data_corrected_with_condition <- cbind(condition = condition_info, data_corrected)
|
22 | | - write.csv(data_corrected_with_condition, output_file, row.names = TRUE) |
23 | | - |
24 | | - # Save boxplots in multiple formats |
25 | | - plot_formats <- c("png", "jpg", "tif", "pdf") |
| 34 | + |
| 35 | + # Write CSV with proper quoting to preserve commas/spaces in feature names |
| 36 | + write.csv( |
| 37 | + data_corrected_with_condition, |
| 38 | + output_file, |
| 39 | + row.names = TRUE, |
| 40 | + quote = TRUE, |
| 41 | + na = "", |
| 42 | + fileEncoding = "UTF-8") |
| 43 | + |
| 44 | + # Create boxplots in PDF and PNG formats only |
| 45 | + plot_formats <- c("pdf", "png") |
26 | 46 | for (fmt in plot_formats) {
|
27 | 47 | file_name <- file.path(output_dir, paste0("batch_correction_boxplots.", fmt))
|
| 48 | + |
| 49 | + # Set up the plotting device |
28 | 50 | if (fmt == "png") {
|
29 | | - png(file_name, width = 1200, height = 600) |
30 | | - } else if (fmt == "jpg") { |
31 | | - jpeg(file_name, width = 1200, height = 600) |
32 | | - } else if (fmt == "tif") { |
33 | | - tiff(file_name, width = 1200, height = 600) |
34 | | - } else if (fmt == "pdf") { |
| 51 | + png(file_name, width = 1200, height = 600, res = 300) |
| 52 | + } else { |
35 | 53 | pdf(file_name, width = 12, height = 6)
|
36 | 54 | }
|
| 55 | + |
| 56 | + # Create the plots |
37 | 57 | par(mfrow = c(1, 2), mar = c(10, 5, 4, 2))
|
| 58 | + |
| 59 | + # Pre-correction plot |
38 | 60 | boxplot(data_t,
|
39 | | - main = "Normalized Data", las = 2, col = "lightblue", outline = FALSE, |
40 | | - ylab = "Expression Levels", cex.axis = 0.7, names = sample_names |
41 | | - ) |
| 61 | + main = "Before Batch Correction", |
| 62 | + las = 2, |
| 63 | + col = "lightblue", |
| 64 | + outline = FALSE, |
| 65 | + ylab = "Expression Levels", |
| 66 | + cex.axis = 0.7, |
| 67 | + names = sample_names) |
| 68 | + |
| 69 | + # Post-correction plot |
42 | 70 | boxplot(data_combat,
|
43 | | - main = "Batch Corrected Data", las = 2, col = "lightgreen", |
44 | | - outline = FALSE, ylab = "Expression Levels", cex.axis = 0.7, names = sample_names |
45 | | - ) |
| 71 | + main = "After Batch Correction", |
| 72 | + las = 2, |
| 73 | + col = "lightgreen", |
| 74 | + outline = FALSE, |
| 75 | + ylab = "Expression Levels", |
| 76 | + cex.axis = 0.7, |
| 77 | + names = sample_names) |
| 78 | + |
46 | 79 | dev.off()
|
47 | 80 | }
|
48 | | - |
| 81 | + |
49 | 82 | # Output completion message
|
50 | 83 | cat("Batch effect correction completed. Corrected data saved to:", output_file, "\n")
|
51 | | - cat("Boxplots saved in PNG, JPG, TIF, and PDF formats.\n") |
| 84 | + cat("Boxplots saved in PDF and PNG formats.\n") |
52 | 85 | },
|
53 | 86 | error = function(e) {
|
54 | 87 | # Handle errors gracefully
|
|
0 commit comments