From 25b91213550db96cf13351d67c44e2cb3e267b54 Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Wed, 28 May 2025 12:58:16 +0100 Subject: [PATCH 01/17] Refactor returnWhatBasedOnThreshold function --- .../DRrequiredAgeingPackage/R/sideFunctions.R | 25 +++---------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R index d405a6fc..7a362532 100644 --- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R +++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R @@ -4568,16 +4568,6 @@ DirectionTagFE = function(x, return(tag) } -returnWhatBasedOnThreshold = function(x = NULL, - threshold = .0001, - Return = 'ABNORMAL') { - if (is.numeric(x) && x < threshold) { - return(Return) - } else { - return('IgnoreThisCaseAtALL') - } -} - GenotypeTag = function(obj, threshold = 10 ^ -4, expDetailsForErrorOnly = NULL, @@ -4609,6 +4599,7 @@ GenotypeTag = function(obj, for (pair in sex_column_prefix_pairs) { sex <- pair[1] column_prefix <- pair[2] + pvalue = obj[[paste0(column_prefix, " p-value")]] # Append to existing dataframe. tag <- rbind(tag, data.frame( Sex = sex, @@ -4616,20 +4607,12 @@ GenotypeTag = function(obj, # Statistical test based on genotype effect estimate and p-value. DirectionTagMM( x = obj[[paste0(column_prefix, " estimate")]]$Value, - pvalue = obj[[paste0(column_prefix, " p-value")]], + pvalue = pvalue, threshold = threshold ), # Simple statistical test based on p-value only. - returnWhatBasedOnThreshold( - x = obj[[paste0(column_prefix, " p-value")]], - threshold = threshold, - Return = "ABNORMAL" - ), - returnWhatBasedOnThreshold( - x = obj[[paste0(column_prefix, " p-value")]], - threshold = threshold, - Return = 'INFERRED' - ) + if (is.numeric(pvalue) && pvalue < threshold) "ABNORMAL" else "IgnoreThisCaseAtALL", + if (is.numeric(pvalue) && pvalue < threshold) "INFERRED" else "IgnoreThisCaseAtALL" ), Level = "OVERALL", stringsAsFactors = FALSE From 3a6bde525846ded98413c5a36237f3cdfc17784e Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Wed, 28 May 2025 13:31:40 +0100 Subject: [PATCH 02/17] Add p-value for MM --- .../DRrequiredAgeingPackage/R/sideFunctions.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R index 7a362532..5052aae8 100644 --- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R +++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R @@ -4615,6 +4615,7 @@ GenotypeTag = function(obj, if (is.numeric(pvalue) && pvalue < threshold) "INFERRED" else "IgnoreThisCaseAtALL" ), Level = "OVERALL", + PValue = pvalue, stringsAsFactors = FALSE )) } @@ -4888,8 +4889,8 @@ annotationChooser = function(statpacket = NULL, arrange(Sex) # Convert to the desired list format. MPTERMS <- filtered_data %>% - mutate(sex = Sex, event = StatisticalTestResult, term_id = MpTerm) %>% - select(term_id, event, sex) %>% + mutate(sex = Sex, event = StatisticalTestResult, term_id = MpTerm, p_value = PValue) %>% + select(term_id, event, sex, p_value) %>% # Use `purrr::transpose()` to create an unnamed list of objects. purrr::transpose() %>% # Remove names from the list From cc8ec35208c74d7a7f61c964152df52c6d32e8dc Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Wed, 28 May 2025 13:32:48 +0100 Subject: [PATCH 03/17] Fix bug with stringsAsFactors --- .../DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R index 5052aae8..40bd0303 100644 --- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R +++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R @@ -4592,8 +4592,7 @@ GenotypeTag = function(obj, sex_column_prefix_pairs <- list( c("UNSPECIFIED", "Genotype"), c("FEMALE", "Sex FvKO"), - c("MALE", "Sex MvKO"), - stringsAsFactors = FALSE + c("MALE", "Sex MvKO") ) # Iterate over sexes and their corresponding column prefixes. for (pair in sex_column_prefix_pairs) { From a17f09b4aadd0b3586d14031c13624aae5aba0dc Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Wed, 28 May 2025 16:01:45 +0100 Subject: [PATCH 04/17] Add p-value for FE --- .../DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R | 1 + 1 file changed, 1 insertion(+) diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R index 40bd0303..c7ad00ff 100644 --- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R +++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R @@ -4656,6 +4656,7 @@ GenotypeTag = function(obj, threshold = threshold ), Level = "OVERALL", + PValue = fmodels[[column_prefix]]$`Complete table`$p.value, stringsAsFactors = FALSE ) ) From ff0165aa727ff3fe689619666947667bbcf120c4 Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Wed, 28 May 2025 17:09:39 +0100 Subject: [PATCH 05/17] Add p-value for RR --- .../DRrequiredAgeingPackage/R/sideFunctions.R | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R index c7ad00ff..665c6b17 100644 --- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R +++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R @@ -4685,25 +4685,41 @@ GenotypeTag = function(obj, sex <- pair[1] column_prefix <- pair[2] # Get data for LOW and HIGH tails. + low_pvalue <- fmodels[[paste0("Low.data_point.", column_prefix)]]$Genotype$Result$p.value + high_pvalue <- fmodels[[paste0("High.data_point.", column_prefix)]]$Genotype$Result$p.value low_results <- DirectionTagFE( - x = fmodels[[paste0("Low.data_point.", column_prefix)]]$Genotype$Result$p.value, + x = low_pvalue, threshold = rrlevel, group = c("ABNORMAL", "DECREASED") ) high_results <- DirectionTagFE( - x = fmodels[[paste0("High.data_point.", column_prefix)]]$Genotype$Result$p.value, + x = high_pvalue, threshold = rrlevel, group = c("ABNORMAL", "INCREASED") ) - all_results <- c(low_results, high_results) + # Append all to existing dataframe. - for (result in all_results) { + for (result in low_results) { + tag <- rbind( + tag, + data.frame( + Sex = sex, + StatisticalTestResult = result, + Level = "OVERALL", + PValue = low_pvalue, + stringsAsFactors = FALSE + ) + ) + } + + for (result in high_results) { tag <- rbind( tag, data.frame( Sex = sex, StatisticalTestResult = result, Level = "OVERALL", + PValue = high_pvalue, stringsAsFactors = FALSE ) ) From 87daf2d86d6f5d53949d02d74b844e4769e40113 Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Thu, 29 May 2025 17:00:24 +0100 Subject: [PATCH 06/17] Fix bug with NA p-value --- .../DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R index 665c6b17..79f2a654 100644 --- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R +++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R @@ -4614,7 +4614,7 @@ GenotypeTag = function(obj, if (is.numeric(pvalue) && pvalue < threshold) "INFERRED" else "IgnoreThisCaseAtALL" ), Level = "OVERALL", - PValue = pvalue, + PValue = if (is.null(pvalue)) NA else pvalue, stringsAsFactors = FALSE )) } From da92377d41048cd43d41ba5e0f14bdf1dcc5a049 Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Fri, 30 May 2025 12:12:10 +0100 Subject: [PATCH 07/17] Fix bug with NA p-value for FE and RR --- .../DRrequiredAgeingPackage/R/sideFunctions.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R index 79f2a654..2ef61254 100644 --- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R +++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R @@ -4646,17 +4646,18 @@ GenotypeTag = function(obj, for (pair in sex_column_prefix_pairs) { sex <- pair[1] column_prefix <- pair[2] + pvalue <- fmodels[[column_prefix]]$`Complete table`$p.value # Append to existing dataframe. tag <- rbind( tag, data.frame( Sex = sex, StatisticalTestResult = DirectionTagFE( - x = fmodels[[column_prefix]]$`Complete table`$p.value, + x = pvalue, threshold = threshold ), Level = "OVERALL", - PValue = fmodels[[column_prefix]]$`Complete table`$p.value, + PValue = if (is.null(pvalue)) NA else pvalue, stringsAsFactors = FALSE ) ) @@ -4706,7 +4707,7 @@ GenotypeTag = function(obj, Sex = sex, StatisticalTestResult = result, Level = "OVERALL", - PValue = low_pvalue, + PValue = if (is.null(low_pvalue)) NA else low_pvalue, stringsAsFactors = FALSE ) ) @@ -4719,7 +4720,7 @@ GenotypeTag = function(obj, Sex = sex, StatisticalTestResult = result, Level = "OVERALL", - PValue = high_pvalue, + PValue = if (is.null(high_pvalue)) NA else high_pvalue, stringsAsFactors = FALSE ) ) From baa93a98b76e6213e70d5623d7d7f175090cb856 Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Mon, 2 Jun 2025 11:19:33 +0100 Subject: [PATCH 08/17] Use GtagExactUnmatched in match_mp_terms --- .../DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R index 2ef61254..291eae84 100644 --- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R +++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R @@ -4776,7 +4776,7 @@ match_mp_terms <- function(Gtag, d, allowed_results = c()) { GtagExactUnmatched <- subset(GtagExact, is.na(MpTerm)) # Now, for the unmatched records, try to find a U term as a fallback. GtagUnspecified <- merge( - Gtag, + GtagExactUnmatched, subset(d, Sex == "UNSPECIFIED", select = -Sex), by = c("StatisticalTestResult", "Level"), all.x = TRUE From d0dcbd948e1164518e80f2eb7e76f6dec686a544 Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Mon, 2 Jun 2025 11:25:37 +0100 Subject: [PATCH 09/17] Unify tag initialisation --- .../DRrequiredAgeingPackage/R/sideFunctions.R | 29 ++++++------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R index 291eae84..683d944f 100644 --- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R +++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R @@ -4581,13 +4581,16 @@ GenotypeTag = function(obj, ' and for the RR method it is ', rrlevel) + # Initialise an empty data frame. + tag <- data.frame( + Sex = character(), + StatisticalTestResult = character(), + Level = character(), + PValue = numeric(), + stringsAsFactors = FALSE + ) + if (method %in% 'MM') { - # Initialise an empty data frame. - tag <- data.frame( - Sex = character(), - StatisticalTestResult = character(), - Level = character() - ) # Define sex/column prefix info. sex_column_prefix_pairs <- list( c("UNSPECIFIED", "Genotype"), @@ -4629,13 +4632,6 @@ GenotypeTag = function(obj, fmodels$Genotype$`Complete table` = fmodels$`Complete table` fmodels$`Complete table` = NULL } - # Initialise an empty data frame. - tag <- data.frame( - Sex = character(), - StatisticalTestResult = character(), - Level = character(), - stringsAsFactors = FALSE - ) # Define sex/column prefix info. sex_column_prefix_pairs <- list( c("UNSPECIFIED", "Genotype"), @@ -4668,13 +4664,6 @@ GenotypeTag = function(obj, fmodels = obj$`Additional information`$Analysis$`Further models` if (is.null(fmodels)) return(NULL) - # Initialise an empty data frame. - tag <- data.frame( - Sex = character(), - StatisticalTestResult = character(), - Level = character(), - stringsAsFactors = FALSE - ) # Define sex/column prefix info. sex_column_prefix_pairs <- list( c("UNSPECIFIED", "Genotype"), From 566ca422fd7ac399e454a70139abcdf7f8c470b0 Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Mon, 2 Jun 2025 11:39:07 +0100 Subject: [PATCH 10/17] Remove DirectionTagFE function --- .../DRrequiredAgeingPackage/R/sideFunctions.R | 80 +++++-------------- 1 file changed, 22 insertions(+), 58 deletions(-) diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R index 683d944f..186b52ae 100644 --- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R +++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R @@ -4551,23 +4551,6 @@ DirectionTagMM = function(x, return(tag) } -DirectionTagFE = function(x, - threshold = .0001, - group = c('ABNORMAL', 'INCREASED', 'DECREASED')) { - tag = if (is.null(x) || length(x) < 1) { - 'NoEffectCalculated' - } else if (x < threshold) { - group - } else { - paste0('NORMAL - Test not significant at the level of ', - threshold, - ' (pvalue = ', - x, - ')') - } - return(tag) -} - GenotypeTag = function(obj, threshold = 10 ^ -4, expDetailsForErrorOnly = NULL, @@ -4612,9 +4595,9 @@ GenotypeTag = function(obj, pvalue = pvalue, threshold = threshold ), - # Simple statistical test based on p-value only. - if (is.numeric(pvalue) && pvalue < threshold) "ABNORMAL" else "IgnoreThisCaseAtALL", - if (is.numeric(pvalue) && pvalue < threshold) "INFERRED" else "IgnoreThisCaseAtALL" + # Comparing test based on p-value only. + if (is.numeric(pvalue) && pvalue < threshold) "ABNORMAL" else NA, + if (is.numeric(pvalue) && pvalue < threshold) "INFERRED" else NA ), Level = "OVERALL", PValue = if (is.null(pvalue)) NA else pvalue, @@ -4648,10 +4631,7 @@ GenotypeTag = function(obj, tag, data.frame( Sex = sex, - StatisticalTestResult = DirectionTagFE( - x = pvalue, - threshold = threshold - ), + StatisticalTestResult = if (is.numeric(pvalue) && pvalue < threshold) "ABNORMAL" else NA, Level = "OVERALL", PValue = if (is.null(pvalue)) NA else pvalue, stringsAsFactors = FALSE @@ -4677,43 +4657,27 @@ GenotypeTag = function(obj, # Get data for LOW and HIGH tails. low_pvalue <- fmodels[[paste0("Low.data_point.", column_prefix)]]$Genotype$Result$p.value high_pvalue <- fmodels[[paste0("High.data_point.", column_prefix)]]$Genotype$Result$p.value - low_results <- DirectionTagFE( - x = low_pvalue, - threshold = rrlevel, - group = c("ABNORMAL", "DECREASED") - ) - high_results <- DirectionTagFE( - x = high_pvalue, - threshold = rrlevel, - group = c("ABNORMAL", "INCREASED") - ) - # Append all to existing dataframe. - for (result in low_results) { - tag <- rbind( - tag, - data.frame( - Sex = sex, - StatisticalTestResult = result, - Level = "OVERALL", - PValue = if (is.null(low_pvalue)) NA else low_pvalue, - stringsAsFactors = FALSE - ) + tag <- rbind( + tag, + data.frame( + Sex = sex, + StatisticalTestResult = if (is.numeric(low_pvalue) && low_pvalue < rrlevel) "ABNORMAL" else NA, + Level = "OVERALL", + PValue = if (is.null(low_pvalue)) NA else low_pvalue, + stringsAsFactors = FALSE ) - } - - for (result in high_results) { - tag <- rbind( - tag, - data.frame( - Sex = sex, - StatisticalTestResult = result, - Level = "OVERALL", - PValue = if (is.null(high_pvalue)) NA else high_pvalue, - stringsAsFactors = FALSE - ) + ) + tag <- rbind( + tag, + data.frame( + Sex = sex, + StatisticalTestResult = if (is.numeric(high_pvalue) && high_pvalue < rrlevel) "ABNORMAL" else NA, + Level = "OVERALL", + PValue = if (is.null(high_pvalue)) NA else high_pvalue, + stringsAsFactors = FALSE ) - } + ) } } else { From 5a731998e197bb0cdac4b17e40bd2c2cafaae8c9 Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Mon, 2 Jun 2025 11:43:12 +0100 Subject: [PATCH 11/17] Refactor match_mp_terms --- .../DRrequiredAgeingPackage/R/sideFunctions.R | 22 ++----------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R index 186b52ae..ee2c5ad5 100644 --- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R +++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R @@ -4712,11 +4712,7 @@ flatten_mp_chooser <- function(d) { })) } -match_mp_terms <- function(Gtag, d, allowed_results = c()) { - # If provided, restrict the data to a list of allowed statistical test results. - if (length(allowed_results) > 0) { - Gtag <- subset(Gtag, StatisticalTestResult %in% allowed_results) - } +match_mp_terms <- function(Gtag, d) { # First, try to join by exact sex, for example M call to M term. GtagExact <- merge( Gtag, @@ -4803,21 +4799,7 @@ annotationChooser = function(statpacket = NULL, d <- flatten_mp_chooser(d) # 2. Join MP term information from mp_chooser. - if (method %in% "MM") { - Gtag <- match_mp_terms(Gtag, d) - } else if (method %in% "FE") { - Gtag <- match_mp_terms(Gtag, d, c("ABNORMAL")) - } else if (method %in% "RR") { - # By default, only ABNORMAL calls are used for RR. - GtagAbnormal <- match_mp_terms(Gtag, d, c("ABNORMAL")) - # In case no ABNORMAL MP terms were found, try to match INCREASED/DECREASED terms. - # This approach is left unchanged from the original annotation pipeline. - if (nrow(subset(GtagAbnormal, !is.na(MpTerm))) > 0) { - Gtag <- GtagAbnormal - } else { - Gtag <- match_mp_terms(Gtag, d, c("INCREASED", "DECREASED")) - } - } + Gtag <- match_mp_terms(Gtag, d) # 3. Remove records with no assigned MP terms. # This filters out all rows with no statistically significant results. From 646ab76979ac1e4e93f171a79a13702003c62a99 Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Mon, 2 Jun 2025 11:47:44 +0100 Subject: [PATCH 12/17] Remove duplicated gtag check --- .../DRrequiredAgeingPackage/R/sideFunctions.R | 77 +++++++++---------- 1 file changed, 36 insertions(+), 41 deletions(-) diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R index ee2c5ad5..d612d085 100644 --- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R +++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R @@ -4812,49 +4812,44 @@ annotationChooser = function(statpacket = NULL, } else { # Define the priority order for StatisticalTestResult. priority_order <- c("INCREASED", "DECREASED", "INFERRED", "ABNORMAL") - # Check if the input dataframe is empty. - if (nrow(Gtag) == 0) { - MPTERMS <- list() - } else { - # Group by Sex and select one row per group based on priority. - filtered_data <- Gtag %>% - group_by(Sex) %>% - arrange(match(StatisticalTestResult, priority_order)) %>% - slice(1) %>% - ungroup() - # Drop UNSPECIFIED if MALE or FEMALE data is present. - if (any(filtered_data$Sex %in% c("MALE", "FEMALE"))) { - filtered_data <- filtered_data %>% filter(Sex != "UNSPECIFIED") - } - # Handle UNSPECIFIED case if no MALE or FEMALE data is present. - if (!any(filtered_data$Sex %in% c("MALE", "FEMALE"))) { - if (length(sex_levels) == 1) { - filtered_data$Sex <- toupper(sex_levels) - } - } - # Convert Sex column to lowercase and replace "unspecified" with "not_considered". - filtered_data <- filtered_data %>% - mutate(Sex = tolower(Sex), - Sex = ifelse(Sex == "unspecified", "not_considered", Sex)) - # Ensure female comes before male if both are present. - filtered_data <- filtered_data %>% - arrange(Sex) - # Convert to the desired list format. - MPTERMS <- filtered_data %>% - mutate(sex = Sex, event = StatisticalTestResult, term_id = MpTerm, p_value = PValue) %>% - select(term_id, event, sex, p_value) %>% - # Use `purrr::transpose()` to create an unnamed list of objects. - purrr::transpose() %>% - # Remove names from the list - unname() - # Special case for RR: add an empty `otherPossibilities` field for compatibility. - if (method == "RR") { - MPTERMS <- purrr::map(MPTERMS, ~ { - .x$otherPossibilities <- "" - .x - }) + # Group by Sex and select one row per group based on priority. + filtered_data <- Gtag %>% + group_by(Sex) %>% + arrange(match(StatisticalTestResult, priority_order)) %>% + slice(1) %>% + ungroup() + # Drop UNSPECIFIED if MALE or FEMALE data is present. + if (any(filtered_data$Sex %in% c("MALE", "FEMALE"))) { + filtered_data <- filtered_data %>% filter(Sex != "UNSPECIFIED") + } + # Handle UNSPECIFIED case if no MALE or FEMALE data is present. + if (!any(filtered_data$Sex %in% c("MALE", "FEMALE"))) { + if (length(sex_levels) == 1) { + filtered_data$Sex <- toupper(sex_levels) } } + # Convert Sex column to lowercase and replace "unspecified" with "not_considered". + filtered_data <- filtered_data %>% + mutate(Sex = tolower(Sex), + Sex = ifelse(Sex == "unspecified", "not_considered", Sex)) + # Ensure female comes before male if both are present. + filtered_data <- filtered_data %>% + arrange(Sex) + # Convert to the desired list format. + MPTERMS <- filtered_data %>% + mutate(sex = Sex, event = StatisticalTestResult, term_id = MpTerm, p_value = PValue) %>% + select(term_id, event, sex, p_value) %>% + # Use `purrr::transpose()` to create an unnamed list of objects. + purrr::transpose() %>% + # Remove names from the list + unname() + # Special case for RR: add an empty `otherPossibilities` field for compatibility. + if (method == "RR") { + MPTERMS <- purrr::map(MPTERMS, ~ { + .x$otherPossibilities <- "" + .x + }) + } } } From 6573a08db8fb5147ba9fc4faaecc85e183441fc4 Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Mon, 2 Jun 2025 11:52:53 +0100 Subject: [PATCH 13/17] Fix typo --- .../DRrequiredAgeingPackage/R/sideFunctions.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R index d612d085..3766f431 100644 --- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R +++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R @@ -4851,7 +4851,6 @@ annotationChooser = function(statpacket = NULL, }) } } - } # Return the final data structure. @@ -4859,7 +4858,5 @@ annotationChooser = function(statpacket = NULL, json$Result$Details[[TermKey]] = MPTERMS statpacket$V20 = FinalJson2ObjectCreator(FinalList = json) } - return(invisible(list( - MPTERM = MPTERMS, statpacket = statpacket - ))) + return(invisible(list(MPTERM = MPTERMS, statpacket = statpacket))) } From 92a875e775303a3ebc2023aed46067e38c8deee8 Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Mon, 2 Jun 2025 12:04:08 +0100 Subject: [PATCH 14/17] Remove empty ErrorneousCases.tsv.err initialisation --- .../DRrequiredAgeingPackage/R/sideFunctions.R | 28 ++++--------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R index 3766f431..94f0be33 100644 --- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R +++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R @@ -4553,11 +4553,10 @@ DirectionTagMM = function(x, GenotypeTag = function(obj, threshold = 10 ^ -4, - expDetailsForErrorOnly = NULL, - rrlevel = 10 ^ -4) { + rrlevel = 10 ^ -4, + method = NULL) { if (is.null(obj)) return(NULL) - method = GetMethodStPa(x = obj$`Applied method`) message('\t The analysis method = ', method) message('\t The decision threshold = ', threshold, @@ -4682,15 +4681,6 @@ GenotypeTag = function(obj, } else { tag = NULL - write( - x = paste( - head(expDetailsForErrorOnly, 18), - sep = '\t', - collapse = '\t' - ), - file = 'ErrorneousCases.tsv.err', - ncolumns = 5000 - ) } return(tag) } @@ -4746,7 +4736,7 @@ annotationChooser = function(statpacket = NULL, requireNamespace("jsonlite") library(dplyr) - # Handle unsuccessful StatPackages. + # Handle unsuccessful StatPackets. if ( is.null(statpacket) || length(statpacket) < 1 || @@ -4771,7 +4761,8 @@ annotationChooser = function(statpacket = NULL, Gtag = GenotypeTag( obj = json$Result$`Vector output`[[resultKey]], threshold = level, - rrlevel = rrlevel + rrlevel = rrlevel, + method = method ) # Load mp_chooser Rdata file. @@ -4841,15 +4832,8 @@ annotationChooser = function(statpacket = NULL, select(term_id, event, sex, p_value) %>% # Use `purrr::transpose()` to create an unnamed list of objects. purrr::transpose() %>% - # Remove names from the list + # Remove names from the list. unname() - # Special case for RR: add an empty `otherPossibilities` field for compatibility. - if (method == "RR") { - MPTERMS <- purrr::map(MPTERMS, ~ { - .x$otherPossibilities <- "" - .x - }) - } } } From 4fe8860eada304d763777f5521c9263ff218dd90 Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Mon, 2 Jun 2025 17:21:15 +0100 Subject: [PATCH 15/17] Remove duplicated MpTerm --- .../DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R | 1 + 1 file changed, 1 insertion(+) diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R index 94f0be33..886ec261 100644 --- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R +++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R @@ -4713,6 +4713,7 @@ match_mp_terms <- function(Gtag, d) { # Separate rows where we did / did not find a MP term. GtagExactMatched <- subset(GtagExact, !is.na(MpTerm)) GtagExactUnmatched <- subset(GtagExact, is.na(MpTerm)) + GtagExactUnmatched <- subset(GtagExactUnmatched, select = -MpTerm) # Now, for the unmatched records, try to find a U term as a fallback. GtagUnspecified <- merge( GtagExactUnmatched, From 139305e253dcc05d1c2a2d184ccb18aa6d37fac6 Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Fri, 13 Jun 2025 15:01:36 +0100 Subject: [PATCH 16/17] Add dependencies --- orchestration/orchestration.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/orchestration/orchestration.sh b/orchestration/orchestration.sh index ef995e0d..e50584d8 100755 --- a/orchestration/orchestration.sh +++ b/orchestration/orchestration.sh @@ -247,6 +247,12 @@ for file in $(find . -maxdepth 1 -type f -name "split_index*"); do done chmod 775 annotation_jobs.bch +message0 "Installing Python dependencies..." +module load python-3.10.2-gcc-9.3.0-gswnsij +python3.10 -m pip install rpy2 +python3.10 -m pip install numpy +python3.10 -m pip install pandas + message0 "Downloading the action script..." fetch_script loader.py annotation_pipeline submit_limit_jobs annotation_jobs.bch ../../../../compressed_logs/annotation_job_id.txt From 3fcf2469f4f5bab182e2ce431fe8800e33b79e12 Mon Sep 17 00:00:00 2001 From: Marina Kan Date: Wed, 25 Jun 2025 11:17:47 +0100 Subject: [PATCH 17/17] Add effect size --- .../DRrequiredAgeingPackage/R/sideFunctions.R | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R index 886ec261..40af2717 100644 --- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R +++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R @@ -4584,13 +4584,14 @@ GenotypeTag = function(obj, sex <- pair[1] column_prefix <- pair[2] pvalue = obj[[paste0(column_prefix, " p-value")]] + effect_size = obj[[paste0(column_prefix, " estimate")]]$Value # Append to existing dataframe. tag <- rbind(tag, data.frame( Sex = sex, StatisticalTestResult = c( # Statistical test based on genotype effect estimate and p-value. DirectionTagMM( - x = obj[[paste0(column_prefix, " estimate")]]$Value, + x = effect_size, pvalue = pvalue, threshold = threshold ), @@ -4600,6 +4601,7 @@ GenotypeTag = function(obj, ), Level = "OVERALL", PValue = if (is.null(pvalue)) NA else pvalue, + EffectSize = if (is.null(effect_size)) NA else effect_size, stringsAsFactors = FALSE )) } @@ -4633,6 +4635,7 @@ GenotypeTag = function(obj, StatisticalTestResult = if (is.numeric(pvalue) && pvalue < threshold) "ABNORMAL" else NA, Level = "OVERALL", PValue = if (is.null(pvalue)) NA else pvalue, + EffectSize = NA, stringsAsFactors = FALSE ) ) @@ -4664,6 +4667,7 @@ GenotypeTag = function(obj, StatisticalTestResult = if (is.numeric(low_pvalue) && low_pvalue < rrlevel) "ABNORMAL" else NA, Level = "OVERALL", PValue = if (is.null(low_pvalue)) NA else low_pvalue, + EffectSize = NA, stringsAsFactors = FALSE ) ) @@ -4674,6 +4678,7 @@ GenotypeTag = function(obj, StatisticalTestResult = if (is.numeric(high_pvalue) && high_pvalue < rrlevel) "ABNORMAL" else NA, Level = "OVERALL", PValue = if (is.null(high_pvalue)) NA else high_pvalue, + EffectSize = NA, stringsAsFactors = FALSE ) ) @@ -4829,8 +4834,8 @@ annotationChooser = function(statpacket = NULL, arrange(Sex) # Convert to the desired list format. MPTERMS <- filtered_data %>% - mutate(sex = Sex, event = StatisticalTestResult, term_id = MpTerm, p_value = PValue) %>% - select(term_id, event, sex, p_value) %>% + mutate(sex = Sex, event = StatisticalTestResult, term_id = MpTerm, p_value = PValue, effect_size = EffectSize) %>% + select(term_id, event, sex, p_value, effect_size) %>% # Use `purrr::transpose()` to create an unnamed list of objects. purrr::transpose() %>% # Remove names from the list.