From 25b91213550db96cf13351d67c44e2cb3e267b54 Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Wed, 28 May 2025 12:58:16 +0100
Subject: [PATCH 01/17] Refactor returnWhatBasedOnThreshold function

---
 .../DRrequiredAgeingPackage/R/sideFunctions.R | 25 +++----------------
 1 file changed, 4 insertions(+), 21 deletions(-)

diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R
index d405a6fc..7a362532 100644
--- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
+++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
@@ -4568,16 +4568,6 @@ DirectionTagFE = function(x,
   return(tag)
 }
 
-returnWhatBasedOnThreshold = function(x = NULL,
-                                      threshold = .0001,
-                                      Return = 'ABNORMAL') {
-  if (is.numeric(x) && x < threshold) {
-    return(Return)
-  } else {
-    return('IgnoreThisCaseAtALL')
-  }
-}
-
 GenotypeTag = function(obj,
                        threshold = 10 ^ -4,
                        expDetailsForErrorOnly = NULL,
@@ -4609,6 +4599,7 @@ GenotypeTag = function(obj,
     for (pair in sex_column_prefix_pairs) {
       sex <- pair[1]
       column_prefix <- pair[2]
+      pvalue = obj[[paste0(column_prefix, " p-value")]]
       # Append to existing dataframe.
       tag <- rbind(tag, data.frame(
         Sex = sex,
@@ -4616,20 +4607,12 @@ GenotypeTag = function(obj,
           # Statistical test based on genotype effect estimate and p-value.
           DirectionTagMM(
             x = obj[[paste0(column_prefix, " estimate")]]$Value,
-            pvalue = obj[[paste0(column_prefix, " p-value")]],
+            pvalue = pvalue,
             threshold = threshold
           ),
           # Simple statistical test based on p-value only.
-          returnWhatBasedOnThreshold(
-            x = obj[[paste0(column_prefix, " p-value")]],
-            threshold = threshold,
-            Return = "ABNORMAL"
-          ),
-          returnWhatBasedOnThreshold(
-            x = obj[[paste0(column_prefix, " p-value")]],
-            threshold = threshold,
-            Return = 'INFERRED'
-          )
+          if (is.numeric(pvalue) && pvalue < threshold) "ABNORMAL" else "IgnoreThisCaseAtALL",
+          if (is.numeric(pvalue) && pvalue < threshold) "INFERRED" else "IgnoreThisCaseAtALL"
         ),
         Level = "OVERALL",
         stringsAsFactors = FALSE

From 3a6bde525846ded98413c5a36237f3cdfc17784e Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Wed, 28 May 2025 13:31:40 +0100
Subject: [PATCH 02/17] Add p-value for MM

---
 .../DRrequiredAgeingPackage/R/sideFunctions.R                | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R
index 7a362532..5052aae8 100644
--- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
+++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
@@ -4615,6 +4615,7 @@ GenotypeTag = function(obj,
           if (is.numeric(pvalue) && pvalue < threshold) "INFERRED" else "IgnoreThisCaseAtALL"
         ),
         Level = "OVERALL",
+        PValue = pvalue,
         stringsAsFactors = FALSE
       ))
     }
@@ -4888,8 +4889,8 @@ annotationChooser = function(statpacket = NULL,
           arrange(Sex)
         # Convert to the desired list format.
         MPTERMS <- filtered_data %>%
-          mutate(sex = Sex, event = StatisticalTestResult, term_id = MpTerm) %>%
-          select(term_id, event, sex) %>%
+          mutate(sex = Sex, event = StatisticalTestResult, term_id = MpTerm, p_value = PValue) %>%
+          select(term_id, event, sex, p_value) %>%
           # Use `purrr::transpose()` to create an unnamed list of objects.
           purrr::transpose() %>%
           # Remove names from the list

From cc8ec35208c74d7a7f61c964152df52c6d32e8dc Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Wed, 28 May 2025 13:32:48 +0100
Subject: [PATCH 03/17] Fix bug with stringsAsFactors

---
 .../DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R
index 5052aae8..40bd0303 100644
--- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
+++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
@@ -4592,8 +4592,7 @@ GenotypeTag = function(obj,
     sex_column_prefix_pairs <- list(
       c("UNSPECIFIED", "Genotype"),
       c("FEMALE", "Sex FvKO"),
-      c("MALE", "Sex MvKO"),
-      stringsAsFactors = FALSE
+      c("MALE", "Sex MvKO")
     )
     # Iterate over sexes and their corresponding column prefixes.
     for (pair in sex_column_prefix_pairs) {

From a17f09b4aadd0b3586d14031c13624aae5aba0dc Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Wed, 28 May 2025 16:01:45 +0100
Subject: [PATCH 04/17] Add p-value for FE

---
 .../DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R   | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R
index 40bd0303..c7ad00ff 100644
--- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
+++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
@@ -4656,6 +4656,7 @@ GenotypeTag = function(obj,
             threshold = threshold
           ),
           Level = "OVERALL",
+          PValue = fmodels[[column_prefix]]$`Complete table`$p.value,
           stringsAsFactors = FALSE
         )
       )

From ff0165aa727ff3fe689619666947667bbcf120c4 Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Wed, 28 May 2025 17:09:39 +0100
Subject: [PATCH 05/17] Add p-value for RR

---
 .../DRrequiredAgeingPackage/R/sideFunctions.R | 24 +++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R
index c7ad00ff..665c6b17 100644
--- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
+++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
@@ -4685,25 +4685,41 @@ GenotypeTag = function(obj,
       sex <- pair[1]
       column_prefix <- pair[2]
       # Get data for LOW and HIGH tails.
+      low_pvalue <- fmodels[[paste0("Low.data_point.", column_prefix)]]$Genotype$Result$p.value
+      high_pvalue <- fmodels[[paste0("High.data_point.", column_prefix)]]$Genotype$Result$p.value
       low_results <- DirectionTagFE(
-        x = fmodels[[paste0("Low.data_point.", column_prefix)]]$Genotype$Result$p.value,
+        x = low_pvalue,
         threshold = rrlevel,
         group = c("ABNORMAL", "DECREASED")
       )
       high_results <- DirectionTagFE(
-        x = fmodels[[paste0("High.data_point.", column_prefix)]]$Genotype$Result$p.value,
+        x = high_pvalue,
         threshold = rrlevel,
         group = c("ABNORMAL", "INCREASED")
       )
-      all_results <- c(low_results, high_results)
+
       # Append all to existing dataframe.
-      for (result in all_results) {
+      for (result in low_results) {
+        tag <- rbind(
+          tag,
+          data.frame(
+            Sex = sex,
+            StatisticalTestResult = result,
+            Level = "OVERALL",
+            PValue = low_pvalue,
+            stringsAsFactors = FALSE
+          )
+        )
+      }
+
+      for (result in high_results) {
         tag <- rbind(
           tag,
           data.frame(
             Sex = sex,
             StatisticalTestResult = result,
             Level = "OVERALL",
+            PValue = high_pvalue,
             stringsAsFactors = FALSE
           )
         )

From 87daf2d86d6f5d53949d02d74b844e4769e40113 Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Thu, 29 May 2025 17:00:24 +0100
Subject: [PATCH 06/17] Fix bug with NA p-value

---
 .../DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R
index 665c6b17..79f2a654 100644
--- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
+++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
@@ -4614,7 +4614,7 @@ GenotypeTag = function(obj,
           if (is.numeric(pvalue) && pvalue < threshold) "INFERRED" else "IgnoreThisCaseAtALL"
         ),
         Level = "OVERALL",
-        PValue = pvalue,
+        PValue = if (is.null(pvalue)) NA else pvalue,
         stringsAsFactors = FALSE
       ))
     }

From da92377d41048cd43d41ba5e0f14bdf1dcc5a049 Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Fri, 30 May 2025 12:12:10 +0100
Subject: [PATCH 07/17] Fix bug with NA p-value for FE and RR

---
 .../DRrequiredAgeingPackage/R/sideFunctions.R            | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R
index 79f2a654..2ef61254 100644
--- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
+++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
@@ -4646,17 +4646,18 @@ GenotypeTag = function(obj,
     for (pair in sex_column_prefix_pairs) {
       sex <- pair[1]
       column_prefix <- pair[2]
+      pvalue <- fmodels[[column_prefix]]$`Complete table`$p.value
       # Append to existing dataframe.
       tag <- rbind(
         tag,
         data.frame(
           Sex = sex,
           StatisticalTestResult = DirectionTagFE(
-            x = fmodels[[column_prefix]]$`Complete table`$p.value,
+            x = pvalue,
             threshold = threshold
           ),
           Level = "OVERALL",
-          PValue = fmodels[[column_prefix]]$`Complete table`$p.value,
+          PValue = if (is.null(pvalue)) NA else pvalue,
           stringsAsFactors = FALSE
         )
       )
@@ -4706,7 +4707,7 @@ GenotypeTag = function(obj,
             Sex = sex,
             StatisticalTestResult = result,
             Level = "OVERALL",
-            PValue = low_pvalue,
+            PValue = if (is.null(low_pvalue)) NA else low_pvalue,
             stringsAsFactors = FALSE
           )
         )
@@ -4719,7 +4720,7 @@ GenotypeTag = function(obj,
             Sex = sex,
             StatisticalTestResult = result,
             Level = "OVERALL",
-            PValue = high_pvalue,
+            PValue = if (is.null(high_pvalue)) NA else high_pvalue,
             stringsAsFactors = FALSE
           )
         )

From baa93a98b76e6213e70d5623d7d7f175090cb856 Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Mon, 2 Jun 2025 11:19:33 +0100
Subject: [PATCH 08/17] Use GtagExactUnmatched in match_mp_terms

---
 .../DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R
index 2ef61254..291eae84 100644
--- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
+++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
@@ -4776,7 +4776,7 @@ match_mp_terms <- function(Gtag, d, allowed_results = c()) {
   GtagExactUnmatched <- subset(GtagExact, is.na(MpTerm))
   # Now, for the unmatched records, try to find a U term as a fallback.
   GtagUnspecified <- merge(
-    Gtag,
+    GtagExactUnmatched,
     subset(d, Sex == "UNSPECIFIED", select = -Sex),
     by = c("StatisticalTestResult", "Level"),
     all.x = TRUE

From d0dcbd948e1164518e80f2eb7e76f6dec686a544 Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Mon, 2 Jun 2025 11:25:37 +0100
Subject: [PATCH 09/17] Unify tag initialisation

---
 .../DRrequiredAgeingPackage/R/sideFunctions.R | 29 ++++++-------------
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R
index 291eae84..683d944f 100644
--- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
+++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
@@ -4581,13 +4581,16 @@ GenotypeTag = function(obj,
           ' and for the RR method it is ',
           rrlevel)
 
+  # Initialise an empty data frame.
+  tag <- data.frame(
+    Sex = character(),
+    StatisticalTestResult = character(),
+    Level = character(),
+    PValue = numeric(),
+    stringsAsFactors = FALSE
+  )
+
   if (method %in% 'MM') {
-    # Initialise an empty data frame.
-    tag <- data.frame(
-      Sex = character(),
-      StatisticalTestResult = character(),
-      Level = character()
-    )
     # Define sex/column prefix info.
     sex_column_prefix_pairs <- list(
       c("UNSPECIFIED", "Genotype"),
@@ -4629,13 +4632,6 @@ GenotypeTag = function(obj,
       fmodels$Genotype$`Complete table` = fmodels$`Complete table`
       fmodels$`Complete table` = NULL
     }
-    # Initialise an empty data frame.
-    tag <- data.frame(
-      Sex = character(),
-      StatisticalTestResult = character(),
-      Level = character(),
-      stringsAsFactors = FALSE
-    )
     # Define sex/column prefix info.
     sex_column_prefix_pairs <- list(
       c("UNSPECIFIED", "Genotype"),
@@ -4668,13 +4664,6 @@ GenotypeTag = function(obj,
     fmodels = obj$`Additional information`$Analysis$`Further models`
     if (is.null(fmodels))
       return(NULL)
-    # Initialise an empty data frame.
-    tag <- data.frame(
-      Sex = character(),
-      StatisticalTestResult = character(),
-      Level = character(),
-      stringsAsFactors = FALSE
-    )
     # Define sex/column prefix info.
     sex_column_prefix_pairs <- list(
       c("UNSPECIFIED", "Genotype"),

From 566ca422fd7ac399e454a70139abcdf7f8c470b0 Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Mon, 2 Jun 2025 11:39:07 +0100
Subject: [PATCH 10/17] Remove DirectionTagFE function

---
 .../DRrequiredAgeingPackage/R/sideFunctions.R | 80 +++++--------------
 1 file changed, 22 insertions(+), 58 deletions(-)

diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R
index 683d944f..186b52ae 100644
--- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
+++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
@@ -4551,23 +4551,6 @@ DirectionTagMM = function(x,
   return(tag)
 }
 
-DirectionTagFE = function(x,
-                          threshold = .0001,
-                          group = c('ABNORMAL', 'INCREASED', 'DECREASED')) {
-  tag = if (is.null(x) || length(x) < 1) {
-    'NoEffectCalculated'
-  } else if (x < threshold) {
-    group
-  } else {
-    paste0('NORMAL - Test not significant at the level of ',
-           threshold,
-           ' (pvalue = ',
-           x,
-           ')')
-  }
-  return(tag)
-}
-
 GenotypeTag = function(obj,
                        threshold = 10 ^ -4,
                        expDetailsForErrorOnly = NULL,
@@ -4612,9 +4595,9 @@ GenotypeTag = function(obj,
             pvalue = pvalue,
             threshold = threshold
           ),
-          # Simple statistical test based on p-value only.
-          if (is.numeric(pvalue) && pvalue < threshold) "ABNORMAL" else "IgnoreThisCaseAtALL",
-          if (is.numeric(pvalue) && pvalue < threshold) "INFERRED" else "IgnoreThisCaseAtALL"
+          # Comparing test based on p-value only.
+          if (is.numeric(pvalue) && pvalue < threshold) "ABNORMAL" else NA,
+          if (is.numeric(pvalue) && pvalue < threshold) "INFERRED" else NA
         ),
         Level = "OVERALL",
         PValue = if (is.null(pvalue)) NA else pvalue,
@@ -4648,10 +4631,7 @@ GenotypeTag = function(obj,
         tag,
         data.frame(
           Sex = sex,
-          StatisticalTestResult = DirectionTagFE(
-            x = pvalue,
-            threshold = threshold
-          ),
+          StatisticalTestResult = if (is.numeric(pvalue) && pvalue < threshold) "ABNORMAL" else NA,
           Level = "OVERALL",
           PValue = if (is.null(pvalue)) NA else pvalue,
           stringsAsFactors = FALSE
@@ -4677,43 +4657,27 @@ GenotypeTag = function(obj,
       # Get data for LOW and HIGH tails.
       low_pvalue <- fmodels[[paste0("Low.data_point.", column_prefix)]]$Genotype$Result$p.value
       high_pvalue <- fmodels[[paste0("High.data_point.", column_prefix)]]$Genotype$Result$p.value
-      low_results <- DirectionTagFE(
-        x = low_pvalue,
-        threshold = rrlevel,
-        group = c("ABNORMAL", "DECREASED")
-      )
-      high_results <- DirectionTagFE(
-        x = high_pvalue,
-        threshold = rrlevel,
-        group = c("ABNORMAL", "INCREASED")
-      )
-
       # Append all to existing dataframe.
-      for (result in low_results) {
-        tag <- rbind(
-          tag,
-          data.frame(
-            Sex = sex,
-            StatisticalTestResult = result,
-            Level = "OVERALL",
-            PValue = if (is.null(low_pvalue)) NA else low_pvalue,
-            stringsAsFactors = FALSE
-          )
+      tag <- rbind(
+        tag,
+        data.frame(
+          Sex = sex,
+          StatisticalTestResult = if (is.numeric(low_pvalue) && low_pvalue < rrlevel) "ABNORMAL" else NA,
+          Level = "OVERALL",
+          PValue = if (is.null(low_pvalue)) NA else low_pvalue,
+          stringsAsFactors = FALSE
         )
-      }
-
-      for (result in high_results) {
-        tag <- rbind(
-          tag,
-          data.frame(
-            Sex = sex,
-            StatisticalTestResult = result,
-            Level = "OVERALL",
-            PValue = if (is.null(high_pvalue)) NA else high_pvalue,
-            stringsAsFactors = FALSE
-          )
+      )
+      tag <- rbind(
+        tag,
+        data.frame(
+          Sex = sex,
+          StatisticalTestResult = if (is.numeric(high_pvalue) && high_pvalue < rrlevel) "ABNORMAL" else NA,
+          Level = "OVERALL",
+          PValue = if (is.null(high_pvalue)) NA else high_pvalue,
+          stringsAsFactors = FALSE
         )
-      }
+      )
     }
 
   } else {

From 5a731998e197bb0cdac4b17e40bd2c2cafaae8c9 Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Mon, 2 Jun 2025 11:43:12 +0100
Subject: [PATCH 11/17] Refactor match_mp_terms

---
 .../DRrequiredAgeingPackage/R/sideFunctions.R | 22 ++-----------------
 1 file changed, 2 insertions(+), 20 deletions(-)

diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R
index 186b52ae..ee2c5ad5 100644
--- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
+++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
@@ -4712,11 +4712,7 @@ flatten_mp_chooser <- function(d) {
   }))
 }
 
-match_mp_terms <- function(Gtag, d, allowed_results = c()) {
-  # If provided, restrict the data to a list of allowed statistical test results.
-  if (length(allowed_results) > 0) {
-    Gtag <- subset(Gtag, StatisticalTestResult %in% allowed_results)
-  }
+match_mp_terms <- function(Gtag, d) {
   # First, try to join by exact sex, for example M call to M term.
   GtagExact <- merge(
     Gtag,
@@ -4803,21 +4799,7 @@ annotationChooser = function(statpacket = NULL,
     d <- flatten_mp_chooser(d)
 
     # 2. Join MP term information from mp_chooser.
-    if (method %in% "MM") {
-      Gtag <- match_mp_terms(Gtag, d)
-    } else if (method %in% "FE") {
-      Gtag <- match_mp_terms(Gtag, d, c("ABNORMAL"))
-    } else if (method %in% "RR") {
-      # By default, only ABNORMAL calls are used for RR.
-      GtagAbnormal <- match_mp_terms(Gtag, d, c("ABNORMAL"))
-      # In case no ABNORMAL MP terms were found, try to match INCREASED/DECREASED terms.
-      # This approach is left unchanged from the original annotation pipeline.
-      if (nrow(subset(GtagAbnormal, !is.na(MpTerm))) > 0) {
-        Gtag <- GtagAbnormal
-      } else {
-        Gtag <- match_mp_terms(Gtag, d, c("INCREASED", "DECREASED"))
-      }
-    }
+    Gtag <- match_mp_terms(Gtag, d)
 
     # 3. Remove records with no assigned MP terms.
     # This filters out all rows with no statistically significant results.

From 646ab76979ac1e4e93f171a79a13702003c62a99 Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Mon, 2 Jun 2025 11:47:44 +0100
Subject: [PATCH 12/17] Remove duplicated gtag check

---
 .../DRrequiredAgeingPackage/R/sideFunctions.R | 77 +++++++++----------
 1 file changed, 36 insertions(+), 41 deletions(-)

diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R
index ee2c5ad5..d612d085 100644
--- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
+++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
@@ -4812,49 +4812,44 @@ annotationChooser = function(statpacket = NULL,
     } else {
       # Define the priority order for StatisticalTestResult.
       priority_order <- c("INCREASED", "DECREASED", "INFERRED", "ABNORMAL")
-      # Check if the input dataframe is empty.
-      if (nrow(Gtag) == 0) {
-        MPTERMS <- list()
-      } else {
-        # Group by Sex and select one row per group based on priority.
-        filtered_data <- Gtag %>%
-          group_by(Sex) %>%
-          arrange(match(StatisticalTestResult, priority_order)) %>%
-          slice(1) %>%
-          ungroup()
-        # Drop UNSPECIFIED if MALE or FEMALE data is present.
-        if (any(filtered_data$Sex %in% c("MALE", "FEMALE"))) {
-          filtered_data <- filtered_data %>% filter(Sex != "UNSPECIFIED")
-        }
-        # Handle UNSPECIFIED case if no MALE or FEMALE data is present.
-        if (!any(filtered_data$Sex %in% c("MALE", "FEMALE"))) {
-          if (length(sex_levels) == 1) {
-            filtered_data$Sex <- toupper(sex_levels)
-          }
-        }
-        # Convert Sex column to lowercase and replace "unspecified" with "not_considered".
-        filtered_data <- filtered_data %>%
-          mutate(Sex = tolower(Sex),
-                  Sex = ifelse(Sex == "unspecified", "not_considered", Sex))
-        # Ensure female comes before male if both are present.
-        filtered_data <- filtered_data %>%
-          arrange(Sex)
-        # Convert to the desired list format.
-        MPTERMS <- filtered_data %>%
-          mutate(sex = Sex, event = StatisticalTestResult, term_id = MpTerm, p_value = PValue) %>%
-          select(term_id, event, sex, p_value) %>%
-          # Use `purrr::transpose()` to create an unnamed list of objects.
-          purrr::transpose() %>%
-          # Remove names from the list
-          unname()
-        # Special case for RR: add an empty `otherPossibilities` field for compatibility.
-        if (method == "RR") {
-          MPTERMS <- purrr::map(MPTERMS, ~ {
-            .x$otherPossibilities <- ""
-            .x
-          })
+      # Group by Sex and select one row per group based on priority.
+      filtered_data <- Gtag %>%
+        group_by(Sex) %>%
+        arrange(match(StatisticalTestResult, priority_order)) %>%
+        slice(1) %>%
+        ungroup()
+      # Drop UNSPECIFIED if MALE or FEMALE data is present.
+      if (any(filtered_data$Sex %in% c("MALE", "FEMALE"))) {
+        filtered_data <- filtered_data %>% filter(Sex != "UNSPECIFIED")
+      }
+      # Handle UNSPECIFIED case if no MALE or FEMALE data is present.
+      if (!any(filtered_data$Sex %in% c("MALE", "FEMALE"))) {
+        if (length(sex_levels) == 1) {
+          filtered_data$Sex <- toupper(sex_levels)
         }
       }
+      # Convert Sex column to lowercase and replace "unspecified" with "not_considered".
+      filtered_data <- filtered_data %>%
+        mutate(Sex = tolower(Sex),
+                Sex = ifelse(Sex == "unspecified", "not_considered", Sex))
+      # Ensure female comes before male if both are present.
+      filtered_data <- filtered_data %>%
+        arrange(Sex)
+      # Convert to the desired list format.
+      MPTERMS <- filtered_data %>%
+        mutate(sex = Sex, event = StatisticalTestResult, term_id = MpTerm, p_value = PValue) %>%
+        select(term_id, event, sex, p_value) %>%
+        # Use `purrr::transpose()` to create an unnamed list of objects.
+        purrr::transpose() %>%
+        # Remove names from the list
+        unname()
+      # Special case for RR: add an empty `otherPossibilities` field for compatibility.
+      if (method == "RR") {
+        MPTERMS <- purrr::map(MPTERMS, ~ {
+          .x$otherPossibilities <- ""
+          .x
+        })
+      }
     }
 
   }

From 6573a08db8fb5147ba9fc4faaecc85e183441fc4 Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Mon, 2 Jun 2025 11:52:53 +0100
Subject: [PATCH 13/17] Fix typo

---
 .../DRrequiredAgeingPackage/R/sideFunctions.R                | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R
index d612d085..3766f431 100644
--- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
+++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
@@ -4851,7 +4851,6 @@ annotationChooser = function(statpacket = NULL,
         })
       }
     }
-
   }
 
   # Return the final data structure.
@@ -4859,7 +4858,5 @@ annotationChooser = function(statpacket = NULL,
     json$Result$Details[[TermKey]] = MPTERMS
     statpacket$V20 = FinalJson2ObjectCreator(FinalList = json)
   }
-  return(invisible(list(
-    MPTERM = MPTERMS, statpacket = statpacket
-  )))
+  return(invisible(list(MPTERM = MPTERMS, statpacket = statpacket)))
 }

From 92a875e775303a3ebc2023aed46067e38c8deee8 Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Mon, 2 Jun 2025 12:04:08 +0100
Subject: [PATCH 14/17] Remove empty ErrorneousCases.tsv.err initialisation

---
 .../DRrequiredAgeingPackage/R/sideFunctions.R | 28 ++++---------------
 1 file changed, 6 insertions(+), 22 deletions(-)

diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R
index 3766f431..94f0be33 100644
--- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
+++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
@@ -4553,11 +4553,10 @@ DirectionTagMM = function(x,
 
 GenotypeTag = function(obj,
                        threshold = 10 ^ -4,
-                       expDetailsForErrorOnly = NULL,
-                       rrlevel = 10 ^ -4) {
+                       rrlevel = 10 ^ -4,
+                       method = NULL) {
   if (is.null(obj))
     return(NULL)
-  method = GetMethodStPa(x = obj$`Applied method`)
   message('\t The analysis method = ', method)
   message('\t The decision threshold = ',
           threshold,
@@ -4682,15 +4681,6 @@ GenotypeTag = function(obj,
 
   } else {
     tag = NULL
-    write(
-      x = paste(
-        head(expDetailsForErrorOnly, 18),
-        sep = '\t',
-        collapse = '\t'
-      ),
-      file = 'ErrorneousCases.tsv.err',
-      ncolumns = 5000
-    )
   }
   return(tag)
 }
@@ -4746,7 +4736,7 @@ annotationChooser = function(statpacket = NULL,
   requireNamespace("jsonlite")
   library(dplyr)
 
-  # Handle unsuccessful StatPackages.
+  # Handle unsuccessful StatPackets.
   if (
     is.null(statpacket) ||
     length(statpacket) < 1 ||
@@ -4771,7 +4761,8 @@ annotationChooser = function(statpacket = NULL,
   Gtag = GenotypeTag(
     obj = json$Result$`Vector output`[[resultKey]],
     threshold = level,
-    rrlevel = rrlevel
+    rrlevel = rrlevel,
+    method = method
   )
 
   # Load mp_chooser Rdata file.
@@ -4841,15 +4832,8 @@ annotationChooser = function(statpacket = NULL,
         select(term_id, event, sex, p_value) %>%
         # Use `purrr::transpose()` to create an unnamed list of objects.
         purrr::transpose() %>%
-        # Remove names from the list
+        # Remove names from the list.
         unname()
-      # Special case for RR: add an empty `otherPossibilities` field for compatibility.
-      if (method == "RR") {
-        MPTERMS <- purrr::map(MPTERMS, ~ {
-          .x$otherPossibilities <- ""
-          .x
-        })
-      }
     }
   }
 

From 4fe8860eada304d763777f5521c9263ff218dd90 Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Mon, 2 Jun 2025 17:21:15 +0100
Subject: [PATCH 15/17] Remove duplicated MpTerm

---
 .../DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R   | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R
index 94f0be33..886ec261 100644
--- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
+++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
@@ -4713,6 +4713,7 @@ match_mp_terms <- function(Gtag, d) {
   # Separate rows where we did / did not find a MP term.
   GtagExactMatched <- subset(GtagExact, !is.na(MpTerm))
   GtagExactUnmatched <- subset(GtagExact, is.na(MpTerm))
+  GtagExactUnmatched <- subset(GtagExactUnmatched, select = -MpTerm)
   # Now, for the unmatched records, try to find a U term as a fallback.
   GtagUnspecified <- merge(
     GtagExactUnmatched,

From 139305e253dcc05d1c2a2d184ccb18aa6d37fac6 Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Fri, 13 Jun 2025 15:01:36 +0100
Subject: [PATCH 16/17] Add dependencies

---
 orchestration/orchestration.sh | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/orchestration/orchestration.sh b/orchestration/orchestration.sh
index ef995e0d..e50584d8 100755
--- a/orchestration/orchestration.sh
+++ b/orchestration/orchestration.sh
@@ -247,6 +247,12 @@ for file in $(find . -maxdepth 1 -type f -name "split_index*"); do
 done
 chmod 775 annotation_jobs.bch
 
+message0 "Installing Python dependencies..."
+module load python-3.10.2-gcc-9.3.0-gswnsij
+python3.10 -m pip install rpy2
+python3.10 -m pip install numpy
+python3.10 -m pip install pandas
+
 message0 "Downloading the action script..."
 fetch_script loader.py annotation_pipeline
 submit_limit_jobs annotation_jobs.bch ../../../../compressed_logs/annotation_job_id.txt

From 3fcf2469f4f5bab182e2ce431fe8800e33b79e12 Mon Sep 17 00:00:00 2001
From: Marina Kan <marinak@ebi.ac.uk>
Date: Wed, 25 Jun 2025 11:17:47 +0100
Subject: [PATCH 17/17] Add effect size

---
 .../DRrequiredAgeingPackage/R/sideFunctions.R         | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R
index 886ec261..40af2717 100644
--- a/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
+++ b/Late adults stats pipeline/DRrequiredAgeing/DRrequiredAgeingPackage/R/sideFunctions.R	
@@ -4584,13 +4584,14 @@ GenotypeTag = function(obj,
       sex <- pair[1]
       column_prefix <- pair[2]
       pvalue = obj[[paste0(column_prefix, " p-value")]]
+      effect_size = obj[[paste0(column_prefix, " estimate")]]$Value
       # Append to existing dataframe.
       tag <- rbind(tag, data.frame(
         Sex = sex,
         StatisticalTestResult = c(
           # Statistical test based on genotype effect estimate and p-value.
           DirectionTagMM(
-            x = obj[[paste0(column_prefix, " estimate")]]$Value,
+            x = effect_size,
             pvalue = pvalue,
             threshold = threshold
           ),
@@ -4600,6 +4601,7 @@ GenotypeTag = function(obj,
         ),
         Level = "OVERALL",
         PValue = if (is.null(pvalue)) NA else pvalue,
+        EffectSize = if (is.null(effect_size)) NA else effect_size,
         stringsAsFactors = FALSE
       ))
     }
@@ -4633,6 +4635,7 @@ GenotypeTag = function(obj,
           StatisticalTestResult = if (is.numeric(pvalue) && pvalue < threshold) "ABNORMAL" else NA,
           Level = "OVERALL",
           PValue = if (is.null(pvalue)) NA else pvalue,
+          EffectSize = NA,
           stringsAsFactors = FALSE
         )
       )
@@ -4664,6 +4667,7 @@ GenotypeTag = function(obj,
           StatisticalTestResult = if (is.numeric(low_pvalue) && low_pvalue < rrlevel) "ABNORMAL" else NA,
           Level = "OVERALL",
           PValue = if (is.null(low_pvalue)) NA else low_pvalue,
+          EffectSize = NA,
           stringsAsFactors = FALSE
         )
       )
@@ -4674,6 +4678,7 @@ GenotypeTag = function(obj,
           StatisticalTestResult = if (is.numeric(high_pvalue) && high_pvalue < rrlevel) "ABNORMAL" else NA,
           Level = "OVERALL",
           PValue = if (is.null(high_pvalue)) NA else high_pvalue,
+          EffectSize = NA,
           stringsAsFactors = FALSE
         )
       )
@@ -4829,8 +4834,8 @@ annotationChooser = function(statpacket = NULL,
         arrange(Sex)
       # Convert to the desired list format.
       MPTERMS <- filtered_data %>%
-        mutate(sex = Sex, event = StatisticalTestResult, term_id = MpTerm, p_value = PValue) %>%
-        select(term_id, event, sex, p_value) %>%
+        mutate(sex = Sex, event = StatisticalTestResult, term_id = MpTerm, p_value = PValue, effect_size = EffectSize) %>%
+        select(term_id, event, sex, p_value, effect_size) %>%
         # Use `purrr::transpose()` to create an unnamed list of objects.
         purrr::transpose() %>%
         # Remove names from the list.