From f489be66559f461e356e1d9ac4817ddc3969f1a2 Mon Sep 17 00:00:00 2001 From: Abhirupa Ghosh <100681585+AbhirupaGhosh@users.noreply.github.com> Date: Wed, 24 Jun 2026 15:25:05 -0600 Subject: [PATCH 1/3] Enhance retrieveMetadata() with genome list file input option Updated retrieveMetadata() to allow reading from a specified file if provided. Added checks for file existence and trimmed whitespace from genome IDs. This is the only defining step for the restricted list of genomes that the user is willing to use to build a model. An example application is benchmarking using specific genomes. --- R/data_curation.R | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/R/data_curation.R b/R/data_curation.R index b58cf3c..413b25d 100644 --- a/R/data_curation.R +++ b/R/data_curation.R @@ -912,6 +912,7 @@ #' - table_name: "metadata" #' @export retrieveMetadata <- function(user_bacs, + genome_id_file = NULL, filter_type = "AMR", base_dir = ".", abx = "All", @@ -921,10 +922,37 @@ retrieveMetadata <- function(user_bacs, base_dir <- normalizePath(base_dir, mustWork = FALSE) if (isTRUE(verbose)) message("Resolving genome IDs for user inputs.") - genome_ids <- .retrieveQueryIDs( - base_dir = base_dir, user_bacs = user_bacs, - overwrite = overwrite, verbose = verbose - ) + # ------------------------------- + # GENOME ID RESOLUTION (UPDATED) + # ------------------------------- + if (!is.null(genome_id_file)) { + + if (!file.exists(genome_id_file)) { + stop("Provided genome_id_file does not exist.") + } + + if (isTRUE(verbose)) { + message("Using genome IDs from file: ", genome_id_file) + } + + genome_ids <- readLines(genome_id_file, warn = FALSE) + genome_ids <- trimws(genome_ids) + genome_ids <- genome_ids[genome_ids != ""] + + } else { + + if (isTRUE(verbose)) message("Resolving genome IDs for user inputs.") + + genome_ids <- .retrieveQueryIDs( + base_dir = base_dir, + user_bacs = user_bacs, + overwrite = overwrite, + verbose = 
verbose + ) + } + + genome_ids <- unique(as.character(genome_ids)) + if (length(genome_ids) == 0) { message("No genome IDs available for the specified inputs.") return(NULL) From 05f05220856cc07e384212a85a8e696b80da5f0e Mon Sep 17 00:00:00 2001 From: AbhirupaGhosh Date: Wed, 24 Jun 2026 21:26:20 +0000 Subject: [PATCH 2/3] Style code (GHA) --- R/data_curation.R | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/R/data_curation.R b/R/data_curation.R index 413b25d..21b6786 100644 --- a/R/data_curation.R +++ b/R/data_curation.R @@ -922,11 +922,10 @@ retrieveMetadata <- function(user_bacs, base_dir <- normalizePath(base_dir, mustWork = FALSE) if (isTRUE(verbose)) message("Resolving genome IDs for user inputs.") - # ------------------------------- + # ------------------------------- # GENOME ID RESOLUTION (UPDATED) # ------------------------------- if (!is.null(genome_id_file)) { - if (!file.exists(genome_id_file)) { stop("Provided genome_id_file does not exist.") } @@ -938,9 +937,7 @@ retrieveMetadata <- function(user_bacs, genome_ids <- readLines(genome_id_file, warn = FALSE) genome_ids <- trimws(genome_ids) genome_ids <- genome_ids[genome_ids != ""] - } else { - if (isTRUE(verbose)) message("Resolving genome IDs for user inputs.") genome_ids <- .retrieveQueryIDs( @@ -952,7 +949,7 @@ retrieveMetadata <- function(user_bacs, } genome_ids <- unique(as.character(genome_ids)) - + if (length(genome_ids) == 0) { message("No genome IDs available for the specified inputs.") return(NULL) From aa2ace5a8f88eccd5362441e0c7ab539a62261b8 Mon Sep 17 00:00:00 2001 From: Abhirupa Ghosh <100681585+AbhirupaGhosh@users.noreply.github.com> Date: Wed, 24 Jun 2026 15:30:11 -0600 Subject: [PATCH 3/3] Fix genome_id_file parameter in prepareGenomes Updated the 'prepareGenomes' function to use 'genome_id_file' parameter correctly. 
--- R/data_curation.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/data_curation.R b/R/data_curation.R index 21b6786..aab8022 100644 --- a/R/data_curation.R +++ b/R/data_curation.R @@ -1776,6 +1776,7 @@ genomeList <- function(base_dir = ".", #' #' @export prepareGenomes <- function(user_bacs, + genome_id_file = NULL, base_dir = ".", method = c("ftp", "cli"), overwrite = FALSE, @@ -1790,6 +1791,7 @@ prepareGenomes <- function(user_bacs, if (isTRUE(verbose)) message("Step 0: Building AMR metadata (retrieveMetadata)") invisible(retrieveMetadata( user_bacs = user_bacs, + genome_id_file = genome_id_file, filter_type = "AMR", base_dir = base_dir, abx = "All",