#' Subset one CETT component (carriers, enzymes, targets or transporters)
#'
#' Keeps the rows of the component's `general_information` table whose
#' `drugbank_id` is in `drug_ids`, collects the component ids found in those
#' rows (e.g. `carrier_id`) and uses them to filter `actions` and every data
#' frame inside `polypeptides` that carries the same id column.
#'
#' @param component the component to subset from CETT
#' @param component_name component name (e.g. `"carriers"`); the id column is
#'   derived from it by dropping a trailing "s" and appending `"_id"`
#' @param drug_ids passed drugs ids to subset for
#'
#' @importFrom dplyr .data
#'
#' @return A list holding the filtered tables of the component. The list is
#'   empty when `component` is `NULL`, has no `general_information` rows, or
#'   none of its rows match `drug_ids`.
#' @noRd
#' @keywords internal
subset_cett_component <- function(component, component_name, drug_ids) {
  new_component <- list()

  if (!is.null(component) && (NROW(component$general_information) > 0)) {
    # The name of the intermediate ID, e.g., "carrier_id", "target_id"
    intermediate_id_col <- paste0(sub("s$", "", component_name), "_id")

    # Step A: Filter the top-level `general_information` table by drug_id.
    # This is our anchor.
    general_info_filtered <- dplyr::filter(
      component$general_information,
      .data$drugbank_id %in% drug_ids
    )

    # Step B: From this anchor, get the set of relevant intermediate IDs.
    relevant_intermediate_ids <-
      unique(general_info_filtered[[intermediate_id_col]])

    # Only build the component when at least one of its items matched.
    if (length(relevant_intermediate_ids) > 0) {
      # A table can only be filtered when it is a data frame that actually
      # carries the intermediate id column.
      has_id_col <- function(tbl) {
        is.data.frame(tbl) && (intermediate_id_col %in% names(tbl))
      }
      keep_relevant <- function(tbl) {
        dplyr::filter(
          tbl,
          .data[[intermediate_id_col]] %in% relevant_intermediate_ids
        )
      }

      # Step C: Use these intermediate IDs to filter the other tables.
      new_component$general_information <- general_info_filtered

      # `actions` may be absent; skip it instead of failing inside filter().
      if (has_id_col(component$actions)) {
        new_component$actions <- keep_relevant(component$actions)
      }

      # Step D: Walk the `polypeptides` list, using the same intermediate IDs.
      # Tables without the id column are not carried over.
      if (!is.null(component$polypeptides)) {
        new_component$polypeptides <- list()
        for (poly_table_name in names(component$polypeptides)) {
          poly_table <- component$polypeptides[[poly_table_name]]
          if (has_id_col(poly_table)) {
            new_component$polypeptides[[poly_table_name]] <-
              keep_relevant(poly_table)
          }
        }
      }
    }
  }

  new_component
}


#' Subset a DrugBank dvobject by a vector of DrugBank IDs
#'
#' @details
#' Filters a DrugBank dvobject to retain only the data associated with the
#' given `drugbank_id` values. The `drugs`, `salts`, `products` and
#' `references$drugs` tables are filtered directly by `drugbank_id`; the
#' `cett` components and their references are filtered through the
#' component ids (e.g. `carrier_id`) linked to the selected drugs.
#' Tables left without rows are omitted from the result.
#'
#' Missing (`NA`) and empty-string ids are ignored. When no usable id
#' remains, a warning is raised and `NULL` is returned.
#'
#' @param dvobject The dvobject from `parseDrugBank()`.
#' @param drug_ids A character vector of `drugbank_id` values to keep.
#'
#' @return A new, smaller dvobject with the same structure and the
#'   `original_db_info` attribute of `dvobject`, or `NULL` (with a warning)
#'   when `drug_ids` holds no usable id.
#'
#' @export
#' @importFrom dplyr filter .data
#'
#' @examples
#' \dontrun{
#' library(dbparser)
#' one_drug <- subset_drugbank_dvobject(dvobject = dbdataset::drugbank,
#'                                      drug_ids = "DB00001")
#' }
#' @family utility
subset_drugbank_dvobject <- function(dvobject, drug_ids) {
  new_dvobject <- NULL

  # `sum(nchar(x)) == 0` turns into NA as soon as one id is NA, which makes
  # `if` fail; drop unusable ids up front and test what is left instead.
  drug_ids <- drug_ids[!is.na(drug_ids) & nzchar(drug_ids)]

  if (length(drug_ids) == 0) {
    warning("`drug_ids` is empty. Returning NULL")
  } else {
    new_dvobject <- init_dvobject()
    cett_names   <- c("carriers", "enzymes", "targets", "transporters")

    # Filter every data frame of `tables` that has an `id_col` column down to
    # the rows whose id is in `ids`; tables left without rows are dropped.
    keep_matching_tables <- function(tables, id_col, ids) {
      kept <- list()
      for (table_name in names(tables)) {
        tbl <- tables[[table_name]]
        if (is.data.frame(tbl) && (id_col %in% names(tbl))) {
          tbl <- dplyr::filter(tbl, .data[[id_col]] %in% ids)
          if (NROW(tbl) > 0) {
            kept[[table_name]] <- tbl
          }
        }
      }
      kept
    }

    # --- 1. Filter the `drugs` list (many sub-tables) ---
    # Most tables here link directly via drugbank_id
    if (!is.null(dvobject$drugs)) {
      message("Subsetting `drugs` list...")
      new_dvobject$drugs <- keep_matching_tables(
        tables = dvobject$drugs,
        id_col = "drugbank_id",
        ids    = drug_ids
      )
    }

    # --- 2. Filter the `salts`, `products` data.frames ---
    for (name in c("salts", "products")) {
      if (NROW(dvobject[[name]]) > 0) {
        message(paste("Subsetting", name, "..."))
        filtered_subtable <- dplyr::filter(
          dvobject[[name]],
          .data$drugbank_id %in% drug_ids
        )
        if (NROW(filtered_subtable) > 0) {
          new_dvobject[[name]] <- filtered_subtable
        }
      }
    }

    # --- 3. Filter the drugs `references` list ---
    if (!is.null(dvobject$references) && !is.null(dvobject$references$drugs)) {
      message("Subsetting drugs `references` list...")
      new_dvobject$references$drugs <- keep_matching_tables(
        tables = dvobject$references$drugs,
        id_col = "drugbank_id",
        ids    = drug_ids
      )
    }

    # --- 4. Filter the complex, multi-level `cett` list ---
    if (!is.null(dvobject$cett)) {
      message("Subsetting complex `cett` list...")
      new_dvobject$cett <- list()

      # Apply the helper to each component within cett
      for (cett_name in cett_names) {
        component <- subset_cett_component(
          component      = dvobject$cett[[cett_name]],
          component_name = cett_name,
          drug_ids       = drug_ids
        )

        if (length(component) > 0) {
          new_dvobject$cett[[cett_name]] <- component
        }
      }
    }

    # --- 5. Filter the CETT `references` lists ---
    # References are only kept for components that survived step 4.
    if (!is.null(dvobject$references)) {
      for (cett_name in cett_names) {
        if ((length(dvobject$references[[cett_name]]) > 0) &&
            (length(new_dvobject$cett[[cett_name]]) > 0)) {
          message("Subsetting ", cett_name, " references list...")
          # The name of the intermediate ID, e.g., "carrier_id", "target_id"
          intermediate_id_col <- paste0(sub("s$", "", cett_name), "_id")
          # The ids retained for this component; looked up once per component
          kept_ids <- new_dvobject$cett[[cett_name]][[
            "general_information"
          ]][[intermediate_id_col]]

          kept_references <- keep_matching_tables(
            tables = dvobject$references[[cett_name]],
            id_col = intermediate_id_col,
            ids    = kept_ids
          )
          for (name in names(kept_references)) {
            new_dvobject$references[[cett_name]][[name]] <-
              kept_references[[name]]
          }
        }
      }
    }

    # --- Final Step: carry over the source database info and class ---
    attr(new_dvobject, "original_db_info") <- attr(dvobject, "original_db_info")
    class(new_dvobject) <- "dvobject"

    message("Subsetting complete.")
  }

  new_dvobject
}


#' Subset an OnSIDES dvobject by a vector of RxNorm Ingredient IDs (Schema-Aware)
#'
#' Filters an OnSIDES dvobject by cascading filters through the relational
#' tables: ingredients select products, products select labels, and labels
#' select adverse effects. Vocabulary and mapping tables are then reduced to
#' the entries referenced by that selection. Tables left without rows are
#' omitted from the result.
#'
#' Missing (`NA`) and empty-string ids are ignored. When no usable id
#' remains, a warning is raised and `NULL` is returned.
#'
#' @param dvobject A dvobject from `parseOnSIDES()`.
#' @param ingredient_ids A character vector of RxNorm CUIs (ingredients) to keep.
#'
#' @export
#' @importFrom dplyr filter pull .data
#'
#' @return A new, smaller dvobject with the same structure, or `NULL` (with a
#'   warning) when `ingredient_ids` holds no usable id.
#' @family utility
subset_onsides_dvobject <- function(dvobject, ingredient_ids) {
  new_dvobject <- NULL

  # `sum(nchar(x)) == 0` turns into NA as soon as one id is NA, which makes
  # `if` fail; drop unusable ids up front and test what is left instead.
  ingredient_ids <- ingredient_ids[!is.na(ingredient_ids) &
                                     nzchar(ingredient_ids)]

  if (length(ingredient_ids) == 0) {
    warning("`ingredient_ids` is empty. Returning NULL")
  } else {
    new_dvobject <- init_dvobject()

    # Rows of `tbl` whose `id_col` value is one of `ids`.
    rows_matching <- function(tbl, id_col, ids) {
      dplyr::filter(tbl, .data[[id_col]] %in% ids)
    }
    # Add `tbl` to the staged tables under `name` only when it has rows.
    stage <- function(staged, name, tbl) {
      if (NROW(tbl) > 0) {
        staged[[name]] <- tbl
      }
      staged
    }
    # Filtered tables, collected in the order they are written to the result.
    staged <- list()

    # --- 1. Get the cascading set of keys ---
    message("Subsetting OnSIDES: Identifying all related keys...")
    # Find all products containing our target ingredients
    relevant_product_ids <- dvobject$vocab_rxnorm_ingredient_to_product %>%
      rows_matching("ingredient_id", ingredient_ids) %>%
      dplyr::pull(.data$product_id) %>%
      unique()

    # Find all labels associated with those products
    relevant_label_ids <- dvobject$product_to_rxnorm %>%
      rows_matching("rxnorm_product_id", relevant_product_ids) %>%
      dplyr::pull(.data$label_id) %>%
      unique()

    # --- 2. Filter the main data tables ---
    message("Filtering main OnSIDES data tables...")
    staged <- stage(
      staged, "product_adverse_effect",
      rows_matching(dvobject$product_adverse_effect,
                    "product_label_id", relevant_label_ids)
    )

    # `high_confidence` is optional and links directly to ingredients
    if ("high_confidence" %in% names(dvobject)) {
      staged <- stage(
        staged, "high_confidence",
        rows_matching(dvobject$high_confidence,
                      "ingredient_id", ingredient_ids)
      )
    }

    # --- 3. Filter the "bridge" and vocabulary tables to keep the subset lean ---
    message("Filtering vocabulary and mapping tables...")
    staged <- stage(
      staged, "product_label",
      rows_matching(dvobject$product_label, "label_id", relevant_label_ids)
    )

    staged <- stage(
      staged, "product_to_rxnorm",
      rows_matching(dvobject$product_to_rxnorm, "label_id", relevant_label_ids)
    )

    staged <- stage(
      staged, "vocab_rxnorm_ingredient_to_product",
      rows_matching(dvobject$vocab_rxnorm_ingredient_to_product,
                    "ingredient_id", ingredient_ids)
    )

    # Find all MedDRA effects that are actually present in our subset
    # (NULL, i.e. no match at all, when no adverse-effect row was kept)
    relevant_meddra_ids <-
      unique(staged[["product_adverse_effect"]]$effect_meddra_id)

    staged <- stage(
      staged, "vocab_meddra_adverse_effect",
      rows_matching(dvobject$vocab_meddra_adverse_effect,
                    "meddra_id", relevant_meddra_ids)
    )

    staged <- stage(
      staged, "vocab_rxnorm_ingredient",
      rows_matching(dvobject$vocab_rxnorm_ingredient,
                    "rxnorm_id", ingredient_ids)
    )

    staged <- stage(
      staged, "vocab_rxnorm_product",
      rows_matching(dvobject$vocab_rxnorm_product,
                    "rxnorm_id", relevant_product_ids)
    )

    # Write the non-empty tables into the result, preserving their order
    for (table_name in names(staged)) {
      new_dvobject[[table_name]] <- staged[[table_name]]
    }

    # Carry over the source database info and set the class
    attr(new_dvobject, "original_db_info") <- attr(dvobject, "original_db_info")
    class(new_dvobject) <- "dvobject"
  }

  new_dvobject
}
