#' Control parameters for genetic algorithm
#'
#' Creates a list of control settings for the \code{ga.operator()} function.
#'
#' @param npop Integer. The number of individuals (chromosomes) in the
#'   population for each generation.
#' @param niter Integer. The maximum number of generations to run the GA.
#' @param pcross Numeric in \eqn{[0, 1]}. Probability of performing
#'   crossover between two selected parents.
#' @param pmut Numeric in \eqn{[0, 1]}. Probability of mutating each
#'   bit in a chromosome.
#' @param diff_tol A numeric value specifying the significance difference threshold.
#'   Values within this threshold are considered equal and receive the same rank.
#'   Default is 1.
#' @param nls Integer. Frequency (in generations) of running local
#'   exhaustive search around the best current model.
#'
#' @return A named list containing all GA control parameters.
#'
#' @seealso \code{\link{ga.operator}}, \code{\link{rank_new}}, \code{\link{runlocal}}
#'
#' @author Zhonghui Huang
#'
#' @examples
#' # Default settings
#' gaControl()
#'
#' @export

gaControl <- function(
    npop       = 20,
    niter       = 20,
    pcross = 0.7,
    pmut  = 0.1,
    diff_tol       = 1,
    nls  = 3
) {
  list(
    npop       = npop,
    niter       = niter,
    pcross = pcross,
    pmut  = pmut,
    diff_tol       = diff_tol,
    nls  = nls
  )
}

#' Genetic algorithm operator for model selection
#'
#' Run a genetic algorithm to search for an optimal PK model structure within a
#' predefined search space using nlmixr2-based model fitting and penalties.
#'
#' @param dat A data frame containing pharmacokinetic data in standard
#'   nlmixr2 format, including "ID", "TIME", "EVID", and "DV", and may include
#'   additional columns.
#' @param param_table Optional data frame of initial parameter estimates. If NULL,
#'   the table is generated by \code{auto_param_table()}.
#' @param search.space Character, one of "ivbase" or "oralbase". Default is "ivbase".
#' @param no.cores Integer. Number of CPU cores to use. If NULL, uses
#'   \code{rxode2::getRxThreads()}.
#' @param ga.control A list of GA control parameters, generated by
#'   \code{gaControl()}. Includes:
#'   \itemize{
#'     \item npop - number of individuals (chromosomes) per generation.
#'     \item niter - maximum number of generations.
#'     \item pcross - crossover probability.
#'     \item pmut - per-bit mutation probability.
#'     \item diff_tol - significance difference threshold used for ranking.
#'     \item nls - frequency (in generations) of running local exhaustive search
#'       around the best current model.
#'   }
#' @param penalty.control A list of penalty control parameters defined by
#'   \code{penaltyControl()}, specifying penalty values used for model diagnostics
#'   during fitness evaluation.
#' @param precomputed_results_file Optional path to a CSV file of previously computed
#'   model results used for caching.
#' @param foldername Character string specifying the folder name for storing
#'   intermediate results. If \code{NULL} (default), \code{tempdir()}
#'   is used for temporary storage. If specified, a cache directory
#'   is created in the current working directory.
#' @param filename Optional character string used as a prefix for output files.
#'   Defaults to "test".
#' @param seed Integer. Random seed controlling the random sampling steps of the
#'   genetic algorithm operator for reproducible runs. Default is 1234.
#' @param .modEnv Optional environment used to store run state and cached results.
#'   If NULL, a new environment is created.
#' @param verbose Logical. If TRUE, print progress messages.
#' @param ... Additional arguments passed to \code{mod.run()}.
#'
#' @details
#' The algorithm evolves a population of binary-encoded candidate models over
#' multiple generations using tournament selection, crossover, mutation, and local
#' search. Candidate encodings are validated and then evaluated by fitting models
#' and applying user-defined penalties. The best individual is carried forward to
#' the next generation.
#'
#' @return An object of class "gaOperatorResult" containing:
#'   \itemize{
#'     \item Final Selected Code: data frame with the best binary encoding.
#'     \item Final Selected Model Name: model identifier for the best encoding.
#'     \item Model Run History: data frame of fitted models and fitness values.
#'     \item Selection History: list of per-generation results.
#'   }
#'
#' @seealso
#' \link{mod.run}, \link{gaControl}, \link{penaltyControl}, \link{auto_param_table},
#' \link{spaceConfig}, \link{create.pop}, \link{validStringbinary},
#' \link{decodeBinary}, \link{parseName}, \link{rank_new}, \link{runlocal},
#' \link{ga.sel.tournament}, \link{ga.crossover}, \link{ga.mutation}
#'
#' @author Zhonghui Huang
#'
#' @examples
#' \donttest{
#' # Example usage with phenotype dataset
#' outs <- ga.operator(
#'   dat = pheno_sd,
#'   param_table = NULL,
#'   search.space = "ivbase",
#'   ga.control = gaControl(),
#'   saem.control = nlmixr2est::saemControl(
#'     seed = 1234,
#'     nBurn = 200,
#'     nEm   = 300,
#'     logLik = TRUE
#'   )
#' )
#' print(outs)
#' }
#'
#' @export

ga.operator <-  function(dat,
                         param_table = NULL,
                         search.space = c("ivbase", "oralbase"),
                         no.cores = NULL,
                         ga.control = gaControl(),
                         penalty.control = penaltyControl(),
                         precomputed_results_file = NULL,
                         foldername = NULL,
                         filename = "test",
                         seed = 1234,
                         .modEnv = NULL,
                         verbose = TRUE,
                         ...) {
  if (!is.null(.modEnv)) {
    if (!is.environment(.modEnv)) {
      stop("`.modEnv` must be an environment", call. = FALSE)
    }
    # .modEnv <- get(".modEnv", inherits = TRUE)
  } else {
    .modEnv <- new.env(parent = emptyenv())
  }

  # Ensure essential keys exist in .modEnv
  if (is.null(.modEnv$modi))
    .modEnv$modi <- 1L
  if (is.null(.modEnv$r))
    .modEnv$r <- 1L
  if (is.null(.modEnv$Store.all))
    .modEnv$Store.all <- NULL
  if (is.null(.modEnv$precomputed_cache_loaded))
    .modEnv$precomputed_cache_loaded <- FALSE
  if (is.null(.modEnv$precomputed_results))
    .modEnv$precomputed_results <- NULL
  if (is.null(.modEnv$param_table))
    .modEnv$param_table <- NULL
  if (is.null(.modEnv$saem.control))
    .modEnv$saem.control <- NULL

  if (is.null(no.cores)) {
    no.cores <- rxode2::getRxThreads()
  }

  if (is.null(foldername) || !nzchar(foldername)) {
    # foldername <-
    #   paste0("gaCache_", filename, "_", digest::digest(dat))
    foldername <- tempdir()
  }
  if (!dir.exists(foldername)) {
    dir.create(foldername,
               showWarnings = FALSE,
               recursive = TRUE)
  }
  # Set random seed
  if (!is.numeric(seed) ||
      length(seed) != 1 || is.na(seed) || is.infinite(seed)) {
    stop("`seed` must be a single finite numeric value", call. = FALSE)
  }
  if (abs(seed) > .Machine$integer.max) {
    stop("`seed` exceeds valid integer range", call. = FALSE)
  }
  seed <- as.integer(seed)
  if (getRversion() >= "4.2.0") {
    withr::local_rng_version("4.2.0")
  }
  withr::local_seed(seed)

  # Initial estimates
  if (!is.null(param_table)) {
    param_table_use <- param_table
  } else if (!is.null(.modEnv$param_table)) {
    param_table_use <- .modEnv$param_table
  } else {
    param_table_use <- auto_param_table(
      dat = dat,
      nlmixr2autoinits = TRUE,
      foldername = foldername,
      filename = filename,
      out.inits = TRUE
    )
    .modEnv$param_table <- param_table_use
  }

  param_table <- param_table_use

  search.space <-
    match.arg(search.space, choices = c("ivbase", "oralbase"))
  # GA does not support a custom search space.
  custom_config <- NULL
  # if (identical(search.space, "custom")) {
  #   stop(
  #     "GA currently does not support search.space = 'custom'. Use 'ivbase' or 'oralbase'.",
  #     call. = FALSE
  #   )
  # }
  cfg <- spaceConfig(search.space)
  bit.names <- if (identical(search.space, "custom")) {
    custom_config$params
  } else {
    cfg$params
  }
  if (identical(search.space, "custom")) {
    bit.names <- custom_config$params
    params_use <- custom_config$params
    twobit_params <-
      c("no.cmpt",
        "abs.type",
        "abs.delay",
        "rv",
        "allometric_scaling")
  } else {
    params_use <- cfg$params
    twobit_params <- c("no.cmpt", "rv")
  }

  bit.names <- unlist(lapply(params_use, function(p) {
    if (p %in% twobit_params)
      c(paste0(p, "1"), paste0(p, "2"))
    else
      p
  }), use.names = FALSE)

  nbits <- length(bit.names)
  history <- vector("list", ga.control$niter)

  progressr::with_progress({
    p <- progressr::progressor(steps = ga.control$niter)

    for (ga.iter in 1:ga.control$niter) {
      if (ga.iter == 1) {
        population <-
          create.pop(ga.control$npop, nbits)
      } else {
        population <- children.all
      }

      colnames(population) <- bit.names
      population <- t(vapply(seq_len(nrow(population)),
                             function(i)
                               validStringbinary(string = population[i, ],
                                                 search.space = search.space),
                             numeric(nbits)))

      colnames(population) <- bit.names
      data.pop <- as.data.frame(population)
      data.pop$fitness <- vapply(seq_len(nrow(data.pop)),
                                 function(k) {
                                   string_vec <- as.vector(data.pop[k, 1:nbits])
                                   result <- try(mod.run(
                                     string               = decodeBinary(string_vec),
                                     dat                  = dat,
                                     search.space         = search.space,
                                     no.cores             = no.cores,
                                     param_table          = param_table,
                                     precomputed_results_file = precomputed_results_file,
                                     filename             = filename,
                                     foldername           = foldername,
                                     .modEnv              = .modEnv,
                                     verbose = verbose,
                                     ...
                                   ),
                                   silent = TRUE)
                                   if (is.numeric(result) &&
                                       length(result) == 1)
                                     result
                                   else
                                     NA_real_
                                 },
                                 numeric(1))

      data.pop$rank <- rank_new(data.pop$fitness, ga.control$diff_tol)
      data.pop$local.num <- seq_len(nrow(data.pop))
      sel.best <- data.pop[data.pop$rank == min(data.pop$rank), ][1, ]
      sel.best <- as.numeric(sel.best$local.num)
      sel.best.code <- population[sel.best, , drop = FALSE]

      # Local exhaustive search
      ls.population <- NULL
      if (ga.iter %% ga.control$nls == 0) {
        ls.population <- runlocal(
          dat  = dat,
          param_table = param_table,
          search.space = search.space,
          no.cores = no.cores,
          start.string = sel.best.code,
          diff_tol = ga.control$diff_tol,
          penalty.control = penalty.control,
          precomputed_results_file = precomputed_results_file,
          foldername = foldername,
          filename = filename,
          .modEnv = .modEnv,
          verbose = verbose,
          ...
        )
        if (min(data.pop$fitness, na.rm = TRUE) > min(ls.population$fitness, na.rm = TRUE)) {
          sel.best <-
            as.numeric(rownames(ls.population[ls.population$rank == min(ls.population$rank), ][1, ]))
          ls.population2 <- data.matrix(ls.population[, 1:nbits])
          sel.best.code <- ls.population2[sel.best, , drop = FALSE]
        }
      }

      # Selection (tournament)
      sel.population <-
        ga.sel.tournament(data.pop = data.pop,
                          npop = ga.control$npop,
                          nbits = nbits)

      # Crossover
      children.cross <-
        ga.crossover(
          sel.population = sel.population,
          pcross = ga.control$pcross,
          npop = ga.control$npop,
          nbits = nbits
        )
      children.cross <- t(apply(children.cross, 1, function(x) {
        validStringbinary(string       = x,
                          search.space = search.space)
      }))


      # Mutation
      children.mutation <-
        ga.mutation(children.cross = children.cross,
                    pmut = ga.control$pmut)

      children.mutation <- t(apply(children.mutation, 1, function(x) {
        validStringbinary(string       = x,
                          search.space = search.space)
      }))

      # Elitism: Keep best model in next generation
      children.all <-
        rbind(children.mutation[1:(nrow(children.mutation) - 1), ],
              sel.best.code)
      rownames(children.all) <- seq_len(nrow(children.all))

      #  Save results to history
      history[[ga.iter]] <- list(
        iteration              = ga.iter,
        population        = population,
        data.pop          = data.pop,
        sel.best.code     = sel.best.code,
        sel.population    = sel.population,
        ls.population     = ls.population,
        children.cross    = children.cross,
        children.mutation = children.all
      )
      p(sprintf("GA iteration %d / %d", ga.iter, ga.control$niter))
    }
  })
  # Final output
  names(sel.best.code) <- bit.names
  best_model_name <-
    parseName(
      modcode = decodeBinary(binary_string = sel.best.code, search.space = search.space),
      search.space = search.space
    )

  out <- new.env(parent = emptyenv())
  class(out) <- "gaOperatorResult"
  out[["Final Selected Code"]] <- as.data.frame(sel.best.code)
  out[["Final Selected Model Name"]] <- best_model_name
  out[["Model Run History"]] <-
    as.data.frame(.modEnv$Store.all, stringsAsFactors = FALSE)
  out[["Selection History"]] <- history
  return(out)
}


#' Print method for gaOperatorResult objects
#'
#' Custom print method for results returned by the GA operator.
#' Displays only:
#'   - Final selected model code
#'   - Final selected model name
#'
#' @param x An object containing GA operator output (class gaOperatorResult).
#' @param ... Additional arguments (currently unused).
#'
#' @return Invisibly returns the input object.
#' @export
print.gaOperatorResult <- function(x, ...) {
  # Print final selected model code
  cat(crayon::green$bold("\n=== Final Selected Model Code ===\n"))
  print(x$`Final Selected Code`)

  # Print final selected model name
  cat(crayon::green$bold("\n=== Final Selected Model Name ===\n"))
  cat(x$`Final Selected Model Name`, "\n")

  invisible(x)
}













