#' Factor-Augmented Clustering Tree
#'
#' @description
#' Builds a binary tree for clustering time series data based on covariates,
#' using a group factor model framework. The splitting criterion evaluates
#' whether child nodes exhibit distinct factor structures.
#'
#' @param X A numeric matrix of covariates with dimension \eqn{N \times p},
#'   where \eqn{N} is the number of time series and \eqn{p} is the number of features.
#'   Each row corresponds to the covariates for one time series.
#' @param Y A numeric matrix of time series data with dimension \eqn{T \times N},
#'   where \eqn{T} is the length of each series. Each column represents one time series.
#' @param r_a A positive integer specifying the number of singular vectors to extract
#'   from each child node for constructing the projection matrices, default is 8.
#' @param r_b A positive integer specifying the number of leading singular values
#'   to sum for the split statistic. Must satisfy \code{r_b <= r_a}, default is 2.
#' @param method Character string specifying the splitting decision rule:
#'   \describe{
#'     \item{\code{"threshold"}}{Uses a data-adaptive threshold based on
#'       signal-to-noise ratio estimation. Faster but may be less accurate.
#'       Suitable for large datasets.}
#'     \item{\code{"permutation"}}{Uses a permutation test for hypothesis testing.
#'       More rigorous but computationally intensive.}
#'   }
#' @param control A list of control parameters for tree construction:
#'   \describe{
#'     \item{\code{minsplit}}{Minimum number of observations required to attempt
#'       a split. Default: \code{90}.}
#'     \item{\code{minbucket}}{Minimum number of observations in any terminal node.
#'       Default: \code{30}.}
#'     \item{\code{alpha}}{Significance level for the permutation test
#'       (used only when \code{method = "permutation"}). Default: \code{0.01}.}
#'     \item{\code{R}}{Number of permutations for the hypothesis test
#'       (used only when \code{method = "permutation"}). Default: \code{199}.}
#'     \item{\code{sep}}{Controls the density of candidate split points.
#'       If \code{"auto"} (default), subsamples candidates when \eqn{n > 800}.
#'       If numeric, evaluates every \code{sep} candidate point.}
#'     \item{\code{parallel}}{Logical; if \code{TRUE}, enables parallel computation.
#'       Default: \code{FALSE}.}
#'     \item{\code{n_cores}}{Number of cores for parallel processing.
#'       If \code{NULL} (default), uses \code{detectCores() - 1}.}
#'   }
#'
#' @return An object of class \code{"FACT"} containing:
#'   \describe{
#'     \item{\code{frame}}{A data frame describing the tree structure, with one row
#'       per node. Includes split variable, split value, test statistic, and p-value
#'       (if applicable). A smaller test statistic indicates stronger evidence of
#'       heterogeneous factor structures between child nodes.}
#'     \item{\code{membership}}{An integer vector of length \eqn{N} indicating
#'       the terminal node assignment for each observation.}
#'     \item{\code{control}}{The control parameters used.}
#'     \item{\code{terms}}{Metadata including covariate names, data dimensions,
#'       and the values of \code{r_a} and \code{r_b}.}
#'     \item{\code{method}}{The splitting method used.}
#'   }
#' @details
#' The FACT algorithm clusters time series by recursively partitioning them
#' based on their underlying factor structures. At each node, the method:
#' \enumerate{
#'   \item Searches for the optimal split across all covariates and candidate points.
#'   \item Computes a test statistic based on the overlap of factor spaces
#'     between the two child nodes.
#'   \item Decides whether to split using either a threshold rule or permutation test.
#' }
#' @references
#' Hu, J., Li, T., Luo, Z., & Wang, X. Factor-Augmented Clustering Tree for Time Series.
#'
#' @seealso
#' \code{\link{COR}} for correlation-based clustering,
#' \code{\link{gendata}} for generating synthetic data,
#' \code{\link{print.FACT}} and \code{\link{plot.FACT}} for visualization.
#'
#' @examples
#' \donttest{
#' data <- gendata(seed = 123, T = 200, N = c(50, 50, 50, 50))
#' tree1 <- FACT(data$X, data$Y, r_a = 8, r_b = 4, method = "threshold")
#' print(tree1)
#' }
#'
#' @export
#' @importFrom parallel detectCores makeCluster stopCluster
#' @importFrom doParallel registerDoParallel
#' @importFrom foreach foreach %dopar% registerDoSEQ
#' @importFrom utils modifyList
#' @importFrom doRNG %dorng%
#'
FACT <- function(X, Y, r_a = 8, r_b = 4,
                 method = c("threshold", "permutation"),
                 control = list()) {

  # --- 1. Argument validation and control parameter setup ---
  method <- match.arg(method)
  defaults <- list(
    minsplit = 90, minbucket = 30, alpha = 0.01, R = 199,
    sep = 1, parallel = FALSE, n_cores = NULL
  )
  # Use utils::modifyList
  control <- utils::modifyList(defaults, control)

  if (is.null(control$minbucket)) {
    control$minbucket <- round(control$minsplit / 3)
  }

  stopifnot(is.matrix(X), is.matrix(Y), ncol(Y) == nrow(X),
            r_a > 0, r_b > 0, r_b <= r_a, r_a <= nrow(Y))

  if (is.null(colnames(X))) {
    message("Input matrix X has no column names. Generating default names: X1, X2, ...")
    colnames(X) <- paste0("X", 1:ncol(X))
  }

  if (control$parallel) {
    n_cores <- if (is.null(control$n_cores)) parallel::detectCores() - 1 else control$n_cores
    if(method == "threshold"){
      if(n_cores > ncol(X)) n_cores <- ncol(X)
    }
    cl <- parallel::makeCluster(n_cores)
    doParallel::registerDoParallel(cl)
    on.exit({ parallel::stopCluster(cl); registerDoSEQ() }, add = TRUE)
    message(paste("Parallel backend registered with", n_cores, "cores."))
  }

  # --- 2. Initialize tree structure ---
  tree_env <- new.env()
  tree_env$nodes <- data.frame(
    node_id = 1, is_leaf = FALSE, n_obs = ncol(Y), parent_id = 0,
    split_var = NA_character_, split_val = NA_real_, statistic = NA_real_,
    p_value = NA_real_, left_child = NA_integer_, right_child = NA_integer_
  )
  tree_env$next_node_id <- 2

  # --- 3. Recursive growing function ---
  grow <- function(node_id, indices) {
    # current_node <- tree_env$nodes[tree_env$nodes$node_id == node_id, ]
    X_node <- X[indices, , drop = FALSE]
    Y_node <- Y[, indices, drop = FALSE]
    n_node <- length(indices)
    T_node <- nrow(Y_node)

    if (n_node < control$minsplit || nrow(unique(X_node)) < 2) {
      tree_env$nodes[tree_env$nodes$node_id == node_id, "is_leaf"] <<- TRUE
      return()
    }

    if (method == "threshold") {
      message(paste("Performing split for node", node_id, "with", n_node, "observations..."))
      best_split <- find_best_split(X_node, Y_node, indices, r_a, r_b, control)
      if (is.infinite(best_split$stat)) {
        tree_env$nodes[tree_env$nodes$node_id == node_id, "is_leaf"] <<- TRUE
        return()
      }
      T_opt <- best_split$stat
      is_significant <- FALSE
      temp = svd(Y_node)$d^2
      snr = sum(temp[-c(1:r_a)])/sum(temp)
      threshold = r_b * (1 - snr*log(n_node)/n_node*2 - snr*log(T_node)/T_node*2)
      if (T_opt < threshold) {
        is_significant <- TRUE
      }
    } else { # Permutation test
      message(paste("Performing permutation test for node", node_id, "with", n_node, "observations..."))
      control_seq <- control; control_seq$parallel <- FALSE
      best_split <- find_best_split(X_node, Y_node, indices, r_a, r_b, control_seq)
      if (is.infinite(best_split$stat)) {
        tree_env$nodes[tree_env$nodes$node_id == node_id, "is_leaf"] <<- TRUE
        return()
      }
      T_opt <- best_split$stat
      is_significant <- FALSE

      perm_stats <- foreach::foreach(
        m = 1:control$R, .combine = 'c',
        .export = c("find_best_split", "calculate_split_statistic", "r_a", "r_b"),
        .packages = "irlba"
      ) %dorng% {
        perm_indices <- sample(1:n_node)
        perm_split <- find_best_split(X_node, Y_node[, perm_indices, drop = FALSE], indices, r_a, r_b, control_seq)
        perm_split$stat
      }

      p_value <- mean(c(perm_stats, T_opt) <= T_opt, na.rm = TRUE)
      tree_env$nodes[tree_env$nodes$node_id == node_id, "p_value"] <<- p_value
      if ((p_value <= control$alpha) & (T_opt < r_b*(1 - 0.01))) {
        # Use dual criteria to determine whether to split, and reduce type I errors
        # Prevent the case to split, where p_value is small, but T_opt is close to r_b
        is_significant <- TRUE
      }
    }

    if (is_significant) {
      node_idx <- which(tree_env$nodes$node_id == node_id)
      tree_env$nodes[node_idx, "split_var"] <<- colnames(X)[best_split$var_idx]
      tree_env$nodes[node_idx, "split_val"] <<- best_split$split_val
      tree_env$nodes[node_idx, "statistic"] <<- T_opt

      left_id <- tree_env$next_node_id
      right_id <- tree_env$next_node_id + 1
      tree_env$next_node_id <<- tree_env$next_node_id + 2

      tree_env$nodes[node_idx, "left_child"] <<- left_id
      tree_env$nodes[node_idx, "right_child"] <<- right_id

      new_nodes <- data.frame(
        node_id = c(left_id, right_id), is_leaf = c(FALSE, FALSE),
        n_obs = c(length(best_split$left_indices), length(best_split$right_indices)),
        parent_id = node_id, split_var = NA, split_val = NA, statistic = NA, p_value = NA,
        left_child = NA, right_child = NA
      )
      tree_env$nodes <<- rbind(tree_env$nodes, new_nodes)

      grow(left_id, best_split$left_indices)
      grow(right_id, best_split$right_indices)

    } else {
      node_idx <- which(tree_env$nodes$node_id == node_id)
      tree_env$nodes[node_idx, "is_leaf"] <<- TRUE
      tree_env$nodes[node_idx, "split_var"] <<- colnames(X)[best_split$var_idx]
      tree_env$nodes[node_idx, "split_val"] <<- best_split$split_val
      tree_env$nodes[node_idx, "statistic"] <<- T_opt
    }
  }

  grow(node_id = 1, indices = 1:ncol(Y))

  get_node_membership <- function(tree_df, X_full) {
    N <- tree_df$n_obs[1]
    membership <- integer(N)
    assign_obs <- function(node_id, indices) {
      node_info <- tree_df[tree_df$node_id == node_id, ]
      if (node_info$is_leaf) {
        membership[indices] <<- node_id
      } else {
        x_feature <- X_full[indices, node_info$split_var]
        left_indices_node <- which(x_feature < node_info$split_val)
        right_indices_node <- which(x_feature >= node_info$split_val)
        assign_obs(node_info$left_child, indices[left_indices_node])
        assign_obs(node_info$right_child, indices[right_indices_node])
      }
    }
    assign_obs(1, 1:N)
    return(membership)
  }

  tree_env$nodes <- tree_env$nodes[order(tree_env$nodes$node_id), ]
  rownames(tree_env$nodes) <- NULL

  result <- list(
    frame = tree_env$nodes,
    membership = get_node_membership(tree_env$nodes, X),
    control = control,
    terms = list(X_names = colnames(X), Y_dim = dim(Y), r_a = r_a, r_b = r_b),
    method = method
  )

  class(result) <- "FACT"
  return(result)
}
