#' Unstandardised Dantzig estimator with missing covariates
#'
#' @importFrom graphics legend abline matplot
#' @importFrom stats complete.cases median model.frame model.matrix
#'   model.response na.pass sd setNames
#' @importFrom MASS ginv
#' @importFrom fastDummies dummy_cols
#' @importFrom Rglpk Rglpk_solve_LP
#'
#' @description
#' Internal function implementing the unstandardised Dantzig selector for
#' high-dimensional linear regression with missing covariates. The estimator
#' is defined through moment constraints and may optionally incorporate
#' unlabelled covariate data to improve estimation of the second-moment matrix.
#'
#' @usage
#' dantzig_missing_unstandardised(
#'   X_labeled, y, X_unlabeled = NULL, lambda, gurobi = FALSE
#' )
#'
#' @param X_labeled
#' Numeric matrix or data.frame of labelled covariates. Rows correspond to
#' observations and columns to covariates; missing values are allowed.
#'
#' @param y
#' Numeric response vector corresponding to the rows of \code{X_labeled}.
#'
#' @param X_unlabeled
#' Optional numeric matrix or data.frame of unlabelled covariates. If supplied,
#' these observations are used only for covariance estimation and not for
#' estimating cross-moments with the response.
#'
#' @param lambda
#' Positive numeric scalar giving the Dantzig regularisation parameter.
#'
#' @param gurobi
#' Logical; if TRUE, the linear program is solved using the \pkg{gurobi}
#' optimizer. Otherwise, \pkg{Rglpk} is used.
#'
#' @details
#' The function constructs empirical moment estimates based on available data
#' and solves the Dantzig selector linear program without standardising the
#' covariates. Categorical covariates are dummy-encoded internally, with missing
#' values preserved. The solution minimises the \eqn{\ell_1}-norm of the
#' coefficient vector subject to moment constraints.
#'
#' @return
#' A list with the following component:
#' \describe{
#'   \item{beta_hat}{Numeric vector of estimated regression coefficients, with
#'   names corresponding to the columns of the encoded design matrix.}
#' }
#'
#' @keywords
#' internal missing data linear regression
#' semi-supervised learning high-dimensional statistics
#'
#' @noRd


dantzig_missing_unstandardised <- function(
    X_labeled, y, X_unlabeled = NULL, lambda, gurobi = FALSE
) {
  # Validate inputs up front: a length mismatch between `y` and `X_labeled`
  # would otherwise be silently recycled (or padded with NA) when the
  # cross-moments are formed below.
  n_labeled <- nrow(X_labeled)
  if (is.null(n_labeled) || n_labeled < 1L) {
    stop(
      "`X_labeled` must be a matrix or data.frame with at least one row.",
      call. = FALSE
    )
  }
  if (length(y) != n_labeled) {
    stop("`y` must have one entry per row of `X_labeled`.", call. = FALSE)
  }
  if (!is.numeric(lambda) || length(lambda) != 1L || is.na(lambda) ||
      lambda < 0) {
    stop("`lambda` must be a single non-negative number.", call. = FALSE)
  }

  # Stack labelled rows first so they can be recovered by position later.
  X <- if (is.null(X_unlabeled)) X_labeled else rbind(X_labeled, X_unlabeled)
  X <- as.data.frame(X)

  # Character columns are treated as categorical.
  X[] <- lapply(X, function(col) if (is.character(col)) factor(col) else col)

  # Dummy-encode factors, dropping the first level of each; with
  # `ignore_na = TRUE` no separate indicator column is created for NA.
  if (any(vapply(X, is.factor, logical(1)))) {
    X_encoded <- fastDummies::dummy_cols(
      X,
      remove_first_dummy = TRUE,
      remove_selected_columns = TRUE,
      ignore_na = TRUE
    )
    X_encoded[] <- lapply(X_encoded, as.numeric)
  } else {
    X_encoded <- X  # no categorical columns, nothing to encode
  }

  p <- ncol(X_encoded)
  if (p < 1L) {
    stop("No covariate columns remain after encoding.", call. = FALSE)
  }

  # Labelled rows of the encoded design (seq_len is safe for any row count).
  X_labeled_encoded <- X_encoded[seq_len(n_labeled), , drop = FALSE]

  # Second-moment estimate uses labelled and unlabelled rows together.
  Sigma_hat <- estimate_cov_raw(data.matrix(X_encoded))

  # Cross-moment of each covariate with y, averaged over the labelled rows in
  # which that covariate is observed.
  gamma_hat <- vapply(seq_len(p), function(j) {
    col <- X_labeled_encoded[[j]]
    observed <- !is.na(col)
    mean(col[observed] * y[observed])
  }, numeric(1))

  if (anyNA(Sigma_hat) || anyNA(gamma_hat)) {
    stop(
      "Moment estimates contain missing values; check that every covariate ",
      "is observed in the labelled data and that `y` has no NA.",
      call. = FALSE
    )
  }

  # LP variables are (u, beta), each of length p. Minimising sum(u) subject to
  # u >= |beta| minimises the L1 norm of beta.
  Objective <- c(rep(1, p), rep(0, p))

  I <- diag(1, p)
  Z <- matrix(0, p, p)

  # Rows, all read as ">=" against `rhs`:
  #   u + beta >= 0, u - beta >= 0, u >= 0,
  #   Sigma beta >= gamma - lambda, -Sigma beta >= -gamma - lambda
  # i.e. |Sigma beta - gamma| <= lambda componentwise.
  OverallMatrix <- rbind(
    cbind(I,  I),
    cbind(I, -I),
    cbind(I,  Z),
    cbind(Z,  Sigma_hat),
    cbind(Z, -Sigma_hat)
  )

  rhs <- c(
    rep(0, 3 * p),
    -lambda + gamma_hat,
    -lambda - gamma_hat
  )

  dir <- rep(">=", length(rhs))

  if (!gurobi) {
    # Rglpk's default lower bound is 0, so make every variable free.
    bounds <- list(
      lower = list(ind = seq_len(2 * p), val = rep(-Inf, 2 * p)),
      upper = list(ind = seq_len(2 * p), val = rep(Inf, 2 * p))
    )

    result <- Rglpk::Rglpk_solve_LP(
      obj    = Objective,
      mat    = OverallMatrix,
      dir    = dir,
      rhs    = rhs,
      bounds = bounds,
      max    = FALSE
    )

    # Status 0 means an optimal solution was found; anything else means the
    # returned vector should not be trusted.
    if (!identical(as.integer(result$status), 0L)) {
      warning(
        "Rglpk did not report an optimal solution (status ",
        result$status,
        "); the constraints may be infeasible for this `lambda`.",
        call. = FALSE
      )
    }

    sol <- result$solution
  } else {
    # Same program with the ">=" rows negated into "<" form.
    gurobi_model <- list(
      A = -OverallMatrix,
      rhs = -rhs,
      sense = rep("<", length(rhs)),
      obj = Objective,
      modelsense = "min",
      lb = rep(-1e20, 2 * p),
      ub = rep(1e20, 2 * p)
    )

    params <- list(LogToConsole = 0)
    result <- gurobi::gurobi(gurobi_model, params)

    if (is.null(result$x)) {
      stop(
        "Gurobi returned no solution (status: ", result$status, ").",
        call. = FALSE
      )
    }
    sol <- result$x
  }

  # The second half of the solution vector holds beta; snap numerical noise
  # to exact zero and label by encoded column.
  beta_hat <- sol[(p + 1):(2 * p)]
  beta_hat[abs(beta_hat) < 1e-10] <- 0
  names(beta_hat) <- colnames(X_encoded)

  list(beta_hat = beta_hat)
}




#' Standardised Dantzig estimator with missing covariates
#'
#' @description
#' Internal function implementing the standardised Dantzig selector for
#' high-dimensional linear regression with missing covariates. Covariates are
#' standardised using empirical means and standard deviations computed from the
#' combined labelled and unlabelled data prior to solving the Dantzig linear
#' program. The returned coefficients are mapped back to the original scale.
#'
#' @usage
#' dantzig_missing_standardised(
#'   X_labeled, y, X_unlabeled = NULL, lambda, gurobi = FALSE
#' )
#'
#' @param X_labeled
#' Numeric matrix or data.frame of labelled covariates. Rows correspond to
#' observations and columns to covariates; missing values are allowed.
#'
#' @param y
#' Numeric response vector corresponding to the rows of \code{X_labeled}.
#'
#' @param X_unlabeled
#' Optional numeric matrix or data.frame of unlabelled covariates. If supplied,
#' these observations are used only for estimating covariate means, variances,
#' and second moments.
#'
#' @param lambda
#' Positive numeric scalar giving the Dantzig regularisation parameter.
#'
#' @param gurobi
#' Logical; if TRUE, the linear program is solved using the \pkg{gurobi}
#' optimizer. Otherwise, \pkg{Rglpk} is used.
#'
#' @details
#' All covariates are standardised prior to estimation by subtracting their
#' empirical means and dividing by their empirical standard deviations, both
#' computed using the combined labelled and unlabelled samples. Categorical
#' covariates are dummy-encoded internally, with missing values preserved.
#' After solving the Dantzig linear program in the standardised space, the
#' estimated coefficients are transformed back to the original scale and an
#' intercept term is computed.
#'
#' @return
#' A list with the following components:
#' \describe{
#'   \item{beta_hat}{Numeric vector of regression coefficient estimates on the
#'   original (unstandardised) covariate scale, with names corresponding to the
#'   encoded design matrix columns.}
#'   \item{intercept}{Numeric scalar giving the estimated intercept term.}
#' }
#'
#' @keywords
#' internal missing data linear regression
#' semi-supervised learning high-dimensional statistics
#'
#' @noRd


dantzig_missing_standardised <- function(
    X_labeled, y, X_unlabeled = NULL, lambda, gurobi = FALSE
) {
  # Validate inputs up front: a length mismatch between `y` and `X_labeled`
  # would otherwise be silently recycled (or padded with NA) when the
  # cross-moments are formed below.
  n_labeled <- nrow(X_labeled)
  if (is.null(n_labeled) || n_labeled < 1L) {
    stop(
      "`X_labeled` must be a matrix or data.frame with at least one row.",
      call. = FALSE
    )
  }
  if (length(y) != n_labeled) {
    stop("`y` must have one entry per row of `X_labeled`.", call. = FALSE)
  }
  if (!is.numeric(lambda) || length(lambda) != 1L || is.na(lambda) ||
      lambda < 0) {
    stop("`lambda` must be a single non-negative number.", call. = FALSE)
  }

  # Stack labelled rows first so they can be recovered by position later.
  X <- if (is.null(X_unlabeled)) X_labeled else rbind(X_labeled, X_unlabeled)
  X <- as.data.frame(X)

  # Character columns are treated as categorical.
  X[] <- lapply(X, function(col) if (is.character(col)) factor(col) else col)

  # Dummy-encode factors, dropping the first level of each; with
  # `ignore_na = TRUE` no separate indicator column is created for NA.
  if (any(vapply(X, is.factor, logical(1)))) {
    X_encoded <- fastDummies::dummy_cols(
      X,
      remove_first_dummy = TRUE,
      remove_selected_columns = TRUE,
      ignore_na = TRUE
    )
  } else {
    X_encoded <- X  # no categorical columns, nothing to encode
  }

  p <- ncol(X_encoded)
  if (p < 1L) {
    stop("No covariate columns remain after encoding.", call. = FALSE)
  }

  # Column-wise location and scale from all rows (labelled + unlabelled),
  # ignoring missing entries. vapply keeps the column names on the result.
  mu <- vapply(X_encoded, function(col) mean(col, na.rm = TRUE), numeric(1))
  sdv <- vapply(X_encoded, function(col) sd(col, na.rm = TRUE), numeric(1))

  # Dividing by a zero or undefined sd would turn the whole column, its
  # moments and the intercept into NaN/Inf, so refuse early instead.
  bad_scale <- is.na(sdv) | sdv == 0
  if (any(bad_scale)) {
    stop(
      "Cannot standardise covariates with zero or undefined standard ",
      "deviation: ", paste(names(sdv)[bad_scale], collapse = ", "),
      call. = FALSE
    )
  }

  X_std <- sweep(X_encoded, 2, mu,  FUN = "-")
  X_std <- sweep(X_std,     2, sdv, FUN = "/")

  # Labelled rows of the standardised design (seq_len is safe for any count).
  X_labeled_encoded <- X_std[seq_len(n_labeled), , drop = FALSE]

  # Second-moment estimate uses labelled and unlabelled rows together.
  Sigma_hat <- estimate_cov_raw(data.matrix(X_std))

  # Cross-moment of each standardised covariate with y, averaged over the
  # labelled rows in which that covariate is observed.
  gamma_hat <- vapply(seq_len(p), function(j) {
    col <- X_labeled_encoded[[j]]
    observed <- !is.na(col)
    mean(col[observed] * y[observed])
  }, numeric(1))

  if (anyNA(Sigma_hat) || anyNA(gamma_hat)) {
    stop(
      "Moment estimates contain missing values; check that every covariate ",
      "is observed in the labelled data and that `y` has no NA.",
      call. = FALSE
    )
  }

  # LP variables are (u, beta), each of length p. Minimising sum(u) subject to
  # u >= |beta| minimises the L1 norm of beta.
  Objective <- c(rep(1, p), rep(0, p))

  I <- diag(1, p)
  Z <- matrix(0, p, p)

  # Rows, all read as ">=" against `rhs`:
  #   u + beta >= 0, u - beta >= 0, u >= 0,
  #   Sigma beta >= gamma - lambda, -Sigma beta >= -gamma - lambda
  # i.e. |Sigma beta - gamma| <= lambda componentwise.
  OverallMatrix <- rbind(
    cbind(I,  I),
    cbind(I, -I),
    cbind(I,  Z),
    cbind(Z,  Sigma_hat),
    cbind(Z, -Sigma_hat)
  )

  rhs <- c(
    rep(0, 3 * p),
    -lambda + gamma_hat,
    -lambda - gamma_hat
  )

  dir <- rep(">=", length(rhs))

  if (!gurobi) {
    # Rglpk's default lower bound is 0, so make every variable free.
    bounds <- list(
      lower = list(ind = seq_len(2 * p), val = rep(-Inf, 2 * p)),
      upper = list(ind = seq_len(2 * p), val = rep(Inf, 2 * p))
    )

    result <- Rglpk::Rglpk_solve_LP(
      obj    = Objective,
      mat    = OverallMatrix,
      dir    = dir,
      rhs    = rhs,
      bounds = bounds,
      max    = FALSE
    )

    # Status 0 means an optimal solution was found; anything else means the
    # returned vector should not be trusted.
    if (!identical(as.integer(result$status), 0L)) {
      warning(
        "Rglpk did not report an optimal solution (status ",
        result$status,
        "); the constraints may be infeasible for this `lambda`.",
        call. = FALSE
      )
    }

    sol <- result$solution
  } else {
    # Same program with the ">=" rows negated into "<" form.
    gurobi_model <- list(
      A = -OverallMatrix,
      rhs = -rhs,
      sense = rep("<", length(rhs)),
      obj = Objective,
      modelsense = "min",
      lb = rep(-1e20, 2 * p),
      ub = rep(1e20, 2 * p)
    )

    params <- list(LogToConsole = 0)
    result <- gurobi::gurobi(gurobi_model, params)

    if (is.null(result$x)) {
      stop(
        "Gurobi returned no solution (status: ", result$status, ").",
        call. = FALSE
      )
    }
    sol <- result$x
  }

  # The second half of the solution vector holds beta in standardised units.
  beta_std <- sol[(p + 1):(2 * p)]

  # Undo the scaling; the result inherits the column names carried by `sdv`.
  beta_unstd <- beta_std / sdv
  beta_unstd[abs(beta_unstd) < 1e-10] <- 0

  # Constant term produced by un-centring the covariates.
  # NOTE(review): `y` is never centred in this function, so this intercept
  # looks correct only for a mean-zero response; the usual back-transformation
  # would also add mean(y). Confirm the intended convention before changing.
  intercept <- -sum(mu / sdv * beta_std)

  list(
    beta_hat = beta_unstd,
    intercept = intercept
  )
}

#' Dantzig estimator with missing covariates
#'
#' @description
#' High-dimensional linear regression estimator based on the Dantzig selector
#' that accommodates missing covariates and optionally leverages unlabelled
#' covariate data. This function is a user-facing wrapper that dispatches to
#' either a standardised or unstandardised implementation depending on the
#' value of \code{standardise}. This function is based on
#' Section 3 of \insertCite{RisebrowSSLR;textual}{LRMiss}.
#'
#' @usage
#' dantzig_missing(
#'   X_labeled, y, X_unlabeled = NULL, lambda,
#'   gurobi = FALSE, standardise = TRUE
#' )
#'
#' @param X_labeled
#' Numeric matrix or data.frame of labelled covariates, with rows corresponding
#' to observations and columns to covariates. Missing values are allowed.
#'
#' @param y
#' Numeric response vector of length \code{nrow(X_labeled)}.
#'
#' @param X_unlabeled
#' Optional numeric matrix or data.frame of unlabelled covariates. If supplied,
#' these observations are used only for estimating second moments of the
#' covariates and do not contribute to the response.
#'
#' @param lambda
#' Positive numeric scalar giving the Dantzig regularisation parameter.
#'
#' @param gurobi
#' Logical; if TRUE, the linear programs are solved using the \pkg{gurobi}
#' optimizer (a valid Gurobi installation and license are required). If FALSE,
#' the open-source solver from \pkg{Rglpk} is used instead.
#'
#' @param standardise
#' Logical; if TRUE, covariates are standardised prior to estimation and the
#' resulting coefficients are mapped back to the original scale with an
#' intercept term returned.
#'
#' @details
#' Categorical covariates are internally dummy-encoded, with missing values
#' preserved. When \code{standardise = TRUE}, covariates are centred and scaled
#' using empirical means and standard deviations computed from the combined
#' labelled and unlabelled samples.
#'
#' @return
#' A list with at least the following component:
#' \describe{
#'   \item{beta_hat}{Numeric vector of estimated regression coefficients, with
#'   names corresponding to the encoded design matrix columns.}
#' }
#' If \code{standardise = TRUE}, the list also contains:
#' \describe{
#'   \item{intercept}{Numeric scalar giving the estimated intercept term.}
#' }
#'
#' @examples
#' set.seed(1)
#' n <- 50; p <- 5
#' X_full <- matrix(rnorm(n * p), n, p)
#' beta_true <- c(1, 0.5, rep(0, p - 2))
#' y <- X_full[, 1] * beta_true[1] + X_full[, 2] * beta_true[2] + rnorm(n)
#'
#' # introduce missingness into covariates
#' X_miss <- X_full
#' X_miss[sample(length(X_miss), size = 0.1 * length(X_miss))] <- NA
#'
#' # fit Dantzig estimator (example lambda; tune in practice)
#' fit <- dantzig_missing(
#'   X_labeled = X_miss,
#'   y = y,
#'   lambda = 0.1,
#'   standardise = TRUE
#' )
#' fit$beta_hat
#'
#' @references
#' \insertRef{RisebrowSSLR}{LRMiss}
#' @export
dantzig_missing <- function(
    X_labeled, y, X_unlabeled = NULL, lambda, gurobi = FALSE, standardise = TRUE
) {
  # Pick the implementation once, then forward every argument unchanged.
  fitter <- if (standardise) {
    dantzig_missing_standardised
  } else {
    dantzig_missing_unstandardised
  }

  fitter(
    X_labeled = X_labeled,
    y = y,
    X_unlabeled = X_unlabeled,
    lambda = lambda,
    gurobi = gurobi
  )
}
