% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/run_bench.R
\name{run_missingness_benchmark}
\alias{run_missingness_benchmark}
\title{Run missingness benchmark}
\usage{
run_missingness_benchmark(
  data,
  target_col,
  feature_cols = NULL,
  mask_rates = c(0.05, 0.1, 0.2, 0.3),
  rf_n_estimators = 200,
  knn_k = 5,
  test_size = 0.2,
  seed = 42
)
}
\arguments{
\item{data}{A data.frame (or object coercible to data.frame) containing the dataset.}

\item{target_col}{Single character string: name of the outcome column.}

\item{feature_cols}{Character vector of feature column names. If \code{NULL},
uses all columns except \code{target_col}.}

\item{mask_rates}{Numeric vector with values in (0, 1): the proportions of feature entries to mask. Each rate is evaluated separately.}

\item{rf_n_estimators}{Integer: number of trees for the random forest.}

\item{knn_k}{Integer: number of neighbors for kNN regression.}

\item{test_size}{Numeric in (0, 1): fraction of rows assigned to the validation split.}

\item{seed}{Integer: seed used for the data split and for model reproducibility.}
}
\value{
A data.frame with columns \code{MaskRate}, \code{Model}, \code{MAPE}, and \code{R2}.
}
\description{
Benchmarks model performance under feature missingness. The function:
\enumerate{
\item Filters to complete cases for \code{target_col} and \code{feature_cols} (baseline complete data),
\item Splits into training/validation,
\item Masks feature values at each rate using Bernoulli (cell-wise) missingness,
\item Imputes missing features by fitting MICE on the training data and then applying the fitted imputation
model to the validation data via \code{mice::mice.mids(newdata = ...)}, which reduces data leakage,
\item Trains Random Forest (\code{ranger}) and kNN regression (\code{FNN::knn.reg}),
\item Returns MAPE and R-squared for each model and mask rate.
}

Feature columns must be numeric (or coercible to numeric without introducing new missing values).
This mirrors workflows where features are treated as numeric arrays.
}
\details{
Validation imputation is performed using \code{mice::mice.mids(newdata = ...)}, which generates imputations
for new data according to the model stored in the training \code{mids} object.

MAPE is computed with \code{Metrics::mape()} using only observations whose actual target value is non-zero, because the percentage error is unstable (division by zero) when the actual value is zero.
}
\examples{
data("CGMExampleData")
run_missingness_benchmark(
  CGMExampleData,
  target_col = "LBORRES",
  feature_cols = c("TimeDifferenceMinutes", "TimeSeries", "USUBJID"),
  mask_rates = c(0.05, 0.10),
  rf_n_estimators = 100,
  knn_k = 3
)

}
\author{
Shubh Saraswat, Hasin Shahed Shad, and Xiaohua Douglas Zhang
}
