% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/validation.R
\name{cross_validation}
\alias{cross_validation}
\title{Perform k-fold cross-validation for PTSD diagnostic models}
\usage{
cross_validation(data, k = 5, score_by = "newly_nondiagnosed", seed = 123)
}
\arguments{
\item{data}{A data frame containing exactly 20 columns with PCL-5 item scores
(output of \code{rename_ptsd_columns()}). Each symptom should be scored on a 0-4 scale.}

\item{k}{Number of folds for cross-validation (default: 5)}

\item{score_by}{Character string specifying the optimization criterion:
\itemize{
  \item \code{"false_cases"}: Minimize total misclassifications
  \item \code{"newly_nondiagnosed"}: Minimize false negatives only (default)
}}

\item{seed}{Integer for random number generation reproducibility (default: 123)}
}
\value{
A list containing:
\itemize{
  \item without_clusters: Results for model without cluster representation
    \itemize{
      \item fold_results: List of diagnostic comparisons for each fold
      \item summary_by_fold: Detailed results for each fold
      \item combinations_summary: Average performance for combinations appearing
        in multiple folds (\code{NULL} if no combinations repeat)
    }
  \item with_clusters: Results for model with cluster representation
    \itemize{
      \item fold_results: List of diagnostic comparisons for each fold
      \item summary_by_fold: Detailed results for each fold
      \item combinations_summary: Average performance for combinations appearing
        in multiple folds (\code{NULL} if no combinations repeat)
    }
}
}
\description{
Validates PTSD diagnostic models using k-fold cross-validation to assess
generalization performance and identify stable symptom combinations.
}
\details{
The function:
\enumerate{
  \item Splits data into k folds
  \item For each fold, trains on k-1 folds and tests on the held-out fold
  \item Identifies symptom combinations that appear across multiple folds
  \item Calculates average performance metrics for repeated combinations
}

Two models are evaluated:
\itemize{
  \item Model without cluster representation: Any 4 of 6 symptoms
  \item Model with cluster representation: 4 of 6 symptoms with at least one from each cluster
}
}
\examples{
# Simulate item responses: 200 rows of 20 items, each scored 0-4
set.seed(42)
item_scores <- sample(0:4, size = 20 * 200, replace = TRUE)
sample_data <- data.frame(matrix(item_scores, nrow = 200, ncol = 20))
names(sample_data) <- paste0("symptom_", seq_len(20))

\donttest{
# Run the cross-validation with 5 folds
cv_results <- cross_validation(data = sample_data, k = 5)

# Fold-by-fold results for the model without cluster representation
cv_results$without_clusters$summary_by_fold

# Combinations that appeared in more than one fold
cv_results$without_clusters$combinations_summary
}

}
