% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sparegcv.R
\name{spar.cv}
\alias{spar.cv}
\alias{spareg.cv}
\title{Sparse Projected Averaged Regression with Cross-Validation}
\usage{
spar.cv(
  x,
  y,
  family = gaussian("identity"),
  model = spar_glmnet(),
  rp = NULL,
  screencoef = NULL,
  nfolds = 10,
  nnu = 20,
  nus = NULL,
  nummods = c(20),
  measure = c("deviance", "mse", "mae", "class", "1-auc"),
  avg_type = c("link", "response"),
  parallel = FALSE,
  seed = NULL,
  ...
)

spareg.cv(
  x,
  y,
  family = gaussian("identity"),
  model = spar_glmnet(),
  rp = NULL,
  screencoef = NULL,
  nfolds = 10,
  nnu = 20,
  nus = NULL,
  nummods = c(20),
  measure = c("deviance", "mse", "mae", "class", "1-auc"),
  avg_type = c("link", "response"),
  parallel = FALSE,
  seed = NULL,
  ...
)
}
\arguments{
\item{x}{n x p numeric matrix of predictor variables.}

\item{y}{quantitative response vector of length n.}

\item{family}{a \code{'\link[stats]{family}'} object used for the marginal
generalized linear model; defaults to \code{gaussian("identity")}.}

\item{model}{function creating a \code{'sparmodel'} object;
defaults to \code{spar_glm()} for gaussian family with identity link and to
\code{spar_glmnet()} for all other family-link combinations.}

\item{rp}{function creating a \code{'randomprojection'} object.}

\item{screencoef}{function creating a \code{'screeningcoef'} object.}

\item{nfolds}{number of folds to use for cross-validation; should be at least 2, defaults to 10.}

\item{nnu}{number of different threshold values \eqn{\nu} to consider for thresholding;
ignored when \code{nus} is provided; defaults to 20.}

\item{nus}{optional vector of \eqn{\nu}'s to consider for thresholding;
if not provided, \code{nnu} values ranging from 0 to the maximum absolute
marginal coefficient are used.}

\item{nummods}{vector of numbers of marginal models to consider for
validation; defaults to \code{c(20)}.}

\item{measure}{loss to use for validation; defaults to \code{"deviance"},
which is available for all families. Other options are \code{"mse"} or \code{"mae"}
(between responses and predicted means, for all families),
\code{"class"} (misclassification error) and
\code{"1-auc"} (one minus area under the ROC curve), both available only for the
binomial family.}

\item{avg_type}{type of averaging the marginal models; either on link (default)
or on response level. This is used in computing the validation measure.}

\item{parallel}{assuming a parallel backend is loaded and available, a
logical indicating whether the function should use it in parallelizing the
estimation of the marginal models. Defaults to FALSE.}

\item{seed}{integer seed to be set at the beginning of the SPAR algorithm. Defaults to \code{NULL}, in which case no seed is set.}

\item{...}{further arguments, mainly to ensure backward compatibility.}
}
\value{
object of class \code{'spar.cv'} with elements
\itemize{
\item \code{betas} sparse matrix of dimension p x \code{max(nummods)} and of class
\code{'\link[Matrix:dgCMatrix-class]{Matrix::dgCMatrix}'} containing the
standardized coefficients from each marginal model computed with the spar
algorithm on the whole training data.
\item \code{intercepts} vector of length \code{max(nummods)} containing the intercepts
used in each marginal model, computed with the spar algorithm on the whole training data.
\item \code{scr_coef} p-vector of coefficients used for screening for standardized predictors
\item \code{inds} list of length \code{max(nummods)} containing the index-vectors
of the variables kept after screening in each marginal model
\item \code{RPMs} list of length \code{max(nummods)} containing the projection matrices used in each marginal model
\item \code{val_res} a \code{data.frame} with CV results for each fold and for each element of \code{nus} and \code{nummods}
\item \code{nus} vector of \eqn{\nu}'s considered for thresholding
\item \code{nummods} vector of numbers of marginal models considered for validation
\item \code{family} a character string corresponding to the \link[stats]{family} object used for the marginal generalized linear model, e.g.,
\code{"gaussian(identity)"}
\item \code{measure} character, type of validation measure used
\item \code{avg_type} character, averaging type for computing the validation measure
\item \code{rp} an object of class \code{'randomprojection'}
\item \code{screencoef} an object of class \code{'screeningcoef'}
\item \code{model} an object of class \code{'sparmodel'}
\item \code{ycenter} empirical mean of initial response vector
\item \code{yscale} empirical standard deviation of initial response vector
\item \code{xcenter} p-vector of empirical means of initial predictor variables
\item \code{xscale} p-vector of empirical standard deviations of initial predictor variables
}
}
\description{
Apply Sparse Projected Averaged Regression to High-Dimensional Data, where the
number of models and the threshold parameter are chosen using a cross-validation
procedure.
}
\examples{
\donttest{
example_data <- simulate_spareg_data(n = 100, p = 400, ntest = 100)
spar_res <- spar.cv(example_data$x, example_data$y, nfolds = 3L,
  rp = rp_gaussian(), nummods = c(5, 10))
spar_res
coefs <- coef(spar_res)
pred <- predict(spar_res, example_data$x)
plot(spar_res)
plot(spar_res, plot_type = "val_measure", plot_along = "nummod", nu = 0)
plot(spar_res, plot_type = "val_measure", plot_along = "nu", nummod = 10)
plot(spar_res, plot_type = "val_numactive",  plot_along = "nummod", nu = 0)
plot(spar_res, plot_type = "val_numactive",  plot_along = "nu", nummod = 10)
plot(spar_res, plot_type = "res_vs_fitted",  xfit = example_data$xtest,
  yfit = example_data$ytest, opt_par = "1se")
plot(spar_res, "coefs", prange = c(1, 400))
}
\donttest{
spar_res <- spareg.cv(example_data$x, example_data$y,
  nummods=c(5, 10, 15, 20, 25, 30))
}
}
\seealso{
\link{spar}, \link{coef.spar.cv}, \link{predict.spar.cv}, \link{plot.spar.cv}, \link{print.spar.cv}
}
