% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/enspls.ad.R
\name{enspls.ad}
\alias{enspls.ad}
\title{Ensemble Sparse Partial Least Squares for
Model Applicability Domain Evaluation}
\usage{
enspls.ad(x, y, xtest, ytest, maxcomp = 5L, cvfolds = 5L, alpha = seq(0.2,
  0.8, 0.2), space = c("sample", "variable"), method = c("mc", "boot"),
  reptimes = 500L, ratio = 0.8, parallel = 1L)
}
\arguments{
\item{x}{Predictor matrix of the training set.}

\item{y}{Response vector of the training set.}

\item{xtest}{List, with the i-th component being the i-th test set's
predictor matrix (see example code below).}

\item{ytest}{List, with the i-th component being the i-th test set's
response vector (see example code below).}

\item{maxcomp}{Maximum number of components included within each model.
If not specified, will use \code{5} by default.}

\item{cvfolds}{Number of cross-validation folds used in each model
for automatic parameter selection, default is \code{5}.}

\item{alpha}{Parameter (grid) controlling sparsity of the model.
If not specified, default is \code{seq(0.2, 0.8, 0.2)}.}

\item{space}{Space in which to apply the resampling method.
Can be the sample space (\code{"sample"}) or
the variable space (\code{"variable"}).}

\item{method}{Resampling method. \code{"mc"} (Monte-Carlo resampling)
or \code{"boot"} (bootstrapping). Default is \code{"mc"}.}

\item{reptimes}{Number of models to build with Monte-Carlo resampling
or bootstrapping. Default is \code{500}.}

\item{ratio}{Sampling ratio used when \code{method = "mc"}.
Default is \code{0.8}.}

\item{parallel}{Integer. Number of CPU cores to use.
Default is \code{1} (not parallelized).}
}
\value{
A list containing:
\itemize{
\item \code{tr.error.mean} -
absolute mean prediction error for training set
\item \code{tr.error.median} -
absolute median prediction error for training set
\item \code{tr.error.sd} -
standard deviation of prediction error for training set
\item \code{tr.error.matrix} -
raw prediction error matrix for training set
\item \code{te.error.mean} -
list of absolute mean prediction error for test set(s)
\item \code{te.error.median} -
list of absolute median prediction error for test set(s)
\item \code{te.error.sd} -
list of standard deviations of prediction error for test set(s)
\item \code{te.error.matrix} -
list of raw prediction error matrices for test set(s)
}
}
\description{
Model applicability domain evaluation with
ensemble sparse partial least squares.
}
\note{
Note that for \code{space = "variable"}, \code{method} can
only be \code{"mc"}, since bootstrapping in the variable space
would create duplicated variables, which could cause problems.
}
\examples{
data("logd1k")
# remove low variance variables
x = logd1k$x[, -c(17, 52, 59)]
y = logd1k$y

# training set
x.tr = x[1:300, ]
y.tr = y[1:300]

# two test sets
x.te = list("test.1" = x[301:400, ],
            "test.2" = x[401:500, ])
y.te = list("test.1" = y[301:400],
            "test.2" = y[401:500])

set.seed(42)
ad = enspls.ad(x.tr, y.tr, x.te, y.te,
               maxcomp = 3, alpha = c(0.3, 0.6, 0.9),
               space = "variable", method = "mc",
               ratio = 0.8, reptimes = 10)
print(ad)
plot(ad)
# The interactive plot requires an HTML viewer
\dontrun{
plot(ad, type = "interactive")}
}
\author{
Nan Xiao <\url{http://nanx.me}>
}

