% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RobSBoosting.R
\name{RobSBoosting}
\alias{RobSBoosting}
\title{Robust semiparametric gene-environment interaction analysis using sparse boosting}
\usage{
RobSBoosting(
  G,
  E,
  Y,
  loop_time,
  num.knots = NULL,
  Boundary.knots = NULL,
  degree = 1,
  v = 0.1,
  family = c("continuous", "survival"),
  knots = NULL,
  E_type
)
}
\arguments{
\item{G}{Input matrix of \code{p} genetic measurements consisting of \code{n} rows. Each row
is an observation vector.}

\item{E}{Input matrix of \code{q} environmental risk factors, each row is an observation
vector.}

\item{Y}{Response variable. A quantitative vector for \code{family="continuous"}. For
\code{family="survival"}, \code{Y} should be a two-column matrix with the first column being
the log(survival time) and the second column being the censoring indicator. The indicator is a
binary variable, with "1" indicating death and "0" indicating right censoring.}

\item{loop_time}{Number of iterations of the sparse boosting.}

\item{num.knots}{Number of knots for the B spline basis.}

\item{Boundary.knots}{The boundary of knots for the B spline basis.}

\item{degree}{Degree for the B spline basis.}

\item{v}{The step size used in the sparse boosting process. Default is 0.1.}

\item{family}{Response type of \code{Y} (see above).}

\item{knots}{List of knots for the B spline basis. Default is \code{NULL}, in which case knots
can be generated with the given \code{num.knots}, \code{degree} and \code{Boundary.knots}.}

\item{E_type}{A vector indicating the type of each E factor, with "ED" representing a discrete
E factor and "EC" representing a continuous E factor.}
}
\value{
An object with S3 class \code{"RobSBoosting"} is returned, which is a list with the following components.
\item{call}{The call that produced this object.}
\item{max_t}{The stopping iteration time of the sparse boosting.}
\item{spline_result}{A list of length \code{max_t} that includes the estimation results of
each iteration.}
\item{BIC}{A vector of length \code{max_t} that includes the Bayesian Information Criterion
based on the Huber's prediction error.}
\item{variable}{A vector of length \code{max_t} that includes the index of the variable
selected in each iteration.}
\item{id}{The iteration time with the smallest BIC.}
\item{variable_pair}{A matrix with two columns that include the set of variables that can
potentially enter the regression model at the stopping iteration time. Here, the first and
second columns correspond to the indexes of E factors and G factors, respectively. For example,
(1, 0) represents that this variable is the first E factor, and (1, 2) represents that the
variable is the interaction between the first E factor and the second G factor.}
\item{v_type}{A vector whose length is the number of rows of \code{variable_pair}, with each
element representing the variable type of the corresponding row of \code{variable_pair}. Here,
"EC" stands for continuous E effect, "ED" for discrete E effect, "G" for G effect, "EC-G"
for the interaction between "EC" and "G", and "ED-G" for the interaction between "ED" and "G".}
\item{family}{The same as input \code{family}.}
\item{degree}{Degree for the B spline basis.}
\item{v}{The step size used in the sparse boosting process.}
\item{NorM}{The values of B spline basis.}
\item{estimation_results}{A list of estimation results for each variable. Here, the first
\code{q} elements are for the E effects, the (\code{q+1})th element is for the first G effect
and the (\code{q+2})th to (\code{2q+1})th elements are for the interactions corresponding to
the first G factor, and so on.}
}
\description{
Robust semiparametric gene-environment interaction analysis using sparse boosting. Here a
semiparametric model is assumed to accommodate nonlinear effects, where we model continuous
environmental (E) factors in a nonlinear way, and discrete E factors and all genetic (G)
factors in a linear way. For estimating the nonlinear functions, the B spline expansion is
adopted. The Huber loss function and Qn estimator are adopted to accommodate long-tailed
distribution/data contamination. For model estimation and selection of relevant variables, we
adopt an effective sparse boosting approach, where the strong hierarchy is respected.
}
\examples{
# Load the example data set and split its columns into the objects used below:
# columns 1-20 -> G, 21-24 -> E, 25 -> Y, 26-27 -> Y_s.
data(Rob_data)
G <- Rob_data[, 1:20]
E <- Rob_data[, 21:24]
Y <- Rob_data[, 25]
Y_s <- Rob_data[, 26:27]

# Set the knots and the boundary knots to c(0, 1) for indices 1 to 4;
# the remaining rows of Boundary.knots stay at zero.
knots <- list()
Boundary.knots <- matrix(0, 24, 2)
for (i in seq_len(4)) {
  knots[[i]] <- c(0, 1)
  Boundary.knots[i, ] <- c(0, 1)
}

# Continuous response
fit1 <- RobSBoosting(
  G, E, Y,
  loop_time = 80,
  num.knots = 2,
  Boundary.knots = Boundary.knots,
  degree = 2,
  family = "continuous",
  knots = knots,
  E_type = c("EC", "EC", "ED", "ED")
)
coef1 <- coef(fit1)
predict1 <- predict(fit1, newE = E[1:2, ], newG = G[1:2, ])
plot(fit1)

\donttest{
# Survival response
fit2 <- RobSBoosting(
  G, E, Y_s,
  loop_time = 200,
  num.knots = 2,
  Boundary.knots = Boundary.knots,
  family = "survival",
  knots = knots,
  E_type = c("EC", "EC", "ED", "ED")
)
coef2 <- coef(fit2)
predict2 <- predict(fit2, newE = E[1:2, ], newG = G[1:2, ])
plot(fit2)
}
}
\references{
Mengyun Wu and Shuangge Ma.
\emph{Robust semiparametric gene-environment interaction analysis using sparse boosting.
Statistics in Medicine, 38(23):4625-4641, 2019.}
}
\seealso{
\code{bs} method for B spline expansion, \code{coef}, \code{predict}, and \code{plot} methods, and \code{Miss.boosting}
method.
}
