% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/alm.R
\name{alm}
\alias{alm}
\title{Augmented Linear Model}
\usage{
alm(formula, data, subset, na.action, distribution = c("dnorm", "dlaplace",
  "ds", "dgnorm", "dlogis", "dt", "dalaplace", "dlnorm", "dllaplace", "dls",
  "dlgnorm", "dbcnorm", "dfnorm", "dinvgauss", "dgamma", "dpois", "dnbinom",
  "dbeta", "dlogitnorm", "plogis", "pnorm"), loss = c("likelihood", "MSE",
  "MAE", "HAM", "LASSO", "RIDGE"), occurrence = c("none", "plogis", "pnorm"),
  orders = c(0, 0, 0), parameters = NULL, fast = FALSE, ...)
}
\arguments{
\item{formula}{an object of class "formula" (or one that can be coerced to
that class): a symbolic description of the model to be fitted. Can also include
\code{trend}, which would add the global trend.}

\item{data}{a data frame or a matrix, containing the variables in the model.}

\item{subset}{an optional vector specifying a subset of observations to be
used in the fitting process.}

\item{na.action}{a function which indicates what should happen when the
data contain NAs. The default is set by the na.action setting of
\link[base]{options}, and is \link[stats]{na.fail} if that is unset. The
factory-fresh default is \link[stats]{na.omit}. Another possible value
is NULL, no action. Value \link[stats]{na.exclude} can be useful.}

\item{distribution}{what density function to use in the process. The full
name of the distribution should be provided here. Values with "d" in the
beginning of the name refer to the density function, while "p" stands for
"probability" (cumulative distribution function). The names align with the
names of distribution functions in R. For example, see \link[stats]{dnorm}.}

\item{loss}{The type of Loss Function used in optimization. \code{loss} can
be:
\itemize{
\item \code{likelihood} - the model is estimated via the maximisation of the
likelihood of the function specified in \code{distribution};
\item \code{MSE} (Mean Squared Error),
\item \code{MAE} (Mean Absolute Error),
\item \code{HAM} (Half Absolute Moment),
\item \code{LASSO} - use LASSO to shrink the parameters of the model;
\item \code{RIDGE} - use RIDGE to shrink the parameters of the model.
}
In case of LASSO / RIDGE, the variables are not normalised prior to the estimation,
but the parameters are divided by the standard deviations of explanatory variables
inside the optimisation. As a result, the parameters of the final model have the
same interpretation as in the case of classical linear regression. Note that the
user is expected to provide the parameter \code{lambda}.

A user can also provide their own function here, making sure
that it accepts parameters \code{actual}, \code{fitted} and \code{B}. Here is an
example:

\code{lossFunction <- function(actual, fitted, B, xreg) return(mean(abs(actual-fitted)))}
\code{loss=lossFunction}

See \code{vignette("alm","greybox")} for some details on losses and distributions.}

\item{occurrence}{what distribution to use for occurrence variable. Can be
\code{"none"}, then nothing happens; \code{"plogis"} - then the logistic
regression using \code{alm()} is estimated for the occurrence part;
\code{"pnorm"} - then probit is constructed via \code{alm()} for the
occurrence part. In both of the latter cases, the formula used is the same
as the formula for the sizes. Finally, an "alm" model can be provided and
its estimates will be used in the model construction.

If this is not \code{"none"}, then the model is estimated
in two steps: 1. Occurrence part of the model; 2. Sizes part of the model
(excluding zeroes from the data).}

\item{orders}{the orders of ARIMA to include in the model. Only non-seasonal
orders are accepted.}

\item{parameters}{vector of parameters of the linear model. When \code{NULL}, it
is estimated.}

\item{fast}{if \code{TRUE}, then the function won't check whether
the data has variability and whether the regressors are correlated. Might
cause trouble, especially in cases of multicollinearity.}

\item{...}{additional parameters to pass to distribution functions. This
includes:
\itemize{
\item \code{alpha} - value for Asymmetric Laplace distribution;
\item \code{size} - the size for the Negative Binomial distribution;
\item \code{nu} - the number of degrees of freedom for Chi-Squared and Student's t;
\item \code{shape} - the shape parameter for Generalised Normal distribution;
\item \code{lambda} - the meta parameter for LASSO / RIDGE. Should be between 0 and 1,
regulating the strength of shrinkage, where 0 means don't shrink parameters (use MSE)
and 1 means shrink everything (ignore MSE);
\item \code{lambdaBC} - lambda for Box-Cox transform parameter in case of Box-Cox
Normal Distribution;
\item \code{FI=TRUE} will make the function also produce Fisher Information
matrix, which then can be used to calculate variances of smoothing parameters
and initial states of the model. This is used in the \link[stats]{vcov} method.
}

You can also pass parameters to the optimiser:
\enumerate{
\item \code{B} - the vector of starting values of parameters for the optimiser,
should correspond to the ordering of the explanatory variables;
\item \code{algorithm} - the algorithm to use in optimisation
(\code{"NLOPT_LN_SBPLX"} by default);
\item \code{maxeval} - maximum number of evaluations to carry out. Default is 40 per
estimated parameter. In case of LASSO / RIDGE the default is 80 per estimated parameter;
\item \code{maxtime} - stop, when the optimisation time (in seconds) exceeds this;
\item \code{xtol_rel} - the precision of the optimiser (the default is 1E-6);
\item \code{xtol_abs} - the absolute precision of the optimiser (the default is 1E-8);
\item \code{ftol_rel} - the stopping criterion in case of the relative change in the loss
function (the default is 1E-4);
\item \code{ftol_abs} - the stopping criterion in case of the absolute change in the loss
function (the default is 0 - not used);
\item \code{print_level} - the level of output for the optimiser (0 by default).
If equal to 41, then the detailed results of the optimisation are returned.
}
You can read more about these parameters by running the function
\link[nloptr]{nloptr.print.options}.}
}
\value{
Function returns \code{model} - the final model of the class
"alm", which contains:
\itemize{
\item coefficients - estimated parameters of the model,
\item FI - Fisher Information of parameters of the model. Returned only when \code{FI=TRUE},
\item fitted - fitted values,
\item residuals - residuals of the model,
\item mu - the estimated location parameter of the distribution,
\item scale - the estimated scale parameter of the distribution,
\item distribution - distribution used in the estimation,
\item logLik - log-likelihood of the model. Only returned, when \code{loss="likelihood"}
and in several other special cases of distribution and loss combinations (e.g. \code{loss="MSE"},
\code{distribution="dnorm"}),
\item loss - the type of the loss function used in the estimation,
\item lossFunction - the loss function, if a custom one is provided by the user,
\item lossValue - the value of the loss function,
\item df.residual - number of degrees of freedom of the residuals of the model,
\item df - number of degrees of freedom of the model,
\item call - how the model was called,
\item rank - rank of the model,
\item data - data used for the model construction,
\item terms - terms of the data. Needed for some additional methods to work,
\item occurrence - the occurrence model used in the estimation,
\item B - the value of the optimised parameters. Typically, this is a duplicate of coefficients,
\item other - the list of all the other parameters either passed to the
function or estimated in the process, but not included in the standard output
(e.g. \code{alpha} for Asymmetric Laplace),
\item timeElapsed - the time elapsed for the estimation of the model.
}
}
\description{
The function estimates a model based on the selected distribution.
}
\details{
This is a function, similar to \link[stats]{lm}, but using likelihood for the cases
of several non-normal distributions. These include:
\enumerate{
\item \link[stats]{dnorm} - Normal distribution,
\item \link[greybox]{dlaplace} - Laplace distribution,
\item \link[greybox]{ds} - S-distribution,
\item \link[greybox]{dgnorm} - Generalised Normal distribution,
\item \link[stats]{dlogis} - Logistic Distribution,
\item \link[stats]{dt} - T-distribution,
\item \link[greybox]{dalaplace} - Asymmetric Laplace distribution,
\item \link[stats]{dlnorm} - Log normal distribution,
\item dllaplace - Log Laplace distribution,
\item dls - Log S-distribution,
\item dlgnorm - Log Generalised Normal distribution,
\item \link[greybox]{dfnorm} - Folded normal distribution,
\item \link[greybox]{dbcnorm} - Box-Cox normal distribution,
\item \link[statmod]{dinvgauss} - Inverse Gaussian distribution,
\item \link[stats]{dgamma} - Gamma distribution,
\item \link[greybox]{dlogitnorm} - Logit-normal distribution,
\item \link[stats]{dbeta} - Beta distribution,
\item \link[stats]{dpois} - Poisson Distribution,
\item \link[stats]{dnbinom} - Negative Binomial Distribution,
\item \link[stats]{plogis} - Cumulative Logistic Distribution,
\item \link[stats]{pnorm} - Cumulative Normal distribution.
}

This function can be considered as an analogue of \link[stats]{glm}, but with the
focus on time series. This is why, for example, the function has \code{orders} parameter
for ARIMA and produces time series analysis plots with \code{plot(alm(...))}.

This function is slower than \code{lm}, because it relies on likelihood estimation
of parameters, hessian calculation and matrix multiplication. So think twice when
using \code{distribution="dnorm"} here.

The estimation is done via the maximisation of likelihood of a selected distribution,
so the number of estimated parameters always includes the scale. Thus the number of degrees
of freedom of the model in case of \code{alm} will typically be lower than in the case of
\code{lm}.

See more details and examples in the vignette for "ALM": \code{vignette("alm","greybox")}.
}
\examples{

### An example with mtcars data and factors
mtcars2 <- within(mtcars, {
   vs <- factor(vs, labels = c("V", "S"))
   am <- factor(am, labels = c("automatic", "manual"))
   cyl  <- factor(cyl)
   gear <- factor(gear)
   carb <- factor(carb)
})
# The standard model with Log Normal distribution
ourModel <- alm(mpg~., mtcars2[1:30,], distribution="dlnorm")
summary(ourModel)
\donttest{plot(ourModel)}

# Produce predictions with the one sided interval (upper bound)
predict(ourModel, mtcars2[-c(1:30),], interval="p", side="u")


### Artificial data for the other examples
\donttest{xreg <- cbind(rlaplace(100,10,3),rnorm(100,50,5))
xreg <- cbind(100+0.5*xreg[,1]-0.75*xreg[,2]+rlaplace(100,0,3),xreg,rnorm(100,300,10))
colnames(xreg) <- c("y","x1","x2","Noise")}

# An example with Laplace distribution
\donttest{ourModel <- alm(y~x1+x2+trend, xreg, subset=c(1:80), distribution="dlaplace")
summary(ourModel)
plot(predict(ourModel,xreg[-c(1:80),]))}

# And another one with Asymmetric Laplace distribution (quantile regression)
# with optimised alpha
\donttest{ourModel <- alm(y~x1+x2, xreg, subset=c(1:80), distribution="dalaplace")}

# An example with AR(1) order
\donttest{ourModel <- alm(y~x1+x2, xreg, subset=c(1:80), distribution="dnorm", orders=c(1,0,0))
summary(ourModel)
plot(predict(ourModel,xreg[-c(1:80),]))}

### Examples with the count data
\donttest{xreg[,1] <- round(exp(xreg[,1]-70),0)}

# Negative Binomial distribution
\donttest{ourModel <- alm(y~x1+x2, xreg, subset=c(1:80), distribution="dnbinom")
summary(ourModel)
predict(ourModel,xreg[-c(1:80),],interval="p",side="u")}

# Poisson distribution
\donttest{ourModel <- alm(y~x1+x2, xreg, subset=c(1:80), distribution="dpois")
summary(ourModel)
predict(ourModel,xreg[-c(1:80),],interval="p",side="u")}


### Examples with binary response variable
\donttest{xreg[,1] <- round(xreg[,1] / (1 + xreg[,1]),0)}

# Logistic distribution (logit regression)
\donttest{ourModel <- alm(y~x1+x2, xreg, subset=c(1:80), distribution="plogis")
summary(ourModel)
plot(predict(ourModel,xreg[-c(1:80),],interval="c"))}

# Normal distribution (probit regression)
\donttest{ourModel <- alm(y~x1+x2, xreg, subset=c(1:80), distribution="pnorm")
summary(ourModel)
plot(predict(ourModel,xreg[-c(1:80),],interval="p"))}

}
\seealso{
\code{\link[greybox]{stepwise}, \link[greybox]{lmCombine},
\link[greybox]{xregTransformer}}
}
\author{
Ivan Svetunkov, \email{ivan@svetunkov.ru}
}
\keyword{models}
\keyword{nonlinear}
\keyword{regression}
\keyword{ts}
