\name{ffnet}
\title{Fit Fast and Flexible Single-Hidden-Layer Neural NETwork}
\usage{
ffnet (formula, data = NULL, errfunc, outtype, \dots)
ffnet (y, x, lag = 1, lagx = 1, steps = 1, errfunc, outtype, \dots)
ffnet (x, y, wts, nhid = 0, hidtype = "SIG", outtype = "LIN",
       shortcut = TRUE, fwts, hessian = FALSE, errfunc,
       type, regularizer = "NOR", regc = 1.0, optimizer = "DFPMIN",
       batch = TRUE, trace = "PRINT", itmax = 1000, tol = 1.0e-6,
       epoch = 10, itepoch = 10, eta = 0.1, alpha = 0.7, temp = 100.0,
       ittemp = 10, hold = FALSE, col = "black", xlim = range(0,itmax),
       ylim = range(nout*nobs*0.001,fo), ptype = "l",
       ylab = "Error Function", xlab = "Iterations",
       main = paste(optimizer,"Optimization"))
}
\alias{ffnet}
\alias{ffnet.default}
\alias{ffnet.formula}
\alias{ffnet.ts}
\arguments{
  \item{formula}{a symbolic description of the model to be fit. Note
    that an intercept is always included, whether given in the formula
    or not.}
  \item{data}{an optional data frame containing the variables in
    the model. By default the variables are taken
    from the environment which \code{ffnet} is called from.}
  \item{x}{for \code{ffnet.default} a matrix, vector, or
    data frame of (input) examples. For \code{ffnet.ts} the time series of
    exogenous variables.} 
  \item{y}{for \code{ffnet.default} a matrix, vector, or data frame of
    target examples. For \code{ffnet.ts} the time series of endogenous
    variables.} 
  \item{lag}{an integer specifying the lag order of the time series
    model.}
  \item{lagx}{an integer specifying the number of lagged \code{x} that
    are included in the model.}
  \item{steps}{the time series regression is computed for the \code{steps}-step
    ahead conditional expectation, i.e., the direct approach is used to
    compute multi-step predictions.}
  \item{wts}{a numerical vector of initial weights (parameters). If
    missing, then the weight vector is chosen randomly from a uniform
    distribution on (-1,1).}
  \item{nhid}{the number of hidden units to fit.}
  \item{hidtype}{a string specifying the type of hidden unit activation
    functions. Currently implemented are \code{"SIG"} for
    the logistic sigmoid 1/(1+exp(-x)) and \code{"TAN"} for tanh(x).}
  \item{outtype}{a string specifying the type of output unit activation
    functions. Currently implemented are \code{"LIN"} for identity
    and \code{"SOFT"} for softmax outputs.}
  \item{shortcut}{switch for shortcut connections. The linear model is
    obtained by setting \code{nhid} to zero and switching to shortcuts.}
  \item{fwts}{a vector of integers indicating which weights are fixed
    during the fit.}
  \item{hessian}{if true, the Hessian of the objective function
    at the best set of weights is returned as component \code{hessian}.}
  \item{errfunc}{a string which specifies the type of objective
    function for the fit. Currently implemented are \code{"SSE"} for the
    sum of squared errors, \code{"MAD"} for the mean absolute deviation,
    \code{"GSSE"} for the generalized sum of squared errors,
    and \code{"ENTROPY"}. Note that the latter can be used only together
    with softmax units and vice versa. Defaults to \code{"SSE"} for
    univariate \code{y}, \code{"GSSE"} for multivariate \code{y},
    and \code{"ENTROPY"} for factor \code{y}.}
  \item{type}{a string indicating whether the prediction for targets of
    class \code{"factor"} is a factor or a coded factor.
    \code{type = "CLASS"} returns factors and \code{type = "RAW"}
    returns coded factors. It applies only to targets which are
    factors.}
  \item{regularizer}{specifies the type of regularization.
    \code{"NOR"} for no regularization, \code{"WDR"} for weight decay (ridge
    regression), and \code{"LASSO"} for the least absolute shrinkage and
    selection operator (Laplace prior distribution).}
  \item{regc}{the hyper parameter for the regularizer.}
  \item{optimizer}{specifies the optimization procedure for the fit.
    Currently implemented are simple gradient descent with momentum
    term \code{"GRDDSC"}, steepest descent \code{"STPDSC"}, conjugate
    gradient \code{"FRPRMN"}, and BFGS quasi-Newton \code{"DFPMIN"}. 
    Global optimization is provided with two types of simulated
    annealing \code{"SANN"} and \code{"NRSANN"}.}
  \item{batch}{a switch for batch (off-line) or epoch based (on-line)
    training (only for the local optimizers).}
  \item{trace}{a string which specifies the type of optimizer
    output. \code{"PRINT"} is for standard output, \code{"PLOT"} shows
    the optimization process in a graphical window, and \code{"NO"} is for
    silent optimization.}
  \item{itmax}{the maximum number of iterations during optimization.}
  \item{tol}{the convergence tolerance either on the function value
    or on zeroing the gradient (local optimizers);
    cf. the references.}
  \item{epoch}{size of the epoch data set (local optimizers).}
  \item{itepoch}{the optimizer performs at most \code{itepoch}
    iterations on a given epoch data set. Setting \code{itepoch} to one
    gives the usual on-line fitting (local optimizers).}
  \item{eta}{the learning rate for \code{"GRDDSC"}.}
  \item{alpha}{momentum term for \code{"GRDDSC"}.}
  \item{temp}{the starting temperature for simulated annealing.}
  \item{ittemp}{the number of iterations at constant temperature
    (simulated annealing).}
  \item{hold}{a logical indicating whether the current graphical window
    is reused (\code{TRUE}) instead of opening a new one (\code{FALSE})
    if \code{trace == "PLOT"}.}
  \item{col}{the plotting color for \code{trace == "PLOT"}.}
  \item{xlim, ylim}{the ranges of the axes for \code{trace == "PLOT"}.}
  \item{ptype}{the type of the plot for \code{trace == "PLOT"}.}
  \item{xlab,ylab}{the axis labels for \code{trace == "PLOT"}.}
  \item{main}{the main title of the plot for \code{trace == "PLOT"}.}
  \item{\dots}{additional arguments to \code{ffnet.default}.}
}
\description{
  Fits a single-hidden-layer neural network, possibly with
  shortcut connections, either for a regression-type model of the form

  \code{y~x},   or    \code{formula},

  or a time series model of order \code{lag} for \code{y} with exogenous
  variables \code{x} of the form
  
  \code{y[t] = NN (y[t-1], \dots, y[t-lag], x[t-1], \dots, x[t-lagx]) +
    e[t]}. 
}
\details{
  These generic procedures call routines from a C/C++ library.
  The library uses some optimization procedures provided by
  Numerical Recipes in C. The optimization is fairly efficient, since
  it is carried out entirely in C/C++.

  The Hessian is computed by numerically differentiating the gradient
  vector. This may be quite slow for a large Hessian matrix.

  Missing values are not allowed.
}
\value{
  A list with class \code{"ffnet"} containing the following components:
  \item{nin, nhid, nout}{the number of input, hidden, and output units.}
  \item{hidtype, outtype}{the type of hidden and output activation function.}
  \item{shortcut}{a binary for the presence of shortcut connections.}
  \item{wts}{the best set of weights found.}
  \item{fwts}{the vector of fixed weights.}
  \item{value}{value of the objective function plus regularizer term at
    the best set of weights.}
  \item{fitted.values}{the fitted values for the training data.}
  \item{residuals}{the residuals found for the training data.}
  \item{x}{the matrix, vector, time series, or data frame of (input) examples.}
  \item{y}{the matrix, vector, time series, or data frame of target examples.}
  \item{hessian}{the Hessian of the objective function plus regularizer
    at the best set of weights.}
  \item{errfunc}{the objective function of the fit.}
  \item{regularizer, regc}{the regularizer and its hyperparameter of the
    fit.}
  \item{coefnames}{the column names of the inputs.}
  \item{lev}{for factor targets and \code{type} \code{"CLASS"}, it
    contains the levels of the factor.}
  \item{lag}{the lag order of the time series model.}
  \item{lagx}{for time series models with specified \code{x} the number
    of lagged \code{x} included in the model.}
  \item{steps}{the number of steps ahead for which the conditional
    expectation is fit.}
  \item{call}{for internal use.}
  \item{terms}{for internal use.}
}
\seealso{
  \code{\link{predict.ffnet}}, \code{\link{hessian.ffnet}},
  \code{\link{print.ffnet}}, \code{\link{fitted.ffnet}},
  \code{\link{residuals.ffnet}}, \code{\link{plot.ffnet}},
  \code{\link{coef.ffnet}}, \code{\link{print.summary.ffnet}}
}
\references{
  C.M. Bishop (1995): \emph{Neural Networks for Pattern
  Recognition}, Clarendon Press, Oxford.

  P.J. Brockwell and R.A. Davis (1991): \emph{Time Series: Theory and
  Methods}, 2nd Edition, Springer, New York.
  
  W.H. Press, S.A. Teukolsky, W.T. Vetterling, and B.P.
  Flannery (1995): \emph{Numerical Recipes in C: The Art of 
  Scientific Computing}, 2nd Edition, Cambridge, MA.
  
  A. Trapletti (1999): FFNET: A Fast and Flexible software
  library for single-hidden-layer neural NETworks, unpublished
  report.
}
\author{A. Trapletti}
\examples{
# Non-linear Regression: noisy cubic target on (-1, 1)
x <- runif (1000, -1, 1)
y <- x^2 - x^3 + 0.1*rnorm(length(x))
plot (x, y)
# Linear benchmark; fitted() gives its values at the training inputs
rr <- lm (y~x)
summary (rr)
points (x, fitted(rr), col="red")
# Network with one hidden unit and shortcut connections
nn <- ffnet (y~x, nhid=1, itmax=100, optimizer="DFPMIN", shortcut=TRUE)
summary (nn)
points (x, predict(nn,x), col="green")

# Differences between Optimizers: every run below is started from the
# same initial weights.
wts <- c(0.8597997,0.6164846,0.5746838,0.4775891,0.3435093)
system.time(nn <- ffnet (y~x, nhid=1, itmax=800, optimizer="DFPMIN",
            shortcut=TRUE, trace="PLOT", wts=wts,
            main="Differences between Optimizers"))
system.time(nn <- ffnet (y~x, nhid=1, itmax=800, optimizer="FRPRMN",
            shortcut=TRUE, trace="PLOT", wts=wts, hold=TRUE, col="red"))
system.time(nn <- ffnet (y~x, nhid=1, itmax=800, optimizer="STPDSC",
            shortcut=TRUE, trace="PLOT", wts=wts, hold=TRUE, col="green"))
# eta is the learning rate of the "GRDDSC" optimizer
system.time(nn <- ffnet (y~x, nhid=1, itmax=800, optimizer="GRDDSC",
            shortcut=TRUE, trace="PLOT", wts=wts, hold=TRUE, col="blue",
            eta=0.001))

# Time Series Modelling: aggregate sunspots to one (mean) value per
# time unit and rescale by 100
data (sunspots)
x <- aggregate (sunspots, nfrequency=1, FUN=mean)/100
train <- window (x, end=1960)
test <- window (x, start=1961)
# Model of lag order 2 with the default network settings
lin <- ffnet (train, lag=2)
summary (lin)
# Two-stage fit with two hidden units: an epoch-based (on-line) run
# first, then a batch run started from the weights it found
nonlin <- ffnet (train, lag=2, nhid=2, batch=FALSE,
                 epoch=length(train)-50, itepoch=300, itmax=5, shortcut=FALSE)
nonlin <- ffnet (train, lag=2, nhid=2, wts=nonlin$wts, shortcut=FALSE)
summary (nonlin)
plot (test, type="b")
points (predict(lin,test), col="red", type="b")
points (predict(nonlin,test), col="green", type="b")

# Classification: model Species on all other columns of iris
data ("iris")
nn <- ffnet (Species ~ ., nhid=1, data=iris)
summary (nn)

# Another time series example, see Brockwell and Davis (1991),
# pp. 549-552, for linear and threshold AR models.
data (lynx)
# The in-sample period ends 1920 and the out-of-sample period starts
# 1921; xout starts at 1909 = 1921 - 12 so that the 12 lagged inputs
# needed with lag=12 are available.
xin <- window (log10(lynx), end=1920)
xout <- window (log10(lynx), start=1909)

# Fit particular AR(12) as in Brockwell and Davis (1991): the weights
# indexed by fwts (the zero entries of wts) are held fixed during the fit.
wts <- c(rnorm(5),rep(0,4),rnorm(1),rep(0,2),rnorm(1))
fwts <- c(6,7,8,9,11,12)
nn.lin <- ffnet (xin, lag=12, wts=wts, fwts=fwts)

# Fit similar AR-NN(12), best In-Sample performance is about 2.37,
# it is possible that you have to re-start the sequence below several
# times to find the best AR-NN(12)
wts <- c(rnorm(5),rep(0,4),rnorm(1),rep(0,2),rnorm(3),nn.lin$wts[2:13])
fwts <- c(6,7,8,9,11,12,20,21,22,23,25,26)
# Epoch-based (on-line) run first, then a batch run from its weights
nn.nonlin <- ffnet (xin, lag=12, nhid=1, wts=wts, fwts=fwts, batch=FALSE,
                    epoch=30, itepoch=2, itmax=400, trace="NO")
nn.nonlin <- ffnet (xin, lag=12, nhid=1, wts=nn.nonlin$wts, fwts=fwts)

# Out-of-Sample prediction, threshold model gives 0.120
# (root mean squared error over the 14 out-of-sample values)
sqrt(sum((predict(nn.lin,xout)-xout)^2)/14)
sqrt(sum((predict(nn.nonlin,xout)-xout)^2)/14)
}
\keyword{nonlinear}
\keyword{neural}
\keyword{ts}








