\name{maxent}
\alias{maxent}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{
Train a maximum entropy model from a training matrix and a vector or factor of labels
}
\description{
Trains a multinomial logistic regression model of class \code{\link{maxent-class}} given a \code{matrix} or \code{matrix.csr} with training data, and a \code{vector} or \code{factor} with corresponding labels. Additional parameters such as \code{feature_cutoff}, \code{gaussian_prior}, \code{inequality_constraints}, and \code{set_heldout} help prevent model overfitting.
}
\usage{
maxent(feature_matrix, code_vector, feature_cutoff = 0, gaussian_prior = 0, 
inequality_constraints = 0, set_heldout = 0)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{feature_matrix}{
Either a regular \code{matrix} (for example, a \code{DocumentTermMatrix} from package \pkg{tm} converted with \code{as.matrix}), or a \code{matrix.csr} representation generated by \code{\link{as.compressed.matrix}}.
}
  \item{code_vector}{
A \code{factor} or \code{vector} of labels corresponding to each document in the \code{feature_matrix}.
}
  \item{feature_cutoff}{
An \code{integer} setting the feature cutoff for the model.
}
  \item{gaussian_prior}{
An \code{integer} setting the Gaussian prior for the model.
}
  \item{inequality_constraints}{
A \code{numeric} setting the inequality constraints for the model.
}
  \item{set_heldout}{
An \code{integer} specifying the number of documents to hold out. Sets a held-out subset of your data to test against and prevent overfitting.
}
}
\details{
Yoshimasa Tsuruoka recommends using one of the following three methods if you see overfitting, with method 3 as the preferred one.

\enumerate{
  \item Set the \code{feature_cutoff} parameter to \code{3}, leaving \code{gaussian_prior} and \code{inequality_constraints} at their defaults.
  \item Set the \code{gaussian_prior} parameter to \code{1000}, leaving \code{feature_cutoff} and \code{inequality_constraints} at their defaults.
  \item Set the \code{inequality_constraints} parameter to \code{1.0}, leaving \code{feature_cutoff} and \code{gaussian_prior} at their defaults.
}
}
\value{
Returns an object of class \code{\link{maxent-class}} with two slots.

\item{model}{A \code{character vector} containing the trained maximum entropy model.}
\item{weights}{A \code{data.frame} listing all the weights in three columns: \code{Weight}, \code{Label}, and \code{Feature}.} 
}
\references{
Y. Tsuruoka. "A simple C++ library for maximum entropy classification." University of Tokyo Department of Computer Science (Tsujii Laboratory), 2006. URL \url{http://www-tsujii.is.s.u-tokyo.ac.jp/~tsuruoka/maxent/}.
}
\author{
Timothy P. Jurka \email{tpjurka@ucdavis.edu}
}
\examples{
# Attach the package under demonstration.
library(maxent)

# Read the NYTimes sample shipped with the package, build a corpus from the
# headline column, and convert that corpus into a document-term matrix.
nyt_path <- system.file("data/NYTimes.csv.gz", package = "maxent")
nyt <- read.csv(nyt_path)
headlines <- Corpus(VectorSource(nyt$Title))
dtm <- DocumentTermMatrix(headlines)

# Both training runs below use the first 1000 documents and their topic codes.
train_rows <- 1:1000
train_labels <- as.factor(nyt$Topic.Code)[train_rows]

# Train on the compressed sparse (matrix.csr) representation.
dtm_sparse <- as.compressed.matrix(dtm)
model <- maxent(dtm_sparse[train_rows, ], train_labels)

# Train on the same documents using a regular dense matrix.
dtm_dense <- as.matrix(dtm)
model <- maxent(dtm_dense[train_rows, ], train_labels)
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
% R documentation directory.
\keyword{methods}