% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/linear_FSSEM.R
\name{do.fssem}
\alias{do.fssem}
\title{Feature Subset Selection using Expectation-Maximization}
\usage{
do.fssem(
  X,
  ndim = 2,
  max.k = 10,
  preprocess = c("null", "center", "scale", "cscale", "whiten", "decorrelate")
)
}
\arguments{
\item{X}{an \eqn{(n\times p)} matrix or data frame whose rows are observations
and columns represent independent variables.}

\item{ndim}{an integer-valued target dimension.}

\item{max.k}{maximum number of clusters for GMM fitting with EM algorithms.}

\item{preprocess}{an additional option for preprocessing the data.
Default is "null". See also \code{\link{aux.preprocess}} for more details.}
}
\value{
a named list containing
\describe{
\item{Y}{an \eqn{(n\times ndim)} matrix whose rows are embedded observations.}
\item{trfinfo}{a list containing information for out-of-sample prediction.}
\item{projection}{a \eqn{(p\times ndim)} whose columns are basis for projection.}
}
}
\description{
Feature Subset Selection using Expectation-Maximization (FSSEM) takes a wrapper approach to feature selection problem.
It iterates over optimizing the selection of variables by incrementally including each variable that adds the most
significant amount of scatter separability from a labeling obtained by Gaussian mixture model. This method is
quite computation intensive as it pertains to multiple fitting of GMM. Setting smaller \code{max.k} for each round of
EM algorithm as well as target dimension \code{ndim} would ease the burden.
}
\examples{
## run FSSEM with IRIS dataset - select 2 of 4 variables
data(iris)
irismat = as.matrix(iris[,2:4])

## select 50 observations for CRAN-purpose small example
id50 = sample(1:nrow(irismat), 50)
sel.dat = irismat[id50,]
sel.lab = as.factor(iris[id50,5])

## run and visualize
out0 = do.fssem(sel.dat, ndim=2, max.k=3)
opar = par(no.readonly=TRUE)
plot(out0$Y, main="small run", col=sel.lab, pch=19)
par(opar)

\dontrun{
## NOT-FOR-CRAN example; run at your machine !
## try different maximum number of clusters
out3 = do.fssem(irismat, ndim=2, max.k=3)
out6 = do.fssem(irismat, ndim=2, max.k=6)
out9 = do.fssem(irismat, ndim=2, max.k=9)

## visualize
cols = as.factor(iris[,5])
opar = par(no.readonly=TRUE)
par(mfrow=c(3,1))
plot(out3$Y, main="max k=3", col=cols)
plot(out6$Y, main="max k=6", col=cols)
plot(out9$Y, main="max k=9", col=cols)
par(opar)
}

}
\references{
\insertRef{dy_feature_2004}{Rdimtools}
}
\author{
Kisung You
}
