% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{initialize_clusters}
\alias{initialize_clusters}
\title{Cluster Initialization}
\usage{
initialize_clusters(
  X,
  G,
  init_method = c("kmedoids", "kmeans", "hierarchical", "manual", "soft", "hard"),
  manual_clusters = NULL
)
}
\arguments{
\item{X}{An \eqn{n} by \eqn{d} matrix or data frame where \eqn{n} is the number of
observations and \eqn{d} is the number of columns or variables. Alternately,
\code{X} can be a vector of \eqn{n} observations.}

\item{G}{The number of clusters.}

\item{init_method}{(optional) A string specifying the method to initialize
the EM algorithm. "kmedoids" clustering is used by default. Alternative
methods include "kmeans", "hierarchical", "manual", "soft", "hard". When
"manual" is chosen, a vector \code{manual_clusters} of length \eqn{n} must
be specified.}

\item{manual_clusters}{A vector of length \eqn{n} that specifies the user-supplied
initial cluster memberships when \code{init_method} is set to "manual".
Both numeric and character vectors are acceptable. This argument is \code{NULL} by
default and is ignored whenever any other initialization method is chosen.}
}
\value{
A list with the following slots:
  \item{z}{Mapping probabilities in the form of an \eqn{n} by \eqn{G} matrix.}
  \item{clusters}{A numeric vector with values from 1 to \eqn{G} indicating
    initial cluster memberships.}
  \item{pi}{Component mixing proportions.}
  \item{mu}{If \code{X} is a matrix or data frame, \code{mu} is a \eqn{G} by \eqn{d}
    matrix where each row is the component mean vector. If \code{X} is a vector, \code{mu}
    is a vector of \eqn{G} component means.}
  \item{sigma}{If \code{X} is a matrix or data frame, \code{sigma} is a three-dimensional
    array holding the \eqn{G} component covariance matrices, each \eqn{d} by \eqn{d}. If
    \code{X} is a vector, \code{sigma} is a vector of \eqn{G} component variances.}
}
\description{
Initialize cluster memberships and component parameters to start the EM algorithm
using a heuristic clustering method or user-defined labels.
}
\details{
Available heuristic methods include k-medoids clustering, k-means clustering,
  hierarchical clustering, and soft and hard clustering. Alternately, the user can also
  enter pre-specified cluster memberships, making other initialization methods possible.


}
\examples{

#++++ Initialization using a heuristic method ++++#

set.seed(1234)

init <- initialize_clusters(iris[1:4], G = 3)
init <- initialize_clusters(iris[1:4], G = 3, init_method = 'kmeans')
init <- initialize_clusters(iris[1:4], G = 3, init_method = 'hierarchical')
init <- initialize_clusters(iris[1:4], G = 3, init_method = 'soft')
init <- initialize_clusters(iris[1:4], G = 3, init_method = 'hard')

#++++ Initialization using user-defined labels ++++#

init <- initialize_clusters(iris[1:4], G = 3, init_method = 'manual',
                            manual_clusters = iris$Species)

#++++ Initial parameters and pairwise scatterplot showing the mapping ++++#

init$z
init$pi
init$mu
init$sigma

pairs(iris[1:4], col = init$clusters, pch = 16)

}
\references{
Everitt, B., Landau, S., Leese, M., and Stahl, D. (2011). \emph{Cluster Analysis}. John Wiley & Sons. \cr \cr
Kaufman, L. and Rousseeuw, P. J. (2009). \emph{Finding groups in data: an
  introduction to cluster analysis}, volume 344. John Wiley & Sons. \cr \cr
Hartigan, J. A. and Wong, M. A. (1979). Algorithm AS 136: A K-means clustering
 algorithm. \emph{Applied Statistics}, \strong{28}, 100--108. \doi{10.2307/2346830}.
}
