% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RcppExports.R
\name{cluster_validity_measures}
\alias{cluster_validity_measures}
\alias{calinski_harabasz_index}
\alias{dunnowa_index}
\alias{generalised_dunn_index}
\alias{negated_ball_hall_index}
\alias{negated_davies_bouldin_index}
\alias{negated_wcss_index}
\alias{silhouette_index}
\alias{silhouette_w_index}
\alias{wcnn_index}
\title{Internal Cluster Validity Measures}
\usage{
calinski_harabasz_index(X, y)

dunnowa_index(
  X,
  y,
  M = 25L,
  owa_numerator = "SMin:5",
  owa_denominator = "Const"
)

generalised_dunn_index(X, y, lowercase_d, uppercase_d)

negated_ball_hall_index(X, y)

negated_davies_bouldin_index(X, y)

negated_wcss_index(X, y)

silhouette_index(X, y)

silhouette_w_index(X, y)

wcnn_index(X, y, M = 25L)
}
\arguments{
\item{X}{numeric matrix with \code{n} rows and \code{d} columns,
representing \code{n} points in a \code{d}-dimensional space}

\item{y}{vector of \code{n} integer labels,
representing a partition whose \emph{quality} is to be
assessed; \code{y[i]} is the cluster ID of the \code{i}-th point,
\code{X[i, ]}; \code{1 <= y[i] <= K}, where \code{K} is the number
of clusters}

\item{M}{number of nearest neighbours}

\item{owa_numerator, owa_denominator}{single string specifying
the OWA operators to use in the definition of the DuNN index;
one of: \code{"Mean"}, \code{"Min"}, \code{"Max"}, \code{"Const"},
\code{"SMin:D"}, \code{"SMax:D"}, where \code{D} is an integer
defining the degree of smoothness}

\item{lowercase_d}{an integer between 1 and 5, denoting
\eqn{d_1}, ..., \eqn{d_5} in the definition
of the generalised Dunn (Bezdek-Pal) index (numerator:
min, max, and mean pairwise intercluster distance,
distance between cluster centroids,
weighted point-centroid distance, respectively)}

\item{uppercase_d}{an integer between 1 and 3, denoting
\eqn{D_1}, ..., \eqn{D_3} in the definition
of the generalised Dunn (Bezdek-Pal) index (denominator:
  max and mean pairwise intracluster distance, average point-centroid
  distance, respectively)}
}
\value{
A single numeric value (the greater, the \emph{better}).
}
\description{
Implementation of a number of so-called cluster validity indices critically
reviewed in (Gagolewski, Bartoszuk, Cena, 2021). See Section 2
therein and (Gagolewski, 2022) for the respective definitions.

The greater the index value, the more \emph{valid} (whatever that means)
the assessed partition. For consistency, the Ball-Hall and
Davies-Bouldin indexes as well as the within-cluster sum of squares (WCSS)
are negated (i.e., they take negative values), so that greater values
still indicate better partitions.
}
\examples{
X <- as.matrix(iris[,1:4])
X[,] <- jitter(X)  # otherwise we get a non-unique solution
y <- as.integer(iris[[5]])
calinski_harabasz_index(X, y)  # good
calinski_harabasz_index(X, sample(1:3, nrow(X), replace=TRUE))  # bad

}
\references{
Ball G.H., Hall D.J.,
\emph{ISODATA: A novel method of data analysis and pattern classification},
Technical report No. AD699616, Stanford Research Institute, 1965.

Bezdek J., Pal N., Some new indexes of cluster validity,
\emph{IEEE Transactions on Systems, Man, and Cybernetics, Part B} 28,
1998, 301-315, \doi{10.1109/3477.678624}.

Calinski T., Harabasz J., A dendrite method for cluster analysis,
\emph{Communications in Statistics} 3(1), 1974, 1-27,
\doi{10.1080/03610927408827101}.

Davies D.L., Bouldin D.W.,
A Cluster Separation Measure,
\emph{IEEE Transactions on Pattern Analysis and Machine Intelligence}
PAMI-1 (2), 1979, 224-227, \doi{10.1109/TPAMI.1979.4766909}.

Dunn J.C., A Fuzzy Relative of the ISODATA Process and Its Use in Detecting
Compact Well-Separated Clusters, \emph{Journal of Cybernetics} 3(3), 1973,
32-57, \doi{10.1080/01969727308546046}.

Gagolewski M., Bartoszuk M., Cena A.,
Are cluster validity measures (in)valid?, \emph{Information Sciences} 581,
620-636, 2021, \doi{10.1016/j.ins.2021.10.004};
preprint: \url{https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2021cvi.pdf}.

Gagolewski M., \emph{A Framework for Benchmarking Clustering Algorithms},
2022, \url{https://clustering-benchmarks.gagolewski.com}.

Rousseeuw P.J., Silhouettes: A Graphical Aid to the Interpretation and
Validation of Cluster Analysis,
\emph{Journal of Computational and Applied Mathematics} 20, 1987, 53-65,
\doi{10.1016/0377-0427(87)90125-7}.
}
\author{
\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors
}
\seealso{
The official online manual of \pkg{genieclust} at \url{https://genieclust.gagolewski.com/}

Gagolewski M., \pkg{genieclust}: Fast and robust hierarchical clustering, \emph{SoftwareX} 15:100722, 2021, \doi{10.1016/j.softx.2021.100722}.

}
