% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/SEMhelp.R
\name{benchmark}
\alias{benchmark}
\title{Prediction benchmark evaluation utility}
\usage{
benchmark(yobs, yhat, CT = NULL, thr = 0, F1 = TRUE, verbose = FALSE, ...)
}
\arguments{
\item{yobs}{A binary vector with the true target variable values.}

\item{yhat}{A continuous vector with the predicted target variable values.}

\item{CT}{An optional confusion matrix of dimension 2x2 containing the counts 
for FP, TP, TN, and FN.}

\item{thr}{A numerical value indicating the threshold for converting the
continuous \code{yhat} vector to a binary vector. If the \code{yhat} vector 
ranges between -1 and 1, the user can specify \code{thr = 0} (default); 
if \code{yhat} ranges between 0 and 1, the user can specify \code{thr = 0.5}.}

\item{F1}{A logical value. If TRUE (default), precision (pre), recall (rec),
f1 and mcc will be computed. Otherwise, if FALSE, specificity (sp),
sensitivity (se), accuracy (acc) and mcc will be obtained.}

\item{verbose}{A logical value. If TRUE, the density plots of \code{yhat} 
per group are plotted to screen. If FALSE (default), they are not plotted.}

\item{...}{Currently ignored.}
}
\value{
A data.frame with classification evaluation statistics is returned.
}
\description{
This function calculates a series of binary 
classification evaluation statistics given either (i) two vectors, one with the 
true target variable values and the other with the predicted target variable
values, or (ii) a confusion matrix with the counts for False Positives (FP), 
True Positives (TP), True Negatives (TN), and False Negatives (FN).
The user can specify the desired set of metrics to compute: (i) precision, 
recall, f1 score and Matthews Correlation Coefficient (mcc) or 
(ii) specificity, sensitivity, accuracy and mcc.
}
\details{
Suppose a 2x2 table with the following notation:

\tabular{rcc}{ \tab Reference \tab \cr Predicted \tab Event \tab No Event
\cr Event \tab A \tab B \cr No Event \tab C \tab D \cr }

The formulas used here are: \deqn{se = A/(A+C)} \deqn{sp =
D/(B+D)} \deqn{acc = (A+D)/(A+B+C+D)} \deqn{pre = A/(A+B)} 
\deqn{rec = A/(A+C)} \deqn{F1 = (2*pre*rec)/(pre+rec)} 
\deqn{mcc = (A*D - B*C)/sqrt((A+B)*(A+C)*(D+B)*(D+C))}
}
\examples{

\donttest{
# Load Amyotrophic Lateral Sclerosis (ALS) data
data<- alsData$exprs; dim(data)
data<- transformData(data)$data
group<- alsData$group; table (group)
ig<- alsData$graph; gplot(ig)

#...with train-test (0.5-0.5) samples
set.seed(123)
train<- sample(1:nrow(data), 0.5*nrow(data))

#...with a binary outcome (1=case, 0=control)
ig1<- mapGraph(ig, type = "outcome"); gplot(ig1)
outcome<- group; table(outcome)
data1<- cbind(outcome, data); data1[1:5,1:5]

res <- SEMml(ig1, data1, train, algo="rf")
mse <- predict(res, data1[-train, ])
yobs<- group[-train]
yhat<- mse$Yhat[ ,"outcome"]

# ... evaluate predictive benchmark (sp, se, acc, mcc)
benchmark(yobs, yhat, thr=0, F1=FALSE)

# ... evaluate predictive benchmark (pre, rec, f1, mcc)
benchmark(yobs, yhat, thr=0, F1=TRUE)

#... with confusion matrix table as input
ypred<- ifelse(yhat < 0, 0, 1)
benchmark(CT=table(yobs, ypred), F1=TRUE)

#...with density plots of yhat per group
#old.par <- par(no.readonly = TRUE)
benchmark(yobs, yhat, thr=0, F1=FALSE, verbose = TRUE)
#par(old.par)
}

}
\references{
Sammut, C. & Webb, G. I. (eds.) (2017). Encyclopedia of Machine Learning 
and Data Mining. New York: Springer. ISBN: 978-1-4899-7685-7 

Chicco, D. & Jurman, G. (2020). The advantages of the Matthews correlation 
coefficient (MCC) over F1 score and accuracy in binary classification 
evaluation. BMC Genomics, 21, 6.
}
\author{
Mario Grassi \email{mario.grassi@unipv.it}
}
