\name{synthetic data sets}
\docType{data}
\alias{vur.test}
\alias{vuc.test}
\title{Synthetic data sets to test fair models}
\description{

  Synthetic data sets used as test cases in the \pkg{fairml} package.

}
\usage{
data(vur.test)
data(vuc.test)
}
\format{

  Each data set is a list with the following three elements:
  \itemize{

    \item \code{y}, the response variable;
    \item \code{X}, a numeric matrix containing 3 predictors called \code{X1},
      \code{X2} and \code{X3};
    \item \code{S}, a numeric matrix containing 3 sensitive attributes called
      \code{S1}, \code{S2} and \code{S3}.

  }

}
\note{

  These data sets are called \code{vur.test} and \code{vuc.test} because they
  are generated from \emph{v}ery \emph{u}nfair \emph{r}egression and
  \emph{c}lassification models in which sensitive attributes explain the lion's
  share of the overall explained variance or deviance.

  The code used to generate the predictors and the sensitive attributes is as
  follows.

\preformatted{
library(mvtnorm)
# Covariance matrix: unit variances, pairwise covariance 0.3 among all 6
# variables.
sigma = matrix(0.3, nrow = 6, ncol = 6)
diag(sigma) = 1
n = 1000
# Draw all 6 variables jointly from a zero-mean multivariate normal.
X = rmvnorm(n, mean = rep(0, 6), sigma = sigma)
# Columns 4-6 become the sensitive attributes; S must be extracted before X is
# overwritten with its first three columns.
S = X[, 4:6]
X = X[, 1:3]
colnames(X) = c("X1", "X2", "X3")
colnames(S) = c("S1", "S2", "S3")
}

  The continuous response in \code{vur.test} is produced as follows.

\preformatted{
# X only holds the three predictors at this point, so the sensitive attributes
# are taken from S. They carry the largest coefficients (5, 6, 7).
y = 2 + 2 * X[, 1] + 3 * X[, 2] + 4 * X[, 3] + 5 * S[, 1] +
        6 * S[, 2] + 7 * S[, 3] + rnorm(n, sd = 10)
}

  The discrete response in \code{vuc.test} is produced as follows.

\preformatted{
# Linear predictor on the logit scale. X only holds the three predictors at
# this point, so the sensitive attributes are taken from S.
nu = 1 + 0.5 * X[, 1] + 0.6 * X[, 2] + 0.7 * X[, 3] + 0.8 * S[, 1] +
         0.9 * S[, 2] + 1.0 * S[, 3]
# The inverse logit of nu is the success probability of each Bernoulli draw.
y = rbinom(n = nrow(X), size = 1, prob = exp(nu) / (1 + exp(nu)))
}
}
\examples{
# Regression: compare the R-squared of the model using only the sensitive
# attributes with that of the model using predictors and sensitive attributes.
data(vur.test)
sensitive.only = lm(y ~ S, data = vur.test)
summary(sensitive.only)$r.squared
full.model = lm(y ~ X + S, data = vur.test)
summary(full.model)$r.squared

# Classification: same comparison, using the residual deviance of the
# logistic regressions.
data(vuc.test)
sensitive.only = glm(y ~ S, data = vuc.test, family = "binomial")
deviance(sensitive.only)
full.model = glm(y ~ X + S, data = vuc.test, family = "binomial")
deviance(full.model)
}
\keyword{datasets}

