% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/average_observed.R
\name{average_observed}
\alias{average_observed}
\title{Average Observed}
\usage{
average_observed(
  X,
  y,
  w = NULL,
  x_name = "x",
  breaks = "Sturges",
  right = TRUE,
  discrete_m = 5L,
  outlier_iqr = 2,
  seed = NULL,
  ...
)
}
\arguments{
\item{X}{A vector, matrix, or data.frame with variable(s) to be shown on the x axis.}

\item{y}{A numeric vector of observed responses.}

\item{w}{An optional numeric vector of weights.}

\item{x_name}{If \code{X} is a vector: what is the name of the variable? By default "x".}

\item{breaks}{An integer, vector, string or function specifying the bins
of the numeric X variables as in \code{\link[graphics:hist]{graphics::hist()}}. The default is "Sturges".
To allow varying values of \code{breaks} across variables, it can be a list with
one element per variable in \code{X}, or a \emph{named} list with \code{breaks} for certain variables.}

\item{right}{Should bins be right-closed? The default is \code{TRUE}.
Vectorized over the variables in \code{X}. Only relevant for numeric X.}

\item{discrete_m}{Numeric X variables with up to this number of unique values
are not binned but treated as a factor (after calculating partial dependence).
The default is 5. Vectorized over the variables in \code{X}.}

\item{outlier_iqr}{Outliers of a numeric X are capped via the boxplot rule, i.e.,
outside \code{outlier_iqr} * IQR from the quartiles. The default of 2 is more
conservative than the usual rule to account for right-skewed distributions.
Set to 0 or \code{Inf} for no capping. Note that at most 10k observations are sampled
to calculate quartiles. Vectorized over the variables in \code{X}.}

\item{seed}{Optional random seed (an integer) used for capping X based on quantiles
calculated from a subsample of 10k observations.}

\item{...}{Currently unused.}
}
\value{
A list (of class "EffectData") with a data.frame of statistics per feature. Use
single bracket subsetting to select part of the output.
}
\description{
Calculates average observed \code{y} values over the values of one or multiple
\code{X} variables. This describes the statistical association between \code{y}
and potential model features.
}
\details{
The function is a convenience wrapper around \code{\link[=feature_effects]{feature_effects()}}.
}
\examples{
M <- average_observed(iris$Species, y = iris$Sepal.Length)
M
M |> plot()

# Or multiple potential features X
average_observed(iris[2:5], y = iris[, 1], breaks = 5) |>
  plot()
}
\seealso{
\code{\link[=feature_effects]{feature_effects()}}
}
