% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/interpret.R
\name{interpret}
\alias{interpret}
\alias{interpret.default}
\alias{interpret.formula}
\title{Fit MID Models}
\usage{
interpret(object, ...)

\method{interpret}{default}(
  object,
  x,
  y = NULL,
  weights = NULL,
  pred.fun = get.yhat,
  link = NULL,
  k = c(NA, NA),
  type = c(1L, 1L),
  frames = list(),
  interactions = FALSE,
  terms = NULL,
  singular.ok = FALSE,
  mode = 1L,
  method = NULL,
  lambda = 0,
  kappa = 1e+06,
  na.action = getOption("na.action"),
  verbosity = 1L,
  encoding.digits = 3L,
  use.catchall = FALSE,
  catchall = "(others)",
  max.nelements = 1000000000L,
  nil = 1e-07,
  tol = 1e-07,
  pred.args = list(),
  ...
)

\method{interpret}{formula}(
  formula,
  data = NULL,
  model = NULL,
  pred.fun = get.yhat,
  weights = NULL,
  subset = NULL,
  na.action = getOption("na.action"),
  verbosity = 1L,
  mode = 1L,
  drop.unused.levels = FALSE,
  pred.args = list(),
  ...
)
}
\arguments{
\item{object}{a fitted model object to be interpreted.}

\item{...}{optional arguments. For \code{interpret.formula()}, arguments to be passed on to \code{interpret.default()}. For \code{interpret.default()}, \code{...} can include convenient aliases (e.g., "ok" for \code{singular.ok}, "ie" for \code{interactions}) as well as several advanced fitting options (see the "Advanced Fitting Options" section for details).}

\item{x}{a matrix or data.frame of predictor variables to be used in the fitting process. The response variable should not be included.}

\item{y}{an optional numeric vector of the model predictions or the response variable.}

\item{weights}{a numeric vector of sample weights for each observation in \code{x}.}

\item{pred.fun}{a function to obtain predictions from a fitted model, where the first argument is for the fitted model and the second argument is for new data. The default is \code{get.yhat()}.}

\item{link}{a character string specifying the link function: one of "logit", "probit", "cauchit", "cloglog", "identity", "log", "sqrt", "1/mu^2", "inverse", "translogit", "transprobit", "identity-logistic" and "identity-gaussian", or an object containing two functions \code{linkfun()} and \code{linkinv()}. See \code{help(make.link)}.}

\item{k}{an integer or a vector of two integers specifying the maximum number of sample points for main effects (\code{k[1]}) and interactions (\code{k[2]}). If a single integer is provided, it is used for main effects while the value for interactions is automatically determined. Any \code{NA} value will also trigger this automatic determination. With non-positive values, all unique data points are used as sample points.}

\item{type}{an integer or a vector of two integers specifying the type of encoding. The effects of quantitative variables are modeled as piecewise linear functions if \code{type} is \code{1}, and as step functions if \code{type} is \code{0}. If a vector is passed, \code{type[1L]} is used for main effects and \code{type[2L]} is used for interactions.}

\item{frames}{a named list of encoding frames ("numeric.frame" or "factor.frame" objects). The encoding frames are used to encode the variable of the corresponding name. If the name begins with "|" or ":", the encoding frame is used only for main effects or interactions, respectively.}

\item{interactions}{logical. If \code{TRUE} and if \code{terms} and \code{formula} are not supplied, all interactions for each pair of variables are modeled and calculated.}

\item{terms}{a character vector of term labels or formula, specifying the set of component functions to be modeled. If not passed, \code{terms} includes all main effects, and all second-order interactions if \code{interactions} is \code{TRUE}.}

\item{singular.ok}{logical. If \code{FALSE}, a singular fit is an error.}

\item{mode}{an integer specifying the method of calculation. If \code{mode} is \code{1}, the centralization constraints are treated as penalties for the least squares problem. If \code{mode} is \code{2}, the constraints are used to reduce the number of free parameters.}

\item{method}{an integer specifying the method to be used to solve the least squares problem. A non-negative value will be passed to \code{RcppEigen::fastLmPure()}. If negative, \code{stats::lm.fit()} is used.}

\item{lambda}{the penalty factor for pseudo smoothing. The default is \code{0}.}

\item{kappa}{the penalty factor for centering constraints. Used only when \code{mode} is \code{1}. The default is \code{1e+6}.}

\item{na.action}{a function or character string specifying the method of \code{NA} handling. The default is \code{getOption("na.action")}, which is "na.omit" unless the option has been changed.}

\item{verbosity}{the level of verbosity. \code{0}: fatal, \code{1}: warning (default), \code{2}: info or \code{3}: debug.}

\item{encoding.digits}{an integer. The rounding digits for encoding numeric variables. Used only when \code{type} is \code{1}.}

\item{use.catchall}{logical. If \code{TRUE}, less frequent levels of qualitative variables are dropped and replaced by the catchall level.}

\item{catchall}{a character string specifying the catchall level.}

\item{max.nelements}{an integer specifying the maximum number of elements of the design matrix. Defaults to \code{1e9}.}

\item{nil}{a threshold for the intercept and coefficients to be treated as zero. The default is \code{1e-7}.}

\item{tol}{a tolerance for the singular value decomposition. The default is \code{1e-7}.}

\item{pred.args}{optional parameters other than the fitted model and new data to be passed to \code{pred.fun()}.}

\item{formula}{a symbolic description of the MID model to be fit.}

\item{data}{a data.frame, list or environment containing the variables in \code{formula}. If not found in data, the variables are taken from \code{environment(formula)}.}

\item{model}{a fitted model object to be interpreted.}

\item{subset}{an optional vector specifying a subset of observations to be used in the fitting process.}

\item{drop.unused.levels}{logical. If \code{TRUE}, unused levels of factors will be dropped.}
}
\value{
\code{interpret()} returns an object of class "mid". This is a list with the following components:
\item{weights}{a numeric vector of the sample weights.}
\item{call}{the matched call.}
\item{terms}{the \code{\link[stats]{terms.object}} used.}
\item{link}{a "link-glm" or "link-midr" object containing the link function.}
\item{intercept}{the intercept.}
\item{encoders}{a list of variable encoders.}
\item{main.effects}{a list of data frames representing the main effects.}
\item{interactions}{a list of data frames representing the interactions.}
\item{ratio}{the ratio of the sum of squared errors between the target model predictions and the fitted MID values, to the sum of squared deviations of the target model predictions.}
\item{linear.predictors}{a numeric vector of the linear predictors.}
\item{fitted.values}{a numeric vector of the fitted values.}
\item{residuals}{a numeric vector of the working residuals.}
\item{na.action}{information about the special handling of \code{NA}s.}
}
\description{
\code{interpret()} is used to fit a Maximum Interpretation Decomposition (MID) model.
MID models are additive, highly interpretable models composed of functions, each with up to two variables.
}
\details{
The MID model approximates a target model's prediction function \eqn{f(\mathbf{x})}, or values of the response variable \eqn{\mathbf{y}}.
This model, denoted as \eqn{\mathcal{F}(\mathbf{x})}, has the following structure: \deqn{\mathcal{F}(\mathbf{x}) = f_\phi + \sum_{j} f_{j}(x_j) + \sum_{j<k} f_{jk}(x_j, x_k)}
where \eqn{f_\phi} is the intercept, \eqn{f_{j}(x_j)} is the main effect of feature \eqn{j}, and \eqn{f_{jk}(x_j, x_k)} is the second-order interaction effect between features \eqn{j} and \eqn{k}.

To ensure that the decomposed components are unique, they are fitted under the \emph{centering constraints}: each main effect's average is constrained to be zero, and each interaction effect's conditional averages are also constrained to be zero.
The model is fitted by minimizing the squared error between the target, \eqn{f(\mathbf{x})} or \eqn{\mathbf{y}}, and the surrogate \eqn{\mathcal{F}(\mathbf{x})}, where the error is typically evaluated on a representative dataset.
}
\section{Advanced Fitting Options}{

The \code{...} argument can be used to pass several advanced fitting options:
\describe{
  \item{fit.intercept}{logical. If \code{TRUE}, the intercept term is fitted as part of the least squares problem. If \code{FALSE} (default), it is calculated as the weighted mean of the response.}
  \item{interpolate.beta}{a character string specifying the method for interpolating inestimable coefficients (betas) that arise from sparse data regions. Can be "iterative" for an iterative smoothing process, "direct" for solving a linear system, or "none" to disable interpolation.}
  \item{maxit}{an integer specifying the maximum number of iterations for the "iterative" interpolation method.}
  \item{save.memory}{an integer (0, 1, or 2) specifying the memory-saving level. Higher values reduce memory usage at the cost of increased computation time.}
  \item{weighted.norm}{logical. If \code{TRUE}, the columns of the design matrix are normalized by the square root of their weighted sum. This is required to ensure the minimum-norm least squares solution obtained by appropriate methods (i.e., \code{4} or \code{5}) of \code{fastLmPure()} is the minimum-norm solution in a \emph{weighted} sense.}
  \item{weighted.encoding}{logical. If \code{TRUE}, sample weights are used during the encoding process (e.g., for calculating quantiles to determine knots).}
}
}

\examples{
# Fit a MID model as a surrogate for another model
data(cars, package = "datasets")
model <- lm(dist ~ I(speed^2) + speed, cars)
mid <- interpret(dist ~ speed, cars, model)
plot(mid, "speed", intercept = TRUE)
points(cars)

# Fit a MID model as a standalone predictive model
data(airquality, package = "datasets")
mid <- interpret(Ozone ~ .^2, data = airquality, lambda = .5)
plot(mid, "Wind")
plot(mid, "Temp")
plot(mid, "Wind:Temp", main.effects = TRUE)

data(Nile, package = "datasets")
nile <- data.frame(time = 1:length(Nile), flow = as.numeric(Nile))

# A flexible fit with many knots
mid <- interpret(flow ~ time, data = nile, k = 100L)
plot(mid, "time", intercept = TRUE, limits = c(600L, 1300L))
points(x = 1L:100L, y = Nile)

# A smoother fit with fewer knots
mid <- interpret(flow ~ time, data = nile, k = 10L)
plot(mid, "time", intercept = TRUE, limits = c(600L, 1300L))
points(x = 1L:100L, y = Nile)

# A pseudo-smoothed fit using a penalty
mid <- interpret(flow ~ time, data = nile, k = 100L, lambda = 100L)
plot(mid, "time", intercept = TRUE, limits = c(600L, 1300L))
points(x = 1L:100L, y = Nile)
}
\references{
Asashiba R, Kozuma R, Iwasawa H (2025). “midr: Learning from Black-Box Models by Maximum Interpretation Decomposition.” arXiv:2506.08338, \url{https://arxiv.org/abs/2506.08338}.
}
\seealso{
\code{\link{print.mid}}, \code{\link{summary.mid}}, \code{\link{predict.mid}}, \code{\link{plot.mid}}, \code{\link{ggmid}}, \code{\link{mid.plots}}, \code{\link{mid.effect}}, \code{\link{mid.terms}}, \code{\link{mid.importance}}, \code{\link{mid.conditional}}, \code{\link{mid.breakdown}}
}
