% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bias_correction.R
\name{ols_bca}
\alias{ols_bca}
\alias{ols_bca.default}
\alias{ols_bca.formula}
\title{Additive bias-corrected OLS (BCA)}
\usage{
ols_bca(
  Y,
  Xhat = NULL,
  fpr,
  m,
  data = parent.frame(),
  intercept = TRUE,
  gen_idx = 1,
  ...
)

\method{ols_bca}{default}(
  Y,
  Xhat,
  fpr,
  m,
  data = parent.frame(),
  intercept = TRUE,
  gen_idx = 1,
  ...
)

\method{ols_bca}{formula}(
  Y,
  Xhat = NULL,
  fpr,
  m,
  data = parent.frame(),
  intercept = TRUE,
  gen_idx = 1,
  ...
)
}
\arguments{
\item{Y}{numeric response vector, or a two-sided formula such as \code{y ~ x1 + x2} (response on the left-hand side)}

\item{Xhat}{numeric matrix of regressors (if \code{Y} is numeric); the ML-regressor is column \code{gen_idx}}

\item{fpr}{numeric; estimated false-positive rate of the ML regressor}

\item{m}{integer; size of the external sample used to estimate the classifier's false-positive rate. Can be set to a large number when the false-positive rate is known exactly}

\item{data}{data frame containing the variables referenced in the formula (used only if \code{Y} is a formula); defaults to the calling environment}

\item{intercept}{logical; if \code{TRUE}, prepends a column of 1's to \code{Xhat}}

\item{gen_idx}{integer; 1-based index of the ML-generated variable to apply bias correction to. If not specified, defaults to the first non-intercept variable}

\item{...}{unused}
}
\value{
An object of classes \code{mlbc_fit} and \code{mlbc_bca}, containing:
\itemize{
\item \code{coef}: bias-corrected coefficient estimates (ML-slope first, other slopes, intercept last)
\item \code{vcov}: adjusted variance-covariance matrix for those coefficients
}
}
\description{
Performs an additive bias correction to regressions that include a binary
covariate generated by AI/ML. This method requires an external estimate of
the false-positive rate. Standard errors are adjusted to account for
uncertainty in the false-positive rate estimate.
}
\section{Usage Options}{


\strong{Option 1: Formula Interface}
\itemize{
\item \code{Y}: A two-sided formula (response on the left, regressors on the right), e.g. \code{log(salary) ~ wfh_wham}
\item \code{data}: Data frame containing the variables referenced in the formula
}

\strong{Option 2: Array Interface}
\itemize{
\item \code{Y}: Response variable vector
\item \code{Xhat}: Design matrix of covariates
}
}

\examples{
# Load the remote work dataset
data(SD_data)

# Formula interface
fit_bca <- ols_bca(log(salary) ~ wfh_wham + soc_2021_2 + employment_type_name,
                   data = SD_data,
                   fpr = 0.009,  # estimated false positive rate
                   m = 1000)     # validation sample size
summary(fit_bca)

# Array interface
Y <- log(SD_data$salary)
Xhat <- model.matrix(~ wfh_wham + soc_2021_2, data = SD_data)[, -1]
fit_bca2 <- ols_bca(Y, Xhat, fpr = 0.009, m = 1000, intercept = TRUE)
summary(fit_bca2)

}
