% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/OOI.R
\name{OOI}
\alias{OOI}
\title{Outside Option Index}
\usage{
OOI(
  formula = NULL,
  X,
  Z = NULL,
  X.location = NULL,
  Z.location = NULL,
  wgt = rep(1, nrow(X)),
  pred = TRUE,
  method = "logit",
  sim.factor = 1,
  dist.fun = geo_dist,
  dist.order = NULL,
  seed = runif(1, 0, .Machine$integer.max)
)
}
\arguments{
\item{formula}{a formula describing the model to be fitted in order to
estimate P(Z|X) / P(Z). This formula uses a syntax similar to STATA, and so
"x_" refers to all variables with the prefix "x", while
"z_" refers to all variables with the prefix "z". Similarly, "d" refers
to the distance polynomial (see the example below).}

\item{X}{matrix or data frame with worker characteristics. Note that all column names
should start with "x" (necessary for the inner function 'coef_reshape').}

\item{Z}{an optional matrix or data frame with job characteristics. Note that all column names
should start with "z" (necessary for the inner function 'coef_reshape').}

\item{X.location}{an optional matrix or data frame with the locations of workers. This could be
a geographical location (i.e., geo-coordinates) or any other
feature that can be used to measure the distance between
a worker and a job using 'dist.fun'. Currently the package supports only numeric
inputs.}

\item{Z.location}{same as 'X.location' but for jobs.}

\item{wgt}{an optional numeric vector of weights.}

\item{pred}{logical. If TRUE (default), predicts the ooi for the provided data.}

\item{method}{a method for estimating P(Z|X) / P(Z). Currently not in use.}

\item{sim.factor}{a variable that determines how much fake data to simulate
(relative to real data).}

\item{dist.fun}{a distance function to calculate the distance between X.location and
Z.location. Users interested in using more than one distance metric
should provide a function that returns for each row of X.location and
Z.location a vector with all the necessary metrics. Also - the function
should use columns by their index and not by their names.
The default function is \code{\link{geo_dist}}, which is suitable
for data with geo-coordinates.}

\item{dist.order}{a numeric vector specifying for each distance metric
an order of the distance polynomial.}

\item{seed}{the seed of the random number generator.}
}
\value{
An "ooi" object. This object is a list containing
the following components:
 \item{coeffs}{coefficients from the estimated logit.}
 \item{coeffs_sd}{standard errors of the coefficients.}
 \item{pseudo_r2}{McFadden's pseudo-R squared for the estimated logit.}
 \item{standardized_coeffs}{standardized coefficients.}
 \item{ooi}{the Outside Option Index.}
 \item{hhi}{the Herfindahl-Hirschman Index, an alternative measure for outside options.}
 \item{job_worker_prob}{the log probability of each worker to work at his \emph{specific} job (rather than
                        to work at a job with his specific z).}
 \item{orig_arg}{a list containing the original arguments (necessary
 for \code{\link{predict.ooi}}).}
}
\description{
Calculates the 'outside option index' (defined as
\eqn{-\sum P(Z|X) \log(P(Z|X) / P(Z))}{-sum P(Z|X) * log(P(Z|X) / P(Z))})
for workers, using employer-employee data.
}
\examples{
# Generate data
# Worker and job characteristics for n simulated worker-job pairs:
n <- 100
# Binary worker characteristic (1 with probability 0.5):
men <- rbinom(n, 1, 0.5)
# Job characteristic; shifted up by 2 for observations with men == 0:
size <- 1 + rgeom(n, 0.1)
size[men == 0] <- size[men == 0] + 2
# One-column location data for workers (values between 0 and 20)
# and for jobs (values between 20 and 40), rounded to one decimal:
worker_resid <- data.frame(r = round(runif(n, 0, 20), 1))
job_location <- data.frame(l = round(runif(n, 20, 40), 1))
# Prepare data
# Define the distance function (absolute difference between the two locations):
dist_metric <- function(x, y){abs(y - x)}
X <- data.frame(men = men)
Z <- data.frame(size = size)
# Add the "x." / "z." prefixes to the column names
# (OOI expects column names of X to start with "x" and those of Z with "z"):
X <- add_prefix(X, "x.")
Z <- add_prefix(Z, "z.")
# Estimate P(Z|X) / P(Z) and calculate the ooi.
# In the formula, "x_" and "z_" stand for all variables with the prefix "x" / "z",
# and "d" stands for the distance polynomial (of order dist.order = 3 here):
ooi_object <- OOI(formula = ~ x_*z_ + x_*d + z_*d, X = X, Z = Z,
                  X.location = worker_resid, Z.location = job_location,
                  sim.factor = 3, dist.fun = dist_metric, dist.order = 3)
# We can extract the ooi using predict():
ooi <- predict(ooi_object)
summary(ooi)
}
