% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/vtreat.R
\name{mkCrossFrameNExperiment}
\alias{mkCrossFrameNExperiment}
\title{Run numeric cross frame experiment.}
\usage{
mkCrossFrameNExperiment(dframe, varlist, outcomename, ..., weights = c(),
  minFraction = 0.02, smFactor = 0, rareCount = 0, rareSig = 1,
  collarProb = 0, scale = FALSE, doCollar = TRUE,
  parallelCluster = NULL)
}
\arguments{
\item{dframe}{Data frame to learn treatments from (training data), must have at least 1 row.}

\item{varlist}{Names of columns to treat (effective variables).}

\item{outcomename}{Name of column holding outcome variable. dframe[[outcomename]] must be only finite non-missing values and there must be a cut such that dframe[[outcomename]] is both above the cut at least twice and below the cut at least twice.}

\item{...}{no additional arguments, declared to force named binding of later arguments}

\item{weights}{optional training weights for each row}

\item{minFraction}{optional minimum frequency a categorical level must have to be converted to an indicator column.}

\item{smFactor}{optional smoothing factor for impact coding models.}

\item{rareCount}{optional integer, suppress direct effects of levels that occur this many times or fewer.}

\item{rareSig}{optional numeric, suppress direct effects of levels whose significance value is greater than this.  Set to one to turn off effect.}

\item{collarProb}{what fraction of the data (pseudo-probability) to collar data at (<0.5).}

\item{scale}{optional if TRUE replace numeric variables with regression ("move to outcome-scale").}

\item{doCollar}{optional if TRUE collar numeric variables by cutting off after a tail-probability specified by collarProb during treatment design.}

\item{parallelCluster}{(optional) a cluster object created by package parallel or package snow}
}
\value{
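list with elements \code{treatments} (the treatment plan, for use with \code{\link{prepare}}) and \code{crossFrame} (the cross-prepared data frame built from \code{dframe})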
}
\description{
Builds a \code{\link{designTreatmentsN}} treatment plan and a data frame prepared
from \code{dframe} that is "cross" in the sense each row is treated using a treatment
plan built from a subset of dframe disjoint from the given row.
The goal is to try to supply a method of breaking nested model bias other than splitting
into calibration, training, test sets.
}
\examples{

# Reproducible synthetic data: two categorical columns drawn from 100 possible levels.
set.seed(23525)
zip <- paste('z',1:100)
N <- 200
d <- data.frame(zip=sample(zip,N,replace=TRUE),
                zip2=sample(zip,N,replace=TRUE),
                y=runif(N))
# Draw one random effect per level and add the zip2 effect to y,
# so y depends on zip2 while zip is sampled independently of y.
del <- runif(length(zip))
names(del) <- zip
# Look the effect up by level name. as.character() guards against zip2 being a
# factor (the data.frame default before R 4.0.0), which would be used as integer codes.
d$y <- d$y + del[as.character(d$zip2)]
# Binary version of the outcome; not referenced again in this example.
d$yc <- d$y>=mean(d$y)
# Build the treatment plan and the cross frame for the numeric outcome y.
cN <- mkCrossFrameNExperiment(d,c('zip','zip2'),'y',
   rareCount=2,rareSig=0.9)
cor(cN$crossFrame$y,cN$crossFrame$zip_catN)  # poor
cor(cN$crossFrame$y,cN$crossFrame$zip2_catN) # better
# The returned list holds both the treatment plan and the cross-prepared training frame.
treatments <- cN$treatments
dTrainV <- cN$crossFrame

}
\seealso{
\code{\link{designTreatmentsC}} \code{\link{designTreatmentsN}} \code{\link{prepare}}
}

