% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simData.R
\name{simulate_data_step}
\alias{simulate_data_step}
\title{Simulate data with linear confounding and causal effect following a step-function}
\usage{
simulate_data_step(q, p, n, m, make_tree = FALSE)
}
\arguments{
\item{q}{number of confounding covariates in H}

\item{p}{number of covariates in X}

\item{n}{number of observations}

\item{m}{number of covariates with a causal effect on Y}

\item{make_tree}{Whether the random regression tree should be returned.}
}
\value{
a list containing the simulated data:
\item{X}{a \code{matrix} of covariates}
\item{Y}{a \code{vector} of responses}
\item{f_X}{a \code{vector} of the true function f(X)}
\item{j}{the indices of the causal covariates in X}
\item{tree}{If \code{make_tree}, the random regression tree of class 
\code{Node} from \insertCite{Glur2023Data.tree:Structure}{SDModels}}
}
\description{
Simulation of data from a confounded non-linear model, where the non-linear function is a random regression tree.
The data generating process is given by:
\deqn{Y = f(X) + \delta^T H + \nu}
\deqn{X = \Gamma^T H + E}
where \eqn{f(X)} is a random regression tree with \eqn{m} random splits of the data,
resulting in a random step-function with \eqn{K = m+1} levels, i.e. leaf-levels:
\deqn{f(x_i) = \sum_{k = 1}^K 1_{\{x_i \in R_k\}} c_k}
\eqn{E}, \eqn{\nu} are random error terms and 
\eqn{H \in \mathbb{R}^{n \times q}} is a matrix of random confounding covariates.
\eqn{\Gamma \in \mathbb{R}^{q \times p}} is a random coefficient matrix and \eqn{\delta \in \mathbb{R}^{q}} is a random coefficient vector.
For the simulation, all the above parameters are drawn from a standard normal distribution, except for
\eqn{\delta}, which is drawn from a normal distribution with standard deviation 10.
The leaf levels \eqn{c_k} are drawn from a uniform distribution between -50 and 50.
}
\examples{
set.seed(42)
# simulation of confounded data
sim_data <- simulate_data_step(q = 2, p = 15, n = 100, m = 2)
X <- sim_data$X
Y <- sim_data$Y
}
\references{
\insertAllCited{}
}
\seealso{
\code{\link{simulate_data_nonlinear}}
}
\author{
Markus Ulmer
}
