% Generated by roxygen2 (4.0.2): do not edit by hand
\name{h2o.impute}
\alias{h2o.impute}
\title{Basic Imputation of H2O Vectors}
\usage{
h2o.impute(data, column = 0, method = c("mean", "median", "mode"),
  combine_method = c("interpolate", "average", "lo", "hi"), by = NULL,
  groupByFrame = NULL, values = NULL)
}
\arguments{
\item{data}{The dataset containing the column to impute.}

\item{column}{A specific column to impute, default of 0 means impute the whole frame.}

\item{method}{"mean" replaces NAs with the column mean; "median" replaces NAs with the column median;
"mode" replaces NAs with the most common factor level (for factor columns only).}

\item{combine_method}{If method is "median", then choose how to combine quantiles on even sample sizes. This parameter is ignored in all other cases.}

\item{by}{Columns to group by, given by index or name; imputation is then based on these groupings.}

\item{groupByFrame}{Impute the column given by \code{column} with this pre-computed grouped frame.}

\item{values}{A vector of impute values (one per column). NaN indicates that the column should be skipped.}
}
\value{
an H2OFrame with imputed values
}
\description{
Perform in-place imputation by filling missing values with aggregates
computed on the "na.rm'd" vector. Additionally, it's possible to perform imputation
based on groupings of columns from within data; these columns can be passed by index or
name to the by parameter. If a factor column is supplied, then the method must be
"mode".
}
\details{
The default method is selected based on the type of the column to impute. If the column
is numeric then "mean" is selected; if it is categorical, then "mode" is selected. Other
column types (e.g. String, Time, UUID) are not supported.
}
\examples{
\donttest{
 h2o.init()
 fr <- as.h2o(iris, destination_frame="iris")
 fr[sample(nrow(fr),40),5] <- NA  # randomly replace 40 values with NA
 # impute with a group by
 fr <- h2o.impute(fr, "Species", "mode", by=c("Sepal.Length", "Sepal.Width"))
}
}

