% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/categorize.R
\name{categorize}
\alias{categorize}
\title{Categorize codes according to a mapping}
\usage{
categorize(x, map, id_name, code_name, return_df = FALSE,
  return_binary = FALSE, restore_id_order = TRUE, unique_ids = FALSE,
  preserve_visit_id_type = FALSE, comorbid_fun = comorbidMatMul,
  factor_fun = factor_nosort_rcpp, ...)
}
\arguments{
\item{x}{Data frame containing a column for an 'id' and a column for a code,
e.g., an ICD-10 code.}

\item{map}{Named list containing vectors of codes, e.g., ICD-9 codes. For
example, the AHRQ ICD-9 comorbidity map contains \code{list(OBESE = c("2780", "27800", "27801",
"27803", "V8554", "79391", "64910", "64911", "64912", "64913", "64914",
"V8530", "V8531", "V8532", "V8533", "V8534", "V8535", "V8536", "V8537",
"V8538", "V8539", "V8541", "V8542", "V8543", "V8544", "V8545" ), DEPRESS =
c("3004", "30112", "3090", "3091", "311"))}, amongst other longer groups.}

\item{id_name}{The name of the \code{data.frame} field which is the unique
identifier.}

\item{code_name}{String with name of column containing the codes.}

\item{return_df}{Single logical value; if \code{TRUE}, return the result as a
data frame whose first column is the visit ID (the column named by
\code{id_name}) and whose remaining columns are the categories defined in
\code{map}. If the visit ID column was a factor or named differently in the
input, this is preserved.}

\item{return_binary}{Logical value, if \code{TRUE}, the output will be in 0s
and 1s instead of TRUE and FALSE.}

\item{restore_id_order}{Logical value; if \code{TRUE}, the default, the order
of the visit IDs in the output will match the order in which the visit IDs
are first encountered in the input data. Restoring this order takes about a
third of the calculation time on data with tens of millions of rows, so, if
the visit IDs will be discarded when summarizing data, this can be set to
\code{FALSE} for a big speed-up.}

\item{unique_ids}{Single logical value; if \code{TRUE}, the visit IDs in the
column given by \code{id_name} are assumed to be unique. Otherwise, the
default action is to ensure they are unique.}

\item{preserve_visit_id_type}{Single logical value; if \code{TRUE}, the visit
ID column will be converted back to its original type. The default of
\code{FALSE} means only \code{factor} and \code{character} types are
restored in the returned data frame. For matrices, the row names are
necessarily stored as character vectors.}

\item{comorbid_fun}{Function, i.e., the function symbol (not a character
string), to be called to do the comorbidity calculation.}

\item{factor_fun}{function symbol to call to generate factors. Default is a
very simple \code{Rcpp} implementation \code{factor_nosort_rcpp}.}
}
\description{
This is the function which optimizes the input data for the categorization,
and forms the core of the package, along with the C++ matrix code.
}
\examples{
\dontrun{
u <- uranium_pathology
m <- icd10_map_ahrq
u$icd10 <- decimal_to_short(u$icd10)
j <- categorize(u, m, id_name = "case", code_name = "icd10")
}
}
\keyword{internal}
