% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/source_data.R
\name{prepare_data}
\alias{prepare_data}
\title{Prepare source data}
\usage{
prepare_data(
  df,
  field_types,
  override_column_names = FALSE,
  na = c("", "NA", "NULL"),
  dataset_description = NULL,
  show_progress = TRUE
)
}
\arguments{
\item{df}{A data frame}

\item{field_types}{\code{\link[=field_types]{field_types()}} object specifying names and types of
fields (columns) in the supplied \code{df}. See also \link{field_types_available}.}

\item{override_column_names}{If \code{FALSE}, column names in the supplied \code{df}
must match the names specified in \code{field_types} exactly. If \code{TRUE}, column
names in the supplied \code{df} will be replaced with the names specified in
\code{field_types}. The specification must therefore contain the columns in the
correct order. Default = \code{FALSE}.}

\item{na}{Vector containing strings that should be interpreted as missing
values. Default = \code{c("", "NA", "NULL")}. Additional column-specific values
can be specified in the \code{\link[=field_types]{field_types()}} object.}

\item{dataset_description}{Short description of the dataset being checked.
This will appear on the report. If \code{NULL} (the default), the name of the
data frame object will be used.}

\item{show_progress}{Print progress to console. Default = \code{TRUE}.}
}
\value{
A \code{daiquiri_source_data} object
}
\description{
Validate a data frame against a \code{\link[=field_types]{field_types()}} specification, and prepare
it for aggregation.
}
\examples{
# locate the example prescriptions file shipped with the package and read it
example_file <- system.file(
  "extdata", "example_prescriptions.csv",
  package = "daiquiri"
)
raw_data <- read_data(example_file, delim = ",", col_names = TRUE)

# describe the name and type of each column in the data frame
prescription_field_types <- field_types(
  PrescriptionID = ft_uniqueidentifier(),
  PrescriptionDate = ft_timepoint(),
  AdmissionDate = ft_datetime(includes_time = FALSE),
  Drug = ft_freetext(),
  Dose = ft_numeric(),
  DoseUnit = ft_categorical(),
  PatientID = ft_ignore(),
  Location = ft_categorical(aggregate_by_each_category = TRUE)
)

# check the data against the specification and prepare it for aggregation
source_data <- prepare_data(
  df = raw_data,
  field_types = prescription_field_types,
  override_column_names = FALSE,
  na = c("", "NULL"),
  dataset_description = "Example data provided with package"
)

# display the resulting object
source_data
}
\seealso{
\code{\link[=field_types]{field_types()}}, \code{\link[=field_types_available]{field_types_available()}},
\code{\link[=aggregate_data]{aggregate_data()}}, \code{\link[=report_data]{report_data()}},
\code{\link[=daiquiri_report]{daiquiri_report()}}
}
