% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/baselinenowcast.R
\name{baselinenowcast.data.frame}
\alias{baselinenowcast.data.frame}
\title{Create a dataframe of nowcast results from a dataframe of cases
indexed by reference date and report date}
\usage{
\method{baselinenowcast}{data.frame}(
  data,
  scale_factor = 3,
  prop_delay = 0.5,
  output_type = c("samples", "point"),
  draws = 1000,
  uncertainty_model = fit_by_horizon,
  uncertainty_sampler = sample_nb,
  max_delay = NULL,
  delays_unit = "days",
  strata_cols = NULL,
  strata_sharing = "none",
  preprocess = preprocess_negative_values,
  ...
)
}
\arguments{
\item{data}{Data.frame in a long tidy format with counts by reference date
and report date for one or more strata. Must contain the following
columns:
\itemize{
\item \code{reference_date}: Column of type \code{Date} containing the dates
of the primary event occurrence.
\item \code{report_date}: Column of type \code{Date} containing the dates of
report of the primary event.
\item \code{count}: Column of numeric or integer indicating the new confirmed
counts pertaining to that reference and report date.
}

Additional columns indicating the columns which set the unit of a single
nowcast can be included. The user can specify these columns with the
\code{strata_cols} argument, otherwise it will be assumed that the \code{data}
contains only data for a single strata.}

\item{scale_factor}{Numeric value indicating the multiplicative factor on
the maximum delay to be used for estimation of delay and uncertainty.
Default is \code{3}.}

\item{prop_delay}{Numeric value less than 1 indicating the proportion of all
reference times in the reporting triangle to be used for delay
estimation. Default is \code{0.5}.}

\item{output_type}{Character string indicating whether the output should be
samples (\code{"samples"}) from the estimate with full uncertainty or whether to
return the point estimate (\code{"point"}). Default is \code{"samples"}. If
\code{"point"} estimates are specified, the minimum number of reference times
needed is the number needed for delay estimation, otherwise, if
\code{"samples"} are specified, at least 2 additional reference times are
required for uncertainty estimation.}

\item{draws}{Integer indicating the number of probabilistic draws to include
if \code{output_type} is \code{"samples"}. Default is 1000.}

\item{uncertainty_model}{Function that ingests a matrix of observations and a
matrix of predictions and returns a vector that can be used to
apply uncertainty using the same error model. Default is
\code{fit_by_horizon} with arguments of \code{obs} matrix of observations and
\code{pred} the matrix of predictions that fits each column (horizon)
to a negative binomial observation model by default. The user can
specify a different fitting model by replacing the
\code{fit_model} argument in \code{fit_by_horizon}.}

\item{uncertainty_sampler}{Function that ingests a vector or matrix of
predictions and a vector of uncertainty parameters and generates draws
from the observation model. Default is \code{sample_nb}, which expects the
argument \code{pred} for the vector of predictions along with a
corresponding vector of uncertainty parameters, and draws from a
negative binomial for each element of the vector.}

\item{max_delay}{Maximum delay (in units of \code{delays_unit}) to include in the
nowcast. If NULL (default), all delays in the data are used. If specified,
only observations with delay <= max_delay are included.}

\item{delays_unit}{Character string specifying the temporal granularity of
the delays. Options are \code{"days"}, \code{"weeks"}, \code{"months"}, \code{"years"}.
Default is \code{"days"}.}

\item{strata_cols}{Vector of character strings indicating the names of the
columns in \code{data} that determine how to stratify the data for nowcasting.
The unique combinations of the entries in the \code{strata_cols} denote the
unit of a single nowcast. Within a strata, each combination of reference
date and report date must appear only once. Default is \code{NULL}
which assumes that the data.frame being passed in represents a single
strata (only one nowcast will be produced). All columns that are not
part of the \code{strata_cols} will be removed.}

\item{strata_sharing}{Vector of character strings. Indicates if and what
estimates should be shared for different nowcasting steps. Options are
\code{"none"} for no sharing (each \code{strata_cols} is fully independent),
\code{"delay"} for delay sharing and \code{"uncertainty"} for uncertainty sharing.
Both \code{"delay"} and \code{"uncertainty"} can be passed at the same time.}

\item{preprocess}{Function to apply to the reporting triangle before
estimation, or NULL to skip preprocessing. Default is
\code{\link[=preprocess_negative_values]{preprocess_negative_values()}}, which handles negative values by
redistributing them to earlier delays. Set to NULL if you want to preserve
negative values. Custom preprocess functions must accept a \code{validate}
parameter (defaults to TRUE) to enable validation optimisation in internal
function chains.}

\item{...}{Additional arguments passed to
\code{\link[=estimate_uncertainty]{estimate_uncertainty()}}
and \code{\link[=sample_nowcast]{sample_nowcast()}}.}
}
\value{
Data.frame of class \code{\link{baselinenowcast_df}}
}
\description{
This function ingests a data.frame with the number of incident
cases indexed by reference date and report date for one or multiple
strata, which define the unit of a single nowcast (e.g. age groups or
locations). It returns a data.frame containing nowcasts by reference
date for each strata, which are by default estimated independently.
This function will by default estimate uncertainty using
past retrospective nowcast errors and generate probabilistic nowcasts,
which are samples from the predictive distribution of the estimated final
case count at each reference date.

This function implements the full nowcasting workflow on multiple reporting
triangles, generating estimates of the delay and uncertainty parameters
for all strata using estimates from across strata if specified.
\enumerate{
\item \code{\link[=estimate_delay]{estimate_delay()}} - Estimates a delay PMF across strata if
\code{strata_sharing} contains \code{"delay"}
\item \code{\link[=estimate_uncertainty_retro]{estimate_uncertainty_retro()}} - Estimates uncertainty parameters
across strata if \code{strata_sharing} contains \code{"uncertainty"}
\item \code{\link[=as_reporting_triangle]{as_reporting_triangle()}} - Generates a reporting triangle object
from a data.frame
\item \code{\link[=baselinenowcast.reporting_triangle]{baselinenowcast.reporting_triangle()}} - Generates point or
probabilistic nowcasts depending on \code{output_type} for each strata.
}

}
\details{
See documentation for the arguments of this function which can be
used to set the model specifications (things like number of reference times
for delay and uncertainty estimation, the observation model, etc.).
The function expects that each strata in
the dataframe has the same maximum delay. If sharing estimates across
all strata, the shared estimates will be made using the shared set of
reference and report dates across strata.
}
\examples{
# Filter data to exclude most recent report dates and limit to 75
# reference dates
max_ref_date <- max(germany_covid19_hosp$reference_date)
min_ref_date <- max_ref_date - 74
covid_data_to_nowcast <- germany_covid19_hosp[
  germany_covid19_hosp$report_date < max_ref_date &
    germany_covid19_hosp$reference_date >= min_ref_date,
]
nowcasts_df <- baselinenowcast(covid_data_to_nowcast,
  max_delay = 25,
  strata_cols = c("age_group", "location"),
  draws = 100
)
nowcasts_df
}
\seealso{
Main nowcasting interface functions
\code{\link{assert_baselinenowcast_df}()},
\code{\link{baselinenowcast}()},
\code{\link{baselinenowcast.reporting_triangle}()},
\code{\link{baselinenowcast_df-class}},
\code{\link{new_baselinenowcast_df}()}
}
\concept{baselinenowcast_df}
