% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/episode_group.R
\name{episode_group}
\alias{episode_group}
\alias{fixed_episodes}
\alias{rolling_episodes}
\title{Episode grouping for record deduplication and case assignment}
\usage{
episode_group(df, sn = NULL, strata = NULL, date, case_length,
  episode_type = "fixed", episode_unit = "days", episodes_max = Inf,
  recurrence_length = NULL, rolls_max = Inf, data_source = NULL,
  custom_sort = NULL, from_last = FALSE, overlap_method = c("across",
  "inbetween", "aligns_start", "aligns_end", "chain"),
  bi_direction = FALSE, group_stats = FALSE, display = TRUE)

fixed_episodes(x, strata = NULL, case_length, episodes_max = Inf,
  from_last = FALSE, overlap_method = c("across", "inbetween",
  "aligns_start", "aligns_end", "chain"), deduplicate = FALSE,
  display = TRUE)

rolling_episodes(x, strata = NULL, case_length,
  recurrence_length = NULL, from_last = FALSE,
  overlap_method = c("across", "inbetween", "aligns_start", "aligns_end",
  "chain"), deduplicate = FALSE, display = TRUE)
}
\arguments{
\item{df}{\code{data.frame}. One or more datasets appended together.}

\item{sn}{Unique \code{numeric} record identifier. Optional.}

\item{strata}{Column names. Episodes will be unique to each \code{strata}. \code{\link{record_group}} can be used to create \code{strata} within datasets.}

\item{date}{Record date or interval. \code{date}, \code{datetime} or \code{\link{number_line}} objects.}

\item{case_length}{Period from a \code{"Case"} within which another record of the same \code{strata} is considered a \code{"Duplicate"} record.}

\item{episode_type}{\code{"fixed"} or \code{"rolling"}.}

\item{episode_unit}{Time units as supported by lubridate's \code{\link[lubridate]{duration}} function.}

\item{episodes_max}{Maximum number of times to group episodes within each \code{strata}.}

\item{recurrence_length}{Period from the last record of an episode within which another record of the same \code{strata} is considered a \code{"Recurrent"} record. If a \code{recurrence_length} is not supplied, the \code{case_length} is used.}

\item{rolls_max}{Maximum number of recurrences permitted within each episode. Only used if \code{episode_type} is \code{"rolling"}.}

\item{data_source}{Unique dataset identifier for the \code{data.frame}. Useful when \code{data.frame} contains multiple datasets.}

\item{custom_sort}{Column name(s) defining a preferential sort order. If supplied, \code{"Case"} assignment will be in preference to this sort order. Useful in specifying that episode grouping begins at a particular kind of record regardless of chronological order.}

\item{from_last}{If \code{TRUE}, episode grouping will be backwards in time - starting at the most recent record and proceeding to the earliest. If \code{FALSE}, it'll be forward in time - starting at the earliest record and proceeding to the most recent one.}

\item{overlap_method}{A set of methods for grouped intervals to overlap. Options are: \code{"across"}, \code{"aligns_start"}, \code{"aligns_end"}, \code{"inbetween"} and \code{"chain"}. See \code{\link{overlap}} functions.}

\item{bi_direction}{If \code{FALSE}, \code{"Duplicate"} records will be those within the \code{case_length} and \code{recurrence_length}, before or after the \code{"Case"} as determined by \code{from_last}. If \code{TRUE}, \code{"Duplicate"} records will be those on both sides of the \code{"Case"}.}

\item{group_stats}{If \code{TRUE}, the output will include additional columns with useful stats for each episode group.}

\item{display}{If \code{TRUE}, status messages are printed on screen.}

\item{x}{Record date or interval. \code{date}, \code{datetime}, \code{number_line} objects or other \code{numeric} based objects.}

\item{deduplicate}{If \code{TRUE}, retains only the \code{"Case"} record from each episode group.}
}
\value{
\code{episode_group} - \code{data.frame}

\itemize{
\item \code{sn} - unique record identifier as provided
\item \code{epid} - unique episode identifier
\item \code{case_nm} - record type with regard to case assignment
\item \code{epid_dataset} - datasets contained in each episode
\item \code{epid_interval} - Episode start and end dates. \code{\link{number_line}} object.
\item \code{epid_length} - Difference between episode start and end dates. \code{difftime} object. If possible, the same unit supplied to \code{episode_unit} is used; otherwise, a difference in days is returned.
\item \code{epid_total} - number of records in each episode
}

\code{fixed_episodes} and \code{rolling_episodes} - \code{number_line}.
\itemize{
\item \code{id} - unique record identifier as provided
\item \code{gid} - unique episode identifier
\item \code{start} - Episode start dates
\item \code{.Data} - Difference between episode start and end dates. \code{numeric} object
}

Use \code{\link{number_line_width}} to extract the episode length (the equivalent of \code{epid_length})

Use \code{\link{right_point}} or \code{\link{end_point}} to extract the episode end date
}
\description{
Group records into chronological episodes
}
\details{
Episode grouping begins at a reference record (\code{"Case"}) and proceeds forward or backward in time depending on \code{from_last}.
If \code{custom_sort} is used, episode grouping can be forced to begin at a particular record before proceeding forward or backward in time.
The maximum duration of a \code{"fixed"} episode is the \code{case_length}, while the maximum duration of a \code{"rolling"} episode is the
\code{case_length} in addition to all recurrence periods. A recurrence period is the \code{recurrence_length} from the last record in an episode.

\code{fixed_episodes} and \code{rolling_episodes} are more convenient implementations of \code{episode_group}.
However, these are less efficient in dealing with large datasets, and lack the following features:
\code{custom_sort}, \code{rolls_max}, \code{episodes_max} (\code{rolling_episodes} only), \code{data_source}, \code{episode_unit}, \code{bi_direction} and \code{group_stats}.
}
\examples{
library(dplyr)
library(lubridate)
library(diyar)

data(infections); infections

# 16-hour (difference of 15 hours) episodes beginning from the most recent record
epids <- episode_group(infections, sn = rd_id, date = date, case_length = epi_len,
from_last = TRUE, episode_unit = "hours", group_stats = TRUE)
left_join(infections, epids, by=c("rd_id"="sn"))

# One rolling episode per strata. Initial case_length of 16 days (difference of 15 days) and
# one recurrence period of 31 days (difference of 30 days)
infections$recur <- 30
epids <- episode_group(infections, date=date, case_length = epi_len, episode_type = "rolling",
recurrence_length = recur, episodes_max = 1, rolls_max = 1, display = FALSE, group_stats = TRUE)
bind_cols(infections, epids)

# User defined case assignment
# Preference for case assignment - UTI > BSI > RTI
infections$infx <- factor(infections$infection, levels = c("UTI","BSI","RTI"))

# Different case and recurrence lengths for different sources of infection
infections <- mutate(infections,
                     epi_len = case_when(
                       infection == "BSI" ~ 14,
                       infection == "UTI" ~ 30,
                       infection == "RTI" ~ 60
                     )
)

# n-day episodes beginning with the earliest record with the specified preference; UTI > BSI > RTI
epids <- episode_group(infections, rd_id, date=date, case_length = epi_len,
                       custom_sort = infx, group_stats = TRUE,  display = FALSE)
bind_cols(infections, epids)

# Another preference - RTI > UTI, or  RTI > BSI, or earliest record
infections$infx <- ifelse(infections$infection =="RTI",0,1)
epids <- episode_group(infections, rd_id, date=date, case_length = epi_len,
custom_sort = infx, from_last = TRUE, bi_direction = TRUE, display = FALSE, group_stats = TRUE)
bind_cols(infections, epids)

# Stratified episode grouping
infections$patient_id <- c(rep("PID 1",8), rep("PID 2",3))

# Only three 9-day (difference of 8 days) rolling episodes per patient and infection.
infections$epi_len <- 8
epids <- episode_group(infections, rd_id, date=date, strata = c(patient_id, infection),
case_length = epi_len, episode_type = "rolling", recurrence_length = recur, episodes_max = 3,
data_source = c(patient_id, infection), display = FALSE)

bind_cols(infections, epids)

# Interval grouping
data(hospital_admissions); hospital_admissions

hospital_admissions$admin_period <- number_line(hospital_admissions$admin_dt,
hospital_admissions$discharge_dt)
hospital_admissions <- select(hospital_admissions, -c(discharge_dt, admin_dt))

# Episodes of overlapping intervals of admission
epids <- episode_group(hospital_admissions, date=admin_period,
sn=rd_id, case_length = epi_len, group_stats = TRUE)
bind_cols(hospital_admissions, epids)

# Overlapping intervals of admission separated by 1 month
hospital_admissions$epi_len <- 1
epids <- episode_group(hospital_admissions, date=admin_period, sn=rd_id,
case_length = epi_len, episode_unit = "months")
bind_cols(hospital_admissions, epids)

# Episodes of chained intervals, and those with aligned end periods
hospital_admissions$epi_len <- 0
epids <- episode_group(hospital_admissions, date=admin_period, sn=rd_id,
case_length = epi_len, overlap_method = c("chain","aligns_end"))
bind_cols(hospital_admissions, epids)

# Convenient versions of episode_group
# Episodes from time points
dts <- c("13/04/2019", "01/04/2019", "05/05/2019", "10/04/2019", "01/05/2019")
dts <- as.Date(dts, "\%d/\%m/\%Y")
dts

epids <- fixed_episodes(dts, case_length = 5, display = FALSE)
epids; str(epids); unique(epids)

# Episodes from time periods
pds <- as.number_line(dts)
pds <- expand_number_line(pds, 1, "end")
pds

epids <- rolling_episodes(pds, case_length = 5, recurrence_length =11,
deduplicate = TRUE, display = FALSE)
epids; str(epids)

db_a <- infections
db_b <- mutate(db_a, epid_interval= fixed_episodes(x = date, case_length = epi_len,
strata = infection, from_last = FALSE, display = FALSE, deduplicate = FALSE))

db_b
str(db_b$epid_interval)
db_b$epid <- db_b$epid_interval$gid
db_b

}
\seealso{
\code{\link{record_group}}, \code{\link{overlap}} and \code{\link{number_line}}
}
