% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/links.R
\name{links}
\alias{links}
\title{Multistage record linkage}
\usage{
links(
  criteria,
  sub_criteria = NULL,
  sn = NULL,
  strata = NULL,
  data_source = NULL,
  data_links = "ANY",
  display = "none",
  group_stats = FALSE,
  expand = TRUE,
  shrink = FALSE,
  recursive = "none",
  check_duplicates = FALSE,
  tie_sort = NULL,
  batched = "yes",
  repeats_allowed = FALSE,
  permutations_allowed = FALSE,
  ignore_same_source = FALSE
)
}
\arguments{
\item{criteria}{\code{[list|atomic]}. Ordered list of attributes to be compared. Each element of the list is a stage in the linkage process. See \code{Details}.}

\item{sub_criteria}{\code{[list|\link{sub_criteria}]}. Nested match criteria. This must be paired with a stage of the linkage process (\code{criteria}). See \code{\link{sub_criteria}}.}

\item{sn}{\code{[integer]}. Unique record ID.}

\item{strata}{\code{[atomic]}. Subsets of the dataset. Record-groups are created separately for each \code{strata}. See \code{Details}.}

\item{data_source}{\code{[character]}. Source ID for each record. If provided, a list of all sources in each record-group is returned. See \code{\link[=pid-class]{pid_dataset slot}}.}

\item{data_links}{\code{[list|character]}. \code{data_source} required in each \code{\link[=pid-class]{pid}}. A record-group without records from these \code{data_sources} will be \code{\link[=delink]{unlinked}}. See \code{Details}.}

\item{display}{\code{[character]}. Display progress updates and/or generate a linkage report for the analysis. Options are: \code{"none"} (default), \code{"progress"}, \code{"stats"}, \code{"none_with_report"}, \code{"progress_with_report"} or \code{"stats_with_report"}.}

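% NOTE(review): usage shows group_stats = FALSE, but this entry documents a character selection with "XX" placeholders; confirm the real options in R/links.R.
\item{group_stats}{\code{[character]}. A selection of group-specific information to be returned for each record-group. Most are added to slots of the \code{\link[=pid-class]{pid}} object.
Options are \code{NULL} or any combination of \code{"XX"}, \code{"XX"} and \code{"XX"}.}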

\item{expand}{\code{[logical]}. If \code{TRUE}, a record-group gains new records if a match is found at the next stage of the linkage process. \emph{Not interchangeable with \code{shrink}}.}

\item{shrink}{\code{[logical]}. If \code{TRUE}, a record-group loses existing records if no match is found at the next stage of the linkage process. \emph{Not interchangeable with \code{expand}}.}

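% NOTE(review): usage shows recursive = "none", but this entry documents a logical; confirm the accepted values in R/links.R.
\item{recursive}{\code{[logical]}. If \code{TRUE}, within each iteration of the process, a match can spawn new matches. Ignored when \code{batched} is \code{"no"}.}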

\item{check_duplicates}{\code{[logical]}. If \code{TRUE}, within each iteration of the process, duplicate values of an attribute are not checked. The outcome of the logical test on the first instance of the value will be recycled for the duplicate values. Ignored when \code{batched} is \code{"no"}.}

\item{tie_sort}{\code{[atomic]}. Preferential order for breaking match ties within an iteration of record linkage.}

\item{batched}{\code{[character]}. Determines if record-pairs are created and compared in batches. Options are \code{"yes"}, \code{"no"} or \code{"semi"}.}

\item{repeats_allowed}{\code{[logical]}. If \code{TRUE}, pairs made up of repeat records are created and compared. Only used when \code{batched} is \code{"no"}.}

\item{permutations_allowed}{\code{[logical]}. If \code{TRUE}, permutations of record-pairs are created and compared. Only used when \code{batched} is \code{"no"}.}

\item{ignore_same_source}{\code{[logical]}. If \code{TRUE}, only record-pairs made up of records from different \code{data_source} values are created and compared.}
}
\value{
\code{\link[=pid-class]{pid}}; \code{list}
}
\description{
Assign records to unique groups based on an ordered set of match criteria.
}
\details{
The priority of matches decreases with each subsequent stage of the linkage process.
Therefore, the attributes in \code{criteria} should be in an order of decreasing relevance.

Records with missing data (\code{NA}) for a \code{criteria} are
skipped at the respective stage, while records with
missing data for \code{strata} are skipped at every stage.

If a record is skipped from a stage, another attempt will be made to
match the record at the next stage. If a record is still unmatched
by the last stage, it is assigned a unique group ID.

A \code{\link{sub_criteria}} adds nested match criteria
to each stage of the linkage process. If used, only
records with a matching \code{criteria} and \code{sub_criteria} are linked.

In \bold{\code{\link{links}}}, each \code{\link{sub_criteria}} must
be linked to a \code{criteria}. This is done by adding each \code{\link{sub_criteria}}
to a named element of a list - "cr" concatenated with
the corresponding stage's number.
For example, 3 \code{sub_criteria} linked to
\code{criteria} 1, 5 and 13 will be:

\preformatted{list(cr1 = sub_criteria(...), cr5 = sub_criteria(...), cr13 = sub_criteria(...))}

Any unlinked \code{\link{sub_criteria}} will be ignored.

Each element of \code{data_links} should be named \code{"l"} (links) or \code{"g"} (groups).
Unnamed elements of \code{data_links} are treated as \code{"l"}.
\itemize{
\item If named \code{"l"}, groups that do not include records from every listed \code{data_source} will be unlinked.
\item If named \code{"g"}, groups that do not include records from at least one listed \code{data_source} will be unlinked.
}

See \code{vignette("links")} for more information.
}
\examples{
# Load the example dataset and keep a working copy
data(patient_records)
dfr <- patient_records
# An exact match on surname followed by an exact match on forename.
# Each list element is one stage of the linkage process, in list order.
stages <- as.list(dfr[c("surname", "forename")])
p1 <- links(criteria = stages)

# An exact match on forename followed by an exact match on surname
# (the same stages in reverse order)
p2 <- links(criteria = rev(stages))

# Nested matches
# Same sex OR same birth year (year taken from dateofbirth)
m.cri.1 <- sub_criteria(
  format(dfr$dateofbirth, "\%Y"), dfr$sex,
  operator = "or")

# Same middle name AND birth years no more than 10 years apart.
# age_diff returns TRUE when the absolute difference between x and y
# is a whole number from 0 to 10, and FALSE otherwise (including NA).
age_diff <- function(x, y){
  diff <- abs(as.numeric(x) - as.numeric(y))
  wgt <-  diff \%in\% 0:10 & !is.na(diff)
  wgt
}
# NOTE(review): match_funcs are presumably paired with the attributes
# in the order given (age_diff for birth year, exact_match for
# middlename) - confirm against sub_criteria.
m.cri.2 <- sub_criteria(
  format(dfr$dateofbirth, "\%Y"), dfr$middlename,
  operator = "and",
  match_funcs = c(age_diff, exact_match))

# Nested match criteria 'm.cri.1' OR 'm.cri.2'
n.cri <- sub_criteria(
  m.cri.1, m.cri.2,
  operator = "or")

# Record linkage with additional match criteria.
# The names cr1 and cr2 attach each sub_criteria to stages 1 and 2.
p3 <- links(
  criteria = stages,
  sub_criteria = list(cr1 = m.cri.1,
                      cr2 = m.cri.2))

# Record linkage with additional nested match criteria
# (the same nested sub_criteria is attached to both stages)
p4 <- links(
  criteria = stages,
  sub_criteria = list(cr1 = n.cri,
                      cr2 = n.cri))

# Store each linkage result as a column of the dataset for comparison
dfr$p1 <- p1; dfr$p2 <- p2
dfr$p3 <- p3; dfr$p4 <- p4

head(dfr)

}
\seealso{
\code{\link{links_af_probabilistic}}; \code{\link{episodes}};
\code{\link{predefined_tests}}; \code{\link{sub_criteria}}
}
