% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/getMatches.R
\name{getMatches}
\alias{getMatches}
\title{Search for tokens.}
\usage{
getMatches(
  labbcat.url,
  pattern,
  participant.ids = NULL,
  transcript.types = NULL,
  main.participant = TRUE,
  aligned = FALSE,
  matches.per.transcript = NULL,
  words.context = 0,
  max.matches = NULL,
  no.progress = FALSE
)
}
\arguments{
\item{labbcat.url}{URL to the LaBB-CAT instance}

\item{pattern}{An object representing the pattern to search for.

Strictly speaking, this should be a named list that replicates the structure of the
    `search matrix' in the LaBB-CAT browser interface, with one element called
    ``columns'', containing a named list for each column.

Each element in the ``columns'' named list contains an element named ``layers'', whose
    value is a named list for patterns to match on each layer, and optionally an
    element named ``adj'', whose value is a number representing the maximum distance, in
    tokens, between this column and the next column - if ``adj'' is not specified, the
    value defaults to 1, so tokens are contiguous.

Each element in the ``layers'' named list is named after the layer it matches, and the
    value is a named list with the following possible elements:
\itemize{
 \item{\emph{pattern}  A regular expression to match against the label}
 \item{\emph{min}  An inclusive minimum numeric value for the label}
 \item{\emph{max}  An exclusive maximum numeric value for the label}
 \item{\emph{not}  TRUE to negate the match}
 \item{\emph{anchorStart}  TRUE to anchor to the start of the annotation on this layer
    (i.e. the matching word token will be the first at/after the start of the matching
    annotation on this layer)}
 \item{\emph{anchorEnd}  TRUE to anchor to the end of the annotation on this layer
    (i.e. the matching word token will be the last before/at the end of the matching
    annotation on this layer)}
 \item{\emph{target}  TRUE to make this layer the target of the search; the results will
    contain one row for each match on the target layer}
}

Examples of valid pattern objects include:
\preformatted{
## words starting with 'ps...'
pattern <- list(columns = list(
    list(layers = list(
           orthography = list(pattern = "ps.*")))))

## the word 'the' followed immediately or with one intervening word by
## a hapax legomenon (word with a frequency of 1) that doesn't start with a vowel
pattern <- list(columns = list(
    list(layers = list(
           orthography = list(pattern = "the")),
         adj = 2),
    list(layers = list(
           phonemes = list(not = TRUE, pattern = "[cCEFHiIPqQuUV0123456789~#\\\\$@].*"),
           frequency = list(max = "2")))))
}
For ease of use, the function will also accept the following abbreviated forms:

\preformatted{
## a single list representing a 'one column' search, 
## and string values, representing regular expression pattern matching
pattern <- list(orthography = "ps.*")

## a list containing the columns (adj defaults to 1, so matching tokens are contiguous)...
pattern <- list(
    list(orthography = "the"),
    list(phonemes = list(not = TRUE, pattern = "[cCEFHiIPqQuUV0123456789~#\\\\$@].*"),
         frequency = list(max = "2")))
}}

\item{participant.ids}{An optional list of participant IDs to search the utterances of. If
not supplied, all utterances in the corpus will be searched.}

\item{transcript.types}{An optional list of transcript types to limit the results
to. If NULL, all transcript types will be searched.}

\item{main.participant}{TRUE to search only main-participant utterances, FALSE to
search all utterances.}

\item{aligned}{TRUE to include only words that are aligned (i.e. have anchor
confidence of at least 50), FALSE to include un-aligned words as well.}

\item{matches.per.transcript}{Optional maximum number of matches per transcript to
return. NULL means all matches.}

\item{words.context}{Number of words of context to include in the `Before.Match' and
`After.Match' columns in the results.}

\item{max.matches}{The maximum number of matches to return, or NULL to return all.}

\item{no.progress}{Optionally suppress the progress bar when
multiple fragments are specified - TRUE for no progress bar.}
}
\value{
A data frame identifying matches, containing the following columns:
\itemize{
 \item{\emph{SearchName} A name based on the pattern -- the same for all rows}
 \item{\emph{Number} Row number}
 \item{\emph{Transcript} Name of the transcript in which the match was found}
 \item{\emph{Line} The start offset of the utterance/line}
 \item{\emph{LineEnd} The end offset of the utterance/line}
 \item{\emph{MatchId} A unique ID for the matching target token}
 \item{\emph{Before.Match} Transcript text immediately before the match}
 \item{\emph{Text} Transcript text of the match}
 \item{\emph{After.Match} Transcript text immediately after the match}
 \item{\emph{Target.transcript} Text of the target word token}
 \item{\emph{Target.transcript.start} Start offset of the target word token}
 \item{\emph{Target.transcript.end} End offset of the target word token}
 \item{\emph{Target.segments} Label of the target segment (only present if the segment
    layer is included in the pattern)}
 \item{\emph{Target.segments.start} Start offset of the target segment (only present if the
    segment layer is included in the pattern)}
 \item{\emph{Target.segments.end} End offset of the target segment (only present if the
    segment layer is included in the pattern)}
}
}
\description{
Searches through transcripts for tokens matching the given pattern.
}
\examples{
\dontrun{
## define the LaBB-CAT URL
labbcat.url <- "https://labbcat.canterbury.ac.nz/demo/"

## create a pattern object to match against
pattern <- list(columns = list(
    list(layers = list(
           orthography = list(pattern = "the")),
         adj = 2),
    list(layers = list(
           phonemes = list(not=TRUE, pattern = "[cCEFHiIPqQuUV0123456789~#\\\\$@].*"),
           frequency = list(max = "2")))))

## get the tokens matching the pattern
results <- getMatches(labbcat.url, pattern)

## results$MatchId can be used to access results
}

}
\seealso{
\code{\link{getParticipantIds}}
}
\keyword{search}
