% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ithresh.R
\name{ithresh}
\alias{ithresh}
\title{Threshold selection in the i.i.d. case (peaks over threshold)}
\usage{
ithresh(data, u_vec, ..., n_v = 1, npy = NULL, use_rcpp = TRUE)
}
\arguments{
\item{data}{A numeric vector of observations.  Any missing values will
be removed.  The argument \code{npy} (see below) may be supplied
as an attribute of \code{data} using \code{attr(data, "npy") <- value},
where \code{value} is the value of \code{npy} (see \code{\link{attr}}).
If \code{npy} is supplied twice, both as \code{attr(data, "npy")}
and using the \code{npy} argument, then the former is used.}

\item{u_vec}{A numeric vector. A vector of \emph{training} thresholds
at which inferences are made from a binomial-GP model.  These could be
set at sample quantiles of  \code{data} using
\code{\link[stats]{quantile}}.  Any duplicated values will be removed.}

\item{...}{Further (optional) arguments to be passed to the
  \code{\link[revdbayes]{revdbayes}} function
  \code{\link[revdbayes]{rpost_rcpp}} (or \code{\link[revdbayes]{rpost}}),
  which use the generalized ratio-of-uniforms method to simulate from
  extreme value posterior distributions.
  In particular:
\itemize{
  \item {\code{n}} {The size of the posterior sample used to perform
    predictive inference.  Default: \code{n = 1000}.}
  \item {\code{prior}} {A prior for GP parameters to be passed to the
    \strong{revdbayes} function \code{\link[revdbayes]{set_prior}}.
    Can be either a character scalar that chooses an in-built prior,
    or a user-supplied R function or pointer to a compiled C++ function.
    See the \code{\link[revdbayes]{set_prior}} documentation for details
    of the in-built priors.
    See the \strong{revdbayes} vignette
    \href{https://cran.r-project.org/package=revdbayes}{Faster simulation
    using revdbayes} for information about creating
    a pointer to a C++ function. See also the \strong{Examples} section.

    If the user supplies an R function then
    \code{\link[revdbayes]{rpost}} will be used for posterior simulation,
    rather than (the faster) \code{\link[revdbayes]{rpost_rcpp}},
    regardless of the input value of \code{use_rcpp}.

    Default: \code{prior = "mdi"} with \code{a = 0.6} and \code{min_xi = -1}.
    This particular prior is studied in
    \href{https://doi.org/10.1111/rssc.12159}{Northrop et al. (2017)}.}
  \item {\code{h_prior}} {A \emph{list} of further arguments
    (hyperparameters) for the GP prior specified in \code{prior}.}
  \item {\code{bin_prior}} {A character scalar that chooses an in-built
    prior for the threshold exceedance probability \eqn{p}, to be passed to
    the \strong{revdbayes} function \code{\link[revdbayes]{set_bin_prior}}.
    Only relevant if \code{prior == "beta"}.

    Default: \code{bin_prior = "jeffreys"}, i.e. Beta(1/2, 1/2).}
  \item {\code{h_bin_prior}} {A \emph{list} of further arguments
    (hyperparameters) for the binomial prior specified in \code{bin_prior}.
    See the \code{\link[revdbayes]{set_bin_prior}} documentation for details
    of the in-built priors.}
  \item {\code{trans}} {A character scalar: either \code{"none"} or
    \code{"BC"}.  See \code{\link{rpost_rcpp}} for details.
    The default is \code{"none"}, which is usually faster than \code{"BC"}.
    However, if there are very few threshold excesses then using
    \code{trans = "BC"} can make the optimizations involved in the
    generalized ratio-of-uniforms algorithm more stable.  If using
    \code{trans = "none"} produces an error for a particular posterior
    simulation then \code{trans = "BC"} is used instead.}
}}

\item{n_v}{A numeric scalar.
Each of the \code{n_v} largest values in \code{u_vec} will be used
(separately) as a \emph{validation} threshold for the training thresholds
in \code{u_vec} that lie at or below that validation threshold.
If \code{n_v = 1} then all the training thresholds are used with
validation performed using the threshold \code{u_vec[length(u_vec)]}.
If \code{n_v = 2} then, in addition, the assessment is performed using
\code{u_vec[1], ..., u_vec[length(u_vec) - 1]} with
validation threshold \code{u_vec[length(u_vec) - 1]},
and so on.}

\item{npy}{A numeric scalar. The mean number of observations per year
  of data, after excluding any missing values, i.e. the number of
  non-missing observations divided by total number of years of non-missing
  data.  May instead be supplied as an attribute of \code{data}, using
  \code{attr(data, "npy")}.

  The value of \code{npy} does not affect any calculation in
  \code{ithresh}; it only affects subsequent extreme value inferences using
  \code{predict.ithresh}.  However, setting \code{npy} in the call to
  \code{ithresh}, or as an attribute of \code{data}, avoids the need to
  supply \code{npy} when calling \code{predict.ithresh}.}

\item{use_rcpp}{A logical scalar.  If \code{TRUE} (the default) the
revdbayes function \code{\link[revdbayes]{rpost_rcpp}} is used for
posterior simulation.  If \code{FALSE}, or if the user supplies an R
function to set the prior for GP parameters,
the (slower) function \code{\link[revdbayes]{rpost}} is used.}
}
\value{
An object (list) of class \code{"ithresh"}, containing the
  components
  \itemize{
    \item{\code{pred_perf}:} A numeric matrix with \code{length(u_vec)}
    rows and \code{n_v} columns.  Each column contains the values of
    the measure of predictive performance.  Entries corresponding
    to cases where the training threshold is above the validation
    threshold will be \code{NA}.
    \item{\code{u_vec}:} The argument \code{u_vec} to \code{ithresh}.
    \item{\code{v_vec}:} A numeric vector.  The validation thresholds
      implied by the argument \code{n_v} to \code{ithresh}.
    \item{\code{u_ps}:} A numeric vector. The approximate levels of the
      sample quantiles to which the values in \code{u_vec} correspond,
      i.e. the approximate percentage of the data that lie at or below
      each element in \code{u_vec}.
    \item{\code{v_ps}:} A numeric vector.  The values in \code{u_ps}
      that correspond to the validation thresholds.
    \item{\code{sim_vals}:} A numeric matrix with 4 columns and
      \code{n} x \code{length(u_vec)} rows.  The \eqn{j}th block of
      \code{n} rows contains in columns 1-3 the posterior samples of
      the threshold exceedance probability, the GP scale
      parameter and the GP shape parameter respectively, and in
      column 4 the value of \eqn{j}.
    \item{\code{n}:} A numeric scalar.  The value of \code{n}.
    \item{\code{npy}:} A numeric scalar.  The value of \code{npy}.
    \item{\code{data}:} The argument \code{data} to \code{ithresh}
      detailed above, with any missing values removed.
    \item{\code{use_rcpp}:} A logical scalar indicating whether
      \code{\link[revdbayes]{rpost_rcpp}} (\code{use_rcpp = TRUE}) or
      \code{\link[revdbayes]{rpost}} (\code{use_rcpp = FALSE})
      was used for posterior simulation.
    \item{\code{for_post}:} A list containing arguments with which
      \code{\link[revdbayes]{rpost_rcpp}}
      (or \code{\link[revdbayes]{rpost}}) was called, including
      any user-supplied arguments to these functions.
  }
}
\description{
Produces a diagnostic plot to assist in the selection of an extreme value
threshold in the case where the data can be treated as independent and
identically distributed (i.i.d.) observations.  For example, it could be
that these observations are the cluster maxima resulting from the
declustering of time series data.  The predictive ability of models
fitted using each of a user-supplied set of thresholds is assessed using
leave-one-out cross-validation in a Bayesian setup.
These models are based on a Generalized Pareto (GP) distribution for
threshold excesses and a binomial model for the probability of threshold
exceedance.  See
\href{https://doi.org/10.1111/rssc.12159}{Northrop et al. (2017)}
for details.
}
\details{
For a given threshold in \code{u_vec}:
\itemize{
  \item {the number of values in \code{data} that exceed the threshold,
    and the amounts (the \emph{threshold excesses}) by which these values
    exceed the threshold are calculated;}
  \item {\code{\link[revdbayes]{rpost_rcpp}}
    (or \code{\link[revdbayes]{rpost}}) is used to sample from the posterior
    distributions of the parameters of a GP model for the threshold
    excesses and a binomial model for the probability of threshold
    exceedance;}
  \item {the ability of this binomial-GP model to predict data
    thresholded at the validation threshold(s) specified by \code{n_v} is
    assessed using leave-one-out cross-validation (the measure of
    this is given in equation (7) of
    \href{https://doi.org/10.1111/rssc.12159}{Northrop et al. (2017)}).}
}
  See \href{https://doi.org/10.1111/rssc.12159}{Northrop et al. (2017)}
  and the introductory threshr vignette for further details and examples.
}
\examples{
# Note:
# 1. Smoother plots result from making n larger than the default n = 1000.
# 2. In the examples below the validation thresholds used are rather higher
#    than is advisable, leaving far fewer excesses than the minimum of
#    50 suggested by Jonathan and Ewans (2013).

## North Sea significant wave heights, default prior -----------------------
# A plot akin to the top left of Figure 7 in Northrop et al. (2017)
# ... but with fewer training thresholds

# Training thresholds: sample quantiles of ns at levels 0.1, 0.2, ..., 0.9
u_vec_ns <- quantile(ns, probs = seq(0.1, 0.9, by = 0.1))
# n_v = 2: each of the two largest training thresholds is also used
# (separately) as a validation threshold
ns_cv <- ithresh(data = ns, u_vec = u_vec_ns, n_v = 2)
plot(ns_cv, lwd = 2, add_legend = TRUE, legend_pos = "topright")
mtext("significant wave height / m", side = 3, line = 2.5)

## Gulf of Mexico significant wave heights, default prior ------------------

# Training thresholds: sample quantiles of gom at levels 0.2, 0.3, ..., 0.9
u_vec_gom <- quantile(gom, probs = seq(0.2, 0.9, by = 0.1))
# Setting a prior using its name and parameter value(s) --------------------
# This example gives the same prior as the default
gom_cv <- ithresh(data = gom, u_vec = u_vec_gom, n_v = 2, prior = "mdi",
                  h_prior = list(a = 0.6))

## Setting a user-defined (log-)prior R function ---------------------------
# This example also gives the same prior as the default
# (It will take longer to run than the example above because ithresh detects
#  that the prior is an R function and sets use_rcpp to FALSE.)
\donttest{
# Log-prior as a function of pars, with hyperparameters a and min_xi.
# Returns -Inf (zero prior density) if pars[1] is not positive or if
# pars[2] is below min_xi.
user_prior <- function(pars, a, min_xi = -1) {
  if (pars[1] <= 0 | pars[2] < min_xi) {
    return(-Inf)
  }
  return(-log(pars[1]) - a * pars[2])
}
# The hyperparameter a is supplied via h_prior; min_xi keeps its default
gom_cv <- ithresh(data = gom, u_vec = u_vec_gom, n_v = 2, prior = user_prior,
                  h_prior = list(a = 0.6))
}
## Setting a user-defined (log-)prior (pointer to a) C++ function ----------
# We use the revdbayes function create_prior_xptr() to create the required
# pointer to a C++ function that evaluates the log-prior

prior_ptr <- revdbayes::create_prior_xptr("gp_flat")
gom_cv <- ithresh(data = gom, u_vec = u_vec_gom, n_v = 2, prior = prior_ptr,
                  h_prior = list(min_xi = -1))
}
\references{
Northrop, P. J. and Attalides, N. (2016) Posterior propriety in
  Bayesian extreme value analyses using reference priors.
  \emph{Statistica Sinica}, \strong{26}(2), 721--743.
  \url{https://doi.org/10.5705/ss.2014.034}

Northrop, P. J., Attalides, N. and Jonathan, P. (2017)
  Cross-validatory extreme value threshold selection and uncertainty
  with application to ocean storm severity.
  \emph{Journal of the Royal Statistical Society Series C: Applied
  Statistics}, \strong{66}(1), 93--120.
  \url{https://doi.org/10.1111/rssc.12159}

Jonathan, P. and Ewans, K. (2013) Statistical modelling
  of extreme ocean environments for marine design: a review.
  \emph{Ocean Engineering}, \strong{62}, 91--109.
  \url{https://doi.org/10.1016/j.oceaneng.2013.01.004}
}
\seealso{
\code{\link{plot.ithresh}} for the S3 plot method for objects of
  class \code{ithresh}.

\code{\link{summary.ithresh}} for summarizing measures of threshold
  predictive performance.

\code{\link{predict.ithresh}} for predictive inference for the
  largest value observed in N years.

\code{\link[revdbayes]{rpost}} in the
  \code{\link[revdbayes]{revdbayes}} package for details of the arguments
  that can be passed to
  \code{\link[revdbayes]{rpost_rcpp}}/\code{\link[revdbayes]{rpost}}.

\code{\link[revdbayes]{set_prior}} and
  \code{\link[revdbayes]{set_bin_prior}} in the
  \code{\link[revdbayes]{revdbayes}} package for details of how to set
  prior distributions for GP parameters and for the exceedance probability
  \eqn{p}.

\code{\link[stats]{quantile}}.
}
