#' @title Analyses of simulation studies including Monte Carlo error
#' @description `simsum` computes performance measures for simulation studies in which each simulated data set yields point estimates by one or more analysis methods. Bias, empirical standard error and precision relative to a reference method can be computed for each method.  If, in addition, model-based standard errors are available then `simsum` can compute the average model-based standard error, the relative error in the model-based standard error, the coverage of nominal confidence intervals, the coverage under the assumption that there is no bias (bias-eliminated coverage), and the power to reject a null hypothesis. Monte Carlo errors are available for all estimated quantities.
#' @param data A `data.frame` in which variable names are interpreted. It has to be in tidy format, i.e. each variable forms a column and each observation forms a row.
#' @param estvarname The name of the variable containing the point estimates.
#' @param true The true value of the parameter. This is used in calculations of bias and coverage.
#' @param se The name of the variable containing the standard errors of the point estimates.
#' @param methodvar The name of the variable containing the methods to compare. For instance, methods could be the models compared within a simulation study. Can be `NULL`.
#' @param ref Specifies the reference method against which relative precision will be calculated. Only useful if `methodvar` is specified.
#' @param by A vector of variable names to compute performance measures by a list of factors. Factors listed here are the (potentially several) data-generating mechanisms used to simulate data under different scenarios (e.g. sample size, true distribution of a variable, etc.). Can be `NULL`.
#' @param ci.limits A numeric vector of length 2 specifying the limits (lower and upper) of confidence intervals used to calculate coverage. Useful for non-Wald type estimators (e.g. bootstrap). Defaults to `NULL`, where Wald-type confidence intervals based on the provided SEs are calculated for coverage. This feature is experimental, use with caution.
#' @param dropbig Specifies that point estimates or standard errors beyond the maximum acceptable values should be dropped. Defaults to `FALSE`.
#' @param x Set to `TRUE` to include the `data` argument used to calculate summary statistics (i.e. after pre-processing the input dataset e.g. removing values deemed too large via the `dropbig` argument) as a slot. Calling `simsum` with `x = TRUE` is required to produce zipper plots. The downside is that the size of the returned object increases considerably, therefore it is set to `FALSE` by default.
#' @param control A list of parameters that control the behaviour of `simsum`. Possible values are:
#' * `mcse`, whether to calculate Monte Carlo standard errors. Defaults to `TRUE`;
#' * `level`, the nominal confidence level (i.e. one minus the significance level) used for coverage, bias-eliminated coverage, and power. Defaults to 0.95;
#' * `df`, whether to use robust critical values from a t distribution with `df` degrees of freedom when calculating coverage, bias-eliminated coverage, and power. Defaults to `NULL`, in which case a Gaussian distribution is used;
#' * `na.rm`, whether to remove point estimates or standard errors where either (or both) is missing. Defaults to `TRUE`;
#' * `char.sep`, a character utilised when splitting the input dataset `data`. Generally, this should not be changed;
#' * `dropbig.max`, specifies the maximum acceptable absolute value of the point estimates, after standardisation. Defaults to 10;
#' * `dropbig.semax`, specifies the maximum acceptable absolute value of the standard error, after standardisation. Defaults to 100;
#' * `dropbig.robust`, specifies whether to use robust standardisation (using median and inter-quartile range) rather than normal standardisation (using mean and standard deviation). Defaults to `TRUE`, in which case robust standardisation will be used for `dropbig`.
#' @return An object of class `simsum`.
#' @references White, I.R. 2010. simsum: Analyses of simulation studies including Monte Carlo error. The Stata Journal 10(3): 369-385. \url{http://www.stata-journal.com/article.html?article=st0200}
#' @references Morris, T.P., White, I.R. and Crowther, M.J. 2019. _Using simulation studies to evaluate statistical methods_. Statistics in Medicine, \doi{10.1002/sim.8086}
#' @references Gasparini, A. 2018. rsimsum: Summarise results from Monte Carlo simulation studies. Journal of Open Source Software 3(26):739, \doi{10.21105/joss.00739}
#' @export
#' @details
#' The following names are not allowed for `estvarname`, `se`, `methodvar`, `by`: `stat`, `est`, `mcse`, `lower`, `upper`.
#'
#' @examples
#' data("MIsim", package = "rsimsum")
#' s <- simsum(data = MIsim, estvarname = "b", true = 0.5, se = "se", methodvar = "method", ref = "CC")
#' # If 'ref' is not specified, the reference method is inferred
#' s <- simsum(data = MIsim, estvarname = "b", true = 0.5, se = "se", methodvar = "method")
simsum <- function(data,
                   estvarname,
                   true,
                   se,
                   methodvar = NULL,
                   ref = NULL,
                   by = NULL,
                   ci.limits = NULL,
                   dropbig = FALSE,
                   x = FALSE,
                   control = list()) {
  ### Check arguments
  # Every check is added to a single collection, so that all problems with the
  # arguments are reported together rather than one at a time
  arg_checks <- checkmate::makeAssertCollection()
  # Names that are not allowed for 'estvarname', 'se', 'methodvar', 'by'
  reserved_names <- c("stat", "est", "mcse", "lower", "upper")
  # 'data' must be a data.frame
  checkmate::assert_data_frame(x = data, add = arg_checks)
  # 'estvarname', 'se', 'methodvar', 'ref' must be a single string value
  checkmate::assert_string(x = estvarname, add = arg_checks)
  checkmate::assert_string(x = se, add = arg_checks)
  checkmate::assert_string(x = methodvar, null.ok = TRUE, add = arg_checks)
  checkmate::assert_string(x = ref, null.ok = TRUE, add = arg_checks)
  # 'true' must be a single numeric value
  checkmate::assert_number(x = true, add = arg_checks)
  # 'dropbig', 'x' must be a single, non-missing logical value: both are used
  # as 'if' conditions below, where NA would fail with an obscure error
  checkmate::assert_logical(x = dropbig, len = 1, any.missing = FALSE, add = arg_checks)
  checkmate::assert_logical(x = x, len = 1, any.missing = FALSE, add = arg_checks)
  # 'by' must be a vector of strings; can be NULL
  checkmate::assert_character(x = by, null.ok = TRUE, add = arg_checks)
  # 'estvarname', 'se' must be in 'data'; all elements of 'by' must be in 'data'; 'methodvar' must be in 'data'
  checkmate::assert_subset(x = estvarname, choices = names(data), add = arg_checks)
  checkmate::assert_subset(x = se, choices = names(data), add = arg_checks)
  checkmate::assert_subset(x = by, choices = names(data), add = arg_checks)
  checkmate::assert_subset(x = methodvar, choices = names(data), add = arg_checks)
  # 'ref' must be one of the options in 'methodvar'
  if (!is.null(methodvar)) {
    checkmate::assert_subset(x = ref, choices = as.character(unique(data[[methodvar]])), add = arg_checks)
  }
  # 'estvarname', 'se', 'methodvar', 'by' must not be any of the reserved names
  checkmate::assert_false(x = (estvarname %in% reserved_names), add = arg_checks)
  checkmate::assert_false(x = (se %in% reserved_names), add = arg_checks)
  if (!is.null(methodvar)) {
    checkmate::assert_false(x = (methodvar %in% reserved_names), add = arg_checks)
  }
  if (!is.null(by)) {
    checkmate::assert_false(x = any(by %in% reserved_names), add = arg_checks)
  }
  # 'ci.limits' must be a numeric vector of length 2
  checkmate::assert_numeric(x = ci.limits, len = 2, null.ok = TRUE, add = arg_checks)
  # 'control' must be a list, with well defined components
  checkmate::assert_list(x = control, add = arg_checks)
  checkmate::assert_subset(x = names(control), choices = c("mcse", "level", "df", "na.rm", "char.sep", "dropbig.max", "dropbig.semax", "dropbig.robust"), empty.ok = TRUE, add = arg_checks)
  checkmate::assert_logical(x = control$mcse, len = 1, null.ok = TRUE, add = arg_checks)
  checkmate::assert_number(x = control$level, lower = 0, upper = 1, null.ok = TRUE, add = arg_checks)
  checkmate::assert_number(x = control$df, null.ok = TRUE, add = arg_checks)
  checkmate::assert_logical(x = control$na.rm, len = 1, null.ok = TRUE, add = arg_checks)
  checkmate::assert_string(x = control$char.sep, null.ok = TRUE, add = arg_checks)
  checkmate::assert_number(x = control$dropbig.max, null.ok = TRUE, add = arg_checks)
  checkmate::assert_number(x = control$dropbig.semax, null.ok = TRUE, add = arg_checks)
  checkmate::assert_logical(x = control$dropbig.robust, len = 1, null.ok = TRUE, add = arg_checks)
  # Report
  if (!arg_checks$isEmpty()) checkmate::reportAssertions(arg_checks)

  ### Set control parameters
  # User-supplied settings come first, followed by the defaults for every
  # setting that the user did not supply
  control.default <- list(mcse = TRUE, level = 0.95, df = NULL, na.rm = TRUE, char.sep = "~", dropbig.max = 10, dropbig.semax = 100, dropbig.robust = TRUE)
  control <- unlist(list(
    control[names(control) %in% names(control.default)],
    control.default[!(names(control.default) %in% names(control))]
  ), recursive = FALSE)

  ### Factorise 'methodvar', 'by'
  data <- .factorise(data = data, cols = c(methodvar, by))

  ### Check that levels of factors are ok
  .validate_levels(data = data, cols = c(methodvar, by), char = control$char.sep)

  ### Set reference method if `ref` is not specified
  if (!is.null(methodvar)) {
    methods <- levels(data[[methodvar]])
    if (is.null(ref)) {
      # Fall back to the first level of 'methodvar'
      ref <- methods[1]
      message("'ref' method was not specified, ", ref, " set as the reference")
    }
    data[[methodvar]] <- stats::relevel(data[[methodvar]], ref = ref)
  }

  ### Throw a warning if `ref` is specified and `methodvar` is not
  if (is.null(methodvar) && !is.null(ref)) {
    warning("'ref' method is specified while 'methodvar' is not: 'ref' will be ignored")
    ref <- NULL
  }

  ### Identify and drop (if required) point estimates and standard errors that are too big
  if (dropbig) {
    data <- .dropbig(data = data, estvarname = estvarname, se = se, methodvar = methodvar, by = by, max = control$dropbig.max, semax = control$dropbig.semax, robust = control$dropbig.robust)
  }

  ### Drop estimates if SE is missing, and vice versa
  data <- .na_pair(data = data, estvarname = estvarname, se = se)

  ### Compute summary statistics
  # Split by first
  data <- .split_by(data = data, by = by)

  # Then, split methodvar
  data <- lapply(X = seq_along(data), FUN = function(i) .split_by(data = data[[i]], by = methodvar))

  # Then call .performance to compute all performance measures
  summ <- lapply(X = seq_along(data), FUN = function(i) {
    data_i <- data[[i]]
    if (!is.null(methodvar)) {
      # Quantities relative to the reference method do not depend on the method
      # being summarised, hence they are computed once per 'by' group
      ref_est <- data_i[[ref]][[estvarname]]
      cor_use <- if (control$na.rm) "na.or.complete" else "everything"
      # Correlation between the estimates of each method and those of the reference
      rho <- vapply(X = methods, FUN = function(m) {
        stats::cor(ref_est, data_i[[m]][[estvarname]], use = cor_use)
      }, FUN.VALUE = numeric(1))
      # Empirical standard error of the reference method
      empse_ref <- sqrt(stats::var(ref_est, na.rm = control$na.rm))
    } else {
      rho <- NULL
      empse_ref <- NULL
    }
    out.out <- lapply(X = seq_along(data_i), FUN = function(j) {
      out.in <- .performance(data = data_i[[j]], estvarname = estvarname, se = se, true = true, rho = rho[names(data_i[j])], empse_ref = empse_ref, ci.limits = ci.limits, control = control)
      # Label the results with the method and 'by' values they refer to
      if (!is.null(methodvar)) {
        out.in[[methodvar]] <- unique(data_i[[j]][[methodvar]])
      }
      for (byval in by) {
        out.in[[byval]] <- unique(data_i[[j]][[byval]])
      }
      out.in
    })
    .br(out.out)
  })
  summ <- .br(summ)

  ### Include stuff into object to return
  obj <- list()
  obj$summ <- summ
  obj$estvarname <- estvarname
  obj$true <- true
  obj$se <- se
  obj$methodvar <- methodvar
  obj$ref <- ref
  obj$dropbig <- dropbig
  if (!is.null(ci.limits)) {
    obj$ci.limits <- ci.limits
  }
  obj$by <- by
  obj$control <- control
  if (x) {
    # Recombine the (pre-processed) split data into a single dataset
    obj$x <- .br(lapply(data, .br))
    rownames(obj$x) <- NULL
  }

  ### Return object of class simsum
  class(obj) <- c("list", "simsum")
  obj
}
