# Internal documentation -------------------------------------------------------

# MSE estimation - parametric bootstrap procedure

# Function parametric_bootstrap conducts the MSE estimation defined in function
# mse_estim (see below)
# The parametric boostrap approach can be find in Molina and Rao (2010) p. 376

# External documentation -------------------------------------------------------

# #' Estimates the Parametric Bootstrap MSE of the EBP approach
# #'
# #' Function \code{parametric_bootstrap} estimates the parametric bootstrap MSE
# #' for regional disaggregated indicators using the Parametric Bootstrap approach
# #' by \cite{Gonzalez-Manteiga et al. (2008)}.
# #'
# #' @param framework the result of applying function \code{\link{notation}}. A
# #' list with 15 components containing rearranged data sets and vectors of
# #' variables indicating the domains as well as determinants of the notational
# #' framework (see also \code{\link{emdiObject}}).
# #' @param point_estim the result of applying \code{\link{point_estim}}.
# #' @param fixed fixed a two-sided linear formula object describing the
# #' fixed-effects part of the nested error linear regression model with the
# #' dependent variable on the left of a ~ operator and the explanatory
# #' variables on the right, separated by + operators. The argument corresponds
# #' to the argument \code{fixed} in function \code{\link[nlme]{lme}}.
# #' @param transformation a character string. Three different transformation
# #' types for the dependent variable can be chosen (i) no transformation ("no");
# #' (ii) log transformation ("log"); (iii) Box-Cox transformation ("box.cox").
# #' @param interval a numeric vector containing a lower and upper limit
# #' determining an interval for the estimation of the optimal parameter. Defaults
# #' to c(-1,2).
# #' @param L a number determining the number of Monte-Carlo simulations.
# #' @param B a number determining the number of bootstrap populations in the
# #' parametric bootstrap approach (see also \cite{Gonzalez-Manteiga et al. (2008)})
# #' used in the MSE estimation.
# #' @return a data frame with MSE estimators for each indicator per domain.
# #' @references
# #' Gonzalez-Manteiga, W. et al. (2008). Bootstrap mean squared error of
# #' a small-area EBLUP. Journal of Statistical Computation and Simulation,
# #' 78:5, 443-462.
# #' @seealso \code{\link{ebp}}, \code{\link{emdiObject}}, \code{\link{notation}},
# #' \code{\link{point_estim}}
# #' @examples
# #' # Loading data
# #' data("Xoutsamp_AuxVar")
# #' data("incomedata")
# #'
# #' # Determine notational framework
# #' framework <- notation(Xoutsamp_AuxVar, "provlab", incomedata, "provlab")
# #'
# #' # Obtain point predictions
# #' set.seed(100); point <- point_estim(framework, income~educ1, 4500,
# #' "box.cox", L=2)
# #'
# #' # Parametric bootstrap MSE
# #' set.seed(100); mse <- parametric_bootstrap(framework, point, income ~ educ1,
# #' 4500, "box.cox", L=2, B=2)
# #' @export
# #' @import nlme

parametric_bootstrap <- function(framework,
                                 point_estim,
                                 fixed,
                                 transformation,
                                 interval=c(-1,2),
                                 L,
                                 B,
                                 parallel_mode,
                                 cpus) {
  cat('\r', "Bootstrap started                                                                        ")
  
  start_time = Sys.time()
  if(cpus > 1){
    cpus <- min(cpus, detectCores())
    parallelStart(mode = parallel_mode, cpus = cpus, show.info = FALSE)
    parallelLibrary("nlme")
    mses <- simplify2array(parallelLapply(xs              = 1:B, 
                                           fun             = mse_estim_wrapper,
                                           B               = B,
                                           framework       = framework,
                                           lambda          = point_estim$optimal_lambda,
                                           shift           = point_estim$shift_par,
                                           model_par       = point_estim$model_par,
                                           gen_model       = point_estim$gen_model,
                                           fixed           = fixed,
                                           transformation  = transformation,
                                           interval        = interval,
                                           L               = L,
                                           start_time      = start_time
                  )
            )
    parallelStop()
  } else{
      mses <- simplify2array(lapply(        X               = 1:B,  
                                            FUN             = mse_estim_wrapper,
                                            B               = B,
                                            framework       = framework,
                                            lambda          = point_estim$optimal_lambda,
                                            shift           = point_estim$shift_par,
                                            model_par       = point_estim$model_par,
                                            gen_model       = point_estim$gen_model,
                                            fixed           = fixed,
                                            transformation  = transformation,
                                            interval        = interval,
                                            L               = L,
                                            start_time      = start_time
        )
      )
  }
  
  cat('\r', "Bootstrap completed                                                                      ")
  if(.Platform$OS.type == "windows"){
    flush.console()
  }
  mses <- apply(mses, c(1,2), fast_mean)
  mses <- data.frame(Domain = unique(framework$pop_domains_vec), mses)

  return(mses)
}




# mse_estim (only internal) ----------------------------------------------------

# The mse_estim function defines all parameters and estimations which have to
# be replicated B times for the Parametric Bootstrap Approach.
# See Molina and Rao (2010) p. 376

mse_estim <- function(framework,
                      lambda,
                      shift,
                      model_par,
                      gen_model,
                      fixed,
                      transformation,
                      interval,
                      L
                      ) {



  # The function superpopulation returns an income vector and a temporary
  # variable that passes the random effect to generating bootstrap populations
  # in bootstrap_par.
  superpop <- superpopulation(framework      = framework,
                              model_par      = model_par,
                              gen_model      = gen_model,
                              lambda         = lambda,
                              shift          = shift, 
                              transformation = transformation)

  pop_income_vector <- superpop$pop_income_vector

  # True indicator values
  true_indicators <- matrix(nrow = framework$N_dom_pop,
                            data = unlist(lapply(framework$indicator_list,
                                   function(f, pov_line){
                                     matrix(nrow = framework$N_dom_pop,
                                            data = unlist(tapply(pop_income_vector,
                                                                 framework$pop_domains_vec, f,
                                                                 pov_line = framework$pov_line ,
                                                                 simplify = TRUE)
                                                          ),
                                            byrow=TRUE)
                                     },
                                   pov_line = framework$pov_line)
                                   )
                            )

    colnames(true_indicators) <- framework$indicator_names

  # The function bootstrap_par returns a sample that can be given into the
  # point estimation to get predictors of the indicators that can be compared
  # to the "truth".
  bootstrap_sample <- bootstrap_par(fixed          = fixed,
                                    transformation = transformation,
                                    framework      = framework,
                                    model_par      = model_par,
                                    lambda         = lambda,
                                    shift          = shift,
                                    vu_tmp         = superpop$vu_tmp
                                    )

  framework$smp_data <- bootstrap_sample

  # Prediction of indicators with bootstap sample.
  bootstrap_point_estim <- as.matrix(point_estim(fixed          = fixed,
                                                 transformation = transformation,
                                                 interval       = interval,
                                                 L              = L,
                                                 framework      = framework
                                                 )[[1]][,-1])

  return((bootstrap_point_estim - true_indicators)^2)
} # End mse_estim


# Superpopulation function -----------------------------------------------------

# The model parameter from the nested error linear regression model are
# used to contruct a superpopulation model.
superpopulation <-  function(framework, model_par, gen_model, lambda, shift, 
                             transformation) {
  # superpopulation individual errors
  eps <- vector(length = framework$N_pop)
  eps[framework$obs_dom]  <-  rnorm(sum(framework$obs_dom),
                                    0,
                                    sqrt(model_par$sigmae2est))
  eps[!framework$obs_dom] <- rnorm(sum(!framework$obs_dom),
                                   0,
                                   sqrt(model_par$sigmae2est+model_par$sigmau2est))
  # superpopulation random effect
  vu_tmp <- rnorm(framework$N_dom_pop, 0, sqrt(model_par$sigmau2est))
  vu_pop <- rep(vu_tmp, framework$n_pop)
  #  superpopulation income vector
  Y_pop_b <- gen_model$mu_fixed + eps + vu_pop
  Y_pop_b <- back_transformation(y             = Y_pop_b,
                                 transformation = transformation,
                                 lambda         = lambda,
                                 shift          = shift
  )
  Y_pop_b[!is.finite(Y_pop_b)] <- 0
  
  return(list(pop_income_vector=Y_pop_b, vu_tmp=vu_tmp))
}

# Bootstrap function -----------------------------------------------------------

bootstrap_par <- function(fixed,
                          transformation,
                          framework,
                          model_par,
                          lambda,
                          shift,
                          vu_tmp) {
  # Bootstrap sample individual error term
  eps <- rnorm(framework$N_smp, 0, sqrt(model_par$sigmae2est))
  # Bootstrap sample random effect
  vu_smp <- rep(vu_tmp[framework$dist_obs_dom], framework$n_smp)
  # Extraction of design matrix
  X_smp <- model.matrix(fixed, framework$smp_data)
  # Constant part of income vector for bootstrap sample
  mu_smp <- X_smp %*% model_par$betas
  # Transformed bootstrap income vector
  Y_smp_b <- mu_smp + eps + vu_smp
  # Back transformation of bootstrap income vector
  Y_smp_b <- back_transformation(y              = Y_smp_b,
                                 transformation = transformation,
                                 lambda         = lambda,
                                 shift          = shift
  )
  Y_smp_b[!is.finite(Y_smp_b)] <- 0
  
  # Inclusion of bootstrap income vector into sample data
  bootstrap_smp <- framework$smp_data
  bootstrap_smp[paste(fixed[2])] <- Y_smp_b
  
  return(bootstrap_sample = bootstrap_smp)
}


# progress for mse_estim (only internal) ----------

mse_estim_wrapper <-  function(i,
                               B,
                               framework,
                               lambda,
                               shift,
                               model_par,
                               gen_model,
                               fixed,
                               transformation,
                               interval,
                               L,
                               start_time) {

  tmp <- mse_estim(framework       = framework,
                   lambda          = lambda,
                   shift           = shift,
                   model_par       = model_par,
                   gen_model       = gen_model,
                   fixed           = fixed,
                   transformation  = transformation,
                   interval        = interval,
                   L               = L
                   )

  if(i%%10 == 0) {
    if(i != B) {
      delta <- difftime(Sys.time(), start_time, units = "secs")
      remaining <- (delta/i)*(B-i)
      remaining <- unclass(remaining)
      remaining <- sprintf("%02d:%02d:%02d:%02d",
                           remaining %/% 86400,  # days
                           remaining %% 86400 %/% 3600,  # hours
                           remaining %% 3600 %/% 60,  # minutes
                           remaining %% 60 %/% 1) # seconds)

      cat('\r', i, " of ", B, " Bootstrap iterations completed \t Approximately ",
          remaining, " remaining")
      if(.Platform$OS.type == "windows") flush.console()
    }
  }
  return(tmp)
}

















