#' @importFrom stats aggregate
###### some data proprocessing functions before model fitting ######

######Transform long-format data into person-level format
data_sim_copula_scmprisk_addhaz_tvc_longclean <- function(data,var_list){

  data <- as.data.frame(data)
  data <- data[order(data$id, data$visit_time), ]

  visit_counts <- aggregate(visit_time ~ id, data = data, FUN = length)
  max_visits <- max(visit_counts$visit_time)

  new_data <- data.frame(id = integer(), Left = numeric(), Right = numeric(), status = integer(),
                         timeD = numeric(), statusD = integer(), weight = numeric(), stringsAsFactors = FALSE)

  for (z in var_list) {
    new_data[[paste0(z, "_change")]] <- numeric()
    new_data[[paste0(z, "_timeD_exact")]] <- integer()
    new_data[[paste0(z, "_left")]] <- numeric()
    new_data[[paste0(z, "_right")]] <- numeric()
    new_data[[paste0(z, "_timeD")]] <- numeric()
  }

  for (i in 2:max_visits) {
    new_data[[paste0("time_", i)]] <- numeric()
  }

  unique_ids <- unique(data$id)


  for (id in unique_ids) {
    temp_data <- data[data$id == id, ]

    max_status <- max(temp_data$status, na.rm = TRUE)
    max_statusD <- max(temp_data$statusD, na.rm = TRUE)
    weight <- temp_data$weight[1]

    # Disability
    if (any(temp_data$status == 1)) {
      right_index <- which(temp_data$status == 1)[1]
      right_time <- temp_data$visit_time[right_index]
      left_time <- if (right_index > 1) temp_data$visit_time[right_index - 1] else NA
    } else {
      right_time <- Inf
      left_time <- max(temp_data$visit_time)
    }
    # Death
    timeD <- if (any(temp_data$statusD == 1)) temp_data$visit_time[which(temp_data$statusD == 1)[1]] else max(temp_data$visit_time)

    new_row <- list(id = id, Left = left_time, Right = right_time, status = max_status,
                    timeD = timeD, statusD = max_statusD, weight=weight)

    for (i in 2:max_visits) {
      new_row[[paste0("time_", i)]] <- ifelse(i <= nrow(temp_data), temp_data$visit_time[i], Inf)
    }


    for (z in var_list) {
      z_indices <- which(temp_data[[z]] == 1)
      if (length(z_indices) > 0) {
        change_time <- mean(c(temp_data$visit_time[z_indices[1]], temp_data$visit_time[max(z_indices[1] - 1, 1)]))
        new_row[[paste0(z, "_change")]] <- change_time
      } else {
        change_time <- Inf
        new_row[[paste0(z, "_change")]] <- change_time
      }
      new_row[[paste0(z, "_timeD_exact")]] <- ifelse(any(temp_data$visit_time == timeD), temp_data[[z]][which(temp_data$visit_time == timeD)], NA)
      new_row[[paste0(z, "_left")]] <- ifelse(left_time <= change_time,0,left_time - change_time)
      new_row[[paste0(z, "_right")]] <- ifelse(right_time <= change_time, 0, right_time-change_time)
      new_row[[paste0(z, "_timeD")]] <- ifelse(timeD <= change_time,0, timeD-change_time)
    }
    new_data <- rbind(new_data, as.data.frame(new_row, stringsAsFactors = FALSE))
  }
  return(new_data)
}


####### ic, semi-competing risk, semi-parametric, general
data_process_scmprisk_ic_sp_tvc <- function(data, var_list, l1, u1, m1, l2, u2, m2) {

  var_list_new = c(paste0(var_list, c("_left")), paste0(var_list, c("_right")), paste0(var_list, c("_timeD")))
  var_list_exact = c(paste0(var_list, c("_timeD_exact")))
  var_list_right = paste0(var_list, c("_right"))
  var_list_change = paste0(var_list, c("_change"))

  # replace Inf by constant u
  data$Right[data$status==0] <- u1

  for (j in 1:length(var_list)){
    data[,var_list_right[j]] = ifelse(data$Right <= data[,var_list_change[j]], 0, data$Right - data[,var_list_change[j]])
  }


  indata1 <- data[,c("Left","Right","status",var_list_new)]
  indata2 <- data[,c("timeD","statusD",var_list_new, var_list_exact)]
  t1_left <- indata1[,"Left"]
  t1_right <- indata1[,"Right"]
  t2 <- indata2[,"timeD"]

  dim_m <- dim(indata1)
  n <- dim_m[1]
  p <- length(var_list)

  # matix
  x1_left = as.matrix(indata1[,c(paste0(var_list, c("_left")))],dim_m[1])
  x1_right = as.matrix(indata1[,c(paste0(var_list, c("_right")))],dim_m[1])
  x2_timeD = as.matrix(indata2[,c(paste0(var_list, c("_timeD")),var_list_exact)],dim_m[1])


  # BP
  bl1 <-matrix(0,nrow = n,ncol = m1+1) #  left end
  br1 <-matrix(0,nrow = n,ncol = m1+1) #  right end
  b2 <- matrix(0,nrow = n,ncol = m2+1) # terminal

  for (i in 0:m1) {
    bl1[,(i+1)] <- bern(i,m1,l1,u1,t1_left)
    br1[,(i+1)] <- bern(i,m1,l1,u1,t1_right)
  }

  for (i in 0:m2) {
    b2[,(i+1)] <- bern(i,m2,l2,u2,t2)
  }

  # BP derivatives
  bl1_d <-matrix(0,nrow = n,ncol = m1+1) #  left end
  br1_d <-matrix(0,nrow = n,ncol = m1+1) #  right end
  b2_d  <- matrix(0,nrow = n,ncol = m2+1) # terminal

  for (i in 0:m1) {
    bl1_d[,(i+1)] <- bern_derivative(i,m1,l1,u1,t1_left)
    br1_d[,(i+1)] <- bern_derivative(i,m1,l1,u1,t1_right)
  }

  for (i in 0:m2) {
    b2_d[,(i+1)] <- bern_derivative(i,m2,l2,u2,t2)
  }

  return(list(indata1=indata1, indata2=indata2, t1_left=t1_left, t1_right=t1_right, t2=t2,
              n=n, p=p, x1_left=x1_left, x1_right=x1_right, x2_timeD=x2_timeD,
              var_list = var_list, var_list_new = var_list_new, var_list_exact = var_list_exact,var_list_change = var_list_change,
              bl1=bl1, br1=br1, b2=b2, bl1_d=bl1_d, br1_d=br1_d, b2_d=b2_d))

}



