# Flag the enclosing namespace as data.table aware, so that data.table
# syntax used inside `[` (such as `.SD`) is handled by data.table
#
.datatable.aware <- TRUE
#
# Declare the data.table special symbols as global variables so that
# code-analysis tools treat them as defined; utils::globalVariables()
# is only called on R versions from 2.15.1 onwards
if (getRversion() >= "2.15.1") {
  utils::globalVariables(
    c(".", ".I", ".N", ".SD"),
    utils::packageName()
  )
}
#
#' Profile a data frame
#'
#' Given the data frame `DF`, create a new data frame with one row for
#' each column of `DF` that characterizes that column in terms of the
#' number and fraction of missing values, the most frequent value and
#' its frequency and other characteristics like the Shannon homogeneity
#' measure computed by the [ShannonHomogeneity()] function.
#'
#' @param DF data frame to be characterized
#' @param dgts digits retained for numerical characterizations like fractions (default = 3)
#' @param charMax maximum number of characters retained in representing the most frequent value for a variable (default = 20)
#'
#' @return data frame with one row for each column of `DF` and these columns:
#'   * Variable the name of the column from `DF` being characterized
#'   * Type the class of `Variable` (e.g., numeric, integer, character, etc.);
#'     for objects carrying several classes, the first one is reported
#'   * nMiss the number of missing (NA) or blank `Variable` records
#'   * fracMiss the fraction of total records represented by `nMiss`
#'   * nLevels the number of distinct values `Variable` exhibits
#'   * topValue the most frequently occurring `Variable` value, truncated to `charMax` characters
#'   * topChars the actual number of characters required to represent `topValue`
#'   * topFreq the number of times `topValue` occurs
#'   * topFrac the fraction of total records represented by `topFreq`
#'   * Homog the Shannon homogeneity measure for `Variable`
#'
#' @export
#'
#' @examples
#' ProfileDataFrame(ChickWeight)
ProfileDataFrame <- function(DF, dgts = 3, charMax = 20){
  #
  stopifnot("DF must be a data frame"= is.data.frame(DF))
  stopifnot("dgts must be positive"= dgts > 0)
  stopifnot("charMax must be positive"= charMax > 0)
  #
  nVar <- ncol(DF)
  #
  DT <- data.table::data.table(DF)
  #
  # Take the first class of every column directly.  Collecting the full
  # class vectors through DT[, lapply(.SD, class)] depends on data.table
  # recycling them to a common length, which fails when two columns have
  # class vectors of different lengths greater than one.
  varClass <- vapply(DT, function(col) class(col)[1L], character(1))
  #
  # Each helper returns a list per column, so row k of these results
  # holds the k-th list element for every column of DF
  sumTop <- DT[, lapply(.SD, topFun, dgts, charMax)]
  sumMiss <- DT[, lapply(.SD, nMissFun, dgts)]
  sumHomog <- DT[, lapply(.SD, ShannonHomogeneity, dgts)]
  #
  outFrame <- data.frame(Variable = colnames(DF),
                         Type = varClass,
                         nMiss = unlist(sumMiss[1]),
                         fracMiss = unlist(sumMiss[2]),
                         nLevels = unlist(sumTop[1]),
                         topValue = unlist(sumTop[2]),
                         topChars = unlist(sumTop[3]),
                         topFreq = unlist(sumTop[4]),
                         topFrac = unlist(sumTop[5]),
                         Homog = unlist(sumHomog[1]))
  #
  # Replace the column-name row labels inherited from the named vectors
  # with a plain index; seq_len() stays well-defined when nVar is 0
  rownames(outFrame) <- seq_len(nVar)
  #
  outFrame
  #
}

# Summarize the most frequent value of the vector x
#
# Arguments:
#   x        vector to be tabulated
#   dgts     digits retained when rounding the top-value fraction
#   charMax  maximum number of characters kept from the top value's label
#
# Returns (invisibly) a list with these elements:
#   nLevels   number of entries in the frequency table of x (NA gets its
#             own entry when x contains missing values)
#   topLevel  label of the most frequent value, cut to charMax characters
#   topChars  number of characters in the full, untruncated label
#   topFreq   number of times the most frequent value occurs
#   topFrac   topFreq divided by length(x), rounded to dgts digits
topFun <- function(x, dgts, charMax){
  #
  counts <- table(x, useNA = "ifany")
  #
  # Table entry holding the largest count
  modal <- counts[which.max(counts)[1]]
  modalLabel <- names(modal)
  modalCount <- as.numeric(modal)
  modalShare <- round(modalCount/length(x), digits = dgts)
  #
  invisible(list(nLevels = length(counts),
                 topLevel = substr(modalLabel, 1, charMax),
                 topChars = nchar(modalLabel),
                 topFreq = modalCount,
                 topFrac = modalShare))
}

# Count the missing (NA) or blank ("") entries of the vector x
#
# Arguments:
#   x     vector to be examined
#   dgts  digits retained when rounding the missing fraction
#
# Returns a list with these elements:
#   nMiss     number of elements of x that are NA or the empty string
#   fracMiss  nMiss divided by length(x), rounded to dgts digits
nMissFun <- function(x, dgts){
  #
  # Test for blanks on the character representation of x.  Comparing x
  # itself with "" makes R coerce "" to the class of x, which can raise
  # an error for Date and POSIXct vectors.
  isBlank <- as.character(x) == ""
  #
  # na.rm drops elements whose blank test is NA and that are not
  # themselves flagged by is.na(), so they are not counted
  nMiss <- sum(is.na(x) | isBlank, na.rm = TRUE)
  fracMiss <- round(nMiss/length(x), digits = dgts)
  #
  list(nMiss = nMiss, fracMiss = fracMiss)
}

