#' Read (Normalized) Quantitation Data Files Produced By Wombat At Protein Level
#'
#' Protein quantification results from \href{https://github.com/wombat-p}{Wombat-P} using the Bioconductor package Normalizer can be read using this function and relevant information extracted.
#' Input files compressed as .gz can be read as well.
#' The protein abundance values (XIC), peptide counting get extracted. Since protein annotation is not very extensive with this format of data, the function allows reading the
#' initial fasta files (from the directory above the quantitation-results) allowing to extract more protein-annotation (like species).
#' Sample-annotation (if available) can be extracted from  sdrf files, which are typically part of the Wombat output, too.
#' The protein abundance values may be normalized using multiple methods (no normalization by default, see argument \code{normalizeMeth}), the determination of normalization factors can be restricted to specific proteins
#' (normalization to bait protein(s), or to invariable matrix of spike-in experiments).
#' The protein annotation data gets parsed to extract specific fields (ID, name, description, species ...).
#' Besides, a graphical display of the distribution of protein abundance values may be generated before and after normalization.
#'
#' @details
#' By standard workflow, Wombat-P writes the results of each analysis-method/quantification-algorithm as .csv files.
#' Meta-data describing the proteins may be available from two sources :
#' a) The 1st column of the Wombat/normalizer output.
#' b) From the .fasta file in the directory above the analysis/quantification results of the Wombat-workflow
#'
#'
#' Meta-data describing the samples and experimental setup may be available from a sdrf-file (from the directory above the analysis/quantification results).
#' If available, the meta-data will be examined for determining groups of replicates and
#' the results thereof can be found in $sampleSetup$levels.
#' Alternatively, a dataframe formatted like sdrf-files (ie for each sample a separate line, see also function \code{readSdrf}) may be given, too.
#'
#' This import-function has been developed using Wombat-P version 1.x.
#' The final output is a list containing these elements: \code{$raw}, \code{$quant}, \code{$annot}, \code{$counts}, \code{$sampleSetup}, \code{$quantNotes}, \code{$notes}, or (if \code{separateAnnot=FALSE}) data.frame
#'   with annotation- and main quantification-content. If \code{sdrf} information has been found, an additional list-element \code{setup}
#' will be added containing the entire meta-data as \code{setup$meta} and the suggested organization as \code{setup$lev}.
#'
#'
#' @param fileName (character) name of file to be read (no default; typically a .csv file as written by the Wombat-P workflow). Gz-compressed files can be read, too.
#' @param path (character) path of file to be read
#' @param quantSoft (character) quantification-software used inside Wombat-P
#' @param fasta (logical or character) if \code{TRUE} the (first) fasta from one directory higher than \code{fileName} will be read as fasta-file to extract further protein annotation;
#'    if \code{character} a fasta-file at this location will be read/used
#' @param normalizeMeth (character) normalization method, defaults to \code{none} (ie no normalization), for more details see \code{\link[wrMisc]{normalizeThis}}
#' @param quantCol (character or integer) exact col-names, or if length=1 content of \code{quantCol} will be used as pattern to search among column-names for $quant using \code{grep}
#' @param contamCol (character or integer, length=1) which columns should be used for contaminants
#' @param pepCountCol (character) pattern to search among column-names for count data (1st entry for 'Razor + unique peptides', 2nd for 'Unique peptides', 3rd for 'MS.MS.count' (PSM))
#' @param read0asNA (logical) decide if initial quantifications at 0 should be transformed to NA (thus avoid -Inf in log2 results)
#' @param sampleNames (character) custom column-names for quantification data; this argument has priority over \code{suplAnnotFile}
#' @param extrColNames (character) column names to be read (1st position: prefix for LFQ quantitation, default 'LFQ.intensity'; 2nd: column name for protein-IDs, default 'Majority.protein.IDs'; 3rd: column names of fasta-headers, default 'Fasta.headers', 4th: column name for number of protein IDs matching, default 'Number.of.proteins')
#' @param specPref (character) prefix to identifiers allowing to separate i) recognize contamination database, ii) species of main identifications and iii) spike-in species
#' @param refLi (character or integer) custom specify which line of data should be used for normalization, ie which line is main species; if character (eg 'mainSpe'), the column 'SpecType' in $annot will be searched for exact match of the (single) term given
#' @param remRev (logical) option to remove all protein-identifications based on reverse-peptides
#' @param remConta (logical) option to remove all proteins identified as contaminants
#' @param separateAnnot (logical) if \code{TRUE} output will be organized as list with \code{$annot}, \code{$abund} for initial/raw abundance values and \code{$quant} with final normalized quantitations
#' @param gr (character or factor) custom defined pattern of replicate association, will override final grouping of replicates from \code{sdrf} and/or \code{suplAnnotFile} (if provided)   \code{}
#' @param sdrf (logical, character, list or data.frame) optional extraction and adding of experimental meta-data:
#'   if \code{sdrf=TRUE} the 1st sdrf in the directory above \code{fileName} will be used
#'   if character, this may be the ID at ProteomeExchange,
#'   the second element may give further indications for automatic organization of groups of replicates.
#'   Besides, the output from \code{readSdrf} or a list from \code{defineSamples} may be provided; if \code{gr} is provided, \code{gr} gets priority for grouping of replicates
#' @param suplAnnotFile (logical or character) optional reading of supplemental files produced by Compomics; if \code{gr} is provided, it gets priority for grouping of replicates
#'  if \code{TRUE} default to files 'summary.txt' (needed to match information of \code{sdrf}) and 'parameters.txt' which can be found in the same folder as the main quantitation results;
#'  if \code{character} the respective file-names (relative or absolute path), 1st is expected to correspond to 'summary.txt' (tabulated text, the samples as given to Compomics) and 2nd to 'parameters.txt' (tabulated text, all parameters given to Compomics)
#' @param groupPref (list) additional parameters for interpreting meta-data to identify structure of groups (replicates), will be passed to \code{readSampleMetaData}.
#'   May contain \code{lowNumberOfGroups=FALSE} for automatically choosing a rather elevated number of groups if possible (defaults to low number of groups, ie higher number of samples per group)
#' @param plotGraph (logical) optional plot vioplot of initial and normalized data (using \code{normalizeMeth}); alternatively the argument may contain numeric details that will be passed to \code{layout} when plotting
#' @param titGraph (character) custom title to plot of distribution of quantitation values
#' @param wex (numeric)  relative expansion factor of the violin in plot
#' @param silent (logical) suppress messages
#' @param debug (logical) additional messages for debugging
#' @param callFrom (character) allow easier tracking of messages produced
#' @return This function returns a list with  \code{$raw} (initial/raw abundance values), \code{$quant} with final normalized quantitations, \code{$annot} (columns ), \code{$counts} an array with 'PSM' and 'NoOfRazorPeptides',
#'   \code{$quantNotes}, \code{$notes} and optional \code{setup} for meta-data from \code{sdrf}; or a data.frame with quantitation and annotation if \code{separateAnnot=FALSE}
#' @seealso \code{\link[utils]{read.table}}, \code{\link[wrMisc]{normalizeThis}}) , \code{\link{readProteomeDiscovererFile}}; \code{\link{readProlineFile}} (and other import-functions), \code{\link{matrixNAinspect}}
#' @examples
#' path1 <- system.file("extdata", package="wrProteo")
#' # Here we'll load a short/trimmed example file (originating from Compomics)
#' fiNa <- "tinyWombCompo1.csv.gz"
#' dataWB <- readWombatNormFile(file=fiNa, path=path1, tit="tiny Wombat/Compomics, Normalized ")
#' summary(dataWB$quant)
#' matrixNAinspect(dataWB$quant, gr=gl(2,4))
#' @export
readWombatNormFile <- function(fileName, path=NULL, quantSoft="(quant software not specified)", fasta=NULL, normalizeMeth="none", quantCol="abundance_", contamCol=NULL,
  pepCountCol=c("number_of_peptides"), read0asNA=TRUE, refLi=NULL, sampleNames=NULL,
  extrColNames=c("protein_group"), specPref=NULL,
  remRev=TRUE, remConta=FALSE, separateAnnot=TRUE, gr=NULL, sdrf=NULL, suplAnnotFile=NULL, groupPref=list(lowNumberOfGroups=TRUE),
  titGraph=NULL, wex=1.6, plotGraph=TRUE, silent=FALSE, debug=FALSE, callFrom=NULL) {
  ## prepare
  fxNa <- wrMisc::.composeCallName(callFrom, newNa="readWombatNormFile")
  ## Query the graphical parameter 'mar' only if a figure might be drawn ('plotGraph' may be logical or numeric layout-details).
  ## Note : graphics::par() opens a graphics device if none is open, thus it must not be called unconditionally
  oparMar <- if(isTRUE(plotGraph) || (is.numeric(plotGraph) && length(plotGraph) >0)) graphics::par("mar") else NULL
  remStrainNo <- TRUE                   # if TRUE extract Species in very stringent pattern
  cleanDescription <- TRUE              # clean 'Description' for artifacts of truncated text (tailing ';' etc)

  ## functions
  .checkFilePath <- function(fileName, path, expectExt="csv", silent=FALSE, debug=FALSE, callFrom=NULL) {
    ## Check that the input-file can be found and return the path to be used for reading  (moove to wrMisc ?)
    ##  fileName (character) name of file to read; only the 1st value is used
    ##  path (character or NULL) optional directory of 'fileName'; a non-existing directory gets replaced by '.' (with message)
    ##  expectExt (character) expected file-extension (with or without heading '.'); other extensions only trigger a message
    ##  silent, debug, callFrom : control of messages (as in main function)
    ## If 'fileName' can't be found but carries the expected extension, a .gz compressed version is searched and used (if existing).
    ## Returns (character) path incl file-name of the file to read; stops if 'fileName' is invalid or no such file exists.
    fxNa <-  wrMisc::.composeCallName(callFrom, newNa=".checkFilePath")
    msg <- "Invalid entry for 'path'  "
    ## check path
    if(length(path) >0) { path <- path[1]
      if(is.na(path)) { stop(msg,"(must be character-string for valid path or NULL)")}
      if(!dir.exists(path)) {
        if(!silent) message(fxNa, msg,"'", path,"'  (not existing), ignoring...")     # report the invalid entry BEFORE replacing it
        path <- "." } }
    ## check for 'fileName'
    msg <- "Invalid entry for 'fileName'"
    if(length(fileName) >1) { fileName <- fileName[1]
      if(!silent) message(fxNa," 'fileName' shoud be of length=1, using 1st value") }
    if(length(fileName) <1 || is.na(fileName) || nchar(fileName) <1) stop(msg)      # also checks the 1st value retained from longer input
    expectExt <- sub("^\\.", "", expectExt)                                         # remove heading '.' if accidently given
    extPat <- paste0("\\.",expectExt,"$")                                           # pattern for (non-compressed) file with expected extension

    if(!grepl(paste0(extPat,"|\\.",expectExt,"\\.gz$"), fileName)) message(fxNa,"Trouble ?  Expecting .",expectExt," file (the file'",fileName,"' might not be right format) !!")
    if(debug) message(fxNa,"cFP1 ")

    ## compose full path (file as such if no 'path' given)
    .fullPath <- function(fi) if(length(path) >0) file.path(path, fi) else fi
    chFi <- file.exists(.fullPath(fileName))
    ## check for compressed version of 'fileName'
    if(!chFi && grepl(extPat, fileName)) { fiNa2 <- paste0(fileName,".gz")
      chFi <- file.exists(.fullPath(fiNa2))
      if(chFi) {if(!silent) message(fxNa,"Note : File '",fileName,"'  was NOT FOUND, but a .gz compressed version exists, using compressed file.."); fileName <- fiNa2}
    }
    if(!chFi) stop(" File '",fileName,"'  was NOT found ",if(length(path) >0) paste(" in path ",path)," !")
    if(debug) message(fxNa,"cFP2 .. Ready to read", if(length(path) >0) c(" from path ",path[1])," the file  ",fileName[1])
    .fullPath(fileName) }

  .cleanMQann <- function(x, sep="\\|", silent=FALSE, debug=FALSE, callFrom=NULL) {
    ## Parse protein-identifiers (as with 1st column of MaxQuant data, eg 'sp|P00761|CON__TRYP_PIG;CON__P00761') into database, ID and protein-name
    ##  x (character) identifiers to parse, multiple entries per element are expected to be separated by ';'
    ##  sep (character) separator (regular expression) of the fields inside a single entry
    ##  silent, debug, callFrom : not used so far (kept for consistent interface)
    ## Returns character matrix (one line per element of 'x') with columns 'db','ID','name','conta' (parsed and cleaned)
    ##   followed by 'dbIni','IDini','nameIni' (as initially extracted)
    ## example ann1 <- read.delim(file.path(system.file("extdata", package="wrProteo"), "tinyWombCompo1.csv.gz"), sep=",", stringsAsFactors=FALSE)[,1]
    ##   .cleanMQann(ann1)
    x <- as.character(x)
    nMult <- nchar(x) - nchar(gsub(";", "", x))
    chMult <- which(nMult >0)                        # which() also discards NA
    if(length(chMult) >0) {
      spl1 <- strsplit(x[chMult], ";")
      ## use entry with most separators (when multiple entries, eg 'sp|P00761|CON__TRYP_PIG;CON__P00761')
      ## note : '|' must be matched literally (fixed=TRUE), as regular expression it designs an (empty) alternation and nothing gets removed/counted
      x[chMult] <- vapply(spl1, function(y) { nSep <- nchar(y) - nchar(gsub("|", "", y, fixed=TRUE)); y[which.max(nSep)] }, character(1)) }
    xIni <- x       # keep backup for recuperating bizzare nonparsed
    ## split separators & extract db, ID & prot-name
    chSpl <- function(y) {
      chID <- grepl("^[[:upper:]]{1,3}[[:digit:]]{2,}|^[[:upper:]]{1,3}[[:digit:]]+[[:upper:]]+[[:digit:]]*", y)
      chName <- grepl("[A-Z0-9]_[[:upper:]]",y)
      c(dbIni= if((length(y) >1 && grepl("^[[:lower:]]{1,8}$", y[1])) || length(y) >2 && grepl("^[[:lower:]]{2}|[[:lower:]]{2}$",
        y[1])) y[1] else NA_character_, IDini=if(any(chID)) y[which(chID)[1]] else NA_character_, nameIni=if(any(chName)) y[which(chName)[1]] else NA_character_) }
    x <- matrix(as.character(unlist(lapply(strsplit(x, sep), chSpl))), ncol=3, byrow=TRUE, dimnames=list(NULL, c("dbIni","IDini","nameIni")))
    nColIni <- ncol(x)
    cleanID <- function(y, useCol=c(db=1, ID=2, name=3)) {
      ext <- grepl("[[:lower:]]+$", y[,useCol[2]])    # look for extension like 'P08758ups'
      extNoDb <- which(ext & is.na(y[,useCol[1]]))
      if(any(ext)) { idTrim <- sub("[[:lower:]]+$","", y[which(ext), useCol[2]])
        ## NOTE(review): as soon as one line lacks 'db' the extension is written as 'db' to ALL lines with extension - confirm if intended
        if(length(extNoDb) >0) y[which(ext), useCol[1]] <- substring(y[which(ext), useCol[2]], nchar(idTrim) +1 )
        y[which(ext), useCol[2]] <- idTrim }
      prefi <- grepl("^[[:upper:]]+__[[:upper:]]", y[,useCol[3]])       # look for prefix like 'CON__FA5_BOVIN'
      if(any(prefi)) { ch2 <- grepl("[A-Z0-9]_[[:upper:]]",  y[which(prefi), useCol[3]]); if(any(ch2)) {
        y[which(prefi)[which(ch2)], useCol[1]] <- tolower(sub("__[[:upper:]].+","", y[which(prefi)[which(ch2)], useCol[3]]))
        y[which(prefi)[which(ch2)], useCol[3]] <- sub("^[[:upper:]]+__","", y[which(prefi)[which(ch2)], useCol[3]])}}
      colnames(y) <- c("db","ID","name")
      y }
    x <- cbind(x, cleanID(x, useCol=c(db=1, ID=2, name=3)))
    ## contaminants : database-tag 'con' (or 'REV_') or prefix 'CON__' in the entry retained (also covers non-parsed entries like 'CON__P00761')
    x <- cbind(x, conta=grepl("^con|^REV_", x[,"db"]) | grepl("CON__", xIni))
    ## recuperate all (bizarre) non-parsed into ID
    isNa <- rowSums(is.na(x)) > nColIni -2
    if(any(isNa)) x[which(isNa),c(2+nColIni)] <- xIni[which(isNa)]
    x[,c((nColIni+1):ncol(x), 1:nColIni), drop=FALSE] }           # keep as matrix, even with single line


  ## init check : required packages & harmonize arguments for messages
  reqPa <- c("utils","wrMisc")
  chPa <- vapply(reqPa, requireNamespace, logical(1), quietly=TRUE)
  if(any(!chPa)) stop("Package(s) '",paste(reqPa[which(!chPa)], collapse="','"),"' not found ! Please install first from CRAN")
  if(!isTRUE(silent)) silent <- FALSE
  if(isTRUE(debug)) silent <- FALSE else debug <- FALSE          # debug-mode implies messages
  excluCol <- "^Abundances.Count"          # exclude this from quantifications columns
  cleanDescription <- TRUE                 # clean 'Description' for artifacts of truncated text (tailing ';' etc)
  infoDat <- infoFi <- setupSd <- parametersD <- annot <- annotMQ <- NULL        # initialize

  ## check if path & file exist
  paFi <- .checkFilePath(fileName=fileName, path=path, expectExt="csv", silent=silent, debug=debug, callFrom=fxNa)
  ## read (main) file as comma-separated text
  ## future: look for fast reading of files
  tmp <- try(utils::read.delim(paFi, sep=",", stringsAsFactors=FALSE), silent=TRUE)

  if(length(tmp) <1 || inherits(tmp, "try-error") || length(dim(tmp)) <2) {
    ## nothing usable could be read : warn or inform and return NULL
    if(inherits(tmp, "try-error")) warning("Unable to read input file ('",paFi,"')!  (check format or if rights to read)") else {
      if(!silent) message(fxNa,"Content of  file '",paFi,"' seeps empty or non-conform !  Returning NULL; check if this is really a Compomics-file") }
    tmp <- NULL
    return(NULL)
  } else {
    ## start checking format
    if(debug) { message(fxNa,"rWB1 .. dims of initial data : ", nrow(tmp)," li and ",ncol(tmp)," col "); rWB1 <- list(fileName=fileName,path=path,paFi=paFi,tmp=tmp,normalizeMeth=normalizeMeth,read0asNA=read0asNA,quantCol=quantCol,
      refLi=refLi,separateAnnot=separateAnnot   )}  # annotCol=annotCol,FDRCol=FDRCol
    ## check which columns can be extracted (for annotation)
    ## 'contamCol' given as index (integer or other numeric) gets converted to column-name
    if(is.numeric(contamCol) && length(contamCol) >0 ) contamCol <- colnames(tmp)[as.integer(contamCol)]
    extrColNames <- union(extrColNames, contamCol)                     # add contamCol if not included in extrColNames
    chCol <- extrColNames %in% colnames(tmp)
    if(!any(chCol, na.rm=TRUE)) { extrColNames <- gsub("\\."," ",extrColNames)     # 2nd try : names with space instead of '.'
      chCol <- extrColNames %in% colnames(tmp) }
    if(all(!chCol, na.rm=TRUE)) stop("Problem locating annotation columns (",wrMisc::pasteC(extrColNames, quoteC="''"),")")
    if(any(!chCol, na.rm=TRUE)) {
      if(!silent) message(fxNa,"Note: Can't find columns ",wrMisc::pasteC(extrColNames[!chCol], quoteC="'")," !")
    }
    if(debug) message(fxNa,"rWB1c")          # tracking-message only in debug-mode
  }

  if(length(tmp) >0) {
    ## further extracting : quantitation
    ## 'quantCol' of length=1 is used as pattern for the start of column-names, multiple values are taken as exact column-names
    useDCol <- if(length(quantCol) >1) which(colnames(tmp) %in% quantCol) else grep(paste0("^",quantCol), colnames(tmp))
    if(length(useDCol) <1) stop("NO columns matching term ",wrMisc::pasteC(quantCol, quoteC="'")," from argument 'quantCol' found !")
    abund <- as.matrix(tmp[, useDCol, drop=FALSE])               # drop=FALSE : keep column-names also with single column

    chNum <- is.numeric(abund)
    if(!chNum) {
      ## convert text to numeric, the columns to convert are those located above ('quantCol' itself may be a pattern, not a column-name)
      abund <- apply(tmp[, useDCol, drop=FALSE], 2, wrMisc::convToNum, convert="allChar", silent=silent, callFrom=fxNa)
      if(length(dim(abund)) <2) abund <- matrix(as.numeric(abund), ncol=length(useDCol), dimnames=list(NULL, colnames(tmp)[useDCol])) }
    ## simplify column-names : remove prefixes/suffixes only if shared by all columns
    ch1 <- grepl("^abundance_CT\\.mixture\\.QY\\.", colnames(abund))
    if(all(ch1)) colnames(abund) <- sub("abundance_CT\\.mixture\\.QY\\.", "", colnames(abund))
    ch1 <- grepl("^abundance_", colnames(abund))
    if(all(ch1)) colnames(abund) <- sub("abundance_", "", colnames(abund))
    ch1 <- grepl("\\.CV\\.Standards\\.Research\\.Group", colnames(abund))
    if(all(ch1)) colnames(abund) <- sub("\\.CV\\.Standards\\.Research\\.Group", "", colnames(abund))

    trimColNames <- FALSE
    if(trimColNames) {  ## further trim
      colnames(abund) <- wrMisc::.trimFromStart(wrMisc::.trimFromEnd( sub(paste0("^",quantCol),"", colnames(abund))))
      ## no trim needed for Wombat ?
    }
    if(debug) {message(fxNa,"rWB3"); rWB3 <- list(abund=abund,paFi=paFi,path=path,chPa=chPa,tmp=tmp,extrColNames=extrColNames,chCol=chCol,remConta=remConta,pepCountCol=pepCountCol)}

    ## convert 0 (and negative values) to NA
    if(!isFALSE(read0asNA)) { ch1 <- abund <= 0
      if(any(ch1, na.rm=TRUE)) {
        if(!silent) message(fxNa,"Transform ",sum(ch1, na.rm=TRUE),"(",100*round(sum(ch1, na.rm=TRUE)/length(ch1),3),"%) initial '0' values to 'NA'")
        abund[which(ch1)] <- NA }}
    if(debug) message(fxNa,"rWB4")

    ## further extracting : counting data (eg number of peptides)
    ## make array with one layer per entry of 'pepCountCol'; only patterns matching exactly one column per sample can be used
    counts <- NULL
    ch2 <- if(length(pepCountCol) >0) lapply(pepCountCol, grepl, colnames(tmp)) else list()
    ch2a <- vapply(ch2, sum, integer(1), na.rm=TRUE) == ncol(abund)
    if(any(ch2a)) {
      counts <- array(dim=c(nrow(tmp), ncol(abund), sum(ch2a)), dimnames=list(NULL, colnames(abund), pepCountCol[which(ch2a)]))
      for(i in seq_len(sum(ch2a))) counts[,,i] <- suppressWarnings(as.numeric(as.matrix(tmp[, which(ch2[[which(ch2a)[i]]]), drop=FALSE])))
    }
    if(!all(ch2a) && !silent) message(fxNa,"Could not find column(s) with peptide per protein counts (argument 'pepCountCol') matching to '",paste(pepCountCol[which(!ch2a)], collapse="', '"),"'")
    if(debug) {message(fxNa,"rWB4a"); rWB4a <- list(abund=abund,counts=counts,annot=annot,paFi=paFi,path=path,chPa=chPa,tmp=tmp,pepCountCol=pepCountCol,extrColNames=extrColNames,chCol=chCol,remConta=remConta,quantSoft=quantSoft)}

    ## Annotation
    if(any(c("MQ","MaxQuant") %in% quantSoft) && "protein_group" %in% colnames(tmp)) {    # special case MQ: parse annot from column 'protein_group'
      annotMQ <- .cleanMQann(tmp[,"protein_group"])
    }

    ## read fasta from higher dir (specific to Wombat)
    if(length(fasta) >0) {fasta <- fasta[1]; if(isFALSE(fasta) || is.na(fasta)) fasta <- NULL}
    faFi <- NULL
    if(isTRUE(fasta)) {
      ## use 1st fasta-file of directory above the data (if any)
      upDir <- file.path(dirname(paFi),"..")
      chFa <- grep("\\.fasta$", dir(upDir), value=TRUE)
      if(length(chFa) >0) faFi <- file.path(upDir, chFa[1]) else if(!silent) message(fxNa,"NO fasta file found in directory above main data !")
    } else faFi <- fasta
    if(length(faFi) >0) {     # has fasta for recuperating annotation
      fasta <- try(readFasta2(faFi, tableOut=TRUE, silent=silent,debug=debug,callFrom=fxNa), silent=TRUE)
      ## Potential problem with inconsistent format of fasta
      if(inherits(fasta, "try-error") || length(dim(fasta)) <2) { fasta <- NULL
        if(!silent) message(fxNa,"Unable to read/open fasta file '",faFi,"'  (check rights to read ?)")
      } else {
        useLi <- match(tmp[,1], fasta[,2])
        chNa <- is.na(useLi)
        if(any(chNa)) { chForm <- grep("[[:digit:]][[:lower:]]+$", tmp[,1])            # locate eg 'P04040ups' for trimming to 'P04040'
          if(length(chForm) >0) {
            tmp[chForm, 1] <- sub("[[:lower:]]+$","", tmp[chForm,1])
            useLi <- match(tmp[,1], fasta[,2])              # update
          } else if(debug) message(fxNa,"None of the ",sum(chNa)," non-recognized IDs follow pattern for recuperating")
          if(debug) {message(fxNa,"rWB4aa"); rWB4aa <- list(abund=abund,counts=counts,annot=annot,fasta=fasta,tmp=tmp,useLi=useLi,chNa=chNa,chForm=chForm) }
        }
        annot <- fasta[useLi, c("uniqueIdentifier","entryName","proteinName","OS","OX","GN"), drop=FALSE]     # do not export full 'sequence'
        ## 'entryName' gets 'EntryName' and 'proteinName' gets 'Description' (the two labels had been swapped)
        ## NOTE(review): assumes 'entryName' of readFasta2() is the short entry-tag (eg 'ALBU_HUMAN') - confirm against readFasta2()
        colnames(annot) <- c("Accession","EntryName","Description","Species","OX","GeneName") }
    } else if(debug) message(fxNa,"No fasta-file found in directory above data...")
    ## without (usable) fasta : use 1st column of main data as accession (otherwise 'annot' would remain NULL)
    if(length(annot) <1) annot <- matrix(tmp[,1], ncol=1, dimnames=list(NULL,"Accession"))
    if(debug) {message(fxNa,"dim annot",nrow(annot)," ",ncol(annot),"  rWB4b"); rWB4b <- list(annot=annot,faFi=faFi,abund=abund,tmp=tmp)}

    ## remove lines wo IDs  (all objects organized by protein get filtered in parallel)
    chNa <- is.na(annot[,1])
    if(any(chNa)) {
      if(!silent) message(fxNa,"Removing ",sum(chNa)," out of ",nrow(abund)," lines wo ID")
      rmLi <- which(chNa)
      tmp <- tmp[-rmLi, , drop=FALSE]
      annot <- annot[-rmLi, , drop=FALSE]              # drop=FALSE : remain matrix (with initial column-names) even if single line/column remains
      abund <- abund[-rmLi, , drop=FALSE]
      ## annotation parsed from 'protein_group' (if any) must stay aligned to 'annot'
      if(length(dim(annotMQ)) ==2 && nrow(annotMQ) ==length(chNa)) annotMQ <- annotMQ[-rmLi, , drop=FALSE]
      if(length(counts) >0) counts <- if(length(dim(counts))==3) counts[-rmLi, , , drop=FALSE] else counts[-rmLi, , drop=FALSE]
    }
    if(debug) {message(fxNa,"dim annot",nrow(annot)," ",ncol(annot),"  rWB4c"); rWB4c <- list(annot=annot,faFi=faFi,abund=abund,tmp=tmp)}

    ## unique ID (used as preliminary rownames)
    chD <- duplicated(annot[,1])
    uniqueID <- if(any(chD, na.rm=TRUE)) wrMisc::correctToUnique(annot[,1], silent=silent, callFrom=fxNa) else annot[,1]
    rownames(annot) <- rownames(abund) <- uniqueID
    if(length(counts) >0) rownames(counts) <- uniqueID
    if(debug) {message(fxNa,"rWB4ad"); rWB4ad <- list(annot=annot,faFi=faFi,abund=abund,annotMQ=annotMQ,tmp=tmp,uniqueID=uniqueID)}

    if(length(annotMQ) >0) {
      ## MQ only : fuse annotation parsed from 'protein_group' (annotMQ) with annotation so far (from fasta or 1st column of data)
      chDim <- isTRUE(nrow(annotMQ) == nrow(annot))
      if(!chDim) { message(fxNa,"BIZZARE, annotation from 'protein_group' and annoation based on fasta don't match in number of lines !!'")
      } else {
        ## start from annotation as parsed from 'protein_group' (columns of annotMQ are addressed by their initial names)
        annotM <- cbind(Accession=annotMQ[,"ID"], Description=NA, EntryName=annotMQ[,"name"], Species=NA, GeneName=NA, annotMQ[,c("db","conta","nameIni"), drop=FALSE])
        if(all(is.na(annot))) {
          if(debug) message(fxNa,"All annotation from fasta is NA; using annot from (MQ) 'protein_group' only")
        } else {
          ## complete by information available in 'annot' : match by accession (line by line) and transfer only columns existing in 'annot'
          supCol <- intersect(c("Description","Species","GeneName"), colnames(annot))
          accCol <- if("Accession" %in% colnames(annot)) "Accession" else 1
          matchAcc <- match(annotM[,"Accession"], annot[,accCol])
          useLi <- which(!is.na(matchAcc) & !is.na(annotM[,"Accession"]))
          if(debug) message(fxNa,"Matching annotation from fasta to annot from (MQ) 'protein_group' : Adding ",length(useLi)," items")
          if(length(useLi) >0 && length(supCol) >0) annotM[useLi, supCol] <- annot[matchAcc[useLi], supCol]
        }
        rownames(annotM) <- rownames(annot)
        annot <- annotM
        rm(annotM)
      }
    }
    if(debug) {message(fxNa,"rWB4e"); rWB4e <- list(paFi=paFi,path=path,chPa=chPa,tmp=tmp,extrColNames=extrColNames,chCol=chCol,counts=counts,
      quantCol=quantCol,abund=abund,chNum=chNum,ch2=ch2,annot=annot,remConta=remConta,specPref=specPref)}

    ## remove Wombat contaminants (recognized by tag 'CON__' in accession)
    conLi <- grep("CON__[[:alnum:]]", annot[, if(ncol(annot) >1) "Accession" else 1])
    if(remConta) {
      if(length(conLi) >0) {
        ## drop=FALSE : remain matrix/array even if single line or single layer of counts remains
        annot <- annot[-conLi, , drop=FALSE]
        abund <- abund[-conLi, , drop=FALSE]
        if(length(counts) >0) counts <- if(length(dim(counts))==3) counts[-conLi, , , drop=FALSE] else counts[-conLi, , drop=FALSE]
        if(debug) message(fxNa,"Removing ",length(conLi)," instances of contaminants to final ",nrow(annot)," lines/IDs")}
    }

    ## snapshot for debugging (before finalizing annotation)
    if(debug) {
      message(fxNa,"rWB4f")
      rWB4f <- list(path=path,chPa=chPa,tmp=tmp,extrColNames=extrColNames,chCol=chCol,counts=counts,
        quantCol=quantCol,abund=abund,chNum=chNum,ch2=ch2,annot=annot,remConta=remConta,specPref=specPref)
    }

    ## finalize annotation : make sure all standard columns exist, those so far not present get added (as NA)
    missCol <- setdiff(c("EntryName","GeneName","Species","Contam","Description"), colnames(annot))
    if(length(missCol) >0) {
      annot <- cbind(annot, matrix(NA, nrow=nrow(annot), ncol=length(missCol), dimnames=list(NULL, missCol)))
    }
    ## contaminants which were not removed get marked
    if(!remConta && length(conLi) >0) annot[conLi, "Contam"] <- "TRUE"

    if(debug) {
      message(fxNa,"rWB5")
      rWB5 <- list(path=path,chPa=chPa,tmp=tmp,extrColNames=extrColNames,chCol=chCol,counts=counts,
        quantCol=quantCol,abund=abund,chNum=chNum,ch2=ch2,annot=annot,remConta=remConta,remStrainNo=remStrainNo, specPref=specPref)
    }

    ## simplify species : remove strain-information like ' (strain ...)' (only if any species is known)
    if(remStrainNo && !all(is.na(annot[,"Species"]))) {
      annot[,"Species"] <- sub(" \\(strain [[:alnum:]].+","", annot[,"Species"])
    }
    ## complete species annot   by info extracted from fasta : ' OS='
    .completeSpeciesAnnot <- function(spe=c("Homo sapiens", "_HUMAN"), anno=annot, exCoNa=c("Species", "EntryName")) {    # re-written 12jun23
      ## Complete missing species-annotation based on tags found in the entry-names
      ##  spe (character, length=2) 1st : species-name to assign, 2nd : pattern (regular expression) to search in the entry-names
      ##  anno (matrix) annotation, must contain the columns named in 'exCoNa'
      ##  exCoNa (character, length=2) names of columns : 1st for species (to complete), 2nd for entry-names (to search)
      ## Returns 'anno' where lines so far without species (NA or empty) and entry-name matching spe[2] got spe[1] as species
      chNa <- is.na(anno[,exCoNa[1]]) | nchar(anno[,exCoNa[1]]) <1             # missing (species) annotation
      if(any(chNa, na.rm=TRUE)) {        # suppose that all 'exCoNa' are present as colnames in 'anno'
        ## search in the argument 'anno' (not in an object of the calling environment); grep() returns indexes (no need for which())
        chS <- intersect(grep(spe[2], anno[,exCoNa[2]]), which(chNa))
        if(length(chS) >0) anno[chS, exCoNa[1]] <- spe[1]
      }
      anno }
    ## try to recuperate/fix non-given/bad formatted species
    chNa <- is.na(annot[,"Species"])
    if(any(chNa)) {
      commonSpec <- .commonSpecies()
      ## NOTE(review): each line of 'commonSpec' is passed as 'spe' (1st value used as species-name, 2nd as pattern) - confirm column-order of .commonSpecies()
      ## drop=FALSE : remain matrix even if only a single line lacks species
      for(i in seq_len(nrow(commonSpec))) annot[which(chNa),] <- .completeSpeciesAnnot(commonSpec[i,], annot[which(chNa), , drop=FALSE], exCoNa=c("Species","EntryName")) }
    if(debug) {message(fxNa,"rWB6"); rWB6 <- list(path=path,chPa=chPa,tmp=tmp,extrColNames=extrColNames,chCol=chCol,counts=counts,
      quantCol=quantCol,abund=abund,chNum=chNum,ch2=ch2,annot=annot,remConta=remConta,remStrainNo=remStrainNo, specPref=specPref)}

    ## check/complete for truncated species names (ie names found inside other ones)
    chSpe <- which(!is.na(annot[,"Species"]) & nchar(annot[,"Species"]) >0)
    if(length(chSpe) >0) {
      OS <- gsub(";{1,5}$", "", annot[chSpe,"Species"])  # remove tailing separators
      OSna <- unique(OS)
      if(debug) {message(fxNa,"rWB6b")}
      OSna <- OSna[which(nchar(OSna) >0)]                # (just in case) remove empty tags
      ## search each name inside all names, as fixed text (species-names may contain characters special to regular expressions, eg parentheses)
      ch2 <- lapply(OSna, grep, OSna, fixed=TRUE)
      chTr <- vapply(ch2, length, integer(1)) >1
      if(any(chTr, na.rm=TRUE)) { if(!silent) message(fxNa,"Found ",sum(chTr)," species name(s) appearing inside other ones, assume as truncated (eg  ",OSna[which(chTr)[1]],")")
        ## replace truncated name by the longest name containing it (never by itself)
        for(i in which(chTr)) { longer <- setdiff(ch2[[i]], i)
          OS[which(OS==OSna[i])] <- OSna[longer[which.max(nchar(OSna[longer]))]] }
      }
      annot[chSpe,"Species"] <- OS}
    if(debug) {message(fxNa,"rWB7"); rWB7 <- list(path=path,chPa=chPa,tmp=tmp,extrColNames=extrColNames,chCol=chCol,quantCol=quantCol,remStrainNo=remStrainNo,
      abund=abund,chNum=chNum,ch2=ch2, annot=annot,remConta=remConta,counts=counts)}

    ## assign species-tags following 'specPref' (if given)
    if(length(specPref) <1) {
      if(debug) message(fxNa,"Note: Argument 'specPref' not specifed (empty)")
    } else {
      ## result of .extrSpecPref() replaces 'annot'
      annot <- .extrSpecPref(specPref, annot, useColumn=c("Description","Species","EntryName","GeneName"), silent=silent, debug=debug, callFrom=fxNa)
    }
    if(debug) {message(fxNa,"rWB7b") }

    ## report (as messages) number of proteins without species and number of proteins per species
    if(!silent) {
      nNoSpe <- sum(is.na(annot[,"Species"]))
      if(nNoSpe >0) message(fxNa,"Note: ",nNoSpe," proteins with unknown species")
      speTab <- table(annot[,"Species"])
      if(length(speTab) >0) {
        ## alternate species-name and its count
        speTxt <- as.vector(rbind(names(speTab), paste0(": ",speTab,",  ")))
        message("     data by species : ", speTxt)
      }
    }

    if(debug) {message(fxNa,"rWB8")}

    ## look for unique col from $annot to use as rownames
    ## count duplicated elements per column among the first (up to) 7 columns; counting column by column also works with a single line of annotation
    nCh <- min(ncol(annot), 7)
    chAn <- vapply(seq_len(nCh), function(j) sum(duplicated(annot[,j])), integer(1))
    names(chAn) <- colnames(annot)[seq_len(nCh)]
    if(!silent) message(fxNa,"Use column '",colnames(annot)[which.min(chAn)],"' as identifyer (has fewest, ie ",chAn[which.min(chAn)]," duplicated entries) as rownames")
    ## 1st column without any duplicated entries, otherwise the column with fewest duplicated entries made unique
    rownames(abund) <- rownames(annot) <- if(any(chAn==0)) annot[,which(chAn==0)[1]] else wrMisc::correctToUnique(annot[,which.min(chAn)], callFrom=fxNa)
    if(length(counts) >0) rownames(counts) <- rownames(annot)
    if(debug) {message(fxNa,"rWB9"); rWB9 <- list(path=path,chPa=chPa,tmp=tmp,extrColNames=extrColNames,chCol=chCol,quantCol=quantCol,abund=abund,chNum=chNum,ch2=ch2,
      annot=annot,refLi=refLi,remConta=remConta)}

    ## check for reference for normalization
    refLiIni <- refLi
    if(is.character(refLi) && length(refLi)==1) {
      ## search term in column 'SpecType'; this column is not created in the code visible here (presumably by .extrSpecPref() when 'specPref' is given),
      ##  without this column no line can match (instead of stopping with 'subscript out of bounds')
      refLi <- if("SpecType" %in% colnames(annot)) which(annot[,"SpecType"]==refLi) else integer(0)
      if(length(refLi) <1 ) { refLi <- seq_len(nrow(abund))
        if(!silent) message(fxNa,"Could not find any proteins matching argument 'refLi=",refLiIni,"', ignoring ...")
      } else {
        if(!silent) message(fxNa,"Normalize using (custom) subset of ",length(refLi)," lines specified as '",refLiIni,"'")}}    # may be "mainSpe"

    ## take log2 & normalize
    quant <- try(wrMisc::normalizeThis(log2(abund), method=normalizeMeth, mode="additive", refLines=refLi, silent=silent, debug=debug, callFrom=fxNa), silent=TRUE)
    if(inherits(quant, "try-error")) {
      ## keep the function running : the object of class 'try-error' can't be used as matrix of quantitations later on
      warning(fxNa,"PROBLEMS ahead : Unable to normalize as log2-data !!  Using log2-data without normalization")
      quant <- log2(abund) }

    if(debug) {message(fxNa,"rWB10"); rWB10 <- list(path=path,chPa=chPa,tmp=tmp,extrColNames=extrColNames,chCol=chCol,quantCol=quantCol,abund=abund,chNum=chNum,ch2=ch2,
      quant=quant,annot=annot,remConta=remConta)}

    ### GROUPING OF REPLICATES AND SAMPLE META-DATA
    ## prepare for sdrf (search in directory above)
    if(isTRUE(sdrf)) {
      hiDir <- dir(file.path(dirname(paFi),".."))
      chFa <- grep("^sdrf.+\\.tsv$", hiDir)
      if(length(chFa) >0) sdrf <- file.path(dirname(paFi),"..",hiDir[chFa[1]]) else {sdrf <- NULL
        if(!silent) message(fxNa,"NO sdrf file found in directory above main data !")}
    }

    ## read sample meta-data (only if any source was specified)
    if(length(suplAnnotFile) >0 || length(sdrf) >0) {
      setupSd <- readSampleMetaData(sdrf=sdrf, suplAnnotFile=suplAnnotFile, quantMeth=paste0("WB",quantSoft), path=path, abund=utils::head(quant), groupPref=groupPref, silent=silent, debug=debug, callFrom=fxNa)
    }
    if(debug) {message(fxNa,"rWB13 .."); rWB13 <- list(sdrf=sdrf,gr=gr,suplAnnotFile=suplAnnotFile,abund=abund, quant=quant,refLi=refLi,annot=annot,setupSd=setupSd,sampleNames=sampleNames)}

    ## finish groups of replicates & annotation setupSd
    setupSd <- .checkSetupGroups(abund=abund, setupSd=setupSd, gr=gr, sampleNames=sampleNames, quantMeth="WB", silent=silent, debug=debug, callFrom=fxNa)
    ## set column-names : sample-names if matching the number of samples, otherwise the names of groups;
    ##  names not matching the number of columns can't be used (initial column-names remain)
    newColNa <- if(length(setupSd$sampleNames)==ncol(abund)) setupSd$sampleNames else setupSd$groups
    if(length(newColNa)==ncol(abund)) { colnames(quant) <- colnames(abund) <- as.character(newColNa)
    } else if(!silent) message(fxNa,"Unable to assign sample-names (length not matching number of columns), keeping initial column-names")
    ## counting data get the same column-names as quantitation data
    if(length(dim(counts)) >1 && length(counts) >0) colnames(counts) <- colnames(abund)

    if(debug) {message(fxNa,"Read sample-meta data, rWB14"); rWB14 <- list(sdrf=sdrf,suplAnnotFile=suplAnnotFile,abund=abund, quant=quant,refLi=refLi,annot=annot,setupSd=setupSd,plotGraph=plotGraph)}

    ## main plotting of distribution of intensities
    ## numeric 'plotGraph' is taken as custom layout (and implies plotting), anything else than TRUE means no plot
    custLay <- NULL
    if(is.numeric(plotGraph) && length(plotGraph) >0) {
      custLay <- as.integer(plotGraph)
      plotGraph <- TRUE
    } else if(!isTRUE(plotGraph)) plotGraph <- FALSE
    if(plotGraph) {
      .plotQuantDistr(abund=abund, quant=quant, custLay=custLay, normalizeMeth=normalizeMeth, softNa=paste("Wombat-P",quantSoft),
        refLi=refLi, refLiIni=refLiIni, tit=titGraph, silent=silent, callFrom=fxNa, debug=debug)
    }
    ## meta-data : file read, software, normalization, call and session-information
    hasInfoDat <- length(infoDat) >0
    notes <- c(inpFile=paFi,
      qmethod=paste("Wombat-P",quantSoft),
      qMethVersion=if(hasInfoDat) unique(infoDat$Software.Revision) else NA,
      rawFilePath=if(hasInfoDat) infoDat$File.Name[1] else NA,
      normalizeMeth=normalizeMeth,
      call=match.call(),
      created=as.character(Sys.time()),
      wrProteo.version=utils::packageVersion("wrProteo"),
      machine=Sys.info()["nodename"])
    ## final output
    if(isTRUE(separateAnnot)) list(raw=abund, quant=quant, annot=annot, counts=counts, sampleSetup=setupSd, quantNotes=parametersD, notes=notes) else data.frame(quant,annot) }
}

