
#' Open a Sciex EchoMS wiff File for Reading
#'
#' @param wiffFile A valid file path to the Sciex data file (extension .wiff)
#' containing the metadata for one or more EchoMS runs
#' @param wiffScanFile An optional file path the the Sciex scan file (extension
#' .wiff.scan) containing the full raw EchoMS data
#'
#' @return An object of class `rtmsWiffReader` (see Details)
#'
#' @details
#' The reader object produced by this function is of class `rtmsWiffReader`,
#' and contains the information necessary to extract more detailed data from
#' this file and the associated .wiff.scan file.  It contains the following
#' fields:
#' * `wiff`: The path to the .wiff file being used
#' * `wiffScan`: If given, the path of the associated .wiff.scan file. Some but
#' not all functions require this file; this will be specified in their
#' documentation.
#' * `cfbf`: A list of values used to parse the .wiff file's internal CFBF
#' structure
#' * `samples`: The "sample" list for the .wiff file.  It is important to note
#' that these are actually different *runs* whose data have been stored in one
#' file.  Contains data on the specific masses measured in each run.
#'
#' @export
newWiffReader <- function(wiffFile,wiffScanFile=NULL) {
	rcon <- file(wiffFile,"rb")
	on.exit(close(rcon))

	cfbf <- cfbf_parseFile(rcon)

	if (cfbf_checkObject(cfbf,c("SampleSubtree","SampleTable"))) {

		sampleTable <- cfbf_getObject(rcon,cfbf,c("SampleSubtree","SampleTable"))
		tcon <- rawConnection(sampleTable)
		seek(tcon,40)
		numSamples <- readBin(tcon,"integer",1,size=2,signed=TRUE,endian="little")
		seek(tcon,2,origin="current")
		comments <- rep("",numSamples)
		datetimes <- c()
		for (ti in seq_len(numSamples)) {
			seek(tcon,4,origin="current")
			ticheck <- readBin(tcon,"integer",1,size=2,signed=TRUE,endian="little")
			seek(tcon,12,origin="current")
			posix <- readBin(tcon,"integer",1,size=4,endian="little")
			datetimes[[ti]] <-as.POSIXct(posix,origin="1970-01-01")
			seek(tcon,4,origin="current")
			ticheck <- readBin(tcon,"integer",1,size=2,signed=TRUE,endian="little")
			comments[[ti]] <- cfbf_readTwoByteString(tcon)
		}
		close(tcon)

		sampleIdxTable <- cfbf_getObject(rcon,cfbf,c("SampleSubtree","SampleIdxTable"))
		tcon <- rawConnection(sampleIdxTable)
		seek(tcon,32)
		offsets <- rep(NA,numSamples)
		for (ti in seq_len(numSamples)) {
			ticheck <- readBin(tcon,"integer",1,size=4,endian="little")
			offsets[[ti]] <- readBin(tcon,"integer",1,size=4,endian="little")
			seek(tcon,8,origin="current")
		}
		close(tcon)

		samples <- list()
		tics <- list()
		subclass <- NA_character_
		for (ti in seq_len(numSamples)) {
			sample <- list(comment=comments[[ti]],
						   datetime=datetimes[[ti]],
						   offset=offsets[[ti]])

			path <- c("MethodSubtree",
					  sprintf("Method%d",ti),
					  "DeviceMethod0",
					  "Period0",
					  "Experiment0",
					  "ExperimentHeader")
			experimentHeader <- cfbf_getObject(rcon,cfbf,path)
			hlen <- length(experimentHeader)
			numMasses <- readBin(experimentHeader[(hlen-3):hlen],"integer",size=4,endian="little")

			path <- c("MethodSubtree",
					  sprintf("Method%d",ti),
					  "DeviceMethod0",
					  "Period0",
					  "Experiment0",
					  "MassRangeEx",
					  "MassRangeEx")
			massRangeEx <- cfbf_getObject(rcon,cfbf,path)

			masses <- list()
			tcon <- rawConnection(massRangeEx)
			seek(tcon,32)
			checks <- readBin(tcon,"integer",2,size=4,endian="little")
			numMassValues <- checks[[2]]
			for (mi in seq_len(numMasses)) {
				mass <- list()
				mass$values <- readBin(tcon,"double",4,size=4,endian="little")
				seek(tcon,4,origin="current")
				mass$name <- cfbf_readTwoByteString(tcon)
				parameters <- list()
				for (iter in seq_len(numMassValues)) {
					pname <- cfbf_readTwoByteString(tcon)
					parameters[[pname]] <- readBin(tcon,"double",2,size=4,endian="little")
					seek(tcon,4,origin="current")
				}
				mass$parameters <- parameters
				masses[[mi]] <- mass
			}
			close(tcon)

			sample$masses <- masses
			samples[[ti]] <- sample

			tics[[ti]] <- getTIC_internal(rcon,cfbf,ti)

			if (nrow(tics[[ti]])==0) {

			} else if (all(tics[[ti]]$full==1) && (is.na(subclass) || subclass=="rtmsWiffReaderFS")) {
				subclass <- "rtmsWiffReaderFS"
			} else if (all(tics[[ti]]$full==0) && (is.na(subclass) || subclass=="rtmsWiffReaderMRM")) {
				subclass <- "rtmsWiffReaderMRM"
			} else {
				stop("rtmsEcho only supports wiff files containing one type of data (MRM or scan).")
			}
		}

		rawvec <- cfbf_getObject(rcon,cfbf,c("SampleSubtree","DabsInfo"))
		rawcon <- rawConnection(rawvec,"rb")
		on.exit(close(rawcon),add=TRUE)
		seek(rawcon,36)

		strvals <- c()
		slength <- readBin(rawcon,"integer",1,size=2,endian="little",signed=FALSE)
		while(slength>0) {
			strvals <- c(strvals,rawToChar(readBin(rawcon,"raw",slength)[seq(1,slength,by=2)]))
			seek(rawcon,4,origin="current")
			slength <- readBin(rawcon,"integer",1,size=2,endian="little",signed=FALSE)
		}

		shots <- strsplit(strvals[[2]],", ")[[1]]
		classvec <- c(subclass,"rtmsWiffReader")
	} else {
		samples <- list()
		tics <- list()
		shots <- list()
		classvec <- "rtmsWiffReader"
		warning("The WIFF file does not appear to contain any smaple data.")
	}

	structure(
		list(wiff=wiffFile,
			 wiffScan=wiffScanFile,
			 cfbf=cfbf,
			 samples=samples,
			 tics=tics,
			 shots=shots),
		class=classvec
	)
}

#' List of Shot Names in a given Wiff File
#'
#' @param wiff A wiff reader object of class `rtmsWiffReader`
#'
#' @return A character vector containing the shot names for all shots fired in
#' each run in a given file.  Note: *not necessarily in the correct order* (see
#' Details).
#'
#' @details
#' The .wiff file includes these shot names in the order in which they were
#' input by the user.  They are generally names of wells in a plate, but the
#' order in which they are given by the user is not necessarily the order in
#' which the EchoMS will sample them.  By default, the EchoMS will use a more
#' "efficient" ordering, moving back and forth across the rows of the plate.
#' So the shot names here may not correlate in order with the measurements
#' represented by the intensities in the total ion chromatogram.
#'
#'
#' @export
#'
#' @examples
#' wiff <- exampleWiff
#'
#' shots <- getWiffShots(wiff)
getWiffShots <- function(wiff) {
	if (!inherits(wiff,"rtmsWiffReader")) { stop("'wiff' must be an object of class 'rtmsWiffReader'.")}
	wiff$shots
}

#' Extract a total ion chromatogram (TIC) from a Wiff file
#'
#' @param wiff A wiff reader object of class `rtmsWiffReader`
#' @param index The particular run within the wiff file for which the TIC
#' should be extracted.  Defaults to 1, the first (and often only) run.
#'
#' @return A data frame containing the TIC data (see Details)
#'
#' @details
#' The total ion chromatogram data in a .wiff file contains data for every
#' moment at which the EchoMS took a measurement. Each measurement is
#' represented by a single row in the returned data frame, with the following
#' four values:
#' * `time`: The time (in seconds) after the beginning of the run at which the
#' measurement was taken
#' * `intensity`: The total ion intensity across all masses measured
#' * `full`: Either 0 (indicating data is stored as a discrete set of targeted
#' intensities) or 1 (indicating the data is stored as a compressed full
#' spectrum)
#' * `offset`: The binary offset within this run's data block in the associated
#' .wiff.scan file at which this measurement's raw data can be found
#' * `size`: The size (in bytes) of this measurment's raw data in the associated
#' .wiff.scan file.
#'
#' @export
#'
#' @examples
#' wiff <- exampleWiff
#'
#' tic <- getTIC(wiff)
#'
#' # Plot the first 60 seconds of the total ion chromatgram using [ggplot2]
#' ggplot2::ggplot(tic,ggplot2::aes(x=time,y=intensity))+
#'		ggplot2::geom_line(color="red")+
#'		ggplot2::xlim(0,60)
getTIC <- function(wiff,index=1) {
	if (!inherits(wiff,"rtmsWiffReader")) { stop("'wiff' must be an object of class 'rtmsWiffReader'.")}
	wiff$tics[[index]]
}

#' Extract all total ion chromatograms from a Wiff file
#'
#' @param wiff A wiff reader object of class `rtmsWiffReader`
#'
#' @return A list of total ion chromatograms of the format extracted by
#' [getTIC()], one for each run in the given .wiff file
#'
#' @export
#'
#' @examples
#' wiff <- exampleWiff
#'
#' tics <- getAllTIC(wiff)
#' tic <- tics[[1]]
getAllTIC <- function(wiff) {
	if (!inherits(wiff,"rtmsWiffReader")) { stop("'wiff' must be an object of class 'rtmsWiffReader'.")}
	wiff$tics
}

getTIC_internal <- function(rcon,cfbf,index) {
	idxpath <- c("SampleSubtree",
				 sprintf("Sample%d",index),
				 "Idx")
	if (!cfbf_checkObject(cfbf,idxpath)) { return(data.frame()) }
	entry <- cfbf_getDirectoryEntry(cfbf$directory,idxpath)
	nval <- floor((entry$streamSize-32)/54)

	offsetvec <- sizevec <- timevec <- fullvec <- intensityvec <- rep(0,nval)
	seek(rcon,(entry$startingSector+1)*cfbf$header$sectorSize+32)
	for (iter in 1:nval) {
		offsetvec[[iter]] <- readBin(rcon,"integer",1,size=4)
		sizevec[[iter]] <- readBin(rcon,"integer",1,size=4)
		timevec[[iter]] <- readBin(rcon,"double",1,size=8)
		fullvec[[iter]] <- readBin(rcon,"integer",1,size=2,signed=FALSE)
		intensityvec[[iter]] <- readBin(rcon,"double",1,size=8)
		readBin(rcon,"raw",28)
	}
	ticdf <- data.frame(offset=offsetvec,size=sizevec,
						 time=timevec,full=fullvec,intensity=intensityvec)
	ticdf$time <- ticdf$time/1000
	ticdf
}

#' Measure total ion chromatogram peaks to estimate ejection time
#'
#' @param wiff Either an ojbect of class `rtmsWiffReader`, or a data frame
#' representing a full EchoMS total ion chromatogram as extracted by [getTIC()]
#' or [getAllTIC()]
#' @param efficientPath If `TRUE`, the extraction proceeds under the assumption
#' that shots were fired from the wells named in `shots` using the "efficient"
#' method of the EchoMS, which proceeds left to right across odd rows (A, C, E,
#' etc.) and right to left across even rows (B, D, F, etc.). If `FALSE`, shots
#' are assumed to be fired from left to right across all rows
#' @param center A guess at the timing (in seconds) between EchoMS shots.  Most
#' runs are 3 seconds apart, but the EchoMS does permit a fast mode.  If a fast
#' mode is used, 0.3 seconds should be added to fast mode timing, as the Echo
#' requires those 0.3 to eject a droplet
#' @param guess If included, used as the guess for the timing of the first
#' shot.  If desired this can usually be estimated visually from a plot of the
#' total ion chromatogram intensity.  Actual shot timing will still be
#' optimized to align with TIC intensities.  If `NULL` (the default), the
#' function will guess at the timing of the first shot based on existing runs.
#' @param index The particular run within the wiff file for which the TIC
#' should be extracted.  Defaults to 1, the first (and often only) run.
#' @param shots A list of shot names as extracted by [getWiffShots()]; each
#' name should begin with an alphanumeric well name (e.g. "A01", "B7", "F15")
#' and may contain the string `"Marker"` to indicate the well was used as a
#' marker.
#'
#' @return A data frame of measured ejections (see Details)
#'
#' @details
#' The function assumes equally timed ejections, approximately `center` seconds
#' apart, can be found in the total ion chromatogram. Once the timing of these
#' peaks is selected, the chromatogram is analyzed to locate the boundaries of
#' these peaks, using baseline noise or local minima to draw these boundaries.
#' The function returns a data frame with one row for each peak, with the
#' following columns:
#' * `shotorder`: The order within the run in which the peaks appeared
#' * `shot`: The full shot name found in `shots`
#' * `marker`: `TRUE` if the shot is indicated as coming from a marker well
#' * `well`: The standard alphanumeric well name found in the shot name
#' * `time`: The time (in seconds) after the beginning of the run at which the
#' peak is expected to reach its maximum
#' * `minTime`: The time (in seconds) after the beginning of the run at which
#' the peak is measured to begin
#' * `maxTime`: The time (in seconds) after the beginning of the run at which
#' the peak is measured to end
#' * `area`: The total area (in counts, as intensity is counts per second and
#' the extend of the peak is measured in seconds) of the peak
#' * `height`: The maximum height of the peak (in counts per second)
#' * `width`: The total width of the peak (in seconds)
#' * `halfWidth`: The width of the peak at half of its maximum intensity (in
#' seconds)
#'
#' @export
#'
#' @examples
#' wiff <- exampleWiff
#'
#' # Works if file is run with 3 seconds between peaks and shots are fired
#' # from wells in the efficient "back-and-forth" path
#' ejections <- measureEjections(wiff)
measureEjections <- function(wiff,efficientPath=TRUE,center=3,guess=NULL,index=1,shots=NULL)
	UseMethod("measureEjections")

#' @export
#' @rdname measureEjections
measureEjections.rtmsWiffReader <- function(wiff,efficientPath=TRUE,center=3,guess=NULL,index=1,shots=NULL) {
	tic <- getTIC(wiff,index)
	shots <- getWiffShots(wiff)
	measureEjections.default(tic,efficientPath=efficientPath,center=center,guess=guess,shots=shots)
}

#' @export
#' @rdname measureEjections
measureEjections.default <- function(wiff,efficientPath=TRUE,center=3,guess=NULL,index=1,shots=NULL) {
	tic <- wiff

	markerVec <- grepl("Marker",shots,ignore.case=TRUE)
	rm <- regmatches(shots,regexec("^([A-Z]+)(\\d+)",shots))
	wellrow <- vapply(rm,
					  function(m) (if (length(m)>0) m[[2]] else NA_character_),
					  NA_character_)
	wellcol <- vapply(rm,
					  function(m) (if (length(m)>0) m[[3]] else NA_character_),
					  NA_character_)
	shottable <- data.frame(shot=shots,marker=markerVec,row=wellrow,col=wellcol,stringsAsFactors=FALSE)
	shottable$rowid <- seq_along(shots)
	shottable$well <- paste0(shottable$row,shottable$col)

	if (efficientPath) {
		shottable$col <- as.numeric(shottable$col)

		rowlevels <- c(LETTERS[1:26],paste0("A",LETTERS[1:26]))
		rowlevels <- rowlevels[rowlevels %in% unique(shottable$row)]
		shottable$rownumber <- as.numeric(factor(shottable$row,rowlevels))
		shottable$evenness <- (shottable$rownumber %% 2)==0

		if (efficientPath) { shottable$col <- ifelse(shottable$evenness,-shottable$col,shottable$col) }
		shottable <- shottable[order(shottable$rownumber,shottable$col,shottable$rowid),]
	}

	shottable <- shottable[,c("rowid","shot","marker","well")]
	shottable$shotorder <- seq_along(shots)

	bf <- findBestFrequency(tic,length(shots),center,guess)
	breaks <- bf$offset+(0:(length(shots)-1))*bf$spacing

	meanstep <- mean(diff(tic$time))
	sigma <- 0.05
	ssigma <- sigma/meanstep
	if (ssigma<=1.5) {
		kn <- ceiling(4*ssigma^2)
		kernel <- choose(kn,0:kn)
	} else {
		kt <- seq(-ceiling(3*ssigma),ceiling(3*ssigma))
		kernel <- exp(-(kt^2)/(2*ssigma^2))
	}
	kernel <- kernel/sum(kernel)
	tic$smoothed <- stats::filter(tic$intensity,kernel,circular=TRUE)
	tic$area <- c(diff(tic$time),meanstep)*tic$intensity

	peaks <- data.frame(shotorder=seq_along(shots),time=breaks,
						minTime=NA,maxTime=NA,area=NA,
						height=NA,width=NA,halfWidth=NA)

	upstep <- max(1.0,(2/3)*bf$spacing)
	downstep <- max(0.5,(1/2)*bf$spacing)
	thresh <- max(tic$intensity[tic$time<5])
	for (piter in seq_along(breaks)) {
		b <- breaks[[piter]]

		loceil<- max(tic$time[tic$time<b])

		lorel <- tic[tic$time >= min(loceil,b-downstep) & tic$time < b,]
		if (min(lorel$smoothed) < thresh) {
			threshmax <- max(which(lorel$smoothed<thresh))
			if (threshmax==nrow(lorel)) { minTime <- b }
			else { minTime <- lorel$time[threshmax+1] }
		} else {
			minTime <- lorel$time[which.min(lorel$smoothed)]
		}

		hifloor <- min(tic$time[tic$time>b])
		hirel <- tic[tic$time > b & tic$time <= max(hifloor,b+upstep),]
		if (min(hirel$smoothed) < thresh) {
			threshmin <- min(which(hirel$smoothed<thresh))
			if (threshmin==1) { maxTime <- b }
			else { maxTime <- hirel$time[threshmin-1] }
		} else {
			maxTime <- hirel$time[which.min(hirel$smoothed)]
		}

		prel <- tic[tic$time >= minTime & tic$time <= maxTime,]
		if (nrow(prel)<=1) {
			peaks$minTime[[piter]] <- minTime
			peaks$maxTime[[piter]] <- maxTime
			peaks$area[[piter]] <- sum(prel$area)
			if (nrow(prel)==0) {
				peaks$height[[piter]] <- 0
			} else {
				peaks$height[[piter]] <- max(prel$smoothed)
			}
			peaks$width[[piter]] <- maxTime-minTime
			peaks$halfWidth[[piter]] <- 0
			next
		}
		baseline <- diff(range(prel$time))*mean(prel$smoothed[c(1,nrow(prel))])
		peak <- max(prel$intensity)
		smoothPeak <- max(prel$smoothed)
		minHalfTime <- prel$time[min(which(prel$smoothed>smoothPeak/2))]
		maxHalfTime <- prel$time[max(which(prel$smoothed>smoothPeak/2))]

		peaks$minTime[[piter]] <- minTime
		peaks$maxTime[[piter]] <- maxTime
		peaks$area[[piter]] <- sum(prel$area)
		peaks$height[[piter]] <- smoothPeak
		peaks$width[[piter]] <- maxTime-minTime
		peaks$halfWidth[[piter]] <- maxHalfTime-minHalfTime
	}

	peaks <- merge(shottable,peaks,by="shotorder")
	peaks <- peaks[order(peaks$rowid),]
	peaks <- peaks[,c("shotorder","shot","marker","well",
					  "time","minTime","maxTime",
					  "area","height","width","halfWidth")]
	peaks
}

findBestFrequency <- function(tic,nspots,center=3,guess=NULL) {

	if (is.null(guess)) { tmin <- 16+2.1*center }
	else { tmin <- guess-0.5075*center }
	tmax <- tmin+nspots*(center+0.015)

	reltic <- tic[tic$time>tmin & tic$time<tmax,c("time","intensity")]
	reltic$time <- reltic$time - tmin

	cfreq <- center
	for (res in 0:3) {
		freqvec <- cfreq+((-15:15)*10^(-(3+res)))
		sinvec <- cosvec <- rep(0,length(freqvec))
		for (fi in seq_along(freqvec)) {
			sinv <- sin(2*pi*(reltic$time)/freqvec[[fi]])
			cosv <- cos(2*pi*(reltic$time)/freqvec[[fi]])
			sinvec[[fi]] <- sum(sinv*reltic$intensity)
			cosvec[[fi]] <- sum(cosv*reltic$intensity)
		}
		absvec <- sqrt(sinvec^2+cosvec^2)
		bestind <- which.max(absvec)
		cfreq <- freqvec[bestind]
		offset <- cfreq*atan2(sinvec[bestind],cosvec[bestind])/(2*pi)
		if (offset<0) { offset <- offset+cfreq }
	}

	offset <- offset+tmin

	shifts <- c(-1,0,1,2)
	shiftEval <- rep(0,length(shifts))
	for (si in seq_along(shifts)) {
		shift <- shifts[[si]]
		tmin <- offset+(shift-0.5)*cfreq
		tmax <- tmin+nspots*cfreq
		rel <- tic$time>tmin & tic$time<tmax
		cosv <- cos(2*pi*(tic$time[rel]-offset)/cfreq)
		shiftEval[[si]] <- sum(cosv*tic$intensity[rel])
	}
	bestShift <- shifts[[which.max(shiftEval)]]
	offset <- offset+bestShift*cfreq

	return(list(spacing=cfreq,offset=offset))
}
