#' Parsing a CFBF File
#'
#' Nearly every proprietary instrument format is a container for all kinds of
#' relevant data and metadata, and as such, has to find some way of organizing
#' those bits of data in such a way that they can be stored and located
#' efficiently.  While some formats build their own such structure, many make
#' use of an existing file format system, and the WIFF files produce by Sciex
#' are no different.  They use a format called CFBF (the compound file binary
#' format), which basically treats the inside of the WIFF file like it was its
#' own little hard drive, with folders and paths and everything. The
#' functions in this file are used by `rtmsEcho` to parse and navigate this
#' pretend file structure.
#'
#' If you'd like to get into the details of how it all works, you can feel free
#' to dig through the code below.  But basically, parsing a CFBF file produces
#' a "directory", that lists all the data objects in the file, their names,
#' where in the "folder structure" they can be found, and where in the actual
#' binary file they begin.  So if you know the full "path" to a given object
#' (e.g. "SampleSubtree/Sample1/Idx") then you can extract that particular data
#' block very quickly.  This is accomplished using the function
#' `cfbf_getObject()`.  Of course, it's often a good idea to check if a
#' particular object is even present, this can be done with
#' `cfbf_checkObject()`.
#'
#' Getting into way more detail than you're likely ever to want. CFBF breaks all
#' the bytes in a file into "sectors"; these sectors can take a variety of
#' powers of two as their size, but the typical size (and the one that seems to
#' be used by WIFF files) is 512 bytes (2 to the 9th power).  All of the data
#' objects that are stored in the WIFF file are represented by blocks of bytes.
#' Bigger ones are given one or more full sectors and stored in a chain of
#' sectors.  The location and order of all these full size sectors is stored in
#' a special object called the "FAT table".  Smaller objects are actually
#' stored in even smaller "mini-sectors" (usually just 64 bytes), which are
#' catalogued in a special "miniFAT table"; the full set of mini-sectors is
#' stored in a special chain of full size sectors, which is also logged in
#' the main FAT table.
#'
#' @noRd
cfbf_parseFile <- function(rcon) {
	# Read in CBFF Header
	cfbfHeader <- cfbf_readHeader(rcon)

	# Process the "DiFAT table" (used to list the sectors of the FAT table) and
	# the FAT table
	nextDifatSector <- cfbfHeader$firstDifatSector
	fatTable <- data.frame()
	difatTable <- data.frame(sector=cfbfHeader$initialDifat,expand=TRUE)
	while(any(difatTable$expand)) {
		for (difatRow in which(difatTable$expand)) {
			seek(rcon,(difatTable$sector[difatRow]+1)*cfbfHeader$sectorSize)
			fatIndices <- nrow(fatTable):(nrow(fatTable)+127)
			fatTable <- rbind(fatTable,
							  data.frame(Index=fatIndices,
							  		   NextIndex=readBin(rcon,"integer",128,size=4)))
			difatTable$expand[difatRow] <- FALSE

			if (nextDifatSector != -2) {
				moreDifat <- cfbf_getFatChain(fatTable,nextDifatSector)
				nextDifatSector <- rev(moreDifat$NextIndex)[1]
				difatTable <- rbind(difatTable,data.frame(sector=moreDifat$Index,expand=TRUE))
			}
		}
	}

	# Read and parse the miniFAT table
	minifatSectors <- cfbf_getFatChain(fatTable,cfbfHeader$firstMinifatSector)
	minifatTable <- data.frame()
	for (sector in minifatSectors$Index) {
		seek(rcon,(sector+1)*cfbfHeader$sectorSize)
		minifatIndices <- nrow(minifatTable):(nrow(minifatTable)+127)
		minifatTable <- rbind(minifatTable,
							  data.frame(Index=minifatIndices,
							  		   NextIndex=readBin(rcon,"integer",128,size=4)))
	}


	# Read and parse the CFBF directory
	directorySectors <- cfbf_getFatChain(fatTable,cfbfHeader$firstDirectorySector)
	directory <- data.frame()
	for (sector in directorySectors$Index) {
		seek(rcon,(sector+1)*cfbfHeader$sectorSize)
		for (entry in 1:4) {
			directory <- rbind(directory,cfbf_readDirectoryEntry(rcon))
		}
	}

	ministreamSectors <- cfbf_getFatChain(fatTable,directory$startingSector[[1]])

	# Confusingly, the directory table does not specify which element of the
	# directory is a given element's parent.  THis has something to do with the
	# RB-trees algorithm used to generate the tree, but it makes locating a
	# given object on a path very difficult.  This code adds a "parent" column
	# to the directory to make it easier to navigate
	directory$parent <- NA
	directory$path <- NA
	directory$expand <- FALSE
	directory$sibexpand <- FALSE
	directory$parent[[1]] <- -1
	directory$expand[[1]] <- TRUE
	while (any(directory$expand==TRUE)) {
		expander <- which(directory$expand==TRUE)[1]
		if (directory$child[expander]>=0) {
			directory$parent[directory$child[expander]+1] <- expander-1
			directory$expand[directory$child[expander]+1] <- TRUE
			directory$sibexpand[directory$child[expander]+1] <- TRUE
		}
		directory$expand[expander] <- FALSE

		while (any(directory$sibexpand)) {
			sibexpander <- which(directory$sibexpand)[1]
			if (directory$leftSibling[sibexpander]>0) {
				directory$parent[directory$leftSibling[sibexpander]+1] <- expander-1
				directory$expand[directory$leftSibling[sibexpander]+1] <- TRUE
				directory$sibexpand[directory$leftSibling[sibexpander]+1] <- TRUE
			}
			if (directory$rightSibling[sibexpander]>0) {
				directory$parent[directory$rightSibling[sibexpander]+1] <- expander-1
				directory$expand[directory$rightSibling[sibexpander]+1] <- TRUE
				directory$sibexpand[directory$rightSibling[sibexpander]+1] <- TRUE
			}
			directory$sibexpand[sibexpander] <- FALSE
		}
	}
	directory$expand <- NULL
	directory$sibexpand <- NULL

	# Store only the values of the header necessary for further processing
	# of the file
	newHeader <-
		list(majorVersion=cfbfHeader$majorVersion,
			 minorVersion=cfbfHeader$minorVersion,
			 sectorSize=cfbfHeader$sectorSize,
			 minisectorSize=cfbfHeader$minisectorSize,
			 signatureNumber=cfbfHeader$signatureNumber,
			 miniStreamCutoff=cfbfHeader$miniStreamCutoff)

	# Output the complete CFBF structure
	list(header=newHeader,directory=directory,fatTable=fatTable,
		 minifatSectors=minifatSectors,minifatTable=minifatTable,
		 ministreamSectors=ministreamSectors)
}

#' Read the header of a CFBF file
#'
#' The first 512 bytes of any CFBF file is a specially formatted header that
#' contains much of the necessary parameters and data for parsing the file.
#' This includes things like sector size, mini-sector size, locations of the
#' beginning of certain essential CFBF structures, etc.  This function reads
#' the first 512 bytes of a file and outputs all of theses CFBF metadata values
#' as a list.
#'
#' @noRd
cfbf_readHeader <- function(rcon) {
	on.exit(close(rcon))

	seek(rcon,0)
	# d0 cf 11 e0 a1 b1 1a e1
	bin_checkBytes(rcon,as.raw(c(208, 207, 17, 224, 161, 177, 26, 225)))
	bin_checkBytes(rcon,as.raw(rep(0,16)))
	minorVersion <- readBin(rcon,"integer",1,size=2,signed=FALSE)
	majorVersion <- readBin(rcon,"integer",1,size=2,signed=FALSE)
	bin_checkBytes(rcon,as.raw(c(254,255)))
	sectorSize <- 2^readBin(rcon,"integer",1,size=2,signed=FALSE)
	if (majorVersion==3) {
		stopifnot(majorVersion==3 && sectorSize==512)
	} else if (majorVersion==4) {
		stopifnot(majorVersion==4 && sectorSize==4096)
	} else {
		stopifnot(majorVersion%in%c(3,4))
	}
	minisectorSize <- 2^readBin(rcon,"integer",1,size=2,signed=FALSE)
	stopifnot(minisectorSize==64)
	bin_checkBytes(rcon,as.raw(rep(0,6)))
	dirSize <- readBin(rcon,"integer",1,size=4)
	if (majorVersion==3) {
		stopifnot(majorVersion==3 && dirSize==0)
	}
	fatSectors <- readBin(rcon,"integer",1,size=4)
	stopifnot(fatSectors>0)
	firstDirectorySector <- readBin(rcon,"integer",1,size=4)
	signatureNumber <- readBin(rcon,"integer",1,size=4)
	miniStreamCutoff <- readBin(rcon,"integer",1,size=4)
	firstMinifatSector <- readBin(rcon,"integer",1,size=4)
	minifatSectors <- readBin(rcon,"integer",1,size=4)
	firstDifatSector <- readBin(rcon,"integer",1,size=4)
	difatSectors <- readBin(rcon,"integer",1,size=4)
	initialDifat <- readBin(rcon,"integer",min(fatSectors,109),size=4)
	on.exit()
	list(majorVersion=majorVersion,
		 minorVersion=minorVersion,
		 sectorSize=sectorSize,
		 minisectorSize=minisectorSize,
		 dirSize=dirSize,
		 fatSectors=fatSectors,
		 firstDirectorySector=firstDirectorySector,
		 signatureNumber=signatureNumber,
		 miniStreamCutoff=miniStreamCutoff,
		 firstMinifatSector=firstMinifatSector,
		 minifatSectors=minifatSectors,
		 firstDifatSector=firstDifatSector,
		 difatSectors=difatSectors,
		 initialDifat=initialDifat)
}

#' Extract a linked chain of indices
#'
#' The FAT tale, DiFAT table, and miniFAT table all store chains of objects in
#' a simple forward-linked table.  Each entry lists the next index in the chain,
#' or a negative value if the chain is complete.  So starting from a given
#' index, it is possible to extract the full chain.
#'
#' @noRd
cfbf_getFatChain <- function(fatTable,firstIndex) {
	nextIndex <- firstIndex
	indexVec <- nextIndexVec <- c()
	while(any(fatTable$Index==nextIndex)) {
		rel <- which(fatTable$Index==nextIndex)
		indexVec <- c(indexVec,nextIndex)
		nextIndex <- fatTable$NextIndex[rel]
		nextIndexVec <- c(nextIndexVec,nextIndex)
	}
	data.frame(Index=indexVec,NextIndex=nextIndexVec)
}

#' Read a single row of the CFBF directory object
#'
#' The heart of the CFBF structure is the directory, which lists every data
#' object and "folder" found in the file's internal structure.  The data in the
#' directory is stored in a special chain of directory sectors, and can be
#' read one at a time in to a full table.  This function reads and parses a
#' single directory entry.
#'
#' @noRd
cfbf_readDirectoryEntry <- function(rcon) {
	on.exit(close(rcon))

	nameBytes <- readBin(rcon,"raw",64)
	nameLength <- readBin(rcon,"integer",1,size=2,signed=FALSE)
	objectType <- readBin(rcon,"integer",1,size=1,signed=FALSE)
	stopifnot(objectType%in%c(0,1,2,5))
	if (objectType==0) {
		# Sectors (which are usually 512 bytes) can contain more than one
		# directory entry, but the last directory entry may not be full. This
		# branch ensures that empty directory entries are processed but return
		# no data
		readBin(rcon,"raw",128-64-2-1)
		on.exit()
		return(data.frame())
	}
	name <- rawToChar(nameBytes[seq(1,(nameLength-2),by=2)])
	colorFlag <- readBin(rcon,"integer",1,size=1,signed=FALSE)
	stopifnot(colorFlag%in%c(0,1))
	if (colorFlag==0) { color <- "red" }
	else { color <- "black" }
	leftSibling <- readBin(rcon,"integer",1,size=4)
	rightSibling <- readBin(rcon,"integer",1,size=4)
	child <- readBin(rcon,"integer",1,size=4)
	class <- paste0(as.character(as.hexmode(as.numeric(
		readBin(rcon,"raw",16)
	))),collapse="")
	state <- readBin(rcon,"integer",1,size=4)
	creationTime <- paste0(as.character(as.hexmode(as.numeric(
		readBin(rcon,"raw",8)
	))),collapse="")
	modifiedTime <- paste0(as.character(as.hexmode(as.numeric(
		readBin(rcon,"raw",8)
	))),collapse="")
	startingSector <- readBin(rcon,"integer",1,size=4)
	streamSize <- readBin(rcon,"integer",1,size=4)
	readBin(rcon,"integer",1,size=4)

	on.exit()
	data.frame(name=name, objectType=objectType, color=color,
			   leftSibling=leftSibling, rightSibling=rightSibling, child=child,
			   class=class, state=state, creationTime=creationTime,
			   modifiedTime=modifiedTime, startingSector=startingSector,
			   streamSize=streamSize)
}

#' Check if a given CFBF object is present
#'
#' Searches the CFBF directory for an object with the specified path.  Returns
#' `TRUE` if it is present, and `FALSE` if it is not
#'
#' @noRd
cfbf_checkObject <- function(cfbf,path) {
	return(cfbf_checkDirectoryEntry(cfbf$directory,path))
}

#' Retrieve a given CFBF object
#'
#' Objects can be extracted from a CFBF file using the "path" that locates them
#' in the file's internal folder structure.  This function locates the relevant
#' directory entry given a particular path (throwing an error if that path is
#' not found), and then extracts the relevant block of data.  Data objects that
#' are small enough (lying below what's called the "ministream cutoff") are
#' are stored in one or more ministream sectors; larger objects are stored in
#' one or more full sectors.  The full set of minisectors or sectors is
#' extracted, and then cut to the correct size for the given data block.  The
#' function returns a raw vector with the full data object
#'
#' @noRd
cfbf_getObject <- function(rcon,cfbf,path) {
	entry <- cfbf_getDirectoryEntry(cfbf$directory,path)

	stream <- c()
	if (entry$streamSize>cfbf$header$miniStreamCutoff) {
		currentSectors <- cfbf_getFatChain(cfbf$fatTable,entry$startingSector)
		for (sector in currentSectors$Index) {
			seek(rcon,(sector+1)*cfbf$header$sectorSize)
			stream <- c(stream,readBin(rcon,"raw",cfbf$header$sectorSize))
		}
	} else {
		currentMinisectors <- cfbf_getFatChain(cfbf$minifatTable,entry$startingSector)
		for (minisector in currentMinisectors$Index) {
			stream <- c(stream,cfbf_getMinistreamSector(rcon,minisector,cfbf))
		}
	}
	stream <- stream[seq_len(entry$streamSize)]
	stream
}

#' Recursively checks a directory path
#'
#' @noRd
cfbf_checkDirectoryEntry <- function(directory,path,root=0) {
	if (length(path)==0) { return(FALSE) }
	nextIndex <- which(directory$parent==root & directory$name==path[[1]])
	if (length(nextIndex)!=1) { return(FALSE)  }
	if (length(path)==1) { return(TRUE) }
	else { return(cfbf_checkDirectoryEntry(directory,path[-1],root=(nextIndex-1))) }
}

#' Recursively follows a directory path and extracts the relevant entry
#'
#' @noRd
cfbf_getDirectoryEntry <- function(directory,path,root=0) {
	if (length(path)==0) { stop("Unable to locate specified directory path") }
	nextIndex <- which(directory$parent==root & directory$name==path[[1]])
	if (length(nextIndex)!=1) { stop("Unable to locate specified directory path") }
	if (length(path)==1) { return(as.list(directory[nextIndex,])) }
	else { return(cfbf_getDirectoryEntry(directory,path[-1],root=(nextIndex-1))) }
}

#' Get a single CFBF ministream sector
#'
#' @noRd
cfbf_getMinistreamSector <- function(rcon,minisector,cfbf) {
	minisector <- minisector
	miniPerSector <- cfbf$header$sectorSize/cfbf$header$minisectorSize
	sector <- cfbf$ministreamSectors$Index[1+floor(minisector/miniPerSector)]
	miniOff <- minisector%%miniPerSector
	seek(rcon,(sector+1)*cfbf$header$sectorSize+miniOff*cfbf$header$minisectorSize)
	readBin(rcon,"raw",cfbf$header$minisectorSize)
}

#' Read a string encoded as two-byte (16-bit) binary data
#'
#' @noRd
cfbf_readTwoByteString <- function(tcon) {
	stringLength <- readBin(tcon,"integer",1,size=2,signed=TRUE,endian="little")
	if (stringLength==0) { return("") }
	stringVector <- readBin(tcon,"raw",stringLength)
	rawToChar(stringVector[seq(1,stringLength,by=2)])
}

# Standard binary function to check a series of binary bytes in an open binary
# connection
#'
#' @noRd
bin_checkBytes <- function(fcon,bytes,message="Invalid bytes") {
	# Check that the next string of bytes in a file matches a given set
	if (!all(readBin(fcon,"raw",length(bytes))==bytes)) { stop(message) }
}
