% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tdmReadAndSplit.r
\name{tdmReadAndSplit}
\alias{tdmReadAndSplit}
\alias{TDMdata}
\title{Read and split the task data.}
\usage{
tdmReadAndSplit(opts, tdm, nExp = 0, dset = NULL)
}
\arguments{
\item{opts}{a list from which we need here the elements
\itemize{
  \item \code{READ.INI}:  [T] =T: do read and split, =F: return NULL   
  \item \code{READ.*}:  other settings for \code{\link{tdmReadDataset}}  
  \item \code{filename}:  needed for \code{\link{tdmReadDataset}}  
  \item \code{filetest}:  needed for \code{\link{tdmReadDataset}}  
  \item \code{TST.testFrac}:  [0.1] set this fraction of the data aside for testing 
  \item \code{TST.COL}:   string with name for the partitioning column, if tdm$umode is not "SP_T".
                 (If tdm$umode=="SP_T", then TST.COL="tdmSplit" is used.)
}}

\item{tdm}{a list from  which we need here the elements 
\itemize{
  \item \code{mainFile}:  if not NULL, set working dir to \code{dir(mainFile)} before executing  \code{\link{tdmReadDataset}} 
  \item \code{umode}:  [ "RSUB" | "CV" | "TST" | "SP_T" ], how to divide in training/validation data for tuning
                 and test data for the unbiased runs  
  \item \code{SPLIT.SEED}:  if NULL, set random number generator (RNG) to \code{\link{tdmRandomSeed}} when constructing
                 \code{dataObj}. If not NULL, set RNG to SPLIT.SEED + nExp --> deterministic test set split
  \item \code{stratified}: [NULL] string specifying the column with the response variable for classification.
                 If not NULL, do the split by stratified sampling (at least one record of each class level
                 found in \code{dset[,tdm$stratified]} shall appear in the train-vali-set). Recommended for classification
}}

\item{nExp}{[0] experiment counter, used to select a different but reproducible seed for each experiment if \code{tdm$SPLIT.SEED} is not NULL}

\item{dset}{[NULL] if non-NULL, reading of dset is skipped and the given data frame dset is used.}
}
\value{
\code{dataObj}, either NULL (if \code{opts$READ.INI==FALSE}) or an object of class \code{\link{TDMdata}} containing
     \item{dset}{ a data frame with the complete data set}
     \item{TST.COL}{ string, the name of the column in \code{dset} which has a 1 for 
                     records belonging to the test set and a 0 for train/vali records. If tdm$umode=="SP_T", then 
                     TST.COL="tdmSplit", else TST.COL=opts$TST.COL. }
     \item{filename}{ \code{opts$filename}, from where the data were read}
   Use the accessor functions  \code{\link{dsetTrnVa.TDMdata}} and \code{\link{dsetTest.TDMdata}} to extract the train/vali and 
   the test data, resp., from \code{dataObj}.
  
   Known caller: \code{\link{tdmBigLoop}}
}
\description{
Read the task data using \code{\link{tdmReadDataset}} and split them into a test part and 
  a training/validation-part and return a \code{\link{TDMdata}} object.
}
\details{
If \code{dset} is NULL, the files specified in \code{opts} are read into dset, see 
  \code{\link{tdmReadDataset}} for details. Then, depending on the value of \code{tdm$umode}
  \itemize{
     \item \code{"SP_T"}: split the data randomly into training and test data with test 
       set fraction according to \code{opts$TST.testFrac}. Make use of \code{tdm$SPLIT.SEED}
       and \code{tdm$stratified}, if given. Set TST.COL to \code{"tdmSplit"}.
     \item \code{"RSUB", "CV"}: use all data for training/validation. That is, the 
       training-validation split is done later in \code{\link{tdmClassifyLoop}} or 
       \code{\link{tdmRegressLoop}}.\cr
     \item \code{"TST"}: split the data into training and test data according to column
       \code{opts$TST.COL} (usually \code{"TST.COL"}), which carries a 1 for each test record and a 0 otherwise. 
       If \code{opts$filetest} is specified, then all records from this file will 
       carry a 1 in \code{opts$TST.COL}. All records from \code{opts$filename} carry a 0.
  }
}
\seealso{
\code{\link{dsetTrnVa.TDMdata}}, \code{\link{dsetTest.TDMdata}}, \code{\link{tdmReadDataset}}, \code{\link{tdmBigLoop}}
}
\author{
Wolfgang Konen (\email{wolfgang.konen@th-koeln.de}), THK
}
