% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/create_data.R
\name{create_data}
\alias{create_data}
\title{Create a Unified Observation Dataset in the BD_Obs Format from Multiple CSV Files}
\details{
This function constructs a unified dataset (\code{BD_Obs} structure) by merging multiple CSV files,
each containing in-situ observations from different stations. The function standardizes the format
required by downstream interpolation or bias correction algorithms by aligning all station data
into a single \code{data.table}, with dates as rows and station identifiers as columns.
}
\usage{
create_data(file.path, Start_date, End_Date, ncores = NULL, max.na = NULL)
}
\arguments{
\item{file.path}{\code{character}. Path to the folder containing the CSV files. Each file should represent a single station and be named using the station ID (e.g., \code{M001.csv}). Each file must have exactly two columns: a date column and a column of in-situ observations for the variable to be interpolated.}

\item{Start_date}{\code{Date}. Start date of the period to be included in the merged dataset. If the CSV files cover different date ranges, this defines the initial bound of the common time window for merging.}

\item{End_Date}{\code{Date}. End date of the period to be included in the merged dataset. This sets the upper bound of the time window to consider across all files.}

\item{ncores}{\code{integer}. Number of processing cores to be used when reading and merging CSV files in parallel. To run the procedure without parallelization, set \code{ncores = NULL}. The default is \code{NULL}.}

\item{max.na}{\code{numeric}, optional. Maximum acceptable percentage of missing values per station (from 0 to 100). Stations exceeding this threshold will be excluded. If \code{NULL}, no filtering is performed. Default is \code{NULL}.}
}
\value{
If \code{max.na} is \code{NULL}, the function returns a \code{data.table} structured in the \code{BD_Obs} format,
where the first column contains the dates and the remaining columns correspond to individual stations.
This format preserves the full dataset without filtering for missing values.

If \code{max.na} is not \code{NULL}, the function returns a named list containing:
\describe{
\item{\code{data}}{A \code{data.table} in the \code{BD_Obs} format that includes only stations with a percentage
of missing values less than or equal to \code{max.na}.}
\item{\code{Na_stations}}{A \code{data.table} summarizing the percentage of missing values for each station,
useful for assessing data quality and supporting decisions about station selection.}
}
}
\description{
Each input CSV file must contain exactly two columns: the first with dates (\code{Date}) and the second
with the in-situ measurements of the variable to be interpolated.
}
\examples{
\donttest{
# Example usage
file.path <- system.file("extdata/Folds_ejs_create_data", package = "InterpolateR")

# Create a dataset containing all stations
data <- create_data(file.path, Start_date = "2015-01-01", End_Date = "2015-03-01", ncores = NULL)
}
}
\author{
Jonnathan Augusto Landi Bermeo, jonnathan.landi@outlook.com
}
