% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/prepInputs.R
\name{prepInputs}
\alias{prepInputs}
\title{Download and optionally post process files}
\usage{
prepInputs(targetFile = NULL, url = NULL, archive = NULL,
  alsoExtract = NULL, destinationPath = ".", fun = NULL,
  quick = getOption("reproducible.quick"), overwrite = FALSE,
  purge = FALSE, useCache = getOption("reproducible.useCache", FALSE), ...)
}
\arguments{
\item{targetFile}{Character string giving the path to the eventual file
(raster, shapefile, csv, etc.) after downloading and extracting from a zip
or tar archive. This is the file \emph{before} it is passed to
\code{postProcess}. Currently, the internal checksumming does not checksum
the file after it is \code{postProcess}ed (e.g., cropped/reprojected/masked).
Using \code{Cache} around \code{prepInputs} will do a sufficient job in these cases.
See table in \code{\link{preProcess}}.}

\item{url}{Optional character string indicating the URL to download from.
If not specified, then no download will be attempted. If no entry
exists in the \code{CHECKSUMS.txt} (in \code{destinationPath}), an entry
will be created or appended to. This \code{CHECKSUMS.txt} entry will be used
in subsequent calls to
\code{prepInputs} or \code{preProcess}, comparing the file on hand with the ad hoc
\code{CHECKSUMS.txt}. See table in \code{\link{preProcess}}.}

\item{archive}{Optional character string giving the path of an archive
containing \code{targetFile}, or a vector giving a set of nested archives
(e.g., \code{c("xxx.tar", "inner.zip")}). If there is/are (an) inner
archive(s), but they are unknown, the function will try all until it finds
the \code{targetFile}. See table in \code{\link{preProcess}}.}

\item{alsoExtract}{Optional character string naming files other than
\code{targetFile} that must be extracted from the \code{archive}. If
\code{NULL}, the default, then it will extract all files. Other options:
\code{"similar"} will extract all files with the same filename without
file extension as \code{targetFile}. \code{NA} will extract nothing other
than \code{targetFile}. A character string of specific file names will cause
only those to be extracted. See table in \code{\link{preProcess}}.}

\item{destinationPath}{Character string of a directory in which to download
and save the file that comes from \code{url} and is also where the function
will look for \code{archive} or \code{targetFile}.}

\item{fun}{Character string indicating the function to use to load
\code{targetFile} into an \code{R} object.}

\item{quick}{Logical. This is passed internally to \code{\link{Checksums}}
(the \code{quickCheck} argument), and to
\code{\link{Cache}} (the \code{quick} argument). This results in faster, though
less robust, checking of inputs. See the respective functions.}

\item{overwrite}{Logical. Should downloading and all the other actions occur
even if the files pass the checksums or are all there?}

\item{purge}{Logical or Integer. \code{0/FALSE} (default) keeps existing
\code{CHECKSUMS.txt} file and
\code{prepInputs} will write or append to it. \code{1/TRUE} will delete the entire
\code{CHECKSUMS.txt} file. Other options, see details.}

\item{useCache}{Passed to \code{\link{Cache}} in various places. Defaults to \code{getOption("reproducible.useCache", FALSE)}.}

\item{...}{Additional arguments passed to \code{fun} (i.e., user supplied),
 \code{\link{postProcess}} and \code{\link[reproducible]{Cache}}.
Since \code{...} is passed to \code{\link{postProcess}}, these
\code{...} will also be passed into the inner
functions, e.g., \code{\link{cropInputs}}. See details and examples.}
}
\description{
This function can be used to prepare R objects from remote or local data
sources. The object of this function is to provide a reproducible version of
a series of commonly used steps for getting, loading, and processing data.
This function has two stages: Getting data (download, extracting from archives,
loading into R) and postProcessing (for \code{Spatial*} and \code{Raster*}
objects, this is crop, reproject, mask/intersect).
To trigger the first stage, provide \code{url} or \code{archive}.
To trigger the second stage, provide \code{studyArea} or \code{rasterToMatch}.
See examples.
}
\note{
This function is still experimental: use with caution.
}
\section{Stage 1 - Getting data}{


See \code{\link{preProcess}} for combinations of arguments.

  \enumerate{
    \item Download from the web via either \code{\link[googledrive]{drive_download}}
    or \code{\link[utils]{download.file}};
    \item Extract from archive using \code{\link{unzip}} or \code{\link{untar}};
    \item Load into R using \code{\link[raster]{raster}},
    \code{\link[raster]{shapefile}}, or any other function passed in with \code{fun};
    \item Checksumming of all files during this process. This is put into a
    \file{CHECKSUMS.txt} file in the \code{destinationPath}, appending if it is
    already there, overwriting the entries for same files if entries already exist.
 }
}

\section{Stage 2 - Post processing}{


  This will be triggered if either \code{rasterToMatch} or \code{studyArea}
  is supplied.

  \enumerate{
    \item Fix errors. Currently, the only errors fixed are for \code{SpatialPolygons}
    using \code{buffer(..., width = 0)};
    \item Crop using \code{\link{cropInputs}};
    \item Project using \code{\link{projectInputs}};
    \item Mask using \code{\link{maskInputs}};
    \item Determine file name using \code{\link{determineFilename}} via \code{filename2};
    \item Optionally, write to disk at that file name via \code{\link{writeOutputs}}.
   }

  NOTE: checksumming does not occur during the post-processing stage, as
  there are no file downloads. To achieve fast results, wrap
  \code{prepInputs} with \code{Cache}.

  NOTE: \code{sf} objects are still very experimental.

\subsection{postProcessing of \code{Raster*} and \code{Spatial*} objects:}{

  If \code{rasterToMatch} or \code{studyArea} are used, then this will
  trigger several subsequent functions, specifically the sequence,
  \emph{Crop, reproject, mask}, which appears to be a common sequence in
  spatial simulation. See \code{\link{postProcess.spatialObjects}}.

  \emph{Understanding various combinations of \code{rasterToMatch}
  and/or \code{studyArea}:}
  Please see \code{\link{postProcess.spatialObjects}}.
 }
}

\section{\code{purge}}{


The options for controlling the purging of the \code{CHECKSUMS.txt} file are:

  \tabular{cl}{
    \code{0} \tab keep file \cr
    \code{1} \tab delete file \cr
    \code{2} \tab delete entry for \code{targetFile} \cr
    \code{4} \tab delete entry for \code{alsoExtract} \cr
    \code{3} \tab delete entry for \code{archive} \cr
    \code{5} \tab delete entry for \code{targetFile} & \code{alsoExtract} \cr
    \code{6} \tab delete entry for \code{targetFile}, \code{alsoExtract} & \code{archive} \cr
    \code{7} \tab delete entry that is failing (i.e., for the file downloaded by the \code{url})\cr
  }
Options greater than \code{1} will only remove entries in the \code{CHECKSUMS.txt} that are associated with
   \code{targetFile}, \code{alsoExtract} or \code{archive}. When \code{prepInputs} is called, it will write to a
  \code{CHECKSUMS.txt} file, or append to it if it already exists. If the \code{CHECKSUMS.txt} is not correct, use
  this argument to remove it.
}

\examples{
# This function works within a module; however, currently,
#   \\code{sourceURL} is not yet working as desired. Use \\code{url}.
\dontrun{
# download a zip file from internet, unzip all files, load as shapefile, Cache the call
# First time: don't know all files - prepInputs will guess, if download file is an archive,
#   then extract all files, then if there is a .shp, it will load with raster::shapefile
dPath <- file.path(tempdir(), "ecozones")
shpEcozone <- prepInputs(destinationPath = dPath,
                         url = "http://sis.agr.gc.ca/cansis/nsdb/ecostrat/zone/ecozone_shp.zip")

# Robust to partial file deletions:
unlink(dir(dPath, full.names = TRUE)[1:3])
shpEcozone <- prepInputs(destinationPath = dPath,
                     url = "http://sis.agr.gc.ca/cansis/nsdb/ecostrat/zone/ecozone_shp.zip")
unlink(dPath, recursive = TRUE)

# Once this is done, can be more precise in operational code:
#  specify targetFile, alsoExtract, and fun, wrap with Cache
ecozoneFilename <- file.path(dPath, "ecozones.shp")
ecozoneFiles <- c("ecozones.dbf", "ecozones.prj",
                  "ecozones.sbn", "ecozones.sbx", "ecozones.shp", "ecozones.shx")
shpEcozone <- prepInputs(targetFile = ecozoneFilename,
                    url = "http://sis.agr.gc.ca/cansis/nsdb/ecostrat/zone/ecozone_shp.zip",
                    alsoExtract = ecozoneFiles,
                    fun = "shapefile", destinationPath = dPath)
unlink(dPath, recursive = TRUE)

# Add a study area to Crop and Mask to
# Create a "study area"
library(sp)
library(raster)
coords <- structure(c(-122.98, -116.1, -99.2, -106, -122.98, 59.9, 65.73, 63.58, 54.79, 59.9),
                    .Dim = c(5L, 2L))
Sr1 <- Polygon(coords)
Srs1 <- Polygons(list(Sr1), "s1")
StudyArea <- SpatialPolygons(list(Srs1), 1L)
crs(StudyArea) <- "+init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"

#  specify targetFile, alsoExtract, and fun, wrap with Cache
ecozoneFilename <- file.path(dPath, "ecozones.shp")
# Note, you don't need to "alsoExtract" the archive... if the archive is not there, but the
#   targetFile is there, it will not redownload the archive.
ecozoneFiles <- c("ecozones.dbf", "ecozones.prj",
                  "ecozones.sbn", "ecozones.sbx", "ecozones.shp", "ecozones.shx")
shpEcozoneSm <- Cache(prepInputs,
                         url = "http://sis.agr.gc.ca/cansis/nsdb/ecostrat/zone/ecozone_shp.zip",
                         targetFile = reproducible::asPath(ecozoneFilename),
                         alsoExtract = reproducible::asPath(ecozoneFiles),
                         studyArea = StudyArea,
                         fun = "shapefile", destinationPath = dPath,
                         filename2 = "EcozoneFile.shp") # passed to determineFilename

plot(shpEcozone)
plot(shpEcozoneSm, add = TRUE, col = "red")
unlink(dPath)

# Big Raster, with crop and mask to Study Area - no reprojecting (lossy) of raster,
#   but the StudyArea does get reprojected, need to use rasterToMatch
dPath <- file.path(tempdir(), "LCC")
lcc2005Filename <- file.path(dPath, "LCC2005_V1_4a.tif")
url <- file.path("ftp://ftp.ccrs.nrcan.gc.ca/ad/NLCCLandCover",
                 "LandcoverCanada2005_250m/LandCoverOfCanada2005_V1_4.zip")

# messages received below may help for filling in more arguments in the subsequent call
LCC2005 <- prepInputs(url = url,
                     destinationPath = asPath(dPath),
                     studyArea = StudyArea)

plot(LCC2005)

# if wrapped with Cache, will be fast second time, very fast 3rd time (via memoised copy)
LCC2005 <- Cache(prepInputs, url = url,
                     targetFile = lcc2005Filename,
                     archive = asPath("LandCoverOfCanada2005_V1_4.zip"),
                     destinationPath = asPath(dPath),
                     studyArea = StudyArea)
}

}
\seealso{
\code{\link{downloadFile}}, \code{\link{extractFromArchive}},
         \code{\link{postProcess}}.
}
\author{
Eliot McIntire

Jean Marchal
}
