% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hts_illumina.R
\name{illumina}
\alias{illumina}
\title{Create and write Illumina reads to FASTQ file(s).}
\usage{
illumina(obj,
         out_prefix,
         n_reads,
         read_length,
         paired,
         frag_mean = 400,
         frag_sd = 100,
         matepair = FALSE,
         seq_sys = NULL,
         profile1 = NULL,
         profile2 = NULL,
         ins_prob1 = 0.00009,
         del_prob1 = 0.00011,
         ins_prob2 = 0.00015,
         del_prob2 = 0.00023,
         frag_len_min = NULL,
         frag_len_max = NULL,
         variant_probs = NULL,
         barcodes = NULL,
         prob_dup = 0.02,
         sep_files = FALSE,
         compress = FALSE,
         comp_method = "bgzip",
         n_threads = 1L,
         read_pool_size = 1000L,
         show_progress = FALSE,
         overwrite = FALSE)
}
\arguments{
\item{obj}{Sequencing object of class \code{ref_genome} or \code{variants}.}

\item{out_prefix}{Prefix for the output file(s), including entire path except
for the file extension.}

\item{n_reads}{Number of reads you want to create.}

\item{read_length}{Length of reads.}

\item{paired}{Logical for whether to use paired-end reads.
This argument is changed to \code{TRUE} if \code{matepair} is \code{TRUE}.}

\item{frag_mean}{Mean of the Gamma distribution that generates fragment sizes.
Defaults to \code{400}.}

\item{frag_sd}{Standard deviation of the Gamma distribution that generates
fragment sizes.
Defaults to \code{100}.}

\item{matepair}{Logical for whether to simulate mate-pair reads.
Defaults to \code{FALSE}.}

\item{seq_sys}{Full or abbreviated name of sequencing system to use.
See "Sequencing systems" section for options.
See "Sequencing profiles" section for more information on how this argument,
\code{profile1}, and \code{profile2} are used to specify profiles.
Defaults to \code{NULL}.}

\item{profile1}{Custom profile file for read 1.
See "Sequencing profiles" section for more information on how this argument,
\code{profile2}, and \code{seq_sys} are used to specify profiles.
Defaults to \code{NULL}.}

\item{profile2}{Custom profile file for read 2.
See "Sequencing profiles" section for more information on how this argument,
\code{profile1}, and \code{seq_sys} are used to specify profiles.
Defaults to \code{NULL}.}

\item{ins_prob1}{Insertion probability for read 1. Defaults to \code{0.00009}.}

\item{del_prob1}{Deletion probability for read 1. Defaults to \code{0.00011}.}

\item{ins_prob2}{Insertion probability for read 2. Defaults to \code{0.00015}.}

\item{del_prob2}{Deletion probability for read 2. Defaults to \code{0.00023}.}

\item{frag_len_min}{Minimum fragment size.
A \code{NULL} value results in the read length being used as the minimum.
Defaults to \code{NULL}.}

\item{frag_len_max}{Maximum fragment size.
A \code{NULL} value results in \code{2^32-1}, the maximum allowed value.
Defaults to \code{NULL}.}

\item{variant_probs}{Relative probability of sampling each variant.
This is ignored if sequencing a reference genome.
\code{NULL} results in all having the same probability.
Defaults to \code{NULL}.}

\item{barcodes}{Character vector of barcodes for each variant, or a single barcode
if sequencing a reference genome. \code{NULL} results in no barcodes.
Defaults to \code{NULL}.}

\item{prob_dup}{A single number indicating the probability of duplicates.
Defaults to \code{0.02}.}

\item{sep_files}{Logical indicating whether to make separate files for each variant.
This argument is coerced to \code{FALSE} if the \code{obj} argument is not
a \code{variants} object.
Defaults to \code{FALSE}.}

\item{compress}{Logical specifying whether or not to compress the output file(s),
or an integer specifying the level of compression, from 1 to 9.
If \code{TRUE}, a compression level of \code{6} is used.
Defaults to \code{FALSE}.}

\item{comp_method}{Character specifying which type of compression to use if any
is desired. Options include \code{"gzip"} and \code{"bgzip"}.
This is ignored if \code{compress} is \code{FALSE}, and an error is thrown if
it's set to \code{"gzip"} when \code{n_threads > 1} (since gzip compression
in parallel is not implemented).
Defaults to \code{"bgzip"}.}

\item{n_threads}{The number of threads to use in processing.
If \code{compress} is \code{TRUE} or \code{> 0} (indicating compressed output),
setting \code{n_threads} to \code{2} or more makes this function first create an
uncompressed file/files using \code{n_threads} threads, then compress that/those
file/files also using \code{n_threads} threads.
There is no speed increase if you try to use multiple threads to create
compressed output on the fly, so that option is not included.
If you want to be conservative with disk space (by not having an uncompressed
file present even temporarily), set \code{n_threads} to \code{1}.
Threads are NOT spread across chromosomes or variants, so you don't need to
think about these when choosing this argument's value.
However, all threads write to the same file/files, so there are diminishing
returns for providing many threads.
This argument is ignored if the package was not compiled with OpenMP.
Defaults to \code{1}.}

\item{read_pool_size}{The number of reads to store before writing to disk.
Increasing this number should improve speed but take up more memory.
Defaults to \code{1000}.}

\item{show_progress}{Logical for whether to show a progress bar.
Defaults to \code{FALSE}.}

\item{overwrite}{Logical for whether to overwrite existing FASTQ file(s) of the
same name, if they exist.
Defaults to \code{FALSE}.}
}
\value{
Nothing is returned.
}
\description{
From either a reference genome or set of haploid variants, create Illumina reads
from error profiles and write them to FASTQ output file(s).
I encourage you to cite the reference below in addition to \code{jackalope} if you use
this function.
}
\section{Sequencing profiles}{

This section outlines how to use the \code{seq_sys}, \code{profile1},
and \code{profile2} arguments.
If all arguments are \code{NULL} (their defaults), a sequencing system is chosen
based on the read length.
If, however, one or more arguments has been provided, then how they're provided
should depend on whether you want single- or paired-end reads.

\strong{For single-end reads}
\itemize{
\item \code{profile2} should be \code{NULL}.
\item Only \code{seq_sys} or \code{profile1} should be provided, not both.
}

\strong{For paired-end reads}
\itemize{
\item If providing \code{seq_sys}, don't provide either \code{profile1} or \code{profile2}.
\item If providing \code{profile1}, you must also provide \code{profile2} (they can be the
same if you want) and you cannot provide \code{seq_sys}.
}
}

\section{Sequencing systems}{

Sequencing system options are the following, where, for each system,
"name" is the full name, "abbrev" is the abbreviated name,
"max_len" indicates the maximum allowed read length,
and
"paired" indicates whether paired-end sequencing is allowed.

\tabular{llll}{
name                 \tab abbrev   \tab max_len \tab paired \cr
Genome Analyzer I    \tab GA1      \tab 44      \tab Yes   \cr
Genome Analyzer II   \tab GA2      \tab 75      \tab Yes   \cr
HiSeq 1000           \tab HS10     \tab 100     \tab Yes   \cr
HiSeq 2000           \tab HS20     \tab 100     \tab Yes   \cr
HiSeq 2500           \tab HS25     \tab 150     \tab Yes   \cr
HiSeqX v2.5 PCR free \tab HSXn     \tab 150     \tab Yes   \cr
HiSeqX v2.5 TruSeq   \tab HSXt     \tab 150     \tab Yes   \cr
MiniSeq TruSeq       \tab MinS     \tab 50      \tab No    \cr
MiSeq v1             \tab MSv1     \tab 250     \tab Yes   \cr
MiSeq v3             \tab MSv3     \tab 250     \tab Yes   \cr
NextSeq 500 v2       \tab NS50     \tab 75      \tab Yes   \cr
}
}

\section{ID lines}{

The ID lines for FASTQ files are formatted as follows:

\code{@<genome name>-<chromosome name>-<starting position>-<strand>[/<read#>]}

where the part in \code{[]} is only for paired-end Illumina reads, and where \code{genome name}
is always \code{REF} for reference genomes (as opposed to variants).
}

\examples{
\donttest{
rg <- create_genome(10, 100e3, 100)
dir <- tempdir(TRUE)
illumina(rg, paste0(dir, "/illumina_reads"), n_reads = 100,
         read_length = 100, paired = FALSE)
}

}
\references{
Huang, W., L. Li, J. R. Myers, and G. T. Marth. 2012. ART: a next-generation
sequencing read simulator. \emph{Bioinformatics} \strong{28}:593–594.
}
