% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/set_na.R
\name{set_na}
\alias{set_na}
\alias{set_na_if}
\title{Replace specific values in vector with NA}
\usage{
set_na(x, ..., na, drop.levels = TRUE, as.tag = FALSE)

set_na_if(x, predicate, na, drop.levels = TRUE, as.tag = FALSE)
}
\arguments{
\item{x}{A vector or data frame.}

\item{...}{Optional, unquoted names of variables that should be selected for
further processing. Required if \code{x} is a data frame (and not a
vector) and only selected variables from \code{x} should be processed.
You may also use functions like \code{:} or tidyselect's
\code{\link[tidyselect]{select_helpers}}.
See 'Examples' or \href{../doc/design_philosophy.html}{package-vignette}.}

\item{na}{Numeric vector with values that should be replaced with NA values,
or a character vector if values of factors or character vectors should be
replaced. For labelled vectors, may also be the name of a value label. In
this case, the associated values for the value labels in each vector
will be replaced with NA (see 'Examples').}

\item{drop.levels}{Logical, if \code{TRUE}, factor levels of values that have
been replaced with \code{NA} are dropped. See 'Examples'.}

\item{as.tag}{Logical, if \code{TRUE}, values in \code{x} will be replaced
by \code{tagged_na}, else by usual \code{NA} values. Use a named
vector to assign the value label to the tagged NA value (see 'Examples').}

\item{predicate}{A predicate function to be applied to the columns. The
variables for which \code{predicate} returns \code{TRUE} are selected.}
}
\value{
\code{x}, with all values in \code{na} being replaced by \code{NA}.
  If \code{x} is a data frame, the complete data frame \code{x} will
  be returned, with NA's set for variables specified in \code{...};
  if \code{...} is not specified, the replacement is applied to all
  variables in the data frame.
}
\description{
This function replaces specific values of variables with \code{NA}.
   \code{set_na_if()} is a scoped variant of \code{set_na()}, where values
   will be replaced with NA's only for those variables that match the logical
   condition of \code{predicate}.
}
\details{
\code{set_na()} replaces all values defined in \code{na} with
  a related \code{NA} or tagged NA value (see \code{\link[haven]{tagged_na}}).
  Tagged \code{NA}s work exactly like regular R missing values
  except that they store one additional byte of information: a tag,
  which is usually a letter ("a" to "z") or a digit ("0" to "9").
  \cr \cr
  Furthermore, see also 'Details' in \code{\link{get_na}}.
}
\note{
Labels from values that are replaced with NA and no longer used will be
  removed from \code{x}; however, other value and variable label
  attributes are preserved. For more details on labelled data,
  see vignette \href{https://cran.r-project.org/package=sjlabelled/vignettes/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package}.
}
\examples{
# create random variable
dummy <- sample(1:8, 100, replace = TRUE)
# show value distribution
table(dummy)
# set values 1 and 8 as missing
dummy <- set_na(dummy, na = c(1, 8))
# show value distribution, including missings
table(dummy, useNA = "always")

# use a named vector to set a further value (5) as tagged NA; the name
# of the vector element is assigned as value label to the tagged NA
set_na(dummy, na = c("Refused" = 5), as.tag = TRUE)
# see different missing types
library(haven)
library(sjlabelled)
print_tagged_na(set_na(dummy, na = c("Refused" = 5), as.tag = TRUE))


# create sample data frame
dummy <- data.frame(var1 = sample(1:8, 100, replace = TRUE),
                    var2 = sample(1:10, 100, replace = TRUE),
                    var3 = sample(1:6, 100, replace = TRUE))
# set values 2 and 4 as missing; no variables are selected, so the
# replacement applies to all variables in the data frame
dummy \%>\% set_na(na = c(2, 4)) \%>\% head()
# same replacement, but using tagged NA values instead of regular NA
dummy \%>\% set_na(na = c(2, 4), as.tag = TRUE) \%>\% get_na()
dummy \%>\% set_na(na = c(2, 4), as.tag = TRUE) \%>\% get_values()

# build a data frame from three variables of the efc sample data
data(efc)
dummy <- data.frame(
  var1 = efc$c82cop1,
  var2 = efc$c83cop2,
  var3 = efc$c84cop3
)
# check original distribution of categories
lapply(dummy, table, useNA = "always")
# set 3 to NA for two variables (var1 and var3) only
lapply(set_na(dummy, var1, var3, na = 3), table, useNA = "always")

# by default, factor levels of values that are set to NA are dropped;
# use drop.levels = FALSE to keep them
x <- factor(c("a", "b", "c"))
x
set_na(x, na = "b", as.tag = TRUE)
set_na(x, na = "b", drop.levels = FALSE, as.tag = TRUE)

# set_na() can also set values to NA by specifying the value label
# of the value that should be replaced with NA. This is particularly
# helpful if a certain category should be set to NA, but this category
# is assigned different values across variables
x1 <- sample(1:4, 20, replace = TRUE)
x2 <- sample(1:7, 20, replace = TRUE)
x1 <- set_labels(x1, labels = c("Refused" = 3, "No answer" = 4))
x2 <- set_labels(x2, labels = c("Refused" = 6, "No answer" = 7))

tmp <- data.frame(x1, x2)
get_labels(tmp)
table(tmp, useNA = "always")

# replace the values behind one value label with NA in each variable
get_labels(set_na(tmp, na = "No answer"))
table(set_na(tmp, na = "No answer"), useNA = "always")

# show values, before and after replacing both labelled categories
tmp
set_na(tmp, na = c("Refused", "No answer"))


}
\seealso{
\code{\link{replace_na}} to replace \code{\link{NA}}'s with specific
  values, \code{\link{rec}} for general recoding of variables and
  \code{\link{recode_to}} for re-shifting value ranges. See
  \code{\link{get_na}} to get values of missing values in
  labelled vectors.
}
