% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/factors.R
\name{factor_}
\alias{factor_}
\alias{as_factor}
\alias{levels_factor}
\alias{levels_used}
\alias{levels_unused}
\alias{used_levels}
\alias{unused_levels}
\alias{levels_rm}
\alias{levels_add}
\alias{levels_add_na}
\alias{levels_drop_na}
\alias{levels_drop}
\alias{levels_reorder}
\alias{levels_rename}
\alias{levels_lump}
\alias{levels_count}
\title{A cheaper version of \code{factor()} along with cheaper utilities}
\usage{
factor_(
  x = integer(),
  levels = NULL,
  order = TRUE,
  na_exclude = TRUE,
  ordered = is.ordered(x)
)

as_factor(x)

levels_factor(x)

levels_used(x)

levels_unused(x)

used_levels(x)

unused_levels(x)

levels_rm(x, levels)

levels_add(x, levels, where = c("last", "first"))

levels_add_na(x, name = NA, where = c("last", "first"))

levels_drop_na(x)

levels_drop(x)

levels_reorder(x, order_by, decreasing = FALSE)

levels_rename(x, ..., .fun = NULL)

levels_lump(
  x,
  n,
  prop,
  other_category = "Other",
  ties = c("min", "average", "first", "last", "random", "max")
)

levels_count(x)
}
\arguments{
\item{x}{A vector.}

\item{levels}{Optional factor levels.}

\item{order}{Should factor levels be sorted? Default is \code{TRUE}.
It typically is faster to set this to \code{FALSE}, in which case the levels
are sorted by order of first appearance.}

\item{na_exclude}{Should \code{NA} values be excluded from the factor levels?
Default is \code{TRUE}.}

\item{ordered}{Should the result be an ordered factor?}

\item{where}{Where should the new levels (or the \code{NA} level) be placed? Either first or last.}

\item{name}{Name of \code{NA} level.}

\item{order_by}{A vector to order the levels of \code{x} by using the medians of
\code{order_by}.}

\item{decreasing}{Should the reordered levels be in decreasing order?
Default is \code{FALSE}.}

\item{...}{Key-value pairs where the key is the new name and the
value is the existing level to be renamed. For example
\code{levels_rename(x, new = old)} replaces the level "old" with the
level "new".}

\item{.fun}{Renaming function applied to each level.}

\item{n}{Top n number of levels to calculate.}

\item{prop}{Top proportion of levels to calculate.
This is a proportion of the total unique levels in x.}

\item{other_category}{Name of 'other' category.}

\item{ties}{Ties method to use. See \code{?rank}.}
}
\value{
A \code{factor}, or a \code{character} vector in the case of \code{levels_used} and \code{levels_unused}.
\code{levels_count} returns a data frame of counts and proportions for each level.
}
\description{
A fast version of \code{factor()} using the collapse package. \cr

There are some additional utilities, most of which begin with the prefix
'levels_', such as
\code{as_factor()} which is an efficient way to coerce both vectors and factors,
\code{levels_factor()} which returns the levels of a factor, as a factor,
\code{levels_used()} which returns the used levels of a factor,
\code{levels_unused()} which returns the unused levels of a factor,
\code{levels_add()} which adds the specified levels onto the existing levels,
\code{levels_rm()} which removes the specified levels,
\code{levels_add_na()} which adds an explicit \code{NA} level,
\code{levels_drop_na()} which drops the \code{NA} level,
\code{levels_drop()} which drops unused factor levels,
\code{levels_rename()} for renaming levels,
\code{levels_lump()} which returns top n levels and lumps all others into the
same category, \cr
\code{levels_count()} which returns the counts of each level,
and finally \code{levels_reorder()} which reorders the levels of \code{x}
based on \code{order_by} using the ordered median values of \code{order_by} for each level.
}
\details{
This operates similarly to \code{collapse::qF()}. \cr
The main difference internally is that \code{collapse::funique()} is used
and therefore S3 methods can be written for it. \cr
Furthermore, for date-times \code{factor_} differs in that it differentiates
all instants in time whereas \code{factor} differentiates calendar times.
Using a daylight savings example where the clocks go back: \cr
\code{factor(as.POSIXct(1729984360, tz = "Europe/London") + 3600 *(1:5))}
produces 4 levels whereas \cr
\code{factor_(as.POSIXct(1729984360, tz = "Europe/London") + 3600 *(1:5))}
produces 5 levels.

\code{levels_lump()} is a cheaper version of \code{forcats::fct_lump_n()} but returns
levels in order of highest frequency to lowest. This can be very useful
for plotting.
}
\examples{
library(cheapr)

# Draw 100 values (with replacement) from 10 randomly chosen letters,
# while declaring all 26 letters as levels so that some levels are unused
x <- factor_(sample(letters[sample.int(26, 10)], 100, TRUE), levels = letters)
x
# Used/unused levels

levels_used(x)
levels_unused(x)

# Drop unused levels
levels_drop(x)

# Top 3 letters by frequency
lumped_letters <- levels_lump(x, 3)
levels_count(lumped_letters)

# To remove the "other" category, use `levels_rm()`

levels_count(levels_rm(lumped_letters, "Other"))

# We can use levels_lump to create a generic top n function for non-factors too

get_top_n <- function(x, n){
  # Build the factor with levels in order of first appearance (no sorting)
  x_fct <- factor_(x, order = FALSE)
  # Keep the top n levels and lump all others into one category
  top_n <- levels_lump(x_fct, n = n)
  # Counts and proportions for each remaining level
  levels_count(top_n)
}

get_top_n(x, 3)

# A neat way to order the levels of a factor by frequency
# is the following:
# (presumably `prop = 1` keeps every level, so nothing is lumped
# and only the frequency ordering is applied)

levels(levels_lump(x, prop = 1)) # Highest to lowest
levels(levels_lump(x, prop = -1)) # Lowest to highest

}
