% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/GaussSuppressionFromData.R
\name{GaussSuppressionFromData}
\alias{GaussSuppressionFromData}
\title{Cell suppression from input data containing inner cells}
\usage{
GaussSuppressionFromData(
  data,
  dimVar = NULL,
  freqVar = NULL,
  ...,
  numVar = NULL,
  weightVar = NULL,
  charVar = NULL,
  hierarchies = NULL,
  formula = NULL,
  maxN = suppressWarnings(formals(c(primary)[[1]])$maxN),
  protectZeros = suppressWarnings(formals(c(primary)[[1]])$protectZeros),
  secondaryZeros = suppressWarnings(formals(candidates)$secondaryZeros),
  candidates = CandidatesDefault,
  primary = PrimaryDefault,
  forced = NULL,
  hidden = NULL,
  singleton = SingletonDefault,
  singletonMethod = ifelse(secondaryZeros, "anySumNOTprimary", "anySum"),
  printInc = TRUE,
  output = "publish",
  x = NULL,
  crossTable = NULL,
  preAggregate = is.null(freqVar),
  extraAggregate = preAggregate & !is.null(charVar),
  structuralEmpty = FALSE,
  extend0 = FALSE,
  spec = NULL,
  specLock = FALSE,
  freqVarNew = rev(make.unique(c(names(data), "freq")))[1],
  nUniqueVar = rev(make.unique(c(names(data), "nUnique")))[1],
  forcedInOutput = "ifNonNULL",
  unsafeInOutput = "ifForcedInOutput",
  lpPackage = NULL
)
}
\arguments{
\item{data}{Input data as a data frame}

\item{dimVar}{The main dimensional variables and additional aggregating variables. This parameter can be useful when hierarchies and formula are unspecified.}

\item{freqVar}{A single variable holding counts (name or number).}

\item{...}{Further arguments to be passed to the supplied functions and to \code{\link{ModelMatrix}} (such as \code{inputInOutput} and \code{removeEmpty}).}

\item{numVar}{Other numerical variables to be aggregated}

\item{weightVar}{Weights (costs) to be used to order candidates for secondary suppression}

\item{charVar}{Other variables possibly to be used within the supplied functions}

\item{hierarchies}{List of hierarchies, which can be converted by \code{\link{AutoHierarchies}}.
Thus, the variables can also be coded by \code{"rowFactor"} or \code{""}, which correspond to using the categories in the data.}

\item{formula}{A model formula}

\item{maxN}{Suppression parameter. Cells with frequency \verb{<= maxN} are set as primary suppressed.
Using the default \code{primary} function, \code{maxN} is by default set to \code{3}. See details.}

\item{protectZeros}{Suppression parameter.
When \code{TRUE}, cells with zero frequency or value are set as primary suppressed.
Using the default \code{primary} function, \code{protectZeros} is by default set to \code{TRUE}. See details.}

\item{secondaryZeros}{Suppression parameter.
When \code{TRUE}, cells with zero frequency or value are prioritized to be published so that they are not secondary suppressed.
Using the default \code{candidates} function, \code{secondaryZeros} is by default set to \code{FALSE}.
See details.}

\item{candidates}{GaussSuppression input or a function generating it (see details). Default: \code{\link{CandidatesDefault}}}

\item{primary}{GaussSuppression input or a function generating it (see details). Default: \code{\link{PrimaryDefault}}}

\item{forced}{GaussSuppression input or a function generating it (see details)}

\item{hidden}{GaussSuppression input or a function generating it (see details)}

\item{singleton}{GaussSuppression input or a function generating it (see details). Default: \code{\link{SingletonDefault}}}

\item{singletonMethod}{\code{\link{GaussSuppression}} input. The default value depends on parameter \code{secondaryZeros} which depends on \code{candidates} (see details).}

\item{printInc}{\code{\link{GaussSuppression}} input}

\item{output}{One of \code{"publish"} (default), \code{"inner"}, \code{"publish_inner"}, \code{"publish_inner_x"}, \code{"publish_x"},
\code{"inner_x"}, \code{"input2functions"} (input to supplied functions),
\code{"inputGaussSuppression"}, \code{"inputGaussSuppression_x"},
\code{"outputGaussSuppression"}, \code{"outputGaussSuppression_x"},
\code{"primary"},  \code{"secondary"} and \code{"all"}.
Here "inner" means input data (possibly pre-aggregated) and
"x" means dummy matrix (as input parameter x).
All input to and output from \code{\link{GaussSuppression}}, except \code{...}, are returned when \code{"outputGaussSuppression_x"}.
Excluding x and only input are also possible.
The code \code{"all"} means all relevant output after all the calculations.
Currently, this means the same as \code{"publish_inner_x"} extended with the matrices (or NULL) \code{xExtraPrimary}  and \code{unsafe}.
The former matrix is usually made by \code{\link{KDisclosurePrimary}}.
The latter matrix contains the columns representing unsafe primary suppressions.
In addition to \code{x} columns corresponding to unsafe in ordinary output (see parameter \code{unsafeInOutput} below),
possible columns from  \code{xExtraPrimary} may also be included in the unsafe matrix (see details).}

\item{x}{\code{x} (\code{modelMatrix}) and \code{crossTable} can be supplied as input instead of generating it from  \code{\link{ModelMatrix}}}

\item{crossTable}{See above.}

\item{preAggregate}{When \code{TRUE}, the data will be aggregated within the function to an appropriate level.
This is defined by the dimensional variables according to \code{dimVar}, \code{hierarchies} or \code{formula} and in addition \code{charVar}.}

\item{extraAggregate}{When \code{TRUE}, the data will be aggregated by the dimensional variables according to \code{dimVar}, \code{hierarchies} or \code{formula}.
The aggregated data and the corresponding x-matrix will only be used as input to the singleton
function and \code{\link{GaussSuppression}}.
This extra aggregation is useful when parameter \code{charVar} is used.
Supply \code{"publish_inner"}, \code{"publish_inner_x"}, \code{"publish_x"} or \code{"inner_x"} as \code{output} to obtain extra aggregated results.
Supply \code{"inner"} or \code{"input2functions"} to obtain other results.}

\item{structuralEmpty}{When \code{TRUE}, output cells with no contributing inner cells (only zeros in column of \code{x})
are forced to be not primary suppressed.
Thus, these cells are considered as structural zeros.
When \code{structuralEmpty} is \code{TRUE}, the following error message is avoided:
\code{Suppressed} \code{cells} \code{with} \code{empty} \code{input} \code{will} \code{not} \code{be} \code{protected.}
\code{Extend} \code{input} \code{data} \code{with} \verb{zeros?}.
When \code{removeEmpty} is \code{TRUE} (see "\code{...}" above), \code{structuralEmpty} is superfluous.}

\item{extend0}{Data is automatically extended by \code{Extend0} when \code{TRUE}.
Can also be set to \code{"all"} which means that input codes in hierarchies are considered in addition to those in data.
Parameter \code{extend0} can also be specified as a list meaning parameter \code{varGroups} to \code{Extend0}.}

\item{spec}{\code{NULL} or a named list of arguments that will act as default values.}

\item{specLock}{When \code{TRUE}, arguments in \code{spec} cannot be changed.}

\item{freqVarNew}{Name of new frequency variable generated when input \code{freqVar} is NULL and \code{preAggregate} is TRUE.
Default is \code{"freq"} provided this is not found in \code{names(data)}.}

\item{nUniqueVar}{Name of variable holding the number of unique contributors.
This variable will be generated in the \code{extraAggregate} step.
Default is \code{"nUnique"} provided this is not found in \code{names(data)}.
If an existing variable is passed as input,
this variable will apply only when \code{preAggregate}/\code{extraAggregate} is not done.}

\item{forcedInOutput}{Whether to include \code{forced} as an output column.
One of \code{"ifNonNULL"} (default), \code{"always"}, \code{"ifany"} and \code{"no"}.
In addition, \code{TRUE} and \code{FALSE} are allowed as alternatives to  \code{"always"} and \code{"no"}.}

\item{unsafeInOutput}{Whether to include \code{unsafe} as an output column.
One of \code{"ifForcedInOutput"} (default), \code{"always"}, \code{"ifany"} and \code{"no"}.
In addition, \code{TRUE} and \code{FALSE} are allowed as alternatives to  \code{"always"} and \code{"no"}.
See details.}

\item{lpPackage}{\itemize{
\item \strong{\code{lpPackage}:}
When non-NULL, intervals by \code{\link{ComputeIntervals}}
will be included in the output.
See its documentation for valid parameter values for 'lpPackage'.
If, additionally, at least one of the two \code{\link{RangeLimitsDefault}} parameters below is specified,
further suppression will be performed to satisfy the interval width requirements.
Then, the values in the output variable \code{suppressed_integer} mean:
no suppression (0),
primary suppression (1),
secondary suppression (2),
additional suppression applied by an interval algorithm limited to linearly independent cells (3),
and further suppression according to the final gauss algorithm (4).
Intervals, \verb{[lo_1, up_1]}, are intervals calculated prior to additional suppression.
\itemize{
\item \strong{\code{rangePercent}:} Required interval width expressed as a percentage
\item \strong{\code{rangeMin}:} Minimum required width of the interval

\if{html}{\out{<div class="sourceCode">}}\preformatted{         Please note that interval calculations may have a 
         different interface in future versions.
}\if{html}{\out{</div>}}
}
}}
}
\value{
Aggregated data with suppression information
}
\description{
Aggregates are generated followed by
primary suppression followed by
secondary suppression by Gaussian elimination by \code{\link{GaussSuppression}}
}
\details{
The supplied functions for generating \code{\link{GaussSuppression}} input take the following arguments:
\code{crossTable},  \code{x}, \code{freq}, \code{num}, \code{weight}, \code{maxN}, \code{protectZeros}, \code{secondaryZeros}, \code{data}, \code{freqVar}, \code{numVar}, \code{weightVar}, \code{charVar}, \code{dimVar} and \code{...},
where the first two are \code{\link{ModelMatrix}} outputs (\code{modelMatrix} renamed to \code{x}).
The vector, \code{freq}, is aggregated counts (\code{t(x) \%*\% data[[freqVar]]}).
In addition, the supplied \code{singleton} function also takes \code{nUniqueVar} and (output from) \code{primary} as input.

Similarly, \code{num}, is a data frame of aggregated numerical variables.
It is possible to supply several primary functions joined by \code{c}, e.g. (\code{c(FunPrim1, FunPrim2)}).
All \code{NA}s returned from any of the functions force the corresponding cells not to be primary suppressed.

The effect of \code{maxN}, \code{protectZeros} and \code{secondaryZeros} depends on the supplied functions where these parameters are used.
Their default values are inherited from the default values of the first \code{primary} function (several possible) or,
in the case of \code{secondaryZeros}, the \code{candidates} function.
When defaults cannot be inherited, they are set to \code{NULL}.
In practice the function \code{formals} is still used to generate the defaults when \code{primary} and/or \code{candidates} are not functions.
Then \code{NULL} is correctly returned, but \code{suppressWarnings} is needed.

Singleton handling can be turned off by \code{singleton = NULL} or \code{singletonMethod = "none"}.
Both of these choices are identical in the sense that \code{singletonMethod} is set to \code{"none"} whenever \code{singleton} is \code{NULL} and vice versa.

Information about uncertain primary suppressions due to forced cells can be found
as described by parameters \code{unsafeInOutput} and \code{output}  (\verb{= "all"}).
When forced cells affect singleton problems, this is not implemented.
Some information can be seen from warnings.
This can also be seen by choosing \code{output = "secondary"} together
with \code{unsafeInOutput = "ifany"} or \code{unsafeInOutput = "always"}.
Then, negative indices from \code{\link{GaussSuppression}} using
\code{unsafeAsNegative = TRUE} will be included in the output.
Singleton problems may, however, be present even if it cannot be seen as warning/output.
In some cases, the problems can be detected by \code{\link{GaussSuppressDec}}.

In some cases, cells that are forced, hidden, or primary suppressed can overlap.
For these situations, forced has precedence over hidden and primary.
That is, if a cell is both forced and hidden, it will be treated as a forced cell and thus published.
Similarly, any primary suppression of a forced cell will be ignored
(see parameter \code{whenPrimaryForced} to \code{\link{GaussSuppression}}).
It is, however, meaningful to combine primary and hidden.
Such cells will be protected while also being assigned the \code{NA} value in the \code{suppressed} output variable.
}
\examples{

z1 <- SSBtoolsData("z1")
GaussSuppressionFromData(z1, 1:2, 3)

z2 <- SSBtoolsData("z2")
GaussSuppressionFromData(z2, 1:4, 5, protectZeros = FALSE)


# Data as in GaussSuppression examples
df <- data.frame(values = c(1, 1, 1, 5, 5, 9, 9, 9, 9, 9, 0, 0, 0, 7, 7), 
                 var1 = rep(1:3, each = 5), var2 = c("A", "B", "C", "D", "E"))

GaussSuppressionFromData(df, c("var1", "var2"), "values")
GaussSuppressionFromData(df, c("var1", "var2"), "values", formula = ~var1 + var2, maxN = 10)
GaussSuppressionFromData(df, c("var1", "var2"), "values", formula = ~var1 + var2, maxN = 10,
      protectZeros = TRUE, # Parameter needed by SingletonDefault and default not in primary  
      primary = function(freq, crossTable, maxN, ...) 
                   which(freq <= maxN & crossTable[[2]] != "A" & crossTable[, 2] != "C"))
                   
# Combining several primary functions 
# Note that NA & c(TRUE, FALSE) equals c(NA, FALSE)                      
GaussSuppressionFromData(df, c("var1", "var2"), "values", formula = ~var1 + var2, maxN = 10, 
       primary = c(function(freq, maxN, protectZeros = TRUE, ...) freq >= 45,
                   function(freq, maxN, ...) freq <= maxN,
                   function(crossTable, ...) NA & crossTable[[2]] == "C",  
                   function(crossTable, ...) NA & crossTable[[1]]== "Total" 
                                                & crossTable[[2]]== "Total"))                    
                   
# Similar to GaussSuppression examples
GaussSuppressionFromData(df, c("var1", "var2"), "values", formula = ~var1 * var2, 
       candidates = NULL, singleton = NULL, protectZeros = FALSE, secondaryZeros = TRUE)
GaussSuppressionFromData(df, c("var1", "var2"), "values", formula = ~var1 * var2, 
       singleton = NULL, protectZeros = FALSE, secondaryZeros = FALSE)
GaussSuppressionFromData(df, c("var1", "var2"), "values", formula = ~var1 * var2, 
       protectZeros = FALSE, secondaryZeros = FALSE)

              
# Examples with zeros as singletons
z <- data.frame(row = rep(1:3, each = 3), col = 1:3, freq = c(0, 2, 5, 0, 0, 6:9))
GaussSuppressionFromData(z, 1:2, 3, singleton = NULL) 
GaussSuppressionFromData(z, 1:2, 3, singletonMethod = "none") # as above 
GaussSuppressionFromData(z, 1:2, 3)
GaussSuppressionFromData(z, 1:2, 3, protectZeros = FALSE, secondaryZeros = TRUE, singleton = NULL)
GaussSuppressionFromData(z, 1:2, 3, protectZeros = FALSE, secondaryZeros = TRUE)      
}
\author{
Øyvind Langsrud and Daniel Lupp
}
