% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/contactCompare_chisq.R
\name{contactCompare_chisq}
\alias{contactCompare_chisq}
\title{Compare Observed Contacts to a Random Distribution Using Chi-Square GoF}
\usage{
contactCompare_chisq(
  x.summary,
  y.summary,
  x.potential,
  y.potential = NULL,
  importBlocks = FALSE,
  shuffle.type = 1,
  pairContacts = TRUE,
  totalContacts = TRUE,
  popLevelOutput = FALSE,
  parallel = FALSE,
  nCores = (parallel::detectCores()/2),
  ...
)
}
\arguments{
\item{x.summary}{List or single-data frame output from the summarizeContacts
function referring to the empirical data. Note that if x.summary is a list
of data frames, only the first data frame will be used in the function.}

\item{y.summary}{List or single-data frame output from the summarizeContacts
function referring to the randomized data (i.e., NULL model 
contact-network edge weights). Note that if y.summary is a list
of data frames, only the first data frame will be used in the function.}

\item{x.potential}{List or single-data frame output from the 
potentialDurations function referring to the empirical data. Note that if 
x.potential is a list of data frames, potential contact durations used in
the function will be determined by averaging those reported in each list 
entry.}

\item{y.potential}{List or single-data frame output from the 
potentialDurations function referring to the randomized data. Note that if 
y.potential is a list of data frames, potential contact durations used in
the function will be determined by averaging those reported in each list 
entry. If NULL, reverts to x.potential. Defaults to NULL.}

\item{importBlocks}{Logical. If true, each block in x.summary will be 
analyzed separately. Defaults to FALSE. Note that the "block" column must
exist in .summary AND .potential objects, and values must be identical 
(i.e., if block 100 exists in x inputs, it must also exist in y inputs), 
otherwise an error will be returned.}

\item{shuffle.type}{Integer. Describes which shuffle.type (from the 
randomizePaths function) was used to randomize the y.summary data 
set(s). Takes the values "0," "1," or "2." Defaults to 1. This is 
important because there are different assumptions associated with each 
shuffle.type.}

\item{pairContacts}{Logical. If TRUE individual id columns from x.summary 
and y.summary inputs will be included in analyses. Defaults to TRUE.}

\item{totalContacts}{Logical. If TRUE totalDegree and totalContactDurations
columns from x.summary and y.summary inputs will be included in analyses.
Defaults to TRUE.}

\item{popLevelOutput}{Logical. If TRUE a secondary output describing 
population-level comparisons will be appended to the standard, 
individual-level function output.}

\item{parallel}{Logical. If TRUE, sub-functions within the 
contactCompare_chisq wrapper will be parallelized. Note that the only 
sub-function parallelized here is called ONLY when importBlocks == TRUE.}

\item{nCores}{Integer. Describes the number of cores to be dedicated to 
parallel processes. Defaults to half of the maximum number of cores 
available (i.e., (parallel::detectCores()/2)).}

\item{...}{Other arguments to be passed to the chisq.test function.}
}
\value{
Output format is dependent on \code{popLevelOutput} value.

   If \code{popLevelOutput} == FALSE, output will be a single data frame 
   containing individual-level pairwise analyses of node degree, total 
   edge weight (i.e., the sum of all observed contacts involving each 
   individual), and specific dyad weights (e.g., contacts between 
   individuals 1 and 2). The data frame contains the following columns: 
   
   \item{id}{the id of the specific individual.}
   \item{metric}{designation of what is being compared (e.g., totalDegree, 
   totalContactDurations, individual 2, etc.). Content will 
   change depending on which data frame is being observed.}
   \item{method}{Statistical test used to determine significance.}
   \item{X.squared}{Test statistic associated with the comparison.}
   \item{p.val}{p.values associated with each comparison.}
   \item{df}{Degrees of freedom associated with the statistical test.}
   \item{contactDurations.x}{Describes the number of observed events
   in x.summary.}
   \item{contactDurations.y}{Describes the number of observed events in 
   y.summary.}
   \item{noContactDurations.x}{Describes the number of empirical events that
   were not observed given the total number of potential events in 
   x.potential.}
   \item{noContactDurations.y}{Describes the number of random events that
   were not observed given the total number of potential events in 
   y.potential.}
   \item{difference}{The absolute value given by subtracting 
   contactDurations.y from contactDurations.x.}
   \item{warning}{Denotes if any specific warning occurred during analysis.}
   \item{block.x}{Denotes the specific time block from x. (Only if 
   \code{importBlocks} == TRUE)}
   \item{block.start.x}{Denotes the specific timepoint at the beginning of 
   each time block. (Only if \code{importBlocks} == TRUE)}
   \item{block.end.x}{Denotes the specific timepoint at the end of each time
   block. (Only if \code{importBlocks} == TRUE)}
   \item{block.y}{Denotes the specific time block from y. (Only if 
   \code{importBlocks} == TRUE)}
   \item{block.start.y}{Denotes the specific timepoint at the beginning of 
   each time block. (Only if \code{importBlocks} == TRUE)}
   \item{block.end.y}{Denotes the specific timepoint at the end of each time
   block. (Only if \code{importBlocks} == TRUE)}
   
   If \code{popLevelOutput} == TRUE, output will be a list of two data 
   frames: the one described above, and a second describing the 
   population-level comparisons. Columns in each data frame are identical.
}
\description{
This function is used to determine if tracked individuals in an 
   empirical dataset had more or fewer contacts with other tracked 
   individuals/specified locations than would be expected at random. The
   function works by comparing an empirical contact distribution (generated 
   using x.summary and x.potential) to a NULL distribution (generated using 
   y.summary and y.potential) using a Chi-square goodness-of-fit test. Note
   that here, the NULL hypothesis is that empirical data are consistent with
   the NULL distribution, and the alternative hypothesis is that the data
   are NOT consistent. This function SHOULD NOT be used to compare two 
   empirical networks using Chi-squared tests, as the function assumes 
   x.summary and y.summary represent observed and expected values, 
   respectively. Please note that this is a function of convenience that is 
   essentially a wrapper for the chisq.test function, that allows users to 
   easily compare contact networks created using our pipeline of contact:: 
   functions.
   
This function was inspired by the methods described by Spiegel et al. 2016. 
   They determined individuals to be expressing social behavior when nodes 
   had greater degree values than would be expected at random, with 
   randomized contact networks derived from movement paths randomized 
   according to their novel methodology (that can be implemented using our 
   randomizePaths function). Here, users can also identify when more or 
   fewer contacts (demonstrated by the sign of values in the "difference" 
   column in the output) occur with specific individuals than would be expected 
   at random, given a pre-determined p-value threshold. Such relationships 
   suggest social affinities or aversions, respectively, may exist between 
   specific individuals.
   
Note: The default tested column (i.e., categorical data column from which 
   data is drawn to be compared to randomized sets herein) is "id." This 
   means that contacts involving each individual (defined by a unique "id") 
   will be compared to randomized sets. Users may not use any data column 
   for analysis other than "id." If users want to use another categorical 
   data column in analyses rather than "id," we recommend re-processing 
   data (starting from the dist.all/distToArea functions), while specifying 
   this new data as an "id." For example, users may append an illness 
   status column to the empirical input, wherein they describe if the 
   tracked individual displayed gastrointestinal ("gastr"), respiratory 
   ("respr"), or both ("both") types of illness symptoms, or was 
   consistently healthy ("hel") over the course of the tracking period. 
   Users could set this 
   information as the "id," and carry it forward as such through the 
   data-processing pipeline. Ultimately, they could determine if each of 
   these disease states affected contact rates, relative to what would be 
   expected at random.    
   
Take care to ensure that the same shuffle.type is denoted as was originally 
   used to randomize individuals' locations (assuming the randomizePaths 
   function was used to do so). This is important for two reasons: 1.) If 
   there was no y.potential input, the function assumes that x.potential is 
   relevant to the random set as well. This is a completely fair assumption 
   when importBlocks == FALSE or when the shuffle.type == 0. In cases when 
   the shuffle.type is 1 or 2, however, this assumption can lead to 
   erroneous results and/or errors in the function. 2.) In the 
   randomizePaths function, setting shuffle.type == 2 produces only 1 
   shuffle.unit's worth of data (e.g., 1 day), rather than a dataset with 
   the same length of x. As such, there may be a different number of blocks 
   in y compared to x. Here we assume that the mean randomized durations 
   per block in y.summary and y.potential, are representative of mean 
   randomized durations per block across each shuffle unit (e.g., day 1 is 
   representative of day 3, etc.).
   
Finally, if Chi-square expected values are very small, 
   approximations of p may not be correct (and in fact, all estimates will 
   be poor). It may be best to weight these tests differently. In the event 
   that this is the case, \code{\link{contactCompare_binom}} may be used to 
   obtain more-accurate estimates.
}
\examples{
\donttest{
# Worked example: build empirical and NULL-model contact summaries from the
# calves data set, then compare them with contactCompare_chisq.

data(calves) #load data

calves.dateTime<-datetime.append(calves, date = calves$date,
                                 time = calves$time) #add dateTime column

calves.agg<-tempAggregate(calves.dateTime, id = calves.dateTime$calftag,
                       dateTime = calves.dateTime$dateTime, point.x = calves.dateTime$x,
                       point.y = calves.dateTime$y, secondAgg = 300, extrapolate.left = FALSE,
                       extrapolate.right = FALSE, resolutionLevel = "reduced", parallel = FALSE,
                       na.rm = TRUE, smooth.type = 1) #aggregate to 5-min timepoints

calves.dist<-dist2All_df(x = calves.agg, parallel = FALSE,
                       dataType = "Point", lonlat = FALSE) #calculate  inter-calf distances

# Empirical contacts, split into 1-hour blocks.
calves.contact.block<-contactDur.all(x = calves.dist, dist.threshold=1,
                       sec.threshold=10, blocking = TRUE, blockUnit = "hours", blockLength = 1,
                       equidistant.time = FALSE, parallel = FALSE, reportParameters = TRUE)

# Empirical inputs (x.summary and x.potential) for contactCompare_chisq.
emp.summary <- summarizeContacts(calves.contact.block, 
                                 importBlocks = TRUE) #empirical contact summ.
emp.potential <- potentialDurations(calves.dist, blocking = TRUE, 
                                    blockUnit = "hours", blockLength = 1, 
                                    distFunction = "dist2All_df") 

# Randomized paths. Note shuffle.type = 0 here: the same value must be passed 
# to contactCompare_chisq below.
calves.agg.rand<-randomizePaths(x = calves.agg, id = "id",
                       dateTime = "dateTime", point.x = "x", point.y = "y", poly.xy = NULL,
                       parallel = FALSE, dataType = "Point", numVertices = 1, blocking = TRUE,
                       blockUnit = "mins", blockLength = 20, shuffle.type = 0, shuffleUnit = NA,
                       indivPaths = TRUE, numRandomizations = 2) #randomize calves.agg

calves.dist.rand<-dist2All_df(x = calves.agg.rand, point.x = "x.rand",
                       point.y = "y.rand", parallel = FALSE, dataType = "Point", lonlat = FALSE)

calves.contact.rand<-contactDur.all(x = calves.dist.rand,
                       dist.threshold=1, sec.threshold=10, blocking = TRUE, blockUnit = "hours",
                       blockLength = 1, equidistant.time = FALSE, parallel = FALSE,
                       reportParameters = TRUE) #NULL model contacts (list of 2)

# NULL-model inputs (y.summary and y.potential) for contactCompare_chisq.
rand.summary <- summarizeContacts(calves.contact.rand, avg = TRUE,
                                  importBlocks = TRUE) #NULL contact summary
rand.potential <- potentialDurations(calves.dist.rand, blocking = TRUE, 
                                     blockUnit = "hours", blockLength = 1, 
                                     distFunction = "dist2All_df") 

# Compare empirical contacts to the NULL model. The popLevelOutput argument is 
# spelled out in full so the calls do not rely on partial argument matching.
contactCompare_chisq(x.summary = emp.summary, y.summary = rand.summary, 
                     x.potential = emp.potential, y.potential = rand.potential,
                     importBlocks = FALSE, shuffle.type = 0, 
                     popLevelOutput = TRUE, parallel = FALSE) #no blocking

contactCompare_chisq(x.summary = emp.summary, y.summary = rand.summary, 
                     x.potential = emp.potential, y.potential = rand.potential,
                     importBlocks = TRUE, shuffle.type = 0, 
                     popLevelOutput = TRUE, parallel = FALSE) #blocking
   }
}
\references{
Agresti, A. 2007. An introduction to categorical data analysis, 
   2nd ed. New York: John Wiley & Sons. 38.

   Farine, D.R., 2017. A guide to null models for animal social 
   network analysis. Methods in Ecology and Evolution 8:1309-1320.
   https://doi.org/10.1111/2041-210X.12772.
   
   Spiegel, O., Leu, S.T., Sih, A., and C.M. Bull. 2016. Socially 
   interacting or indifferent neighbors? Randomization of movement paths to 
   tease apart social preference and spatial constraints. Methods in Ecology
   and Evolution 7:971-979. https://doi.org/10.1111/2041-210X.12553.
}
\keyword{network-analysis}
\keyword{social-network}
