% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/path_coeff.R
\name{path_coeff}
\alias{path_coeff}
\alias{path_coeff_mat}
\alias{path_coeff_seq}
\title{Path coefficients with minimal multicollinearity}
\usage{
path_coeff(
  .data,
  resp,
  pred = everything(),
  by = NULL,
  exclude = FALSE,
  correction = NULL,
  knumber = 50,
  brutstep = FALSE,
  maxvif = 10,
  missingval = "pairwise.complete.obs",
  plot_res = FALSE,
  verbose = TRUE,
  ...
)

path_coeff_mat(cor_mat, resp, correction = NULL, knumber = 50, verbose = TRUE)

path_coeff_seq(.data, resp, chain_1, chain_2, by = NULL, verbose = TRUE, ...)
}
\arguments{
\item{.data}{The data. Must be a data frame or grouped data passed from
\code{\link[dplyr:group_by]{dplyr::group_by()}}.}

\item{resp}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> The dependent trait.}

\item{pred}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> The predictor traits. Defaults to
\code{everything()}, i.e., the predictor traits are all the numeric traits in
the data except that in \code{resp}. To select multiple traits, use a
comma-separated vector of names (e.g., \code{pred = c(V1, V2, V3)}), an
interval of trait names (e.g., \code{pred = c(V1:V3)}), or even a select helper
(e.g., \code{pred = starts_with("V")}).}

\item{by}{One variable (factor) to compute the function by. It is a shortcut
to \code{\link[dplyr:group_by]{dplyr::group_by()}}. To compute the statistics by more than
one grouping variable use that function.}

\item{exclude}{Logical argument. Defaults to \code{FALSE}. If \code{exclude = TRUE}, then
the traits in \code{pred} are deleted from the data, and the analysis
will use as predictors those that remain, except that in \code{resp}.}

\item{correction}{Defaults to \code{NULL}. A correction value (k) that will be
added to the diagonal elements of the \strong{X'X} matrix aiming at
reducing the harmful effects of multicollinearity in path analysis
(Olivoto et al., 2017).}

\item{knumber}{When \code{correction = NULL}, a plot showing the values of
direct effects in a set of different k values (0-1) is produced.
\code{knumber} is the number of k values used in the range of 0 to 1.}

\item{brutstep}{Logical argument. Defaults to \code{FALSE}. If \code{TRUE}, then an
algorithm will select a subset of variables with minimal multicollinearity
and fit a set of possible models. See the \strong{Details} section for more
information.}

\item{maxvif}{The maximum value for the Variance Inflation Factor (cut point)
that will be accepted. See the \strong{Details} section for more information.}

\item{missingval}{How to deal with missing values. For more information,
please see \code{\link[stats:cor]{stats::cor()}}.}

\item{plot_res}{If \code{TRUE}, create a scatter plot of residual against
predicted value and a normal Q-Q plot.}

\item{verbose}{If \code{verbose = TRUE} then some results are shown in the
console.}

\item{...}{Depends on the function used:
\itemize{
\item For \code{path_coeff()} additional arguments passed on to \code{\link[stats:plot.lm]{stats::plot.lm()}}.
\item For \code{path_coeff_seq()} additional arguments passed on to \link{path_coeff}.
}}

\item{cor_mat}{Matrix of correlations containing both dependent and
independent traits.}

\item{chain_1, chain_2}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> The traits used
in the first (primary) and second (secondary) chain.}
}
\value{
Depends on the function used:
\itemize{
\item \code{path_coeff()} returns a list with the following items:
\itemize{
\item \strong{Corr.x} A correlation matrix between the predictor variables.
\item \strong{Corr.y} A vector of correlations between each predictor variable
with the dependent variable.
\item \strong{Coefficients} The path coefficients. Direct effects are the
diagonal elements, and the indirect effects those in the off-diagonal
elements (lines).
\item \strong{Eigen} Eigenvectors and eigenvalues of \code{Corr.x}.
\item \strong{VIF} The Variance Inflation Factors.
\item \strong{plot} A ggplot2-based graphic showing the direct effects in
\code{knumber} different k values.
\item \strong{Predictors} The predictor variables used in the model.
\item \strong{CN} The Condition Number, i.e., the ratio between the highest and
lowest eigenvalue.
\item \strong{Det} The matrix determinant of \code{Corr.x}.
\item \strong{R2} The coefficient of determination of the model.
\item \strong{Residual} The residual effect of the model.
\item \strong{Response} The response variable.
\item \strong{weightvar} The order of the predictor variables with the highest
weight (highest eigenvector) in the lowest eigenvalue.
}
\item \code{path_coeff_seq()} returns a list with the following objects:
\itemize{
\item \strong{resp_fc} an object of class \code{path_coeff} with the results for the
analysis with dependent trait and first chain predictors.
\item \strong{resp_sc} an object of class \code{path_coeff} with the results for the
analysis with dependent trait and second chain predictors.
\item \strong{resp_sc2} The path coefficients of second chain predictors and the
dependent trait through the first chain predictors.
\item \strong{fc_sc_list} A list of objects with the path analysis using each trait
in the first chain as dependent and second chain as predictors.
\item \strong{fc_sc_coef} The coefficients between first- and second-chain traits.
\item \strong{cor_mat} A correlation matrix between the analyzed traits.
If \code{.data} is a grouped data passed from \code{\link[dplyr:group_by]{dplyr::group_by()}}
then the results will be returned into a list-column of data frames.
}
}
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}
\itemize{
\item \code{path_coeff()} computes a path analysis using a data frame as input data.
\item \code{path_coeff_seq()} computes a sequential path analysis using primary and secondary traits.
\item \code{path_coeff_mat()} computes a path analysis using correlation matrices as
input data.
}
}
\details{
In \code{path_coeff()}, when \code{brutstep = TRUE}, an algorithm to
select a set of predictors with minimal multicollinearity and high
explanatory power is implemented. First, the algorithm will select a set of
predictors with minimal multicollinearity. The selection is based on the
variance inflation factor (VIF). An iterative process is performed until
the maximum VIF observed is less than \code{maxvif}. The variables selected
in this iterative process are then used in a series of stepwise-based
regressions. The first model is fitted and p-1 predictor variables are
retained (p is the number of variables selected in the iterative process).
The second model adjusts a regression considering p-2 selected variables,
and so on until the last model, which considers only two variables. Three
objects are created: \code{Summary}, with the process summary;
\code{Models}, containing the aforementioned values for all the adjusted
models; and \code{Selectedpred}, a vector with the name of the selected
variables in the iterative process.
}
\examples{
library(metan)

# Using KW as the response variable and all other ones as predictors
pcoeff <- path_coeff(data_ge2, resp = KW)

# The same as above, but using the correlation matrix
cor_mat <- cor(data_ge2 \%>\% select_numeric_cols())
pcoeff2 <- path_coeff_mat(cor_mat, resp = KW)

# Declaring the predictors
# Create a residual plot with 'plot_res = TRUE'
pcoeff3<- path_coeff(data_ge2,
                      resp = KW,
                      pred = c(PH, EH, NKE, TKW),
                      plot_res = TRUE)

# Selecting a set of predictors with minimal multicollinearity
# Maximum variance inflation factor of 5
pcoeff4 <- path_coeff(data_ge2,
                     resp = KW,
                     brutstep = TRUE,
                     maxvif = 5)


# When one analysis should be carried out for each environment
# Using the 'by' argument to group the analysis by ENV
pcoeff5 <- path_coeff(data_ge2, resp = KW, by = ENV)


# sequential path analysis
# KW as dependent trait
# NKE and TKW as primary predictors
# PH, EH, EP, and EL as secondary traits
pcoeff6 <-
 path_coeff_seq(data_ge2,
               resp = KW,
               chain_1 = c(NKE, TKW),
               chain_2 = c(PH, EH, EP, EL))
pcoeff6$resp_sc$Coefficients
pcoeff6$resp_sc2


}
\references{
Olivoto, T., V.Q. Souza, M. Nardino, I.R. Carvalho, M. Ferrari, A.J.
Pelegrin, V.J. Szareski, and D. Schmidt. 2017. Multicollinearity in path
analysis: a simple method to reduce its effects. Agron. J. 109:131-142.
\doi{10.2134/agronj2016.04.0196}

Olivoto, T., M. Nardino, I.R. Carvalho, D.N. Follmann, M. Ferrari, et al.
2017. REML/BLUP and sequential path analysis in estimating genotypic values
and interrelationships among simple maize grain yield-related traits. Genet.
Mol. Res. 16(1): gmr16019525. \doi{10.4238/gmr16019525}
}
\author{
Tiago Olivoto \email{tiagoolivoto@gmail.com}
}
