% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/conf_mat.R
\name{conf_mat}
\alias{conf_mat}
\alias{conf_mat.table}
\alias{conf_mat.default}
\alias{conf_mat.data.frame}
\alias{tidy.conf_mat}
\title{Confusion Matrix for Categorical Data}
\usage{
conf_mat(data, ...)

\method{conf_mat}{data.frame}(
  data,
  truth,
  estimate,
  dnn = c("Prediction", "Truth"),
  case_weights = NULL,
  ...
)

\method{tidy}{conf_mat}(x, ...)
}
\arguments{
\item{data}{A data frame or a \code{\link[base:table]{base::table()}}.}

\item{...}{Not used.}

\item{truth}{The column identifier for the true class results
(that is a \code{factor}). This should be an unquoted column name, although
this argument is passed by expression and supports
\link[rlang:topic-inject]{quasiquotation} (you can unquote column
names). For \verb{_vec()} functions, a \code{factor} vector.}

\item{estimate}{The column identifier for the predicted class
results (that is also a \code{factor}). As with \code{truth} this can be
specified in different ways but the primary method is to use an
unquoted variable name. For \verb{_vec()} functions, a \code{factor} vector.}

\item{dnn}{A character vector of dimnames for the table.}

\item{case_weights}{The optional column identifier for case weights.
This should be an unquoted column name that evaluates to a numeric column
in \code{data}. For \verb{_vec()} functions, a numeric vector,
\code{\link[hardhat:importance_weights]{hardhat::importance_weights()}}, or \code{\link[hardhat:frequency_weights]{hardhat::frequency_weights()}}.}

\item{x}{A \code{conf_mat} object.}
}
\value{
\code{conf_mat()} produces an object with class \code{conf_mat}. This contains the
table and other objects. \code{tidy.conf_mat()} generates a tibble with columns
\code{name} (the cell identifier) and \code{value} (the cell count).

When used on a grouped data frame, \code{conf_mat()} returns a tibble containing
columns for the groups along with \code{conf_mat}, a list-column
where each element is a \code{conf_mat} object.
}
\description{
Calculates a cross-tabulation of observed and predicted classes.
}
\details{
For \code{\link[=conf_mat]{conf_mat()}} objects, a \code{broom} \code{tidy()} method has been created
that collapses the cell counts by cell into a data frame for
easy manipulation.

There is also a \code{summary()} method that computes various classification
metrics at once. See \code{\link[=summary.conf_mat]{summary.conf_mat()}}.

There is a \code{\link[ggplot2:autoplot]{ggplot2::autoplot()}}
method for quickly visualizing the matrix. Both heatmap and mosaic types
are implemented.

The function requires that the factors have exactly the same levels.
}
\examples{
\dontshow{if (rlang::is_installed(c("tidyr", "ggplot2"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
# Worked example: build a confusion matrix for one resample, average the
# cell proportions over all resamples, and plot the single-fold matrix.
library(dplyr)
data("hpc_cv")

# The confusion matrix from a single assessment set (i.e. fold).
# `obs` is supplied as `truth` and `pred` as `estimate`.
cm <- hpc_cv \%>\%
  filter(Resample == "Fold01") \%>\%
  conf_mat(obs, pred)
cm

# Now compute the average confusion matrix across all folds in
# terms of the proportion of the data contained in each cell.
# First get the raw cell counts per fold using the `tidy` method
library(tidyr)

# On grouped data, conf_mat() returns one row per group with the matrices
# stored in the `conf_mat` list-column; tidy() turns each matrix into
# `name` (cell identifier) and `value` (cell count) columns.
cells_per_resample <- hpc_cv \%>\%
  group_by(Resample) \%>\%
  conf_mat(obs, pred) \%>\%
  mutate(tidied = lapply(conf_mat, tidy)) \%>\%
  unnest(tidied)

# Get the totals per resample
# (`total` is the number of rows in each resample; it is the denominator
# for the per-cell proportions computed after the join.)
counts_per_resample <- hpc_cv \%>\%
  group_by(Resample) \%>\%
  summarize(total = n()) \%>\%
  left_join(cells_per_resample, by = "Resample") \%>\%
  # Compute the proportions
  mutate(prop = value / total) \%>\%
  group_by(name) \%>\%
  # Average
  summarize(prop = mean(prop))

counts_per_resample

# Now reshape these into a matrix
# The matrix is filled by row, and `ncol = 4` has to equal the number of
# levels of `hpc_cv$obs`, because those levels are assigned as both the
# row names and the column names below.
mean_cmat <- matrix(counts_per_resample$prop, byrow = TRUE, ncol = 4)
rownames(mean_cmat) <- levels(hpc_cv$obs)
colnames(mean_cmat) <- levels(hpc_cv$obs)

round(mean_cmat, 3)

# The confusion matrix can quickly be visualized using autoplot()
# (both calls plot `cm`, the single-fold matrix, not the averaged one)
library(ggplot2)

autoplot(cm, type = "mosaic")
autoplot(cm, type = "heatmap")
\dontshow{\}) # examplesIf}
}
\seealso{
\code{\link[=summary.conf_mat]{summary.conf_mat()}} for computing a large number of metrics from one
confusion matrix.
}
