% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/undersample.R
\name{undersample_tomek}
\alias{undersample_tomek}
\title{Undersample a dataset by removing Tomek links}
\usage{
undersample_tomek(data, cls, cls_col, m, tomek = "minor", force_m = TRUE, ...)
}
\arguments{
\item{data}{Dataset to be undersampled.}

\item{cls}{Majority class to be undersampled.}

\item{cls_col}{Column in data containing class memberships.}

\item{m}{Desired number of samples in the undersampled dataset.}

\item{tomek}{Rule used to decide which classes count as minority classes when identifying Tomek links.
\itemize{
\item \code{minor}: Minor classes are all those with fewer than \code{m} instances.
\item \code{diff}: Minor classes are all those that aren't \code{cls}.
}}

\item{force_m}{If \code{TRUE}, randomly discards additional samples when too few Tomek links are present to reduce the data to \code{m} rows.}

\item{...}{Additional arguments passed to \code{\link[stats]{dist}()}.}
}
\value{
Undersampled data frame containing only instances of \code{cls}.
}
\description{
A Tomek link is a pair consisting of a minority instance and a majority instance that are each other's nearest neighbor. This function removes instances of \code{cls} that belong to Tomek links, removing enough of them to yield \code{m} instances of \code{cls}. If \code{force_m} is \code{TRUE} and the data contain too few Tomek links, additional samples are randomly discarded to yield \code{m} rows.
}
\examples{
table(iris$Species)
undersamp <- undersample_tomek(iris, "setosa", "Species", 15, tomek = "diff", force_m = TRUE)
nrow(undersamp)
undersamp2 <- undersample_tomek(iris, "setosa", "Species", 15, tomek = "diff", force_m = FALSE)
nrow(undersamp2)
}
