% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generate_population_totals.R
\name{generate_population_totals}
\alias{generate_population_totals}
\title{Generate population totals for a calibration design matrix}
\usage{
generate_population_totals(
  population_df,
  calibration_formula,
  weights = NULL,
  contrasts = NULL,
  include_intercept = TRUE,
  sparse = FALSE,
  na_action = stats::na.pass,
  drop_zero_cols = FALSE
)
}
\arguments{
\item{population_df}{A data frame containing the calibration population.}

\item{calibration_formula}{A one-sided formula specifying main effects and interactions
(e.g., \code{~ stype + api00_bin:stype}). The intercept is handled by
\code{include_intercept}.}

\item{weights}{Optional numeric vector of population weights (length \code{nrow(population_df)}).
If \code{NULL} (default), unweighted totals are computed.}

\item{contrasts}{Optional named list of contrasts to pass to \code{model.matrix()} (e.g.,
\code{list(stype = contr.treatment)}). If \code{NULL}, the current global
\code{options(contrasts=...)} are used.}

\item{include_intercept}{Logical; if \code{TRUE} (default), keep the \code{(Intercept)} column
in the totals (its total equals \code{sum(weights)}, or \code{nrow(population_df)} if unweighted).}

\item{sparse}{Logical; if \code{TRUE}, build the population model matrix internally as a sparse
Matrix while computing totals. (Totals are always returned as a base numeric vector.)}

\item{na_action}{NA handling passed to \code{model.frame()}; defaults to \code{stats::na.pass}.
Consider \code{stats::na.omit} for stricter behavior.}

\item{drop_zero_cols}{Logical; if \code{TRUE}, drop columns whose population total is exactly zero.
Default \code{FALSE}. A message is emitted if any zero-total columns are found.}
}
\value{
An object of class \code{"calib_totals"}: a list with the following elements:
\itemize{
\item \code{population_totals}: named numeric vector of column totals
\item \code{levels}: list of factor levels observed in the population (for reproducibility)
\item \code{terms}: the \code{terms} object built on \code{population_df}
\item \code{contrasts}: the contrasts actually used (from the model matrix)
}
}
\description{
Build a fixed model matrix on a population frame and return the
column totals needed for calibration (optionally weighted). The function
freezes the dummy-variable and interaction structure on the population by
constructing a \code{terms} object, so that downstream use on respondent
data can reuse exactly the same encoding.
}
\examples{
\donttest{
# Example using the API data from the survey package
library(survey)
data(api) # loads apipop, apisrs, apistrat, etc.

# Build a population frame and create some binary fields used in a formula
pop <- apipop
pop$api00_bin <- as.factor(ifelse(pop$api00 >= 700, "700plus", "lt700"))
pop$growth_bin <- as.factor(ifelse(pop$growth >= 0, "nonneg", "neg"))
pop$ell_bin <- as.factor(ifelse(pop$ell >= 10, "highELL", "lowELL"))
pop$comp.imp_bin <- as.factor(ifelse(pop$comp.imp >= 50, "highComp", "lowComp"))
pop$hsg_bin <- as.factor(ifelse(pop$hsg >= 60, "highHSG", "lowHSG"))

# A calibration formula with main effects + a few interactions
cal_formula <- ~ stype + growth_bin + api00_bin + ell_bin + comp.imp_bin + hsg_bin +
  api00_bin:stype + hsg_bin:stype + comp.imp_bin:stype + api00_bin:growth_bin

# Frame weights, if available, can be supplied via `weights`; here we compute unweighted totals
gp <- generate_population_totals(
  population_df        = pop,
  calibration_formula  = cal_formula,
  include_intercept    = TRUE
)

# Named totals ready for calibration:
head(gp$population_totals)

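# If you later build a respondent model matrix, reuse gp$terms to ensure alignment.
# The respondent data must first be given the same derived *_bin factors as `pop`
# (they are created above on `pop` only, so add them to apisrs before this step):
# X_resp <- model.matrix(gp$terms, data = apisrs)
# stopifnot(identical(colnames(X_resp), names(gp$population_totals)))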
}

}
