% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/create_knockoffs.R
\name{create_knockoffs}
\alias{create_knockoffs}
\title{Create Multiple Knockoffs for Genetic Data}
\usage{
create_knockoffs(
  X,
  pos,
  chr_info = NULL,
  sample_ids = NULL,
  M = 5,
  save_gds = TRUE,
  output_dir = NULL,
  start = NULL,
  end = NULL,
  corr_max = 0.75,
  maxN_neighbor = Inf,
  maxBP_neighbor = 1e+05,
  n_AL = floor(10 * nrow(X)^(1/3) * log(nrow(X))),
  thres_ultrarare = 25,
  R2_thres = 1,
  prob_eps = 1e-12,
  irlba_maxit = 1500
)
}
\arguments{
\item{X}{A sparse matrix (n x p) of genotype data where n is the number of 
samples and p is the number of SNPs. Typically coded as 0, 1, 2 for
genotype dosages.}

\item{pos}{A numeric vector of SNP positions (in base pairs) for linkage
disequilibrium-aware knockoff generation.}

\item{chr_info}{Optional chromosome information (default: NULL). Can be either:
\enumerate{
  \item A data frame of chromosome information from a BIM file, containing
    a column named \code{"chr"} or \code{"CHR"} with chromosome numbers, or
  \item A vector of chromosome numbers supplied directly.
}
In either case the chromosome information is extracted automatically.}

\item{sample_ids}{A character vector of sample IDs (default: NULL, in which
case sample IDs are generated automatically).}

\item{M}{Number of knockoff copies to generate (default: 5). More copies
can improve statistical power but increase computational cost.}

\item{save_gds}{Whether to save knockoffs to GDS format (default: TRUE)}

\item{output_dir}{Directory in which to save GDS files (default: NULL, which
uses \code{tempdir()}).}

\item{start}{Start position used for file naming (default: NULL, which uses
\code{min(pos)}).}

\item{end}{End position used for file naming (default: NULL, which uses
\code{max(pos)}).}

\item{corr_max}{Maximum correlation threshold for clustering variants
(default: 0.75). Higher values create fewer, larger clusters.}

\item{maxN_neighbor}{Maximum number of neighboring variants to consider
for each variant (default: Inf).}

\item{maxBP_neighbor}{Maximum base pair distance to consider variants as
neighbors (default: 100,000 bp).}

\item{n_AL}{Number of samples to use for adaptive lasso fitting
(default: \code{floor(10 * n^(1/3) * log(n))}, where \code{n = nrow(X)} is
the sample size).}

\item{thres_ultrarare}{Minimum minor allele count threshold for variant
inclusion (default: 25).}

\item{R2_thres}{R-squared threshold for model fitting (default: 1).}

\item{prob_eps}{Minimum probability value to prevent numerical issues
(default: 1e-12).}

\item{irlba_maxit}{Maximum iterations for truncated SVD (default: 1500).}
}
\value{
If \code{save_gds} is \code{TRUE}, returns the path to the saved GDS file.
Otherwise, returns a list of \code{M} matrices, each with the same dimensions
as \code{X}, containing the knockoff variables.
}
\description{
Generate knockoff variables for genotype data using the multiple-knockoff
method with leveraging scores and clustering, specifically optimized for
genetic variant data.
}
