#' Synchronize FASTA and FASTQ files or objects
#'
#' @description \code{fastx_synchronize} synchronizes reads between two
#' FASTA/FASTQ files or objects by retaining only the reads whose sequence IDs
#' are present in both.
#'
#' @param file1 (Required). A FASTA/FASTQ file path, a FASTA/FASTQ tibble, or a
#' paired-end tibble of class \code{"pe_df"}. See \emph{Details}.
#' @param file2 (Optional). A FASTA/FASTQ file path or a FASTA/FASTQ tibble.
#' May be omitted only if \code{file1} is a \code{"pe_df"} object. See
#' \emph{Details}.
#' @param file_format (Optional). Format of the input (\code{file1} and
#' \code{file2}) and the desired output format: \code{"fasta"} or \code{"fastq"}
#' (default). This determines the format for both outputs.
#' @param file1_out (Optional). Name of the output file for synchronized reads
#' from \code{file1}. The file is in either FASTA or FASTQ format, depending on
#' \code{file_format}. If \code{NULL} (default), no sequences are written to a
#' file. See \emph{Details}.
#' @param file2_out (Optional). Name of the output file for synchronized reads
#' from \code{file2}. The file is in either FASTA or FASTQ format, depending on
#' \code{file_format}. If \code{NULL} (default), no sequences are written to a
#' file. See \emph{Details}.
#'
#' @details
#' \code{file1} and \code{file2} can either be paths to FASTA/FASTQ files or
#' tibble objects containing the sequences.
#' FASTA objects are tibbles that contain the columns \code{Header} and
#' \code{Sequence}, see \code{\link[microseq]{readFasta}}. FASTQ objects are
#' tibbles that contain the columns \code{Header}, \code{Sequence}, and
#' \code{Quality}, see \code{\link[microseq]{readFastq}}.
#'
#' If \code{file1} is an object of class \code{"pe_df"}, the second read tibble
#' is automatically extracted from its \code{"reverse"} attribute unless
#' explicitly provided via the \code{file2} argument. This allows streamlined
#' input handling for paired-end tibbles created by
#' \code{\link{vs_fastx_trim_filt}}.
#'
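#' Reads are matched on their sequence ID, taken as the first
#' whitespace-delimited token of the \code{Header} field with any trailing
#' \code{/1} or \code{/2} removed. These IDs must be identical for each read
#' pair in \code{file1} and \code{file2} for synchronization to work correctly.
#' The synchronized reads are sorted by sequence ID in both outputs.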
#'
#' If \code{file1_out} and \code{file2_out} are specified, the synchronized
#' sequences are written to these files in the format specified by
#' \code{file_format}.
#'
#' If \code{file1_out} and \code{file2_out} are \code{NULL}, the function
#' returns a FASTA/FASTQ object containing synchronized reads from \code{file1}.
#' The synchronized reads from \code{file2} are included as an attribute named
#' \code{"reverse"} in the returned tibble.
#'
#' The returned tibble is assigned the S3 class \code{"pe_df"}, indicating that
#' it represents paired-end sequence data. Downstream functions can use this
#' class tag to distinguish paired-end tibbles from other tibbles.
#'
#' Both \code{file1_out} and \code{file2_out} must either be \code{NULL} or both
#' must be character strings specifying the file paths.
#'
#' @return A tibble or \code{NULL}.
#'
#' If both \code{file1_out} and \code{file2_out} are \code{NULL}, a tibble
#' containing the synchronized reads from \code{file1} is returned. The
#' synchronized reads from \code{file2} are accessible via the \code{"reverse"}
#' attribute of the returned tibble.
#'
#' If both \code{file1_out} and \code{file2_out} are specified, the synchronized
#' sequences are written to the specified output files, and \code{NULL} is
#' returned invisibly.
#'
#'
#' @examples
#' # Define arguments
#' file1 <- system.file("extdata/small_R1.fq", package = "Rsearch")
#' file2 <- system.file("extdata/small_R2.fq", package = "Rsearch")
#' file_format <- "fastq"
#' file1_out <- NULL
#' file2_out <- NULL
#'
#' # Synchronize files and return as a tibble
#' sync_seqs <- fastx_synchronize(file1 = file1,
#'                                file2 = file2,
#'                                file_format = file_format,
#'                                file1_out = file1_out,
#'                                file2_out = file2_out)
#'
#' # Extract tibbles with synchronized sequences
#' R1_sync <- sync_seqs
#' R2_sync <- attr(sync_seqs, "reverse")
#'
#' # Synchronize files and write to output files
#'
#' # Define output file names
#' out1 <- tempfile(fileext = ".fastq")
#' out2 <- tempfile(fileext = ".fastq")
#'
#' fastx_synchronize(file1 = file1,
#'                   file2 = file2,
#'                   file_format = file_format,
#'                   file1_out = out1,
#'                   file2_out = out2)
#'
#' \dontshow{unlink(c(out1, out2))}
#'
#' @aliases fastx_synchronize fastq_synchronize fasta_synchronize
#'
#' @export
#'
fastx_synchronize <- function(file1,
                              file2 = NULL,
                              file_format = "fastq",
                              file1_out = NULL,
                              file2_out = NULL) {

  # Validate file_format: it must be a single string naming a supported
  # format, otherwise the if() conditions below would fail with cryptic
  # length errors.
  if (!is.character(file_format) || length(file_format) != 1L ||
      !file_format %in% c("fasta", "fastq")) {
    stop("Invalid file_format. The files must be a fasta or fastq.",
         call. = FALSE)
  }

  # Extract file2 if file1 is a pe_df object and file2 is not provided
  if (is_pe_df(file1) && is.null(file2)) {
    file2 <- attr(file1, "reverse")
    if (is.null(file2)) {
      stop("file1 has class 'pe_df' but no 'reverse' attribute found.",
           call. = FALSE)
    }
  }

  # Without this guard a missing file2 would be reported as a tibble that
  # lacks the required columns, which hides the real problem.
  if (is.null(file2)) {
    stop("file2 must be provided unless file1 is a 'pe_df' object.",
         call. = FALSE)
  }

  # Validate output files: both NULL or both a single file path
  if (xor(is.null(file1_out), is.null(file2_out))) {
    stop("Either both file1_out and file2_out must be NULL, or both must be specified.",
         call. = FALSE)
  }

  if (!is.null(file1_out) &&
      (!is.character(file1_out) || length(file1_out) != 1L)) {
    stop("file1_out must be a character string specifying the output file path.",
         call. = FALSE)
  }

  if (!is.null(file2_out) &&
      (!is.character(file2_out) || length(file2_out) != 1L)) {
    stop("file2_out must be a character string specifying the output file path.",
         call. = FALSE)
  }

  write_output <- !is.null(file1_out)

  # Columns a tibble input must provide, and the matching error text
  required_cols <- switch(
    file_format,
    fastq = c("Header", "Sequence", "Quality"),
    fasta = c("Header", "Sequence")
  )
  missing_cols_msg <- switch(
    file_format,
    fastq = " FASTQ object must contain columns: Header, Sequence, Quality",
    fasta = " FASTA object must contain columns: Header and Sequence"
  )

  # Turn one input (file path or tibble) into a tibble of reads.
  # `arg` is the argument name ("file1"/"file2") used in error messages.
  load_reads <- function(x, arg) {
    if (is.character(x)) {
      if (length(x) != 1L) {
        stop(arg, " must be a single file path or a tibble.", call. = FALSE)
      }
      if (!file.exists(x)) {
        stop("Cannot find input file: ", x, call. = FALSE)
      }
      path <- normalizePath(x)
      switch(
        file_format,
        fastq = microseq::readFastq(path),
        fasta = microseq::readFasta(path)
      )
    } else {
      if (!all(required_cols %in% colnames(x))) {
        stop(arg, missing_cols_msg, call. = FALSE)
      }
      x
    }
  }

  # Add a `tag` column holding the sequence id: the first whitespace-delimited
  # token of Header with a trailing "/1" and then a trailing "/2" removed.
  add_tag <- function(reads) {
    reads |>
      dplyr::mutate(
        tag = stringr::str_extract(Header, "^\\S+"),
        tag = stringr::str_remove(tag, "/1$"),
        tag = stringr::str_remove(tag, "/2$")
      )
  }

  # Keep only reads whose tag is shared, sort by tag so both outputs share
  # the same order, and drop the helper column again.
  keep_common <- function(reads, tags) {
    reads |>
      dplyr::filter(tag %in% tags) |>
      dplyr::arrange(tag) |>
      dplyr::select(-tag)
  }

  # Handle inputs: file or tibble (file1 is processed before file2)
  file1 <- add_tag(load_reads(file1, "file1"))
  file2 <- add_tag(load_reads(file2, "file2"))

  # Find common tags and keep only sequences from common tags
  shared_tags <- intersect(file1$tag, file2$tag)
  sync_file1 <- keep_common(file1, shared_tags)
  sync_file2 <- keep_common(file2, shared_tags)

  # Write output files if specified; nothing is returned in that case
  if (write_output) {
    switch(
      file_format,
      fastq = {
        microseq::writeFastq(sync_file1, file1_out)
        microseq::writeFastq(sync_file2, file2_out)
      },
      fasta = {
        microseq::writeFasta(sync_file1, file1_out)
        microseq::writeFasta(sync_file2, file2_out)
      }
    )
    return(invisible(NULL))
  }

  # Add class label; unique() avoids a duplicated "pe_df" entry when file1
  # already carried that class on input.
  class(sync_file1) <- unique(c("pe_df", class(sync_file1)))

  # Add sync_file2 as "reverse" attribute and return the tibble
  attr(sync_file1, "reverse") <- sync_file2
  sync_file1
}
