# @rdname check_visits_within_span_SU
# Internal helper: reports whether at least `n_SU` of the supplied visit
# dates fall inside a single window of length `t_SU`.
#
# dates  vector of visit dates (anything that supports sort(), comparison
#        and `+ t_SU`)
# n_SU   minimum number of visits required inside one window
# t_SU   window length, in the same unit as `dates`; both ends are inclusive
#
# Returns TRUE as soon as one qualifying window is found, FALSE otherwise
# (including when `dates` is empty).
check_visits_within_span_SU <- function(dates, n_SU, t_SU) {
  # sort() orders the visits; by default it also discards NA entries.
  ordered <- sort(dates)

  # Number of visits inside the closed window [start, start + t_SU].
  visits_in_window <- function(start) {
    sum(ordered >= start & ordered <= start + t_SU)
  }

  # Anchor a window on each visit in turn and stop at the first hit.
  pos <- 1L
  while (pos <= length(ordered)) {
    if (visits_in_window(ordered[pos]) >= n_SU) {
      return(TRUE)
    }
    pos <- pos + 1L
  }

  FALSE
}

# @rdname compute_SU_status
# Internal helper: turns the two visit-date vectors of one client into a
# "YES"/"NO" substance use status.
#
# Diagnostic_H  dates of the hospital visits that carried a matching code
# Diagnostic_P  dates of the physician visits that carried a matching code
# n_SUH, n_SUP  minimum number of hospital / physician visits required
# t_SU          window length passed on to check_visits_within_span_SU()
#
# Returns "YES" when either source satisfies its own threshold within the
# window, "NO" otherwise.
compute_SU_status <- function(Diagnostic_H, Diagnostic_P, n_SUH, n_SUP, t_SU) {
  # Both sources are always evaluated before the decision is taken.
  hospital_ok <- check_visits_within_span_SU(Diagnostic_H, n_SUH, t_SU)
  physician_ok <- check_visits_within_span_SU(Diagnostic_P, n_SUP, t_SU)

  # Neither source reached its threshold.
  if (!hospital_ok && !physician_ok) {
    return("NO")
  }

  "YES"
}


#' @rdname SU_status
#' @title Substance Use status detection in North American Healthcare Administrative Databases
#' @description Substance Use status is detected in North American Healthcare Administrative Databases using clinician-specified parameters of interest, such as the minimum number of hospital visits, the minimum number of physician services, the time lag between them, and a list of plausible ICD diagnostic codes.
#' @details Each visit's \code{Diagnostic_H} and \code{Diagnostic_P} value is split on commas and surrounding whitespace is trimmed from every code. A visit matches when the first 3, 4 or 5 characters of any of its codes equal an entry of \code{ICD_SU}. For every client, the dates of the matching hospital visits and of the matching physician visits are passed to \code{compute_SU_status()} together with \code{n_SUH}, \code{n_SUP} and \code{t_SU}.
#' @param inputdata a data frame including the columns ClientID, VisitDate, Diagnostic_H, and Diagnostic_P
#' @param n_SUH minimum number of potential substance use related hospital visits
#' @param n_SUP minimum number of potential substance use related medical service physician visits
#' @param t_SU maximum time span (in days when VisitDate is a Date) within which the required hospital visits, or the required medical service physician visits, must fall
#' @param ICD_SU plausible list of Substance Use status diagnostic codes
#' @returns a data frame with one row per client and the columns ClientID, earliestdate_SU (earliest matching visit date, as a YYYY-MM-DD character string, or NA when no visit matched), latestdate_SU (latest matching visit date, same format), and SU_status ("YES" or "NO")
#'
#' @references
#' Khan S. (2017). Concurrent mental and substance use disorders in Canada. Health reports, 28(8), 3–8, Ottawa, ON, Canada, PMID: 29044442. https://pubmed.ncbi.nlm.nih.gov/29044442/
#' Canadian Institute for Health Information. (2022). Canadian coding standards for version 2022 ICD-10-CA and CCI. Canadian Institute for Health Information. Ottawa, Ontario, Canada. https://www.cihi.ca/en/version-2022-icd-10-cacci-classifications-canadian-coding-standards-and-related-products
#' Centers for Disease Control and Prevention. (2024). International classification of diseases, tenth revision, clinical modification (ICD-10-CM) 2024. National Center for Health Statistics. Atlanta, GA, United States. https://www.cdc.gov/nchs/icd/icd-10-cm/index.html
#' Casillas, S. M., Scholl, L., Mustaquim, D., & Vivolo-Kantor, A. (2022). Analysis of trends and usage of ICD-10-CM discharge diagnosis codes for poisonings by fentanyl, tramadol, and other synthetic narcotics in emergency department data. Addictive Behaviors Reports, 16, 100464. Atlanta, GA, USA. https://doi.org/10.1016/j.abrep.2022.100464
#'
#' @import dplyr
#' @import tidyr
#' @import purrr
#' @import utils
#'
#' @examples
#' data(SampleRWD)
#' myexample <- SampleRWD[,c(1:4)]
#' SampleSU_1 <- SU_status(myexample,
#' n_SUH=1, n_SUP=1,  t_SU=60,
#' ICD_SU=c("F100","T4041","F120","F140"))
#' head(SampleSU_1)
#'
#' @export
SU_status <- function(inputdata, n_SUH, n_SUP, t_SU, ICD_SU) {
  # Fail early, with a readable message, when a required column is absent.
  required_cols <- c("ClientID", "VisitDate", "Diagnostic_H", "Diagnostic_P")
  missing_cols <- setdiff(required_cols, names(inputdata))
  if (length(missing_cols) > 0) {
    stop(
      "`inputdata` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # TRUE for every element of `diagnostics` (a comma-separated code string)
  # in which the first 3, 4 or 5 characters of at least one code are listed
  # in ICD_SU. as.character() lets factor columns through; trimws() lets
  # codes written as "A, B" match. vapply() keeps the result logical even
  # for zero-length input.
  has_su_code <- function(diagnostics) {
    code_lists <- strsplit(as.character(diagnostics), ",", fixed = TRUE)
    vapply(
      code_lists,
      function(codes) {
        codes <- trimws(codes)
        any(vapply(
          3:5,
          function(n) any(substr(codes, 1, n) %in% ICD_SU),
          logical(1)
        ))
      },
      logical(1)
    )
  }

  # Applies `pick` (min or max) to the non-missing dates and formats the
  # result; always returns a character scalar so the output column type does
  # not depend on whether any client matched.
  format_su_date <- function(dates, pick) {
    dates <- dates[!is.na(dates)]
    if (length(dates) == 0) {
      return(NA_character_)
    }
    format(pick(dates), "%Y-%m-%d")
  }

  inputdata %>%
    mutate(
      Diagnostic_H_match = has_su_code(.data$Diagnostic_H),
      Diagnostic_P_match = has_su_code(.data$Diagnostic_P),
      Diagnostic_any_match = .data$Diagnostic_H_match | .data$Diagnostic_P_match
    ) %>%
    group_by(.data$ClientID) %>%
    summarise(
      earliestdate_SU = format_su_date(
        .data$VisitDate[.data$Diagnostic_any_match],
        min
      ),
      latestdate_SU = format_su_date(
        .data$VisitDate[.data$Diagnostic_any_match],
        max
      ),
      SU_status = compute_SU_status(
        .data$VisitDate[.data$Diagnostic_H_match],
        .data$VisitDate[.data$Diagnostic_P_match],
        n_SUH,
        n_SUP,
        t_SU
      )
    ) %>%
    # Column names as strings: `.data$` inside select() is deprecated.
    select("ClientID", "earliestdate_SU", "latestdate_SU", "SU_status")
}


