#' Filter Sequences
#'
#' Reads amino-acid sequences from FASTA input and drops every sequence that
#' contains a character outside the 20 standard amino-acid one-letter codes
#' (`ACDEFGHIKLMNPQRSTVWY`).
#'
#' Matching is case-sensitive and is applied to the full sequence string, so
#' any other character (for example `X`, `B`, `Z`, `*` or `-`) causes the
#' sequence to be removed. Zero-length sequences contain no offending
#' character and are therefore kept.
#'
#' @param input_file Path to the input FASTA file; passed unchanged to
#'   `Biostrings::readAAStringSet()`.
#' @param verbose Logical flag. If `TRUE` (the default), a message reporting
#'   the number of sequences retained after filtering is emitted.
#' @return An `AAStringSet` object containing the filtered sequences.
#' @export
filter_sequences <- function(input_file, verbose = TRUE) {
  # Fail early with a clear message rather than an opaque reader error.
  if (!is.character(input_file) || length(input_file) == 0L ||
        anyNA(input_file) || !all(nzchar(input_file))) {
    stop(
      "`input_file` must be a non-empty character path to a FASTA file.",
      call. = FALSE
    )
  }
  if (!is.logical(verbose) || length(verbose) != 1L || is.na(verbose)) {
    stop("`verbose` must be a single TRUE or FALSE.", call. = FALSE)
  }

  # Read the FASTA file
  sequences <- Biostrings::readAAStringSet(input_file, format = "fasta")

  # Flag sequences containing any character outside the 20 standard residues.
  # NOTE(review): the pattern is uppercase-only; if the reader preserves
  # lowercase residues those sequences are dropped too -- confirm intended.
  non_standard_aa <- "[^ACDEFGHIKLMNPQRSTVWY]"
  has_non_standard <- grepl(non_standard_aa, as.character(sequences))
  filtered_sequences <- sequences[!has_non_standard]

  # Report how many sequences survived; silenced with verbose = FALSE.
  if (verbose) {
    message(
      "Sequences after filtering non-standard amino acids: ",
      length(filtered_sequences)
    )
  }

  filtered_sequences
}

