#' Return species lists
#'
#' While there are reasons why users may need to check every record meeting their
#' search criteria (i.e. using [atlas_occurrences()]), a common use case
#' is to simply identify which species occur in a specified region, time period,
#' or taxonomic group. This function returns a `data.frame` with one row
#' per species, and columns giving associated taxonomic information.
#'
#' @param request optional `data_request` object: generated by a call to
#' [galah_call()].
#' @param identify `data.frame`: generated by a call to
#' [galah_identify()].
#' @param filter `data.frame`: generated by a call to
#' [galah_filter()]
#' @param geolocate `string`: generated by a call to
#' [galah_geolocate()]
#' @param data_profile `string`: generated by a call to
#' [galah_apply_profile()]
#' @param refresh_cache `logical`: if set to `TRUE` and 
#' `galah_config(caching = TRUE)` then files cached from a previous query will 
#' be replaced by the current query
#' @return An object of class `tbl_df` and `data.frame` (aka a tibble),
#' returning matching species. The `data.frame` object has attributes listing
#' the user-supplied arguments of the `data_request`
#' (i.e., identify, filter, geolocate, columns)
#' @details
#' The primary use case of this function is to extract species-level information
#' given a set of criteria defined by [search_taxa()],
#' [galah_filter()] or [galah_geolocate()]. If the purpose
#' is simply to get taxonomic information that is not restricted by filtering,
#' then [search_taxa()] is more efficient. Similarly, if counts are
#' required that include filter but without returning taxonomic detail, then
#' [atlas_counts()] is more efficient (see examples).
#' @examples
#' # First register a valid email address
#' galah_config(email = "ala4r@ala.org.au")
#' 
#' # Get a list of species within genus "Heleioporus"
#' # (every row is a species with associated taxonomic data)
#' galah_call() |>
#'   galah_identify("Heleioporus") |>
#'   atlas_species()
#' 
#' # Get a list of species within family "Peramelidae"
#' galah_call() |>
#'   galah_identify("peramelidae") |>
#'   atlas_species()
#' 
#' # It's a good idea to find out how many species there are before downloading
#' galah_call() |>
#'   galah_identify("Heleioporus") |>
#'   atlas_counts(type = "species")
#' 
#' @export
atlas_species <- function(request = NULL,
                          identify = NULL,
                          filter = NULL,
                          geolocate = NULL,
                          data_profile = NULL,
                          refresh_cache = FALSE
                          ) {

  # Build the working call: create a new one from the named arguments, or,
  # when a `request` was supplied, validate it and update it with any other
  # named arguments that have been given.
  if (is.null(request)) {
    current_call <- galah_call(
      identify = identify,
      filter = filter,
      geolocate = geolocate,
      data_profile = data_profile,
      refresh_cache = refresh_cache
    )
  } else {
    check_data_request(request)
    current_call <- update_galah_call(request,
      identify = identify,
      filter = filter,
      geolocate = geolocate,
      data_profile = data_profile,
      refresh_cache = refresh_cache
    )
  }

  # choose behaviour depending on whether we are calling LAs or GBIF
  if (is_gbif()) {
    function_name <- "occurrences_GBIF"
    current_call$format <- "SPECIES_LIST"
    arg_names <- names(formals(occurrences_GBIF))
  } else {
    function_name <- "atlas_species_internal"
    arg_names <- names(formals(atlas_species_internal))
  }

  # keep only the entries that the chosen function accepts as arguments
  custom_call <- current_call[names(current_call) %in% arg_names]
  class(custom_call) <- "data_request"

  # check for caching
  # isTRUE() makes an unset (NULL) or NA caching option mean "no caching"
  # instead of raising an error inside `&&` / `if` below.
  caching <- isTRUE(getOption("galah_config")$package$caching)
  cache_file <- cache_filename("species", unlist(custom_call))
  if (caching && !isTRUE(refresh_cache) && file.exists(cache_file)) {
    return(read_cache_file(cache_file))
  }

  # run the selected function (looked up by name) with the subsetted call
  result <- do.call(function_name, custom_call)
  if (is.null(result)) {
    result <- tibble()
  }
  # record what was requested alongside the data
  attr(result, "data_type") <- "species"
  attr(result, "data_request") <- custom_call

  # if caching requested, save
  if (caching) {
    write_cache_file(object = result,
                     data_type = "species",
                     cache_file = cache_file)
  }

  result
}


# Internal function behind atlas_species(): builds the species-list query,
# downloads the result as csv and returns it as a tibble.
#
# Arguments use the same names as atlas_species(). `request` and
# `refresh_cache` are accepted but not used in the body.
#
# Returns a tibble. If the download returns NULL, a "system down" message is
# shown and an empty tibble is returned.
atlas_species_internal <- function(request,
                                   identify,
                                   filter,
                                   geolocate,
                                   data_profile,
                                   refresh_cache
                                   ) {

  # An argument supplied as NULL is treated the same as one that was not
  # supplied at all, so the warning also fires when a caller forwards NULL
  # placeholders. `||` short-circuits, so a missing argument is never
  # evaluated.
  no_identify <- missing(identify) || is.null(identify)
  no_filter <- missing(filter) || is.null(filter)
  no_geolocate <- missing(geolocate) || is.null(geolocate)
  if (no_identify && no_filter && no_geolocate) {
    warn("This query will return a list of all species in the ALA.")
  }

  # ensure profile works from galah_filter as well as galah_profile:
  # an explicit data_profile takes precedence, otherwise try the filter
  if (!is.null(data_profile)) {
    profile <- data_profile$data_profile
  } else if (!is.null(filter)) {
    profile <- extract_profile(filter)
  } else {
    profile <- NULL
  }

  # query parameters: the filter-derived part first, then the fixed
  # download fields
  query <- c(
    build_query(identify, filter, geolocate, profile = profile),
    emailNotify = email_notify(),
    sourceTypeId = 2004,
    reasonTypeId = getOption("galah_config")$user$download_reason_id,
    email = user_email(),
    facets = species_facets(),
    lookup = "true"
  )

  # download to a temporary file
  tmp <- tempfile()
  url <- url_lookup("records_species")
  result <- url_download(url, params = query, cache_file = tmp, ext = "csv")

  # a NULL result means the download failed
  if (is.null(result)) {
    system_down_message("atlas_species")
    return(tibble())
  }

  if (getOption("galah_config")$atlas$region == "Australia") {
    # rename columns via rename_columns() and keep only the columns listed
    # by wanted_columns("checklist")
    names(result) <- rename_columns(names(result), type = "checklist")
    result <- result[, wanted_columns("checklist")]
  }

  tibble(result)
}
