# Smoke test: a mixed numeric/character data frame with an explicit ID column
# should come back as a two-element list holding a matrix plus metadata.
test_that("prep_for_anomaly handles basic data correctly", {
  # Fixed seed so the random fixture (and any failure) is reproducible.
  set.seed(42)
  n_patients <- 100L
  data <- data.frame(
    patient_id = seq_len(n_patients),
    age = rnorm(n_patients, 50, 15),
    cost = rnorm(n_patients, 10000, 5000),
    gender = sample(c("M", "F"), n_patients, replace = TRUE)
  )

  result <- prep_for_anomaly(data, id_cols = "patient_id")

  expect_type(result, "list")
  expect_named(result, c("prepared_data", "metadata"))
  expect_true(is.matrix(result$prepared_data))
  # expect_equal() reports the actual row count on failure, which
  # expect_true(nrow(...) == 100) cannot do.
  expect_equal(nrow(result$prepared_data), n_patients)
  expect_true("patient_id" %in% result$metadata$id_cols)
})

# When `id_cols` is not supplied, the `patient_id` column is expected to be
# reported as an ID column in the returned metadata.
test_that("prep_for_anomaly auto-detects ID columns", {
  # Fixed seed so the random fixture (and any failure) is reproducible.
  set.seed(42)
  n_patients <- 50L
  data <- data.frame(
    patient_id = seq_len(n_patients),
    age = rnorm(n_patients, 50, 15),
    cost = rnorm(n_patients, 10000, 5000)
  )

  result <- prep_for_anomaly(data)

  expect_true("patient_id" %in% result$metadata$id_cols)
})

# A numeric column with 10 trailing missing values must not leave any NA in
# the prepared matrix.
test_that("prep_for_anomaly handles missing values", {
  # Fixed seed so the random fixture (and any failure) is reproducible.
  set.seed(42)
  n_patients <- 100L
  n_missing <- 10L
  data <- data.frame(
    patient_id = seq_len(n_patients),
    age = c(rnorm(n_patients - n_missing, 50, 15), rep(NA_real_, n_missing)),
    cost = rnorm(n_patients, 10000, 5000)
  )

  result <- prep_for_anomaly(data, id_cols = "patient_id")

  # Should not have NA values in prepared data
  expect_false(anyNA(result$prepared_data))
})

# Two character columns alongside one numeric column: both character columns
# should be listed as categorical and the prepared data should be a matrix.
test_that("prep_for_anomaly handles categorical variables", {
  # Fixed seed so the random fixture (and any failure) is reproducible.
  set.seed(42)
  n_patients <- 100L
  data <- data.frame(
    patient_id = seq_len(n_patients),
    age = rnorm(n_patients, 50, 15),
    gender = sample(c("M", "F", "Other"), n_patients, replace = TRUE),
    diagnosis = sample(c("A", "B", "C"), n_patients, replace = TRUE)
  )

  result <- prep_for_anomaly(data, id_cols = "patient_id")

  # expect_length() reports the actual length on failure.
  expect_length(result$metadata$categorical_cols, 2L)
  expect_true(is.matrix(result$prepared_data))
})

# Each of the "mad", "minmax" and "none" scale methods should run on the same
# input and yield a matrix.
test_that("prep_for_anomaly handles different scale methods", {
  # Fixed seed so the random fixture (and any failure) is reproducible.
  set.seed(42)
  n_patients <- 100L
  data <- data.frame(
    patient_id = seq_len(n_patients),
    age = rnorm(n_patients, 50, 15),
    cost = rnorm(n_patients, 10000, 5000)
  )

  result_mad <- prep_for_anomaly(
    data,
    id_cols = "patient_id",
    scale_method = "mad"
  )
  result_minmax <- prep_for_anomaly(
    data,
    id_cols = "patient_id",
    scale_method = "minmax"
  )
  result_none <- prep_for_anomaly(
    data,
    id_cols = "patient_id",
    scale_method = "none"
  )

  expect_true(is.matrix(result_mad$prepared_data))
  expect_true(is.matrix(result_minmax$prepared_data))
  expect_true(is.matrix(result_none$prepared_data))
})

# A column passed via `exclude_cols` should be absent from the numeric columns
# in the metadata and present in the metadata's excluded columns.
test_that("prep_for_anomaly handles exclude_cols", {
  # Fixed seed so the random fixture (and any failure) is reproducible.
  set.seed(42)
  n_patients <- 100L
  data <- data.frame(
    patient_id = seq_len(n_patients),
    age = rnorm(n_patients, 50, 15),
    cost = rnorm(n_patients, 10000, 5000),
    exclude_me = rnorm(n_patients, 0, 1)
  )

  result <- prep_for_anomaly(
    data,
    id_cols = "patient_id",
    exclude_cols = "exclude_me"
  )

  expect_false("exclude_me" %in% result$metadata$numeric_cols)
  expect_true("exclude_me" %in% result$metadata$excluded_cols)
})

# Invalid inputs must raise an error: a non-data-frame argument, an empty
# data frame, and a data frame whose only column is the ID column.
test_that("prep_for_anomaly errors on invalid inputs", {
  id_only <- data.frame(patient_id = seq_len(100))

  expect_error(prep_for_anomaly("not a data frame"))
  expect_error(prep_for_anomaly(data.frame()))
  expect_error(prep_for_anomaly(id_only, id_cols = "patient_id"))
})

# With three numeric feature columns and no character columns, the metadata
# should list three numeric columns and no categorical ones.
test_that("prep_for_anomaly handles all numeric data", {
  # Fixed seed so the random fixture (and any failure) is reproducible.
  set.seed(42)
  n_patients <- 100L
  data <- data.frame(
    patient_id = seq_len(n_patients),
    age = rnorm(n_patients, 50, 15),
    cost = rnorm(n_patients, 10000, 5000),
    length_stay = rpois(n_patients, 5)
  )

  result <- prep_for_anomaly(data, id_cols = "patient_id")

  # expect_length() reports the actual length on failure.
  expect_length(result$metadata$numeric_cols, 3L)
  expect_length(result$metadata$categorical_cols, 0L)
})

# With two character feature columns and no numeric ones, the metadata should
# list no numeric columns and two categorical ones.
test_that("prep_for_anomaly handles all categorical data", {
  # Fixed seed so the random fixture (and any failure) is reproducible.
  set.seed(42)
  n_patients <- 100L
  data <- data.frame(
    patient_id = seq_len(n_patients),
    gender = sample(c("M", "F"), n_patients, replace = TRUE),
    diagnosis = sample(c("A", "B"), n_patients, replace = TRUE)
  )

  result <- prep_for_anomaly(data, id_cols = "patient_id")

  # expect_length() reports the actual length on failure.
  expect_length(result$metadata$numeric_cols, 0L)
  expect_length(result$metadata$categorical_cols, 2L)
})

# A character column with 10 trailing missing values must not leave any NA in
# the prepared matrix.
test_that("prep_for_anomaly handles missing values in categorical", {
  # Fixed seed so the random fixture (and any failure) is reproducible.
  set.seed(42)
  n_patients <- 100L
  n_missing <- 10L
  data <- data.frame(
    patient_id = seq_len(n_patients),
    age = rnorm(n_patients, 50, 15),
    gender = c(
      sample(c("M", "F"), n_patients - n_missing, replace = TRUE),
      rep(NA_character_, n_missing)
    )
  )

  result <- prep_for_anomaly(data, id_cols = "patient_id")

  expect_false(anyNA(result$prepared_data))
})

