Set anomaly detection threshold — set_anomaly_threshold

This function provides various methods to set thresholds for anomaly detection based on anomaly scores from isoForest.

Usage

set_anomaly_threshold(
  object,
  method = "contamination",
  contamination = 0.05,
  quantile_threshold = 0.95,
  iqr_multiplier = 1.5,
  zscore_threshold = 2,
  mad_multiplier = 3,
  kde_multiplier = 3,
  mtt_alpha = 0.05,
  mtt_max_iter = 30,
  manual_threshold = NULL
)

Arguments

object: An isoForest model object
method: The method to use for threshold setting. Options include: "contamination" (default), "quantile", "iqr", "zscore", "mad", "kde_weighted", "mtt", "manual"
contamination: The expected proportion of outliers (for contamination method). Default is 0.05
quantile_threshold: The quantile threshold (for quantile method). Default is 0.95
iqr_multiplier: The IQR multiplier (for iqr method). Default is 1.5
zscore_threshold: The z-score threshold (for zscore method). Default is 2
mad_multiplier: The MAD multiplier (for mad method). Default is 3
kde_multiplier: The multiplier for standard deviation (for kde_weighted method). Default is 3
mtt_alpha: Significance level for the Modified Thompson Tau (MTT) test (for mtt method). Default is 0.05
mtt_max_iter: Maximum number of iterations for iterative outlier removal (for mtt method). Default is 30
manual_threshold: The manual threshold value (for manual method). Default is NULL

Value

A list containing:

threshold: The calculated threshold value
method: The method used
predictions: A data frame with id, average_depth, anomaly_score, and is_anomaly columns
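summary: A list summarizing the results, with elements method, threshold, total_samples, detected_anomalies, actual_contamination_rate, score_range, score_mean, score_median, and score_sd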

Examples

# Load data and train model
data(iris)
model <- isoForest(iris[1:4])

# Method 1: Contamination-based (most common)
result1 <- set_anomaly_threshold(model, method = "contamination", contamination = 0.05)

# Method 2: Quantile-based
result2 <- set_anomaly_threshold(model, method = "quantile", quantile_threshold = 0.95)

# Method 3: IQR-based
result3 <- set_anomaly_threshold(model, method = "iqr", iqr_multiplier = 1.5)

# Method 4: Z-score based
result4 <- set_anomaly_threshold(model, method = "zscore", zscore_threshold = 2)

# Method 5: MAD-based (robust)
result5 <- set_anomaly_threshold(model, method = "mad", mad_multiplier = 3)

# Method 6: KDE-weighted (density-weighted robust mean)
result6 <- set_anomaly_threshold(model, method = "kde_weighted", kde_multiplier = 3)

# Method 7: MTT-based (Modified Thompson Tau test, good for small samples)
result7 <- set_anomaly_threshold(model, method = "mtt", mtt_alpha = 0.05)

# Method 8: Manual threshold
result8 <- set_anomaly_threshold(model, method = "manual", manual_threshold = 0.6)

# View results
print(result1$summary)
#> $method
#> [1] "contamination"
#> 
#> $threshold
#>       95% 
#> 0.6088238 
#> 
#> $total_samples
#> [1] 150
#> 
#> $detected_anomalies
#> [1] 8
#> 
#> $actual_contamination_rate
#> [1] 0.05333333
#> 
#> $score_range
#> [1] 0.5471808 0.6647263
#> 
#> $score_mean
#> [1] 0.5661733
#> 
#> $score_median
#> [1] 0.5593003
#> 
#> $score_sd
#> [1] 0.02091958
#> 
head(result1$predictions)
#>   id average_depth anomaly_score is_anomaly
#> 1  1         7.960     0.5480909      FALSE
#> 2  2         7.914     0.5499988      FALSE
#> 3  3         7.860     0.5522470      FALSE
#> 4  4         7.740     0.5572759      FALSE
#> 5  5         7.894     0.5508304      FALSE
#> 6  6         7.478     0.5684154      FALSE