From e4c47c26e3627745e4a065200b5fe16977ab9710 Mon Sep 17 00:00:00 2001
From: Chao Pang
Date: Thu, 12 Sep 2024 17:13:09 -0400
Subject: [PATCH] we only add a new value to the running stat if the value is between the lower and upper bounds

---
 src/cehrbert/utils/stat_utils.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/cehrbert/utils/stat_utils.py b/src/cehrbert/utils/stat_utils.py
index ba3d5eba..182ee986 100644
--- a/src/cehrbert/utils/stat_utils.py
+++ b/src/cehrbert/utils/stat_utils.py
@@ -6,13 +6,19 @@ class RunningStatistics(OnlineStatistics):
 
     def __init__(self, capacity=100, value_outlier_std=2.0):
         super().__init__()
+        self.value_outlier_std = value_outlier_std
         self.excluding_outlier_online_statistics = ExcludingOutlierOnlineStatistics(
             capacity=capacity, value_outlier_std=value_outlier_std
         )
 
     def add(self, weight: float, value: float) -> None:
         if self.excluding_outlier_online_statistics.is_full():
-            super().add(weight, value)
+            # Skip outliers: only add values within value_outlier_std standard deviations of the current mean.
+            std = self.standard_deviation()
+            lower_bound = self.current_mean - self.value_outlier_std * std
+            upper_bound = self.current_mean + self.value_outlier_std * std
+            if lower_bound <= value <= upper_bound:
+                super().add(weight, value)
         else:
             self.excluding_outlier_online_statistics.add(value)
             if self.excluding_outlier_online_statistics.is_full():