saluki_core/observability/metrics/
histogram.rs

1//! Fixed-bucket histogram aggregation for Prometheus exposition.
2
3use std::sync::LazyLock;
4
5use crate::data_model::event::metric::Histogram;
6
7const HISTOGRAM_BUCKET_COUNT: usize = 30;
8static TIME_HISTOGRAM_BUCKETS: LazyLock<[(f64, &'static str); HISTOGRAM_BUCKET_COUNT]> =
9    LazyLock::new(|| histogram_buckets::<HISTOGRAM_BUCKET_COUNT>(0.000000128, 4.0));
10static NON_TIME_HISTOGRAM_BUCKETS: LazyLock<[(f64, &'static str); HISTOGRAM_BUCKET_COUNT]> =
11    LazyLock::new(|| histogram_buckets::<HISTOGRAM_BUCKET_COUNT>(1.0, 2.0));
12
/// A histogram aggregate over a fixed set of buckets, suitable for Prometheus exposition.
///
/// Bucket upper bounds follow a log-linear schedule that is picked from the metric name: a name
/// ending in `_seconds` selects the time-oriented schedule, and any other name selects the
/// non-time schedule.
#[derive(Debug, Clone)]
pub struct AggregatedHistogram {
    /// Running sum of all observed sample values, each multiplied by its weight.
    sum: f64,
    /// Running total of all observed sample weights.
    count: u64,
    /// One `(upper_bound, upper_bound_label, cumulative_count)` entry per bucket, in ascending
    /// order by upper bound.
    buckets: Vec<(f64, &'static str, u64)>,
}
23
24impl AggregatedHistogram {
25    /// Creates a new `AggregatedHistogram` for the given metric name.
26    ///
27    /// The metric name determines the bucket schedule: names ending in `_seconds` use a
28    /// time-oriented schedule, everything else uses a generic non-time schedule.
29    pub fn new(metric_name: &str) -> Self {
30        let base_buckets = if metric_name.ends_with("_seconds") {
31            &TIME_HISTOGRAM_BUCKETS[..]
32        } else {
33            &NON_TIME_HISTOGRAM_BUCKETS[..]
34        };
35
36        let buckets = base_buckets
37            .iter()
38            .map(|(upper_bound, upper_bound_str)| (*upper_bound, *upper_bound_str, 0))
39            .collect();
40
41        Self {
42            sum: 0.0,
43            count: 0,
44            buckets,
45        }
46    }
47
48    /// Merges another aggregated histogram into this one.
49    ///
50    /// The two histograms must have been constructed with the same bucket schedule (in practice,
51    /// this means they were constructed for the same metric name).
52    pub fn merge(&mut self, other: &AggregatedHistogram) {
53        self.sum += other.sum;
54        self.count += other.count;
55        for (dst, src) in self.buckets.iter_mut().zip(other.buckets.iter()) {
56            dst.2 += src.2;
57        }
58    }
59
60    /// Folds the samples from `histogram` into this aggregated histogram.
61    pub fn merge_histogram(&mut self, histogram: &Histogram) {
62        for sample in histogram.samples() {
63            self.add_sample(sample.value.into_inner(), sample.weight.0 as u64);
64        }
65    }
66
67    fn add_sample(&mut self, value: f64, weight: u64) {
68        self.sum += value * weight as f64;
69        self.count += weight;
70
71        for (upper_bound, _, count) in &mut self.buckets {
72            if value <= *upper_bound {
73                *count += weight;
74            }
75        }
76    }
77
78    /// Returns the running sum of all observed samples.
79    pub fn sum(&self) -> f64 {
80        self.sum
81    }
82
83    /// Returns the running count of all observed samples.
84    pub fn count(&self) -> u64 {
85        self.count
86    }
87
88    /// Returns an iterator over the buckets as `(upper_bound_str, cumulative_count)` pairs, in
89    /// ascending order by upper bound.
90    pub fn buckets(&self) -> impl Iterator<Item = (&'static str, u64)> + '_ {
91        self.buckets
92            .iter()
93            .map(|(_, upper_bound_str, count)| (*upper_bound_str, *count))
94    }
95}
96
/// Builds `N` log-linear histogram bucket upper bounds, each paired with its string label.
///
/// Upper bounds are produced in pairs. Pair `n` (counting from zero) is `base * scale^n` followed
/// by the midpoint between `base * scale^n` and `base * scale^(n+1)`. For example, `base=2` and
/// `scale=4` gives `2, 5, 8, 20, 32, 80, 128, 320, 512, ...`. When `N` is odd, only the first
/// element of the last pair is used.
///
/// Each label is the `Display` form of its upper bound. Labels are leaked to obtain `'static`
/// string slices, so every call permanently allocates one small string per bucket.
fn histogram_buckets<const N: usize>(base: f64, scale: f64) -> [(f64, &'static str); N] {
    // Endless stream of upper bounds, two per exponent; the `zip` below pulls only `N` of them.
    let upper_bounds = (0usize..).flat_map(|exponent| {
        let value = base * scale.powf(exponent as f64);
        let next_value = base * scale.powf((exponent + 1) as f64);

        [value, (value + next_value) / 2.0]
    });

    let mut buckets = [(0.0, ""); N];
    for (slot, upper_bound) in buckets.iter_mut().zip(upper_bounds) {
        let label: &'static str = upper_bound.to_string().leak();
        *slot = (upper_bound, label);
    }

    buckets
}
126
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn generates_log_linear_bucket_schedule() {
        // Worked example from the `histogram_buckets` docs: with base=2 and scale=4, pair `n` is
        // `base*scale^n` followed by the midpoint to `base*scale^(n+1)`, which flattens to
        // 2, 5, 8, 20, 32, 80, 128, 320, 512.
        let (bounds, labels): (Vec<f64>, Vec<&str>) =
            histogram_buckets::<9>(2.0, 4.0).iter().copied().unzip();
        assert_eq!(bounds, [2.0, 5.0, 8.0, 20.0, 32.0, 80.0, 128.0, 320.0, 512.0]);

        // Labels are simply the `Display` rendering of each upper bound.
        assert_eq!(labels, ["2", "5", "8", "20", "32", "80", "128", "320", "512"]);

        // base=1, scale=2 is what the generic (non-time) schedule is built from.
        let non_time_bounds: Vec<f64> = histogram_buckets::<6>(1.0, 2.0)
            .iter()
            .map(|&(bound, _)| bound)
            .collect();
        assert_eq!(non_time_bounds, [1.0, 1.5, 2.0, 3.0, 4.0, 6.0]);
    }

    #[test]
    fn metric_name_suffix_selects_the_bucket_schedule() {
        // Names ending in `_seconds` get the time-oriented schedule; all other names get the
        // generic non-time schedule, whose first upper bound is 1.0.
        let time = AggregatedHistogram::new("request_latency_seconds");
        let non_time = AggregatedHistogram::new("request_count");

        let (time_first, _) = time.buckets().next().expect("time schedule has buckets");
        let (non_time_first, _) = non_time
            .buckets()
            .next()
            .expect("non-time schedule has buckets");

        assert_eq!(
            non_time_first, "1",
            "the non-time schedule starts at an upper bound of 1.0"
        );
        assert_ne!(
            time_first, non_time_first,
            "the `_seconds` suffix must select a different, time-oriented bucket schedule"
        );
    }

    #[test]
    fn adds_weighted_samples_into_cumulative_buckets() {
        // On the non-time schedule (1, 1.5, 2, 3, 4, ...), one value of 2.0 with weight 3 grows the
        // sum and count by its weight and bumps every cumulative "le" bucket whose upper bound is
        // at least 2.0.
        let mut histogram = AggregatedHistogram::new("queue_depth");
        histogram.add_sample(2.0, 3);

        assert_eq!(histogram.count(), 3);
        assert_eq!(histogram.sum(), 6.0);

        let leading: Vec<(&str, u64)> = histogram.buckets().take(4).collect();
        assert_eq!(leading, [("1", 0), ("1.5", 0), ("2", 3), ("3", 3)]);
    }

    #[test]
    fn merge_sums_counts_sum_and_per_bucket_counts() {
        let mut left = AggregatedHistogram::new("queue_depth");
        left.add_sample(2.0, 1);

        let mut right = AggregatedHistogram::new("queue_depth");
        right.add_sample(4.0, 2);

        left.merge(&right);

        // The count is the total weight (1 + 2); the sum is weighted (2.0*1 + 4.0*2).
        assert_eq!(left.count(), 3);
        assert_eq!(left.sum(), 10.0);

        // Only the 2.0 sample (weight 1) lands in the `2` bucket; both samples land in `4`.
        let buckets: Vec<(&str, u64)> = left.buckets().collect();
        assert_eq!(buckets.get(2), Some(&("2", 1)));
        assert_eq!(buckets.get(4), Some(&("4", 3)));
    }
}
saluki_core/observability/metrics/histogram.rs

saluki_core/observability/metrics/
histogram.rs