saluki_core/data_model/event/metric/value/
histogram.rs

1use std::{fmt, num::NonZeroU64};
2
3use ordered_float::OrderedFloat;
4use smallvec::SmallVec;
5
6use super::{SampleRate, TimestampedValue, TimestampedValues};
7
8/// A weighted sample.
9///
10/// A weighted sample contains a value and a weight. The weight corresponds to the sample rate of the value, where the
11/// value can be considered to be responsible for `weight` samples overall. For example, if a metric was emitted 10
12/// times with the same value, you could instead emit it once with a sample rate of 10%, which would be equivalent to a
13/// weight of 10 (1/0.1).
14#[derive(Clone, Debug, Eq, PartialEq)]
15pub struct WeightedSample {
16    /// The sample value.
17    pub value: OrderedFloat<f64>,
18
19    /// The sample weight.
20    pub weight: u64,
21}
22
23/// A basic histogram.
24#[derive(Clone, Debug, Default, Eq, PartialEq)]
25pub struct Histogram {
26    sum: OrderedFloat<f64>,
27    samples: SmallVec<[WeightedSample; 3]>,
28}
29
30impl Histogram {
31    /// Insert a sample into the histogram.
32    pub fn insert(&mut self, value: f64, sample_rate: SampleRate) {
33        self.sum += value * sample_rate.raw_weight();
34        self.samples.push(WeightedSample {
35            value: OrderedFloat(value),
36            weight: sample_rate.weight(),
37        });
38    }
39
40    /// Returns a summary view over the histogram.
41    ///
42    /// The summary view contains basic aggregates (sum, count, min, max, etc) and allows querying the histogram for
43    /// various quantiles.
44    pub fn summary_view(&mut self) -> HistogramSummary<'_> {
45        // Sort the samples by their value, lowest to highest.
46        //
47        // This is required as we depend on it as an invariant in `HistogramSummary` to quickly answer aggregates like
48        // minimum and maximum, as well as quantile queries.
49        self.samples.sort_unstable_by_key(|sample| sample.value);
50
51        let mut count = 0;
52        let mut sum = 0.0;
53        for sample in &self.samples {
54            count += sample.weight;
55            sum += sample.value.0 * sample.weight as f64;
56        }
57
58        HistogramSummary {
59            histogram: self,
60            count,
61            sum,
62        }
63    }
64
65    /// Merges another histogram into this one.
66    pub fn merge(&mut self, other: &mut Histogram) {
67        self.sum += other.sum;
68        self.samples.extend(other.samples.drain(..));
69    }
70
71    /// Returns the raw weighted samples of the histogram.
72    pub fn samples(&self) -> &[WeightedSample] {
73        &self.samples
74    }
75}
76
77/// Summary view over a [`Histogram`].
78pub struct HistogramSummary<'a> {
79    histogram: &'a Histogram,
80    count: u64,
81    sum: f64,
82}
83
84impl HistogramSummary<'_> {
85    /// Returns the number of samples in the histogram.
86    ///
87    /// This is adjusted by the weight of each sample, based on the sample rate given during insertion.
88    pub fn count(&self) -> u64 {
89        self.count
90    }
91
92    /// Returns the sum of all samples in the histogram.
93    ///
94    /// This is adjusted by the weight of each sample, based on the sample rate given during insertion.
95    pub fn sum(&self) -> f64 {
96        self.sum
97    }
98
99    /// Returns the minimum value in the histogram.
100    ///
101    /// If the histogram is empty, `None` will be returned.
102    pub fn min(&self) -> Option<f64> {
103        // NOTE: Samples are sorted before `HistogramSummary` is created, so we know that the first value is the minimum.
104        self.histogram.samples.first().map(|sample| sample.value.0)
105    }
106
107    /// Returns the maximum value in the histogram.
108    ///
109    /// If the histogram is empty, `None` will be returned.
110    pub fn max(&self) -> Option<f64> {
111        // NOTE: Samples are sorted before `HistogramSummary` is created, so we know that the last value is the maximum.
112        self.histogram.samples.last().map(|sample| sample.value.0)
113    }
114
115    /// Returns the average value in the histogram.
116    pub fn avg(&self) -> f64 {
117        self.sum / self.count as f64
118    }
119
120    /// Returns the median value in the histogram.
121    ///
122    /// If the histogram is empty, `None` will be returned.
123    pub fn median(&self) -> Option<f64> {
124        self.quantile(0.5)
125    }
126
127    /// Returns the given quantile in the histogram.
128    ///
129    /// If the histogram is empty, or if `quantile` is less than 0.0 or greater than 1.0, `None` will be returned.
130    pub fn quantile(&self, quantile: f64) -> Option<f64> {
131        if !(0.0..=1.0).contains(&quantile) {
132            return None;
133        }
134
135        let scaled_quantile = (quantile * 1000.0) as u64 / 10;
136        let target = (scaled_quantile * self.count - 1) / 100;
137
138        let mut weight = 0;
139        for sample in &self.histogram.samples {
140            weight += sample.weight;
141            if weight > target {
142                return Some(sample.value.0);
143            }
144        }
145
146        None
147    }
148}
149
150/// A set of histogram points.
151///
152/// Used to represent the data points of histograms. Each data point is attached to an optional timestamp.
153///
154/// Histograms are conceptually similar to sketches, but hold all raw samples directly and thus have a slightly worse
155/// memory efficiency as the number of samples grows.
156#[derive(Clone, Debug, Eq, PartialEq)]
157pub struct HistogramPoints(TimestampedValues<Histogram, 1>);
158
159impl HistogramPoints {
160    pub(super) fn inner(&self) -> &TimestampedValues<Histogram, 1> {
161        &self.0
162    }
163
164    pub(super) fn inner_mut(&mut self) -> &mut TimestampedValues<Histogram, 1> {
165        &mut self.0
166    }
167
168    pub(super) fn drain_timestamped(&mut self) -> Self {
169        Self(self.0.drain_timestamped())
170    }
171
172    pub(super) fn split_at_timestamp(&mut self, timestamp: u64) -> Option<Self> {
173        self.0.split_at_timestamp(timestamp).map(Self)
174    }
175
176    /// Returns `true` if this set is empty.
177    pub fn is_empty(&self) -> bool {
178        self.0.values.is_empty()
179    }
180
181    /// Returns the number of points in this set.
182    pub fn len(&self) -> usize {
183        self.0.values.len()
184    }
185
186    /// Merges another set of points into this one.
187    ///
188    /// If a point with the same timestamp exists in both sets, the histograms will be merged together. Otherwise, the
189    /// points will appended to the end of the set.
190    pub fn merge(&mut self, other: Self) {
191        let mut needs_sort = false;
192        for mut other_value in other.0.values {
193            if let Some(existing_value) = self
194                .0
195                .values
196                .iter_mut()
197                .find(|value| value.timestamp == other_value.timestamp)
198            {
199                existing_value.value.merge(&mut other_value.value);
200            } else {
201                self.0.values.push(other_value);
202                needs_sort = true;
203            }
204        }
205
206        if needs_sort {
207            self.0.sort_by_timestamp();
208        }
209    }
210}
211
212impl From<Histogram> for HistogramPoints {
213    fn from(value: Histogram) -> Self {
214        Self(TimestampedValue::from(value).into())
215    }
216}
217
218impl From<f64> for HistogramPoints {
219    fn from(value: f64) -> Self {
220        let mut histogram = Histogram::default();
221        histogram.insert(value, SampleRate::unsampled());
222
223        Self(TimestampedValue::from(histogram).into())
224    }
225}
226
227impl From<(u64, f64)> for HistogramPoints {
228    fn from((ts, value): (u64, f64)) -> Self {
229        let mut histogram = Histogram::default();
230        histogram.insert(value, SampleRate::unsampled());
231
232        Self(TimestampedValue::from((ts, histogram)).into())
233    }
234}
235
236impl<const N: usize> From<[f64; N]> for HistogramPoints {
237    fn from(values: [f64; N]) -> Self {
238        let mut histogram = Histogram::default();
239        for value in values {
240            histogram.insert(value, SampleRate::unsampled());
241        }
242
243        Self(TimestampedValue::from(histogram).into())
244    }
245}
246
247impl<const N: usize> From<(u64, [f64; N])> for HistogramPoints {
248    fn from((ts, values): (u64, [f64; N])) -> Self {
249        let mut histogram = Histogram::default();
250        for value in values {
251            histogram.insert(value, SampleRate::unsampled());
252        }
253
254        Self(TimestampedValue::from((ts, histogram)).into())
255    }
256}
257
258impl<const N: usize> From<[(u64, f64); N]> for HistogramPoints {
259    fn from(values: [(u64, f64); N]) -> Self {
260        Self(
261            values
262                .into_iter()
263                .map(|(ts, value)| {
264                    let mut histogram = Histogram::default();
265                    histogram.insert(value, SampleRate::unsampled());
266
267                    (ts, histogram)
268                })
269                .into(),
270        )
271    }
272}
273
274impl<'a> From<&'a [f64]> for HistogramPoints {
275    fn from(values: &'a [f64]) -> Self {
276        let mut histogram = Histogram::default();
277        for value in values {
278            histogram.insert(*value, SampleRate::unsampled());
279        }
280
281        Self(TimestampedValue::from(histogram).into())
282    }
283}
284
285impl<'a, const N: usize> From<&'a [f64; N]> for HistogramPoints {
286    fn from(values: &'a [f64; N]) -> Self {
287        let mut histogram = Histogram::default();
288        for value in values {
289            histogram.insert(*value, SampleRate::unsampled());
290        }
291
292        Self(TimestampedValue::from(histogram).into())
293    }
294}
295
296impl IntoIterator for HistogramPoints {
297    type Item = (Option<NonZeroU64>, Histogram);
298    type IntoIter = HistogramIter;
299
300    fn into_iter(self) -> Self::IntoIter {
301        HistogramIter {
302            inner: self.0.values.into_iter(),
303        }
304    }
305}
306
307impl<'a> IntoIterator for &'a HistogramPoints {
308    type Item = (Option<NonZeroU64>, &'a Histogram);
309    type IntoIter = HistogramIterRef<'a>;
310
311    fn into_iter(self) -> Self::IntoIter {
312        HistogramIterRef {
313            inner: self.0.values.iter(),
314        }
315    }
316}
317
318impl<'a> IntoIterator for &'a mut HistogramPoints {
319    type Item = (Option<NonZeroU64>, &'a mut Histogram);
320    type IntoIter = HistogramIterRefMut<'a>;
321
322    fn into_iter(self) -> Self::IntoIter {
323        HistogramIterRefMut {
324            inner: self.0.values.iter_mut(),
325        }
326    }
327}
328
329impl fmt::Display for HistogramPoints {
330    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
331        write!(f, "[")?;
332        for (i, point) in self.0.values.iter().enumerate() {
333            if i > 0 {
334                write!(f, ",")?;
335            }
336
337            let ts = point.timestamp.map(|ts| ts.get()).unwrap_or_default();
338            write!(f, "({}, [", ts)?;
339            for (j, sample) in point.value.samples().iter().enumerate() {
340                if j > 0 {
341                    write!(f, ",")?;
342                }
343                write!(f, "{{{} * {}}}", sample.value, sample.weight)?;
344            }
345            write!(f, "])")?;
346        }
347        write!(f, "]")
348    }
349}
350
351pub struct HistogramIter {
352    inner: smallvec::IntoIter<[TimestampedValue<Histogram>; 1]>,
353}
354
355impl Iterator for HistogramIter {
356    type Item = (Option<NonZeroU64>, Histogram);
357
358    fn next(&mut self) -> Option<Self::Item> {
359        self.inner.next().map(|value| (value.timestamp, value.value))
360    }
361}
362
363pub struct HistogramIterRef<'a> {
364    inner: std::slice::Iter<'a, TimestampedValue<Histogram>>,
365}
366
367impl<'a> Iterator for HistogramIterRef<'a> {
368    type Item = (Option<NonZeroU64>, &'a Histogram);
369
370    fn next(&mut self) -> Option<Self::Item> {
371        self.inner.next().map(|value| (value.timestamp, &value.value))
372    }
373}
374
375pub struct HistogramIterRefMut<'a> {
376    inner: std::slice::IterMut<'a, TimestampedValue<Histogram>>,
377}
378
379impl<'a> Iterator for HistogramIterRefMut<'a> {
380    type Item = (Option<NonZeroU64>, &'a mut Histogram);
381
382    fn next(&mut self) -> Option<Self::Item> {
383        self.inner.next().map(|value| (value.timestamp, &mut value.value))
384    }
385}