ddsketch_agent/
lib.rs

1//! A DDSketch implementation based on the Datadog Agent's DDSketch implementation.
2#![deny(warnings)]
3#![deny(missing_docs)]
4use std::{cmp::Ordering, mem};
5
6use datadog_protos::metrics::Dogsketch;
7use float_cmp::ApproxEqRatio as _;
8use ordered_float::OrderedFloat;
9use smallvec::SmallVec;
10
11#[allow(dead_code)]
12mod config {
13    include!(concat!(env!("OUT_DIR"), "/config.rs"));
14}
15
16static SKETCH_CONFIG: Config = Config::new(
17    config::DDSKETCH_CONF_BIN_LIMIT,
18    config::DDSKETCH_CONF_GAMMA_V,
19    config::DDSKETCH_CONF_GAMMA_LN,
20    config::DDSKETCH_CONF_NORM_MIN,
21    config::DDSKETCH_CONF_NORM_BIAS,
22);
23const UV_INF: i16 = i16::MAX;
24const MAX_KEY: i16 = UV_INF;
25const MAX_BIN_WIDTH: u16 = u16::MAX;
26
27#[derive(Copy, Clone, Debug, PartialEq)]
28struct Config {
29    // Maximum number of bins per sketch.
30    bin_limit: u16,
31
32    // gamma_ln is the natural log of gamma_v, used to speed up calculating log base gamma.
33    gamma_v: f64,
34    gamma_ln: f64,
35
36    // Minimum and maximum values representable by a sketch with these params.
37    //
38    // key(x) =
39    //    0 : -min > x < min
40    //    1 : x == min
41    //   -1 : x == -min
42    // +Inf : x > max
43    // -Inf : x < -max.
44    norm_min: f64,
45
46    // Bias of the exponent, used to ensure key(x) >= 1.
47    norm_bias: i32,
48}
49
50impl Config {
51    const fn new(bin_limit: u16, gamma_v: f64, gamma_ln: f64, norm_min: f64, norm_bias: i32) -> Self {
52        Self {
53            bin_limit,
54            gamma_v,
55            gamma_ln,
56            norm_min,
57            norm_bias,
58        }
59    }
60
61    /// Gets the value lower bound of the bin at the given key.
62    #[inline]
63    pub fn bin_lower_bound(&self, k: i16) -> f64 {
64        if k < 0 {
65            return -self.bin_lower_bound(-k);
66        }
67
68        if k == MAX_KEY {
69            return f64::INFINITY;
70        }
71
72        if k == 0 {
73            return 0.0;
74        }
75
76        self.gamma_v.powf(f64::from(i32::from(k) - self.norm_bias))
77    }
78
79    /// Gets the key for the given value.
80    ///
81    /// The key corresponds to the bin where this value would be represented. The value returned here is such that: γ^k
82    /// <= v < γ^(k+1).
83    #[allow(clippy::cast_possible_truncation)]
84    #[inline]
85    pub fn key(&self, v: f64) -> i16 {
86        if v < 0.0 {
87            return -self.key(-v);
88        }
89
90        if v == 0.0 || (v > 0.0 && v < self.norm_min) || (v < 0.0 && v > -self.norm_min) {
91            return 0;
92        }
93
94        // SAFETY: `rounded` is intentionally meant to be a whole integer, and additionally, based on our target gamma
95        // ln, we expect `log_gamma` to return a value between -2^16 and 2^16, so it will always fit in an i32.
96        let rounded = self.log_gamma(v).round_ties_even() as i32;
97        let key = rounded.wrapping_add(self.norm_bias);
98
99        // SAFETY: Our upper bound of POS_INF_KEY is i16, and our lower bound is simply one, so there is no risk of
100        // truncation via conversion.
101        key.clamp(1, i32::from(MAX_KEY)) as i16
102    }
103
104    #[inline]
105    pub fn log_gamma(&self, v: f64) -> f64 {
106        v.ln() / self.gamma_ln
107    }
108}
109
110/// A sketch bin.
111#[derive(Clone, Copy, Debug, Eq, PartialEq)]
112#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
113pub struct Bin {
114    /// The bin index.
115    k: i16,
116
117    /// The number of observations within the bin.
118    n: u16,
119}
120
121impl Bin {
122    /// Returns the key of the bin.
123    pub fn key(&self) -> i32 {
124        self.k as i32
125    }
126
127    /// Returns the number of observations within the bin.
128    pub fn count(&self) -> u32 {
129        self.n as u32
130    }
131
132    #[allow(clippy::cast_possible_truncation)]
133    fn increment(&mut self, n: u32) -> u32 {
134        let next = n + u32::from(self.n);
135        if next > u32::from(MAX_BIN_WIDTH) {
136            self.n = MAX_BIN_WIDTH;
137            return next - u32::from(MAX_BIN_WIDTH);
138        }
139
140        // SAFETY: We already know `next` is less than or equal to `MAX_BIN_WIDTH` if we got here, and `MAX_BIN_WIDTH`
141        // is u16, so next can't possibly be larger than a u16.
142        self.n = next as u16;
143        0
144    }
145}
146
147/// A histogram bucket.
148///
149/// This type can be used to represent a classic histogram bucket, such as those defined by Prometheus or OpenTelemetry,
150/// in a way that is then able to be inserted into the DDSketch via linear interpolation.
151pub struct Bucket {
152    /// The upper limit (inclusive) of values in the bucket.
153    pub upper_limit: f64,
154
155    /// The number of values in the bucket.
156    pub count: u64,
157}
158
159/// [DDSketch][ddsketch] implementation based on the [Datadog Agent][ddagent].
160///
161/// This implementation is subtly different from the open-source implementations of `DDSketch`, as Datadog made some
162/// slight tweaks to configuration values and in-memory layout to optimize it for insertion performance within the
163/// agent.
164///
165/// We've mimicked the agent version of `DDSketch` here in order to support a future where we can take sketches shipped
166/// by the agent, handle them internally, merge them, and so on, without any loss of accuracy, eventually forwarding
167/// them to Datadog ourselves.
168///
169/// As such, this implementation is constrained in the same ways: the configuration parameters cannot be changed, the
170/// collapsing strategy is fixed, and we support a limited number of methods for inserting into the sketch.
171///
172/// Importantly, we have a special function, again taken from the agent version, to allow us to interpolate histograms,
173/// specifically our own aggregated histograms, into a sketch so that we can emit useful default quantiles, rather than
174/// having to ship the buckets -- upper bound and count -- to a downstream system that might have no native way to do
175/// the same thing, basically providing no value as they have no way to render useful data from them.
176///
177/// # Features
178///
179/// This crate exposes a single feature, `serde`, which enables serialization and deserialization of `DDSketch` with
180/// `serde`. This feature is not enabled by default, as it can be slightly risky to use. This is primarily due to the
181/// fact that the format of `DDSketch` is not promised to be stable over time. If you enable this feature, you should
182/// take care to avoid storing serialized `DDSketch` data for long periods of time, as deserializing it in the future
183/// may work but could lead to incorrect/unexpected behavior or issues with correctness.
184///
185/// [ddsketch]: https://www.vldb.org/pvldb/vol12/p2195-masson.pdf
186/// [ddagent]: https://github.com/DataDog/datadog-agent
187#[derive(Clone, Debug)]
188#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
189pub struct DDSketch {
190    /// The bins within the sketch.
191    bins: SmallVec<[Bin; 4]>,
192
193    /// The number of observations within the sketch.
194    count: u32,
195
196    /// The minimum value of all observations within the sketch.
197    min: f64,
198
199    /// The maximum value of all observations within the sketch.
200    max: f64,
201
202    /// The sum of all observations within the sketch.
203    sum: f64,
204
205    /// The average value of all observations within the sketch.
206    avg: f64,
207}
208
209impl DDSketch {
210    /// Returns the number of bins in the sketch.
211    pub fn bin_count(&self) -> usize {
212        self.bins.len()
213    }
214
215    /// Whether or not this sketch is empty.
216    pub fn is_empty(&self) -> bool {
217        self.count == 0
218    }
219
220    /// Number of samples currently represented by this sketch.
221    pub fn count(&self) -> u32 {
222        self.count
223    }
224
225    /// Minimum value seen by this sketch.
226    ///
227    /// Returns `None` if the sketch is empty.
228    pub fn min(&self) -> Option<f64> {
229        if self.is_empty() {
230            None
231        } else {
232            Some(self.min)
233        }
234    }
235
236    /// Maximum value seen by this sketch.
237    ///
238    /// Returns `None` if the sketch is empty.
239    pub fn max(&self) -> Option<f64> {
240        if self.is_empty() {
241            None
242        } else {
243            Some(self.max)
244        }
245    }
246
247    /// Sum of all values seen by this sketch.
248    ///
249    /// Returns `None` if the sketch is empty.
250    pub fn sum(&self) -> Option<f64> {
251        if self.is_empty() {
252            None
253        } else {
254            Some(self.sum)
255        }
256    }
257
258    /// Average value seen by this sketch.
259    ///
260    /// Returns `None` if the sketch is empty.
261    pub fn avg(&self) -> Option<f64> {
262        if self.is_empty() {
263            None
264        } else {
265            Some(self.avg)
266        }
267    }
268
269    /// Returns the current bins of this sketch.
270    pub fn bins(&self) -> &[Bin] {
271        &self.bins
272    }
273
274    /// Clears the sketch, removing all bins and resetting all statistics.
275    pub fn clear(&mut self) {
276        self.count = 0;
277        self.min = f64::MAX;
278        self.max = f64::MIN;
279        self.avg = 0.0;
280        self.sum = 0.0;
281        self.bins.clear();
282    }
283
284    fn adjust_basic_stats(&mut self, v: f64, n: u32) {
285        if v < self.min {
286            self.min = v;
287        }
288
289        if v > self.max {
290            self.max = v;
291        }
292
293        self.count += n;
294        self.sum += v * f64::from(n);
295
296        if n == 1 {
297            self.avg += (v - self.avg) / f64::from(self.count);
298        } else {
299            // TODO: From the Agent source code, this method apparently loses precision when the
300            // two averages -- v and self.avg -- are close.  Is there a better approach?
301            self.avg = self.avg + (v - self.avg) * f64::from(n) / f64::from(self.count);
302        }
303    }
304
305    fn insert_key_counts(&mut self, mut counts: Vec<(i16, u32)>) {
306        // Counts need to be sorted by key.
307        counts.sort_unstable_by(|(k1, _), (k2, _)| k1.cmp(k2));
308
309        let mut temp = SmallVec::<[Bin; 4]>::new();
310
311        let mut bins_idx = 0;
312        let mut key_idx = 0;
313        let bins_len = self.bins.len();
314        let counts_len = counts.len();
315
316        // PERF TODO: there's probably a fast path to be had where could check if all if the counts have existing bins
317        // that aren't yet full, and we just update them directly, although we'd still be doing a linear scan to find
318        // them since keys aren't 1:1 with their position in `self.bins` but using this method just to update one or two
319        // bins is clearly suboptimal and we wouldn't really want to scan them all just to have to back out and actually
320        // do the non-fast path.. maybe a first pass could be checking if the first/last key falls within our known
321        // min/max key, and if it doesn't, then we know we have to go through the non-fast path, and if it passes, we do
322        // the scan to see if we can just update bins directly?
323        while bins_idx < bins_len && key_idx < counts_len {
324            let bin = self.bins[bins_idx];
325            let vk = counts[key_idx].0;
326            let kn = counts[key_idx].1;
327
328            match bin.k.cmp(&vk) {
329                Ordering::Greater => {
330                    generate_bins(&mut temp, vk, kn);
331                    key_idx += 1;
332                }
333                Ordering::Less => {
334                    temp.push(bin);
335                    bins_idx += 1;
336                }
337                Ordering::Equal => {
338                    generate_bins(&mut temp, bin.k, u32::from(bin.n) + kn);
339                    bins_idx += 1;
340                    key_idx += 1;
341                }
342            }
343        }
344
345        temp.extend_from_slice(&self.bins[bins_idx..]);
346
347        while key_idx < counts_len {
348            let vk = counts[key_idx].0;
349            let kn = counts[key_idx].1;
350            generate_bins(&mut temp, vk, kn);
351            key_idx += 1;
352        }
353
354        trim_left(&mut temp, SKETCH_CONFIG.bin_limit);
355
356        // PERF TODO: This is where we might do a mem::swap instead so that we could shove the bin vector into an object
357        // pool but I'm not sure this actually matters at the moment.
358        self.bins = temp;
359    }
360
361    fn insert_keys(&mut self, mut keys: Vec<i16>) {
362        // Updating more than 4 billion keys would be very very weird and likely indicative of something horribly
363        // broken.
364        assert!(keys.len() <= u32::MAX.try_into().expect("we don't support 16-bit systems"));
365
366        keys.sort_unstable();
367
368        let mut temp = SmallVec::<[Bin; 4]>::new();
369
370        let mut bins_idx = 0;
371        let mut key_idx = 0;
372        let bins_len = self.bins.len();
373        let keys_len = keys.len();
374
375        // PERF TODO: there's probably a fast path to be had where could check if all if the counts have existing bins
376        // that aren't yet full, and we just update them directly, although we'd still be doing a linear scan to find
377        // them since keys aren't 1:1 with their position in `self.bins` but using this method just to update one or two
378        // bins is clearly suboptimal and we wouldn't really want to scan them all just to have to back out and actually
379        // do the non-fast path.. maybe a first pass could be checking if the first/last key falls within our known
380        // min/max key, and if it doesn't, then we know we have to go through the non-fast path, and if it passes, we do
381        // the scan to see if we can just update bins directly?
382        while bins_idx < bins_len && key_idx < keys_len {
383            let bin = self.bins[bins_idx];
384            let vk = keys[key_idx];
385
386            match bin.k.cmp(&vk) {
387                Ordering::Greater => {
388                    let kn = buf_count_leading_equal(&keys, key_idx);
389                    generate_bins(&mut temp, vk, kn);
390                    key_idx += kn as usize;
391                }
392                Ordering::Less => {
393                    temp.push(bin);
394                    bins_idx += 1;
395                }
396                Ordering::Equal => {
397                    let kn = buf_count_leading_equal(&keys, key_idx);
398                    generate_bins(&mut temp, bin.k, u32::from(bin.n) + kn);
399                    bins_idx += 1;
400                    key_idx += kn as usize;
401                }
402            }
403        }
404
405        temp.extend_from_slice(&self.bins[bins_idx..]);
406
407        while key_idx < keys_len {
408            let vk = keys[key_idx];
409            let kn = buf_count_leading_equal(&keys, key_idx);
410            generate_bins(&mut temp, vk, kn);
411            key_idx += kn as usize;
412        }
413
414        trim_left(&mut temp, SKETCH_CONFIG.bin_limit);
415
416        // PERF TODO: This is where we might do a mem::swap instead so that we could shove the bin vector into an object
417        // pool but I'm not sure this actually matters at the moment.
418        self.bins = temp;
419    }
420
421    /// Inserts a single value into the sketch.
422    pub fn insert(&mut self, v: f64) {
423        // TODO: This should return a result that makes sure we have enough room to actually add 1 more sample without
424        // hitting `self.config.max_count()`
425        self.adjust_basic_stats(v, 1);
426
427        let key = SKETCH_CONFIG.key(v);
428
429        let mut insert_at = None;
430
431        for (bin_idx, b) in self.bins.iter_mut().enumerate() {
432            if b.k == key {
433                if b.n < MAX_BIN_WIDTH {
434                    // Fast path for adding to an existing bin without overflow.
435                    b.n += 1;
436                    return;
437                } else {
438                    insert_at = Some(bin_idx);
439                    break;
440                }
441            }
442            if b.k > key {
443                insert_at = Some(bin_idx);
444                break;
445            }
446        }
447
448        if let Some(bin_idx) = insert_at {
449            self.bins.insert(bin_idx, Bin { k: key, n: 1 });
450        } else {
451            self.bins.push(Bin { k: key, n: 1 });
452        }
453        trim_left(&mut self.bins, SKETCH_CONFIG.bin_limit);
454    }
455
456    /// Inserts many values into the sketch.
457    pub fn insert_many(&mut self, vs: &[f64]) {
458        // TODO: This should return a result that makes sure we have enough room to actually add N more samples without
459        // hitting `self.config.bin_limit`.
460        let mut keys = Vec::with_capacity(vs.len());
461        for v in vs {
462            self.adjust_basic_stats(*v, 1);
463            keys.push(SKETCH_CONFIG.key(*v));
464        }
465        self.insert_keys(keys);
466    }
467
468    /// Inserts a single value into the sketch `n` times.
469    pub fn insert_n(&mut self, v: f64, n: u32) {
470        // TODO: this should return a result that makes sure we have enough room to actually add N more samples without
471        // hitting `self.config.max_count()`
472        self.adjust_basic_stats(v, n);
473
474        let key = SKETCH_CONFIG.key(v);
475        self.insert_key_counts(vec![(key, n)]);
476    }
477
478    fn insert_interpolate_bucket(&mut self, lower: f64, upper: f64, count: u32) {
479        // Find the keys for the bins where the lower bound and upper bound would end up, and collect all of the keys in
480        // between, inclusive.
481        let lower_key = SKETCH_CONFIG.key(lower);
482        let upper_key = SKETCH_CONFIG.key(upper);
483        let keys = (lower_key..=upper_key).collect::<Vec<_>>();
484
485        let mut key_counts = Vec::new();
486        let mut remaining_count = count;
487        let distance = upper - lower;
488        let mut start_idx = 0;
489        let mut end_idx = 1;
490        let mut lower_bound = SKETCH_CONFIG.bin_lower_bound(keys[start_idx]);
491        let mut remainder = 0.0;
492
493        while end_idx < keys.len() && remaining_count > 0 {
494            // For each key, map the total distance between the input lower/upper bound against the sketch lower/upper
495            // bound for the current sketch bin, which tells us how much of the input count to apply to the current
496            // sketch bin.
497            let upper_bound = SKETCH_CONFIG.bin_lower_bound(keys[end_idx]);
498            let fkn = ((upper_bound - lower_bound) / distance) * f64::from(count);
499            if fkn > 1.0 {
500                remainder += fkn - fkn.trunc();
501            }
502
503            // SAFETY: This integer cast is intentional: we want to get the non-fractional part, as we've captured the
504            // fractional part in the above conditional.
505            #[allow(clippy::cast_possible_truncation)]
506            let mut kn = fkn as u32;
507            if remainder > 1.0 {
508                kn += 1;
509                remainder -= 1.0;
510            }
511
512            if kn > 0 {
513                if kn > remaining_count {
514                    kn = remaining_count;
515                }
516
517                self.adjust_basic_stats(lower_bound, kn);
518                key_counts.push((keys[start_idx], kn));
519
520                remaining_count -= kn;
521                start_idx = end_idx;
522                lower_bound = upper_bound;
523            }
524
525            end_idx += 1;
526        }
527
528        if remaining_count > 0 {
529            let last_key = keys[start_idx];
530            lower_bound = SKETCH_CONFIG.bin_lower_bound(last_key);
531            self.adjust_basic_stats(lower_bound, remaining_count);
532            key_counts.push((last_key, remaining_count));
533        }
534
535        self.insert_key_counts(key_counts);
536    }
537
538    /// ## Errors
539    ///
540    /// Returns an error if a bucket size is greater that `u32::MAX`.
541    pub fn insert_interpolate_buckets(&mut self, mut buckets: Vec<Bucket>) -> Result<(), &'static str> {
542        // Buckets need to be sorted from lowest to highest so that we can properly calculate the rolling lower/upper
543        // bounds.
544        buckets.sort_by(|a, b| {
545            let oa = OrderedFloat(a.upper_limit);
546            let ob = OrderedFloat(b.upper_limit);
547
548            oa.cmp(&ob)
549        });
550
551        let mut lower = f64::NEG_INFINITY;
552
553        if buckets.iter().any(|bucket| bucket.count > u64::from(u32::MAX)) {
554            return Err("bucket size greater than u32::MAX");
555        }
556
557        for bucket in buckets {
558            let mut upper = bucket.upper_limit;
559            if upper.is_sign_positive() && upper.is_infinite() {
560                upper = lower;
561            } else if lower.is_sign_negative() && lower.is_infinite() {
562                lower = upper;
563            }
564
565            // Each bucket should only have the values that fit within that bucket, which is generally enforced at the
566            // source level by converting from cumulative buckets, or enforced by the internal structures that hold
567            // bucketed data i.e. Vector's internal `Histogram` data structure used for collecting histograms from
568            // `metrics`.
569            let count =
570                u32::try_from(bucket.count).unwrap_or_else(|_| unreachable!("count range has already been checked."));
571
572            self.insert_interpolate_bucket(lower, upper, count);
573            lower = bucket.upper_limit;
574        }
575
576        Ok(())
577    }
578
579    /// Adds a bin directly into the sketch.
580    ///
581    /// Used only for unit testing so that we can create a sketch with an exact layout, which allows testing around the
582    /// resulting bins when feeding in specific values, as well as generating explicitly bad layouts for testing.
583    #[allow(dead_code)]
584    pub(crate) fn insert_raw_bin(&mut self, k: i16, n: u16) {
585        let v = SKETCH_CONFIG.bin_lower_bound(k);
586        self.adjust_basic_stats(v, u32::from(n));
587        self.bins.push(Bin { k, n });
588    }
589
590    /// Gets the value at a given quantile.
591    pub fn quantile(&self, q: f64) -> Option<f64> {
592        if self.count == 0 {
593            return None;
594        }
595
596        if q <= 0.0 {
597            return Some(self.min);
598        }
599
600        if q >= 1.0 {
601            return Some(self.max);
602        }
603
604        let mut n = 0.0;
605        let mut estimated = None;
606        let wanted_rank = rank(self.count, q);
607
608        for (i, bin) in self.bins.iter().enumerate() {
609            n += f64::from(bin.n);
610            if n <= wanted_rank {
611                continue;
612            }
613
614            let weight = (n - wanted_rank) / f64::from(bin.n);
615            let mut v_low = SKETCH_CONFIG.bin_lower_bound(bin.k);
616            let mut v_high = v_low * SKETCH_CONFIG.gamma_v;
617
618            if i == self.bins.len() {
619                v_high = self.max;
620            } else if i == 0 {
621                v_low = self.min;
622            }
623
624            estimated = Some(v_low * weight + v_high * (1.0 - weight));
625            break;
626        }
627
628        estimated.map(|v| v.clamp(self.min, self.max)).or(Some(f64::NAN))
629    }
630
631    /// Merges another sketch into this sketch, without a loss of accuracy.
632    ///
633    /// All samples present in the other sketch will be correctly represented in this sketch, and summary statistics
634    /// such as the sum, average, count, min, and max, will represent the sum of samples from both sketches.
635    pub fn merge(&mut self, other: &DDSketch) {
636        // Merge the basic statistics together.
637        self.count += other.count;
638        if other.max > self.max {
639            self.max = other.max;
640        }
641        if other.min < self.min {
642            self.min = other.min;
643        }
644        self.sum += other.sum;
645        self.avg = self.avg + (other.avg - self.avg) * f64::from(other.count) / f64::from(self.count);
646
647        // Now merge the bins.
648        let mut temp = SmallVec::<[Bin; 4]>::new();
649
650        let mut bins_idx = 0;
651        for other_bin in &other.bins {
652            let start = bins_idx;
653            while bins_idx < self.bins.len() && self.bins[bins_idx].k < other_bin.k {
654                bins_idx += 1;
655            }
656
657            temp.extend_from_slice(&self.bins[start..bins_idx]);
658
659            if bins_idx >= self.bins.len() || self.bins[bins_idx].k > other_bin.k {
660                temp.push(*other_bin);
661            } else if self.bins[bins_idx].k == other_bin.k {
662                generate_bins(
663                    &mut temp,
664                    other_bin.k,
665                    u32::from(other_bin.n) + u32::from(self.bins[bins_idx].n),
666                );
667                bins_idx += 1;
668            }
669        }
670
671        temp.extend_from_slice(&self.bins[bins_idx..]);
672        trim_left(&mut temp, SKETCH_CONFIG.bin_limit);
673
674        self.bins = temp;
675    }
676
677    /// Merges this sketch into the `Dogsketch` Protocol Buffers representation.
678    pub fn merge_to_dogsketch(&self, dogsketch: &mut Dogsketch) {
679        dogsketch.set_cnt(i64::from(self.count));
680        dogsketch.set_min(self.min);
681        dogsketch.set_max(self.max);
682        dogsketch.set_avg(self.avg);
683        dogsketch.set_sum(self.sum);
684
685        let mut k = Vec::new();
686        let mut n = Vec::new();
687
688        for bin in &self.bins {
689            k.push(i32::from(bin.k));
690            n.push(u32::from(bin.n));
691        }
692
693        dogsketch.set_k(k);
694        dogsketch.set_n(n);
695    }
696}
697
698impl PartialEq for DDSketch {
699    fn eq(&self, other: &Self) -> bool {
700        // We skip checking the configuration because we don't allow creating configurations by hand, and it's always
701        // locked to the constants used by the Datadog Agent.  We only check the configuration equality manually in
702        // `DDSketch::merge`, to protect ourselves in the future if different configurations become allowed.
703        //
704        // Additionally, we also use floating-point-specific relative comparisons for sum/avg because they can be
705        // minimally different between sketches purely due to floating-point behavior, despite being fed the same exact
706        // data in terms of recorded samples.
707        self.count == other.count
708            && float_eq(self.min, other.min)
709            && float_eq(self.max, other.max)
710            && float_eq(self.sum, other.sum)
711            && float_eq(self.avg, other.avg)
712            && self.bins == other.bins
713    }
714}
715
716impl Default for DDSketch {
717    fn default() -> Self {
718        Self {
719            bins: SmallVec::new(),
720            count: 0,
721            min: f64::MAX,
722            max: f64::MIN,
723            sum: 0.0,
724            avg: 0.0,
725        }
726    }
727}
728
729impl Eq for DDSketch {}
730
731impl TryFrom<Dogsketch> for DDSketch {
732    type Error = &'static str;
733
734    fn try_from(value: Dogsketch) -> Result<Self, Self::Error> {
735        let mut sketch = DDSketch {
736            count: u32::try_from(value.cnt).map_err(|_| "sketch count overflows u32")?,
737            min: value.min,
738            max: value.max,
739            avg: value.avg,
740            sum: value.sum,
741            ..Default::default()
742        };
743
744        let k = value.k;
745        let n = value.n;
746
747        if k.len() != n.len() {
748            return Err("k and n bin vectors have differing lengths");
749        }
750
751        for (k, n) in k.into_iter().zip(n.into_iter()) {
752            let k = i16::try_from(k).map_err(|_| "bin key overflows i16")?;
753            let n = u16::try_from(n).map_err(|_| "bin count overflows u16")?;
754
755            sketch.bins.push(Bin { k, n });
756        }
757
758        Ok(sketch)
759    }
760}
761
762fn float_eq(l_value: f64, r_value: f64) -> bool {
763    // When comparing two values, the smaller value cannot deviate by more than 0.0000001% of the larger value.
764    const RATIO_ERROR: f64 = 0.00000001;
765
766    (l_value.is_nan() && r_value.is_nan()) || l_value.approx_eq_ratio(&r_value, RATIO_ERROR)
767}
768
769fn rank(count: u32, q: f64) -> f64 {
770    let rank = q * f64::from(count - 1);
771    rank.round_ties_even()
772}
773
774#[allow(clippy::cast_possible_truncation)]
775fn buf_count_leading_equal(keys: &[i16], start_idx: usize) -> u32 {
776    if start_idx == keys.len() - 1 {
777        return 1;
778    }
779
780    let mut idx = start_idx;
781    while idx < keys.len() && keys[idx] == keys[start_idx] {
782        idx += 1;
783    }
784
785    // SAFETY: We limit the size of the vector (used to provide the slice given to us here) to be no larger than 2^32,
786    // so we can't exceed u32 here.
787    (idx - start_idx) as u32
788}
789
790fn trim_left(bins: &mut SmallVec<[Bin; 4]>, bin_limit: u16) {
791    // We won't ever support Vector running on anything other than a 32-bit platform and above, I imagine, so this
792    // should always be safe.
793    let bin_limit = bin_limit as usize;
794    if bin_limit == 0 || bins.len() < bin_limit {
795        return;
796    }
797
798    let num_to_remove = bins.len() - bin_limit;
799    let mut missing = 0;
800    let mut overflow = SmallVec::<[Bin; 4]>::new();
801
802    for bin in bins.iter().take(num_to_remove) {
803        missing += u32::from(bin.n);
804
805        if missing > u32::from(MAX_BIN_WIDTH) {
806            overflow.push(Bin {
807                k: bin.k,
808                n: MAX_BIN_WIDTH,
809            });
810
811            missing -= u32::from(MAX_BIN_WIDTH);
812        }
813    }
814
815    let bin_remove = &mut bins[num_to_remove];
816    missing = bin_remove.increment(missing);
817    if missing > 0 {
818        generate_bins(&mut overflow, bin_remove.k, missing);
819    }
820
821    let overflow_len = overflow.len();
822    let (_, bins_end) = bins.split_at(num_to_remove);
823    overflow.extend_from_slice(bins_end);
824
825    // I still don't yet understand how this works, since you'd think bin limit should be the overall limit of the
826    // number of bins, but we're allowing more than that.. :thinkies:
827    overflow.truncate(bin_limit + overflow_len);
828
829    mem::swap(bins, &mut overflow);
830}
831
832#[allow(clippy::cast_possible_truncation)]
833fn generate_bins(bins: &mut SmallVec<[Bin; 4]>, k: i16, n: u32) {
834    if n < u32::from(MAX_BIN_WIDTH) {
835        // SAFETY: Cannot truncate `n`, as it's less than a u16 value.
836        bins.push(Bin { k, n: n as u16 });
837    } else {
838        let overflow = n % u32::from(MAX_BIN_WIDTH);
839        if overflow != 0 {
840            bins.push(Bin {
841                k,
842                // SAFETY: Cannot truncate `overflow`, as it's modulo'd by a u16 value.
843                n: overflow as u16,
844            });
845        }
846
847        for _ in 0..(n / u32::from(MAX_BIN_WIDTH)) {
848            bins.push(Bin { k, n: MAX_BIN_WIDTH });
849        }
850    }
851}
852
853#[cfg(test)]
854mod tests {
855    use super::{config::AGENT_DEFAULT_EPS, Bucket, Config, DDSketch, MAX_KEY, SKETCH_CONFIG};
856
857    const FLOATING_POINT_ACCEPTABLE_ERROR: f64 = 1.0e-10;
858
859    #[test]
860    fn test_ddsketch_config_key_lower_bound_identity() {
861        for k in (-MAX_KEY + 1)..MAX_KEY {
862            assert_eq!(k, SKETCH_CONFIG.key(SKETCH_CONFIG.bin_lower_bound(k)));
863        }
864    }
865
866    #[test]
867    fn test_ddsketch_basic() {
868        let mut sketch = DDSketch::default();
869        assert!(sketch.is_empty());
870        assert_eq!(sketch.count(), 0);
871        assert_eq!(sketch.min(), None);
872        assert_eq!(sketch.max(), None);
873        assert_eq!(sketch.sum(), None);
874        assert_eq!(sketch.avg(), None);
875
876        sketch.insert(3.15);
877        assert!(!sketch.is_empty());
878        assert_eq!(sketch.count(), 1);
879        assert_eq!(sketch.min(), Some(3.15));
880        assert_eq!(sketch.max(), Some(3.15));
881        assert_eq!(sketch.sum(), Some(3.15));
882        assert_eq!(sketch.avg(), Some(3.15));
883
884        sketch.insert(2.28);
885        assert!(!sketch.is_empty());
886        assert_eq!(sketch.count(), 2);
887        assert_eq!(sketch.min(), Some(2.28));
888        assert_eq!(sketch.max(), Some(3.15));
889        assert_eq!(sketch.sum(), Some(5.43));
890        assert_eq!(sketch.avg(), Some(2.715));
891    }
892
893    #[test]
894    fn test_ddsketch_clear() {
895        let sketch1 = DDSketch::default();
896        let mut sketch2 = DDSketch::default();
897
898        assert_eq!(sketch1, sketch2);
899        assert!(sketch1.is_empty());
900        assert!(sketch2.is_empty());
901
902        sketch2.insert(3.15);
903        assert_ne!(sketch1, sketch2);
904        assert!(!sketch2.is_empty());
905
906        sketch2.clear();
907        assert_eq!(sketch1, sketch2);
908        assert!(sketch2.is_empty());
909    }
910
911    #[test]
912    fn test_ddsketch_neg_to_pos() {
913        // This gives us 10k values because otherwise this test runs really slow in debug mode.
914        let start = -1.0;
915        let end = 1.0;
916        let delta = 0.0002;
917
918        let mut sketch = DDSketch::default();
919
920        let mut v = start;
921        while v <= end {
922            sketch.insert(v);
923
924            v += delta;
925        }
926
927        let min = sketch.quantile(0.0).expect("should have value");
928        let median = sketch.quantile(0.5).expect("should have value");
929        let max = sketch.quantile(1.0).expect("should have value");
930
931        assert_eq!(start, min);
932        assert!(median.abs() < FLOATING_POINT_ACCEPTABLE_ERROR);
933        assert!((end - max).abs() < FLOATING_POINT_ACCEPTABLE_ERROR);
934    }
935
936    #[test]
937    fn test_out_of_range_buckets_error() {
938        let mut sketch = DDSketch::default();
939
940        let buckets = vec![
941            Bucket {
942                upper_limit: 5.4,
943                count: 32,
944            },
945            Bucket {
946                upper_limit: 5.8,
947                count: u64::from(u32::MAX) + 1,
948            },
949            Bucket {
950                upper_limit: 9.2,
951                count: 320,
952            },
953        ];
954
955        assert_eq!(
956            Err("bucket size greater than u32::MAX"),
957            sketch.insert_interpolate_buckets(buckets)
958        );
959
960        // Assert the sketch remains unchanged.
961        assert_eq!(sketch, DDSketch::default());
962    }
963
964    #[test]
965    fn test_merge() {
966        let mut all_values = DDSketch::default();
967        let mut odd_values = DDSketch::default();
968        let mut even_values = DDSketch::default();
969        let mut all_values_many = DDSketch::default();
970
971        let mut values = Vec::new();
972        for i in -50..=50 {
973            let v = f64::from(i);
974
975            all_values.insert(v);
976
977            if i & 1 == 0 {
978                odd_values.insert(v);
979            } else {
980                even_values.insert(v);
981            }
982
983            values.push(v);
984        }
985
986        all_values_many.insert_many(&values);
987
988        odd_values.merge(&even_values);
989        let merged_values = odd_values;
990
991        // Number of bins should be equal to the number of values we inserted.
992        assert_eq!(all_values.bin_count(), values.len());
993
994        // Values at both ends of the quantile range should be equal.
995        let low_end = all_values.quantile(0.01).expect("should have estimated value");
996        let high_end = all_values.quantile(0.99).expect("should have estimated value");
997        assert_eq!(high_end, -low_end);
998
999        let target_bin_count = all_values.bin_count();
1000        for sketch in &[all_values, all_values_many, merged_values] {
1001            assert_eq!(sketch.quantile(0.5), Some(0.0));
1002            assert_eq!(sketch.quantile(0.0), Some(-50.0));
1003            assert_eq!(sketch.quantile(1.0), Some(50.0));
1004
1005            for p in 0..50 {
1006                let q = f64::from(p) / 100.0;
1007                let positive = sketch.quantile(q + 0.5).expect("should have estimated value");
1008                let negative = -sketch.quantile(0.5 - q).expect("should have estimated value");
1009
1010                assert!(
1011                    (positive - negative).abs() <= 1.0e-6,
1012                    "positive vs negative difference too great ({positive} vs {negative})",
1013                );
1014            }
1015
1016            assert_eq!(target_bin_count, sketch.bin_count());
1017        }
1018    }
1019
1020    #[test]
1021    fn test_relative_accuracy_fast() {
1022        // These values are based on the agent's unit tests for asserting relative accuracy of the DDSketch
1023        // implementation.  Notably, it does not seem to test the full extent of values that the open-source
1024        // implementations do, but then again... all we care about is parity with the agent version so we can pass them
1025        // through.
1026        //
1027        // Another noteworthy thing: it seems that they don't test from the actual targeted minimum value, which is
1028        // 1.0e-9, which would give nanosecond granularity vs just microsecond granularity.
1029        let min_value = 1.0;
1030        // We don't care about precision loss, just consistency.
1031        #[allow(clippy::cast_possible_truncation)]
1032        let max_value = SKETCH_CONFIG.gamma_v.powf(5.0) as f32;
1033
1034        test_relative_accuracy(&SKETCH_CONFIG, AGENT_DEFAULT_EPS, min_value, max_value);
1035    }
1036
1037    #[test]
1038    #[cfg(feature = "ddsketch_extended")]
1039    fn test_relative_accuracy_slow() {
1040        // These values are based on the agent's unit tests for asserting relative accuracy of the DDSketch
1041        // implementation.  Notably, it does not seem to test the full extent of values that the open-source
1042        // implementations do, but then again... all we care about is parity with the agent version so we can pass them
1043        // through.
1044        //
1045        // Another noteworthy thing: it seems that they don't test from the actual targeted minimum value, which is
1046        // 1.0e-9, which would give nanosecond granularity vs just microsecond granularity.
1047        //
1048        // This test uses a far larger range of values, and takes 60-70 seconds, hence why we've guarded it here behind
1049        // a cfg flag.
1050        let min_value = 1.0e-6;
1051        let max_value = i64::MAX as f32;
1052
1053        test_relative_accuracy(&SKETCH_CONFIG, AGENT_DEFAULT_EPS, min_value, max_value)
1054    }
1055
1056    fn parse_sketch_from_string_bins(layout: &str) -> DDSketch {
1057        layout
1058            .split(' ')
1059            .filter(|v| !v.is_empty())
1060            .map(|pair| pair.split(':').map(ToOwned::to_owned).collect::<Vec<_>>())
1061            .fold(DDSketch::default(), |mut sketch, mut kn| {
1062                let k = kn.remove(0).parse::<i16>().unwrap();
1063                let n = kn.remove(0).parse::<u16>().unwrap();
1064
1065                sketch.insert_raw_bin(k, n);
1066                sketch
1067            })
1068    }
1069
1070    fn compare_sketches(actual: &DDSketch, expected: &DDSketch, allowed_err: f64) {
1071        let actual_sum = actual.sum().unwrap();
1072        let expected_sum = expected.sum().unwrap();
1073        let actual_avg = actual.avg().unwrap();
1074        let expected_avg = expected.avg().unwrap();
1075        let sum_delta = (actual_sum - expected_sum).abs();
1076        let avg_delta = (actual_avg - expected_avg).abs();
1077        assert!(sum_delta <= allowed_err);
1078        assert!(avg_delta <= allowed_err);
1079        assert_eq!(actual.min(), expected.min());
1080        assert_eq!(actual.max(), expected.max());
1081        assert_eq!(actual.count(), expected.count());
1082        assert_eq!(actual.bins(), expected.bins());
1083    }
1084
1085    #[test]
1086    fn test_sketch_insert_and_overflow() {
1087        /// values to insert into a sketch
1088        #[allow(dead_code)]
1089        enum Value {
1090            Float(f64),
1091            Vec(Vec<f64>),
1092            NFloats(u32, f64),
1093        }
1094        /// ways to insert values into a sketch
1095        #[derive(Debug)]
1096        enum InsertFn {
1097            Insert,
1098            InsertMany,
1099            InsertN,
1100        }
1101        struct Case {
1102            description: &'static str,
1103            start: &'static str,
1104            insert: Value,
1105            expected: &'static str,
1106        }
1107
1108        let cases = &[
1109            Case {
1110                description: "baseline: inserting into an empty sketch",
1111                start: "",
1112                insert: Value::Float(0.0),
1113                expected: "0:1",
1114            },
1115            Case {
1116                description: "inserting a value into an existing bin",
1117                start: "0:1",
1118                insert: Value::Float(0.0),
1119                expected: "0:2",
1120            },
1121            Case {
1122                description: "inserting a value into a new bin at the start",
1123                start: "1338:1",
1124                insert: Value::Float(0.0),
1125                expected: "0:1 1338:1",
1126            },
1127            Case {
1128                description: "inserting a value into a new bin in the middle",
1129                start: "0:1 1338:1",
1130                insert: Value::Float(0.5),
1131                expected: "0:1 1293:1 1338:1",
1132            },
1133            Case {
1134                description: "inserting a value into a new bin at the end",
1135                start: "0:1",
1136                insert: Value::Float(1.0),
1137                expected: "0:1 1338:1",
1138            },
1139            Case {
1140                description: "inserting a value into an existing bin and filling it",
1141                start: "0:65534",
1142                insert: Value::Float(0.0),
1143                expected: "0:65535",
1144            },
1145            Case {
1146                description: "inserting a value into an existing bin and causing an overflow",
1147                start: "0:65535",
1148                insert: Value::Float(0.0),
1149                expected: "0:1 0:65535",
1150            },
1151            Case {
1152                description: "inserting many values into a sketch and causing an overflow",
1153                start: "0:100",
1154                insert: Value::NFloats(65535, 0.0),
1155                expected: "0:100 0:65535",
1156            },
1157            Case {
1158                description: "inserting a value into a new bin in the middle and causing an overflow",
1159                start: "0:1 1338:1",
1160                insert: Value::NFloats(65536, 0.5),
1161                expected: "0:1 1293:1 1293:65535 1338:1",
1162            },
1163        ];
1164
1165        for case in cases {
1166            for insert_fn in &[InsertFn::Insert, InsertFn::InsertMany, InsertFn::InsertN] {
1167                // Insert each value every way possible.
1168
1169                let mut sketch = parse_sketch_from_string_bins(case.start);
1170
1171                match insert_fn {
1172                    InsertFn::Insert => match &case.insert {
1173                        Value::Float(v) => sketch.insert(*v),
1174                        Value::Vec(vs) => {
1175                            for v in vs {
1176                                sketch.insert(*v);
1177                            }
1178                        }
1179                        Value::NFloats(n, v) => {
1180                            for _ in 0..*n {
1181                                sketch.insert(*v);
1182                            }
1183                        }
1184                    },
1185                    InsertFn::InsertMany => match &case.insert {
1186                        Value::Float(v) => sketch.insert_many(&[*v]),
1187                        Value::Vec(vs) => sketch.insert_many(vs),
1188                        Value::NFloats(n, v) => {
1189                            for _ in 0..*n {
1190                                sketch.insert_many(&[*v]);
1191                            }
1192                        }
1193                    },
1194                    InsertFn::InsertN => match &case.insert {
1195                        Value::Float(v) => sketch.insert_n(*v, 1),
1196                        Value::Vec(vs) => {
1197                            for v in vs {
1198                                sketch.insert_n(*v, 1);
1199                            }
1200                        }
1201                        Value::NFloats(n, v) => sketch.insert_n(*v, *n),
1202                    },
1203                }
1204
1205                let expected = parse_sketch_from_string_bins(case.expected);
1206                assert_eq!(expected.bins(), sketch.bins(), "{}", case.description);
1207            }
1208        }
1209    }
1210
1211    #[test]
1212    fn test_histogram_interpolation_agent_similarity() {
1213        #[derive(Clone)]
1214        struct Case {
1215            lower: f64,
1216            upper: f64,
1217            count: u32,
1218            allowed_err: f64,
1219            expected: &'static str,
1220        }
1221
1222        let check_result = |actual: &DDSketch, case: &Case| {
1223            let expected = parse_sketch_from_string_bins(case.expected);
1224
1225            assert_eq!(expected.count(), case.count);
1226            assert_eq!(actual.count(), case.count);
1227            assert_eq!(actual.bins(), expected.bins());
1228            compare_sketches(actual, &expected, case.allowed_err);
1229
1230            let actual_count: u32 = actual.bins.iter().map(|b| u32::from(b.n)).sum();
1231            assert_eq!(actual_count, case.count);
1232        };
1233
1234        let cases = &[
1235            Case { lower: 0.0, upper: 10.0, count: 2, allowed_err: 0.0, expected: "0:1 1442:1" },
1236            Case { lower: 10.0, upper: 20.0, count: 4,  allowed_err: 0.0, expected: "1487:1 1502:1 1514:1 1524:1" },
1237		    Case { lower: -10.0, upper: 10.0, count: 4, allowed_err: 0.0, expected: "-1487:1 -1442:1 -1067:1 1442:1"},
1238            Case { lower: 0.0, upper: 10.0, count: 100, allowed_err: 0.0, expected: "0:1 1190:1 1235:1 1261:1 1280:1 1295:1 1307:1 1317:1 1326:1 1334:1 1341:1 1347:1 1353:1 1358:1 1363:1 1368:1 1372:2 1376:1 1380:1 1384:1 1388:1 1391:1 1394:1 1397:2 1400:1 1403:1 1406:2 1409:1 1412:1 1415:2 1417:1 1419:1 1421:1 1423:1 1425:1 1427:1 1429:2 1431:1 1433:1 1435:2 1437:1 1439:2 1441:1 1443:2 1445:2 1447:1 1449:2 1451:2 1453:2 1455:2 1457:2 1459:1 1460:1 1461:1 1462:1 1463:1 1464:1 1465:1 1466:1 1467:2 1468:1 1469:1 1470:1 1471:1 1472:2 1473:1 1474:1 1475:1 1476:2 1477:1 1478:2 1479:1 1480:1 1481:2 1482:1 1483:2 1484:1 1485:2 1486:1" },
1239		    Case { lower: 1_000.0, upper: 100_000.0, count: 1_000_000 - 1, allowed_err: 0.0, expected: "1784:158 1785:162 1786:164 1787:166 1788:170 1789:171 1790:175 1791:177 1792:180 1793:183 1794:185 1795:189 1796:191 1797:195 1798:197 1799:201 1800:203 1801:207 1802:210 1803:214 1804:217 1805:220 1806:223 1807:227 1808:231 1809:234 1810:238 1811:242 1812:245 1813:249 1814:253 1815:257 1816:261 1817:265 1818:270 1819:273 1820:278 1821:282 1822:287 1823:291 1824:295 1825:300 1826:305 1827:310 1828:314 1829:320 1830:324 1831:329 1832:335 1833:340 1834:345 1835:350 1836:356 1837:362 1838:367 1839:373 1840:379 1841:384 1842:391 1843:397 1844:403 1845:409 1846:416 1847:422 1848:429 1849:435 1850:442 1851:449 1852:457 1853:463 1854:470 1855:478 1856:486 1857:493 1858:500 1859:509 1860:516 1861:525 1862:532 1863:541 1864:550 1865:558 1866:567 1867:575 1868:585 1869:594 1870:603 1871:612 1872:622 1873:632 1874:642 1875:651 1876:662 1877:672 1878:683 1879:693 1880:704 1881:716 1882:726 1883:738 1884:749 1885:761 1886:773 1887:785 1888:797 1889:809 1890:823 1891:835 1892:848 1893:861 1894:875 1895:889 1896:902 1897:917 1898:931 1899:945 1900:960 1901:975 1902:991 1903:1006 1904:1021 1905:1038 1906:1053 1907:1071 1908:1087 1909:1104 1910:1121 1911:1138 1912:1157 1913:1175 1914:1192 1915:1212 1916:1231 1917:1249 1918:1269 1919:1290 1920:1309 1921:1329 1922:1351 1923:1371 1924:1393 1925:1415 1926:1437 1927:1459 1928:1482 1929:1506 1930:1529 1931:1552 1932:1577 1933:1602 1934:1626 1935:1652 1936:1678 1937:1704 1938:1731 1939:1758 1940:1785 1941:1813 1942:1841 1943:1870 1944:1900 1945:1929 1946:1959 1947:1990 1948:2021 1949:2052 1950:2085 1951:2117 1952:2150 1953:2184 1954:2218 1955:2253 1956:2287 1957:2324 1958:2360 1959:2396 1960:2435 1961:2472 1962:2511 1963:2550 1964:2589 1965:2631 1966:2671 1967:2714 1968:2755 1969:2799 1970:2842 1971:2887 1972:2932 1973:2978 1974:3024 1975:3071 1976:3120 1977:3168 1978:3218 1979:3268 1980:3319 1981:3371 1982:3423 1983:3477 1984:3532 1985:3586 1986:3643 1987:3700 1988:3757 1989:3816 1990:3876 1991:3936 1992:3998 1993:4060 1994:4124 1995:4188 1996:4253 1997:4320 1998:4388 1999:4456 2000:4526 2001:4596 2002:4668 2003:4741 2004:4816 2005:4890 2006:4967 2007:5044 2008:5124 2009:5203 2010:5285 2011:5367 2012:5451 2013:5536 2014:5623 2015:5711 2016:5800 2017:5890 2018:5983 2019:6076 2020:6171 2021:6267 2022:6365 2023:6465 2024:6566 2025:6668 2026:6773 2027:6878 2028:6986 2029:7095 2030:7206 2031:7318 2032:7433 2033:7549 2034:7667 2035:7786 2036:7909 2037:8032 2038:8157 2039:8285 2040:8414 2041:8546 2042:8679 2043:8815 2044:8953 2045:9092 2046:9235 2047:9379 2048:9525 2049:9675 2050:9825 2051:9979 2052:10135 2053:10293 2054:10454 2055:10618 2056:10783 2057:10952 2058:11123 2059:11297 2060:11473 2061:11653 2062:11834 2063:12020 2064:12207 2065:12398 2066:12592 2067:12788 2068:12989 2069:13191 2070:13397 2071:13607 2072:13819 2073:14036 2074:14254 2075:14478 2076:14703 2077:14933 2078:15167 2079:15403 2080:8942" },
1240		    Case { lower: 1_000.0, upper: 10_000.0, count: 10_000_000 - 1, allowed_err: 0.00001, expected: "1784:17485 1785:17758 1786:18035 1787:18318 1788:18604 1789:18894 1790:19190 1791:19489 1792:19794 1793:20103 1794:20418 1795:20736 1796:21061 1797:21389 1798:21724 1799:22063 1800:22408 1801:22758 1802:23113 1803:23475 1804:23841 1805:24215 1806:24592 1807:24977 1808:25366 1809:25764 1810:26165 1811:26575 1812:26990 1813:27412 1814:27839 1815:28275 1816:28717 1817:29165 1818:29622 1819:30083 1820:30554 1821:31032 1822:31516 1823:32009 1824:32509 1825:33016 1826:33533 1827:34057 1828:34589 1829:35129 1830:35678 1831:36235 1832:36802 1833:37377 1834:37961 1835:38554 1836:39156 1837:39768 1838:40390 1839:41020 1840:41662 1841:42312 1842:42974 1843:43645 1844:44327 1845:45020 1846:45723 1847:46438 1848:47163 1849:47900 1850:48648 1851:49409 1852:50181 1853:50964 1854:51761 1855:52570 1856:53391 1857:54226 1858:55072 1859:55934 1860:56807 1861:57695 1862:58596 1863:59512 1864:60441 1865:61387 1866:62345 1867:63319 1868:64309 1869:65314 1870:799 1870:65535 1871:1835 1871:65535 1872:2889 1872:65535 1873:3957 1873:65535 1874:5043 1874:65535 1875:6146 1875:65535 1876:7266 1876:65535 1877:8404 1877:65535 1878:9559 1878:65535 1879:10732 1879:65535 1880:11923 1880:65535 1881:13135 1881:65535 1882:14363 1882:65535 1883:15612 1883:65535 1884:16879 1884:65535 1885:18168 1885:65535 1886:19475 1886:65535 1887:20803 1887:65535 1888:22153 1888:65535 1889:23523 1889:65535 1890:24914 1890:65535 1891:26327 1891:65535 1892:27763 1892:65535 1893:29221 1893:65535 1894:30701 1894:65535 1895:32205 1895:65535 1896:33732 1896:65535 1897:35283 1897:65535 1898:36858 1898:65535 1899:38458 1899:65535 1900:40084 1900:65535 1901:41733 1901:65535 1902:43409 1902:65535 1903:45112 1903:65535 1904:46841 1904:65535 1905:48596 1905:65535 1906:50380 1906:65535 1907:52191 1907:65535 1908:54030 1908:65535 1909:55899 1909:65535 1910:57796 1910:65535 1911:59723 1911:65535 1912:61680 1912:65535 1913:63668 1913:65535 1914:152 1914:65535 1914:65535 1915:2202 1915:65535 1915:65535 1916:4285 1916:65535 1916:65535 1917:6399 1917:65535 1917:65535 1918:8547 1918:65535 1918:65535 1919:10729 1919:65535 1919:65535 1920:12945 1920:65535 1920:65535 1921:15195 1921:65535 1921:65535 1922:17480 1922:65535 1922:65535 1923:19801 1923:65535 1923:65535 1924:22158 1924:65535 1924:65535 1925:24553 1925:65535 1925:65535 1926:26985 1926:65535 1926:65535 1927:29453 1927:65535 1927:65535 1928:31963 1928:65535 1928:65535 1929:34509 1929:65535 1929:65535 1930:37097 1930:65535 1930:65535 1931:39724 1931:65535 1931:65535 1932:17411"},
1241        ];
1242
1243        let double_insert_cases = &[
1244            Case { lower: 1_000.0, upper: 10_000.0, count: 10_000_000 - 1, allowed_err: 0.0002, expected: "1784:34970 1785:35516 1786:36070 1787:36636 1788:37208 1789:37788 1790:38380 1791:38978 1792:39588 1793:40206 1794:40836 1795:41472 1796:42122 1797:42778 1798:43448 1799:44126 1800:44816 1801:45516 1802:46226 1803:46950 1804:47682 1805:48430 1806:49184 1807:49954 1808:50732 1809:51528 1810:52330 1811:53150 1812:53980 1813:54824 1814:55678 1815:56550 1816:57434 1817:58330 1818:59244 1819:60166 1820:61108 1821:62064 1822:63032 1823:64018 1824:65018 1825:497 1825:65535 1826:1531 1826:65535 1827:2579 1827:65535 1828:3643 1828:65535 1829:4723 1829:65535 1830:5821 1830:65535 1831:6935 1831:65535 1832:8069 1832:65535 1833:9219 1833:65535 1834:10387 1834:65535 1835:11573 1835:65535 1836:12777 1836:65535 1837:14001 1837:65535 1838:15245 1838:65535 1839:16505 1839:65535 1840:17789 1840:65535 1841:19089 1841:65535 1842:20413 1842:65535 1843:21755 1843:65535 1844:23119 1844:65535 1845:24505 1845:65535 1846:25911 1846:65535 1847:27341 1847:65535 1848:28791 1848:65535 1849:30265 1849:65535 1850:31761 1850:65535 1851:33283 1851:65535 1852:34827 1852:65535 1853:36393 1853:65535 1854:37987 1854:65535 1855:39605 1855:65535 1856:41247 1856:65535 1857:42917 1857:65535 1858:44609 1858:65535 1859:46333 1859:65535 1860:48079 1860:65535 1861:49855 1861:65535 1862:51657 1862:65535 1863:53489 1863:65535 1864:55347 1864:65535 1865:57239 1865:65535 1866:59155 1866:65535 1867:61103 1867:65535 1868:63083 1868:65535 1869:65093 1869:65535 1870:1598 1870:65535 1870:65535 1871:3670 1871:65535 1871:65535 1872:5778 1872:65535 1872:65535 1873:7914 1873:65535 1873:65535 1874:10086 1874:65535 1874:65535 1875:12292 1875:65535 1875:65535 1876:14532 1876:65535 1876:65535 1877:16808 1877:65535 1877:65535 1878:19118 1878:65535 1878:65535 1879:21464 1879:65535 1879:65535 1880:23846 1880:65535 1880:65535 1881:26270 1881:65535 1881:65535 1882:28726 1882:65535 1882:65535 1883:31224 1883:65535 1883:65535 1884:33758 1884:65535 1884:65535 1885:36336 1885:65535 1885:65535 1886:38950 1886:65535 1886:65535 1887:41606 1887:65535 1887:65535 1888:44306 1888:65535 1888:65535 1889:47046 1889:65535 1889:65535 1890:49828 1890:65535 1890:65535 1891:52654 1891:65535 1891:65535 1892:55526 1892:65535 1892:65535 1893:58442 1893:65535 1893:65535 1894:61402 1894:65535 1894:65535 1895:64410 1895:65535 1895:65535 1896:1929 1896:65535 1896:65535 1896:65535 1897:5031 1897:65535 1897:65535 1897:65535 1898:8181 1898:65535 1898:65535 1898:65535 1899:11381 1899:65535 1899:65535 1899:65535 1900:14633 1900:65535 1900:65535 1900:65535 1901:17931 1901:65535 1901:65535 1901:65535 1902:21283 1902:65535 1902:65535 1902:65535 1903:24689 1903:65535 1903:65535 1903:65535 1904:28147 1904:65535 1904:65535 1904:65535 1905:31657 1905:65535 1905:65535 1905:65535 1906:35225 1906:65535 1906:65535 1906:65535 1907:38847 1907:65535 1907:65535 1907:65535 1908:42525 1908:65535 1908:65535 1908:65535 1909:46263 1909:65535 1909:65535 1909:65535 1910:50057 1910:65535 1910:65535 1910:65535 1911:53911 1911:65535 1911:65535 1911:65535 1912:57825 1912:65535 1912:65535 1912:65535 1913:61801 1913:65535 1913:65535 1913:65535 1914:304 1914:65535 1914:65535 1914:65535 1914:65535 1915:4404 1915:65535 1915:65535 1915:65535 1915:65535 1916:8570 1916:65535 1916:65535 1916:65535 1916:65535 1917:12798 1917:65535 1917:65535 1917:65535 1917:65535 1918:17094 1918:65535 1918:65535 1918:65535 1918:65535 1919:21458 1919:65535 1919:65535 1919:65535 1919:65535 1920:25890 1920:65535 1920:65535 1920:65535 1920:65535 1921:30390 1921:65535 1921:65535 1921:65535 1921:65535 1922:34960 1922:65535 1922:65535 1922:65535 1922:65535 1923:39602 1923:65535 1923:65535 1923:65535 1923:65535 1924:44316 1924:65535 1924:65535 1924:65535 1924:65535 1925:49106 1925:65535 1925:65535 1925:65535 1925:65535 1926:53970 1926:65535 1926:65535 1926:65535 1926:65535 1927:58906 1927:65535 1927:65535 1927:65535 1927:65535 1928:63926 1928:65535 1928:65535 1928:65535 1928:65535 1929:3483 1929:65535 1929:65535 1929:65535 1929:65535 1929:65535 1930:8659 1930:65535 1930:65535 1930:65535 1930:65535 1930:65535 1931:13913 1931:65535 1931:65535 1931:65535 1931:65535 1931:65535 1932:34822" },
1245        ];
1246
1247        for case in cases {
1248            let mut sketch = DDSketch::default();
1249            assert!(sketch.is_empty());
1250
1251            sketch.insert_interpolate_bucket(case.lower, case.upper, case.count);
1252            check_result(&sketch, case);
1253        }
1254
1255        for case in double_insert_cases {
1256            let mut sketch = DDSketch::default();
1257            assert!(sketch.is_empty());
1258
1259            sketch.insert_interpolate_bucket(case.lower, case.upper, case.count);
1260            sketch.insert_interpolate_bucket(case.lower, case.upper, case.count);
1261
1262            let mut case = case.clone();
1263            case.count *= 2;
1264            check_result(&sketch, &case);
1265        }
1266    }
1267
1268    fn test_relative_accuracy(config: &Config, rel_acc: f64, min_value: f32, max_value: f32) {
1269        let max_observed_rel_acc = check_max_relative_accuracy(config, min_value, max_value);
1270        assert!(
1271            max_observed_rel_acc <= rel_acc + FLOATING_POINT_ACCEPTABLE_ERROR,
1272            "observed out of bound max relative acc: {max_observed_rel_acc}, target rel acc={rel_acc}",
1273        );
1274    }
1275
1276    fn compute_relative_accuracy(target: f64, actual: f64) -> f64 {
1277        assert!(
1278            !(target < 0.0 || actual < 0.0),
1279            "expected/actual values must be greater than 0.0; target={target}, actual={actual}",
1280        );
1281
1282        if target == actual {
1283            0.0
1284        } else if target == 0.0 {
1285            if actual == 0.0 {
1286                0.0
1287            } else {
1288                f64::INFINITY
1289            }
1290        } else if actual < target {
1291            (target - actual) / target
1292        } else {
1293            (actual - target) / target
1294        }
1295    }
1296
1297    fn check_max_relative_accuracy(config: &Config, min_value: f32, max_value: f32) -> f64 {
1298        assert!(min_value < max_value, "min_value must be less than max_value");
1299
1300        let mut v = min_value;
1301        let mut max_relative_acc = 0.0;
1302        while v < max_value {
1303            let target = f64::from(v);
1304            let actual = config.bin_lower_bound(config.key(target));
1305
1306            let relative_acc = compute_relative_accuracy(target, actual);
1307            if relative_acc > max_relative_acc {
1308                max_relative_acc = relative_acc;
1309            }
1310
1311            v = f32::from_bits(v.to_bits() + 1);
1312        }
1313
1314        // Final iteration to make sure we hit the highest value.
1315        let actual = config.bin_lower_bound(config.key(f64::from(max_value)));
1316        let relative_acc = compute_relative_accuracy(f64::from(max_value), actual);
1317        if relative_acc > max_relative_acc {
1318            max_relative_acc = relative_acc;
1319        }
1320
1321        max_relative_acc
1322    }
1323}
ddsketch_agent/lib.rs

ddsketch_agent/
lib.rs