Skip to main content

saluki_context/
origin.rs

1#![allow(warnings)]
2//! Metric origin.
3
4use std::{fmt, num::NonZeroU32, sync::Arc};
5
6use indexmap::Equivalent;
7use saluki_common::hash::hash_single_fast;
8use serde::{Deserialize, Serialize};
9use stringtheory::MetaString;
10use tracing::warn;
11
12use crate::tags::{SharedTagSet, Tag};
13
14/// The cardinality of tags associated with the origin entity.
15#[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
16#[serde(try_from = "String")]
17pub enum OriginTagCardinality {
18    /// No cardinality.
19    ///
20    /// This implies that no tags should be added to the metric based on its origin.
21    None,
22
23    /// Low cardinality.
24    ///
25    /// This generally covers tags which are static, or relatively slow to change, and generally results in a small
26    /// number of unique values for the given tag key.
27    Low,
28
29    /// Orchestrator cardinality.
30    ///
31    /// This generally covers orchestrator-specific tags, such as Kubernetes pod UID, and lands somewhere between low
32    /// and high cardinality.
33    Orchestrator,
34
35    /// High cardinality.
36    ///
37    /// This generally covers tags which frequently change and generally results in a large number of unique values for
38    /// the given tag key.
39    High,
40}
41
42impl OriginTagCardinality {
43    /// Returns the string representation of the cardinality.
44    pub const fn as_str(&self) -> &'static str {
45        match self {
46            Self::None => "none",
47            Self::Low => "low",
48            Self::Orchestrator => "orchestrator",
49            Self::High => "high",
50        }
51    }
52}
53
54impl TryFrom<&str> for OriginTagCardinality {
55    type Error = String;
56
57    fn try_from(value: &str) -> Result<Self, Self::Error> {
58        // Exact lowercase match first (zero-cost happy path), then case-insensitive fallback
59        // using eq_ignore_ascii_case (also allocation-free) to match the core Datadog Agent,
60        // which normalises with strings.ToLower before matching.
61        match value {
62            "none" => Ok(Self::None),
63            "low" => Ok(Self::Low),
64            "high" => Ok(Self::High),
65            "orch" | "orchestrator" => Ok(Self::Orchestrator),
66            other if other.eq_ignore_ascii_case("none") => Ok(Self::None),
67            other if other.eq_ignore_ascii_case("low") => Ok(Self::Low),
68            other if other.eq_ignore_ascii_case("high") => Ok(Self::High),
69            other if other.eq_ignore_ascii_case("orch") || other.eq_ignore_ascii_case("orchestrator") => {
70                Ok(Self::Orchestrator)
71            }
72            other => Err(format!("unknown tag cardinality type '{}'", other)),
73        }
74    }
75}
76
77impl TryFrom<String> for OriginTagCardinality {
78    type Error = String;
79
80    fn try_from(value: String) -> Result<Self, Self::Error> {
81        Self::try_from(value.as_str())
82    }
83}
84
85impl fmt::Display for OriginTagCardinality {
86    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
87        match self {
88            Self::None => write!(f, "none"),
89            Self::Low => write!(f, "low"),
90            Self::Orchestrator => write!(f, "orchestrator"),
91            Self::High => write!(f, "high"),
92        }
93    }
94}
95
96/// A raw representation of an origin.
97///
98/// Metrics contain metadata about their origin, in terms of the metric's _reason_ for existing: the metric was ingested
99/// via DogStatsD, or was generated by an integration, and so on. However, there is also the concept of a metric
100/// originating from a particular _entity_, such as a specific Kubernetes container. This relates directly to the
101/// specific sender of the metric, which is used to enrich the metric with additional tags describing the origin entity.
102///
103/// The origin entity will generally be the process ID of the metric sender, or the container ID, both of which are then
104/// generally mapped to the relevant information for the metric, such as the orchestrator-level tags for the
105/// container/pod/deployment.
106#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
107pub struct RawOrigin<'a> {
108    /// Process ID of the sender.
109    process_id: Option<NonZeroU32>,
110
111    /// Local Data of the sender.
112    ///
113    /// This will typically be either the container ID, or the inode of the container's cgroups controller, or both. It
114    /// may or may not have a special prefix that indicates which of the two it's.
115    local_data: Option<&'a str>,
116
117    /// Pod UID of the sender.
118    ///
119    /// This is generally only used in Kubernetes environments to uniquely identify the pod. UIDs are equivalent to UUIDs.
120    pod_uid: Option<&'a str>,
121
122    /// Desired cardinality of any tags associated with the entity.
123    ///
124    /// This controls the cardinality of the tags added to this metric when enriching based on the available entity IDs.
125    cardinality: Option<OriginTagCardinality>,
126
127    /// External Data of the sender.
128    ///
129    /// See [`ExternalData`] for more information.
130    external_data: Option<RawExternalData<'a>>,
131}
132
133impl<'a> RawOrigin<'a> {
134    /// Returns `true` if the origin information is empty.
135    pub fn is_empty(&self) -> bool {
136        self.process_id.is_none()
137            && self.local_data.is_none()
138            && self.pod_uid.is_none()
139            && self.cardinality.is_none()
140            && self.external_data.is_none()
141    }
142
143    /// Unsets the process ID of the sender.
144    pub fn clear_process_id(&mut self) {
145        self.process_id = None;
146    }
147
148    /// Sets the process ID of the sender.
149    ///
150    /// Must be a non-zero value. If the value is zero, it's silently ignored.
151    pub fn set_process_id(&mut self, process_id: u32) {
152        self.process_id = NonZeroU32::new(process_id);
153    }
154
155    /// Returns the process ID of the sender.
156    pub fn process_id(&self) -> Option<u32> {
157        self.process_id.map(NonZeroU32::get)
158    }
159
160    /// Sets the Local Data of the sender.
161    pub fn set_local_data(&mut self, local_data: impl Into<Option<&'a str>>) {
162        self.local_data = local_data.into();
163    }
164
165    /// Returns the Local Data of the sender.
166    pub fn local_data(&self) -> Option<&str> {
167        self.local_data
168    }
169
170    /// Sets the pod UID of the sender.
171    pub fn set_pod_uid(&mut self, pod_uid: impl Into<Option<&'a str>>) {
172        self.pod_uid = pod_uid.into();
173    }
174
175    /// Returns the pod UID of the sender.
176    pub fn pod_uid(&self) -> Option<&str> {
177        self.pod_uid
178    }
179
180    /// Sets the desired cardinality of any tags associated with the entity.
181    pub fn set_cardinality(&mut self, cardinality: impl Into<Option<OriginTagCardinality>>) {
182        self.cardinality = cardinality.into();
183    }
184
185    /// Returns the desired cardinality of any tags associated with the entity.
186    pub fn cardinality(&self) -> Option<OriginTagCardinality> {
187        self.cardinality.as_ref().copied()
188    }
189
190    /// Sets the External Data of the sender.
191    pub fn set_external_data(&mut self, external_data: impl Into<Option<&'a str>>) {
192        self.external_data = external_data.into().and_then(RawExternalData::try_from_str);
193    }
194
195    /// Returns the External Data of the sender.
196    pub fn external_data(&self) -> Option<&RawExternalData<'a>> {
197        self.external_data.as_ref()
198    }
199}
200
201impl fmt::Display for RawOrigin<'_> {
202    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
203        let mut has_written = false;
204
205        write!(f, "RawOrigin(")?;
206
207        if let Some(process_id) = self.process_id {
208            write!(f, "process_id={}", process_id)?;
209        }
210
211        if let Some(local_data) = self.local_data {
212            if has_written {
213                write!(f, " ")?;
214            } else {
215                has_written = true;
216            }
217            write!(f, "local_data={}", local_data)?;
218        }
219
220        if let Some(pod_uid) = self.pod_uid {
221            if has_written {
222                write!(f, " ")?;
223            } else {
224                has_written = true;
225            }
226            write!(f, "pod_uid={}", pod_uid)?;
227        }
228
229        if let Some(cardinality) = self.cardinality {
230            if has_written {
231                write!(f, " ")?;
232            } else {
233                has_written = true;
234            }
235            write!(f, "cardinality={}", cardinality)?;
236        }
237
238        if let Some(external_data) = self.external_data.as_ref() {
239            if has_written {
240                write!(f, " ")?;
241            }
242            write!(f, "external_data={}", external_data)?;
243        }
244
245        write!(f, ")")
246    }
247}
248
249/// A resolver for mapping origins to their associated tags.
250pub trait OriginTagsResolver: Send + Sync {
251    /// Resolves the origin tags for the given raw origin.
252    fn resolve_origin_tags(&self, origin: RawOrigin<'_>) -> SharedTagSet;
253}
254
255impl<T> OriginTagsResolver for Arc<T>
256where
257    T: OriginTagsResolver,
258{
259    fn resolve_origin_tags(&self, origin: RawOrigin<'_>) -> SharedTagSet {
260        (**self).resolve_origin_tags(origin)
261    }
262}
263
264impl<T> OriginTagsResolver for Option<T>
265where
266    T: OriginTagsResolver,
267{
268    fn resolve_origin_tags(&self, origin: RawOrigin<'_>) -> SharedTagSet {
269        self.as_ref()
270            .map(|resolver| resolver.resolve_origin_tags(origin))
271            .unwrap_or_else(|| SharedTagSet::default())
272    }
273}
274
275/// External Data associated with an origin.
276///
277/// "External Data" is a concept that's used to aid origin detection of workloads running in Kubernetes environments
278/// where introspection isn't possible or may return incorrect information. Origin detection generally centers around
279/// determining the container where a metric originates from, and then enriching the metric with tags that describe that
280/// container, as well as the pod the container is running within, and so on. In some cases, the origin of a metric
281/// can't be detected from the outside (such as by using peer credentials over Unix Domain sockets) and can't be
282/// detected by the workload itself (such as when running in nested virtualization environments). In these cases, we
283/// need a mechanism that allows passing the necessary information to the client, who then passes it on to us, so that
284/// we can correctly resolve the origin.
285///
286/// "External Data" supports this by allowing for an external Kubernetes admission controller to attach specific
287/// metadata -- pod UID and container name -- to application pods, which is then read and sent along with metrics. This
288/// information is then used during origin detection in order to correlate the container ID of the origin, which is
289/// sufficient to allow enriching the metric with container-specific tags.
290///
291/// # Format
292///
293/// An External Data string is a comma-separated list of key/value pairs, where each key represents a particular aspect
294/// of the workload entity. The following keys are supported:
295///
296/// - `it-<true/false>`: A boolean value indicating whether the entity is an init container.
297/// - `pu-<pod_uid>`: The pod UID associated with the entity.
298/// - `cn-<container_name>`: The container name associated with the entity.
299///
300/// For parsing external data strings without allocating, see [`RawExternalData`].
301#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
302pub struct ExternalData {
303    pod_uid: MetaString,
304    container_name: MetaString,
305    init_container: bool,
306}
307
308impl ExternalData {
309    /// Creates a new `ExternalData` instance.
310    pub fn new(pod_uid: MetaString, container_name: MetaString, init_container: bool) -> Self {
311        Self {
312            pod_uid,
313            container_name,
314            init_container,
315        }
316    }
317
318    /// Returns the pod UID.
319    pub fn pod_uid(&self) -> &MetaString {
320        &self.pod_uid
321    }
322
323    /// Returns the container name.
324    pub fn container_name(&self) -> &MetaString {
325        &self.container_name
326    }
327
328    /// Returns `true` if the entity is an init container.
329    pub fn is_init_container(&self) -> bool {
330        self.init_container
331    }
332}
333
334impl std::hash::Hash for ExternalData {
335    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
336        (*self.pod_uid).hash(state);
337        (*self.container_name).hash(state);
338        self.init_container.hash(state);
339    }
340}
341
/// A borrowed counterpart of [`ExternalData`].
///
/// It allows external data strings to be parsed without allocating backing storage for any field, and it can serve as
/// a lookup key for map entries (such as when using `HashMap`) whose key type is [`ExternalData`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawExternalData<'a> {
    pod_uid: &'a str,
    container_name: &'a str,
    init_container: bool,
}
352
353impl<'a> RawExternalData<'a> {
354    /// Creates a new `RawExternalData` from a raw string.
355    ///
356    /// If the external data isn't valid, `None` is returned.
357    pub fn try_from_str(raw: &'a str) -> Option<Self> {
358        if raw.is_empty() {
359            return None;
360        }
361
362        let mut data = Self {
363            pod_uid: "",
364            container_name: "",
365            init_container: false,
366        };
367
368        let parts = raw.split(',');
369        for part in parts {
370            if part.len() < 4 {
371                // All key/value pairs have a prefix of `xx-` where `xx` is some short code, so we basically can't have
372                // any real key/value pair that's less than four characters overall.
373                warn!("Parsed external data with invalid key/value pair: {}", part);
374                continue;
375            }
376
377            let key = &part[0..3];
378            let value = &part[3..];
379
380            match key {
381                "it-" => data.init_container = value.parse().unwrap_or(false),
382                "pu-" => data.pod_uid = value,
383                "cn-" => data.container_name = value,
384                _ => {
385                    // Unknown key, ignore.
386                    warn!("Parsed external data with unknown key: {}", key);
387                }
388            }
389        }
390
391        Some(data)
392    }
393}
394
395impl std::hash::Hash for RawExternalData<'_> {
396    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
397        self.pod_uid.hash(state);
398        self.container_name.hash(state);
399        self.init_container.hash(state);
400    }
401}
402
403impl fmt::Display for RawExternalData<'_> {
404    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
405        write!(
406            f,
407            "pu-{},cn-{},it-{}",
408            self.pod_uid, self.container_name, self.init_container
409        )
410    }
411}
412
413impl Equivalent<ExternalData> for RawExternalData<'_> {
414    fn equivalent(&self, other: &ExternalData) -> bool {
415        self.pod_uid == &*other.pod_uid
416            && self.container_name == &*other.container_name
417            && self.init_container == other.init_container
418    }
419}
420
#[cfg(test)]
mod tests {
    use std::{
        collections::hash_map::DefaultHasher,
        hash::{Hash as _, Hasher as _},
    };

    use proptest::prelude::*;

    use super::*;

    /// Feeds `value` into a fresh `DefaultHasher` and returns the finished hash.
    fn default_hash_of<T: std::hash::Hash>(value: &T) -> u64 {
        let mut hasher = DefaultHasher::new();
        value.hash(&mut hasher);
        hasher.finish()
    }

    proptest! {
        // The owned and borrowed representations must hash identically when their fields are equal.
        #[test]
        fn property_test_identical_hash_impls(pod_uid in "[a-z0-9]{1,64}", container_name in "[a-z0-9]{1,64}", init_container in any::<bool>()) {
            let owned = ExternalData::new(pod_uid.clone().into(), container_name.clone().into(), init_container);
            let borrowed = RawExternalData {
                pod_uid: &pod_uid,
                container_name: &container_name,
                init_container,
            };

            let owned_hash = default_hash_of(&owned);
            let borrowed_hash = default_hash_of(&borrowed);

            assert_eq!(owned_hash, borrowed_hash);
        }
    }
}