Skip to main content

saluki_env/workload/
entity.rs

1//! Entity identifiers.
2
3use std::{cmp::Ordering, fmt};
4
5use stringtheory::MetaString;
6use tracing::warn;
7
8const ENTITY_PREFIX_POD_UID: &str = "kubernetes_pod_uid://";
9const ENTITY_PREFIX_CONTAINER_ID: &str = "container_id://";
10const ENTITY_PREFIX_CONTAINER_INODE: &str = "container_inode://";
11const ENTITY_PREFIX_CONTAINER_PID: &str = "container_pid://";
12
13const LOCAL_DATA_PREFIX_INODE: &str = "in-";
14const LOCAL_DATA_PREFIX_CID: &str = "ci-";
15const LOCAL_DATA_PREFIX_LEGACY_CID: &str = "cid-";
16
17/// An entity identifier.
18#[derive(Clone, Debug, Eq, Hash, PartialEq)]
19pub enum EntityId {
20    /// The global entity.
21    ///
22    /// Represents the root of the entity hierarchy, which is equivalent to a "global" scope. This is generally used
23    /// to represent a collection of metadata entries that aren't associated with any specific entity, but with
24    /// anything within the workload, such as host or cluster tags.
25    Global,
26
27    /// A Kubernetes pod UID.
28    ///
29    /// Represents the UUID of a specific Kubernetes pod.
30    PodUid(MetaString),
31
32    /// A container ID.
33    ///
34    /// This is generally a long hexadecimal string, as generally used by container runtimes like `containerd`.
35    Container(MetaString),
36
37    /// A container inode.
38    ///
39    /// Represents the inode of the cgroups controller for a specific container.
40    ContainerInode(u64),
41
42    /// A container PID.
43    ///
44    /// Represents the PID of the process within a specific container.
45    ContainerPid(u32),
46}
47
48impl EntityId {
49    /// Creates an `EntityId` from Local Data.
50    ///
51    /// This method follows the same logic/behavior as the Datadog Agent's origin detection logic:
52    /// - If the input starts with `ci-`, we treat it as a container ID.
53    /// - If the input starts with `in-`, we treat it as a container cgroup controller inode.
54    /// - If the input contains a comma, we split the input and search for either a prefixed container ID or prefixed
55    ///   inode. If both are present, we use the container ID.
56    /// - If the input starts with `cid-`, we treat it as a container ID.
57    /// - If none of the above conditions are met, we assume the entire input is a container ID.
58    ///
59    /// If the input fails to be parsed in a valid fashion (for example, `in-` prefix but the remainder isn't a valid
60    /// integer), or is empty, `None` is returned.
61    pub fn from_local_data<S>(raw_local_data: S) -> Option<Self>
62    where
63        S: AsRef<str> + Into<MetaString>,
64    {
65        let local_data_value = raw_local_data.as_ref();
66        if local_data_value.is_empty() {
67            return None;
68        }
69
70        if local_data_value.contains(',') {
71            let mut maybe_container_inode = None;
72            for local_data_subvalue in local_data_value.split(',') {
73                match parse_local_data_value(local_data_subvalue) {
74                    // We always prefer the container ID if we get it.
75                    Ok(Some(Self::Container(cid))) => return Some(Self::Container(cid)),
76                    Ok(Some(Self::ContainerInode(inode))) => maybe_container_inode = Some(inode),
77                    Err(()) => {
78                        warn!(
79                            local_data = local_data_value,
80                            local_data_subvalue,
81                            "Failed parsing Local Data subvalue. Metric may be missing origin detection-based tags."
82                        );
83                    }
84                    _ => {}
85                }
86            }
87
88            // Return the container inode if we found one.
89            if let Some(inode) = maybe_container_inode {
90                return Some(Self::ContainerInode(inode));
91            }
92        }
93
94        // Try to parse the local data value as a single entity ID value, falling back to treating the entire value as a
95        // container ID otherwise.
96        match parse_local_data_value(local_data_value) {
97            // We always prefer the container ID if we get it.
98            Ok(Some(eid)) => Some(eid),
99            Ok(None) => Some(Self::Container(raw_local_data.into())),
100            Err(()) => {
101                warn!(
102                    local_data = local_data_value,
103                    "Failed parsing Local Data value. Metric may be missing origin detection-based tags."
104                );
105                None
106            }
107        }
108    }
109
110    /// Creates an `EntityId` from a Kubernetes pod UID.
111    ///
112    /// If the pod UID value is `"none"`, this will return `None`.
113    pub fn from_pod_uid<S>(pod_uid: S) -> Option<Self>
114    where
115        S: AsRef<str> + Into<MetaString>,
116    {
117        if pod_uid.as_ref() == "none" {
118            return None;
119        }
120        Some(Self::PodUid(pod_uid.into()))
121    }
122
123    /// Returns the inner container ID value, if this entity ID is a `Container`.
124    ///
125    /// Otherwise, `None` is returned and the original entity ID is consumed.
126    pub fn try_into_container(self) -> Option<MetaString> {
127        match self {
128            Self::Container(container_id) => Some(container_id),
129            _ => None,
130        }
131    }
132
133    fn precedence_value(&self) -> usize {
134        match self {
135            Self::Global => 0,
136            Self::PodUid(_) => 1,
137            Self::Container(_) => 2,
138            Self::ContainerInode(_) => 3,
139            Self::ContainerPid(_) => 4,
140        }
141    }
142}
143
144impl fmt::Display for EntityId {
145    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
146        match self {
147            Self::Global => write!(f, "system://global"),
148            Self::PodUid(pod_uid) => write!(f, "{}{}", ENTITY_PREFIX_POD_UID, pod_uid),
149            Self::Container(container_id) => write!(f, "{}{}", ENTITY_PREFIX_CONTAINER_ID, container_id),
150            Self::ContainerInode(inode) => write!(f, "{}{}", ENTITY_PREFIX_CONTAINER_INODE, inode),
151            Self::ContainerPid(pid) => write!(f, "{}{}", ENTITY_PREFIX_CONTAINER_PID, pid),
152        }
153    }
154}
155
156impl serde::Serialize for EntityId {
157    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
158    where
159        S: serde::Serializer,
160    {
161        // We have this manual implementation of `Serialize` just to avoid needing to bring in `serde_with` to get the
162        // helper that utilizes the `Display` implementation.
163        serializer.collect_str(self)
164    }
165}
166
167/// A wrapper for entity IDs that sorts them in a manner consistent with the expected precedence of entity IDs.
168///
169/// This type establishes a total ordering over entity IDs based on their logical precedence, which is as follows:
170///
171/// - global (highest precedence)
172/// - pod
173/// - container
174/// - container inode
175/// - container PID (lowest precedence)
176///
177/// Wrapped entity IDs are be sorted highest to lowest precedence. For entity IDs with the same precedence, they're
178/// further ordered by their internal value. For entity IDs with a string identifier, lexicographical ordering is used.
179/// For entity IDs with a numeric identifier, numerical ordering is used.
180#[derive(Eq, PartialEq)]
181pub struct HighestPrecedenceEntityIdRef<'a>(&'a EntityId);
182
183impl<'a> From<&'a EntityId> for HighestPrecedenceEntityIdRef<'a> {
184    fn from(entity_id: &'a EntityId) -> Self {
185        Self(entity_id)
186    }
187}
188
189impl PartialOrd for HighestPrecedenceEntityIdRef<'_> {
190    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
191        Some(self.cmp(other))
192    }
193}
194
195impl Ord for HighestPrecedenceEntityIdRef<'_> {
196    fn cmp(&self, other: &Self) -> Ordering {
197        // Do the initial comparison based on the implicit precedence of each entity ID.
198        let self_precedence = self.0.precedence_value();
199        let other_precedence = other.0.precedence_value();
200        if self_precedence != other_precedence {
201            return self_precedence.cmp(&other_precedence);
202        }
203
204        // We have two entities at the same level of precedence, so we need to compare their actual values.
205        match (self.0, other.0) {
206            // Global entities are always equal.
207            (EntityId::Global, EntityId::Global) => Ordering::Equal,
208            (EntityId::PodUid(self_pod_uid), EntityId::PodUid(other_pod_uid)) => self_pod_uid.cmp(other_pod_uid),
209            (EntityId::Container(self_container_id), EntityId::Container(other_container_id)) => {
210                self_container_id.cmp(other_container_id)
211            }
212            (EntityId::ContainerInode(self_inode), EntityId::ContainerInode(other_inode)) => {
213                self_inode.cmp(other_inode)
214            }
215            (EntityId::ContainerPid(self_pid), EntityId::ContainerPid(other_pid)) => self_pid.cmp(other_pid),
216            _ => unreachable!("entities with different precedence should not be compared"),
217        }
218    }
219}
220
221fn parse_local_data_value(raw_local_data_value: &str) -> Result<Option<EntityId>, ()> {
222    if raw_local_data_value.starts_with(LOCAL_DATA_PREFIX_CID) {
223        let cid = raw_local_data_value.trim_start_matches(LOCAL_DATA_PREFIX_CID);
224        Ok(Some(EntityId::Container(cid.into())))
225    } else if raw_local_data_value.starts_with(LOCAL_DATA_PREFIX_INODE) {
226        let inode = raw_local_data_value
227            .trim_start_matches(LOCAL_DATA_PREFIX_INODE)
228            .parse()
229            .map_err(|_| ())?;
230        Ok(Some(EntityId::ContainerInode(inode)))
231    } else if raw_local_data_value.starts_with(LOCAL_DATA_PREFIX_LEGACY_CID) {
232        let cid = raw_local_data_value.trim_start_matches(LOCAL_DATA_PREFIX_LEGACY_CID);
233        Ok(Some(EntityId::Container(cid.into())))
234    } else {
235        Ok(None)
236    }
237}
238
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `EntityId::from_local_data` over a table of inputs and checks each result.
    #[test]
    fn local_data() {
        const PREFIX_CID: &str = "ci-singlecontainerid";
        const PREFIX_LEGACY_CID: &str = "cid-singlecontainerid";
        const CID: EntityId = EntityId::Container(MetaString::from_static("singlecontainerid"));
        const PREFIX_INODE: &str = "in-12345";
        const INODE: EntityId = EntityId::ContainerInode(12345);

        let cases: Vec<(String, Option<EntityId>)> = vec![
            // An empty input is never valid.
            (String::new(), None),
            // A container inode that isn't a number is rejected.
            ("in-notanumber".to_string(), None),
            // Anything without a known prefix falls back to being a container ID.
            ("random".to_string(), Some(EntityId::Container("random".into()))),
            // A single prefixed value.
            (PREFIX_CID.to_string(), Some(CID.clone())),
            (PREFIX_INODE.to_string(), Some(INODE.clone())),
            (PREFIX_LEGACY_CID.to_string(), Some(CID.clone())),
            // Several prefixed values, separated by commas.
            //
            // The container ID must always win over the inode, whichever comes first. Invalid subvalues are
            // included as well, since they should simply be ignored while walking the split values.
            (format!("{},{}", PREFIX_CID, PREFIX_INODE), Some(CID.clone())),
            (format!("{},{}", PREFIX_INODE, PREFIX_CID), Some(CID.clone())),
            (format!("{},{}", PREFIX_LEGACY_CID, PREFIX_INODE), Some(CID.clone())),
            (format!("{},{}", PREFIX_INODE, PREFIX_LEGACY_CID), Some(CID.clone())),
            (format!("{},invalid", PREFIX_CID), Some(CID.clone())),
            (format!("{},invalid", PREFIX_LEGACY_CID), Some(CID.clone())),
            (format!("{},invalid", PREFIX_INODE), Some(INODE.clone())),
        ];

        for (input, expected) in cases {
            assert_eq!(EntityId::from_local_data(input), expected);
        }
    }

    /// A regular pod UID is wrapped as-is in `EntityId::PodUid`.
    #[test]
    fn pod_uid_valid() {
        let pod_uid = "abcdef1234567890";
        let expected = EntityId::PodUid(MetaString::from(pod_uid));
        let entity_id = EntityId::from_pod_uid(pod_uid).unwrap();
        assert_eq!(entity_id, expected);
    }

    /// The special pod UID `"none"` produces no entity ID.
    #[test]
    fn pod_uid_none() {
        let entity_id = EntityId::from_pod_uid("none");
        assert!(entity_id.is_none());
    }
}