saluki_env/workload/
entity.rs

1use std::{cmp::Ordering, fmt};
2
3use stringtheory::MetaString;
4use tracing::warn;
5
/// Prefix written before the pod UID when an `EntityId::PodUid` is formatted as a string.
const ENTITY_PREFIX_POD_UID: &str = "kubernetes_pod_uid://";
/// Prefix written before the container ID when an `EntityId::Container` is formatted as a string.
const ENTITY_PREFIX_CONTAINER_ID: &str = "container_id://";
/// Prefix written before the inode when an `EntityId::ContainerInode` is formatted as a string.
const ENTITY_PREFIX_CONTAINER_INODE: &str = "container_inode://";
/// Prefix written before the PID when an `EntityId::ContainerPid` is formatted as a string.
const ENTITY_PREFIX_CONTAINER_PID: &str = "container_pid://";

/// Local Data prefix marking a value as a container cgroup controller inode.
const LOCAL_DATA_PREFIX_INODE: &str = "in-";
/// Local Data prefix marking a value as a container ID.
const LOCAL_DATA_PREFIX_CID: &str = "ci-";
/// Legacy Local Data prefix marking a value as a container ID.
const LOCAL_DATA_PREFIX_LEGACY_CID: &str = "cid-";
14
/// An entity identifier.
///
/// Each variant has a string form, produced by the `Display` implementation, made up of a scheme-like prefix
/// followed by the inner value (for example, `container_id://<id>`). The global entity is rendered as
/// `system://global`.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum EntityId {
    /// The global entity.
    ///
    /// Represents the root of the entity hierarchy, which is equivalent to a "global" scope. This is generally used
    /// to represent a collection of metadata entries that are not associated with any specific entity, but with
    /// anything within the workload, such as host or cluster tags.
    Global,

    /// A Kubernetes pod UID.
    ///
    /// Represents the UUID of a specific Kubernetes pod.
    PodUid(MetaString),

    /// A container ID.
    ///
    /// This is generally a long hexadecimal string, as generally used by container runtimes like `containerd`.
    Container(MetaString),

    /// A container inode.
    ///
    /// Represents the inode of the cgroups controller for a specific container.
    ContainerInode(u64),

    /// A container PID.
    ///
    /// Represents the PID of the process within a specific container.
    ContainerPid(u32),
}
45
46impl EntityId {
47    /// Creates an `EntityId` from Local Data.
48    ///
49    /// This method follows the same logic/behavior as the Datadog Agent's origin detection logic:
50    /// - If the input starts with `ci-`, we treat it as a container ID.
51    /// - If the input starts with `in-`, we treat it as a container cgroup controller inode.
52    /// - If the input contains a comma, we split the input and search for either a prefixed container ID or prefixed
53    ///   inode. If both are present, we use the container ID.
54    /// - If the input starts with `cid-`, we treat it as a container ID.
55    /// - If none of the above conditions are met, we assume the entire input is a container ID.
56    ///
57    /// If the input fails to be parsed in a valid fashion (e.g., `in-` prefix but the remainder is not a valid
58    /// integer), or is empty, `None` is returned.
59    pub fn from_local_data<S>(raw_local_data: S) -> Option<Self>
60    where
61        S: AsRef<str> + Into<MetaString>,
62    {
63        let local_data_value = raw_local_data.as_ref();
64        if local_data_value.is_empty() {
65            return None;
66        }
67
68        if local_data_value.contains(',') {
69            let mut maybe_container_inode = None;
70            for local_data_subvalue in local_data_value.split(',') {
71                match parse_local_data_value(local_data_subvalue) {
72                    // We always prefer the container ID if we get it.
73                    Ok(Some(Self::Container(cid))) => return Some(Self::Container(cid)),
74                    Ok(Some(Self::ContainerInode(inode))) => maybe_container_inode = Some(inode),
75                    Err(()) => {
76                        warn!(
77                            local_data = local_data_value,
78                            local_data_subvalue,
79                            "Failed parsing Local Data subvalue. Metric may be missing origin detection-based tags."
80                        );
81                    }
82                    _ => {}
83                }
84            }
85
86            // Return the container inode if we found one.
87            if let Some(inode) = maybe_container_inode {
88                return Some(Self::ContainerInode(inode));
89            }
90        }
91
92        // Try to parse the local data value as a single entity ID value, falling back to treating the entire value as a
93        // container ID otherwise.
94        match parse_local_data_value(local_data_value) {
95            // We always prefer the container ID if we get it.
96            Ok(Some(eid)) => Some(eid),
97            Ok(None) => Some(Self::Container(raw_local_data.into())),
98            Err(()) => {
99                warn!(
100                    local_data = local_data_value,
101                    "Failed parsing Local Data value. Metric may be missing origin detection-based tags."
102                );
103                None
104            }
105        }
106    }
107
108    /// Creates an `EntityId` from a Kubernetes pod UID.
109    ///
110    /// If the pod UID value is "none", this will return `None`.
111    pub fn from_pod_uid<S>(pod_uid: S) -> Option<Self>
112    where
113        S: AsRef<str> + Into<MetaString>,
114    {
115        if pod_uid.as_ref() == "none" {
116            return None;
117        }
118        Some(Self::PodUid(pod_uid.into()))
119    }
120
121    /// Returns the inner container ID value, if this entity ID is a `Container`.
122    ///
123    /// Otherwise, `None` is returned and the original entity ID is consumed.
124    pub fn try_into_container(self) -> Option<MetaString> {
125        match self {
126            Self::Container(container_id) => Some(container_id),
127            _ => None,
128        }
129    }
130
131    fn precedence_value(&self) -> usize {
132        match self {
133            Self::Global => 0,
134            Self::PodUid(_) => 1,
135            Self::Container(_) => 2,
136            Self::ContainerInode(_) => 3,
137            Self::ContainerPid(_) => 4,
138        }
139    }
140}
141
142impl fmt::Display for EntityId {
143    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
144        match self {
145            Self::Global => write!(f, "system://global"),
146            Self::PodUid(pod_uid) => write!(f, "{}{}", ENTITY_PREFIX_POD_UID, pod_uid),
147            Self::Container(container_id) => write!(f, "{}{}", ENTITY_PREFIX_CONTAINER_ID, container_id),
148            Self::ContainerInode(inode) => write!(f, "{}{}", ENTITY_PREFIX_CONTAINER_INODE, inode),
149            Self::ContainerPid(pid) => write!(f, "{}{}", ENTITY_PREFIX_CONTAINER_PID, pid),
150        }
151    }
152}
153
154impl serde::Serialize for EntityId {
155    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
156    where
157        S: serde::Serializer,
158    {
159        // We have this manual implementation of `Serialize` just to avoid needing to bring in `serde_with` to get the
160        // helper that utilizes the `Display` implementation.
161        serializer.collect_str(self)
162    }
163}
164
/// A wrapper for entity IDs that sorts them in a manner consistent with the expected precedence of entity IDs.
///
/// This type establishes a total ordering over entity IDs based on their logical precedence, which is as follows:
///
/// - global (highest precedence)
/// - pod
/// - container
/// - container inode
/// - container PID (lowest precedence)
///
/// Wrapped entity IDs are sorted from highest to lowest precedence. Entity IDs with the same precedence are further
/// ordered by their inner value: lexicographical ordering is used for string identifiers, and numerical ordering is
/// used for numeric identifiers.
#[derive(Eq, PartialEq)]
pub struct HighestPrecedenceEntityIdRef<'a>(&'a EntityId);
180
181impl<'a> From<&'a EntityId> for HighestPrecedenceEntityIdRef<'a> {
182    fn from(entity_id: &'a EntityId) -> Self {
183        Self(entity_id)
184    }
185}
186
187impl PartialOrd for HighestPrecedenceEntityIdRef<'_> {
188    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
189        Some(self.cmp(other))
190    }
191}
192
193impl Ord for HighestPrecedenceEntityIdRef<'_> {
194    fn cmp(&self, other: &Self) -> Ordering {
195        // Do the initial comparison based on the implicit precedence of each entity ID.
196        let self_precedence = self.0.precedence_value();
197        let other_precedence = other.0.precedence_value();
198        if self_precedence != other_precedence {
199            return self_precedence.cmp(&other_precedence);
200        }
201
202        // We have two entities at the same level of precedence, so we need to compare their actual values.
203        match (self.0, other.0) {
204            // Global entities are always equal.
205            (EntityId::Global, EntityId::Global) => Ordering::Equal,
206            (EntityId::PodUid(self_pod_uid), EntityId::PodUid(other_pod_uid)) => self_pod_uid.cmp(other_pod_uid),
207            (EntityId::Container(self_container_id), EntityId::Container(other_container_id)) => {
208                self_container_id.cmp(other_container_id)
209            }
210            (EntityId::ContainerInode(self_inode), EntityId::ContainerInode(other_inode)) => {
211                self_inode.cmp(other_inode)
212            }
213            (EntityId::ContainerPid(self_pid), EntityId::ContainerPid(other_pid)) => self_pid.cmp(other_pid),
214            _ => unreachable!("entities with different precedence should not be compared"),
215        }
216    }
217}
218
219fn parse_local_data_value(raw_local_data_value: &str) -> Result<Option<EntityId>, ()> {
220    if raw_local_data_value.starts_with(LOCAL_DATA_PREFIX_CID) {
221        let cid = raw_local_data_value.trim_start_matches(LOCAL_DATA_PREFIX_CID);
222        Ok(Some(EntityId::Container(cid.into())))
223    } else if raw_local_data_value.starts_with(LOCAL_DATA_PREFIX_INODE) {
224        let inode = raw_local_data_value
225            .trim_start_matches(LOCAL_DATA_PREFIX_INODE)
226            .parse()
227            .map_err(|_| ())?;
228        Ok(Some(EntityId::ContainerInode(inode)))
229    } else if raw_local_data_value.starts_with(LOCAL_DATA_PREFIX_LEGACY_CID) {
230        let cid = raw_local_data_value.trim_start_matches(LOCAL_DATA_PREFIX_LEGACY_CID);
231        Ok(Some(EntityId::Container(cid.into())))
232    } else {
233        Ok(None)
234    }
235}
236
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn local_data() {
        const RAW_CID: &str = "ci-singlecontainerid";
        const RAW_LEGACY_CID: &str = "cid-singlecontainerid";
        const RAW_INODE: &str = "in-12345";

        // Expected results, built fresh for every case that needs them.
        let container = || Some(EntityId::Container(MetaString::from_static("singlecontainerid")));
        let inode = || Some(EntityId::ContainerInode(12345));
        let joined = |first: &str, second: &str| [first, second].join(",");

        let cases: Vec<(String, Option<EntityId>)> = vec![
            // An empty input is never valid.
            (String::new(), None),
            // The inode prefix followed by something that is not a number.
            ("in-notanumber".to_string(), None),
            // Anything without a recognized prefix is used as the container ID as-is.
            (
                "random".to_string(),
                Some(EntityId::Container(MetaString::from("random"))),
            ),
            // A single prefixed value.
            (RAW_CID.to_string(), container()),
            (RAW_INODE.to_string(), inode()),
            (RAW_LEGACY_CID.to_string(), container()),
            // Comma-separated lists of prefixed values.
            //
            // The container ID must win over the inode regardless of position, and pieces that cannot be
            // interpreted must be skipped while walking the list.
            (joined(RAW_CID, RAW_INODE), container()),
            (joined(RAW_INODE, RAW_CID), container()),
            (joined(RAW_LEGACY_CID, RAW_INODE), container()),
            (joined(RAW_INODE, RAW_LEGACY_CID), container()),
            (joined(RAW_CID, "invalid"), container()),
            (joined(RAW_LEGACY_CID, "invalid"), container()),
            (joined(RAW_INODE, "invalid"), inode()),
        ];

        for (input, expected) in cases {
            assert_eq!(EntityId::from_local_data(input), expected);
        }
    }

    #[test]
    fn pod_uid_valid() {
        let raw_pod_uid = "abcdef1234567890";
        assert_eq!(
            EntityId::from_pod_uid(raw_pod_uid),
            Some(EntityId::PodUid(MetaString::from(raw_pod_uid)))
        );
    }

    #[test]
    fn pod_uid_none() {
        assert_eq!(EntityId::from_pod_uid("none"), None);
    }
}