saluki_core/data_model/event/metric/
metadata.rs

1use std::{fmt, sync::Arc};
2
/// Origin product ID for the agent; rendered as `agent` when an origin is displayed.
const ORIGIN_PRODUCT_AGENT: u32 = 10;
/// Origin subproduct ID for DogStatsD; rendered as `dogstatsd` when an origin is displayed.
const ORIGIN_SUBPRODUCT_DOGSTATSD: u32 = 10;
/// Origin subproduct ID for integrations (used for JMX check metrics); rendered as `integration`.
const ORIGIN_SUBPRODUCT_INTEGRATION: u32 = 11;
/// Origin product detail ID used when there is no product detail; rendered as `none`.
const ORIGIN_PRODUCT_DETAIL_NONE: u32 = 0;
7
8/// Metric metadata.
9///
10/// Metadata includes all information that is not specifically related to the context or value of the metric itself,
11/// such as sample rate and timestamp.
12#[must_use]
13#[derive(Clone, Debug, Default, Eq, PartialEq)]
14pub struct MetricMetadata {
15    /// The hostname where the metric originated from.
16    // TODO: We made this `Arc<str>` because it's 16 bytes vs 24 bytes for `MetaString`, but one problem is that it means that
17    // we have to allocate a new `Arc<str>` for every hostname override (or empty hostname to disable the host tag) which is
18    // suboptimal.
19    //
20    // A main part of the problem is that we want to determine if a hostname was set at all -- whether empty or not -- so that
21    // when we get to host enrichment, we can determine if we should be overriding the hostname or not. This means we can't
22    // simply drop the `Option` and check if the string is empty or not.
23    pub hostname: Option<Arc<str>>,
24
25    /// The metric origin.
26    // TODO: only optional so we can default? seems like we always have one
27    pub origin: Option<MetricOrigin>,
28}
29
30impl MetricMetadata {
31    /// Returns the hostname.
32    pub fn hostname(&self) -> Option<&str> {
33        self.hostname.as_deref()
34    }
35
36    /// Returns the metric origin.
37    pub fn origin(&self) -> Option<&MetricOrigin> {
38        self.origin.as_ref()
39    }
40
41    /// Set the hostname where the metric originated from.
42    ///
43    /// This could be specified as part of a metric payload that was received from a client, or set internally to the
44    /// hostname where this process is running.
45    ///
46    /// This variant is specifically for use in builder-style APIs.
47    pub fn with_hostname(mut self, hostname: impl Into<Option<Arc<str>>>) -> Self {
48        self.hostname = hostname.into();
49        self
50    }
51
52    /// Set the hostname where the metric originated from.
53    ///
54    /// This could be specified as part of a metric payload that was received from a client, or set internally to the
55    /// hostname where this process is running.
56    pub fn set_hostname(&mut self, hostname: impl Into<Option<Arc<str>>>) {
57        self.hostname = hostname.into();
58    }
59
60    /// Set the metric origin to the given source type.
61    ///
62    /// Indicates the source of the metric, such as the product or service that emitted it, or the source component
63    /// itself that emitted it.
64    ///
65    /// This variant is specifically for use in builder-style APIs.
66    pub fn with_source_type(mut self, source_type: impl Into<Option<Arc<str>>>) -> Self {
67        self.origin = source_type.into().map(MetricOrigin::SourceType);
68        self
69    }
70
71    /// Set the metric origin to the given source type.
72    ///
73    /// Indicates the source of the metric, such as the product or service that emitted it, or the source component
74    /// itself that emitted it.
75    pub fn set_source_type(&mut self, source_type: impl Into<Option<Arc<str>>>) {
76        self.origin = source_type.into().map(MetricOrigin::SourceType);
77    }
78
79    /// Set the metric origin to the given origin.
80    ///
81    /// Indicates the source of the metric, such as the product or service that emitted it, or the source component
82    /// itself that emitted it.
83    ///
84    /// This variant is specifically for use in builder-style APIs.
85    pub fn with_origin(mut self, origin: impl Into<Option<MetricOrigin>>) -> Self {
86        self.origin = origin.into();
87        self
88    }
89
90    /// Set the metric origin to the given origin.
91    ///
92    /// Indicates the source of the metric, such as the product or service that emitted it, or the source component
93    /// itself that emitted it.
94    pub fn set_origin(&mut self, origin: impl Into<Option<MetricOrigin>>) {
95        self.origin = origin.into();
96    }
97}
98
99impl fmt::Display for MetricMetadata {
100    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
101        if let Some(origin) = &self.origin {
102            write!(f, " origin={}", origin)?;
103        }
104
105        Ok(())
106    }
107}
108
// TODO: Modeling the origin as either a source type _or_ origin metadata is not technically right.
//
// In practice, the Datadog Agent _does_ ship metrics with both a source type name and origin metadata. Perhaps
// luckily, that is only the case for check metrics, which we don't deal with in ADP (yet).
//
// Eventually, we will likely have to consider exposing both of these fields.
115
/// Categorical origin of a metric.
///
/// Describes, in high-level terms, where a metric originated from, such as the specific software package or library
/// that emitted it. This is distinct from the `OriginEntity`, which describes the specific sender of the metric.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum MetricOrigin {
    /// Originated from a generic source.
    ///
    /// Used to set the origin of a metric to the source component type itself, such as `dogstatsd` or `otel`, when
    /// richer origin metadata is not available.
    SourceType(Arc<str>),

    /// Originated from a specific product, category, and/or service.
    OriginMetadata {
        /// ID of the product that emitted the metric.
        product: u32,

        /// ID of the subproduct that emitted the metric.
        ///
        /// Previously known as "category".
        subproduct: u32,

        /// ID of the product detail.
        ///
        /// Previously known as "service".
        product_detail: u32,
    },
}
144
145impl MetricOrigin {
146    /// Creates a `MetricsOrigin` for any metric ingested via DogStatsD.
147    pub fn dogstatsd() -> Self {
148        Self::OriginMetadata {
149            product: ORIGIN_PRODUCT_AGENT,
150            subproduct: ORIGIN_SUBPRODUCT_DOGSTATSD,
151            product_detail: ORIGIN_PRODUCT_DETAIL_NONE,
152        }
153    }
154
155    /// Creates a `MetricsOrigin` for any metric that originated via an JXM check integration.
156    pub fn jmx_check(check_name: &str) -> Self {
157        let product_detail = jmx_check_name_to_product_detail(check_name);
158
159        Self::OriginMetadata {
160            product: ORIGIN_PRODUCT_AGENT,
161            subproduct: ORIGIN_SUBPRODUCT_INTEGRATION,
162            product_detail,
163        }
164    }
165
166    /// Returns `true` if the origin of the metric is DogStatsD.
167    pub fn is_dogstatsd(&self) -> bool {
168        matches!(self, Self::OriginMetadata { subproduct, .. } if *subproduct == ORIGIN_SUBPRODUCT_DOGSTATSD)
169    }
170}
171
172impl fmt::Display for MetricOrigin {
173    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
174        match self {
175            Self::SourceType(source_type) => write!(f, "source_type={}", source_type),
176            Self::OriginMetadata {
177                product,
178                subproduct,
179                product_detail,
180            } => write!(
181                f,
182                "product={} subproduct={} product_detail={}",
183                product_id_to_str(*product),
184                subproduct_id_to_str(*subproduct),
185                product_detail_id_to_str(*product_detail),
186            ),
187        }
188    }
189}
190
/// Maps the name of a JMX check to its origin product detail ID.
///
/// The match is exact (case-sensitive). Check names without a known mapping yield `0`.
fn jmx_check_name_to_product_detail(check_name: &str) -> u32 {
    // Taken from Datadog Agent mappings:
    // https://github.com/DataDog/datadog-agent/blob/fd3a119bda125462d578e0004f1370ee019ce2d5/pkg/serializer/internal/metrics/origin_mapping.go#L41
    const PRODUCT_DETAILS: &[(&str, u32)] = &[
        ("jmx-custom-check", 9),
        ("activemq", 12),
        ("cassandra", 28),
        ("confluent_platform", 40),
        ("hazelcast", 70),
        ("hive", 73),
        ("hivemq", 74),
        ("hudi", 76),
        ("ignite", 83),
        ("jboss_wildfly", 87),
        ("kafka", 90),
        ("presto", 130),
        ("solr", 147),
        ("sonarqube", 148),
        ("tomcat", 163),
        ("weblogic", 172),
    ];

    PRODUCT_DETAILS
        .iter()
        .find(|(name, _)| *name == check_name)
        .map_or(0, |&(_, product_detail)| product_detail)
}
214
215fn product_id_to_str(product_id: u32) -> &'static str {
216    match product_id {
217        ORIGIN_PRODUCT_AGENT => "agent",
218        _ => "unknown_product",
219    }
220}
221
222fn subproduct_id_to_str(subproduct_id: u32) -> &'static str {
223    match subproduct_id {
224        ORIGIN_SUBPRODUCT_DOGSTATSD => "dogstatsd",
225        ORIGIN_SUBPRODUCT_INTEGRATION => "integration",
226        _ => "unknown_subproduct",
227    }
228}
229
/// Returns the human-readable name of an origin product detail ID.
///
/// The "none" product detail is reported as `none`, and the product detail IDs of the JMX check integrations are
/// reported as the name of the corresponding check. Any other ID is reported as `unknown_product_detail`.
fn product_detail_id_to_str(product_detail_id: u32) -> &'static str {
    match product_detail_id {
        ORIGIN_PRODUCT_DETAIL_NONE => "none",

        // JMX check integrations. This is the inverse of `jmx_check_name_to_product_detail`, so that origins created
        // for JMX checks render with the check name instead of as an unknown product detail. Keep both in sync.
        9 => "jmx-custom-check",
        12 => "activemq",
        28 => "cassandra",
        40 => "confluent_platform",
        70 => "hazelcast",
        73 => "hive",
        74 => "hivemq",
        76 => "hudi",
        83 => "ignite",
        87 => "jboss_wildfly",
        90 => "kafka",
        130 => "presto",
        147 => "solr",
        148 => "sonarqube",
        163 => "tomcat",
        172 => "weblogic",

        _ => "unknown_product_detail",
    }
}