Skip to main content

saluki_core/topology/
ids.rs

1//! Component and component output identifiers.
2use core::fmt;
3use std::{borrow::Cow, ops::Deref};
4
5use crate::{
6    components::{ComponentContext, ComponentType},
7    topology::graph::DataType,
8};
9
/// Error message returned when a string is rejected as a component ID.
const INVALID_COMPONENT_ID: &str =
    "component IDs may only contain alphanumerics (a-z, A-Z, or 0-9), underscores, and hyphens";

/// Error message returned when a string is rejected as a component output ID.
const INVALID_COMPONENT_OUTPUT_ID: &str =
    "component output IDs may only contain alphanumerics (a-z, A-Z, or 0-9), underscores, hyphens, and up to one period";
14
/// A component identifier.
///
/// This is a thin wrapper over a string. Values built through `TryFrom<&str>` have passed component ID validation
/// (non-empty; ASCII alphanumerics, underscores, and hyphens only).
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct ComponentId(Cow<'static, str>);
18
19impl TryFrom<&str> for ComponentId {
20    type Error = &'static str;
21
22    fn try_from(value: &str) -> Result<Self, Self::Error> {
23        if !validate_component_id(value, false) {
24            Err(INVALID_COMPONENT_ID)
25        } else {
26            Ok(Self(value.to_string().into()))
27        }
28    }
29}
30
31impl Deref for ComponentId {
32    type Target = str;
33
34    fn deref(&self) -> &Self::Target {
35        self.0.as_ref()
36    }
37}
38
39impl fmt::Display for ComponentId {
40    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
41        self.0.fmt(f)
42    }
43}
44
/// A component output identifier.
///
/// Stored as a single string: either a bare component ID (the default output) or `<component_id>.<output_name>`
/// (a named output).
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct ComponentOutputId(Cow<'static, str>);
48
49impl ComponentOutputId {
50    /// Creates a new `ComponentOutputId` from an identifier and output definition.
51    ///
52    /// # Errors
53    ///
54    /// If generated component output ID isn't valid (identifier or output definition containing invalid characters,
55    /// etc), an error is returned.
56    pub fn from_definition<T: Copy>(
57        component_id: ComponentId, output_def: &OutputDefinition<T>,
58    ) -> Result<Self, (String, &'static str)> {
59        match output_def.output_name() {
60            None => Ok(Self(component_id.0)),
61            Some(output_name) => {
62                let output_id = format!("{}.{}", component_id.0, output_name);
63
64                if validate_component_id(&output_id, true) {
65                    Ok(Self(output_id.into()))
66                } else {
67                    Err((output_id, INVALID_COMPONENT_OUTPUT_ID))
68                }
69            }
70        }
71    }
72
73    /// Returns the component ID.
74    pub fn component_id(&self) -> ComponentId {
75        if let Some((component_id, _)) = self.0.split_once('.') {
76            ComponentId(component_id.to_string().into())
77        } else {
78            ComponentId(self.0.clone())
79        }
80    }
81
82    /// Returns the output name.
83    pub fn output(&self) -> OutputName {
84        if let Some((_, output_name)) = self.0.split_once('.') {
85            OutputName::Given(output_name.to_string().into())
86        } else {
87            OutputName::Default
88        }
89    }
90
91    /// Returns `true` if this is a default output.
92    pub fn is_default(&self) -> bool {
93        self.0.split_once('.').is_none()
94    }
95}
96
97impl TryFrom<&str> for ComponentOutputId {
98    type Error = &'static str;
99
100    fn try_from(value: &str) -> Result<Self, Self::Error> {
101        if !validate_component_id(value, true) {
102            Err(INVALID_COMPONENT_OUTPUT_ID)
103        } else {
104            Ok(Self(value.to_string().into()))
105        }
106    }
107}
108
109impl fmt::Display for ComponentOutputId {
110    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
111        self.0.fmt(f)
112    }
113}
114
/// Checks whether `id` is a well-formed component ID or, when `as_output_id` is `true`, a well-formed component
/// output ID.
///
/// Rules enforced:
/// - the identifier must not be empty;
/// - every byte must be an ASCII alphanumeric, `_`, or `-`;
/// - when `as_output_id` is `true`, a single `.` is also accepted, provided it is neither the first nor the last
///   byte.
///
/// Written with a manual `while` loop because iterators are not usable in a `const fn`.
const fn validate_component_id(id: &str, as_output_id: bool) -> bool {
    let bytes = id.as_bytes();
    let len = bytes.len();

    // Identifiers cannot be empty strings.
    if len == 0 {
        return false;
    }

    // Position of the period separator, once seen. Remembering it lets us reject a second period and, afterwards,
    // reject a separator that would leave one side of the identifier empty.
    let mut separator_pos: Option<usize> = None;

    let mut pos = 0;
    while pos < len {
        match bytes[pos] {
            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' => {}
            // First period while validating an output ID: this is our separator.
            b'.' if as_output_id && separator_pos.is_none() => separator_pos = Some(pos),
            // Either an invalid byte, a period in a plain component ID, or a second period.
            _ => return false,
        }

        pos += 1;
    }

    match separator_pos {
        // Can't have the separator as the first or last character.
        Some(sep) => sep != 0 && sep != len - 1,
        None => true,
    }
}
152
/// The name of a component output.
///
/// Components must always have at least one output, and each output is either the default output or a named one.
/// This lets a component expose several outputs, potentially with the default one acting as a catch-all.
///
/// `OutputName` is what distinguishes the default output from named outputs.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum OutputName {
    /// The default (unnamed) output.
    Default,

    /// A named output, carrying its name.
    Given(Cow<'static, str>),
}
167
168impl fmt::Display for OutputName {
169    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
170        match self {
171            OutputName::Default => write!(f, "_default"),
172            OutputName::Given(name) => write!(f, "{}", name),
173        }
174    }
175}
176
177/// An output definition.
178///
179/// Outputs are a combination of the output name and data type, which defines the data type (or types) of events that
180/// can be emitted from a particular component output.
181#[derive(Clone, Debug)]
182pub struct OutputDefinition<T> {
183    name: OutputName,
184    data_ty: T,
185}
186
187impl<T> OutputDefinition<T>
188where
189    T: Copy,
190{
191    /// Creates a default output with the given data type.
192    pub const fn default_output(data_ty: T) -> Self {
193        Self {
194            name: OutputName::Default,
195            data_ty,
196        }
197    }
198
199    /// Creates a named output with the given name and data type.
200    pub fn named_output<S>(name: S, data_ty: T) -> Self
201    where
202        S: Into<Cow<'static, str>>,
203    {
204        Self {
205            name: OutputName::Given(name.into()),
206            data_ty,
207        }
208    }
209
210    /// Returns the output name.
211    ///
212    /// If this is a default output, `None` is returned.
213    pub fn output_name(&self) -> Option<&str> {
214        match &self.name {
215            OutputName::Default => None,
216            OutputName::Given(name) => Some(name.as_ref()),
217        }
218    }
219
220    /// Returns the data type.
221    pub fn data_ty(&self) -> T {
222        self.data_ty
223    }
224}
225
226/// A component identifier that specifies the component type.
227#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
228pub struct TypedComponentId {
229    id: ComponentId,
230    ty: ComponentType,
231}
232
233impl TypedComponentId {
234    /// Creates a new `TypedComponentId` from the given component ID and component type.
235    pub fn new(id: ComponentId, ty: ComponentType) -> Self {
236        Self { id, ty }
237    }
238
239    /// Returns a reference to the component ID.
240    pub fn component_id(&self) -> &ComponentId {
241        &self.id
242    }
243
244    /// Returns the component type.
245    pub fn component_type(&self) -> ComponentType {
246        self.ty
247    }
248
249    /// Returns the component context.
250    pub fn component_context(&self) -> ComponentContext {
251        match self.ty {
252            ComponentType::Source => ComponentContext::source(self.id.clone()),
253            ComponentType::Relay => ComponentContext::relay(self.id.clone()),
254            ComponentType::Decoder => ComponentContext::decoder(self.id.clone()),
255            ComponentType::Transform => ComponentContext::transform(self.id.clone()),
256            ComponentType::Encoder => ComponentContext::encoder(self.id.clone()),
257            ComponentType::Forwarder => ComponentContext::forwarder(self.id.clone()),
258            ComponentType::Destination => ComponentContext::destination(self.id.clone()),
259        }
260    }
261
262    /// Consumes the `TypedComponentId` and returns its component ID, component type, and component context.
263    pub fn into_parts(self) -> (ComponentId, ComponentType, ComponentContext) {
264        let component_context = self.component_context();
265        (self.id, self.ty, component_context)
266    }
267}
268
269/// Unique identifier for a specified output of a component, including the data type of the output.
270#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
271pub struct TypedComponentOutputId {
272    component_output: ComponentOutputId,
273    output_ty: DataType,
274}
275
276impl TypedComponentOutputId {
277    /// Creates a new `TypedComponentOutputId` from the given component output ID and output data type.
278    pub fn new(component_output: ComponentOutputId, output_ty: DataType) -> Self {
279        Self {
280            component_output,
281            output_ty,
282        }
283    }
284
285    /// Gets a reference to the component output ID.
286    pub fn component_output(&self) -> &ComponentOutputId {
287        &self.component_output
288    }
289
290    /// Returns the output data type.
291    pub fn output_ty(&self) -> DataType {
292        self.output_ty
293    }
294}
295
/// Marker type selecting the [`AsComponentIds`] implementation for a single component ID.
///
/// It exists only so that the single-ID and multiple-ID blanket implementations don't overlap.
pub struct Single;
298
/// Marker type selecting the [`AsComponentIds`] implementation for a collection of component IDs.
///
/// It exists only so that the single-ID and multiple-ID blanket implementations don't overlap.
pub struct Multiple;
301
/// Borrowing conversion into an iterator of component IDs.
///
/// Intended for use in methods that accept component IDs as string references, where a single or multiple IDs may be
/// passed within a single parameter. This allows being generic over those possibilities such that callers can use more
/// natural values rather than contrived values (such as always having to wrap a single string in a slice, etc).
///
/// The `Marker` type parameter only serves to keep the single-ID and multiple-ID implementations from overlapping.
pub trait AsComponentIds<Marker> {
    /// Returns an iterator over the component IDs held by `self`, each yielded as a string-like value.
    ///
    /// No validation is performed here; the IDs are yielded as given.
    ///
    /// This borrows `self` rather than consuming it, as the `as_` prefix implies, so that the iterator can be built
    /// multiple times from the same value, which is necessary for connecting every upstream ID to every downstream
    /// ID when making many-to-many connections.
    fn as_component_ids(&self) -> impl Iterator<Item: AsRef<str>>;
}
315
316impl<T> AsComponentIds<Single> for T
317where
318    T: AsRef<str>,
319{
320    fn as_component_ids(&self) -> impl Iterator<Item: AsRef<str>> {
321        std::iter::once(self)
322    }
323}
324
325impl<I> AsComponentIds<Multiple> for I
326where
327    for<'a> &'a I: IntoIterator<Item: AsRef<str>>,
328{
329    fn as_component_ids(&self) -> impl Iterator<Item: AsRef<str>> {
330        self.into_iter()
331    }
332}
333
#[cfg(test)]
mod tests {
    use super::*;

    /// Valid component IDs parse, compare equal when parsed twice, and deref to the original string.
    #[test]
    fn component_id() {
        for raw in ["component", "component_1"] {
            let id = ComponentId::try_from(raw).unwrap();
            assert_eq!(id, ComponentId::try_from(raw).unwrap());
            assert_eq!(&*id, raw);
        }
    }

    /// Empty strings, invalid characters, and periods are all rejected for plain component IDs.
    #[test]
    fn component_id_invalid() {
        let rejected = [
            "",
            "non_alphanumeric_$#!",
            "cant_have_periods_for_non_component_output_id.foo",
        ];

        for raw in rejected {
            assert!(ComponentId::try_from(raw).is_err(), "{raw:?} should be rejected");
        }
    }

    /// An output ID without a period refers to the default output of the component.
    #[test]
    fn component_output_id_default() {
        let output_id = ComponentOutputId::try_from("component").unwrap();
        let expected_component = ComponentId::try_from("component").unwrap();

        assert_eq!(output_id.component_id(), expected_component);
        assert_eq!(output_id.output(), OutputName::Default);
        assert!(output_id.is_default());
    }

    /// An output ID with a period splits into the component ID and the output name.
    #[test]
    fn component_output_id_named() {
        let output_id = ComponentOutputId::try_from("component.metrics").unwrap();
        let expected_component = ComponentId::try_from("component").unwrap();

        assert_eq!(output_id.component_id(), expected_component);
        assert_eq!(output_id.output(), OutputName::Given("metrics".into()));
        assert!(!output_id.is_default());
    }

    /// Empty strings, invalid characters, extra periods, and leading/trailing periods are all rejected.
    #[test]
    fn component_output_id_invalid() {
        let rejected = [
            "",
            "non_alphanumeric_$#!",
            "too.many.periods",
            ".one_side_of_named_output_is_empty",
            "one_side_of_named_output_is_empty.",
        ];

        for raw in rejected {
            assert!(ComponentOutputId::try_from(raw).is_err(), "{raw:?} should be rejected");
        }
    }
}