dd_sds/
path.rs

1use std::borrow::Cow;
2use std::fmt::{Debug, Display, Formatter};
3
4use crate::proximity_keywords::{
5    should_bypass_standardize_path, standardize_path_chars, BypassStandardizePathResult,
6    UNIFIED_LINK_CHAR,
7};
8use serde::{Deserialize, Serialize};
9
10#[derive(Serialize, Deserialize, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
11pub struct Path<'a> {
12    pub segments: Vec<PathSegment<'a>>,
13}
14
15#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
16#[serde(tag = "type", content = "value")]
17pub enum PathSegment<'a> {
18    Field(Cow<'a, str>),
19    Index(usize),
20}
21
22impl Path<'_> {
23    /// An empty path - pointing to the root.
24    pub fn root() -> Self {
25        Self { segments: vec![] }
26    }
27
28    /// Converts and path segment references into Owned strings so the lifetime can be static.
29    pub fn into_static(&self) -> Path<'static> {
30        Path {
31            segments: self.segments.iter().map(PathSegment::into_static).collect(),
32        }
33    }
34
35    pub fn len(&self) -> usize {
36        self.segments.len()
37    }
38
39    pub fn is_empty(&self) -> bool {
40        self.segments.is_empty()
41    }
42
43    pub fn starts_with(&self, prefix: &Path) -> bool {
44        if prefix.len() > self.len() {
45            // the prefix is longer than the path
46            return false;
47        }
48
49        // ensure all segments of `prefix` match self
50        for (a, b) in prefix.segments.iter().zip(self.segments.iter()) {
51            if a != b {
52                return false;
53            }
54        }
55        true
56    }
57
58    fn size_segments_only(&self) -> usize {
59        self.segments
60            .iter()
61            .map(|segment| {
62                if let PathSegment::Field(field) = segment {
63                    return field.len();
64                }
65                0
66            })
67            .sum()
68    }
69
70    pub fn sanitize(&self) -> String {
71        let size_segments = self.size_segments_only();
72        let mut sanitized_path = String::with_capacity(size_segments + size_segments / 2);
73        self.segments.iter().enumerate().for_each(|(i, segment)| {
74            if let PathSegment::Field(field) = segment {
75                if i != 0 {
76                    sanitized_path.push(UNIFIED_LINK_CHAR);
77                }
78
79                if should_bypass_standardize_path(field) != BypassStandardizePathResult::NoBypass {
80                    sanitized_path.push_str(field.to_ascii_lowercase().as_str())
81                } else {
82                    standardize_path_chars(field, |c| {
83                        sanitized_path.push(c.to_ascii_lowercase());
84                    });
85                }
86            }
87        });
88
89        sanitized_path
90    }
91}
92
93impl<'a> PathSegment<'a> {
94    pub fn into_static(&self) -> PathSegment<'static> {
95        match self {
96            PathSegment::Field(cow) => PathSegment::Field(Cow::Owned(cow.as_ref().to_owned())),
97            PathSegment::Index(i) => PathSegment::Index(*i),
98        }
99    }
100
101    pub fn is_index(&self) -> bool {
102        matches!(self, PathSegment::Index(_))
103    }
104
105    pub fn length(&self) -> usize {
106        if let PathSegment::Field(field) = self {
107            field.len()
108        } else {
109            0
110        }
111    }
112
113    pub fn sanitize(&self) -> Option<Cow<'a, str>> {
114        if let PathSegment::Field(field) = self {
115            match should_bypass_standardize_path(field) {
116                BypassStandardizePathResult::BypassAndAllLowercase => Some(field.clone()),
117                BypassStandardizePathResult::BypassAndAllUppercase => {
118                    Some(Cow::Owned(field.to_ascii_lowercase()))
119                }
120                BypassStandardizePathResult::NoBypass => {
121                    let mut sanitized_segment = String::with_capacity(self.length() + 1);
122                    standardize_path_chars(field, |c| {
123                        sanitized_segment.push(c.to_ascii_lowercase());
124                    });
125                    Some(Cow::Owned(sanitized_segment))
126                }
127            }
128        } else {
129            None
130        }
131    }
132}
133
134impl Debug for Path<'_> {
135    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
136        Display::fmt(self, f)
137    }
138}
139
140// Note: This format isn't great, some indices / fields can collide, and fields aren't escaped.
141// It's kept like this to match the existing "logs-backend" behavior.
142impl Display for Path<'_> {
143    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
144        for (i, segment) in self.segments.iter().enumerate() {
145            match segment {
146                PathSegment::Field(field) => {
147                    if i != 0 {
148                        write!(f, ".")?;
149                    }
150                    write!(f, "{}", field)?;
151                }
152                PathSegment::Index(i) => {
153                    write!(f, "[{}]", i)?;
154                }
155            }
156        }
157        Ok(())
158    }
159}
160
161impl<'a> From<Vec<PathSegment<'a>>> for Path<'a> {
162    fn from(segments: Vec<PathSegment<'a>>) -> Self {
163        Self { segments }
164    }
165}
166
167impl<'a> From<&'a str> for PathSegment<'a> {
168    fn from(value: &'a str) -> Self {
169        Self::Field(Cow::Borrowed(value))
170    }
171}
172
173impl From<usize> for PathSegment<'static> {
174    fn from(value: usize) -> Self {
175        PathSegment::Index(value)
176    }
177}
178
#[cfg(test)]
mod test {
    use super::*;
    use crate::proximity_keywords::UNIFIED_LINK_STR;

    /// Sanitizes every segment on its own, drops the segments that yield
    /// `None`, and joins what is left with `UNIFIED_LINK_STR`.
    fn sanitize_by_segment(path: &Path<'_>) -> String {
        path.segments
            .iter()
            .filter_map(|segment| segment.sanitize())
            .collect::<Vec<_>>()
            .join(UNIFIED_LINK_STR)
    }

    #[test]
    fn test_starts_with() {
        let foo = Path::from(vec!["foo".into()]);
        let foo_bar = Path::from(vec!["foo".into(), "bar".into()]);
        let array_foo = Path::from(vec![0.into(), "foo".into()]);

        // (path, candidate prefix, expected result)
        let cases = [
            (&foo_bar, &foo, true),
            (&foo, &foo_bar, false),
            (&foo, &foo, true),
            (&foo, &array_foo, false),
            (&array_foo, &foo, false),
        ];

        for (path, prefix, expected) in cases.iter() {
            assert_eq!(path.starts_with(prefix), *expected);
        }
    }

    #[test]
    fn test_sanitize_segments() {
        let cases = vec![
            (
                Path::from(vec!["hello".into(), 0.into(), "world".into()]),
                "hello.world",
            ),
            (
                Path::from(vec!["hello".into(), 1.into(), "CHICKEN".into(), 2.into()]),
                "hello.chicken",
            ),
            (
                Path::from(vec![
                    "hello_world-of".into(),
                    1.into(),
                    "CHICKEN".into(),
                    2.into(),
                ]),
                "hello.world.of.chicken",
            ),
            (
                Path::from(vec!["hello_world-of-".into(), "/chickens_/".into()]),
                "hello.world.of-./chickens./",
            ),
        ];

        for (path, expected) in cases {
            assert_eq!(sanitize_by_segment(&path), expected);
        }
    }

    #[test]
    fn test_sanitize_path() {
        let cases = vec![
            (
                Path::from(vec!["hello".into(), 0.into(), "world".into()]),
                "hello.world",
            ),
            (
                Path::from(vec!["hello".into(), 1.into(), "CHICKEN".into(), 2.into()]),
                "hello.chicken",
            ),
            (
                Path::from(vec![
                    "hello_world-of".into(),
                    1.into(),
                    "CHICKEN".into(),
                    2.into(),
                ]),
                "hello.world.of.chicken",
            ),
            (
                Path::from(vec!["hello_world-of-".into(), "/chickens_/".into()]),
                "hello.world.of-./chickens./",
            ),
            (Path::from(vec!["helloChicken".into()]), "hello.chicken"),
        ];

        for (path, expected) in cases {
            assert_eq!(path.sanitize(), expected);
        }
    }

    #[test]
    fn test_size() {
        // Only field names count; the index contributes nothing.
        let ascii_only = Path::from(vec!["hello".into(), 0.into(), "world".into()]);
        assert_eq!(ascii_only.size_segments_only(), 10);

        // The size is measured in bytes, so the non-ASCII character counts for more than one.
        let with_multibyte = Path::from(vec!["".into(), 0.into(), "path✅".into()]);
        assert_eq!(with_multibyte.size_segments_only(), 7);
    }
}