saluki_common/
strings.rs

1use stringtheory::{interning::Interner, MetaString};
2
/// An incremental string builder with an optional length limit and optional interning.
///
/// Fragments are appended to an internal buffer that can be cleared and reused between strings. This removes the
/// need for callers to manage their own temporary `String`, remember to clear it after each use, and so on.
///
/// # Limits
///
/// A builder can be created with a maximum length, measured in bytes. Pushes that would take the buffer past that
/// length are rejected and leave the buffer unchanged.
///
/// # Interning
///
/// A builder can be given an interner, which is then used to intern the finished string. The intermediate buffer
/// is kept and reused across strings, so only the final, interned value is handed out.
pub struct StringBuilder<I = ()> {
    // Scratch buffer holding the string built so far.
    buf: String,
    // Maximum allowed length of `buf`, in bytes (`usize::MAX` when no limit was requested).
    limit: usize,
    // Interner used to finalize strings; `()` when none has been configured.
    interner: I,
}
23
24impl StringBuilder<()> {
25    /// Creates a new `StringBuilder`.
26    ///
27    /// No limit is set for the strings built by this builder.
28    pub fn new() -> Self {
29        Self {
30            buf: String::new(),
31            limit: usize::MAX,
32            interner: (),
33        }
34    }
35
36    /// Creates a new `StringBuilder` with the given limit.
37    ///
38    /// Strings that exceed the limit will be discarded.
39    pub fn with_limit(limit: usize) -> Self {
40        Self {
41            buf: String::new(),
42            limit,
43            interner: (),
44        }
45    }
46}
47
48impl<I> StringBuilder<I> {
49    /// Configures this builder with the given interner.
50    pub fn with_interner<I2>(self, interner: I2) -> StringBuilder<I2>
51    where
52        I2: Interner,
53    {
54        StringBuilder {
55            buf: self.buf,
56            limit: self.limit,
57            interner,
58        }
59    }
60
61    /// Returns `true` if the buffer of the builder is empty.
62    pub fn is_empty(&self) -> bool {
63        self.buf.is_empty()
64    }
65
66    /// Returns the length of the buffer of the builder.
67    pub fn len(&self) -> usize {
68        self.buf.len()
69    }
70
71    /// Returns the available space in the buffer of the builder.
72    pub fn available(&self) -> usize {
73        self.limit - self.buf.len()
74    }
75
76    /// Clears the buffer of the builder.
77    pub fn clear(&mut self) {
78        self.buf.clear();
79    }
80
81    /// Pushes a character into the builder.
82    ///
83    /// Returns `None` if the buffer limit would be exceeded by writing the character.
84    pub fn push(&mut self, c: char) -> Option<()> {
85        let char_len = c.len_utf8();
86        if self.buf.len() + char_len > self.limit {
87            return None;
88        }
89        self.buf.push(c);
90        Some(())
91    }
92
93    /// Pushes a string fragment into the builder.
94    ///
95    /// Returns `None` if the buffer limit would be exceeded by writing the string.
96    pub fn push_str(&mut self, s: &str) -> Option<()> {
97        if self.buf.len() + s.len() > self.limit {
98            return None;
99        }
100        self.buf.push_str(s);
101        Some(())
102    }
103
104    /// Returns a references to the current string.
105    pub fn as_str(&self) -> &str {
106        &self.buf
107    }
108}
109
110impl<I> StringBuilder<I>
111where
112    I: Interner,
113{
114    /// Attempts to build and intern the string.
115    ///
116    /// Returns `None` if the string exceeds the configured limit or if it cannot be interned.
117    pub fn try_intern(&mut self) -> Option<MetaString> {
118        self.interner.try_intern(self.as_str()).map(MetaString::from)
119    }
120}
121
122impl std::fmt::Write for StringBuilder {
123    fn write_str(&mut self, s: &str) -> std::fmt::Result {
124        match self.push_str(s) {
125            Some(()) => Ok(()),
126            None => Err(std::fmt::Error),
127        }
128    }
129}
130
/// Sanitizes the input so that it only contains lowercase ASCII letters, ASCII digits, and underscores.
///
/// ASCII uppercase letters are lowercased. Every other character that is not already a lowercase ASCII letter, an
/// ASCII digit, or an underscore is replaced with a single underscore; this includes each non-ASCII character.
pub fn lower_alphanumeric(s: &str) -> String {
    // Every input character maps to exactly one ASCII byte, so the output never needs more than `s.len()` bytes.
    let mut sanitized = String::with_capacity(s.len());
    for ch in s.chars() {
        let mapped = match ch {
            'a'..='z' | '0'..='9' | '_' => ch,
            'A'..='Z' => ch.to_ascii_lowercase(),
            _ => '_',
        };
        sanitized.push(mapped);
    }
    sanitized
}
146
#[cfg(test)]
mod tests {
    use std::{fmt::Write, num::NonZeroUsize};

    use stringtheory::interning::FixedSizeInterner;

    use super::*;

    // Builder with no limit and no interner.
    fn build_string_builder() -> StringBuilder {
        StringBuilder::new()
    }

    // Builder with a byte limit and no interner.
    fn build_string_builder_with_limit(limit: usize) -> StringBuilder {
        StringBuilder::with_limit(limit)
    }

    // Builder with no limit, backed by a fixed-size interner of the given capacity.
    fn build_interned_string_builder(interner_capacity: usize) -> StringBuilder<FixedSizeInterner<1>> {
        StringBuilder::new().with_interner(FixedSizeInterner::new(NonZeroUsize::new(interner_capacity).unwrap()))
    }

    // Builder with a byte limit, backed by a fixed-size interner of the given capacity.
    fn build_interned_string_builder_with_limit(
        interner_capacity: usize, limit: usize,
    ) -> StringBuilder<FixedSizeInterner<1>> {
        StringBuilder::with_limit(limit)
            .with_interner(FixedSizeInterner::new(NonZeroUsize::new(interner_capacity).unwrap()))
    }

    #[test]
    fn lower_alphanumeric_basic() {
        assert_eq!(lower_alphanumeric("Hello World!"), "hello_world_");
        assert_eq!(lower_alphanumeric("1234"), "1234");
        assert_eq!(lower_alphanumeric("abc_def"), "abc_def");
        assert_eq!(lower_alphanumeric("abc-def"), "abc_def");
        assert_eq!(lower_alphanumeric("abc def"), "abc_def");
    }

    #[test]
    fn string_builder_basic() {
        let mut builder = build_string_builder();

        assert_eq!(builder.push_str("Hello World!"), Some(()));
        assert_eq!(builder.as_str(), "Hello World!");

        builder.clear();

        assert_eq!(builder.push_str("hello"), Some(()));
        assert_eq!(builder.push_str(" "), Some(()));
        assert_eq!(builder.push_str("world"), Some(()));
        assert_eq!(builder.as_str(), "hello world");
    }

    #[test]
    fn string_builder_basic_with_interner() {
        let mut builder = build_interned_string_builder(128);

        assert_eq!(builder.push_str("Hello World!"), Some(()));
        assert_eq!(builder.try_intern(), Some(MetaString::from("Hello World!")));

        builder.clear();

        assert_eq!(builder.push_str("hello"), Some(()));
        assert_eq!(builder.push_str(" "), Some(()));
        assert_eq!(builder.push_str("world"), Some(()));
        assert_eq!(builder.try_intern(), Some(MetaString::from("hello world")));
    }

    #[test]
    fn string_builder_clear() {
        let mut builder = build_string_builder();

        assert_eq!(builder.push_str("hello"), Some(()));
        builder.clear();
        assert_eq!(builder.as_str(), "");
    }

    #[test]
    fn string_builder_is_empty_len_available() {
        const LIMIT: usize = 32;

        let mut builder = build_string_builder_with_limit(LIMIT);

        // Starts out empty:
        assert!(builder.is_empty());
        assert_eq!(builder.len(), 0);
        assert_eq!(builder.available(), LIMIT);

        // After pushing "hello":
        assert_eq!(builder.push_str("hello"), Some(()));
        assert!(!builder.is_empty());
        assert_eq!(builder.len(), 5);
        assert_eq!(builder.available(), LIMIT - 5);
        assert_eq!(builder.as_str(), "hello");

        // After pushing " world":
        assert_eq!(builder.push_str(" world"), Some(()));
        assert!(!builder.is_empty());
        assert_eq!(builder.len(), 11);
        assert_eq!(builder.available(), LIMIT - 11);
        assert_eq!(builder.as_str(), "hello world");

        // Manually clearing the buffer:
        builder.clear();
        assert!(builder.is_empty());
        assert_eq!(builder.len(), 0);
        assert_eq!(builder.available(), LIMIT);
    }

    #[test]
    fn string_builder_push_char() {
        const LIMIT: usize = 4;

        let mut builder = build_string_builder_with_limit(LIMIT);

        // One-byte and two-byte characters fit:
        assert_eq!(builder.push('a'), Some(()));
        assert_eq!(builder.push('\u{e9}'), Some(()));
        assert_eq!(builder.len(), 3);

        // A three-byte character would exceed the limit and must leave the buffer untouched:
        assert_eq!(builder.push('\u{20ac}'), None);
        assert_eq!(builder.as_str(), "a\u{e9}");

        // Filling the buffer exactly is allowed, going past it is not:
        assert_eq!(builder.push('b'), Some(()));
        assert_eq!(builder.available(), 0);
        assert_eq!(builder.push('c'), None);
        assert_eq!(builder.as_str(), "a\u{e9}b");
    }

    #[test]
    fn string_builder_with_limit() {
        const LIMIT: usize = 16;

        let mut builder = build_string_builder_with_limit(LIMIT);

        // Under the limit:
        let string_one = "hello, world!";
        assert!(string_one.len() < LIMIT);
        assert_eq!(builder.push_str(string_one), Some(()));
        assert_eq!(builder.as_str(), string_one);

        // Over the limit:
        let string_two = "definitely way too long";
        assert!(string_two.len() > LIMIT);
        assert_eq!(builder.push_str(string_two), None);

        builder.clear();

        // Under the limit, but we build it piecemeal:
        let string_three_parts = ["hello", " ", "world"];
        let string_three = string_three_parts.join("");
        assert!(string_three.len() < LIMIT);
        for string_three_part in string_three_parts {
            assert_eq!(builder.push_str(string_three_part), Some(()));
        }
        assert_eq!(builder.as_str(), string_three);
    }

    #[test]
    fn string_builder_under_limit_interner_full() {
        const INTERNER_CAPACITY: usize = 24;
        const LIMIT: usize = 64;

        let mut builder = build_interned_string_builder_with_limit(INTERNER_CAPACITY, LIMIT);

        // Under the limit but over the interner capacity.
        //
        // The pushes should succeed, but we should not be able to build the string due to
        // the interner not having enough space:
        let string_one = "are you there, god? it's me, margaret";
        assert!(string_one.len() < LIMIT);
        assert!(string_one.len() > INTERNER_CAPACITY);
        assert_eq!(builder.push_str(string_one), Some(()));
        assert_eq!(builder.try_intern(), None);
    }

    #[test]
    fn string_builder_fmt_write() {
        let mut builder = build_string_builder();

        let name = "steve from blues clues";
        let num_apples = 5;

        write!(builder, "hello, world!").unwrap();
        write!(builder, " it's me, {}.", name).unwrap();
        write!(builder, " i've got {} apples.", num_apples).unwrap();

        assert_eq!(
            builder.as_str(),
            "hello, world! it's me, steve from blues clues. i've got 5 apples."
        );
    }

    #[test]
    fn string_builder_fmt_write_over_limit() {
        const LIMIT: usize = 8;

        let mut builder = build_string_builder_with_limit(LIMIT);

        // Fits within the limit:
        assert!(write!(builder, "hello").is_ok());

        // Would exceed the limit, so the write fails and the buffer is left as it was:
        assert!(write!(builder, ", world!").is_err());
        assert_eq!(builder.as_str(), "hello");
    }
}