saluki_common/
strings.rs

1use stringtheory::{interning::Interner, MetaString};
2
/// A string builder.
///
/// This builder is designed to allow building strings incrementally. This can simplify certain patterns of string
/// construction by removing the need to manually manage a temporary string buffer, clearing it after building the
/// resulting string, and so on.
///
/// # Limits
///
/// The builder can be configured to limit the overall length, in bytes, of the strings it builds. A push that would
/// grow the buffer past the limit is rejected and leaves the buffer untouched.
///
/// # Interning
///
/// The builder supports providing an interner that is used to intern the finalized string. This allows for
/// efficiently building strings, reusing the intermediate buffer in between before eventually interning the string.
///
/// The interner type defaults to `()`, which means "no interner configured".
#[derive(Debug)]
pub struct StringBuilder<I = ()> {
    /// Scratch buffer holding the string built so far; reused across `clear` calls.
    buf: String,
    /// Maximum length of `buf`, in bytes. `usize::MAX` is used when no limit was requested.
    limit: usize,
    /// Interner used to intern the finalized string, or `()` when none is configured.
    interner: I,
}
23
24impl StringBuilder<()> {
25    /// Creates a new `StringBuilder`.
26    ///
27    /// No limit is set for the strings built by this builder.
28    pub fn new() -> Self {
29        Self {
30            buf: String::new(),
31            limit: usize::MAX,
32            interner: (),
33        }
34    }
35
36    /// Creates a new `StringBuilder` with the given limit.
37    ///
38    /// Strings that exceed the limit will be discarded.
39    pub fn with_limit(limit: usize) -> Self {
40        Self {
41            buf: String::new(),
42            limit,
43            interner: (),
44        }
45    }
46}
47
48impl<I> StringBuilder<I> {
49    /// Configures this builder with the given interner.
50    pub fn with_interner<I2>(self, interner: I2) -> StringBuilder<I2>
51    where
52        I2: Interner,
53    {
54        StringBuilder {
55            buf: self.buf,
56            limit: self.limit,
57            interner,
58        }
59    }
60
61    /// Returns `true` if the buffer of the builder is empty.
62    pub fn is_empty(&self) -> bool {
63        self.buf.is_empty()
64    }
65
66    /// Returns the length of the buffer of the builder.
67    pub fn len(&self) -> usize {
68        self.buf.len()
69    }
70
71    /// Returns the available space in the buffer of the builder.
72    pub fn available(&self) -> usize {
73        self.limit - self.buf.len()
74    }
75
76    /// Clears the buffer of the builder.
77    pub fn clear(&mut self) {
78        self.buf.clear();
79    }
80
81    /// Pushes a character into the builder.
82    ///
83    /// Returns `None` if the buffer limit would be exceeded by writing the character.
84    pub fn push(&mut self, c: char) -> Option<()> {
85        let char_len = c.len_utf8();
86        if self.buf.len() + char_len > self.limit {
87            return None;
88        }
89        self.buf.push(c);
90        Some(())
91    }
92
93    /// Pushes a string fragment into the builder.
94    ///
95    /// Returns `None` if the buffer limit would be exceeded by writing the string.
96    pub fn push_str(&mut self, s: &str) -> Option<()> {
97        if self.buf.len() + s.len() > self.limit {
98            return None;
99        }
100        self.buf.push_str(s);
101        Some(())
102    }
103
104    /// Returns a references to the current string.
105    pub fn as_str(&self) -> &str {
106        &self.buf
107    }
108}
109
110impl<I> StringBuilder<I>
111where
112    I: Interner,
113{
114    /// Attempts to build and intern the string.
115    ///
116    /// Returns `None` if the string exceeds the configured limit or if it cannot be interned.
117    pub fn try_intern(&mut self) -> Option<MetaString> {
118        self.interner.try_intern(self.as_str()).map(MetaString::from)
119    }
120}
121
122impl std::fmt::Write for StringBuilder {
123    fn write_str(&mut self, s: &str) -> std::fmt::Result {
124        match self.push_str(s) {
125            Some(()) => Ok(()),
126            None => Err(std::fmt::Error),
127        }
128    }
129}
130
/// Sanitizes `s` so that the result contains only lowercase ASCII letters, ASCII digits, and underscores.
///
/// ASCII uppercase letters are lowercased; ASCII lowercase letters, digits, and underscores are kept as-is. Every
/// other character (including each non-ASCII character, regardless of its encoded length) is replaced with a single
/// underscore.
pub fn lower_alphanumeric(s: &str) -> String {
    // Each input character maps to exactly one single-byte output character, so `s.len()` is an upper bound.
    let mut sanitized = String::with_capacity(s.len());
    for ch in s.chars() {
        let mapped = match ch {
            'A'..='Z' => ch.to_ascii_lowercase(),
            'a'..='z' | '0'..='9' | '_' => ch,
            _ => '_',
        };
        sanitized.push(mapped);
    }
    sanitized
}
146
147/// Converts an unsigned integer to a string representation.
148///
149/// No allocations are required.
150pub fn unsigned_integer_to_string(value: u64) -> MetaString {
151    let mut writer = itoa::Buffer::new();
152    let s = writer.format(value);
153    MetaString::from(s)
154}
155
#[cfg(test)]
mod tests {
    use std::{fmt::Write, num::NonZeroUsize};

    use stringtheory::interning::FixedSizeInterner;

    use super::*;

    /// Builds an unlimited builder with no interner.
    fn build_string_builder() -> StringBuilder {
        StringBuilder::new()
    }

    /// Builds a builder with no interner whose buffer is capped at `limit` bytes.
    fn build_string_builder_with_limit(limit: usize) -> StringBuilder {
        StringBuilder::with_limit(limit)
    }

    /// Builds an unlimited builder backed by a fixed-size interner created with `interner_capacity`.
    fn build_interned_string_builder(interner_capacity: usize) -> StringBuilder<FixedSizeInterner<1>> {
        StringBuilder::new().with_interner(FixedSizeInterner::new(NonZeroUsize::new(interner_capacity).unwrap()))
    }

    /// Builds a builder capped at `limit` bytes and backed by a fixed-size interner created with
    /// `interner_capacity`.
    fn build_interned_string_builder_with_limit(
        interner_capacity: usize, limit: usize,
    ) -> StringBuilder<FixedSizeInterner<1>> {
        StringBuilder::with_limit(limit)
            .with_interner(FixedSizeInterner::new(NonZeroUsize::new(interner_capacity).unwrap()))
    }

    #[test]
    fn lower_alphanumeric_basic() {
        assert_eq!(lower_alphanumeric("Hello World!"), "hello_world_");
        assert_eq!(lower_alphanumeric("1234"), "1234");
        assert_eq!(lower_alphanumeric("abc_def"), "abc_def");
        assert_eq!(lower_alphanumeric("abc-def"), "abc_def");
        assert_eq!(lower_alphanumeric("abc def"), "abc_def");
    }

    #[test]
    fn lower_alphanumeric_non_ascii_and_empty() {
        // Each non-ASCII character collapses to exactly one underscore, regardless of its encoded length.
        assert_eq!(lower_alphanumeric("naïve é€"), "na_ve___");
        assert_eq!(lower_alphanumeric(""), "");
    }

    #[test]
    fn unsigned_integer_to_string_basic() {
        assert_eq!(unsigned_integer_to_string(0), MetaString::from("0"));
        assert_eq!(unsigned_integer_to_string(42), MetaString::from("42"));
        assert_eq!(
            unsigned_integer_to_string(u64::MAX),
            MetaString::from("18446744073709551615")
        );
    }

    #[test]
    fn string_builder_basic() {
        let mut builder = build_string_builder();

        assert_eq!(builder.push_str("Hello World!"), Some(()));
        assert_eq!(builder.as_str(), "Hello World!");

        builder.clear();

        assert_eq!(builder.push_str("hello"), Some(()));
        assert_eq!(builder.push_str(" "), Some(()));
        assert_eq!(builder.push_str("world"), Some(()));
        assert_eq!(builder.as_str(), "hello world");
    }

    #[test]
    fn string_builder_basic_with_interner() {
        let mut builder = build_interned_string_builder(128);

        assert_eq!(builder.push_str("Hello World!"), Some(()));
        assert_eq!(builder.try_intern(), Some(MetaString::from("Hello World!")));

        builder.clear();

        assert_eq!(builder.push_str("hello"), Some(()));
        assert_eq!(builder.push_str(" "), Some(()));
        assert_eq!(builder.push_str("world"), Some(()));
        assert_eq!(builder.try_intern(), Some(MetaString::from("hello world")));
    }

    #[test]
    fn string_builder_clear() {
        let mut builder = build_string_builder();

        assert_eq!(builder.push_str("hello"), Some(()));
        builder.clear();
        assert_eq!(builder.as_str(), "");
    }

    #[test]
    fn string_builder_is_empty_len_available() {
        const LIMIT: usize = 32;

        let mut builder = build_string_builder_with_limit(LIMIT);

        // Starts out empty:
        assert!(builder.is_empty());
        assert_eq!(builder.len(), 0);
        assert_eq!(builder.available(), LIMIT);

        // After pushing "hello":
        assert_eq!(builder.push_str("hello"), Some(()));
        assert!(!builder.is_empty());
        assert_eq!(builder.len(), 5);
        assert_eq!(builder.available(), LIMIT - 5);
        assert_eq!(builder.as_str(), "hello");

        // After pushing " world" (the result is asserted so a rejected push cannot go unnoticed):
        assert_eq!(builder.push_str(" world"), Some(()));
        assert!(!builder.is_empty());
        assert_eq!(builder.len(), 11);
        assert_eq!(builder.available(), LIMIT - 11);
        assert_eq!(builder.as_str(), "hello world");

        // Manually clearing the buffer:
        builder.clear();
        assert!(builder.is_empty());
        assert_eq!(builder.len(), 0);
        assert_eq!(builder.available(), LIMIT);
    }

    #[test]
    fn string_builder_with_limit() {
        const LIMIT: usize = 16;

        let mut builder = build_string_builder_with_limit(LIMIT);

        // Under the limit:
        let string_one = "hello, world!";
        assert!(string_one.len() < LIMIT);
        assert_eq!(builder.push_str(string_one), Some(()));
        assert_eq!(builder.as_str(), string_one);

        // Over the limit:
        let string_two = "definitely way too long";
        assert!(string_two.len() > LIMIT);
        assert_eq!(builder.push_str(string_two), None);

        builder.clear();

        // Under the limit, but we build it piecemeal:
        let string_three_parts = ["hello", " ", "world"];
        let string_three = string_three_parts.join("");
        assert!(string_three.len() < LIMIT);
        for string_three_part in string_three_parts {
            assert_eq!(builder.push_str(string_three_part), Some(()));
        }
        assert_eq!(builder.as_str(), string_three);
    }

    #[test]
    fn string_builder_exact_limit() {
        let mut builder = build_string_builder_with_limit(5);

        // Filling the buffer exactly to the limit is allowed:
        assert_eq!(builder.push_str("hello"), Some(()));
        assert_eq!(builder.available(), 0);

        // An empty fragment always fits, anything else is rejected and leaves the buffer untouched:
        assert_eq!(builder.push_str(""), Some(()));
        assert_eq!(builder.push_str("!"), None);
        assert_eq!(builder.as_str(), "hello");
    }

    #[test]
    fn string_builder_push_char_respects_limit() {
        let mut builder = build_string_builder_with_limit(4);

        // One-byte character, then a two-byte character: 1 + 2 <= 4.
        assert_eq!(builder.push('a'), Some(()));
        assert_eq!(builder.push('é'), Some(()));
        assert_eq!(builder.len(), 3);

        // A three-byte character would need 6 bytes in total, so it is rejected without modifying the buffer:
        assert_eq!(builder.push('€'), None);
        assert_eq!(builder.as_str(), "aé");

        // The last remaining byte can still be used:
        assert_eq!(builder.push('b'), Some(()));
        assert_eq!(builder.available(), 0);
        assert_eq!(builder.push('c'), None);
        assert_eq!(builder.as_str(), "aéb");
    }

    #[test]
    fn string_builder_under_limit_interner_full() {
        const INTERNER_CAPACITY: usize = 24;
        const LIMIT: usize = 64;

        let mut builder = build_interned_string_builder_with_limit(INTERNER_CAPACITY, LIMIT);

        // Under the limit but over the interner capacity.
        //
        // The pushes should succeed, but we should not be able to build the string due to
        // the interner not having enough space:
        let string_one = "are you there, god? it's me, margaret";
        assert!(string_one.len() < LIMIT);
        assert!(string_one.len() > INTERNER_CAPACITY);
        assert_eq!(builder.push_str(string_one), Some(()));
        assert_eq!(builder.try_intern(), None);
    }

    #[test]
    fn string_builder_fmt_write() {
        let mut builder = build_string_builder();

        let name = "steve from blues clues";
        let num_apples = 5;

        write!(builder, "hello, world!").unwrap();
        write!(builder, " it's me, {}.", name).unwrap();
        write!(builder, " i've got {} apples.", num_apples).unwrap();

        assert_eq!(
            builder.as_str(),
            "hello, world! it's me, steve from blues clues. i've got 5 apples."
        );
    }

    #[test]
    fn string_builder_fmt_write_over_limit() {
        let mut builder = build_string_builder_with_limit(8);

        write!(builder, "hello").unwrap();

        // A literal-only `write!` is a single fragment; it does not fit, so it fails and nothing is appended:
        assert!(write!(builder, ", world!").is_err());
        assert_eq!(builder.as_str(), "hello");
    }
}