Skip to main content

saluki_common/
strings.rs

1use stringtheory::{interning::Interner, MetaString};
2
/// An incremental string builder backed by a single, reusable buffer.
///
/// Fragments are appended one at a time (characters, string slices, or formatted output), which spares callers from
/// juggling their own scratch `String` and remembering to reset it between uses: the same builder can be cleared and
/// reused for the next string.
///
/// # Limits
///
/// A builder may carry an upper bound on the length, in bytes, of the string it holds. Any append that would take
/// the buffer past that bound is rejected and the buffer is left as it was.
///
/// # Interning
///
/// A builder may be paired with an interner. The string is assembled in the builder's own buffer and only handed to
/// the interner once it is complete, so the buffer can be reused across many strings.
///
/// # Formatting
///
/// `StringBuilder` implements [`std::fmt::Write`], so formatted output can be appended with the `write!` macro. A
/// write that would exceed the limit fails with [`std::fmt::Error`].
#[derive(Debug)]
pub struct StringBuilder<I = ()> {
    /// Buffer holding the string assembled so far.
    buf: String,
    /// Maximum permitted length of `buf`, in bytes (`usize::MAX` when no limit was requested).
    limit: usize,
    /// Interner used when turning the buffer into a `MetaString`; `()` when none has been configured.
    interner: I,
}
28
29impl StringBuilder<()> {
30    /// Creates a new `StringBuilder`.
31    ///
32    /// No limit is set for the strings built by this builder.
33    pub fn new() -> Self {
34        Self {
35            buf: String::new(),
36            limit: usize::MAX,
37            interner: (),
38        }
39    }
40
41    /// Creates a new `StringBuilder` with the given limit.
42    ///
43    /// Strings that exceed the limit will be discarded.
44    pub fn with_limit(limit: usize) -> Self {
45        Self {
46            buf: String::new(),
47            limit,
48            interner: (),
49        }
50    }
51}
52
53impl<I> StringBuilder<I> {
54    /// Configures this builder with the given interner.
55    pub fn with_interner<I2>(self, interner: I2) -> StringBuilder<I2>
56    where
57        I2: Interner,
58    {
59        StringBuilder {
60            buf: self.buf,
61            limit: self.limit,
62            interner,
63        }
64    }
65
66    /// Returns `true` if the buffer of the builder is empty.
67    pub fn is_empty(&self) -> bool {
68        self.buf.is_empty()
69    }
70
71    /// Returns the length of the buffer of the builder.
72    pub fn len(&self) -> usize {
73        self.buf.len()
74    }
75
76    /// Returns the available space in the buffer of the builder.
77    pub fn available(&self) -> usize {
78        self.limit - self.buf.len()
79    }
80
81    /// Clears the buffer of the builder.
82    pub fn clear(&mut self) {
83        self.buf.clear();
84    }
85
86    /// Pushes a character into the builder.
87    ///
88    /// Returns `None` if the buffer limit would be exceeded by writing the character.
89    pub fn push(&mut self, c: char) -> Option<()> {
90        let char_len = c.len_utf8();
91        if self.buf.len() + char_len > self.limit {
92            return None;
93        }
94        self.buf.push(c);
95        Some(())
96    }
97
98    /// Pushes a string fragment into the builder.
99    ///
100    /// Returns `None` if the buffer limit would be exceeded by writing the string.
101    pub fn push_str(&mut self, s: &str) -> Option<()> {
102        if self.buf.len() + s.len() > self.limit {
103            return None;
104        }
105        self.buf.push_str(s);
106        Some(())
107    }
108
109    /// Returns a references to the current string.
110    pub fn as_str(&self) -> &str {
111        &self.buf
112    }
113}
114
115impl<I> StringBuilder<I>
116where
117    I: Interner,
118{
119    /// Attempts to build and intern the string.
120    ///
121    /// Returns `None` if the string exceeds the configured limit or if it cannot be interned.
122    pub fn try_intern(&mut self) -> Option<MetaString> {
123        self.interner.try_intern(self.as_str()).map(MetaString::from)
124    }
125
126    /// Returns a `MetaString` using the configured interner.
127    pub fn to_meta_string(&self) -> MetaString {
128        MetaString::from_interner(self.as_str(), &self.interner)
129    }
130}
131
132impl std::fmt::Write for StringBuilder {
133    fn write_str(&mut self, s: &str) -> std::fmt::Result {
134        match self.push_str(s) {
135            Some(()) => Ok(()),
136            None => Err(std::fmt::Error),
137        }
138    }
139}
140
/// Normalizes `s` so that it contains only lowercase ASCII letters, ASCII digits, and underscores.
///
/// Each character of the input yields exactly one character of output: ASCII uppercase letters are lowercased,
/// ASCII lowercase letters, digits, and underscores are kept as-is, and every other character (including each
/// non-ASCII character) is replaced with a single underscore.
pub fn lower_alphanumeric(s: &str) -> String {
    // The output is never longer, in bytes, than the input: every character maps to a one-byte ASCII character.
    let mut sanitized = String::with_capacity(s.len());
    for ch in s.chars() {
        let mapped = match ch {
            'A'..='Z' => ch.to_ascii_lowercase(),
            'a'..='z' | '0'..='9' | '_' => ch,
            _ => '_',
        };
        sanitized.push(mapped);
    }
    sanitized
}
156
157/// Converts an unsigned integer to a string representation.
158///
159/// No allocations are required.
160pub fn unsigned_integer_to_string(value: u64) -> MetaString {
161    let mut writer = itoa::Buffer::new();
162    let s = writer.format(value);
163    MetaString::from(s)
164}
165
#[cfg(test)]
mod tests {
    use std::{fmt::Write, num::NonZeroUsize};

    use stringtheory::interning::FixedSizeInterner;

    use super::*;

    /// Builder with no limit and no interner.
    fn build_string_builder() -> StringBuilder {
        StringBuilder::new()
    }

    /// Builder with the given byte limit and no interner.
    fn build_string_builder_with_limit(limit: usize) -> StringBuilder {
        StringBuilder::with_limit(limit)
    }

    /// Unlimited builder backed by a fixed-size interner of the given capacity.
    fn build_interned_string_builder(interner_capacity: usize) -> StringBuilder<FixedSizeInterner<1>> {
        StringBuilder::new().with_interner(FixedSizeInterner::new(NonZeroUsize::new(interner_capacity).unwrap()))
    }

    /// Limited builder backed by a fixed-size interner of the given capacity.
    fn build_interned_string_builder_with_limit(
        interner_capacity: usize, limit: usize,
    ) -> StringBuilder<FixedSizeInterner<1>> {
        StringBuilder::with_limit(limit)
            .with_interner(FixedSizeInterner::new(NonZeroUsize::new(interner_capacity).unwrap()))
    }

    #[test]
    fn lower_alphanumeric_basic() {
        assert_eq!(lower_alphanumeric("Hello World!"), "hello_world_");
        assert_eq!(lower_alphanumeric("1234"), "1234");
        assert_eq!(lower_alphanumeric("abc_def"), "abc_def");
        assert_eq!(lower_alphanumeric("abc-def"), "abc_def");
        assert_eq!(lower_alphanumeric("abc def"), "abc_def");
    }

    #[test]
    fn lower_alphanumeric_non_ascii() {
        // Each non-ASCII character is replaced by exactly one underscore.
        assert_eq!(lower_alphanumeric("héllo"), "h_llo");
        assert_eq!(lower_alphanumeric(""), "");
    }

    #[test]
    fn unsigned_integer_to_string_basic() {
        assert_eq!(unsigned_integer_to_string(0), MetaString::from("0"));
        assert_eq!(unsigned_integer_to_string(42), MetaString::from("42"));
        assert_eq!(
            unsigned_integer_to_string(u64::MAX),
            MetaString::from("18446744073709551615")
        );
    }

    #[test]
    fn string_builder_basic() {
        let mut builder = build_string_builder();

        assert_eq!(builder.push_str("Hello World!"), Some(()));
        assert_eq!(builder.as_str(), "Hello World!");

        builder.clear();

        assert_eq!(builder.push_str("hello"), Some(()));
        assert_eq!(builder.push_str(" "), Some(()));
        assert_eq!(builder.push_str("world"), Some(()));
        assert_eq!(builder.as_str(), "hello world");
    }

    #[test]
    fn string_builder_basic_with_interner() {
        let mut builder = build_interned_string_builder(128);

        assert_eq!(builder.push_str("Hello World!"), Some(()));
        assert_eq!(builder.try_intern(), Some(MetaString::from("Hello World!")));

        builder.clear();

        assert_eq!(builder.push_str("hello"), Some(()));
        assert_eq!(builder.push_str(" "), Some(()));
        assert_eq!(builder.push_str("world"), Some(()));
        assert_eq!(builder.try_intern(), Some(MetaString::from("hello world")));
    }

    #[test]
    fn string_builder_clear() {
        let mut builder = build_string_builder();

        assert_eq!(builder.push_str("hello"), Some(()));
        builder.clear();
        assert_eq!(builder.as_str(), "");
    }

    #[test]
    fn string_builder_is_empty_len_available() {
        const LIMIT: usize = 32;

        let mut builder = build_string_builder_with_limit(LIMIT);

        // Starts out empty:
        assert!(builder.is_empty());
        assert_eq!(builder.len(), 0);
        assert_eq!(builder.available(), LIMIT);

        // After pushing "hello":
        assert_eq!(builder.push_str("hello"), Some(()));
        assert!(!builder.is_empty());
        assert_eq!(builder.len(), 5);
        assert_eq!(builder.available(), LIMIT - 5);
        assert_eq!(builder.as_str(), "hello");

        // After pushing " world" (checked like every other push, so a rejected write fails right here):
        assert_eq!(builder.push_str(" world"), Some(()));
        assert!(!builder.is_empty());
        assert_eq!(builder.len(), 11);
        assert_eq!(builder.available(), LIMIT - 11);
        assert_eq!(builder.as_str(), "hello world");

        // Manually clearing the buffer:
        builder.clear();
        assert!(builder.is_empty());
        assert_eq!(builder.len(), 0);
        assert_eq!(builder.available(), LIMIT);
    }

    #[test]
    fn string_builder_with_limit() {
        const LIMIT: usize = 16;

        let mut builder = build_string_builder_with_limit(LIMIT);

        // Under the limit:
        let string_one = "hello, world!";
        assert!(string_one.len() < LIMIT);
        assert_eq!(builder.push_str(string_one), Some(()));
        assert_eq!(builder.as_str(), string_one);

        // Over the limit: the push is rejected and the existing contents are left untouched.
        let string_two = "definitely way too long";
        assert!(string_two.len() > LIMIT);
        assert_eq!(builder.push_str(string_two), None);
        assert_eq!(builder.as_str(), string_one);

        builder.clear();

        // Under the limit, but we build it piecemeal:
        let string_three_parts = ["hello", " ", "world"];
        let string_three = string_three_parts.join("");
        assert!(string_three.len() < LIMIT);
        for string_three_part in string_three_parts {
            assert_eq!(builder.push_str(string_three_part), Some(()));
        }
        assert_eq!(builder.as_str(), string_three);
    }

    #[test]
    fn string_builder_exact_limit() {
        const LIMIT: usize = 5;

        let mut builder = build_string_builder_with_limit(LIMIT);

        // Filling the buffer exactly to the limit is allowed:
        assert_eq!(builder.push_str("hello"), Some(()));
        assert_eq!(builder.len(), LIMIT);
        assert_eq!(builder.available(), 0);

        // Anything more is rejected, but an empty fragment still fits:
        assert_eq!(builder.push_str("!"), None);
        assert_eq!(builder.push_str(""), Some(()));
        assert_eq!(builder.as_str(), "hello");
    }

    #[test]
    fn string_builder_push_char_with_limit() {
        const LIMIT: usize = 3;

        let mut builder = build_string_builder_with_limit(LIMIT);

        // The limit is counted in bytes, so a two-byte character consumes two units:
        assert_eq!(builder.push('a'), Some(()));
        assert_eq!(builder.push('é'), Some(()));
        assert_eq!(builder.len(), 3);
        assert_eq!(builder.available(), 0);

        // No room left for even a single-byte character:
        assert_eq!(builder.push('b'), None);
        assert_eq!(builder.as_str(), "aé");

        // A four-byte character can never fit in a three-byte builder:
        builder.clear();
        assert_eq!(builder.push('\u{1F600}'), None);
        assert!(builder.is_empty());
    }

    #[test]
    fn string_builder_under_limit_interner_full() {
        const INTERNER_CAPACITY: usize = 24;
        const LIMIT: usize = 64;

        let mut builder = build_interned_string_builder_with_limit(INTERNER_CAPACITY, LIMIT);

        // Under the limit but over the interner capacity.
        //
        // The pushes should succeed, but we should not be able to build the string due to
        // the interner not having enough space:
        let string_one = "are you there, god? it's me, margaret";
        assert!(string_one.len() < LIMIT);
        assert!(string_one.len() > INTERNER_CAPACITY);
        assert_eq!(builder.push_str(string_one), Some(()));
        assert_eq!(builder.try_intern(), None);
    }

    #[test]
    fn string_builder_fmt_write() {
        let mut builder = build_string_builder();

        let name = "steve from blues clues";
        let num_apples = 5;

        write!(builder, "hello, world!").unwrap();
        write!(builder, " it's me, {}.", name).unwrap();
        write!(builder, " i've got {} apples.", num_apples).unwrap();

        assert_eq!(
            builder.as_str(),
            "hello, world! it's me, steve from blues clues. i've got 5 apples."
        );
    }

    #[test]
    fn string_builder_fmt_write_over_limit() {
        let mut builder = build_string_builder_with_limit(4);

        // A formatted write that does not fit surfaces as a formatting error:
        assert!(write!(builder, "{}", "too long").is_err());
        assert!(builder.is_empty());

        // A write that fits still succeeds afterwards:
        write!(builder, "{}", 1234).unwrap();
        assert_eq!(builder.as_str(), "1234");
    }
}