Skip to main content

saluki_common/
strings.rs

1use stringtheory::{interning::Interner, MetaString};
2
/// A string builder.
///
/// This builder is designed to allow building strings incrementally. This can simplify certain patterns of string
/// construction by removing the need to manually manage a temporary string buffer, clearing it after building the
/// resulting string, and so on.
///
/// # Limits
///
/// The builder can be configured to limit the overall length, in bytes, of the strings it builds.
///
/// # Interning
///
/// The builder supports providing an interner that is used to intern the finalized string. This allows for
/// efficiently building strings, reusing the intermediate buffer in between before eventually interning the string.
#[derive(Debug)]
pub struct StringBuilder<I = ()> {
    /// Scratch buffer holding the string built so far.
    buf: String,
    /// Maximum length of `buf`, in bytes. `usize::MAX` means "no limit".
    limit: usize,
    /// Interner used to intern the finished string, or `()` when none is configured.
    interner: I,
}
23
24impl StringBuilder<()> {
25    /// Creates a new `StringBuilder`.
26    ///
27    /// No limit is set for the strings built by this builder.
28    pub fn new() -> Self {
29        Self {
30            buf: String::new(),
31            limit: usize::MAX,
32            interner: (),
33        }
34    }
35
36    /// Creates a new `StringBuilder` with the given limit.
37    ///
38    /// Strings that exceed the limit will be discarded.
39    pub fn with_limit(limit: usize) -> Self {
40        Self {
41            buf: String::new(),
42            limit,
43            interner: (),
44        }
45    }
46}
47
48impl<I> StringBuilder<I> {
49    /// Configures this builder with the given interner.
50    pub fn with_interner<I2>(self, interner: I2) -> StringBuilder<I2>
51    where
52        I2: Interner,
53    {
54        StringBuilder {
55            buf: self.buf,
56            limit: self.limit,
57            interner,
58        }
59    }
60
61    /// Returns `true` if the buffer of the builder is empty.
62    pub fn is_empty(&self) -> bool {
63        self.buf.is_empty()
64    }
65
66    /// Returns the length of the buffer of the builder.
67    pub fn len(&self) -> usize {
68        self.buf.len()
69    }
70
71    /// Returns the available space in the buffer of the builder.
72    pub fn available(&self) -> usize {
73        self.limit - self.buf.len()
74    }
75
76    /// Clears the buffer of the builder.
77    pub fn clear(&mut self) {
78        self.buf.clear();
79    }
80
81    /// Pushes a character into the builder.
82    ///
83    /// Returns `None` if the buffer limit would be exceeded by writing the character.
84    pub fn push(&mut self, c: char) -> Option<()> {
85        let char_len = c.len_utf8();
86        if self.buf.len() + char_len > self.limit {
87            return None;
88        }
89        self.buf.push(c);
90        Some(())
91    }
92
93    /// Pushes a string fragment into the builder.
94    ///
95    /// Returns `None` if the buffer limit would be exceeded by writing the string.
96    pub fn push_str(&mut self, s: &str) -> Option<()> {
97        if self.buf.len() + s.len() > self.limit {
98            return None;
99        }
100        self.buf.push_str(s);
101        Some(())
102    }
103
104    /// Returns a references to the current string.
105    pub fn as_str(&self) -> &str {
106        &self.buf
107    }
108}
109
110impl<I> StringBuilder<I>
111where
112    I: Interner,
113{
114    /// Attempts to build and intern the string.
115    ///
116    /// Returns `None` if the string exceeds the configured limit or if it cannot be interned.
117    pub fn try_intern(&mut self) -> Option<MetaString> {
118        self.interner.try_intern(self.as_str()).map(MetaString::from)
119    }
120
121    /// Returns a `MetaString` using the configured interner.
122    pub fn to_meta_string(&self) -> MetaString {
123        MetaString::from_interner(self.as_str(), &self.interner)
124    }
125}
126
127impl std::fmt::Write for StringBuilder {
128    fn write_str(&mut self, s: &str) -> std::fmt::Result {
129        match self.push_str(s) {
130            Some(()) => Ok(()),
131            None => Err(std::fmt::Error),
132        }
133    }
134}
135
/// Sanitizes the input string so that it contains only lowercase ASCII letters, ASCII digits, and underscores.
///
/// ASCII uppercase letters are lowercased; lowercase letters, digits, and underscores are kept as-is; every other
/// character (including any non-ASCII character) is replaced with a single underscore.
pub fn lower_alphanumeric(s: &str) -> String {
    let mut sanitized = String::new();
    for ch in s.chars() {
        let mapped = match ch {
            'A'..='Z' => ch.to_ascii_lowercase(),
            'a'..='z' | '0'..='9' | '_' => ch,
            _ => '_',
        };
        sanitized.push(mapped);
    }
    sanitized
}
151
152/// Converts an unsigned integer to a string representation.
153///
154/// No allocations are required.
155pub fn unsigned_integer_to_string(value: u64) -> MetaString {
156    let mut writer = itoa::Buffer::new();
157    let s = writer.format(value);
158    MetaString::from(s)
159}
160
#[cfg(test)]
mod tests {
    use std::{fmt::Write, num::NonZeroUsize};

    use stringtheory::interning::FixedSizeInterner;

    use super::*;

    /// Builds an unlimited builder with no interner.
    fn build_string_builder() -> StringBuilder {
        StringBuilder::new()
    }

    /// Builds a builder with no interner that is limited to `limit` bytes.
    fn build_string_builder_with_limit(limit: usize) -> StringBuilder {
        StringBuilder::with_limit(limit)
    }

    /// Builds an unlimited builder backed by a fixed-size interner of the given capacity.
    fn build_interned_string_builder(interner_capacity: usize) -> StringBuilder<FixedSizeInterner<1>> {
        StringBuilder::new().with_interner(FixedSizeInterner::new(NonZeroUsize::new(interner_capacity).unwrap()))
    }

    /// Builds a builder limited to `limit` bytes and backed by a fixed-size interner of the given capacity.
    fn build_interned_string_builder_with_limit(
        interner_capacity: usize, limit: usize,
    ) -> StringBuilder<FixedSizeInterner<1>> {
        StringBuilder::with_limit(limit)
            .with_interner(FixedSizeInterner::new(NonZeroUsize::new(interner_capacity).unwrap()))
    }

    #[test]
    fn lower_alphanumeric_basic() {
        assert_eq!(lower_alphanumeric("Hello World!"), "hello_world_");
        assert_eq!(lower_alphanumeric("1234"), "1234");
        assert_eq!(lower_alphanumeric("abc_def"), "abc_def");
        assert_eq!(lower_alphanumeric("abc-def"), "abc_def");
        assert_eq!(lower_alphanumeric("abc def"), "abc_def");
    }

    #[test]
    fn lower_alphanumeric_edge_cases() {
        // Empty input stays empty:
        assert_eq!(lower_alphanumeric(""), "");

        // A non-ASCII character is replaced by a single underscore, regardless of its encoded width:
        assert_eq!(lower_alphanumeric("h\u{e9}llo"), "h_llo");
    }

    #[test]
    fn unsigned_integer_to_string_basic() {
        assert_eq!(unsigned_integer_to_string(0), MetaString::from("0"));
        assert_eq!(unsigned_integer_to_string(42), MetaString::from("42"));
        assert_eq!(
            unsigned_integer_to_string(u64::MAX),
            MetaString::from("18446744073709551615")
        );
    }

    #[test]
    fn string_builder_basic() {
        let mut builder = build_string_builder();

        assert_eq!(builder.push_str("Hello World!"), Some(()));
        assert_eq!(builder.as_str(), "Hello World!");

        builder.clear();

        assert_eq!(builder.push_str("hello"), Some(()));
        assert_eq!(builder.push_str(" "), Some(()));
        assert_eq!(builder.push_str("world"), Some(()));
        assert_eq!(builder.as_str(), "hello world");
    }

    #[test]
    fn string_builder_basic_with_interner() {
        let mut builder = build_interned_string_builder(128);

        assert_eq!(builder.push_str("Hello World!"), Some(()));
        assert_eq!(builder.try_intern(), Some(MetaString::from("Hello World!")));

        builder.clear();

        assert_eq!(builder.push_str("hello"), Some(()));
        assert_eq!(builder.push_str(" "), Some(()));
        assert_eq!(builder.push_str("world"), Some(()));
        assert_eq!(builder.try_intern(), Some(MetaString::from("hello world")));
    }

    #[test]
    fn string_builder_clear() {
        let mut builder = build_string_builder();

        assert_eq!(builder.push_str("hello"), Some(()));
        builder.clear();
        assert_eq!(builder.as_str(), "");
    }

    #[test]
    fn string_builder_is_empty_len_available() {
        const LIMIT: usize = 32;

        let mut builder = build_string_builder_with_limit(LIMIT);

        // Starts out empty:
        assert!(builder.is_empty());
        assert_eq!(builder.len(), 0);
        assert_eq!(builder.available(), LIMIT);

        // After pushing "hello":
        assert_eq!(builder.push_str("hello"), Some(()));
        assert!(!builder.is_empty());
        assert_eq!(builder.len(), 5);
        assert_eq!(builder.available(), LIMIT - 5);
        assert_eq!(builder.as_str(), "hello");

        // After pushing " world":
        assert_eq!(builder.push_str(" world"), Some(()));
        assert!(!builder.is_empty());
        assert_eq!(builder.len(), 11);
        assert_eq!(builder.available(), LIMIT - 11);
        assert_eq!(builder.as_str(), "hello world");

        // Manually clearing the buffer:
        builder.clear();
        assert!(builder.is_empty());
        assert_eq!(builder.len(), 0);
        assert_eq!(builder.available(), LIMIT);
    }

    #[test]
    fn string_builder_push_char_respects_limit() {
        let mut builder = build_string_builder_with_limit(4);

        // One-byte character:
        assert_eq!(builder.push('a'), Some(()));
        assert_eq!(builder.len(), 1);

        // Two-byte character, bringing the buffer to three bytes:
        assert_eq!(builder.push('\u{e9}'), Some(()));
        assert_eq!(builder.len(), 3);

        // Another two-byte character would need five bytes, so it is rejected and the buffer is unchanged:
        assert_eq!(builder.push('\u{e9}'), None);
        assert_eq!(builder.as_str(), "a\u{e9}");

        // A one-byte character still fits and fills the buffer exactly:
        assert_eq!(builder.push('b'), Some(()));
        assert_eq!(builder.available(), 0);

        // Nothing else fits once the limit is reached:
        assert_eq!(builder.push('c'), None);
        assert_eq!(builder.as_str(), "a\u{e9}b");
    }

    #[test]
    fn string_builder_with_limit() {
        const LIMIT: usize = 16;

        let mut builder = build_string_builder_with_limit(LIMIT);

        // Under the limit:
        let string_one = "hello, world!";
        assert!(string_one.len() < LIMIT);
        assert_eq!(builder.push_str(string_one), Some(()));
        assert_eq!(builder.as_str(), string_one);

        // Over the limit:
        let string_two = "definitely way too long";
        assert!(string_two.len() > LIMIT);
        assert_eq!(builder.push_str(string_two), None);

        builder.clear();

        // Under the limit, but we build it piecemeal:
        let string_three_parts = ["hello", " ", "world"];
        let string_three = string_three_parts.concat();
        assert!(string_three.len() < LIMIT);
        for string_three_part in string_three_parts {
            assert_eq!(builder.push_str(string_three_part), Some(()));
        }
        assert_eq!(builder.as_str(), string_three);
    }

    #[test]
    fn string_builder_under_limit_interner_full() {
        const INTERNER_CAPACITY: usize = 24;
        const LIMIT: usize = 64;

        let mut builder = build_interned_string_builder_with_limit(INTERNER_CAPACITY, LIMIT);

        // Under the limit but over the interner capacity.
        //
        // The pushes should succeed, but we should not be able to build the string due to
        // the interner not having enough space:
        let string_one = "are you there, god? it's me, margaret";
        assert!(string_one.len() < LIMIT);
        assert!(string_one.len() > INTERNER_CAPACITY);
        assert_eq!(builder.push_str(string_one), Some(()));
        assert_eq!(builder.try_intern(), None);
    }

    #[test]
    fn string_builder_fmt_write() {
        let mut builder = build_string_builder();

        let name = "steve from blues clues";
        let num_apples = 5;

        write!(builder, "hello, world!").unwrap();
        write!(builder, " it's me, {}.", name).unwrap();
        write!(builder, " i've got {} apples.", num_apples).unwrap();

        assert_eq!(
            builder.as_str(),
            "hello, world! it's me, steve from blues clues. i've got 5 apples."
        );
    }

    #[test]
    fn string_builder_fmt_write_over_limit() {
        let mut builder = build_string_builder_with_limit(8);

        // Fits within the limit:
        assert!(write!(builder, "hello").is_ok());

        // Would exceed the limit, so the write fails and the buffer is left as it was:
        assert!(write!(builder, ", world!").is_err());
        assert_eq!(builder.as_str(), "hello");
    }
}