stringtheory/interning/fixed_size.rs
1#[cfg(not(feature = "loom"))]
2use std::sync::{atomic::AtomicUsize, Arc, Mutex};
3use std::{
4 num::NonZeroUsize,
5 ptr::NonNull,
6 sync::atomic::Ordering::{AcqRel, Acquire},
7};
8
9#[cfg(feature = "loom")]
10use loom::sync::{atomic::AtomicUsize, Arc, Mutex};
11
12use super::{
13 helpers::{aligned, aligned_string, hash_string, layout_for_data, PackedLengthCapacity},
14 InternedString, Interner,
15};
16use crate::interning::helpers::{ReclaimedEntries, ReclaimedEntry};
17
18const HEADER_LEN: usize = std::mem::size_of::<EntryHeader>();
19const HEADER_ALIGN: usize = std::mem::align_of::<EntryHeader>();
20
21/// The minimum possible length of an entry.
22///
23/// For any entry in the interner, there is already an `EntryHeader` followed by the string data itself. In order to
24/// ensure that entries can be written contiguously, we additionally ensure that the number of bytes we utilize for the
25/// string data is aligned at least as much as `EntryHeader` itself.
26///
27/// This means that the minimum possible length of an entry, or the minimum number of bytes a valid entry could consume,
28/// is the length of the header plus the alignment of the header.
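///
/// For example, on a typical 64-bit target where `EntryHeader` is 24 bytes with an alignment of 8 bytes, the minimum
/// possible entry length works out to 32 bytes.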
29const MINIMUM_ENTRY_LEN: usize = HEADER_LEN + HEADER_ALIGN;
30
31#[derive(Debug)]
32pub(crate) struct StringState {
33 interner: Arc<Mutex<InternerShardState>>,
34 header: NonNull<EntryHeader>,
35}
36
37impl StringState {
38 #[inline]
39 pub const fn as_str(&self) -> &str {
40 // SAFETY: We ensure `self.header` is well-aligned and points to an initialized `EntryHeader` value when creating `StringState`.
41 unsafe { get_entry_string(self.header) }
42 }
43}
44
45impl PartialEq for StringState {
46 fn eq(&self, other: &Self) -> bool {
47 self.header == other.header
48 }
49}
50
51impl Clone for StringState {
52 fn clone(&self) -> Self {
53 // SAFETY: The caller that creates `StringState` is responsible for ensuring that `self.header` is well-aligned
54 // and points to an initialized `EntryHeader` value.
55 let header = unsafe { self.header.as_ref() };
56 header.increment_active_refs();
57
58 Self {
59 interner: self.interner.clone(),
60 header: self.header,
61 }
62 }
63}
64
65impl Drop for StringState {
66 fn drop(&mut self) {
67 // SAFETY: The caller that creates `StringState` is responsible for ensuring that `self.header` is well-aligned
68 // and points to an initialized `EntryHeader` value.
69 let header = unsafe { self.header.as_ref() };
70 if header.decrement_active_refs() {
71 // We decremented the reference count to zero, so try to mark this entry for reclamation.
72 let mut interner = self.interner.lock().unwrap();
73 interner.mark_for_reclamation(self.header);
74 }
75 }
76}
77
78// SAFETY: We don't take references to the entry header pointer that outlast `StringState`, and the only modification we
79// do to the entry header is through atomic operations, so it's safe to both send and share `StringState` between
80// threads.
81unsafe impl Send for StringState {}
82unsafe impl Sync for StringState {}
83
84/// Metadata about an interner entry.
85///
86/// `EntryHeader` represents the smallest amount of information about an interned entry that is needed to support both
87/// lookup of existing interned strings, as well as the ability to reclaim space in the interner when an entry is no
88/// longer in use.
89struct EntryHeader {
90 /// The hash of the string that this entry represents.
91 hash: u64,
92
93 /// The number of active references to this entry.
94 ///
95 /// Only incremented by the interner itself, and decremented by `InternedString` when it is dropped.
96 refs: AtomicUsize,
97
98 /// Combined length/capacity of the entry, in terms of the string itself.
99 ///
100 /// Notably, this does _not_ include the length of the header itself. For example, an entry holding the string
101 /// "hello, world!" has a string length of 13 bytes, but since we have to pad out to meet our alignment requirements
102 /// for `EntryHeader`, we would end up with a capacity of 16 bytes. As such, `EntryHeader::len` would report `13`,
103 /// while `EntryHeader::capacity` would report `16`. Likewise, `EntryHeader::entry_len` would report `40`,
104 /// accounting for the string capacity (16) as well as the header length itself (24).
105 ///
106 /// As explained in the description of `PackedLengthCapacity`, this does mean strings can't be larger than ~4GB on
107 /// 64-bit platforms, which is not a problem we have.
108 len_cap: PackedLengthCapacity,
109}
110
111impl EntryHeader {
112 /// Creates a tombstone entry with the given capacity.
113 ///
114 /// This is to allow for updating a region in the data buffer, which has been reclaimed, such that it is
115 /// identifiable as being unused.
116 fn tombstone(entry: ReclaimedEntry) -> Self {
117 // The usable capacity for a reclaimed entry is the full capacity minus the size of `EntryHeader` itself, as
118 // reclaimed entries represent the _entire_ region in the data buffer, but `EntryHeader` only cares about the
119 // string portion itself.
120 let cap = Self::usable_from_reclaimed(entry);
121
122 Self {
123 hash: 0,
124 refs: AtomicUsize::new(0),
125 len_cap: PackedLengthCapacity::new(cap, 0),
126 }
127 }
128
129 /// Creates a new entry for the given string.
130 fn from_string(hash: u64, s: &str) -> Self {
131 // We're dictating the necessary capacity here, which is the length of the string rounded to the nearest
132 // multiple of the alignment of `EntryHeader`, which ensures that any subsequent entry will be properly aligned.
133 let cap = aligned_string::<Self>(s);
134
135 Self {
136 hash,
137 refs: AtomicUsize::new(1),
138 len_cap: PackedLengthCapacity::new(cap, s.len()),
139 }
140 }
141
142 /// Creates a new entry for the given string, based on the given reclaimed entry.
143 ///
144 /// This maps the entry header to the underlying capacity of the given reclaimed entry, which is done when a
145 /// reclaimed entry is reused to intern a new string. If the reclaimed entry has more excess capacity than is needed
146 /// to hold another minimum-sized entry, the excess is split off and returned as a new reclaimed entry; otherwise, the new entry absorbs the entire reclaimed capacity.
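 ///
 /// For example, on a typical 64-bit target (24-byte header, 32-byte minimum entry length), reusing a 96-byte reclaimed
 /// region for a string that needs 16 bytes of aligned capacity consumes 40 bytes of that region and splits off the
 /// trailing 56 bytes as a new reclaimed entry, since those 56 bytes are enough to hold another minimum-sized entry.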
147 fn from_reclaimed_entry(mut entry: ReclaimedEntry, hash: u64, s: &str) -> (Self, Option<ReclaimedEntry>) {
148 // The usable capacity for a reclaimed entry is the full capacity minus the size of `EntryHeader` itself, as
149 // reclaimed entries represent the _entire_ region in the data buffer, but `EntryHeader` only cares about the
150 // string portion itself.
151 let entry_cap = EntryHeader::usable_from_reclaimed(entry);
152 let required_cap = aligned_string::<Self>(s);
153
154 // If the reclaimed entry has enough additional space beyond what we need for the string, we'll split it off and
155 // return it for the caller to keep around in the reclaimed entries list.
156 let remainder = entry_cap - required_cap;
157 let (adjusted_cap, maybe_split_entry) = if remainder >= MINIMUM_ENTRY_LEN {
158 let entry_len = EntryHeader::len_for(s);
159 let split_entry = entry.split_off(entry_len);
160
161 (entry_len - HEADER_LEN, Some(split_entry))
162 } else {
163 (entry_cap, None)
164 };
165
166 let header = Self {
167 hash,
168 refs: AtomicUsize::new(1),
169 len_cap: PackedLengthCapacity::new(adjusted_cap, s.len()),
170 };
171
172 (header, maybe_split_entry)
173 }
174
175 /// Returns the computed length of a complete entry, in bytes, for the given string.
176 ///
177 /// This includes the size of the entry header itself and the string data, when padded for alignment, and represents
178 /// the number of bytes that would be consumed in the data buffer.
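 ///
 /// For example, on a typical 64-bit target, `len_for("hello, world!")` is 40 bytes: 24 bytes of header plus 16 bytes
 /// of alignment-padded string capacity.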
179 const fn len_for(s: &str) -> usize {
180 HEADER_LEN + aligned_string::<Self>(s)
181 }
182
183 /// Returns the usable capacity of a reclaimed entry, in bytes.
184 ///
185 /// Usable refers to the number of bytes in a reclaimed entry that could be used for string data, after accounting
186 /// for the size of `EntryHeader` itself.
187 const fn usable_from_reclaimed(entry: ReclaimedEntry) -> usize {
188 entry.capacity() - HEADER_LEN
189 }
190
191 /// Returns the total number of bytes that this entry takes up in the data buffer.
192 const fn entry_len(&self) -> usize {
193 HEADER_LEN + self.capacity()
194 }
195
196 /// Returns the size of the string, in bytes, that this entry can hold.
197 const fn capacity(&self) -> usize {
198 self.len_cap.capacity()
199 }
200
201 /// Returns the size of the string, in bytes, that this entry _actually_ holds.
202 const fn len(&self) -> usize {
203 self.len_cap.len()
204 }
205
206 /// Returns `true` if this entry is currently referenced.
207 fn is_active(&self) -> bool {
208 self.refs.load(Acquire) != 0
209 }
210
211 /// Increments the active reference count by one.
212 fn increment_active_refs(&self) {
213 self.refs.fetch_add(1, AcqRel);
214 }
215
216 /// Decrements the active reference count by one.
217 ///
218 /// Returns `true` if the active reference count is zero _after_ calling this method.
219 fn decrement_active_refs(&self) -> bool {
220 self.refs.fetch_sub(1, AcqRel) == 1
221 }
222}
223
224#[derive(Debug)]
225struct InternerShardState {
226 // Direct pieces of our buffer allocation.
227 ptr: NonNull<u8>,
228 offset: usize,
229 capacity: NonZeroUsize,
230
231 // Number of active entries (strings) in the interner.
232 entries: usize,
233
234 // Length of all active entries, in bytes.
235 //
236 // This is equivalent to `self.offset` minus the total size of all reclaimed entries.
237 len: usize,
238
239 // Markers for entries that can be reused.
240 reclaimed: ReclaimedEntries,
241}
242
243impl InternerShardState {
244 /// Creates a new `InternerShardState` with a pre-allocated buffer that has the given capacity.
245 pub fn with_capacity(capacity: NonZeroUsize) -> Self {
246 assert!(
247 aligned::<EntryHeader>(capacity.get()) <= isize::MAX as usize,
248 "capacity would overflow isize::MAX, which violates layout constraints"
249 );
250
251 // Allocate our data buffer. This is the main backing allocation for all interned strings, and is well-aligned
252 // for `EntryHeader`.
253 //
254 // SAFETY: `layout_for_data` ensures the layout is non-zero.
255 let data_layout = layout_for_data::<EntryHeader>(capacity);
256 let data_ptr = unsafe { std::alloc::alloc(data_layout) };
257 let ptr = match NonNull::new(data_ptr) {
258 Some(ptr) => ptr,
259 None => std::alloc::handle_alloc_error(data_layout),
260 };
261
262 Self {
263 ptr,
264 offset: 0,
265 capacity,
266 entries: 0,
267 len: 0,
268 reclaimed: ReclaimedEntries::new(),
269 }
270 }
271
272 /// Returns the total number of unused bytes that are available for interning.
273 fn available(&self) -> usize {
274 self.capacity.get() - self.offset
275 }
276
277 fn get_entry_ptr(&self, offset: usize) -> NonNull<EntryHeader> {
278 debug_assert!(
279 offset + MINIMUM_ENTRY_LEN <= self.capacity.get(),
280 "offset would point to entry that cannot possibly avoid extending past end of data buffer"
281 );
282
283 // SAFETY: The caller is responsible for ensuring that `offset` is within the bounds of the data buffer, and
284 // that `offset` is well-aligned for `EntryHeader`.
285 let entry_ptr = unsafe { self.ptr.as_ptr().add(offset).cast::<EntryHeader>() };
286 debug_assert!(entry_ptr.is_aligned(), "entry header pointer must be well-aligned");
287
288 // SAFETY: `entry_ptr` is derived from `self.ptr`, which itself is `NonNull<u8>`, and the caller is responsible
289 // for ensuring that `offset` is within the bounds of the data buffer, so we know `entry_ptr` is non-null.
290 unsafe { NonNull::new_unchecked(entry_ptr) }
291 }
292
293 fn find_entry(&self, hash: u64, s: &str) -> Option<NonNull<EntryHeader>> {
294 let mut offset = 0;
295
296 while offset < self.offset {
297 // Construct a pointer to the entry at `offset`, and get a reference to the header value.
298 let header_ptr = self.get_entry_ptr(offset);
299 let header = unsafe { header_ptr.as_ref() };
300
301 // See if this entry is active or not. If it's active, then we'll quickly check the hash/length of the
302 // string to see if this is likely to be a match for `s`.
303 if header.is_active() && header.hash == hash && header.len() == s.len() {
304 // As a final check, we make sure that the entry string and `s` are equal. If they are, then we
305 // have an exact match and will return the entry.
306 //
307 // SAFETY: We know that our header is valid and initialized.
308 let s_entry = unsafe { get_entry_string(header_ptr) };
309 if s_entry == s {
310 // Increment the reference count for this entry so it's not prematurely reclaimed.
311 header.increment_active_refs();
312
313 return Some(header_ptr);
314 }
315 }
316
317 // Either this was a reclaimed entry or we didn't have a match, so we move on to the next entry.
318 offset += header.entry_len();
319 }
320
321 None
322 }
323
324 fn write_entry(&mut self, offset: usize, entry_header: EntryHeader, s: &str) -> NonNull<EntryHeader> {
325 debug_assert_eq!(
326 entry_header.len(),
327 s.len(),
328 "entry header length must match string length"
329 );
330
331 let entry_ptr = self.get_entry_ptr(offset);
332 let entry_len = entry_header.entry_len();
333
334 // Write the entry header.
335 unsafe { entry_ptr.as_ptr().write(entry_header) };
336
337 let s_buf = s.as_bytes();
338
339 // Write the string.
340 let entry_s_buf = unsafe {
341 // Take the entry pointer and add 1, which sets our pointer to right _after_ the header.
342 let entry_s_ptr = entry_ptr.as_ptr().add(1).cast::<u8>();
343 std::slice::from_raw_parts_mut(entry_s_ptr, s_buf.len())
344 };
345 entry_s_buf.copy_from_slice(s_buf);
346
347 // Update our internal statistics.
348 self.entries += 1;
349 self.len += entry_len;
350
351 entry_ptr
352 }
353
354 fn write_to_unoccupied(&mut self, s_hash: u64, s: &str) -> NonNull<EntryHeader> {
355 let entry_header = EntryHeader::from_string(s_hash, s);
356
357 // Write the entry to the end of the data buffer.
358 let entry_offset = self.offset;
359 self.offset += entry_header.entry_len();
360
361 self.write_entry(entry_offset, entry_header, s)
362 }
363
364 fn write_to_reclaimed_entry(&mut self, entry: ReclaimedEntry, s_hash: u64, s: &str) -> NonNull<EntryHeader> {
365 let entry_offset = entry.offset();
366 let (entry_header, maybe_split_entry) = EntryHeader::from_reclaimed_entry(entry, s_hash, s);
367
368 // If we had enough capacity in the reclaimed entry to hold this string _and_ potentially hold another entry, we
369 // split it off and store that remainder entry.
370 if let Some(split_entry) = maybe_split_entry {
371 self.add_reclaimed(split_entry);
372 }
373
374 // Write the entry in place of the reclaimed entry.
375 self.write_entry(entry_offset, entry_header, s)
376 }
377
378 fn add_reclaimed_from_header(&mut self, header_ptr: NonNull<EntryHeader>) {
379 // Get the offset of the header within the data buffer.
380 //
381 // SAFETY: The caller is responsible for ensuring the entry header reference belongs to this interner. If that
382 // is upheld, then we know that entry header belongs to our data buffer, and that the pointer to the entry
383 // header is not less than the base pointer of the data buffer, ensuring the offset is non-negative.
384 let entry_offset = unsafe {
385 header_ptr
386 .cast::<u8>()
387 .as_ptr()
388 .offset_from(self.ptr.as_ptr().cast_const())
389 };
390 debug_assert!(entry_offset >= 0, "entry offset must be non-negative");
391
392 let header = unsafe { header_ptr.as_ref() };
393
394 let entry = ReclaimedEntry::new(entry_offset as usize, header.entry_len());
395 self.len -= entry.capacity();
396 self.add_reclaimed(entry);
397 }
398
399 fn add_reclaimed(&mut self, entry: ReclaimedEntry) {
400 // Reclamation is a two-step process: first, we have to actually keep track of the reclaimed entry, which
401 // potentially involves merging adjacent reclaimed entries, and then once all of that has happened, we tombstone
402 // the entry (whether merged or not).
403 //
404 // However, if the merged reclaimed entry immediately precedes any available capacity, we can skip tombstoning
405 // it, since we can just wind back `offset` to reclaim the space.
406 let merged_entry = self.reclaimed.insert(entry);
407 if merged_entry.offset() + merged_entry.capacity() == self.offset {
408 self.offset -= merged_entry.capacity();
409 self.reclaimed.remove(&merged_entry);
410 return;
411 }
412
413 self.clear_reclaimed_entry(merged_entry);
414 }
415
416 fn mark_for_reclamation(&mut self, header_ptr: NonNull<EntryHeader>) {
417 // See if the reference count is zero.
418 //
419 // Only interned string values (the frontend handle that wraps the pointer to a specific entry) can decrement
420 // the reference count for their specific entry when dropped, and only `InternerShardState` -- with its access
421 // mediated through a mutex -- can increment the reference count for entries. This means that if the reference
422 // count is zero, then we know that nobody else is holding a reference to this entry, and no concurrent call to
423 // `try_intern` could be updating the reference count, either... so it's safe to be marked as reclaimed.
424 //
425 // SAFETY: The caller is responsible for ensuring that `header_ptr` is well-aligned and points to an initialized
426 // `EntryHeader` value.
427 let header = unsafe { header_ptr.as_ref() };
428 if !header.is_active() {
429 self.entries -= 1;
430 self.add_reclaimed_from_header(header_ptr);
431 }
432 }
433
434 fn clear_reclaimed_entry(&mut self, entry: ReclaimedEntry) {
435 let entry_ptr = self.get_entry_ptr(entry.offset);
436
437 // Write the entry tombstone itself, which clears out the hash and sets the reference count to zero.
438 //
439 // SAFETY: We know that `entry_ptr` is valid for writes (reclaimed entries are, by definition, inactive regions
440 // in the data buffer) and is well-aligned for `EntryHeader`.
441 let tombstone = EntryHeader::tombstone(entry);
442 let str_cap = tombstone.capacity();
443
444 unsafe {
445 entry_ptr.as_ptr().write(tombstone);
446 }
447
448 // Write a magic value to the entire string capacity for the entry. This ensures that there's a known repeating
449 // value which, in the case of debugging issues, can be a signal that offsets/reclaimed entries are incorrect
450 // and overlapping with active entries.
451 //
452 // SAFETY: Like above, the caller is responsible for ensuring that `offset` is within the bounds of the data
453 // buffer, and that `offset + capacity` does not extend past the bounds of the data buffer.
454 unsafe {
455 // Take the entry pointer and add 1, which sets our pointer to right _after_ the header.
456 let str_ptr = entry_ptr.as_ptr().add(1).cast::<u8>();
457 let str_buf = std::slice::from_raw_parts_mut(str_ptr, str_cap);
458 str_buf.fill(0xAA);
459 }
460 }
461
462 fn try_intern(&mut self, s_hash: u64, s: &str) -> Option<NonNull<EntryHeader>> {
463 // We can only intern strings with a size that fits within a packed length/capacity value, so if `s` is larger
464 // than that, we can't intern it, and there can't be an existing entry for it either.
465 if s.len() > PackedLengthCapacity::maximum_value() {
466 return None;
467 }
468
469 // Try and find an existing entry for this string.
470 if self.entries != 0 {
471 if let Some(existing_entry) = self.find_entry(s_hash, s) {
472 return Some(existing_entry);
473 }
474 }
475
476 let required_cap = EntryHeader::len_for(s);
477
478 // We didn't find an existing entry, so we're going to intern it.
479 //
480 // First, try and see if we have a reclaimed entry that can fit this string. If nothing suitable is found, or we
481 // have no reclaimed entries, then we'll just try to fit it in the remaining capacity of our data buffer.
482 if !self.reclaimed.is_empty() {
483 let maybe_reclaimed_entry = self.reclaimed.take_if(|entry| entry.capacity() >= required_cap);
484 if let Some(reclaimed_entry) = maybe_reclaimed_entry {
485 return Some(self.write_to_reclaimed_entry(reclaimed_entry, s_hash, s));
486 }
487 }
488
489 if required_cap <= self.available() {
490 Some(self.write_to_unoccupied(s_hash, s))
491 } else {
492 None
493 }
494 }
495}
496
497impl Drop for InternerShardState {
498 fn drop(&mut self) {
499 // SAFETY: We allocated this buffer with the global allocator, and we're generating the same layout that was
500 // used to allocate it in the first place.
501 unsafe {
502 std::alloc::dealloc(self.ptr.as_ptr(), layout_for_data::<EntryHeader>(self.capacity));
503 }
504 }
505}
506
507// SAFETY: We don't take references to the data buffer pointer that outlast `InternerShardState`, and all access to
508// `InternerShardState` itself is mediated through a mutex, so we're safe to send it around and share it between
509// threads.
510unsafe impl Send for InternerShardState {}
511unsafe impl Sync for InternerShardState {}
512
513#[derive(Debug)]
514struct InternerState<const SHARD_FACTOR: usize> {
515 shards: [Arc<Mutex<InternerShardState>>; SHARD_FACTOR],
516 capacity: usize,
517}
518
519impl<const SHARD_FACTOR: usize> InternerState<SHARD_FACTOR> {
520 // Ensure our shard factor is a power of two at compile time, so that we can just use a mask to get the shard index.
521 const _POWER_OF_TWO_SHARD_FACTOR: () = {
522 if !SHARD_FACTOR.is_power_of_two() {
523 panic!("shard factor must be a power of two")
524 }
525 };
526
527 pub fn with_capacity(capacity: NonZeroUsize) -> Self {
 // Reference the compile-time power-of-two check so that it is actually evaluated for this `SHARD_FACTOR`.
 let () = Self::_POWER_OF_TWO_SHARD_FACTOR;

528 let shard_capacity = match NonZeroUsize::new(capacity.get() / SHARD_FACTOR) {
529 Some(shard_capacity) => shard_capacity,
530 // If the capacity is smaller than the shard factor, the division yields zero, so we just specify a capacity
531 // of one, which will force every shard to upsize its capacity so that a single entry can fit, and satisfies the need for `NonZeroUsize`.
532 //
533 // SAFETY: One is obviously not zero.
534 None => unsafe { NonZeroUsize::new_unchecked(1) },
535 };
536
537 let shards = std::iter::repeat_with(|| Arc::new(Mutex::new(InternerShardState::with_capacity(shard_capacity))))
538 .take(SHARD_FACTOR)
539 .collect::<Vec<_>>();
540 let capacity = shards
541 .iter()
542 .map(|shard| {
543 let shard = shard.lock().unwrap();
544 shard.capacity.get()
545 })
546 .sum();
547 let shards: [Arc<Mutex<InternerShardState>>; SHARD_FACTOR] =
548 shards.try_into().expect("should not fail to convert to array");
549
550 Self { shards, capacity }
551 }
552
553 fn is_empty(&self) -> bool {
554 self.shards.iter().all(|shard| shard.lock().unwrap().entries == 0)
555 }
556
557 fn len(&self) -> usize {
558 self.shards.iter().map(|shard| shard.lock().unwrap().entries).sum()
559 }
560
561 fn len_bytes(&self) -> usize {
562 self.shards.iter().map(|shard| shard.lock().unwrap().offset).sum()
563 }
564
565 fn capacity_bytes(&self) -> usize {
566 self.capacity
567 }
568
569 fn try_intern(&self, s: &str) -> Option<InternedString> {
570 let hash = hash_string(s);
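 // Since `SHARD_FACTOR` is a power of two, masking with `SHARD_FACTOR - 1` is equivalent to `hash % SHARD_FACTOR`.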
571 let shard_idx = (hash as usize) & (SHARD_FACTOR - 1);
572
573 let shard = &self.shards[shard_idx];
574 intern_with_shard_and_hash(shard, hash, s)
575 }
576}
577
578 /// A string interner based on fixed-size, sharded backing storage with support for reclamation.
579///
580/// ## Overview
581///
582 /// This interner uses fixed-size backing storage, split across a fixed number of shards, with interned strings stored
583 /// contiguously within each shard's buffer. This provides bounded memory usage, and the interner will not allocate additional memory for new strings once the buffers are full.
584/// Since interned strings are not likely to need to live for the life of the program, the interner supports
585/// reclamation. Once all references to an interned string have been dropped, the storage for that string is reclaimed
586/// and can be used to hold new strings.
587///
588/// ## Storage layout
589///
590/// The backing buffer stores strings contiguously, with an entry "header" prepended to each string. The header contains
591/// relevant data -- hash of the string, reference count, and length of the string -- needed to work with the entry
592/// either when searching for existing entries or when using the entry itself.
593///
594/// The layout of an entry is as follows:
595///
596/// ```text
597/// ┌───────────────────────── entry #1 ──────────────────────────┐ ┌─ entry #2 ─┐ ┌─ entry .. ─┐
598/// ▼ ▼ ▼ ▼ ▼ ▼
599/// ┏━━━━━━━━━━━┯━━━━━━━━━━━┯━━━━━━━━━━━┯━━━━━━━━━━━┯━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━┓
600/// ┃ str hash │ ref cnt │ str len │ str data │ padding ┃ ┃ header ┃ ┃ header ┃
601/// ┃ (8 bytes) │ (8 bytes) │ (8 bytes) │ (N bytes) │ (1-7 bytes) ┃ ┃ & string ┃ ┃ & string ┃
602/// ┗━━━━━━━━━━━┷━━━━━━━━━━━┷━━━━━━━━━━━┷━━━━━━━━━━━┷━━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━┛ ┗━━━━━━━━━━━━┛
603/// ▲ ▲ ▲
604/// └────────── `EntryHeader` ──────────┘ └── aligned for `EntryHeader`
605/// (8 byte alignment) via trailing padding
606/// ```
607///
608/// The backing buffer is always aligned properly for `EntryHeader`, so that the first entry can be referenced
609/// correctly. However, when appending additional entries to the buffer, we need to ensure that those entries also have
610/// an aligned start for accessing the header. This is complicated due to the variable number of bytes for the string
611/// data.
612///
613/// Alignment padding is added to the end of the entry to ensure that when appending the next entry, the start of the
614/// entry is properly aligned for `EntryHeader`. In the worst case, up to 7 bytes could be added (and thus wasted) on
615/// this alignment padding.
616///
617/// ## `InternedString`
618///
619/// The `InternedString` type is a handle to the entry header, and thus the string data, for an interned string. It is
620/// designed to be small -- 8 bytes! -- and cheap to clone, as it contains an atomic reference to the entry header and a
621/// reference to the interner that owns the string. It dereferences to the underlying string with relatively low
622/// overhead: two pointer indirections.
623///
624/// When an `InternedString` is dropped, it decrements the reference count for the entry it points to. If the reference
625/// count drops to zero, it will attempt to mark the entry for reclamation.
626///
627/// ## Reclamation
628///
629/// As we want to bound the memory used by the interner, but also not allow it to be filled up with strings that
630/// eventually end up going entirely unused, we need a way to remove those unused strings so their underlying storage
631/// can be used for new strings. This is where reclamation comes in.
632///
633/// When a string is interned, the entry header tracks how many active references there are to it. When that reference
634/// count drops to zero, the last reference to the string attempts to mark the entry for reclamation. Assuming no other
635/// reference has been taken out on the entry in the meantime, the entry gets added to a list of "reclaimed" entries.
636///
637/// Reclaimed entries are simple markers -- start and end position in the data buffer -- which are stored in a freelist.
638/// When attempting to intern a new string, this freelist is searched to see if there's an entry large enough to fit the
639/// new string, and if so, it is used.
640///
641/// Additionally, when entries are reclaimed, adjacent entries are merged together where possible. This helps to avoid
642/// unnecessary fragmentation over time, although not as effectively as reconstructing the data buffer to re-pack
643/// entries.
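///
/// ## Usage
///
/// A minimal sketch of typical usage; the shard factor and capacity below are illustrative only, and the example
/// assumes both `FixedSizeInterner` and the `Interner` trait are in scope:
///
/// ```ignore
/// use std::num::NonZeroUsize;
///
/// // Create an interner with 1MB of backing storage split across 8 shards.
/// let interner = FixedSizeInterner::<8>::new(NonZeroUsize::new(1024 * 1024).unwrap());
///
/// // Interning the same string twice hands back two handles to the same underlying entry.
/// let s1 = interner.try_intern("hello").expect("should have capacity");
/// let s2 = interner.try_intern("hello").expect("should have capacity");
/// assert_eq!(&*s1, "hello");
/// assert_eq!(&*s2, "hello");
///
/// // Dropping every handle to a string allows its entry to be reclaimed and reused for future strings.
/// drop(s1);
/// drop(s2);
/// ```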
644#[derive(Clone, Debug)]
645pub struct FixedSizeInterner<const SHARD_FACTOR: usize> {
646 state: Arc<InternerState<SHARD_FACTOR>>,
647}
648
649impl<const SHARD_FACTOR: usize> FixedSizeInterner<SHARD_FACTOR> {
650 /// Creates a new `FixedSizeInterner` with the given capacity.
651 ///
652 /// The given capacity will potentially be rounded up by a small number of bytes (up to 7) in order to ensure the
653 /// backing buffer is properly aligned.
654 pub fn new(capacity: NonZeroUsize) -> Self {
655 Self {
656 state: Arc::new(InternerState::with_capacity(capacity)),
657 }
658 }
659}
660
661impl<const SHARD_FACTOR: usize> Interner for FixedSizeInterner<SHARD_FACTOR> {
662 fn is_empty(&self) -> bool {
663 self.state.is_empty()
664 }
665
666 fn len(&self) -> usize {
667 self.state.len()
668 }
669
670 fn len_bytes(&self) -> usize {
671 self.state.len_bytes()
672 }
673
674 fn capacity_bytes(&self) -> usize {
675 self.state.capacity_bytes()
676 }
677
678 fn try_intern(&self, s: &str) -> Option<InternedString> {
679 self.state.try_intern(s)
680 }
681}
682
683#[inline]
684const unsafe fn get_entry_string_parts(header_ptr: NonNull<EntryHeader>) -> (NonNull<u8>, usize) {
685 // SAFETY: The caller is responsible for ensuring that `header_ptr` is well-aligned and points to an initialized
686 // `EntryHeader` value.
687 let header = header_ptr.as_ref();
688
689 // Advance past the header and get the pointer to the string.
690 //
691 // SAFETY: We know that we're simply skipping over the header by advancing the pointer by one when it's still typed
692 // as `*mut EntryHeader`.
693 let s_ptr = header_ptr.add(1).cast::<u8>();
694 (s_ptr, header.len())
695}
696
697#[inline]
698const unsafe fn get_entry_string<'a>(header_ptr: NonNull<EntryHeader>) -> &'a str {
699 let (s_ptr, s_len) = get_entry_string_parts(header_ptr);
700
701 // SAFETY: We depend on `get_entry_string_parts` to give us a valid pointer and length for the string.
702 std::str::from_utf8_unchecked(std::slice::from_raw_parts(s_ptr.as_ptr() as *const _, s_len))
703}
704
705fn intern_with_shard_and_hash(shard: &Arc<Mutex<InternerShardState>>, hash: u64, s: &str) -> Option<InternedString> {
706 let header = {
707 let mut shard = shard.lock().unwrap();
708 shard.try_intern(hash, s)?
709 };
710
711 Some(InternedString::from(StringState {
712 interner: Arc::clone(shard),
713 header,
714 }))
715}
716
717#[cfg(test)]
718mod tests {
719 use std::{
720 collections::HashSet,
721 ops::{Deref as _, RangeInclusive},
722 };
723
724 use prop::sample::Index;
725 use proptest::{
726 collection::{hash_set, vec as arb_vec},
727 prelude::*,
728 };
729
730 use super::*;
731 use crate::interning::InternedStringState;
732
733 fn create_shard(capacity: NonZeroUsize) -> Arc<Mutex<InternerShardState>> {
734 Arc::new(Mutex::new(InternerShardState::with_capacity(capacity)))
735 }
736
737 fn intern_for_shard(shard: &Arc<Mutex<InternerShardState>>, s: &str) -> Option<InternedString> {
738 let hash = hash_string(s);
739 intern_with_shard_and_hash(shard, hash, s)
740 }
741
742 fn shard_capacity(shard: &Arc<Mutex<InternerShardState>>) -> usize {
743 shard.lock().unwrap().capacity.get()
744 }
745
746 fn shard_available(shard: &Arc<Mutex<InternerShardState>>) -> usize {
747 shard.lock().unwrap().available()
748 }
749
750 fn shard_entries(shard: &Arc<Mutex<InternerShardState>>) -> usize {
751 shard.lock().unwrap().entries
752 }
753
754 fn shard_reclaimed_len(shard: &Arc<Mutex<InternerShardState>>) -> usize {
755 shard.lock().unwrap().reclaimed.len()
756 }
757
758 fn shard_first_reclaimed_entry(shard: &Arc<Mutex<InternerShardState>>) -> ReclaimedEntry {
759 shard.lock().unwrap().reclaimed.first().unwrap()
760 }
761
762 fn get_reclaimed_entry_for_string(s: &InternedString) -> ReclaimedEntry {
763 let state = match &s.state {
764 InternedStringState::FixedSize(state) => state,
765 _ => panic!("unexpected string state"),
766 };
767
768 let ptr = state.interner.lock().unwrap().ptr.as_ptr();
769 let header = unsafe { state.header.as_ref() };
770 let offset = unsafe { state.header.as_ptr().cast::<u8>().offset_from(ptr) as usize };
771 ReclaimedEntry::new(offset, header.entry_len())
772 }
773
774 fn entry_len(s: &str) -> usize {
775 EntryHeader::len_for(s)
776 }
777
778 fn arb_alphanum_strings(
779 str_len: RangeInclusive<usize>, unique_strs: RangeInclusive<usize>,
780 ) -> impl Strategy<Value = Vec<String>> {
781 // Create characters between 0x20 (32) and 0x7E (126), which are all printable ASCII characters.
782 let char_gen = any::<u8>().prop_map(|c| std::cmp::max(c % 127, 32));
783
784 let str_gen = any::<usize>()
785 .prop_map(move |n| std::cmp::max(n % *str_len.end(), *str_len.start()))
786 .prop_flat_map(move |len| arb_vec(char_gen.clone(), len))
787 // SAFETY: We know our characters are all valid UTF-8 because they're in the ASCII range.
788 .prop_map(|xs| unsafe { String::from_utf8_unchecked(xs) });
789
790 // Create a hash set, which handles the deduplication aspect for us, ensuring we have N unique strings where N
791 // is within the `unique_strs` range... and then convert it to `Vec<String>` for easier consumption.
792 hash_set(str_gen, unique_strs).prop_map(|unique_strs| unique_strs.into_iter().collect::<Vec<_>>())
793 }
794
795 #[test]
796 fn basic() {
797 let interner = FixedSizeInterner::<1>::new(NonZeroUsize::new(1024).unwrap());
798
799 let s1 = interner.try_intern("hello").unwrap();
800 let s2 = interner.try_intern("world").unwrap();
801 let s3 = interner.try_intern("hello").unwrap();
802
803 assert_eq!(s1.deref(), "hello");
804 assert_eq!(s2.deref(), "world");
805 assert_eq!(s3.deref(), "hello");
806
807 // The pointers from the interned strings should be the same, but not between the interned string and a pointer
808 // to an equivalent (but not interned) string:
809 assert!(std::ptr::eq(s1.deref() as *const _, s3.deref() as *const _));
810
811 let local_hello = "hello";
812 assert!(!std::ptr::eq(s1.deref() as *const _, local_hello as *const _));
813 }
814
815 #[test]
816 fn try_intern_without_capacity() {
817 // Big enough to fit a single "hello world!" string, but not big enough to fit two.
818 let interner = FixedSizeInterner::<1>::new(NonZeroUsize::new(64).unwrap());
819
820 let s1 = interner.try_intern("hello world!");
821 assert!(s1.is_some());
822
823 let s2 = interner.try_intern("hello, world");
824 assert!(s2.is_none());
825 }
826
827 #[test]
828 fn reclaim_after_dropped() {
829 let shard = create_shard(NonZeroUsize::new(1024).unwrap());
830
831 let s1 = intern_for_shard(&shard, "hello").expect("should not fail to intern");
832 let s1_entry_len = entry_len(&s1);
833
834 assert_eq!(shard_entries(&shard), 1);
835 assert_eq!(shard_available(&shard), 1024 - s1_entry_len);
836 assert_eq!(shard_reclaimed_len(&shard), 0);
837
838 // Drop the interned string, which should decrement the reference count to zero and then reclaim the entry.
839 drop(s1);
840
841 assert_eq!(shard_entries(&shard), 0);
842 assert_eq!(shard_available(&shard), 1024);
843 assert_eq!(shard_reclaimed_len(&shard), 0);
844 }
845
846 #[test]
847 fn interns_to_reclaimed_entry_with_leftover() {
848 // We want to intern a string initially, which takes up almost all of the capacity, and then drop it so it gets
849 // reclaimed. After that, we'll intern a much smaller string which should lead to utilizing that reclaimed
850 // entry, but only a part of it. Finally, we'll intern another new string.
851 //
852 // The point is to demonstrate that our reclamation logic is sound in terms of allowing reclaimed entries to be
853 // split while the search/insertion logic is operating.
854 let shard_capacity = NonZeroUsize::new(256).unwrap();
855 let shard = create_shard(shard_capacity);
856
857 // We craft four strings such that the first two (`s_large` and `s_medium1`) will take up enough capacity that
858 // `s_small` can't possibly be interned in the available capacity. We'll also craft `s_medium2` so it can fit
859 // within the reclaimed entry for `s_large` but takes enough capacity that `s_small` cannot fit in the leftover
860 // reclaimed entry that we split off.
861 let s_large = "99 bottles of beer on the wall, 99 bottles of beer! take one down, pass it around, 98 bottles of beer on the wall!";
862 let s_medium1 = "no act of kindness, no matter how small, is ever wasted";
863 let s_medium2 = "if you want to go fast, go alone; if you want to go far, go together";
864 let s_small = "are you there god? it's me, margaret";
865
866 let phase1_available_capacity = shard_capacity.get() - entry_len(s_large) - entry_len(s_medium1);
867 assert!(phase1_available_capacity < entry_len(s_small));
868 assert!((entry_len(s_large) - entry_len(s_medium2)) < entry_len(s_small));
869 assert!(entry_len(s_medium2) < entry_len(s_large));
870
871 // Phase 1: intern our two larger strings.
872 let s1 = intern_for_shard(&shard, s_large).expect("should not fail to intern");
873 let s2 = intern_for_shard(&shard, s_medium1).expect("should not fail to intern");
874
875 assert_eq!(shard_entries(&shard), 2);
876 assert_eq!(shard_available(&shard), phase1_available_capacity);
877 assert_eq!(shard_reclaimed_len(&shard), 0);
878
879 // Phase 2: drop `s_large` so it gets reclaimed.
880 drop(s1);
881 assert_eq!(shard_entries(&shard), 1);
882 assert_eq!(shard_reclaimed_len(&shard), 1);
883
884 // Phase 3: intern `s_medium2`, which should fit in the reclaimed entry for `s_large`. This should leave a
885 // small, split off reclaimed entry.
886 let s3 = intern_for_shard(&shard, s_medium2).expect("should not fail to intern");
887
888 assert_eq!(shard_entries(&shard), 2);
889 assert_eq!(shard_reclaimed_len(&shard), 1);
890
891 // Phase 4: intern `s_small`, which should not fit in the leftover reclaimed entry from `s_large` _or_ the
892 // available capacity.
893 let s4 = intern_for_shard(&shard, s_small);
894 assert_eq!(s4, None);
895
896 assert_eq!(shard_entries(&shard), 2);
897 assert_eq!(shard_reclaimed_len(&shard), 1);
898
899 // And make sure we can still dereference the interned strings we _do_ have left:
900 assert_eq!(s2.deref(), s_medium1);
901 assert_eq!(s3.deref(), s_medium2);
902 }
903
904 #[test]
905 fn has_reclaimed_entries_string_fits_exactly() {
906 // The shard is large enough to fit two of the identically-sized strings, but not all three. We show that when we drop
907 // an entry and it is reclaimed, a string of identical size should always be able to reuse that reclaimed entry.
908 const S1_VALUE: &str = "hello world!";
909 const S2_VALUE: &str = "hello, world";
910 const S3_VALUE: &str = "hello--world";
911
912 let shard = create_shard(NonZeroUsize::new(80).unwrap());
913
914 // Intern the first two strings, which should fit without issue.
915 let s1 = intern_for_shard(&shard, S1_VALUE).expect("should not fail to intern");
916 let s1_reclaimed_expected = get_reclaimed_entry_for_string(&s1);
917 let _s2 = intern_for_shard(&shard, S2_VALUE).expect("should not fail to intern");
918
919 assert_eq!(shard_entries(&shard), 2);
920 assert_eq!(shard_reclaimed_len(&shard), 0);
921
922 // Try to intern the third string, which should fail as we don't have the space.
923 let s3 = intern_for_shard(&shard, S3_VALUE);
924 assert_eq!(s3, None);
925
926 // Drop the first string, which should decrement the reference count to zero and then reclaim the entry.
927 drop(s1);
928
929 assert_eq!(shard_entries(&shard), 1);
930 assert_eq!(shard_reclaimed_len(&shard), 1);
931
932 let s1_reclaimed = shard_first_reclaimed_entry(&shard);
933 assert_eq!(s1_reclaimed_expected, s1_reclaimed);
934
935 // Try again to intern the third string, which should now succeed and take over the reclaimed entry entirely
936 // as the strings are identical in length.
937 let _s3 = intern_for_shard(&shard, S3_VALUE).expect("should not fail to intern");
938
939 assert_eq!(shard_entries(&shard), 2);
940 assert_eq!(shard_reclaimed_len(&shard), 0);
941 }
942
943 #[test]
944 fn reclaimed_entry_reuse_split_too_small() {
945 // This situation is slightly contrived, but: we want to test that when there's a reclaimed entry of a certain
946 // size, reusing that reclaimed entry won't lead to it being split if the resulting split entry would be too
947 // "small": unable to hold another minimum-sized entry.
948 //
949 // We have to intern three strings to do this because we only track reclaimed entries when they're followed by
950 // in-use entries, and the string we drop to create a reclaimed entry has to be big enough, but not _too_ big,
951 // to hold the string we want to intern after it.
952 let shard = create_shard(NonZeroUsize::new(128).unwrap());
953
954 // Declare our strings to intern and just check some preconditions by hand.
955 let s_one = "a horse, a horse, my kingdom for a horse!";
956 let s_one_entry_len = entry_len(s_one);
957 let s_two = "why hello there, beautiful";
958 let s_two_entry_len = entry_len(s_two);
959 let s_three = "real gs move in silence like lasagna";
960 let s_three_entry_len = entry_len(s_three);
961
962 assert!(s_one_entry_len <= shard_capacity(&shard));
963 assert!(s_two_entry_len <= shard_capacity(&shard));
964 assert!(s_one_entry_len + s_two_entry_len + s_three_entry_len > shard_capacity(&shard));
965 assert!(s_one_entry_len > s_two_entry_len);
966 assert!(s_three_entry_len > s_two_entry_len);
967 assert!((s_one_entry_len - s_three_entry_len) < MINIMUM_ENTRY_LEN);
968
969 // Intern the first two strings, which should fit without issue.
970 let s1 = intern_for_shard(&shard, s_one).expect("should not fail to intern");
971 let s1_reclaimed_expected = get_reclaimed_entry_for_string(&s1);
972 let _s2 = intern_for_shard(&shard, s_two).expect("should not fail to intern");
973
974 assert_eq!(shard_entries(&shard), 2);
975 assert_eq!(shard_reclaimed_len(&shard), 0);
976
977 // Try to intern the third string, which should fail as we don't have the space.
978 let s3 = intern_for_shard(&shard, s_three);
979 assert_eq!(s3, None);
980
981 // Drop the first string, which should decrement the reference count to zero and then reclaim the entry.
982 drop(s1);
983
984 assert_eq!(shard_entries(&shard), 1);
985 assert_eq!(shard_reclaimed_len(&shard), 1);
986
987 let s1_reclaimed = shard_first_reclaimed_entry(&shard);
988 assert_eq!(s1_reclaimed_expected, s1_reclaimed);
989
990 // Try again to intern the third string, which should now succeed and take over the reclaimed entry, but since
991 // the remainder of the reclaimed entry after taking the necessary capacity for `s_three` is not large enough
992 // (`MINIMUM_ENTRY_LEN`), we shouldn't end up splitting the reclaimed entry, and instead, `s3` should consume
993 // the entire reclaimed entry.
994 let s3 = intern_for_shard(&shard, s_three).expect("should not fail to intern");
995 let s3_reclaimed_expected = get_reclaimed_entry_for_string(&s3);
996
997 assert_eq!(shard_entries(&shard), 2);
998 assert_eq!(shard_reclaimed_len(&shard), 0);
999 assert_eq!(s1_reclaimed_expected, s3_reclaimed_expected);
1000 }
1001
1002 #[test]
1003 fn reclaimed_entry_adjacent_to_spare_capacity() {
1004 let shard = create_shard(NonZeroUsize::new(128).unwrap());
1005
1006 // Intern two smallish strings that fit without issue.
1007 let s1 = intern_for_shard(&shard, "hello, world!").expect("should not fail to intern");
1008 let s2 = intern_for_shard(&shard, "cheeeeeehooooo!").expect("should not fail to intern");
1009 let s1_entry_len = entry_len(&s1);
1010 let s2_entry_len = entry_len(&s2);
1011
1012 assert_eq!(shard_reclaimed_len(&shard), 0);
1013 assert_eq!(shard_available(&shard), 128 - s1_entry_len - s2_entry_len);
1014
1015 drop(s2);
1016 assert_eq!(shard_reclaimed_len(&shard), 0);
1017 assert_eq!(shard_available(&shard), 128 - s1_entry_len);
1018
1019 drop(s1);
1020 assert_eq!(shard_reclaimed_len(&shard), 0);
1021 assert_eq!(shard_available(&shard), 128);
1022 }
1023
1024 proptest! {
1025 #[test]
1026 #[cfg_attr(miri, ignore)]
1027 fn property_test_entry_count_accurate(
1028 strs in arb_alphanum_strings(1..=128, 16..=512),
1029 indices in arb_vec(any::<Index>(), 1..=1000),
1030 ) {
1031 // We ask `proptest` to generate a bunch of unique strings of varying lengths (1-128 bytes, 16-512 unique
1032 // strings) and then randomly select, from those strings, which ones we want to intern. The goal
1033 // here is to potentially select the same string multiple times, to exercise the actual interning logic...
1034 // but practically, to ensure that when we intern a string that has already been interned, we're not
1035 // incrementing the entries count again.
1036
1037 // Create an interner with enough capacity to hold every string we might intern. This is the maximum
1038 // string size multiplied by the number of interning attempts... plus a little constant factor per
1039 // string to account for the entry header.
1040 const ENTRY_SIZE: usize = 128 + HEADER_LEN;
1041 let interner = FixedSizeInterner::<1>::new(NonZeroUsize::new(ENTRY_SIZE * indices.len()).unwrap());
1042
1043 // For each index, pull out the string and both track it in `unique_strs` and intern it. We hold on to the
1044 // interned string handle to make sure the interned string is actually kept alive, keeping the entry count
1045 // stable.
1046 let mut interned = Vec::new();
1047 let mut unique_strs = HashSet::new();
1048 for index in &indices {
1049 let s = index.get(&strs);
1050 unique_strs.insert(s);
1051
1052 let s_interned = interner.try_intern(s).expect("should never fail to intern");
1053 interned.push(s_interned);
1054 }
1055
1056 assert_eq!(unique_strs.len(), interner.len());
1057 }
1058 }
1059
1060 #[cfg(feature = "loom")]
1061 #[test]
1062 fn concurrent_drop_and_intern() {
1063 fn shard_reclaimed_entries(shard: &Arc<Mutex<InternerShardState>>) -> Vec<ReclaimedEntry> {
1064 shard.lock().unwrap().reclaimed.iter().copied().collect()
1065 }
1066
1067 fn do_reclaimed_entries_overlap(a: ReclaimedEntry, b: ReclaimedEntry) -> bool {
1068 let a_start = a.offset;
1069 let a_end = a.offset + a.capacity - 1;
1070
1071 let b_start = b.offset;
1072 let b_end = b.offset + b.capacity - 1;
1073
1074 a_start <= b_end && b_start <= a_end
1075 }
1076
1077 const STRING_TO_INTERN: &str = "hello, world!";
1078
1079 // This test is meant to explore the thread orderings when one thread is trying to drop (and thus reclaim) the
1080 // last active reference to an interned string, and another thread is trying to intern that very same string.
1081 //
1082 // We accept, as a caveat, that a possible outcome is that we intern the "new" string again, even though an
1083 // existing entry to that string may have existed in an alternative ordering.
1084 loom::model(|| {
1085 let shard = create_shard(NonZeroUsize::new(1024).unwrap());
1086 let t2_shard = Arc::clone(&shard);
1087
1088 // Intern the string from thread T1.
1089 let t1_interned_s = intern_for_shard(&shard, STRING_TO_INTERN).expect("should not fail to intern");
1090 assert_eq!(t1_interned_s.deref(), STRING_TO_INTERN);
1091 let t1_reclaimed_entry = get_reclaimed_entry_for_string(&t1_interned_s);
1092
1093 // Spawn thread T2, which tries to intern the same string and returns the handle to it.
1094 let t2_result = loom::thread::spawn(move || {
1095 let interned_s = intern_for_shard(&t2_shard, STRING_TO_INTERN).expect("should not fail to intern");
1096 let reclaimed_entry = get_reclaimed_entry_for_string(&interned_s);
1097
1098 (interned_s, reclaimed_entry)
1099 });
1100
1101 drop(t1_interned_s);
1102
1103 let (t2_interned_s, t2_reclaimed_entry) = t2_result.join().expect("should not fail to join T2");
1104 assert_eq!(t2_interned_s.deref(), STRING_TO_INTERN);
1105
1106 // What we're checking for here is that either:
1107 // - there's no reclaimed entries (T2 found the existing entry for the string before T1 dropped it)
1108 // - there's a reclaimed entry (T2 didn't find the existing entry for the string before T1 marked it as
1109 // inactive) but the reclaimed entry does _not_ overlap with the interned string from T2, meaning we
1110 // didn't get confused and allow T2 to use an existing entry that T1 then later marked as reclaimed
1111 let reclaimed_entries = shard_reclaimed_entries(&shard);
1112 assert!(reclaimed_entries.len() <= 1, "should have at most one reclaimed entry");
1113
1114 if !reclaimed_entries.is_empty() {
1115 // If we do have a reclaimed entry, it needs to match exactly with only one of the interned strings.
1116 let is_t1_entry = reclaimed_entries.first().unwrap() == &t1_reclaimed_entry;
1117 let is_t2_entry = reclaimed_entries.first().unwrap() == &t2_reclaimed_entry;
1118
1119 assert!(
1120 (is_t1_entry || is_t2_entry) && !(is_t1_entry && is_t2_entry),
1121 "should only match one interned string"
1122 );
1123
1124 // Additionally, we ensure that the reclaimed entry does not overlap with the other interned string.
1125 assert!(
1126 !do_reclaimed_entries_overlap(t1_reclaimed_entry, t2_reclaimed_entry),
1127 "reclaimed entry should not overlap with remaining interned string"
1128 );
1129 }
1130 });
1131 }
1132}