Skip to main content

cl_structures/
intern.rs

1//! Interners for [strings](string_interner) and non-[Drop] [types](dropless_interner).
2//!
3//! An object is [Interned][1] if it is allocated within one of the interners
4//! in this module. [Interned][1] values have referential equality semantics, and
5//! [Deref](std::ops::Deref) to the value within their respective intern pool.
6//!
7//! This means, of course, that the same value interned in two different pools will be
8//! considered *not equal* by [Eq] and [Hash](std::hash::Hash).
9//!
10//! [1]: interned::Interned
11
12pub mod interned {
13    //! An [Interned] reference asserts its wrapped value has referential equality.
14    use super::string_interner::StringInterner;
15    use std::{
16        fmt::{Debug, Display},
17        hash::Hash,
18        ops::Deref,
19    };
20
21    /// An [Interned] value is one that is *referentially comparable*.
22    /// That is, the interned value is unique in memory, simplifying
23    /// its equality and hashing implementation.
24    ///
25    /// Comparing [Interned] values via [PartialOrd] or [Ord] will still
26    /// dereference to the wrapped pointers, and as such, may produce
27    /// results inconsistent with [PartialEq] or [Eq].
28    #[repr(transparent)]
29    #[expect(private_interfaces)]
30    pub struct Interned<'a, T: ?Sized>(pub &'a T, pub Private);
31
32    pub type Symbol = Interned<'static, str>;
33
34    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
35    struct Private();
36
37    impl<'a, T: ?Sized> Interned<'a, T> {
38        /// Gets the internal value as a pointer
39        pub fn as_ptr(interned: &Self) -> *const T {
40            interned.0
41        }
42
43        /// Gets the internal value as a reference with the interner's lifetime
44        pub fn to_ref(&self) -> &'a T {
45            self.0
46        }
47    }
48
49    impl<'a, T: ?Sized> Interned<'a, T> {
50        pub(super) fn new(value: &'a T) -> Self {
51            Self(value, Private())
52        }
53    }
54
55    impl<T: ?Sized> Copy for Interned<'_, T> {}
56
57    impl<T: ?Sized> Clone for Interned<'_, T> {
58        fn clone(&self) -> Self {
59            *self
60        }
61    }
62
63    impl<T: ?Sized + Debug> Debug for Interned<'_, T> {
64        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65            f.write_str("~")?;
66            self.0.fmt(f)
67        }
68    }
69
70    impl<T: ?Sized> Deref for Interned<'_, T> {
71        type Target = T;
72        fn deref(&self) -> &Self::Target {
73            self.0
74        }
75    }
76
77    impl<T: ?Sized> PartialEq for Interned<'_, T> {
78        fn eq(&self, other: &Self) -> bool {
79            std::ptr::eq(self.0, other.0)
80        }
81    }
82
83    impl<T: ?Sized> Eq for Interned<'_, T> {}
84
85    impl<T: ?Sized + PartialOrd> PartialOrd for Interned<'_, T> {
86        fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
87            self.0.partial_cmp(other.0)
88        }
89    }
90
91    impl<T: ?Sized + Ord> Ord for Interned<'_, T> {
92        fn cmp(&self, other: &Self) -> std::cmp::Ordering {
93            self.0.cmp(other.0)
94        }
95    }
96
97    impl<T: ?Sized> Hash for Interned<'_, T> {
98        fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
99            Self::as_ptr(self).hash(state)
100        }
101    }
102
103    impl<T: ?Sized + Display> Display for Interned<'_, T> {
104        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
105            self.0.fmt(f)
106        }
107    }
108
109    impl<'a> AsRef<str> for Interned<'a, str> {
110        fn as_ref(&self) -> &str {
111            self.0
112        }
113    }
114
115    impl From<&str> for Symbol {
116        /// Types which implement [`AsRef<str>`] will be stored in the global [StringInterner]
117        fn from(value: &str) -> Self {
118            from_str(value)
119        }
120    }
121
122    impl Default for Symbol {
123        fn default() -> Self {
124            from_str("")
125        }
126    }
127
128    fn from_str(value: &str) -> Symbol {
129        let global_interner = StringInterner::global();
130        global_interner.get_or_insert(value)
131    }
132}
133
134pub mod string_interner {
135    //! A [StringInterner] hands out [Interned] copies of each unique string given to it.
136
137    use super::interned::Interned;
138    use cl_arena::dropless_arena::DroplessArena;
139    use std::{
140        collections::HashSet,
141        sync::{OnceLock, RwLock},
142    };
143
144    /// A string interner hands out [Interned] copies of each unique string given to it.
145    #[derive(Default)]
146    pub struct StringInterner<'a> {
147        arena: DroplessArena<'a>,
148        keys: RwLock<HashSet<&'a str>>,
149    }
150
151    impl StringInterner<'static> {
152        /// Gets a reference to a global string interner whose [Interned] strings are `'static`
153        pub fn global() -> &'static Self {
154            static GLOBAL_INTERNER: OnceLock<StringInterner<'static>> = OnceLock::new();
155
156            // SAFETY: The RwLock within the interner's `keys` protects the arena
157            // from being modified concurrently.
158            GLOBAL_INTERNER.get_or_init(|| StringInterner {
159                arena: DroplessArena::new(),
160                keys: Default::default(),
161            })
162        }
163    }
164
165    impl<'a> StringInterner<'a> {
166        /// Creates a new [StringInterner] backed by the provided [DroplessArena]
167        pub fn new(arena: DroplessArena<'a>) -> Self {
168            Self { arena, keys: RwLock::new(HashSet::new()) }
169        }
170
171        /// Returns an [Interned] copy of the given string,
172        /// allocating a new one if it doesn't already exist.
173        ///
174        /// # Blocks
175        /// This function blocks when the interner is held by another thread.
176        pub fn get_or_insert(&'a self, value: &str) -> Interned<'a, str> {
177            let Self { arena, keys } = self;
178
179            // Safety: Holding this write guard for the entire duration of this
180            // function enforces a safety invariant. See StringInterner::global.
181            let mut keys = keys.write().expect("should not be poisoned");
182
183            Interned::new(match keys.get(value) {
184                Some(value) => value,
185                None => {
186                    let value = match value {
187                        "" => "", // Arena will panic if passed an empty string
188                        _ => arena.alloc_str(value),
189                    };
190                    keys.insert(value);
191                    value
192                }
193            })
194        }
195        /// Gets a reference to the interned copy of the given value, if it exists
196        /// # Blocks
197        /// This function blocks when the interner is held by another thread.
198        pub fn get(&'a self, value: &str) -> Option<Interned<'a, str>> {
199            let keys = self.keys.read().expect("should not be poisoned");
200            keys.get(value).copied().map(Interned::new)
201        }
202    }
203
204    impl std::fmt::Debug for StringInterner<'_> {
205        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
206            f.debug_struct("Interner")
207                .field("keys", &self.keys)
208                .finish()
209        }
210    }
211
212    impl std::fmt::Display for StringInterner<'_> {
213        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
214            let Ok(keys) = self.keys.read() else {
215                return write!(f, "Could not lock StringInterner key map.");
216            };
217            let mut keys: Vec<_> = keys.iter().collect();
218            keys.sort();
219
220            writeln!(f, "Keys:")?;
221            for (idx, key) in keys.iter().enumerate() {
222                writeln!(f, "{idx}:\t\"{key}\"")?
223            }
224            writeln!(f, "Count: {}", keys.len())?;
225
226            Ok(())
227        }
228    }
229
230    // # Safety:
231    // This is fine because StringInterner::get_or_insert(v) holds a RwLock
232    // for its entire duration, and doesn't touch the non-(Send+Sync) arena
233    // unless the lock is held by a write guard.
234    unsafe impl Send for StringInterner<'_> {}
235    unsafe impl Sync for StringInterner<'_> {}
236
237    #[cfg(test)]
238    mod tests {
239        use super::StringInterner;
240
241        macro_rules! ptr_eq {
242        ($a: expr, $b: expr $(, $($t:tt)*)?) => {
243            assert_eq!(std::ptr::addr_of!($a), std::ptr::addr_of!($b) $(, $($t)*)?)
244        };
245    }
246        macro_rules! ptr_ne {
247        ($a: expr, $b: expr $(, $($t:tt)*)?) => {
248            assert_ne!(std::ptr::addr_of!($a), std::ptr::addr_of!($b) $(, $($t)*)?)
249        };
250    }
251
252        #[test]
253        fn empties_is_unique() {
254            let interner = StringInterner::global();
255            let empty = interner.get_or_insert("");
256            let empty2 = interner.get_or_insert("");
257            ptr_eq!(*empty, *empty2);
258        }
259        #[test]
260        fn non_empty_is_unique() {
261            let interner = StringInterner::global();
262            let nonempty1 = interner.get_or_insert("not empty!");
263            let nonempty2 = interner.get_or_insert("not empty!");
264            let different = interner.get_or_insert("different!");
265            ptr_eq!(*nonempty1, *nonempty2);
266            ptr_ne!(*nonempty1, *different);
267        }
268    }
269}
270
271pub mod dropless_interner {
272    //! A [DroplessInterner] hands out [Interned] references for arbitrary types.
273    //!
274    //! Note: It is a *logic error* to modify the returned reference via interior mutability
275    //! in a way that changes the values produced by [Eq] and [Hash].
276    //!
277    //! See the standard library [HashSet] for more details.
278    //!
279    //! ```rust
280    //! use cl_structures::intern::dropless_interner::DroplessInterner;
281    //! use cl_arena::dropless_arena::DroplessArena;
282    //!
283    //! let da = DroplessArena::new();
284    //! let di: DroplessInterner<'_, ()> = DroplessInterner::new(da);
285    //! let unit1 = di.get_or_insert(());
286    //! let unit2 = di.get_or_insert(());
287    //! let unit3 = di.get(&()).unwrap();
288    //! assert_eq!(unit1, unit3);
289    //! assert_eq!(unit2, unit1);
290    //! assert_eq!(unit3, unit2);
291    //! ```
292    //!
293    //! ```rust
294    //! use cl_structures::intern::dropless_interner::DroplessInterner;
295    //! use cl_arena::dropless_arena::DroplessArena;
296    //!
297    //! let da = DroplessArena::new();
298    //! let di: DroplessInterner<'_, [i32; 10]> = DroplessInterner::new(da);
299    //! let arr1 = di.get_or_insert([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
300    //! let arr2 = di.get_or_insert([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
301    //! let arr3 = di.get(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).unwrap();
302    //! assert_eq!(arr1, arr3);
303    //! assert_eq!(arr2, arr1);
304    //! assert_eq!(arr3, arr2);
305    //!
306    //! let arr4 = di.get_or_insert([10, 9, 8, 7, 6, 5, 4, 3, 2, 1]);
307    //! assert_ne!(arr1, arr4);
308    //! ```
309    use super::interned::Interned;
310    use cl_arena::dropless_arena::DroplessArena;
311    use std::{collections::HashSet, hash::Hash, sync::RwLock};
312
313    /// A [DroplessInterner] hands out [Interned] references for arbitrary types.
314    ///
315    /// See the [module-level documentation](self) for more information.
316    pub struct DroplessInterner<'a, T: Eq + Hash> {
317        arena: DroplessArena<'a>,
318        keys: RwLock<HashSet<&'a T>>,
319    }
320
321    impl<'a, T: Eq + Hash> Default for DroplessInterner<'a, T> {
322        fn default() -> Self {
323            Self { arena: Default::default(), keys: Default::default() }
324        }
325    }
326
327    impl<'a, T: Eq + Hash> DroplessInterner<'a, T> {
328        /// Creates a new [DroplessInterner] backed by the provided [`DroplessArena`]
329        ///
330        /// # Panics
331        /// Panics if T [needs drop](std::mem::needs_drop)
332        pub fn new(arena: DroplessArena<'a>) -> Self {
333            Self { arena, keys: RwLock::new(HashSet::new()) }
334        }
335
336        /// Converts the given value into an [Interned] value.
337        ///
338        /// # Blocks
339        /// This function blocks when the interner is held by another thread.
340        pub fn get_or_insert(&'a self, value: T) -> Interned<'a, T> {
341            let Self { arena, keys } = self;
342
343            // Safety: Locking the keyset for the entire duration of this function
344            // enforces a safety invariant when the interner is stored in a global.
345            let mut keys = keys.write().expect("should not be poisoned");
346
347            Interned::new(match keys.get(&value) {
348                Some(value) => value,
349                None => {
350                    let value = if std::mem::size_of::<T>() == 0 {
351                        Box::leak(Box::new(value))
352                    } else {
353                        arena.alloc(value)
354                    };
355                    keys.insert(value);
356                    value
357                }
358            })
359        }
360
361        /// Returns the [Interned] copy of the given value, if one already exists
362        ///
363        /// # Blocks
364        /// This function blocks when the interner is being written to by another thread.
365        pub fn get(&self, value: &T) -> Option<Interned<'a, T>> {
366            let keys = self.keys.read().expect("should not be poisoned");
367            keys.get(value).copied().map(Interned::new)
368        }
369    }
370
371    /// # Safety
372    /// This should be safe because references yielded by
373    /// [get_or_insert](DroplessInterner::get_or_insert) are unique, and the function uses
374    /// the [RwLock] around the [HashSet] to ensure mutual exclusion
375    unsafe impl<'a, T: Eq + Hash + Send> Send for DroplessInterner<'a, T> where &'a T: Send {}
376    unsafe impl<T: Eq + Hash + Send + Sync> Sync for DroplessInterner<'_, T> {}
377}
378
379pub mod leaky_interner {
380    //! An "interner" which leaks anything you give it
381    use std::{collections::HashSet, hash::Hash, sync::RwLock};
382
383    use crate::intern::interned::Interned;
384
385    /// An interner which leaks anything you give it using [Box::leak]
386    pub struct LeakyInterner<'a, T: Eq + Hash> {
387        keys: RwLock<HashSet<&'a T>>,
388    }
389
390    impl<'a, T: Eq + Hash> Default for LeakyInterner<'a, T> {
391        fn default() -> Self {
392            Self { keys: Default::default() }
393        }
394    }
395
396    impl<'a, T: Eq + Hash> LeakyInterner<'a, T> {
397        pub fn new() -> Self {
398            Self { keys: RwLock::new(HashSet::new()) }
399        }
400
401        /// Converts the given value into an [Interned] value.
402        ///
403        /// # Blocks
404        /// This function blocks when the interner is held by another thread.
405        pub fn get_or_insert(&'a self, value: T) -> Interned<'a, T> {
406            let Self { keys } = self;
407            let mut keys = keys.write().expect("should not be poisoned");
408
409            Interned::new(match keys.get(&value) {
410                Some(value) => value,
411                None => {
412                    let value = Box::leak(Box::new(value));
413                    keys.insert(value);
414                    value
415                }
416            })
417        }
418
419        /// Returns the [Interned] copy of the given value, if one already exists
420        ///
421        /// # Blocks
422        /// This function blocks when the interner is being written to by another thread.
423        pub fn get(&self, value: &T) -> Option<Interned<'a, T>> {
424            let keys = self.keys.read().expect("should not be poisoned");
425            keys.get(value).copied().map(Interned::new)
426        }
427
428        /// Calls the closure on each previously-interned value
429        pub fn foreach(&self, mut f: impl FnMut(&T)) {
430            for key in self.keys.read().expect("should not be poisoned").iter() {
431                f(key);
432            }
433        }
434    }
435}