cl_structures/
intern.rs

1//! Interners for [strings](string_interner) and arbitrary [types](typed_interner).
2//!
3//! An object is [Interned][1] if it is allocated within one of the interners
4//! in this module. [Interned][1] values have referential equality semantics, and
5//! [Deref](std::ops::Deref) to the value within their respective intern pool.
6//!
7//! This means, of course, that the same value interned in two different pools will be
8//! considered *not equal* by [Eq] and [Hash](std::hash::Hash).
9//!
10//! [1]: interned::Interned
11
12pub mod interned {
13    //! An [Interned] reference asserts its wrapped value has referential equality.
14    use super::string_interner::StringInterner;
15    use std::{
16        fmt::{Debug, Display},
17        hash::Hash,
18        ops::Deref,
19    };
20
21    /// An [Interned] value is one that is *referentially comparable*.
22    /// That is, the interned value is unique in memory, simplifying
23    /// its equality and hashing implementation.
24    ///
25    /// Comparing [Interned] values via [PartialOrd] or [Ord] will still
26    /// dereference to the wrapped pointers, and as such, may produce
27    /// results inconsistent with [PartialEq] or [Eq].
28    #[repr(transparent)]
29    pub struct Interned<'a, T: ?Sized> {
30        value: &'a T,
31    }
32
33    impl<'a, T: ?Sized> Interned<'a, T> {
34        /// Gets the internal value as a pointer
35        pub fn as_ptr(interned: &Self) -> *const T {
36            interned.value
37        }
38
39        /// Gets the internal value as a reference with the interner's lifetime
40        pub fn to_ref(&self) -> &'a T {
41            self.value
42        }
43    }
44
45    impl<T: ?Sized + Debug> Debug for Interned<'_, T> {
46        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
47            f.debug_struct("Interned")
48                .field("value", &self.value)
49                .finish()
50        }
51    }
52    impl<'a, T: ?Sized> Interned<'a, T> {
53        pub(super) fn new(value: &'a T) -> Self {
54            Self { value }
55        }
56    }
57    impl<T: ?Sized> Deref for Interned<'_, T> {
58        type Target = T;
59        fn deref(&self) -> &Self::Target {
60            self.value
61        }
62    }
63    impl<T: ?Sized> Copy for Interned<'_, T> {}
64    impl<T: ?Sized> Clone for Interned<'_, T> {
65        fn clone(&self) -> Self {
66            *self
67        }
68    }
69    // TODO: These implementations are subtly incorrect, as they do not line up with `eq`
70    // impl<'a, T: ?Sized + PartialOrd> PartialOrd for Interned<'a, T> {
71    //     fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
72    //         match self == other {
73    //             true => Some(std::cmp::Ordering::Equal),
74    //             false => self.value.partial_cmp(other.value),
75    //         }
76    //     }
77    // }
78    // impl<'a, T: ?Sized + Ord> Ord for Interned<'a, T> {
79    //     fn cmp(&self, other: &Self) -> std::cmp::Ordering {
80    //         match self == other {
81    //             true => std::cmp::Ordering::Equal,
82    //             false => self.value.cmp(other.value),
83    //         }
84    //     }
85    // }
86
87    impl<T: ?Sized> Eq for Interned<'_, T> {}
88    impl<T: ?Sized> PartialEq for Interned<'_, T> {
89        fn eq(&self, other: &Self) -> bool {
90            std::ptr::eq(self.value, other.value)
91        }
92    }
93    impl<T: ?Sized> Hash for Interned<'_, T> {
94        fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
95            Self::as_ptr(self).hash(state)
96        }
97    }
98    impl<T: ?Sized + Display> Display for Interned<'_, T> {
99        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
100            self.value.fmt(f)
101        }
102    }
103
104    impl<T: AsRef<str>> From<T> for Interned<'static, str> {
105        /// Types which implement [`AsRef<str>`] will be stored in the global [StringInterner]
106        fn from(value: T) -> Self {
107            from_str(value.as_ref())
108        }
109    }
110    fn from_str(value: &str) -> Interned<'static, str> {
111        let global_interner = StringInterner::global();
112        global_interner.get_or_insert(value)
113    }
114}
115
116pub mod string_interner {
117    //! A [StringInterner] hands out [Interned] copies of each unique string given to it.
118
119    use super::interned::Interned;
120    use cl_arena::dropless_arena::DroplessArena;
121    use std::{
122        collections::HashSet,
123        sync::{OnceLock, RwLock},
124    };
125
126    /// A string interner hands out [Interned] copies of each unique string given to it.
127    #[derive(Default)]
128    pub struct StringInterner<'a> {
129        arena: DroplessArena<'a>,
130        keys: RwLock<HashSet<&'a str>>,
131    }
132
133    impl StringInterner<'static> {
134        /// Gets a reference to a global string interner whose [Interned] strings are `'static`
135        pub fn global() -> &'static Self {
136            static GLOBAL_INTERNER: OnceLock<StringInterner<'static>> = OnceLock::new();
137
138            // SAFETY: The RwLock within the interner's `keys` protects the arena
139            // from being modified concurrently.
140            GLOBAL_INTERNER.get_or_init(|| StringInterner {
141                arena: DroplessArena::new(),
142                keys: Default::default(),
143            })
144        }
145    }
146
147    impl<'a> StringInterner<'a> {
148        /// Creates a new [StringInterner] backed by the provided [DroplessArena]
149        pub fn new(arena: DroplessArena<'a>) -> Self {
150            Self { arena, keys: RwLock::new(HashSet::new()) }
151        }
152
153        /// Returns an [Interned] copy of the given string,
154        /// allocating a new one if it doesn't already exist.
155        ///
156        /// # Blocks
157        /// This function blocks when the interner is held by another thread.
158        pub fn get_or_insert(&'a self, value: &str) -> Interned<'a, str> {
159            let Self { arena, keys } = self;
160
161            // Safety: Holding this write guard for the entire duration of this
162            // function enforces a safety invariant. See StringInterner::global.
163            let mut keys = keys.write().expect("should not be poisoned");
164
165            Interned::new(match keys.get(value) {
166                Some(value) => value,
167                None => {
168                    let value = match value {
169                        "" => "", // Arena will panic if passed an empty string
170                        _ => arena.alloc_str(value),
171                    };
172                    keys.insert(value);
173                    value
174                }
175            })
176        }
177        /// Gets a reference to the interned copy of the given value, if it exists
178        /// # Blocks
179        /// This function blocks when the interner is held by another thread.
180        pub fn get(&'a self, value: &str) -> Option<Interned<'a, str>> {
181            let keys = self.keys.read().expect("should not be poisoned");
182            keys.get(value).copied().map(Interned::new)
183        }
184    }
185
186    impl std::fmt::Debug for StringInterner<'_> {
187        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
188            f.debug_struct("Interner")
189                .field("keys", &self.keys)
190                .finish()
191        }
192    }
193
194    impl std::fmt::Display for StringInterner<'_> {
195        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
196            let Ok(keys) = self.keys.read() else {
197                return write!(f, "Could not lock StringInterner key map.");
198            };
199            let mut keys: Vec<_> = keys.iter().collect();
200            keys.sort();
201
202            writeln!(f, "Keys:")?;
203            for (idx, key) in keys.iter().enumerate() {
204                writeln!(f, "{idx}:\t\"{key}\"")?
205            }
206            writeln!(f, "Count: {}", keys.len())?;
207
208            Ok(())
209        }
210    }
211
212    // # Safety:
213    // This is fine because StringInterner::get_or_insert(v) holds a RwLock
214    // for its entire duration, and doesn't touch the non-(Send+Sync) arena
215    // unless the lock is held by a write guard.
216    unsafe impl Send for StringInterner<'_> {}
217    unsafe impl Sync for StringInterner<'_> {}
218
219    #[cfg(test)]
220    mod tests {
221        use super::StringInterner;
222
223        macro_rules! ptr_eq {
224        ($a: expr, $b: expr $(, $($t:tt)*)?) => {
225            assert_eq!(std::ptr::addr_of!($a), std::ptr::addr_of!($b) $(, $($t)*)?)
226        };
227    }
228        macro_rules! ptr_ne {
229        ($a: expr, $b: expr $(, $($t:tt)*)?) => {
230            assert_ne!(std::ptr::addr_of!($a), std::ptr::addr_of!($b) $(, $($t)*)?)
231        };
232    }
233
234        #[test]
235        fn empties_is_unique() {
236            let interner = StringInterner::global();
237            let empty = interner.get_or_insert("");
238            let empty2 = interner.get_or_insert("");
239            ptr_eq!(*empty, *empty2);
240        }
241        #[test]
242        fn non_empty_is_unique() {
243            let interner = StringInterner::global();
244            let nonempty1 = interner.get_or_insert("not empty!");
245            let nonempty2 = interner.get_or_insert("not empty!");
246            let different = interner.get_or_insert("different!");
247            ptr_eq!(*nonempty1, *nonempty2);
248            ptr_ne!(*nonempty1, *different);
249        }
250    }
251}
252
253pub mod typed_interner {
254    //! A [TypedInterner] hands out [Interned] references for arbitrary types.
255    //!
256    //! Note: It is a *logic error* to modify the returned reference via interior mutability
257    //! in a way that changes the values produced by [Eq] and [Hash].
258    //!
259    //! See the standard library [HashSet] for more details.
260    use super::interned::Interned;
261    use cl_arena::typed_arena::TypedArena;
262    use std::{collections::HashSet, hash::Hash, sync::RwLock};
263
264    /// A [TypedInterner] hands out [Interned] references for arbitrary types.
265    ///
266    /// See the [module-level documentation](self) for more information.
267    pub struct TypedInterner<'a, T: Eq + Hash> {
268        arena: TypedArena<'a, T>,
269        keys: RwLock<HashSet<&'a T>>,
270    }
271
272    impl<'a, T: Eq + Hash> Default for TypedInterner<'a, T> {
273        fn default() -> Self {
274            Self { arena: Default::default(), keys: Default::default() }
275        }
276    }
277
278    impl<'a, T: Eq + Hash> TypedInterner<'a, T> {
279        /// Creates a new [TypedInterner] backed by the provided [TypedArena]
280        pub fn new(arena: TypedArena<'a, T>) -> Self {
281            Self { arena, keys: RwLock::new(HashSet::new()) }
282        }
283
284        /// Converts the given value into an [Interned] value.
285        ///
286        /// # Blocks
287        /// This function blocks when the interner is held by another thread.
288        pub fn get_or_insert(&'a self, value: T) -> Interned<'a, T> {
289            let Self { arena, keys } = self;
290
291            // Safety: Locking the keyset for the entire duration of this function
292            // enforces a safety invariant when the interner is stored in a global.
293            let mut keys = keys.write().expect("should not be poisoned");
294
295            Interned::new(match keys.get(&value) {
296                Some(value) => value,
297                None => {
298                    let value = arena.alloc(value);
299                    keys.insert(value);
300                    value
301                }
302            })
303        }
304        /// Returns the [Interned] copy of the given value, if one already exists
305        ///
306        /// # Blocks
307        /// This function blocks when the interner is being written to by another thread.
308        pub fn get(&self, value: &T) -> Option<Interned<'a, T>> {
309            let keys = self.keys.read().expect("should not be poisoned");
310            keys.get(value).copied().map(Interned::new)
311        }
312    }
313
314    /// # Safety
315    /// This should be safe because references yielded by
316    /// [get_or_insert](TypedInterner::get_or_insert) are unique, and the function uses
317    /// the [RwLock] around the [HashSet] to ensure mutual exclusion
318    unsafe impl<'a, T: Eq + Hash + Send> Send for TypedInterner<'a, T> where &'a T: Send {}
319    unsafe impl<T: Eq + Hash + Send + Sync> Sync for TypedInterner<'_, T> {}
320}