cl_structures/intern.rs
1//! Interners for [strings](string_interner) and arbitrary [types](typed_interner).
2//!
3//! An object is [Interned][1] if it is allocated within one of the interners
4//! in this module. [Interned][1] values have referential equality semantics, and
5//! [Deref](std::ops::Deref) to the value within their respective intern pool.
6//!
7//! This means, of course, that the same value interned in two different pools will be
8//! considered *not equal* by [Eq] and [Hash](std::hash::Hash).
9//!
10//! [1]: interned::Interned
11
12pub mod interned {
13 //! An [Interned] reference asserts its wrapped value has referential equality.
14 use super::string_interner::StringInterner;
15 use std::{
16 fmt::{Debug, Display},
17 hash::Hash,
18 ops::Deref,
19 };
20
21 /// An [Interned] value is one that is *referentially comparable*.
22 /// That is, the interned value is unique in memory, simplifying
23 /// its equality and hashing implementation.
24 ///
25 /// Comparing [Interned] values via [PartialOrd] or [Ord] will still
26 /// dereference to the wrapped pointers, and as such, may produce
27 /// results inconsistent with [PartialEq] or [Eq].
28 #[repr(transparent)]
29 pub struct Interned<'a, T: ?Sized> {
30 value: &'a T,
31 }
32
33 impl<'a, T: ?Sized> Interned<'a, T> {
34 /// Gets the internal value as a pointer
35 pub fn as_ptr(interned: &Self) -> *const T {
36 interned.value
37 }
38
39 /// Gets the internal value as a reference with the interner's lifetime
40 pub fn to_ref(&self) -> &'a T {
41 self.value
42 }
43 }
44
45 impl<T: ?Sized + Debug> Debug for Interned<'_, T> {
46 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
47 f.debug_struct("Interned")
48 .field("value", &self.value)
49 .finish()
50 }
51 }
52 impl<'a, T: ?Sized> Interned<'a, T> {
53 pub(super) fn new(value: &'a T) -> Self {
54 Self { value }
55 }
56 }
57 impl<T: ?Sized> Deref for Interned<'_, T> {
58 type Target = T;
59 fn deref(&self) -> &Self::Target {
60 self.value
61 }
62 }
63 impl<T: ?Sized> Copy for Interned<'_, T> {}
64 impl<T: ?Sized> Clone for Interned<'_, T> {
65 fn clone(&self) -> Self {
66 *self
67 }
68 }
69 // TODO: These implementations are subtly incorrect, as they do not line up with `eq`
70 // impl<'a, T: ?Sized + PartialOrd> PartialOrd for Interned<'a, T> {
71 // fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
72 // match self == other {
73 // true => Some(std::cmp::Ordering::Equal),
74 // false => self.value.partial_cmp(other.value),
75 // }
76 // }
77 // }
78 // impl<'a, T: ?Sized + Ord> Ord for Interned<'a, T> {
79 // fn cmp(&self, other: &Self) -> std::cmp::Ordering {
80 // match self == other {
81 // true => std::cmp::Ordering::Equal,
82 // false => self.value.cmp(other.value),
83 // }
84 // }
85 // }
86
87 impl<T: ?Sized> Eq for Interned<'_, T> {}
88 impl<T: ?Sized> PartialEq for Interned<'_, T> {
89 fn eq(&self, other: &Self) -> bool {
90 std::ptr::eq(self.value, other.value)
91 }
92 }
93 impl<T: ?Sized> Hash for Interned<'_, T> {
94 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
95 Self::as_ptr(self).hash(state)
96 }
97 }
98 impl<T: ?Sized + Display> Display for Interned<'_, T> {
99 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
100 self.value.fmt(f)
101 }
102 }
103
104 impl<T: AsRef<str>> From<T> for Interned<'static, str> {
105 /// Types which implement [`AsRef<str>`] will be stored in the global [StringInterner]
106 fn from(value: T) -> Self {
107 from_str(value.as_ref())
108 }
109 }
110 fn from_str(value: &str) -> Interned<'static, str> {
111 let global_interner = StringInterner::global();
112 global_interner.get_or_insert(value)
113 }
114}
115
116pub mod string_interner {
117 //! A [StringInterner] hands out [Interned] copies of each unique string given to it.
118
119 use super::interned::Interned;
120 use cl_arena::dropless_arena::DroplessArena;
121 use std::{
122 collections::HashSet,
123 sync::{OnceLock, RwLock},
124 };
125
126 /// A string interner hands out [Interned] copies of each unique string given to it.
127 #[derive(Default)]
128 pub struct StringInterner<'a> {
129 arena: DroplessArena<'a>,
130 keys: RwLock<HashSet<&'a str>>,
131 }
132
133 impl StringInterner<'static> {
134 /// Gets a reference to a global string interner whose [Interned] strings are `'static`
135 pub fn global() -> &'static Self {
136 static GLOBAL_INTERNER: OnceLock<StringInterner<'static>> = OnceLock::new();
137
138 // SAFETY: The RwLock within the interner's `keys` protects the arena
139 // from being modified concurrently.
140 GLOBAL_INTERNER.get_or_init(|| StringInterner {
141 arena: DroplessArena::new(),
142 keys: Default::default(),
143 })
144 }
145 }
146
147 impl<'a> StringInterner<'a> {
148 /// Creates a new [StringInterner] backed by the provided [DroplessArena]
149 pub fn new(arena: DroplessArena<'a>) -> Self {
150 Self { arena, keys: RwLock::new(HashSet::new()) }
151 }
152
153 /// Returns an [Interned] copy of the given string,
154 /// allocating a new one if it doesn't already exist.
155 ///
156 /// # Blocks
157 /// This function blocks when the interner is held by another thread.
158 pub fn get_or_insert(&'a self, value: &str) -> Interned<'a, str> {
159 let Self { arena, keys } = self;
160
161 // Safety: Holding this write guard for the entire duration of this
162 // function enforces a safety invariant. See StringInterner::global.
163 let mut keys = keys.write().expect("should not be poisoned");
164
165 Interned::new(match keys.get(value) {
166 Some(value) => value,
167 None => {
168 let value = match value {
169 "" => "", // Arena will panic if passed an empty string
170 _ => arena.alloc_str(value),
171 };
172 keys.insert(value);
173 value
174 }
175 })
176 }
177 /// Gets a reference to the interned copy of the given value, if it exists
178 /// # Blocks
179 /// This function blocks when the interner is held by another thread.
180 pub fn get(&'a self, value: &str) -> Option<Interned<'a, str>> {
181 let keys = self.keys.read().expect("should not be poisoned");
182 keys.get(value).copied().map(Interned::new)
183 }
184 }
185
186 impl std::fmt::Debug for StringInterner<'_> {
187 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
188 f.debug_struct("Interner")
189 .field("keys", &self.keys)
190 .finish()
191 }
192 }
193
194 impl std::fmt::Display for StringInterner<'_> {
195 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
196 let Ok(keys) = self.keys.read() else {
197 return write!(f, "Could not lock StringInterner key map.");
198 };
199 let mut keys: Vec<_> = keys.iter().collect();
200 keys.sort();
201
202 writeln!(f, "Keys:")?;
203 for (idx, key) in keys.iter().enumerate() {
204 writeln!(f, "{idx}:\t\"{key}\"")?
205 }
206 writeln!(f, "Count: {}", keys.len())?;
207
208 Ok(())
209 }
210 }
211
212 // # Safety:
213 // This is fine because StringInterner::get_or_insert(v) holds a RwLock
214 // for its entire duration, and doesn't touch the non-(Send+Sync) arena
215 // unless the lock is held by a write guard.
216 unsafe impl Send for StringInterner<'_> {}
217 unsafe impl Sync for StringInterner<'_> {}
218
219 #[cfg(test)]
220 mod tests {
221 use super::StringInterner;
222
223 macro_rules! ptr_eq {
224 ($a: expr, $b: expr $(, $($t:tt)*)?) => {
225 assert_eq!(std::ptr::addr_of!($a), std::ptr::addr_of!($b) $(, $($t)*)?)
226 };
227 }
228 macro_rules! ptr_ne {
229 ($a: expr, $b: expr $(, $($t:tt)*)?) => {
230 assert_ne!(std::ptr::addr_of!($a), std::ptr::addr_of!($b) $(, $($t)*)?)
231 };
232 }
233
234 #[test]
235 fn empties_is_unique() {
236 let interner = StringInterner::global();
237 let empty = interner.get_or_insert("");
238 let empty2 = interner.get_or_insert("");
239 ptr_eq!(*empty, *empty2);
240 }
241 #[test]
242 fn non_empty_is_unique() {
243 let interner = StringInterner::global();
244 let nonempty1 = interner.get_or_insert("not empty!");
245 let nonempty2 = interner.get_or_insert("not empty!");
246 let different = interner.get_or_insert("different!");
247 ptr_eq!(*nonempty1, *nonempty2);
248 ptr_ne!(*nonempty1, *different);
249 }
250 }
251}
252
253pub mod typed_interner {
254 //! A [TypedInterner] hands out [Interned] references for arbitrary types.
255 //!
256 //! Note: It is a *logic error* to modify the returned reference via interior mutability
257 //! in a way that changes the values produced by [Eq] and [Hash].
258 //!
259 //! See the standard library [HashSet] for more details.
260 use super::interned::Interned;
261 use cl_arena::typed_arena::TypedArena;
262 use std::{collections::HashSet, hash::Hash, sync::RwLock};
263
264 /// A [TypedInterner] hands out [Interned] references for arbitrary types.
265 ///
266 /// See the [module-level documentation](self) for more information.
267 pub struct TypedInterner<'a, T: Eq + Hash> {
268 arena: TypedArena<'a, T>,
269 keys: RwLock<HashSet<&'a T>>,
270 }
271
272 impl<'a, T: Eq + Hash> Default for TypedInterner<'a, T> {
273 fn default() -> Self {
274 Self { arena: Default::default(), keys: Default::default() }
275 }
276 }
277
278 impl<'a, T: Eq + Hash> TypedInterner<'a, T> {
279 /// Creates a new [TypedInterner] backed by the provided [TypedArena]
280 pub fn new(arena: TypedArena<'a, T>) -> Self {
281 Self { arena, keys: RwLock::new(HashSet::new()) }
282 }
283
284 /// Converts the given value into an [Interned] value.
285 ///
286 /// # Blocks
287 /// This function blocks when the interner is held by another thread.
288 pub fn get_or_insert(&'a self, value: T) -> Interned<'a, T> {
289 let Self { arena, keys } = self;
290
291 // Safety: Locking the keyset for the entire duration of this function
292 // enforces a safety invariant when the interner is stored in a global.
293 let mut keys = keys.write().expect("should not be poisoned");
294
295 Interned::new(match keys.get(&value) {
296 Some(value) => value,
297 None => {
298 let value = arena.alloc(value);
299 keys.insert(value);
300 value
301 }
302 })
303 }
304 /// Returns the [Interned] copy of the given value, if one already exists
305 ///
306 /// # Blocks
307 /// This function blocks when the interner is being written to by another thread.
308 pub fn get(&self, value: &T) -> Option<Interned<'a, T>> {
309 let keys = self.keys.read().expect("should not be poisoned");
310 keys.get(value).copied().map(Interned::new)
311 }
312 }
313
314 /// # Safety
315 /// This should be safe because references yielded by
316 /// [get_or_insert](TypedInterner::get_or_insert) are unique, and the function uses
317 /// the [RwLock] around the [HashSet] to ensure mutual exclusion
318 unsafe impl<'a, T: Eq + Hash + Send> Send for TypedInterner<'a, T> where &'a T: Send {}
319 unsafe impl<T: Eq + Hash + Send + Sync> Sync for TypedInterner<'_, T> {}
320}