1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
//! Interners for [strings](string_interner) and arbitrary [types](typed_interner).
//!
//! An object is [Interned][1] if it is allocated within one of the interners
//! in this module. [Interned][1] values have referential equality semantics, and
//! [Deref](std::ops::Deref) to the value within their respective intern pool.
//!
//! This means, of course, that the same value interned in two different pools will be
//! considered *not equal* by [Eq] and [Hash](std::hash::Hash).
//!
//! [1]: interned::Interned

pub mod interned {
    //! An [Interned] reference asserts its wrapped value has referential equality.
    use super::string_interner::StringInterner;
    use std::{
        fmt::{Debug, Display},
        hash::Hash,
        ops::Deref,
    };

    /// An [Interned] value is one that is *referentially comparable*.
    /// That is, the interned value is unique in memory, simplifying
    /// its equality and hashing implementation.
    ///
    /// Comparing [Interned] values via [PartialOrd] or [Ord] will still
    /// dereference to the wrapped pointers, and as such, may produce
    /// results inconsistent with [PartialEq] or [Eq].
    #[repr(transparent)]
    pub struct Interned<'a, T: ?Sized> {
        value: &'a T,
    }

    impl<'a, T: ?Sized> Interned<'a, T> {
        /// Gets the internal value as a pointer
        pub fn as_ptr(interned: &Self) -> *const T {
            interned.value
        }
    }

    impl<'a, T: ?Sized + Debug> Debug for Interned<'a, T> {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            f.debug_struct("Interned")
                .field("value", &self.value)
                .finish()
        }
    }
    impl<'a, T: ?Sized> Interned<'a, T> {
        pub(super) fn new(value: &'a T) -> Self {
            Self { value }
        }
    }
    impl<'a, T: ?Sized> Deref for Interned<'a, T> {
        type Target = T;
        fn deref(&self) -> &Self::Target {
            self.value
        }
    }
    impl<'a, T: ?Sized> Copy for Interned<'a, T> {}
    impl<'a, T: ?Sized> Clone for Interned<'a, T> {
        fn clone(&self) -> Self {
            *self
        }
    }
    // TODO: These implementations are subtly incorrect, as they do not line up with `eq`
    // impl<'a, T: ?Sized + PartialOrd> PartialOrd for Interned<'a, T> {
    //     fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
    //         match self == other {
    //             true => Some(std::cmp::Ordering::Equal),
    //             false => self.value.partial_cmp(other.value),
    //         }
    //     }
    // }
    // impl<'a, T: ?Sized + Ord> Ord for Interned<'a, T> {
    //     fn cmp(&self, other: &Self) -> std::cmp::Ordering {
    //         match self == other {
    //             true => std::cmp::Ordering::Equal,
    //             false => self.value.cmp(other.value),
    //         }
    //     }
    // }

    impl<'a, T: ?Sized> Eq for Interned<'a, T> {}
    impl<'a, T: ?Sized> PartialEq for Interned<'a, T> {
        fn eq(&self, other: &Self) -> bool {
            std::ptr::eq(self.value, other.value)
        }
    }
    impl<'a, T: ?Sized> Hash for Interned<'a, T> {
        fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
            Self::as_ptr(self).hash(state)
        }
    }
    impl<T: ?Sized + Display> Display for Interned<'_, T> {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            self.value.fmt(f)
        }
    }

    impl<T: AsRef<str>> From<T> for Interned<'static, str> {
        /// Types which implement [`AsRef<str>`] will be stored in the global [StringInterner]
        fn from(value: T) -> Self {
            from_str(value.as_ref())
        }
    }
    fn from_str(value: &str) -> Interned<'static, str> {
        let global_interner = StringInterner::global();
        global_interner.get_or_insert(value)
    }
}

pub mod string_interner {
    //! A [StringInterner] hands out [Interned] copies of each unique string given to it.

    use super::interned::Interned;
    use cl_arena::dropless_arena::DroplessArena;
    use std::{
        collections::HashSet,
        sync::{OnceLock, RwLock},
    };

    /// A string interner hands out [Interned] copies of each unique string given to it.
    pub struct StringInterner<'a> {
        arena: DroplessArena<'a>,
        keys: RwLock<HashSet<&'a str>>,
    }

    impl StringInterner<'static> {
        /// Gets a reference to a global string interner whose [Interned] strings are `'static`
        pub fn global() -> &'static Self {
            static GLOBAL_INTERNER: OnceLock<StringInterner<'static>> = OnceLock::new();

            // SAFETY: The RwLock within the interner's `keys` protects the arena
            // from being modified concurrently.
            GLOBAL_INTERNER.get_or_init(|| StringInterner {
                arena: DroplessArena::new(),
                keys: Default::default(),
            })
        }
    }

    impl<'a> StringInterner<'a> {
        /// Creates a new [StringInterner] backed by the provided [DroplessArena]
        pub fn new(arena: DroplessArena<'a>) -> Self {
            Self { arena, keys: RwLock::new(HashSet::new()) }
        }

        /// Returns an [Interned] copy of the given string,
        /// allocating a new one if it doesn't already exist.
        ///
        /// # Blocks
        /// This function blocks when the interner is held by another thread.
        pub fn get_or_insert(&'a self, value: &str) -> Interned<'a, str> {
            let Self { arena, keys } = self;

            // Safety: Holding this write guard for the entire duration of this
            // function enforces a safety invariant. See StringInterner::global.
            let mut keys = keys.write().expect("should not be poisoned");

            Interned::new(match keys.get(value) {
                Some(value) => value,
                None => {
                    let value = match value {
                        "" => "", // Arena will panic if passed an empty string
                        _ => arena.alloc_str(value),
                    };
                    keys.insert(value);
                    value
                }
            })
        }
        /// Gets a reference to the interned copy of the given value, if it exists
        /// # Blocks
        /// This function blocks when the interner is held by another thread.
        pub fn get(&'a self, value: &str) -> Option<Interned<'a, str>> {
            let keys = self.keys.read().expect("should not be poisoned");
            keys.get(value).copied().map(Interned::new)
        }
    }

    impl std::fmt::Debug for StringInterner<'_> {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            f.debug_struct("Interner")
                .field("keys", &self.keys)
                .finish()
        }
    }

    // # Safety:
    // This is fine because StringInterner::get_or_insert(v) holds a RwLock
    // for its entire duration, and doesn't touch the non-(Send+Sync) arena
    // unless the lock is held by a write guard.
    unsafe impl<'a> Send for StringInterner<'a> {}
    unsafe impl<'a> Sync for StringInterner<'a> {}

    #[cfg(test)]
    mod tests {
        use super::StringInterner;

        macro_rules! ptr_eq {
        ($a: expr, $b: expr $(, $($t:tt)*)?) => {
            assert_eq!(std::ptr::addr_of!($a), std::ptr::addr_of!($b) $(, $($t)*)?)
        };
    }
        macro_rules! ptr_ne {
        ($a: expr, $b: expr $(, $($t:tt)*)?) => {
            assert_ne!(std::ptr::addr_of!($a), std::ptr::addr_of!($b) $(, $($t)*)?)
        };
    }

        #[test]
        fn empties_is_unique() {
            let interner = StringInterner::global();
            let empty = interner.get_or_insert("");
            let empty2 = interner.get_or_insert("");
            ptr_eq!(*empty, *empty2);
        }
        #[test]
        fn non_empty_is_unique() {
            let interner = StringInterner::global();
            let nonempty1 = interner.get_or_insert("not empty!");
            let nonempty2 = interner.get_or_insert("not empty!");
            let different = interner.get_or_insert("different!");
            ptr_eq!(*nonempty1, *nonempty2);
            ptr_ne!(*nonempty1, *different);
        }
    }
}

pub mod typed_interner {
    //! A [TypedInterner] hands out [Interned] references for arbitrary types.
    //!
    //! Note: It is a *logic error* to modify the returned reference via interior mutability
    //! in a way that changes the values produced by [Eq] and [Hash].
    //!
    //! See the standard library [HashSet] for more details.
    use super::interned::Interned;
    use cl_arena::typed_arena::TypedArena;
    use std::{collections::HashSet, hash::Hash, sync::RwLock};

    /// A [TypedInterner] hands out [Interned] references for arbitrary types.
    ///
    /// See the [module-level documentation](self) for more information.
    pub struct TypedInterner<'a, T: Eq + Hash> {
        arena: TypedArena<'a, T>,
        keys: RwLock<HashSet<&'a T>>,
    }

    impl<'a, T: Eq + Hash> TypedInterner<'a, T> {
        /// Creates a new [TypedInterner] backed by the provided [TypedArena]
        pub fn new(arena: TypedArena<'a, T>) -> Self {
            Self { arena, keys: RwLock::new(HashSet::new()) }
        }

        /// Converts the given value into an [Interned] value.
        ///
        /// # Blocks
        /// This function blocks when the interner is held by another thread.
        pub fn get_or_insert(&'a self, value: T) -> Interned<'a, T> {
            let Self { arena, keys } = self;

            // Safety: Locking the keyset for the entire duration of this function
            // enforces a safety invariant when the interner is stored in a global.
            let mut keys = keys.write().expect("should not be poisoned");

            Interned::new(match keys.get(&value) {
                Some(value) => value,
                None => {
                    let value = arena.alloc(value);
                    keys.insert(value);
                    value
                }
            })
        }
        /// Returns the [Interned] copy of the given value, if one already exists
        ///
        /// # Blocks
        /// This function blocks when the interner is being written to by another thread.
        pub fn get(&self, value: &T) -> Option<Interned<'a, T>> {
            let keys = self.keys.read().expect("should not be poisoned");
            keys.get(value).copied().map(Interned::new)
        }
    }

    /// # Safety
    /// This should be safe because references yielded by
    /// [get_or_insert](TypedInterner::get_or_insert) are unique, and the function uses
    /// the [RwLock] around the [HashSet] to ensure mutual exclusion
    unsafe impl<'a, T: Eq + Hash + Send> Send for TypedInterner<'a, T> where &'a T: Send {}
    unsafe impl<'a, T: Eq + Hash + Send + Sync> Sync for TypedInterner<'a, T> {}
}