interns/lib.rs
1/* Copyright (C) 2025 Saúl Valdelvira
2 *
3 * This program is free software: you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation, version 3.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program. If not, see <https://www.gnu.org/licenses/>. */
14
15//! An object interner library
16//!
17//! The main element of this crate is the [`Interner`] struct.
18//!
19//! It allows to build a "storage" of any kind of object that
20//! avoids repetition and memory waste.
21//!
22//! # Example (String interner)
23//! ```
24//! use interns::*;
25//!
26//! let mut interner = Interner::<str>::default();
27//!
28//! let a = interner.get_or_intern("hello");
29//! let b = interner.get_or_intern("world");
30//! let c = interner.get_or_intern("hello");
31//!
32//! let a_resolv = interner.resolve(a);
33//! let b_resolv = interner.resolve(b);
34//! let c_resolv = interner.resolve(c);
35//!
36//! assert_eq!(a_resolv, Some("hello"));
37//! assert_eq!(b_resolv, Some("world"));
38//! assert_eq!(c_resolv, Some("hello"));
39//!
40//! assert_eq!(a, c);
41//! assert_ne!(a, b);
42//! assert_ne!(b, c);
43//! ```
44
45use hashbrown::hash_map::RawEntryMut;
46use hashbrown::HashMap;
47use std::hash::{BuildHasher, Hash, RandomState};
48
49pub mod backend;
50pub use backend::{Backend, DefaultBackendBuilder, StringBuf};
51
52pub type Symbol<T, B = <T as DefaultBackendBuilder>::Backend> = <B as Backend<T>>::Symbol;
53
54pub type StringInterner = Interner<str,StringBuf>;
55
56/// Interner
57///
58/// This struct is responsible for tracking objects and
59/// interning them.
60///
61/// # Example
62/// ```
63/// use interns::*;
64///
65/// let mut interner = Interner::<str>::default();
66///
67/// let a = interner.get_or_intern("hello");
68/// let b = interner.get_or_intern("world");
69/// let c = interner.get_or_intern("hello");
70///
71/// let a_resolv = interner.resolve(a);
72/// let b_resolv = interner.resolve(b);
73/// let c_resolv = interner.resolve(c);
74///
75/// assert_eq!(a_resolv, Some("hello"));
76/// assert_eq!(b_resolv, Some("world"));
77/// assert_eq!(c_resolv, Some("hello"));
78///
79/// assert_eq!(a, c);
80/// assert_ne!(a, b);
81/// assert_ne!(b, c);
82/// ```
83pub struct Interner<
84 T,
85 B = <T as DefaultBackendBuilder>::Backend,
86 H = RandomState
87>
88where
89 T: Hash + Eq + PartialEq + ?Sized,
90 H: BuildHasher,
91 B: Backend<T>,
92{
93 backend: B,
94 set: HashMap<B::Symbol, (), ()>,
95 hasher: H,
96}
97
98impl<T, B, H> Interner<T, B, H>
99where
100 T: Hash + Eq + PartialEq + ?Sized,
101 H: BuildHasher,
102 B: Backend<T>,
103{
104 /// Create a new Interner with a default [backend](Backend)
105 /// and [hasher](BuildHasher)
106 pub fn new() -> Self
107 where
108 B: Default,
109 H: Default,
110 {
111 Self {
112 backend: B::default(),
113 set: HashMap::default(),
114 hasher: H::default(),
115 }
116 }
117
118 /// Create a new Interner with a default [backend](Backend) and
119 /// the given [hasher](BuildHasher)
120 pub fn with_hasher(hasher: H) -> Self
121 where
122 B: Default,
123 {
124 Self {
125 backend: B::default(),
126 set: HashMap::default(),
127 hasher,
128 }
129 }
130
131 /// Create a new Interner with a default [hasher](BuildHasher) and
132 /// the given [backend](Backend)
133 pub fn with_backend(backend: B) -> Self
134 where
135 H: Default,
136 {
137 Self {
138 backend,
139 set: HashMap::default(),
140 hasher: H::default(),
141 }
142 }
143
144 /// Create a new Interner with the given [backend](Backend)
145 /// and [hasher](BuildHasher)
146 pub fn with_backend_and_hasher(backend: B, hasher: H) -> Self {
147 Self {
148 backend,
149 hasher,
150 set: HashMap::default(),
151 }
152 }
153
154 /// Gets the [Symbol](Backend::Symbol) for `src`, interning it if it doesn't exist.
155 ///
156 /// # Example
157 /// ```
158 /// use interns::Interner;
159 ///
160 /// let mut interner = Interner::<str>::new();
161 /// let name = interner.get_or_intern("Abcd");
162 /// let name_again = interner.get_or_intern("Abcd");
163 /// assert_eq!(name, name_again);
164 /// ```
165 pub fn get_or_intern(&mut self, src: &T) -> B::Symbol {
166 /* We are doing shenanigans here.
167 *
168 * We are storing B::Symbol as the key, but we don't hash the
169 * Symbol itself. `src` is a reference to T, so we have no way
170 * of getting a Symbol from `src`.
171 *
172 * When we look for a symbol, we pass the hash or `src`, and also provide
173 * a custom function to check if the keys match (in case of collision).
174 * This function must resolve the Symbol to a value of T, and test it against `src`.
175 *
176 * When we insert a new element, we also need to provide a custom hasher function.
177 * This is because an insertion could cause the table to resize, thus causing all
178 * keys to be rehashed. If we don't provide a custom hasher function, a resize
179 * would reallocate the keys according to the Symbol, not the `T` value they
180 * resolve to, making it imposible to retrive those symbols from a `T` reference
181 * in the future.
182 *
183 * For this trick to work, we need to make sure that we _always_ access the table
184 * with a custom function, that resolves the Symbols before hashing/comparing.
185 */
186
187 let Self {
188 backend,
189 set,
190 hasher,
191 } = self;
192
193 let hash = hasher.hash_one(src);
194
195 let entry = set
196 .raw_entry_mut()
197 .from_hash(hash, |&sym| {
198 /* SAFETY: If the symbol is on the table it must also be on the backend. */
199 src == unsafe { backend.get_unchecked(sym) }
200 });
201
202 let k = match entry {
203 RawEntryMut::Occupied(occupied) => occupied.into_key(),
204 RawEntryMut::Vacant(vacant) => {
205 let sym = backend.intern(src);
206 vacant
207 .insert_with_hasher(hash, sym, (), |sym| {
208 /* SAFETY: We've interned the symbol on the call to `Backed::intern` above */
209 let src = unsafe { backend.get_unchecked(*sym) };
210 hasher.hash_one(src)
211 })
212 .0
213 }
214 };
215
216 *k
217 }
218
219 /// Resolves the [symbol](Backend::Symbol) into a reference of T
220 ///
221 /// # Example
222 /// ```
223 /// use interns::Interner;
224 ///
225 /// let mut interner = Interner::<str>::new();
226 /// let name = interner.get_or_intern("Abcd");
227 /// let resolved = interner.resolve(name);
228 /// assert_eq!(resolved, Some("Abcd"));
229 /// ```
230 pub fn resolve(&self, sym: B::Symbol) -> Option<&T> {
231 self.backend.get(sym)
232 }
233}
234
235impl<T,B> Default for Interner<T,B>
236where
237 T: Hash + Eq + PartialEq + ?Sized,
238 B: Backend<T> + Default
239{
240 fn default() -> Self {
241 Self::new()
242 }
243}
244
245#[cfg(test)]
246mod test;