interns/
lib.rs

1/*  Copyright (C) 2025 Saúl Valdelvira
2 *
3 *  This program is free software: you can redistribute it and/or modify
4 *  it under the terms of the GNU General Public License as published by
5 *  the Free Software Foundation, version 3.
6 *
7 *  This program is distributed in the hope that it will be useful,
8 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *  GNU General Public License for more details.
11 *
12 *  You should have received a copy of the GNU General Public License
13 *  along with this program.  If not, see <https://www.gnu.org/licenses/>. */
14
15//! An object interner library
16//!
17//! The main element of this crate is the [`Interner`] struct.
18//!
19//! It allows to build a "storage" of any kind of object that
20//! avoids repetition and memory waste.
21//!
22//! # Example (String interner)
23//! ```
24//! use interns::*;
25//!
26//! let mut interner = Interner::<str>::default();
27//!
28//! let a = interner.get_or_intern("hello");
29//! let b = interner.get_or_intern("world");
30//! let c = interner.get_or_intern("hello");
31//!
32//! let a_resolv = interner.resolve(a);
33//! let b_resolv = interner.resolve(b);
34//! let c_resolv = interner.resolve(c);
35//!
36//! assert_eq!(a_resolv, Some("hello"));
37//! assert_eq!(b_resolv, Some("world"));
38//! assert_eq!(c_resolv, Some("hello"));
39//!
40//! assert_eq!(a, c);
41//! assert_ne!(a, b);
42//! assert_ne!(b, c);
43//! ```
44
45use hashbrown::hash_map::RawEntryMut;
46use hashbrown::HashMap;
47use std::hash::{BuildHasher, Hash, RandomState};
48
49pub mod backend;
50pub use backend::{Backend, DefaultBackendBuilder, StringBuf};
51
52pub type Symbol<T, B = <T as DefaultBackendBuilder>::Backend> = <B as Backend<T>>::Symbol;
53
54pub type StringInterner = Interner<str,StringBuf>;
55
56/// Interner
57///
58/// This struct is responsible for tracking objects and
59/// interning them.
60///
61/// # Example
62/// ```
63/// use interns::*;
64///
65/// let mut interner = Interner::<str>::default();
66///
67/// let a = interner.get_or_intern("hello");
68/// let b = interner.get_or_intern("world");
69/// let c = interner.get_or_intern("hello");
70///
71/// let a_resolv = interner.resolve(a);
72/// let b_resolv = interner.resolve(b);
73/// let c_resolv = interner.resolve(c);
74///
75/// assert_eq!(a_resolv, Some("hello"));
76/// assert_eq!(b_resolv, Some("world"));
77/// assert_eq!(c_resolv, Some("hello"));
78///
79/// assert_eq!(a, c);
80/// assert_ne!(a, b);
81/// assert_ne!(b, c);
82/// ```
83pub struct Interner<
84    T,
85    B = <T as DefaultBackendBuilder>::Backend,
86    H = RandomState
87>
88where
89    T: Hash + Eq + PartialEq + ?Sized,
90    H: BuildHasher,
91    B: Backend<T>,
92{
93    backend: B,
94    set: HashMap<B::Symbol, (), ()>,
95    hasher: H,
96}
97
98impl<T, B, H> Interner<T, B, H>
99where
100    T: Hash + Eq + PartialEq + ?Sized,
101    H: BuildHasher,
102    B: Backend<T>,
103{
104    /// Create a new Interner with a default [backend](Backend)
105    /// and [hasher](BuildHasher)
106    pub fn new() -> Self
107    where
108        B: Default,
109        H: Default,
110    {
111        Self {
112            backend: B::default(),
113            set: HashMap::default(),
114            hasher: H::default(),
115        }
116    }
117
118    /// Create a new Interner with a default [backend](Backend) and
119    /// the given [hasher](BuildHasher)
120    pub fn with_hasher(hasher: H) -> Self
121    where
122        B: Default,
123    {
124        Self {
125            backend: B::default(),
126            set: HashMap::default(),
127            hasher,
128        }
129    }
130
131    /// Create a new Interner with a default [hasher](BuildHasher) and
132    /// the given [backend](Backend)
133    pub fn with_backend(backend: B) -> Self
134    where
135        H: Default,
136    {
137        Self {
138            backend,
139            set: HashMap::default(),
140            hasher: H::default(),
141        }
142    }
143
144    /// Create a new Interner with the given [backend](Backend)
145    /// and [hasher](BuildHasher)
146    pub fn with_backend_and_hasher(backend: B, hasher: H) -> Self {
147        Self {
148            backend,
149            hasher,
150            set: HashMap::default(),
151        }
152    }
153
154    /// Gets the [Symbol](Backend::Symbol) for `src`, interning it if it doesn't exist.
155    ///
156    /// # Example
157    /// ```
158    /// use interns::Interner;
159    ///
160    /// let mut interner = Interner::<str>::new();
161    /// let name = interner.get_or_intern("Abcd");
162    /// let name_again = interner.get_or_intern("Abcd");
163    /// assert_eq!(name, name_again);
164    /// ```
165    pub fn get_or_intern(&mut self, src: &T) -> B::Symbol {
166        /* We are doing shenanigans here.
167         *
168         * We are storing B::Symbol as the key, but we don't hash the
169         * Symbol itself. `src` is a reference to T, so we have no way
170         * of getting a Symbol from `src`.
171         *
172         * When we look for a symbol, we pass the hash or `src`, and also provide
173         * a custom function to check if the keys match (in case of collision).
174         * This function must resolve the Symbol to a value of T, and test it against `src`.
175         *
176         * When we insert a new element, we also need to provide a custom hasher function.
177         * This is because an insertion could cause the table to resize, thus causing all
178         * keys to be rehashed. If we don't provide a custom hasher function, a resize
179         * would reallocate the keys according to the Symbol, not the `T` value they
180         * resolve to, making it imposible to retrive those symbols from a `T` reference
181         * in the future.
182         *
183         * For this trick to work, we need to make sure that we _always_ access the table
184         * with a custom function, that resolves the Symbols before hashing/comparing.
185         */
186
187        let Self {
188            backend,
189            set,
190            hasher,
191        } = self;
192
193        let hash = hasher.hash_one(src);
194
195        let entry = set
196            .raw_entry_mut()
197            .from_hash(hash, |&sym| {
198                /* SAFETY: If the symbol is on the table it must also be on the backend. */
199                src == unsafe { backend.get_unchecked(sym) }
200            });
201
202        let k = match entry {
203            RawEntryMut::Occupied(occupied) => occupied.into_key(),
204            RawEntryMut::Vacant(vacant) => {
205                let sym = backend.intern(src);
206                vacant
207                    .insert_with_hasher(hash, sym, (), |sym| {
208                        /* SAFETY: We've interned the symbol on the call to `Backed::intern` above */
209                        let src = unsafe { backend.get_unchecked(*sym) };
210                        hasher.hash_one(src)
211                    })
212                    .0
213            }
214        };
215
216        *k
217    }
218
219    /// Resolves the [symbol](Backend::Symbol) into a reference of T
220    ///
221    /// # Example
222    /// ```
223    /// use interns::Interner;
224    ///
225    /// let mut interner = Interner::<str>::new();
226    /// let name = interner.get_or_intern("Abcd");
227    /// let resolved = interner.resolve(name);
228    /// assert_eq!(resolved, Some("Abcd"));
229    /// ```
230    pub fn resolve(&self, sym: B::Symbol) -> Option<&T> {
231        self.backend.get(sym)
232    }
233}
234
235impl<T,B> Default for Interner<T,B>
236where
237    T: Hash + Eq + PartialEq + ?Sized,
238    B: Backend<T> + Default
239{
240    fn default() -> Self {
241        Self::new()
242    }
243}
244
245#[cfg(test)]
246mod test;