1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
//!
//! This is an implementation of a string interner.
//!
//! It allows to bring down the memory usage from strings and allows for fast comparisons by replacing the strings by essentially an ID.
//!
//! This particular implementation comes from [matklad's "Fast and Simple Rust Interner" blog post](https://matklad.github.io/2020/03/22/fast-simple-rust-interner.html).
//!
use std::collections::HashMap;
use std::mem;
/// An interned string.
///
/// This is fast to move, clone and compare.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct Interned(u32);
/// A string interner.
///
/// This particular implementation comes from [matklad's "Fast and Simple Rust Interner" blog post](https://matklad.github.io/2020/03/22/fast-simple-rust-interner.html).
#[derive(Debug)]
pub struct Interner {
map: HashMap<&'static str, u32>,
vec: Vec<&'static str>,
buf: String,
full: Vec<String>,
}
impl Interner {
/// Initialize the interner with an initial capacity.
pub fn with_capacity(cap: usize) -> Self {
let cap = cap.next_power_of_two();
Self {
map: HashMap::default(),
vec: Vec::new(),
buf: String::with_capacity(cap),
full: Vec::new(),
}
}
/// Intern a given string.
pub fn intern(&mut self, name: &str) -> Interned {
if let Some(&id) = self.map.get(name) {
return Interned(id);
}
let name = unsafe { self.alloc(name) };
let id = self.map.len() as u32;
self.map.insert(name, id);
self.vec.push(name);
let id = Interned(id);
debug_assert!(self.lookup(id) == name);
debug_assert!(self.intern(name) == id);
id
}
/// Get the string associated to a given interning ID.
pub fn lookup(&self, id: Interned) -> &str {
self.vec[id.0 as usize]
}
unsafe fn alloc(&mut self, name: &str) -> &'static str {
let cap = self.buf.capacity();
if cap < self.buf.len() + name.len() {
let new_cap = (cap.max(name.len()) + 1).next_power_of_two();
let new_buf = String::with_capacity(new_cap);
let old_buf = mem::replace(&mut self.buf, new_buf);
self.full.push(old_buf);
}
let interned = {
let start = self.buf.len();
self.buf.push_str(name);
&self.buf[start..]
};
&*(interned as *const str)
}
}