Skip to main content

saluki_common/
hash.rs

1use std::{
2    hash::{BuildHasher, Hasher},
3    sync::LazyLock,
4};
5
6use shake::digest::{ExtendableOutput as _, Update as _};
7
8/// A fast, non-cryptographic hash implementation that's optimized for quality.
9///
10/// The implementation is reasonably suitable for hash tables and other data structures that require fast hashing and
11/// some degree of collision resistance.
12///
13/// Currently, [`foldhash`][foldhash] is used as the underlying implementation.
14///
15/// [foldhash]: http://github.com/orlp/foldhash
16pub type FastHasher = foldhash::quality::FoldHasher<'static>;
17
18/// [`BuildHasher`] implementation for [`FastHasher`].
19pub type FastBuildHasher = foldhash::quality::RandomState;
20
21// Single global instance of the fast hasher state since we need a consistently seeded state for `hash_single_fast` to
22// consistently hash things across the application.
23static FAST_BUILD_HASHER: LazyLock<FastBuildHasher> = LazyLock::new(get_fast_build_hasher);
24
25/// A no-op hasher that writes `u64` values directly to the internal state.
26///
27/// In some cases, pre-hashed values (`u64`) may be used as the key to a hash table or similar data structure. In those
28/// cases, re-hashing the key each time is unnecessary and potentially even undesirable.
29///
30/// `NoopU64Hasher` is a hash implementation that simply forwards `u64` values to the internal state and uses that as
31/// the final hashed value. It can used to hash a `u64` value directly, or to hash a value that wraps a `u64` value,
32/// such as a newtype (for example, `struct HashKey(u64)`).
33///
34/// # Behavior
35///
36/// `NoopU64Hasher` stores a single `u64` value internally. The last value written via `write_u64` is the final hash
37/// value. Writing any other value type to the hasher will panic.
38#[derive(Default)]
39pub struct NoopU64Hasher(u64);
40
41impl Hasher for NoopU64Hasher {
42    fn finish(&self) -> u64 {
43        self.0
44    }
45
46    fn write_u64(&mut self, i: u64) {
47        self.0 = i;
48    }
49
50    fn write(&mut self, bytes: &[u8]) {
51        panic!("non-u64 value written to NoopU64Hasher: {:?}", bytes);
52    }
53}
54
55/// A [`BuildHasher`] implementation for [`NoopU64Hasher`].
56#[derive(Clone, Default)]
57pub struct NoopU64BuildHasher;
58
59impl BuildHasher for NoopU64BuildHasher {
60    type Hasher = NoopU64Hasher;
61
62    fn build_hasher(&self) -> Self::Hasher {
63        NoopU64Hasher::default()
64    }
65}
66
67/// Returns a fresh [`FastBuildHasher`] instance.
68///
69/// This instance shouldn't be used to generate hashes that will be compared against hashes generated either with a
70/// hasher acquired from [`get_fast_hasher`] or [`hash_single_fast`], and those methods use a shared global state to both
71/// speed up hashing and ensure that the hashes are consistent across runs of those functions in particular.
72#[inline]
73pub fn get_fast_build_hasher() -> FastBuildHasher {
74    foldhash::quality::RandomState::default()
75}
76
77/// Returns a [`FastHasher`] instance backed by a shared, global state.
78///
79/// Values hashed with a `FastHasher`instance created with this method will be consistent within the same process, but
80/// won't be consistent across different runs of the application. Additionally, values hashed with this instance will
81/// not be consistent with those hashed by [`get_fast_build_hasher`], as that function returns a randomly seeded state for
82/// each call.
83#[inline]
84pub fn get_fast_hasher() -> FastHasher {
85    FAST_BUILD_HASHER.build_hasher()
86}
87
88/// Hashes a single value using the "fast" hash implementation, and returns the 64-bit hash value.
89///
90/// Utilizes the "fast" hash implementation provided by [`FastHasher`]. See [`get_fast_hasher`] for more details on hash
91/// values and the consistency of hash output between different hashing approaches and application states.
92#[inline]
93pub fn hash_single_fast<H: std::hash::Hash>(value: H) -> u64 {
94    let mut hasher = get_fast_hasher();
95    value.hash(&mut hasher);
96    hasher.finish()
97}
98
99/// Hashes a single value using the "stable" hash implementation, and returns the 64-bit hash value.
100///
101/// Utilizes the "stable" hash implementation provided by [`StableHasher`].
102#[inline]
103pub fn hash_single_stable<H: std::hash::Hash>(value: H) -> u64 {
104    let mut hasher = StableHasher::default();
105    value.hash(&mut hasher);
106    hasher.finish()
107}
108
109/// A non-cryptographic hash implementation that's meant to be stable over time.
110///
111/// The implementation is intended to be stable over time and across different runs of the applications, such that it
112/// can be depended on across different builds/versions of Saluki.
113///
114/// At a minimum, the hasher implementation won't change within major versions of Saluki, including v0 and v1.
115///
116/// Currently, [`shake`][shake] (specifically SHAKE128) is used as the underlying implementation. While SHAKE128 is a
117/// cryptographic hash algorithm, the way it's used effectively makes it a non-cryptographic hash algorithm given how
118/// much the output is truncated.
119///
120/// [shake]: https://crates.io/crates/shake
121#[derive(Default)]
122pub struct StableHasher(shake::Shake128);
123
124impl std::hash::Hasher for StableHasher {
125    fn write(&mut self, bytes: &[u8]) {
126        self.0.update(bytes);
127    }
128
129    fn finish(&self) -> u64 {
130        let mut buf = [0; std::mem::size_of::<u64>()];
131        self.0.clone().finalize_xof_into(&mut buf);
132
133        u64::from_le_bytes(buf)
134    }
135}
136
137#[cfg(test)]
138mod tests {
139    use std::hash::Hash as _;
140
141    use super::*;
142
143    // --- StableHasher / hash_single_stable ---
144
145    #[test]
146    fn stable_hasher_output_unchanged() {
147        // These expected values were generated with sha3 0.11's Shake128 and must not change
148        // within a major version of Saluki. If this test fails after a dependency change, the
149        // stable hash implementation has changed and any stored/compared hashes will be invalid.
150        assert_eq!(hash_single_stable("saluki"), 0x1d40da1f331833ef);
151        assert_eq!(hash_single_stable("hello world"), 0x6db238adc26a3bd8);
152    }
153
154    #[test]
155    fn stable_hasher_consistent() {
156        assert_eq!(hash_single_stable("test"), hash_single_stable("test"));
157    }
158
159    #[test]
160    fn stable_hasher_discriminates_inputs() {
161        assert_ne!(hash_single_stable("a"), hash_single_stable("b"));
162    }
163
164    // --- hash_single_fast / get_fast_hasher ---
165
166    #[test]
167    fn fast_hasher_consistent_within_run() {
168        assert_eq!(hash_single_fast("test"), hash_single_fast("test"));
169    }
170
171    #[test]
172    fn fast_hasher_discriminates_inputs() {
173        assert_ne!(hash_single_fast("a"), hash_single_fast("b"));
174    }
175
176    #[test]
177    fn get_fast_hasher_consistent_with_hash_single_fast() {
178        // get_fast_hasher uses the same global seed as hash_single_fast.
179        let mut hasher = get_fast_hasher();
180        "test".hash(&mut hasher);
181        assert_eq!(hasher.finish(), hash_single_fast("test"));
182    }
183
184    // --- get_fast_build_hasher ---
185
186    #[test]
187    fn fast_build_hasher_consistent_within_instance() {
188        let state = get_fast_build_hasher();
189        assert_eq!(state.hash_one("test"), state.hash_one("test"));
190    }
191
192    #[test]
193    fn fast_build_hasher_independent_per_call() {
194        // Each call to get_fast_build_hasher produces an independently seeded instance,
195        // so the same input should hash to different values across instances.
196        assert_ne!(
197            get_fast_build_hasher().hash_one("test"),
198            get_fast_build_hasher().hash_one("test")
199        );
200    }
201
202    // --- NoopU64Hasher ---
203
204    #[test]
205    fn noop_u64_hasher_passes_through() {
206        let mut h = NoopU64Hasher::default();
207        h.write_u64(42);
208        assert_eq!(h.finish(), 42);
209    }
210
211    #[test]
212    fn noop_u64_hasher_last_write_wins() {
213        let mut h = NoopU64Hasher::default();
214        h.write_u64(1);
215        h.write_u64(99);
216        assert_eq!(h.finish(), 99);
217    }
218
219    #[test]
220    #[should_panic]
221    fn noop_u64_hasher_panics_on_non_u64_write() {
222        let mut h = NoopU64Hasher::default();
223        h.write(b"not a u64");
224    }
225
226    // --- NoopU64BuildHasher ---
227
228    #[test]
229    fn noop_u64_build_hasher_creates_noop_hasher() {
230        let mut h = NoopU64BuildHasher.build_hasher();
231        h.write_u64(7);
232        assert_eq!(h.finish(), 7);
233    }
234}