saluki_common/hash.rs
1use std::{
2 hash::{BuildHasher, Hasher},
3 sync::LazyLock,
4};
5
6use shake::digest::{ExtendableOutput as _, Update as _};
7
8/// A fast, non-cryptographic hash implementation that's optimized for quality.
9///
10/// The implementation is reasonably suitable for hash tables and other data structures that require fast hashing and
11/// some degree of collision resistance.
12///
13/// Currently, [`foldhash`][foldhash] is used as the underlying implementation.
14///
15/// [foldhash]: http://github.com/orlp/foldhash
16pub type FastHasher = foldhash::quality::FoldHasher<'static>;
17
18/// [`BuildHasher`] implementation for [`FastHasher`].
19pub type FastBuildHasher = foldhash::quality::RandomState;
20
21// Single global instance of the fast hasher state since we need a consistently seeded state for `hash_single_fast` to
22// consistently hash things across the application.
23static FAST_BUILD_HASHER: LazyLock<FastBuildHasher> = LazyLock::new(get_fast_build_hasher);
24
25/// A no-op hasher that writes `u64` values directly to the internal state.
26///
27/// In some cases, pre-hashed values (`u64`) may be used as the key to a hash table or similar data structure. In those
28/// cases, re-hashing the key each time is unnecessary and potentially even undesirable.
29///
30/// `NoopU64Hasher` is a hash implementation that simply forwards `u64` values to the internal state and uses that as
31/// the final hashed value. It can used to hash a `u64` value directly, or to hash a value that wraps a `u64` value,
32/// such as a newtype (for example, `struct HashKey(u64)`).
33///
34/// # Behavior
35///
36/// `NoopU64Hasher` stores a single `u64` value internally. The last value written via `write_u64` is the final hash
37/// value. Writing any other value type to the hasher will panic.
38#[derive(Default)]
39pub struct NoopU64Hasher(u64);
40
41impl Hasher for NoopU64Hasher {
42 fn finish(&self) -> u64 {
43 self.0
44 }
45
46 fn write_u64(&mut self, i: u64) {
47 self.0 = i;
48 }
49
50 fn write(&mut self, bytes: &[u8]) {
51 panic!("non-u64 value written to NoopU64Hasher: {:?}", bytes);
52 }
53}
54
55/// A [`BuildHasher`] implementation for [`NoopU64Hasher`].
56#[derive(Clone, Default)]
57pub struct NoopU64BuildHasher;
58
59impl BuildHasher for NoopU64BuildHasher {
60 type Hasher = NoopU64Hasher;
61
62 fn build_hasher(&self) -> Self::Hasher {
63 NoopU64Hasher::default()
64 }
65}
66
67/// Returns a fresh [`FastBuildHasher`] instance.
68///
69/// This instance shouldn't be used to generate hashes that will be compared against hashes generated either with a
70/// hasher acquired from [`get_fast_hasher`] or [`hash_single_fast`], and those methods use a shared global state to both
71/// speed up hashing and ensure that the hashes are consistent across runs of those functions in particular.
72#[inline]
73pub fn get_fast_build_hasher() -> FastBuildHasher {
74 foldhash::quality::RandomState::default()
75}
76
77/// Returns a [`FastHasher`] instance backed by a shared, global state.
78///
79/// Values hashed with a `FastHasher`instance created with this method will be consistent within the same process, but
80/// won't be consistent across different runs of the application. Additionally, values hashed with this instance will
81/// not be consistent with those hashed by [`get_fast_build_hasher`], as that function returns a randomly seeded state for
82/// each call.
83#[inline]
84pub fn get_fast_hasher() -> FastHasher {
85 FAST_BUILD_HASHER.build_hasher()
86}
87
88/// Hashes a single value using the "fast" hash implementation, and returns the 64-bit hash value.
89///
90/// Utilizes the "fast" hash implementation provided by [`FastHasher`]. See [`get_fast_hasher`] for more details on hash
91/// values and the consistency of hash output between different hashing approaches and application states.
92#[inline]
93pub fn hash_single_fast<H: std::hash::Hash>(value: H) -> u64 {
94 let mut hasher = get_fast_hasher();
95 value.hash(&mut hasher);
96 hasher.finish()
97}
98
99/// Hashes a single value using the "stable" hash implementation, and returns the 64-bit hash value.
100///
101/// Utilizes the "stable" hash implementation provided by [`StableHasher`].
102#[inline]
103pub fn hash_single_stable<H: std::hash::Hash>(value: H) -> u64 {
104 let mut hasher = StableHasher::default();
105 value.hash(&mut hasher);
106 hasher.finish()
107}
108
109/// A non-cryptographic hash implementation that's meant to be stable over time.
110///
111/// The implementation is intended to be stable over time and across different runs of the applications, such that it
112/// can be depended on across different builds/versions of Saluki.
113///
114/// At a minimum, the hasher implementation won't change within major versions of Saluki, including v0 and v1.
115///
116/// Currently, [`shake`][shake] (specifically SHAKE128) is used as the underlying implementation. While SHAKE128 is a
117/// cryptographic hash algorithm, the way it's used effectively makes it a non-cryptographic hash algorithm given how
118/// much the output is truncated.
119///
120/// [shake]: https://crates.io/crates/shake
121#[derive(Default)]
122pub struct StableHasher(shake::Shake128);
123
124impl std::hash::Hasher for StableHasher {
125 fn write(&mut self, bytes: &[u8]) {
126 self.0.update(bytes);
127 }
128
129 fn finish(&self) -> u64 {
130 let mut buf = [0; std::mem::size_of::<u64>()];
131 self.0.clone().finalize_xof_into(&mut buf);
132
133 u64::from_le_bytes(buf)
134 }
135}
136
137#[cfg(test)]
138mod tests {
139 use std::hash::Hash as _;
140
141 use super::*;
142
143 // --- StableHasher / hash_single_stable ---
144
145 #[test]
146 fn stable_hasher_output_unchanged() {
147 // These expected values were generated with sha3 0.11's Shake128 and must not change
148 // within a major version of Saluki. If this test fails after a dependency change, the
149 // stable hash implementation has changed and any stored/compared hashes will be invalid.
150 assert_eq!(hash_single_stable("saluki"), 0x1d40da1f331833ef);
151 assert_eq!(hash_single_stable("hello world"), 0x6db238adc26a3bd8);
152 }
153
154 #[test]
155 fn stable_hasher_consistent() {
156 assert_eq!(hash_single_stable("test"), hash_single_stable("test"));
157 }
158
159 #[test]
160 fn stable_hasher_discriminates_inputs() {
161 assert_ne!(hash_single_stable("a"), hash_single_stable("b"));
162 }
163
164 // --- hash_single_fast / get_fast_hasher ---
165
166 #[test]
167 fn fast_hasher_consistent_within_run() {
168 assert_eq!(hash_single_fast("test"), hash_single_fast("test"));
169 }
170
171 #[test]
172 fn fast_hasher_discriminates_inputs() {
173 assert_ne!(hash_single_fast("a"), hash_single_fast("b"));
174 }
175
176 #[test]
177 fn get_fast_hasher_consistent_with_hash_single_fast() {
178 // get_fast_hasher uses the same global seed as hash_single_fast.
179 let mut hasher = get_fast_hasher();
180 "test".hash(&mut hasher);
181 assert_eq!(hasher.finish(), hash_single_fast("test"));
182 }
183
184 // --- get_fast_build_hasher ---
185
186 #[test]
187 fn fast_build_hasher_consistent_within_instance() {
188 let state = get_fast_build_hasher();
189 assert_eq!(state.hash_one("test"), state.hash_one("test"));
190 }
191
192 #[test]
193 fn fast_build_hasher_independent_per_call() {
194 // Each call to get_fast_build_hasher produces an independently seeded instance,
195 // so the same input should hash to different values across instances.
196 assert_ne!(
197 get_fast_build_hasher().hash_one("test"),
198 get_fast_build_hasher().hash_one("test")
199 );
200 }
201
202 // --- NoopU64Hasher ---
203
204 #[test]
205 fn noop_u64_hasher_passes_through() {
206 let mut h = NoopU64Hasher::default();
207 h.write_u64(42);
208 assert_eq!(h.finish(), 42);
209 }
210
211 #[test]
212 fn noop_u64_hasher_last_write_wins() {
213 let mut h = NoopU64Hasher::default();
214 h.write_u64(1);
215 h.write_u64(99);
216 assert_eq!(h.finish(), 99);
217 }
218
219 #[test]
220 #[should_panic]
221 fn noop_u64_hasher_panics_on_non_u64_write() {
222 let mut h = NoopU64Hasher::default();
223 h.write(b"not a u64");
224 }
225
226 // --- NoopU64BuildHasher ---
227
228 #[test]
229 fn noop_u64_build_hasher_creates_noop_hasher() {
230 let mut h = NoopU64BuildHasher.build_hasher();
231 h.write_u64(7);
232 assert_eq!(h.finish(), 7);
233 }
234}