saluki_common/hash.rs
1use std::{
2 hash::{BuildHasher, Hasher},
3 sync::LazyLock,
4};
5
6use sha3::digest::{ExtendableOutput as _, Update as _};
7
8/// A fast, non-cryptographic hash implementation that is optimized for quality.
9///
10/// The implementation is reasonably suitable for hash tables and other data structures that require fast hashing and
11/// some degree of collision resistance.
12///
13/// Currently, [`foldhash`][foldhash] is used as the underlying implementation.
14///
15/// [foldhash]: http://github.com/orlp/foldhash
16pub type FastHasher = foldhash::quality::FoldHasher;
17
18/// [`BuildHasher`] implementation for [`FastHasher`].
19pub type FastBuildHasher = foldhash::quality::RandomState;
20
21// Single global instance of the fast hasher state since we need a consistently-seeded state for `hash_single_fast` to
22// consistently hash things across the application.
23static FAST_BUILD_HASHER: LazyLock<FastBuildHasher> = LazyLock::new(get_fast_build_hasher);
24
25/// A no-op hasher that writes `u64` values directly to the internal state.
26///
27/// In some cases, pre-hashed values (`u64`) may be used as the key to a hash table or similar data structure. In those
28/// cases, re-hashing the key each time is unnecessary and potentially even undesirable.
29///
30/// `NoopU64Hasher` is a hash implementation that simply forwards `u64` values to the internal state and uses that as
31/// the final hashed value. It can used to hash a `u64` value directly, or to hash a value that wraps a `u64` value,
32/// such as a newtype (e.g., `struct HashKey(u64)`).
33///
34/// # Behavior
35///
36/// `NoopU64Hasher` stores a single `u64` value internally. The last value written via `write_u64` is the final hash
37/// value. Writing any other value type to the hasher will panic.
38#[derive(Default)]
39pub struct NoopU64Hasher(u64);
40
41impl Hasher for NoopU64Hasher {
42 fn finish(&self) -> u64 {
43 self.0
44 }
45
46 fn write_u64(&mut self, i: u64) {
47 self.0 = i;
48 }
49
50 fn write(&mut self, bytes: &[u8]) {
51 panic!("non-u64 value written to NoopU64Hasher: {:?}", bytes);
52 }
53}
54
55/// A [`BuildHasher`] implementation for [`NoopU64Hasher`].
56#[derive(Clone, Default)]
57pub struct NoopU64BuildHasher;
58
59impl BuildHasher for NoopU64BuildHasher {
60 type Hasher = NoopU64Hasher;
61
62 fn build_hasher(&self) -> Self::Hasher {
63 NoopU64Hasher::default()
64 }
65}
66
67/// Returns a fresh [`FastBuildHasher`] instance.
68///
69/// This instance should not be used to generate hashes that will be compared against hashes generated either with a
70/// hasher acquired from [`get_fast_hasher`] or [`hash_single_fast`], and those methods use a shared global state to both
71/// speed up hashing and ensure that the hashes are consistent across runs of those functions in particular.
72#[inline]
73pub fn get_fast_build_hasher() -> FastBuildHasher {
74 foldhash::quality::RandomState::default()
75}
76
77/// Returns a [`FastHasher`] instance backed by a shared, global state.
78///
79/// Values hashed with a `FastHasher`instance created with this method will be consistent within the same process, but
80/// will not be consistent across different runs of the application. Additionally, values hashed with this instance will
81/// not be consistent with those hashed by [`get_fast_build_hasher`], as that function returns a randomly-seeded state for
82/// each call.
83#[inline]
84pub fn get_fast_hasher() -> FastHasher {
85 FAST_BUILD_HASHER.build_hasher()
86}
87
88/// Hashes a single value using the "fast" hash implementation, and returns the 64-bit hash value.
89///
90/// Utilizes the "fast" hash implementation provided by [`FastHasher`]. See [`get_fast_hasher`] for more details on hash
91/// values and the consistency of hash output between different hashing approaches and application states.
92#[inline]
93pub fn hash_single_fast<H: std::hash::Hash>(value: H) -> u64 {
94 let mut hasher = get_fast_hasher();
95 value.hash(&mut hasher);
96 hasher.finish()
97}
98
99/// Hashes a single value using the "stable" hash implementation, and returns the 64-bit hash value.
100///
101/// Utilizes the "stable" hash implementation provided by [`StableHasher`].
102#[inline]
103pub fn hash_single_stable<H: std::hash::Hash>(value: H) -> u64 {
104 let mut hasher = StableHasher::default();
105 value.hash(&mut hasher);
106 hasher.finish()
107}
108
109/// A non-cryptographic hash implementation that is meant to be stable over time.
110///
111/// The implementation is intended to be stable over time and across different runs of the applications, such that it
112/// can be depended on across different builds/versions of Saluki.
113///
114/// At a minimum, the hasher implementation will not change within major versions of Saluki, including v0 and v1.
115///
116/// Currently, [`sha3`][sha3] (specifically SHAKE128) is used as the underlying implementation. While SHAKE128 is a
117/// cryptographic hash algorithm, the way it is used effectively makes it a non-cryptographic hash algorithm given how
118/// much the output is truncated.
119///
120/// [sha3]: https://crates.io/crates/sha3
121#[derive(Default)]
122pub struct StableHasher(sha3::Shake128);
123
124impl std::hash::Hasher for StableHasher {
125 fn write(&mut self, bytes: &[u8]) {
126 self.0.update(bytes);
127 }
128
129 fn finish(&self) -> u64 {
130 let mut buf = [0; std::mem::size_of::<u64>()];
131 self.0.clone().finalize_xof_into(&mut buf);
132
133 u64::from_le_bytes(buf)
134 }
135}