1use std::marker::PhantomData;
2
3use crate::{collections::PrehashedHashSet, hash::hash_single_fast};
4
5#[derive(Debug)]
21pub struct ReusableDeduplicator<T> {
22 seen: PrehashedHashSet<u64>,
23 _item: PhantomData<T>,
24}
25
26impl<T: Eq + std::hash::Hash> ReusableDeduplicator<T> {
27 pub fn new() -> Self {
29 Self {
30 seen: PrehashedHashSet::default(),
31 _item: PhantomData,
32 }
33 }
34
35 pub fn deduplicated<'item, I>(&mut self, iter: I) -> Deduplicated<'_, 'item, I, T>
37 where
38 I: Iterator<Item = &'item T>,
39 {
40 self.seen.clear();
41
42 Deduplicated {
43 iter,
44 seen: &mut self.seen,
45 }
46 }
47}
48
49pub struct Deduplicated<'seen, 'item, I, T>
51where
52 I: Iterator<Item = &'item T>,
53 T: Eq + std::hash::Hash + 'item,
54{
55 iter: I,
56 seen: &'seen mut PrehashedHashSet<u64>,
57}
58
59impl<'seen, 'item, I, T> Iterator for Deduplicated<'seen, 'item, I, T>
60where
61 I: Iterator<Item = &'item T>,
62 T: Eq + std::hash::Hash + 'item,
63{
64 type Item = &'item T;
65
66 fn next(&mut self) -> Option<Self::Item> {
67 for item in self.iter.by_ref() {
68 let hash = hash_single_fast(item);
69 if !self.seen.contains(&hash) {
70 self.seen.insert(hash);
71 return Some(item);
72 }
73 }
74 None
75 }
76}
77
78#[cfg(test)]
79mod tests {
80 use std::collections::HashSet;
81
82 use proptest::{prelude::*, proptest};
83
84 use super::*;
85
86 #[test]
87 fn basic_no_duplicates() {
88 let mut deduplicator = ReusableDeduplicator::new();
89
90 let input = vec![1, 2, 3];
91 assert!(!input.is_empty());
92
93 let expected = input.clone();
94 let actual = deduplicator.deduplicated(input.iter()).copied().collect::<Vec<_>>();
95 assert_eq!(actual, expected);
96 }
97
98 #[test]
99 fn basic_duplicates() {
100 let mut deduplicator = ReusableDeduplicator::new();
101
102 let input = vec![1, 3, 2, 1, 3, 2, 3, 1];
103 assert!(!input.is_empty());
104
105 let mut expected = input.clone();
106 expected.sort();
107 expected.dedup();
108
109 let mut actual = deduplicator.deduplicated(input.iter()).copied().collect::<Vec<_>>();
112 actual.sort();
113
114 assert_eq!(actual, expected);
115 }
116
117 #[test]
118 fn empty() {
119 let mut deduplicator = ReusableDeduplicator::<i32>::new();
120
121 let input = [];
122 assert!(input.is_empty());
123
124 let actual = deduplicator.deduplicated(input.iter()).copied().collect::<Vec<_>>();
125 assert!(actual.is_empty());
126 }
127
128 #[test]
129 fn overlapping_seen_with_reuse() {
130 let mut deduplicator = ReusableDeduplicator::new();
133
134 let input1 = vec![1, 2, 3, 4, 5];
136 assert!(!input1.is_empty());
137
138 let input2 = vec![4, 5, 6, 7, 8];
139 assert!(!input2.is_empty());
140
141 let input1_set = input1.iter().collect::<HashSet<_>>();
142 let input2_set = input2.iter().collect::<HashSet<_>>();
143 assert!(!input1_set.is_disjoint(&input2_set));
144
145 let expected1 = input1.clone();
147 let actual1 = deduplicator.deduplicated(input1.iter()).copied().collect::<Vec<_>>();
148 assert_eq!(actual1, expected1);
149
150 let expected2 = input2.clone();
151 let actual2 = deduplicator.deduplicated(input2.iter()).copied().collect::<Vec<_>>();
152 assert_eq!(actual2, expected2);
153 }
154
155 proptest! {
156 #[test]
157 fn property_test_basic(input in any::<Vec<i32>>()) {
158 let mut deduplicator = ReusableDeduplicator::new();
159
160 let mut expected = input.clone();
161 expected.sort();
162 expected.dedup();
163
164 let mut actual = deduplicator.deduplicated(input.iter()).copied().collect::<Vec<_>>();
167 actual.sort();
168
169 prop_assert_eq!(actual, expected);
170 }
171 }
172}