// dd_sds/scanner/regex_rule/regex_store.rs
use crate::stats::GLOBAL_STATS;
2use ahash::AHashMap;
3use lazy_static::lazy_static;
4use regex_automata::meta::{Cache, Regex as MetaRegex};
5use slotmap::{SlotMap, new_key_type};
6use std::ops::Deref;
7use std::sync::Weak;
8use std::sync::{Arc, Mutex};
9
10struct WeakSharedRegex {
11 regex: Weak<MetaRegex>,
12 cache_size: usize,
14}
15
16#[derive(Debug, Clone)]
17pub struct SharedRegex {
18 pub regex: Arc<MetaRegex>,
19 pub cache_key: RegexCacheKey,
20}
21
22impl Deref for SharedRegex {
23 type Target = MetaRegex;
24
25 fn deref(&self) -> &Self::Target {
26 self.regex.deref()
27 }
28}
29
30pub fn get_memoized_regex<T>(
31 pattern: &str,
32 regex_factory: impl FnOnce(&str) -> Result<regex_automata::meta::Regex, T>,
33) -> Result<SharedRegex, T> {
34 get_memoized_regex_with_custom_store(pattern, regex_factory, ®EX_STORE)
35}
36
37fn get_memoized_regex_with_custom_store<T>(
38 pattern: &str,
39 regex_factory: impl FnOnce(&str) -> Result<regex_automata::meta::Regex, T>,
40 store: &Mutex<RegexStore>,
41) -> Result<SharedRegex, T> {
42 {
43 let regex_store = store.lock().unwrap();
44 if let Some(exiting_regex) = regex_store.get(pattern) {
45 return Ok(exiting_regex);
46 }
47 }
48
49 let regex = regex_factory(pattern)?;
51
52 let mut regex_store = store.lock().unwrap();
53 Ok(regex_store.insert(pattern, regex))
54}
55
56const GC_FREQUENCY: u64 = 1_000;
59
60lazy_static! {
61 static ref REGEX_STORE: Arc<Mutex<RegexStore>> = Arc::new(Mutex::new(RegexStore::new()));
62}
63new_key_type! { pub struct RegexCacheKey; }
64
65struct RegexStore {
66 pattern_index: AHashMap<String, RegexCacheKey>,
67 key_map: SlotMap<RegexCacheKey, WeakSharedRegex>,
68 gc_counter: u64,
70}
71
72impl RegexStore {
73 pub fn new() -> Self {
74 Self {
75 pattern_index: AHashMap::new(),
76 key_map: SlotMap::with_key(),
77 gc_counter: 0,
78 }
79 }
80
81 fn gc(&mut self) {
83 self.gc_counter = 0;
84 self.pattern_index.retain(|_, cache_key| {
85 if self.key_map.get(*cache_key).unwrap().regex.strong_count() == 0 {
86 if let Some(old_regex) = self.key_map.remove(*cache_key) {
87 GLOBAL_STATS.add_total_regex_cache(-(old_regex.cache_size as i64));
88 }
89 false
90 } else {
91 true
92 }
93 });
94 GLOBAL_STATS.set_total_regexes(self.key_map.len());
95 }
96
97 pub fn get(&self, pattern: &str) -> Option<SharedRegex> {
99 self.pattern_index.get(pattern).and_then(|cache_key| {
100 self.key_map
101 .get(*cache_key)
102 .and_then(|x| x.regex.upgrade())
103 .map(|regex| SharedRegex {
104 regex,
105 cache_key: *cache_key,
106 })
107 })
108 }
109
110 #[cfg(test)]
111 fn len(&self) -> usize {
112 debug_assert_eq!(self.pattern_index.len(), self.key_map.len());
113 self.key_map.len()
114 }
115
116 pub fn insert(&mut self, pattern: &str, regex: MetaRegex) -> SharedRegex {
120 self.gc_counter += 1;
121 if self.gc_counter >= GC_FREQUENCY {
122 self.gc();
123 }
124 match self.get(pattern) {
125 Some(existing_regex) => existing_regex,
126 _ => {
127 let shared_regex = Arc::new(regex);
128
129 let regex_cache = shared_regex.create_cache();
130 let cache_size = regex_cache.memory_usage() + std::mem::size_of::<Cache>();
131 let cache_key = self.key_map.insert(WeakSharedRegex {
132 regex: Arc::downgrade(&shared_regex),
133 cache_size,
134 });
135 GLOBAL_STATS.add_total_regex_cache(cache_size as i64);
136 if let Some(old_cache_key) =
137 self.pattern_index.insert(pattern.to_owned(), cache_key)
138 {
139 if let Some(weak_ref) = self.key_map.remove(old_cache_key) {
141 GLOBAL_STATS.add_total_regex_cache(-(weak_ref.cache_size as i64));
142 debug_assert!(weak_ref.regex.strong_count() == 0)
143 }
144 }
145
146 GLOBAL_STATS.set_total_regexes(self.key_map.len());
147
148 SharedRegex {
149 regex: shared_regex,
150 cache_key,
151 }
152 }
153 }
154 }
155}
156
#[cfg(test)]
mod test {
    use crate::scanner::regex_rule::regex_store::{
        GC_FREQUENCY, RegexStore, get_memoized_regex_with_custom_store,
    };
    use regex_automata::meta::Regex;
    use std::sync::Mutex;

    /// An entry whose only strong handle was dropped disappears after an
    /// explicit GC pass.
    #[test]
    fn dropped_regexes_should_be_removed_from_global_store() {
        let store = Mutex::new(RegexStore::new());

        let handle = get_memoized_regex_with_custom_store("test", Regex::new, &store).unwrap();
        assert_eq!(store.lock().unwrap().len(), 1);

        drop(handle);
        store.lock().unwrap().gc();

        assert_eq!(store.lock().unwrap().len(), 0);
    }

    /// Inserting `GC_FREQUENCY` patterns in total triggers a GC pass without
    /// calling `gc()` directly.
    #[test]
    fn test_automatic_gc() {
        let store = Mutex::new(RegexStore::new());

        // First insert; the handle is released immediately.
        drop(get_memoized_regex_with_custom_store("test", Regex::new, &store).unwrap());

        for i in 0..(GC_FREQUENCY - 1) {
            let pattern = format!("test-{i}");
            drop(get_memoized_regex_with_custom_store(&pattern, Regex::new, &store).unwrap());
        }

        // The final insert ran GC before adding its own entry, so that entry
        // is the only one left.
        assert_eq!(store.lock().unwrap().len(), 1);
    }
}