dd_sds/scanner/regex_rule/
regex_store.rs1use crate::stats::GLOBAL_STATS;
2use ahash::AHashMap;
3use lazy_static::lazy_static;
4use regex_automata::meta::{Cache, Regex as MetaRegex};
5use slotmap::{SlotMap, new_key_type};
6use std::ops::Deref;
7use std::sync::Weak;
8use std::sync::{Arc, Mutex};
9
10struct WeakSharedRegex {
11 regex: Weak<MetaRegex>,
12 cache_size: usize,
14}
15
16#[derive(Debug, Clone)]
17pub struct SharedRegex {
18 pub regex: Arc<MetaRegex>,
19 pub cache_key: RegexCacheKey,
20}
21
22impl Deref for SharedRegex {
23 type Target = MetaRegex;
24
25 fn deref(&self) -> &Self::Target {
26 self.regex.deref()
27 }
28}
29
30pub fn get_memoized_regex<T>(
31 pattern: &str,
32 regex_factory: impl FnOnce(&str) -> Result<regex_automata::meta::Regex, T>,
33) -> Result<SharedRegex, T> {
34 get_memoized_regex_with_custom_store(pattern, regex_factory, ®EX_STORE)
35}
36
37fn get_memoized_regex_with_custom_store<T>(
38 pattern: &str,
39 regex_factory: impl FnOnce(&str) -> Result<regex_automata::meta::Regex, T>,
40 store: &Mutex<RegexStore>,
41) -> Result<SharedRegex, T> {
42 {
43 let regex_store = store.lock().unwrap();
44 if let Some(exiting_regex) = regex_store.get(pattern) {
45 return Ok(exiting_regex);
46 }
47 }
48
49 let regex = regex_factory(pattern)?;
51
52 let mut regex_store = store.lock().unwrap();
53 Ok(regex_store.insert(pattern, regex))
54}
55
/// Number of `RegexStore::insert` calls after which the store runs `gc`.
const GC_FREQUENCY: u64 = 1000;
59
60lazy_static! {
61 static ref REGEX_STORE: Arc<Mutex<RegexStore>> = Arc::new(Mutex::new(RegexStore::new()));
62}
63new_key_type! { pub struct RegexCacheKey; }
64
65struct RegexStore {
66 pattern_index: AHashMap<String, RegexCacheKey>,
67 key_map: SlotMap<RegexCacheKey, WeakSharedRegex>,
68 gc_counter: u64,
70}
71
72impl RegexStore {
73 pub fn new() -> Self {
74 Self {
75 pattern_index: AHashMap::new(),
76 key_map: SlotMap::with_key(),
77 gc_counter: 0,
78 }
79 }
80
81 fn gc(&mut self) {
83 self.gc_counter = 0;
84 self.pattern_index.retain(|_, cache_key| {
85 if self.key_map.get(*cache_key).unwrap().regex.strong_count() == 0 {
86 if let Some(old_regex) = self.key_map.remove(*cache_key) {
87 GLOBAL_STATS.add_total_regex_cache(-(old_regex.cache_size as i64));
88 }
89 false
90 } else {
91 true
92 }
93 });
94 GLOBAL_STATS.set_total_regexes(self.key_map.len());
95 }
96
97 pub fn get(&self, pattern: &str) -> Option<SharedRegex> {
99 self.pattern_index.get(pattern).and_then(|cache_key| {
100 self.key_map
101 .get(*cache_key)
102 .and_then(|x| x.regex.upgrade())
103 .map(|regex| SharedRegex {
104 regex,
105 cache_key: *cache_key,
106 })
107 })
108 }
109
110 #[cfg(test)]
111 fn len(&self) -> usize {
112 debug_assert_eq!(self.pattern_index.len(), self.key_map.len());
113 self.key_map.len()
114 }
115
116 pub fn insert(&mut self, pattern: &str, regex: MetaRegex) -> SharedRegex {
120 self.gc_counter += 1;
121 if self.gc_counter >= GC_FREQUENCY {
122 self.gc();
123 }
124 match self.get(pattern) {
125 Some(existing_regex) => existing_regex,
126 _ => {
127 let shared_regex = Arc::new(regex);
128
129 let regex_cache = shared_regex.create_cache();
130 let cache_key = self.key_map.insert(WeakSharedRegex {
131 regex: Arc::downgrade(&shared_regex),
132 cache_size: regex_cache.memory_usage() + std::mem::size_of::<Cache>(),
133 });
134 if let Some(old_cache_key) =
135 self.pattern_index.insert(pattern.to_owned(), cache_key)
136 {
137 if let Some(weak_ref) = self.key_map.remove(old_cache_key) {
139 GLOBAL_STATS.add_total_regex_cache(-(weak_ref.cache_size as i64));
140 debug_assert!(weak_ref.regex.strong_count() == 0)
141 }
142 }
143
144 GLOBAL_STATS.set_total_regexes(self.key_map.len());
145
146 SharedRegex {
147 regex: shared_regex,
148 cache_key,
149 }
150 }
151 }
152 }
153}
154
#[cfg(test)]
mod test {
    use super::{GC_FREQUENCY, RegexStore, get_memoized_regex_with_custom_store};
    use regex_automata::meta::Regex;
    use std::sync::Mutex;

    /// Memoizes `pattern` in `store` and immediately drops the returned
    /// handle, leaving a dead entry behind for the collector.
    fn compile_and_release(store: &Mutex<RegexStore>, pattern: &str) {
        let handle = get_memoized_regex_with_custom_store(pattern, Regex::new, store).unwrap();
        drop(handle);
    }

    #[test]
    fn dropped_regexes_should_be_removed_from_global_store() {
        let store = Mutex::new(RegexStore::new());

        let handle = get_memoized_regex_with_custom_store("test", Regex::new, &store).unwrap();
        assert_eq!(store.lock().unwrap().len(), 1);

        // With the only strong handle gone, an explicit GC must evict the entry.
        drop(handle);
        store.lock().unwrap().gc();

        assert_eq!(store.lock().unwrap().len(), 0);
    }

    #[test]
    fn test_automatic_gc() {
        let store = Mutex::new(RegexStore::new());

        compile_and_release(&store, "test");

        // Together with the insert above this makes GC_FREQUENCY inserts; the
        // last one triggers the collection before its own pattern is stored.
        for i in 0..(GC_FREQUENCY - 1) {
            compile_and_release(&store, &format!("test-{i}"));
        }

        // Only the entry inserted after the collection remains.
        assert_eq!(store.lock().unwrap().len(), 1);
    }
}