1use std::io::{BufRead, BufReader};
4use std::sync::OnceLock;
5
6use regex::bytes::Regex;
7
/// Lazily-initialized storage for the compiled comment-line pattern; filled on first use by
/// `comment_regex()`.
static COMMENT_REGEX: OnceLock<Regex> = OnceLock::new();
/// Lazily-initialized storage for the compiled blank-line pattern; filled on first use by
/// `blank_regex()`.
static BLANK_REGEX: OnceLock<Regex> = OnceLock::new();
10
11fn comment_regex() -> &'static Regex {
12 COMMENT_REGEX.get_or_init(|| Regex::new(r"^\s*#.*$").unwrap())
13}
14
15fn blank_regex() -> &'static Regex {
16 BLANK_REGEX.get_or_init(|| Regex::new(r"^\s*$").unwrap())
17}
18
/// Boxed callback used as a dynamic replacement: it is handed the bytes of a whole regex match
/// and returns the bytes to put in its place. The `Send + Sync` bounds let a `Replacer` holding
/// one be stored in a `static`.
type ReplFunc = Box<dyn Fn(&[u8]) -> Vec<u8> + Send + Sync>;
20
/// A single scrubbing rule: a pattern plus the replacement to apply wherever it matches.
pub struct Replacer {
    /// Pattern to search for. A replacer whose `regex` is `None` is skipped entirely.
    pub regex: Option<Regex>,

    /// Optional pre-filter. When this is `Some` and non-empty, the rule only runs on input that
    /// contains at least one of these strings; `None` or an empty list means "always run".
    pub hints: Option<Vec<String>>,

    /// Static replacement bytes passed to `Regex::replace_all`; capture references such as `$1`
    /// are expanded. Only consulted when `repl_func` is `None`.
    pub repl: Option<Vec<u8>>,

    /// Dynamic replacement. When present it takes precedence over `repl` and is called with the
    /// bytes of each whole match.
    pub repl_func: Option<ReplFunc>,
}
40
/// Lazily-initialized storage for the shared default scrubber; filled on the first call to
/// `default_scrubber()`.
static DEFAULT_SCRUBBER: OnceLock<Scrubber> = OnceLock::new();
42
43pub fn default_scrubber() -> &'static Scrubber {
48 DEFAULT_SCRUBBER.get_or_init(Scrubber::default)
49}
50
51impl Default for Scrubber {
52 fn default() -> Self {
53 let hinted_api_key_replacer = Replacer {
54 regex: Some(Regex::new(r"(api_?key=)[a-zA-Z0-9]+([a-zA-Z0-9]{5})\b").unwrap()),
55 repl: Some(b"$1***************************$2".to_vec()),
56 hints: Some(vec!["api_key".to_string(), "apikey".to_string()]),
57 repl_func: None,
58 };
59
60 let hinted_app_key_replacer = Replacer {
61 regex: Some(Regex::new(r"(ap(?:p|plication)_?key=)[a-zA-Z0-9]+([a-zA-Z0-9]{5})\b").unwrap()),
62 repl: Some(b"$1***********************************$2".to_vec()),
63 hints: Some(vec![
64 "appkey".to_string(),
65 "app_key".to_string(),
66 "application_key".to_string(),
67 ]),
68 repl_func: None,
69 };
70
71 let api_key_replacer = Replacer {
73 regex: Some(Regex::new(r"\b[a-fA-F0-9]{27}([a-fA-F0-9]{5})\b").unwrap()),
74 repl: Some(b"***************************$1".to_vec()),
75 hints: None,
76 repl_func: None,
77 };
78
79 let api_key_replacer_yaml = Replacer {
81 regex: Some(Regex::new(r#"(\-|\:|,|\[|\{)(\s+)?\b[a-fA-F0-9]{27}([a-fA-F0-9]{5})\b"#).unwrap()),
82 repl: Some(b"$1$2\"***************************$3\"".to_vec()),
83 hints: None,
84 repl_func: None,
85 };
86
87 let app_key_replacer_yaml = Replacer {
88 regex: Some(Regex::new(r#"(\-|\:|,|\[|\{)(\s+)?\b[a-fA-F0-9]{35}([a-fA-F0-9]{5})\b"#).unwrap()),
89 repl: Some(b"$1$2\"***********************************$3\"".to_vec()),
90 hints: None,
91 repl_func: None,
92 };
93
94 let app_key_replacer = Replacer {
95 regex: Some(Regex::new(r"\b[a-fA-F0-9]{35}([a-fA-F0-9]{5})\b").unwrap()),
96 repl: Some(b"***********************************$1".to_vec()),
97 hints: None,
98 repl_func: None,
99 };
100
101 let rc_app_key_replacer = Replacer {
103 regex: Some(Regex::new(r"\bDDRCM_[A-Z0-9]+([A-Z0-9]{5})\b").unwrap()),
104 repl: Some(b"***********************************$1".to_vec()),
105 hints: None,
106 repl_func: None,
107 };
108
109 let uri_password_replacer = Replacer {
111 regex: Some(Regex::new(r#"(?i)([a-z][a-z0-9+-.]+://|\b)([^:\s]+):([^\s|"]+)@"#).unwrap()),
112 repl: Some(b"$1$2:********@".to_vec()),
113 hints: None,
114 repl_func: None,
115 };
116
117 let password_replacer = Replacer {
118 regex: Some(Regex::new(r#"(?i)(\"?(?:pass(?:word)?|pswd|pwd)\"?)((?:=| = |: )\"?)([0-9A-Za-z#!$%&'()*+,\-./:;<=>?@\[\\\]^_{|}~]+)"#).unwrap()),
119 repl: Some(b"$1$2********".to_vec()),
120 hints: None,
121 repl_func: None,
122 };
123
124 Self {
125 replacers: vec![
126 hinted_api_key_replacer,
127 hinted_app_key_replacer,
128 api_key_replacer_yaml,
129 app_key_replacer_yaml,
130 api_key_replacer,
131 app_key_replacer,
132 rc_app_key_replacer,
133 uri_password_replacer,
134 password_replacer,
135 ],
136 }
137 }
138}
139
/// Applies an ordered list of [`Replacer`] rules to byte input, one line at a time.
pub struct Scrubber {
    /// Rules in application order; each rule sees the output of the rules before it.
    replacers: Vec<Replacer>,
}
144
145impl Scrubber {
146 pub fn new() -> Self {
148 Self { replacers: vec![] }
149 }
150
151 pub fn add_replacer(&mut self, replacer: Replacer) {
153 self.replacers.push(replacer);
154 }
155
156 pub fn scrub_bytes(&self, data: &[u8]) -> Vec<u8> {
160 let mut reader = BufReader::new(data);
161 self.scrub_reader(&mut reader)
162 }
163
164 fn scrub_reader(&self, reader: &mut BufReader<&[u8]>) -> Vec<u8> {
165 let mut scrubbed_lines = Vec::new();
166 let mut line = Vec::new();
167 let mut first = true;
168 while let Ok(bytes_read) = reader.read_until(b'\n', &mut line) {
169 if bytes_read == 0 {
170 break; }
172
173 if blank_regex().is_match(&line) {
174 scrubbed_lines.push(b"\n".to_vec());
175 } else if !comment_regex().is_match(&line) {
176 let b = self.scrub(&line, &self.replacers);
177 if !first {
178 scrubbed_lines.push(b"\n".to_vec());
179 }
180 scrubbed_lines.push(b);
181 first = false;
182 }
183 line.clear();
184 }
185 scrubbed_lines.join(&b'\n')
186 }
187
188 fn scrub(&self, data: &[u8], replacers: &[Replacer]) -> Vec<u8> {
190 let mut scrubbed_data = data.to_vec();
191 for replacer in replacers {
192 if replacer.regex.is_none() {
193 continue;
194 }
195
196 let contains_hint = if let Some(hints) = &replacer.hints {
197 hints.iter().any(|hint| {
198 let needle = hint.as_bytes();
199 data.windows(needle.len()).any(|window| window == needle)
200 })
201 } else {
202 false
203 };
204
205 if replacer.hints.as_ref().is_none_or(|h| h.is_empty() || contains_hint) {
206 if let Some(re) = &replacer.regex {
207 if let Some(repl_func) = &replacer.repl_func {
208 scrubbed_data = re
209 .replace_all(&scrubbed_data, |caps: ®ex::bytes::Captures| repl_func(&caps[0]))
210 .into_owned();
211 } else if let Some(repl) = &replacer.repl {
212 scrubbed_data = re.replace_all(&scrubbed_data, repl.as_slice()).into_owned();
213 }
214 }
215 }
216 }
217 scrubbed_data
218 }
219}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// Scrubs `contents` with the default scrubber and checks the result against
    /// `clean_contents`, ignoring leading and trailing whitespace on both sides.
    fn assert_clean(contents: &str, clean_contents: &str) {
        let scrubber = default_scrubber();
        let cleaned = scrubber.scrub_bytes(contents.as_bytes());
        let cleaned_string = String::from_utf8(cleaned).unwrap();
        assert_eq!(cleaned_string.trim(), clean_contents.trim());
    }

    #[test]
    fn test_config_strip_api_key() {
        assert_clean(
            "api_key: aaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb",
            "api_key: \"***************************abbbb\"",
        );
        assert_clean(
            "api_key: AAAAAAAAAAAAAAAAAAAAAAAAAAAABBBB",
            "api_key: \"***************************ABBBB\"",
        );
        // Double-quoted value, mirroring the equivalent case in `test_config_app_key`.
        assert_clean(
            "api_key: \"aaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb\"",
            "api_key: \"***************************abbbb\"",
        );
        assert_clean(
            "api_key: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb'",
            "api_key: '***************************abbbb'",
        );
        assert_clean(
            "   api_key:   'aaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb'   ",
            "   api_key:   '***************************abbbb'   ",
        );
    }

    #[test]
    fn test_config_app_key() {
        assert_clean(
            "app_key: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb",
            "app_key: \"***********************************abbbb\"",
        );
        assert_clean(
            "app_key: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBB",
            "app_key: \"***********************************ABBBB\"",
        );
        assert_clean(
            "app_key: \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb\"",
            "app_key: \"***********************************abbbb\"",
        );
        assert_clean(
            "app_key: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb'",
            "app_key: '***********************************abbbb'",
        );
        assert_clean(
            "   app_key:   'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb'   ",
            "   app_key:   '***********************************abbbb'   ",
        );
    }

    #[test]
    fn test_config_rc_app_key() {
        assert_clean(
            "key: \"DDRCM_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABCDE\"",
            "key: \"***********************************ABCDE\"",
        );
    }

    #[test]
    fn test_text_strip_api_key() {
        assert_clean(
            "Error status code 500 : http://dog.tld/api?key=3290abeefc68e1bbe852a25252bad88c",
            "Error status code 500 : http://dog.tld/api?key=***************************ad88c",
        );
        assert_clean(
            "hintedAPIKeyReplacer : http://dog.tld/api_key=InvalidLength12345abbbb",
            "hintedAPIKeyReplacer : http://dog.tld/api_key=***************************abbbb",
        );
        assert_clean(
            "hintedAPIKeyReplacer : http://dog.tld/apikey=InvalidLength12345abbbb",
            "hintedAPIKeyReplacer : http://dog.tld/apikey=***************************abbbb",
        );
        assert_clean(
            "apiKeyReplacer: https://agent-http-intake.logs.datadoghq.com/v1/input/aaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb",
            "apiKeyReplacer: https://agent-http-intake.logs.datadoghq.com/v1/input/***************************abbbb",
        );
    }

    #[test]
    fn test_config_strip_url_password() {
        assert_clean(
            "proxy: random_url_key: http://user:password@host:port",
            "proxy: random_url_key: http://user:********@host:port",
        );
        assert_clean(
            "random_url_key http://user:password@host:port",
            "random_url_key http://user:********@host:port",
        );
        assert_clean(
            "random_url_key: http://user:password@host:port",
            "random_url_key: http://user:********@host:port",
        );
        assert_clean(
            "random_url_key: http://user:p@ssw0r)@host:port",
            "random_url_key: http://user:********@host:port",
        );
        assert_clean(
            "random_url_key: http://user:🔑🔒🔐🔓@host:port",
            "random_url_key: http://user:********@host:port",
        );
        assert_clean(
            "random_url_key: http://user:password@host",
            "random_url_key: http://user:********@host",
        );
        assert_clean(
            "random_url_key: protocol://user:p@ssw0r)@host:port",
            "random_url_key: protocol://user:********@host:port",
        );
        assert_clean(
            "random_url_key: \"http://user:password@host:port\"",
            "random_url_key: \"http://user:********@host:port\"",
        );
        assert_clean(
            "random_url_key: 'http://user:password@host:port'",
            "random_url_key: 'http://user:********@host:port'",
        );
        assert_clean(
            "random_domain_key: 'user:password@host:port'",
            "random_domain_key: 'user:********@host:port'",
        );
        assert_clean(
            "   random_url_key:   'http://user:password@host:port'   ",
            "   random_url_key:   'http://user:********@host:port'   ",
        );
        assert_clean(
            "   random_url_key:   'mongodb+s.r-v://user:password@host:port'   ",
            "   random_url_key:   'mongodb+s.r-v://user:********@host:port'   ",
        );
        assert_clean(
            "   random_url_key:   'mongodb+srv://user:pass-with-hyphen@abc.example.com/database'   ",
            "   random_url_key:   'mongodb+srv://user:********@abc.example.com/database'   ",
        );
    }

    #[test]
    fn test_text_strip_app_key() {
        assert_clean(
            "hintedAPPKeyReplacer : http://dog.tld/app_key=InvalidLength12345abbbb",
            "hintedAPPKeyReplacer : http://dog.tld/app_key=***********************************abbbb",
        );
        assert_clean(
            "hintedAPPKeyReplacer : http://dog.tld/appkey=InvalidLength12345abbbb",
            "hintedAPPKeyReplacer : http://dog.tld/appkey=***********************************abbbb",
        );
        assert_clean(
            "hintedAPPKeyReplacer : http://dog.tld/application_key=InvalidLength12345abbbb",
            "hintedAPPKeyReplacer : http://dog.tld/application_key=***********************************abbbb",
        );
        assert_clean(
            "appKeyReplacer: http://dog.tld/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb",
            "appKeyReplacer: http://dog.tld/***********************************abbbb",
        );
    }
}