1use std::io::{BufRead, BufReader};
4use std::sync::OnceLock;
5
6use regex::bytes::Regex;
7
/// Lazily initialized pattern for comment lines; filled in on first use by `comment_regex()`.
static COMMENT_REGEX: OnceLock<Regex> = OnceLock::new();
/// Lazily initialized pattern for blank lines; filled in on first use by `blank_regex()`.
static BLANK_REGEX: OnceLock<Regex> = OnceLock::new();
10
11fn comment_regex() -> &'static Regex {
12 COMMENT_REGEX.get_or_init(|| Regex::new(r"^\s*#.*$").unwrap())
13}
14
15fn blank_regex() -> &'static Regex {
16 BLANK_REGEX.get_or_init(|| Regex::new(r"^\s*$").unwrap())
17}
18
/// Boxed callback that turns a byte slice into replacement bytes.
///
/// `Scrubber::scrub` invokes it with the full text of each regex match. The
/// `Send + Sync` bounds let a `Replacer` holding one be stored in the shared
/// `DEFAULT_SCRUBBER` static.
type ReplFunc = Box<dyn Fn(&[u8]) -> Vec<u8> + Send + Sync>;
20
/// A single scrubbing rule: a pattern to look for and what to substitute for
/// each match.
pub struct Replacer {
    /// Pattern to search for. A replacer whose `regex` is `None` is skipped
    /// entirely by `Scrubber::scrub`.
    pub regex: Option<Regex>,

    /// Optional literal substrings used as a pre-filter. When this is `Some`
    /// and non-empty, the regex is applied only if the input contains at
    /// least one of the hints; when it is `None` or empty, the regex is
    /// always applied.
    pub hints: Option<Vec<String>>,

    /// Replacement bytes passed to `Regex::replace_all`; the built-in
    /// replacers use `$N` capture-group references here. Only consulted when
    /// `repl_func` is `None`.
    pub repl: Option<Vec<u8>>,

    /// Callback that computes the replacement from the full match. Takes
    /// precedence over `repl` when both are set.
    pub repl_func: Option<ReplFunc>,
}
40
/// Lazily initialized shared scrubber; built on first access through `default_scrubber()`.
static DEFAULT_SCRUBBER: OnceLock<Scrubber> = OnceLock::new();
42
43pub fn default_scrubber() -> &'static Scrubber {
48 DEFAULT_SCRUBBER.get_or_init(Scrubber::default)
49}
50
51impl Default for Scrubber {
52 fn default() -> Self {
53 let hinted_api_key_replacer = Replacer {
54 regex: Some(Regex::new(r"(api_?key=)[a-zA-Z0-9]+([a-zA-Z0-9]{5})\b").unwrap()),
55 repl: Some(b"$1***************************$2".to_vec()),
56 hints: Some(vec!["api_key".to_string(), "apikey".to_string()]),
57 repl_func: None,
58 };
59
60 let hinted_app_key_replacer = Replacer {
61 regex: Some(Regex::new(r"(ap(?:p|plication)_?key=)[a-zA-Z0-9]+([a-zA-Z0-9]{5})\b").unwrap()),
62 repl: Some(b"$1***********************************$2".to_vec()),
63 hints: Some(vec![
64 "appkey".to_string(),
65 "app_key".to_string(),
66 "application_key".to_string(),
67 ]),
68 repl_func: None,
69 };
70
71 let api_key_replacer = Replacer {
73 regex: Some(Regex::new(r"\b[a-fA-F0-9]{27}([a-fA-F0-9]{5})\b").unwrap()),
74 repl: Some(b"***************************$1".to_vec()),
75 hints: None,
76 repl_func: None,
77 };
78
79 let api_key_replacer_yaml = Replacer {
81 regex: Some(Regex::new(r#"(\-|\:|,|\[|\{)(\s+)?\b[a-fA-F0-9]{27}([a-fA-F0-9]{5})\b"#).unwrap()),
82 repl: Some(b"$1$2\"***************************$3\"".to_vec()),
83 hints: None,
84 repl_func: None,
85 };
86
87 let app_key_replacer_yaml = Replacer {
88 regex: Some(Regex::new(r#"(\-|\:|,|\[|\{)(\s+)?\b[a-fA-F0-9]{35}([a-fA-F0-9]{5})\b"#).unwrap()),
89 repl: Some(b"$1$2\"***********************************$3\"".to_vec()),
90 hints: None,
91 repl_func: None,
92 };
93
94 let app_key_replacer = Replacer {
95 regex: Some(Regex::new(r"\b[a-fA-F0-9]{35}([a-fA-F0-9]{5})\b").unwrap()),
96 repl: Some(b"***********************************$1".to_vec()),
97 hints: None,
98 repl_func: None,
99 };
100
101 let rc_app_key_replacer = Replacer {
103 regex: Some(Regex::new(r"\bDDRCM_[A-Z0-9]+([A-Z0-9]{5})\b").unwrap()),
104 repl: Some(b"***********************************$1".to_vec()),
105 hints: None,
106 repl_func: None,
107 };
108
109 let uri_password_replacer = Replacer {
111 regex: Some(Regex::new(r#"(?i)([a-z][a-z0-9+-.]+://|\b)([^:\s]+):([^\s|"]+)@"#).unwrap()),
112 repl: Some(b"$1$2:********@".to_vec()),
113 hints: None,
114 repl_func: None,
115 };
116
117 let password_replacer = Replacer {
121 regex: Some(Regex::new(r#"(?i)(\"?(?:pass(?:word)?|pswd|pwd)\"?)((?:=| = |:[ ]?)\"?)([0-9A-Za-z#!$%&'()*+,\-./:;<=>?@\[\\\]^_{|}~]+)(\"?)"#).unwrap()),
122 repl: Some(b"$1$2********$4".to_vec()),
123 hints: None,
124 repl_func: None,
125 };
126
127 Self {
128 replacers: vec![
129 hinted_api_key_replacer,
130 hinted_app_key_replacer,
131 api_key_replacer_yaml,
132 app_key_replacer_yaml,
133 api_key_replacer,
134 app_key_replacer,
135 rc_app_key_replacer,
136 uri_password_replacer,
137 password_replacer,
138 ],
139 }
140 }
141}
142
/// Applies an ordered list of `Replacer` rules to byte data in order to mask
/// sensitive values.
pub struct Scrubber {
    /// Rules applied to every scrubbed line, in insertion order.
    replacers: Vec<Replacer>,
}
147
148impl Scrubber {
149 pub fn new() -> Self {
151 Self { replacers: vec![] }
152 }
153
154 pub fn add_replacer(&mut self, replacer: Replacer) {
156 self.replacers.push(replacer);
157 }
158
159 pub fn scrub_bytes(&self, data: &[u8]) -> Vec<u8> {
163 let mut reader = BufReader::new(data);
164 self.scrub_reader(&mut reader)
165 }
166
167 fn scrub_reader(&self, reader: &mut BufReader<&[u8]>) -> Vec<u8> {
168 let mut scrubbed_lines = Vec::new();
169 let mut line = Vec::new();
170 let mut first = true;
171 while let Ok(bytes_read) = reader.read_until(b'\n', &mut line) {
172 if bytes_read == 0 {
173 break; }
175
176 if blank_regex().is_match(&line) {
177 scrubbed_lines.push(b"\n".to_vec());
178 } else if !comment_regex().is_match(&line) {
179 let b = self.scrub(&line, &self.replacers);
180 if !first {
181 scrubbed_lines.push(b"\n".to_vec());
182 }
183 scrubbed_lines.push(b);
184 first = false;
185 }
186 line.clear();
187 }
188 scrubbed_lines.join(&b'\n')
189 }
190
191 fn scrub(&self, data: &[u8], replacers: &[Replacer]) -> Vec<u8> {
193 let mut scrubbed_data = data.to_vec();
194 for replacer in replacers {
195 if replacer.regex.is_none() {
196 continue;
197 }
198
199 let contains_hint = if let Some(hints) = &replacer.hints {
200 hints.iter().any(|hint| {
201 let needle = hint.as_bytes();
202 data.windows(needle.len()).any(|window| window == needle)
203 })
204 } else {
205 false
206 };
207
208 if replacer.hints.as_ref().is_none_or(|h| h.is_empty() || contains_hint) {
209 if let Some(re) = &replacer.regex {
210 if let Some(repl_func) = &replacer.repl_func {
211 scrubbed_data = re
212 .replace_all(&scrubbed_data, |caps: ®ex::bytes::Captures| repl_func(&caps[0]))
213 .into_owned();
214 } else if let Some(repl) = &replacer.repl {
215 scrubbed_data = re.replace_all(&scrubbed_data, repl.as_slice()).into_owned();
216 }
217 }
218 }
219 }
220 scrubbed_data
221 }
222}
223
#[cfg(test)]
mod tests {
    use super::*;

    /// Scrubs `contents` with the default scrubber and checks that the result
    /// equals `clean_contents`, ignoring leading/trailing whitespace on both sides.
    fn assert_clean(contents: &str, clean_contents: &str) {
        let scrubber = default_scrubber();
        let cleaned = scrubber.scrub_bytes(contents.as_bytes());
        let cleaned_string = String::from_utf8(cleaned).unwrap();
        assert_eq!(cleaned_string.trim(), clean_contents.trim());
    }

    /// 32-character API keys in config-style `key: value` lines, unquoted,
    /// double-quoted and single-quoted.
    #[test]
    fn test_config_strip_api_key() {
        assert_clean(
            "api_key: aaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb",
            "api_key: \"***************************abbbb\"",
        );
        assert_clean(
            "api_key: AAAAAAAAAAAAAAAAAAAAAAAAAAAABBBB",
            "api_key: \"***************************ABBBB\"",
        );
        // Double-quoted value, mirroring the equivalent case in `test_config_app_key`.
        assert_clean(
            "api_key: \"aaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb\"",
            "api_key: \"***************************abbbb\"",
        );
        assert_clean(
            "api_key: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb'",
            "api_key: '***************************abbbb'",
        );
        assert_clean(
            "   api_key:   'aaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb'   ",
            "   api_key:   '***************************abbbb'   ",
        );
    }

    /// 40-character application keys in config-style `key: value` lines.
    #[test]
    fn test_config_app_key() {
        assert_clean(
            "app_key: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb",
            "app_key: \"***********************************abbbb\"",
        );
        assert_clean(
            "app_key: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBB",
            "app_key: \"***********************************ABBBB\"",
        );
        assert_clean(
            "app_key: \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb\"",
            "app_key: \"***********************************abbbb\"",
        );
        assert_clean(
            "app_key: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb'",
            "app_key: '***********************************abbbb'",
        );
        assert_clean(
            "   app_key:   'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb'   ",
            "   app_key:   '***********************************abbbb'   ",
        );
    }

    /// Keys carrying the `DDRCM_` prefix.
    #[test]
    fn test_config_rc_app_key() {
        assert_clean(
            "key: \"DDRCM_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABCDE\"",
            "key: \"***********************************ABCDE\"",
        );
    }

    /// API keys embedded in free-form text and URLs.
    #[test]
    fn test_text_strip_api_key() {
        assert_clean(
            "Error status code 500 : http://dog.tld/api?key=3290abeefc68e1bbe852a25252bad88c",
            "Error status code 500 : http://dog.tld/api?key=***************************ad88c",
        );
        assert_clean(
            "hintedAPIKeyReplacer : http://dog.tld/api_key=InvalidLength12345abbbb",
            "hintedAPIKeyReplacer : http://dog.tld/api_key=***************************abbbb",
        );
        assert_clean(
            "hintedAPIKeyReplacer : http://dog.tld/apikey=InvalidLength12345abbbb",
            "hintedAPIKeyReplacer : http://dog.tld/apikey=***************************abbbb",
        );
        assert_clean(
            "apiKeyReplacer: https://agent-http-intake.logs.datadoghq.com/v1/input/aaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb",
            "apiKeyReplacer: https://agent-http-intake.logs.datadoghq.com/v1/input/***************************abbbb",
        );
    }

    /// `user:password@host` credentials inside URLs and bare authority strings.
    #[test]
    fn test_config_strip_url_password() {
        assert_clean(
            "proxy: random_url_key: http://user:password@host:port",
            "proxy: random_url_key: http://user:********@host:port",
        );
        assert_clean(
            "random_url_key http://user:password@host:port",
            "random_url_key http://user:********@host:port",
        );
        assert_clean(
            "random_url_key: http://user:password@host:port",
            "random_url_key: http://user:********@host:port",
        );
        assert_clean(
            "random_url_key: http://user:p@ssw0r)@host:port",
            "random_url_key: http://user:********@host:port",
        );
        assert_clean(
            "random_url_key: http://user:🔑🔒🔐🔓@host:port",
            "random_url_key: http://user:********@host:port",
        );
        assert_clean(
            "random_url_key: http://user:password@host",
            "random_url_key: http://user:********@host",
        );
        assert_clean(
            "random_url_key: protocol://user:p@ssw0r)@host:port",
            "random_url_key: protocol://user:********@host:port",
        );
        assert_clean(
            "random_url_key: \"http://user:password@host:port\"",
            "random_url_key: \"http://user:********@host:port\"",
        );
        assert_clean(
            "random_url_key: 'http://user:password@host:port'",
            "random_url_key: 'http://user:********@host:port'",
        );
        assert_clean(
            "random_domain_key: 'user:password@host:port'",
            "random_domain_key: 'user:********@host:port'",
        );
        assert_clean(
            "   random_url_key:   'http://user:password@host:port'   ",
            "   random_url_key:   'http://user:********@host:port'   ",
        );
        assert_clean(
            "   random_url_key:   'mongodb+s.r-v://user:password@host:port'   ",
            "   random_url_key:   'mongodb+s.r-v://user:********@host:port'   ",
        );
        assert_clean(
            "   random_url_key:   'mongodb+srv://user:pass-with-hyphen@abc.example.com/database'   ",
            "   random_url_key:   'mongodb+srv://user:********@abc.example.com/database'   ",
        );
    }

    /// A double-quoted password value keeps both of its quotes after masking.
    #[test]
    fn test_password_yaml_double_quoted_value() {
        assert_clean("password: \"supersecret\"", "password: \"********\"");
    }

    /// Unquoted password values are masked for both `=` and `:` separators.
    #[test]
    fn test_password_unquoted_value_still_scrubbed() {
        assert_clean("password=supersecret", "password=********");
        assert_clean("password: supersecret", "password: ********");
    }

    /// Masking a quoted JSON password must leave the document parseable.
    #[test]
    fn test_json_password_like_key_scrubs_to_valid_json() {
        let scrubber = default_scrubber();
        let input = r#"{"mysql_password": "supersecret"}"#;
        let cleaned = String::from_utf8(scrubber.scrub_bytes(input.as_bytes())).unwrap();
        serde_json::from_str::<serde_json::Value>(&cleaned).expect("scrubbed JSON must parse");
        assert!(cleaned.contains("********"));

        let input_compact = r#"{"password":"secret"}"#;
        let cleaned_compact = String::from_utf8(scrubber.scrub_bytes(input_compact.as_bytes())).unwrap();
        serde_json::from_str::<serde_json::Value>(&cleaned_compact).expect("compact scrubbed JSON must parse");
        assert!(
            cleaned_compact.contains("********"),
            "compact JSON password must be scrubbed: {cleaned_compact}"
        );
    }

    /// Masking an API key inside single-line JSON must leave the document parseable.
    #[test]
    fn test_json_single_line_api_key_scrub() {
        let scrubber = default_scrubber();
        let input = r#"{"api_key":"aaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb"}"#;
        let cleaned = scrubber.scrub_bytes(input.as_bytes());
        let cleaned_string = String::from_utf8(cleaned).unwrap();
        serde_json::from_str::<serde_json::Value>(&cleaned_string).expect("scrubbed output must parse as JSON");
        assert!(
            cleaned_string.contains("***************************"),
            "expected masked api key suffix, got: {cleaned_string}"
        );
    }

    /// A single line longer than 16 KiB must come through scrubbing intact and parseable.
    #[test]
    fn test_large_single_line_json_scrubbed_still_parses() {
        let mut map = serde_json::Map::new();
        map.insert("api_key".into(), serde_json::json!("aaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb"));
        map.insert("pad".into(), serde_json::json!("x".repeat(25_000)));
        let line = serde_json::to_string(&serde_json::Value::Object(map)).unwrap();
        assert!(line.len() > 16_384, "sanity: payload should exceed 16 KiB");

        let scrubber = default_scrubber();
        let cleaned = scrubber.scrub_bytes(line.as_bytes());
        let cleaned_string = String::from_utf8(cleaned).unwrap();
        serde_json::from_str::<serde_json::Value>(&cleaned_string).expect("JSON parse after scrub");
    }

    /// Application keys embedded in free-form text and URLs.
    #[test]
    fn test_text_strip_app_key() {
        assert_clean(
            "hintedAPPKeyReplacer : http://dog.tld/app_key=InvalidLength12345abbbb",
            "hintedAPPKeyReplacer : http://dog.tld/app_key=***********************************abbbb",
        );
        assert_clean(
            "hintedAPPKeyReplacer : http://dog.tld/appkey=InvalidLength12345abbbb",
            "hintedAPPKeyReplacer : http://dog.tld/appkey=***********************************abbbb",
        );
        assert_clean(
            "hintedAPPKeyReplacer : http://dog.tld/application_key=InvalidLength12345abbbb",
            "hintedAPPKeyReplacer : http://dog.tld/application_key=***********************************abbbb",
        );
        assert_clean(
            "appKeyReplacer: http://dog.tld/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbb",
            "appKeyReplacer: http://dog.tld/***********************************abbbb",
        );
    }
}