dd_sds/scanner/
debug_scan.rs

1use crate::scanner::regex_rule::access_regex_caches;
2use crate::scanner::regex_rule::compiled::RegexCompiledRule;
3use crate::{
4    CreateScannerError, Event, MatchAction, RegexRuleConfig, RootRuleConfig, RuleConfig, RuleMatch,
5    Scanner, ScannerError, Scope,
6};
7use serde::{Deserialize, Serialize};
8use std::sync::Arc;
9use thiserror::Error;
10
/// A single entry returned by `debug_scan`: a rule match together with the reason it
/// was (or was not) a full match.
#[derive(Debug, Serialize, Deserialize)]
pub struct DebugRuleMatch {
    /// The match as reported by the scanner that found it.
    pub rule_match: RuleMatch,
    /// Whether this was a full match, or which check kept it from being one.
    ///
    /// Flattened by serde, so the status fields are serialized alongside `rule_match`
    /// rather than under a nested `status` object.
    #[serde(flatten)]
    pub status: DebugRuleMatchStatus,
}
17
/// Outcome attached to each [`DebugRuleMatch`].
///
/// Serialized as an internally tagged enum: the variant name is stored in a `status` field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "status")]
pub enum DebugRuleMatchStatus {
    /// The unmodified rule matched. Carries the included keyword found before the match, if any.
    Matched(MatchedInfo),
    /// The rule only matched once its included keywords were removed.
    MissingIncludedKeyword,
    /// NOTE(review): not produced by any code visible in this file; presumably reserved for an
    /// included keyword that exists but is outside the allowed distance — confirm.
    IncludedKeywordTooFar,
    /// The rule only matched once its excluded keywords were removed. Carries the excluded
    /// keyword that was found, if it could be located.
    ExcludedKeyword(ExcludedInfo),
    /// The rule only matched once its include-scope restriction was dropped.
    NotInIncludedScope,
    /// The rule only matched once its excluded paths were cleared.
    InExcludedScope,
    /// The rule only matched once its suppressions were removed.
    Suppressed,
    /// The rule only matched once its secondary validator was removed.
    ChecksumFailed,
}
30
31#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
32pub struct MatchedInfo {
33    included_keyword: Option<String>,
34    included_keyword_start_index: Option<usize>,
35    included_keyword_end_exclusive: Option<usize>,
36}
37
/// Details attached to a [`DebugRuleMatchStatus::ExcludedKeyword`] result.
///
/// All fields are `None` when the excluded keyword could not be located.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ExcludedInfo {
    /// The text of the excluded keyword, copied out of the scanned string.
    pub excluded_keyword: Option<String>,
    /// Index in the scanned string where the excluded keyword starts.
    pub excluded_keyword_start_index: Option<usize>,
    /// Index in the scanned string just past the end of the excluded keyword.
    pub excluded_keyword_end_exclusive: Option<usize>,
}
44
/// Errors returned by `debug_scan`.
///
/// Both variants are `transparent`: their `Display` output is forwarded from the wrapped error.
#[derive(Debug, PartialEq, Eq, Error)]
pub enum DebugScanError {
    /// A scanner could not be built from the supplied rule configuration.
    #[error(transparent)]
    InvalidConfig(CreateScannerError),
    /// Scanning the event failed.
    #[error(transparent)]
    ScanError(ScannerError),
}
52
53impl From<CreateScannerError> for DebugScanError {
54    fn from(value: CreateScannerError) -> Self {
55        Self::InvalidConfig(value)
56    }
57}
58
59impl From<ScannerError> for DebugScanError {
60    fn from(value: ScannerError) -> Self {
61        Self::ScanError(value)
62    }
63}
64
65/// Similar to `.scan(), except more information is given for matches (such as the keyword),
66/// and partial matches are also returned with a reason it wasn't a full match.
67/// The current implementation is only able to return partial matches if there was a single issue.
68///
69/// This function should be considered experimental, and is not intended for use where performance
70/// is critical.
71pub fn debug_scan<E: Event>(
72    event: &mut E,
73    mut rule: RootRuleConfig<Arc<dyn RuleConfig>>,
74) -> Result<Vec<DebugRuleMatch>, DebugScanError> {
75    // prevent the output from changing
76    rule.match_action = MatchAction::None;
77
78    let full_scanner = Scanner::builder(&[rule.clone().map_inner(|x| x as Arc<dyn RuleConfig>)])
79        .build()
80        .map_err(DebugScanError::InvalidConfig)?;
81
82    let full_matches = full_scanner.scan(event)?;
83
84    let mut output: Vec<DebugRuleMatch> =
85        full_matches
86            .into_iter()
87            .map(|rule_match| {
88                let mut matched_status_info = MatchedInfo {
89                    included_keyword: None,
90                    included_keyword_start_index: None,
91                    included_keyword_end_exclusive: None,
92                };
93                if let Some(compiled_regex_rule) = full_scanner.rules[0].as_regex_rule()
94                    && let Some(compiled_included_keywords) = &compiled_regex_rule.included_keywords
95                {
96                    event.visit_string_mut(&rule_match.path, |content| {
97                        access_regex_caches(|caches| {
98                            if let Some(info) = compiled_included_keywords
99                                .find_keyword_before_match(rule_match.start_index, caches, content)
100                            {
101                                matched_status_info.included_keyword = Some(info.keyword);
102                                matched_status_info.included_keyword_start_index =
103                                    Some(info.keyword_start_index);
104                                matched_status_info.included_keyword_end_exclusive =
105                                    Some(info.keyword_end_index_exclusive);
106                            }
107                        });
108                        false
109                    });
110                }
111
112                DebugRuleMatch {
113                    rule_match,
114                    status: DebugRuleMatchStatus::Matched(matched_status_info),
115                }
116            })
117            .collect();
118
119    if let Some(regex_rule) = rule.inner.as_regex_rule() {
120        let regex_compiled_rule = full_scanner.rules[0].as_regex_rule().unwrap();
121        debug_scan_regex(event, &rule, regex_rule, &mut output, regex_compiled_rule)?;
122    }
123    debug_scan_included_scope(event, &rule, &mut output)?;
124    debug_scan_excluded_scope(event, &rule, &mut output)?;
125    debug_scan_suppressions(event, &rule, &mut output)?;
126
127    Ok(output)
128}
129
130fn debug_scan_regex<E: Event>(
131    event: &mut E,
132    root_rule: &RootRuleConfig<Arc<dyn RuleConfig>>,
133    regex_rule: &RegexRuleConfig,
134    output: &mut Vec<DebugRuleMatch>,
135    regex_compiled_rule: &RegexCompiledRule,
136) -> Result<(), ScannerError> {
137    debug_scan_included_keywords(event, root_rule, regex_rule, output)?;
138    debug_scan_excluded_keywords(event, root_rule, regex_rule, output, regex_compiled_rule)?;
139    debug_scan_checksum(event, root_rule, regex_rule, output)?;
140    Ok(())
141}
142
143fn debug_scan_included_keywords<E: Event>(
144    event: &mut E,
145    root_rule: &RootRuleConfig<Arc<dyn RuleConfig>>,
146    regex_rule: &RegexRuleConfig,
147    output: &mut Vec<DebugRuleMatch>,
148) -> Result<(), ScannerError> {
149    let mut regex_rule = regex_rule.clone();
150
151    if let Some(proximity_keywords) = &mut regex_rule.proximity_keywords
152        && !proximity_keywords.included_keywords.is_empty()
153    {
154        proximity_keywords.included_keywords = vec![];
155
156        let scanner = Scanner::builder(&[root_rule.clone().map_inner(|_| regex_rule.build())])
157            .build()
158            .unwrap();
159
160        let matches = scanner.scan(event)?;
161        for m in matches {
162            if !output.iter().any(|x| x.rule_match == m) {
163                output.push(DebugRuleMatch {
164                    rule_match: m,
165                    status: DebugRuleMatchStatus::MissingIncludedKeyword,
166                });
167            }
168        }
169    }
170    Ok(())
171}
172
173fn debug_scan_excluded_keywords<E: Event>(
174    event: &mut E,
175    root_rule: &RootRuleConfig<Arc<dyn RuleConfig>>,
176    regex_rule: &RegexRuleConfig,
177    output: &mut Vec<DebugRuleMatch>,
178    regex_compiled_rule: &RegexCompiledRule,
179) -> Result<(), ScannerError> {
180    let mut regex_rule = regex_rule.clone();
181
182    if let Some(proximity_keywords) = &mut regex_rule.proximity_keywords
183        && !proximity_keywords.excluded_keywords.is_empty()
184    {
185        proximity_keywords.excluded_keywords = vec![];
186
187        let scanner = Scanner::builder(&[root_rule.clone().map_inner(|_| regex_rule.build())])
188            .build()
189            .unwrap();
190
191        let matches = scanner.scan(event)?;
192
193        for m in matches {
194            if !output.iter().any(|x| x.rule_match == m) {
195                let mut excluded_info = ExcludedInfo {
196                    excluded_keyword: None,
197                    excluded_keyword_start_index: None,
198                    excluded_keyword_end_exclusive: None,
199                };
200
201                if let Some(compiled_excluded_keywords) = &regex_compiled_rule.excluded_keywords {
202                    event.visit_string_mut(&m.path, |content| {
203                        if let Some(info) = compiled_excluded_keywords
204                            .get_false_positive_match(content, m.start_index)
205                        {
206                            excluded_info.excluded_keyword_start_index = Some(info.start());
207                            excluded_info.excluded_keyword_end_exclusive = Some(info.end());
208                            excluded_info.excluded_keyword =
209                                Some(content[info.start()..info.end()].to_string());
210                        }
211                        false
212                    })
213                }
214                output.push(DebugRuleMatch {
215                    rule_match: m,
216                    status: DebugRuleMatchStatus::ExcludedKeyword(excluded_info),
217                });
218            }
219        }
220    }
221    Ok(())
222}
223
224fn debug_scan_included_scope<E: Event>(
225    event: &mut E,
226    root_rule: &RootRuleConfig<Arc<dyn RuleConfig>>,
227    output: &mut Vec<DebugRuleMatch>,
228) -> Result<(), ScannerError> {
229    let new_scope = match &root_rule.scope {
230        Scope::Include {
231            include: _,
232            exclude,
233        } => Scope::Exclude(exclude.clone()),
234        _ => return Ok(()),
235    };
236
237    let mut root_rule = root_rule.clone();
238    root_rule.scope = new_scope;
239
240    let scanner = Scanner::builder(&[root_rule]).build().unwrap();
241
242    let matches = scanner.scan(event)?;
243    add_status_if_no_match(matches, output, DebugRuleMatchStatus::NotInIncludedScope);
244    Ok(())
245}
246
247fn debug_scan_excluded_scope<E: Event>(
248    event: &mut E,
249    root_rule: &RootRuleConfig<Arc<dyn RuleConfig>>,
250    output: &mut Vec<DebugRuleMatch>,
251) -> Result<(), ScannerError> {
252    let new_scope = match &root_rule.scope {
253        Scope::Include { include, exclude } => {
254            if exclude.is_empty() {
255                return Ok(());
256            }
257            Scope::Include {
258                include: include.clone(),
259                exclude: vec![],
260            }
261        }
262        Scope::Exclude(exclude) => {
263            if exclude.is_empty() {
264                return Ok(());
265            }
266            Scope::Exclude(vec![])
267        }
268    };
269
270    let mut root_rule = root_rule.clone();
271    root_rule.scope = new_scope;
272
273    let scanner = Scanner::builder(&[root_rule]).build().unwrap();
274
275    let matches = scanner.scan(event)?;
276    add_status_if_no_match(matches, output, DebugRuleMatchStatus::InExcludedScope);
277    Ok(())
278}
279
280fn debug_scan_checksum<E: Event>(
281    event: &mut E,
282    root_rule: &RootRuleConfig<Arc<dyn RuleConfig>>,
283    regex_rule: &RegexRuleConfig,
284    output: &mut Vec<DebugRuleMatch>,
285) -> Result<(), ScannerError> {
286    if regex_rule.validator.is_none() {
287        return Ok(());
288    }
289
290    let mut regex_rule = regex_rule.clone();
291    regex_rule.validator = None;
292
293    let scanner = Scanner::builder(&[root_rule.clone().map_inner(|_| regex_rule.build())])
294        .build()
295        .unwrap();
296
297    let matches = scanner.scan(event)?;
298    add_status_if_no_match(matches, output, DebugRuleMatchStatus::ChecksumFailed);
299    Ok(())
300}
301
302fn debug_scan_suppressions<E: Event>(
303    event: &mut E,
304    root_rule: &RootRuleConfig<Arc<dyn RuleConfig>>,
305    output: &mut Vec<DebugRuleMatch>,
306) -> Result<(), ScannerError> {
307    if root_rule.suppressions.is_none() {
308        return Ok(());
309    }
310
311    let mut root_rule = root_rule.clone();
312    root_rule.suppressions = None;
313
314    let scanner = Scanner::builder(&[root_rule]).build().unwrap();
315
316    let new_matches = scanner.scan(event)?;
317    add_status_if_no_match(new_matches, output, DebugRuleMatchStatus::Suppressed);
318
319    Ok(())
320}
321
322fn add_status_if_no_match(
323    new_matches: Vec<RuleMatch>,
324    output: &mut Vec<DebugRuleMatch>,
325    status: DebugRuleMatchStatus,
326) {
327    for m in new_matches {
328        if !output.iter().any(|x| x.rule_match == m) {
329            output.push(DebugRuleMatch {
330                rule_match: m,
331                status: status.clone(),
332            });
333        }
334    }
335}
336
// Each test below exercises one `DebugRuleMatchStatus` outcome of `debug_scan`.
// Several rules set a redact action; `debug_scan` replaces the match action with
// `MatchAction::None` before scanning.
#[cfg(test)]
mod test {
    use super::*;
    use crate::{
        MatchAction, Path, PathSegment, RegexRuleConfig, RootRuleConfig, SecondaryValidator,
        SimpleEvent, Suppressions,
    };
    use std::collections::BTreeMap;

    // A plain regex rule with no keywords: a full match with no keyword info.
    #[test]
    fn test_full_match() {
        let rule_config = RootRuleConfig::new(RegexRuleConfig::new("secret").build());

        let mut msg = "This is a secret".to_string();
        let matches = debug_scan(&mut msg, rule_config).unwrap();

        // Full match
        assert_eq!(matches.len(), 1);
        assert_eq!(
            matches[0].status,
            DebugRuleMatchStatus::Matched(MatchedInfo {
                included_keyword: None,
                included_keyword_start_index: None,
                included_keyword_end_exclusive: None,
            })
        );
        assert_eq!(matches[0].rule_match.start_index, 10);
    }

    // The included keyword "a" appears right before the match, so the full match reports
    // the keyword and its position (8..9).
    #[test]
    fn test_full_match_with_included_keyword() {
        let rule_config = RootRuleConfig::new(
            RegexRuleConfig::new("secret")
                .with_included_keywords(&["a"])
                .build(),
        );

        let mut msg = "This is a secret".to_string();
        let matches = debug_scan(&mut msg, rule_config).unwrap();

        assert_eq!(matches.len(), 1);
        assert_eq!(
            matches[0].status,
            DebugRuleMatchStatus::Matched(MatchedInfo {
                included_keyword: Some("a".to_string()),
                included_keyword_start_index: Some(8),
                included_keyword_end_exclusive: Some(9),
            })
        );
        assert_eq!(matches[0].rule_match.start_index, 10);
    }

    // The required keyword "value" is absent from the message, so the regex hit is reported
    // as a partial match missing its included keyword.
    #[test]
    fn test_missing_included_keyword() {
        let rule = RootRuleConfig::new(
            RegexRuleConfig::new("secret")
                .with_included_keywords(&["value"])
                .build(),
        )
        .match_action(MatchAction::redact("[REDACTED]"));

        let mut msg = "This is a secret".to_string();
        let matches = debug_scan(&mut msg, rule).unwrap();

        assert_eq!(matches.len(), 1);
        assert_eq!(
            matches[0].status,
            DebugRuleMatchStatus::MissingIncludedKeyword
        );
        assert_eq!(matches[0].rule_match.start_index, 10);
    }

    // The excluded keyword "a" precedes the match, so the hit is reported as excluded along
    // with the keyword and its position (8..9).
    #[test]
    fn test_with_excluded_keyword() {
        let rule = RootRuleConfig::new(
            RegexRuleConfig::new("secret")
                .with_excluded_keywords(&["a"])
                .build(),
        )
        .match_action(MatchAction::redact("[REDACTED]"));

        let mut msg = "This is a secret".to_string();
        let matches = debug_scan(&mut msg, rule).unwrap();

        assert_eq!(matches.len(), 1);
        assert_eq!(
            matches[0].status,
            DebugRuleMatchStatus::ExcludedKeyword(ExcludedInfo {
                excluded_keyword: Some("a".to_string()),
                excluded_keyword_start_index: Some(8),
                excluded_keyword_end_exclusive: Some(9),
            })
        );
    }

    // The matched text equals an `exact_match` suppression, so the hit is reported as suppressed.
    #[test]
    fn test_suppressions() {
        let rule = RootRuleConfig::new(RegexRuleConfig::new("secret").build())
            .match_action(MatchAction::redact("[REDACTED]"))
            .suppressions(Suppressions {
                starts_with: vec![],
                ends_with: vec![],
                exact_match: vec!["secret".to_string()],
            });

        let mut msg = "This is a secret".to_string();
        let matches = debug_scan(&mut msg, rule).unwrap();

        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].status, DebugRuleMatchStatus::Suppressed);
        assert_eq!(matches[0].rule_match.start_index, 10);
    }

    // The rule only includes the "tag" path, but the secret lives under "tag2".
    #[test]
    fn test_included_scope() {
        let rule = RootRuleConfig::new(RegexRuleConfig::new("secret").build())
            .match_action(MatchAction::redact("[REDACTED]"))
            .scope(Scope::include(vec![Path::from(vec![PathSegment::from(
                "tag",
            )])]));

        let mut map = BTreeMap::new();
        map.insert(
            "tag".to_string(),
            SimpleEvent::String("Not a match".to_string()),
        );
        map.insert(
            "tag2".to_string(),
            SimpleEvent::String("This is a secret".to_string()),
        );

        let mut event = SimpleEvent::Map(map);
        let matches = debug_scan(&mut event, rule).unwrap();

        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].status, DebugRuleMatchStatus::NotInIncludedScope);
        assert_eq!(matches[0].rule_match.start_index, 10);
    }

    // The rule excludes the "tag" path, which is exactly where the secret lives.
    #[test]
    fn test_excluded_scope() {
        let rule = RootRuleConfig::new(RegexRuleConfig::new("secret").build())
            .match_action(MatchAction::redact("[REDACTED]"))
            .scope(Scope::exclude(vec![Path::from(vec![PathSegment::from(
                "tag",
            )])]));

        let mut map = BTreeMap::new();
        map.insert(
            "tag".to_string(),
            SimpleEvent::String("Contains a secret".to_string()),
        );

        let mut event = SimpleEvent::Map(map);
        let matches = debug_scan(&mut event, rule).unwrap();

        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].status, DebugRuleMatchStatus::InExcludedScope);
    }

    // The input has the shape the regex expects but is rejected by the Luhn validator, so it
    // only matches once the validator is removed.
    #[test]
    fn test_checksum() {
        let rule = RootRuleConfig::new(
            RegexRuleConfig::new("[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}")
                .with_validator(Some(SecondaryValidator::LuhnChecksum))
                .build(),
        );

        let mut event = "1234-1234-1234-1235".to_string();
        let matches = debug_scan(&mut event, rule).unwrap();

        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].status, DebugRuleMatchStatus::ChecksumFailed);
    }
}