dd_sds/scanner/
suppression.rs

1use ahash::AHashSet;
2use regex_automata::{Input, meta};
3use regex_syntax::ast::{Alternation, Assertion, AssertionKind, Ast, Concat, Flags, Group};
4use serde::{Deserialize, Serialize};
5use serde_with::serde_as;
6use thiserror::Error;
7
8use crate::{
9    RegexCaches,
10    ast_utils::{literal_ast, span},
11    scanner::regex_rule::{SharedRegex, get_memoized_regex},
12};
13
/// Upper bound on the number of entries accepted in a single suppression list.
const MAX_SUPPRESSIONS_COUNT: usize = 30;
/// Upper bound on the length of one suppression, as reported by `str::len` (bytes).
const MAX_SUPPRESSION_LENGTH: usize = 1_000;
16
17#[serde_as]
18#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Default)]
19pub struct Suppressions {
20    #[serde(default)]
21    pub starts_with: Vec<String>,
22    #[serde(default)]
23    pub ends_with: Vec<String>,
24    #[serde(default)]
25    pub exact_match: Vec<String>,
26}
27
28#[derive(Debug, PartialEq, Eq, Error)]
29pub enum SuppressionValidationError {
30    #[error("No more than {} suppressions are allowed", MAX_SUPPRESSIONS_COUNT)]
31    TooManySuppressions,
32
33    #[error("Individual suppressions cannot be empty")]
34    EmptySuppression,
35
36    #[error(
37        "Suppressions cannot be longer than {} characters",
38        MAX_SUPPRESSION_LENGTH
39    )]
40    SuppressionTooLong,
41
42    #[error("Duplicate suppressions are not allowed")]
43    DuplicateSuppression,
44}
45
46pub struct CompiledSuppressions {
47    /// The regex pattern that is used to match against the content of the match.
48    /// We use a regex instead of a set of Vec<String> with all the suppressions because:
49    /// * We get case insensitivity for free
50    /// * Lower-casing the content of the match means re-allocating the strings each time a match is found - which is more expensive than a regex
51    /// * Nathan had benchmarked it and found that regex was faster than lower-casing in the context of included keywords
52    pub suppressions_pattern: Option<SharedRegex>,
53}
54
55impl CompiledSuppressions {
56    pub fn should_match_be_suppressed(
57        &self,
58        match_content: &str,
59        regex_caches: &mut RegexCaches,
60    ) -> bool {
61        if let Some(suppressions) = &self.suppressions_pattern {
62            suppressions
63                .search_half_with(
64                    &mut regex_caches.get(suppressions).cache,
65                    &Input::new(match_content).earliest(true),
66                )
67                .is_some()
68        } else {
69            false
70        }
71    }
72}
73
74fn validate_suppressions_list(suppressions: &[String]) -> Result<(), SuppressionValidationError> {
75    if suppressions.len() > MAX_SUPPRESSIONS_COUNT {
76        return Err(SuppressionValidationError::TooManySuppressions);
77    }
78    if AHashSet::from_iter(suppressions).len() != suppressions.len() {
79        return Err(SuppressionValidationError::DuplicateSuppression);
80    }
81    for suppression in suppressions {
82        if suppression.len() > MAX_SUPPRESSION_LENGTH {
83            return Err(SuppressionValidationError::SuppressionTooLong);
84        }
85        if suppression.is_empty() {
86            return Err(SuppressionValidationError::EmptySuppression);
87        }
88    }
89    Ok(())
90}
91
92impl Suppressions {
93    pub fn compile(&self) -> Result<Option<CompiledSuppressions>, SuppressionValidationError> {
94        validate_suppressions_list(&self.starts_with)?;
95        validate_suppressions_list(&self.ends_with)?;
96        validate_suppressions_list(&self.exact_match)?;
97        if let Some(suppressions_ast) = compile_suppressions_pattern(self) {
98            let pattern = suppressions_ast.to_string();
99            let mut builder = meta::Regex::builder();
100            let regex_builder = builder
101                .syntax(regex_automata::util::syntax::Config::default().case_insensitive(true));
102
103            let suppressions_regex =
104                get_memoized_regex(&pattern, |p| regex_builder.build(p)).unwrap();
105            Ok(Some(CompiledSuppressions {
106                suppressions_pattern: Some(suppressions_regex),
107            }))
108        } else {
109            Ok(None)
110        }
111    }
112}
113
114fn compile_suppressions_pattern(config: &Suppressions) -> Option<Ast> {
115    let mut asts = vec![];
116    asts.extend(suppressions_ast(&config.starts_with, true, false));
117    asts.extend(suppressions_ast(&config.ends_with, false, true));
118    asts.extend(suppressions_ast(&config.exact_match, true, true));
119    if asts.is_empty() {
120        None
121    } else {
122        Some(Ast::Alternation(Alternation { span: span(), asts }))
123    }
124}
125
126fn suppressions_ast(suppressions: &[String], start_anchor: bool, end_anchor: bool) -> Vec<Ast> {
127    let mut asts = vec![];
128    for suppression in suppressions {
129        asts.push(suppression_ast(suppression, start_anchor, end_anchor));
130    }
131    asts
132}
133
134fn suppression_ast(suppression: &str, start_anchor: bool, end_anchor: bool) -> Ast {
135    let mut asts = vec![];
136    if start_anchor {
137        asts.push(Ast::Assertion(Assertion {
138            span: span(),
139            kind: AssertionKind::StartLine,
140        }));
141    }
142    for c in suppression.chars() {
143        asts.push(Ast::Literal(literal_ast(c)));
144    }
145    if end_anchor {
146        asts.push(Ast::Assertion(Assertion {
147            span: span(),
148            kind: AssertionKind::EndLine,
149        }));
150    }
151
152    Ast::Group(Group {
153        span: span(),
154        kind: regex_syntax::ast::GroupKind::NonCapturing(Flags {
155            span: span(),
156            items: vec![],
157        }),
158        ast: Box::new(Ast::Concat(Concat { span: span(), asts })),
159    })
160}
161
#[cfg(test)]
mod test {
    use super::*;

    /// Turns string literals into the owned strings `Suppressions` stores.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| String::from(*item)).collect()
    }

    #[test]
    fn test_suppression_correctly_suppresses_correctly() {
        let compiled = Suppressions {
            starts_with: owned(&["mary"]),
            ends_with: owned(&["@datadoghq.com"]),
            exact_match: owned(&["nathan@yahoo.com"]),
        }
        .compile()
        .unwrap()
        .unwrap();
        let mut caches = RegexCaches::new();

        // (match content, whether it is expected to be suppressed)
        let cases = [
            ("mary@datadoghq.com", true),
            ("nathan@yahoo.com", true),
            ("john@datadoghq.com", true),
            ("john@yahoo.com", false),
            ("john mary john", false),
            ("mary john john", true),
        ];
        for (content, expected) in cases {
            assert_eq!(
                compiled.should_match_be_suppressed(content, &mut caches),
                expected,
                "unexpected suppression result for {content:?}"
            );
        }
    }

    #[test]
    fn test_suppressions_ast_is_built_properly() {
        let config = Suppressions {
            starts_with: owned(&["mary", "john"]),
            ends_with: owned(&["@datadoghq.com"]),
            exact_match: owned(&["nathan@yahoo.com"]),
        };

        let rendered = compile_suppressions_pattern(&config).unwrap().to_string();

        assert_eq!(
            rendered,
            r"(?:^mary)|(?:^john)|(?:@datadoghq\.com$)|(?:^nathan@yahoo\.com$)"
        );
    }
}