// dd_sds/scanner/suppression.rs

1use ahash::AHashSet;
2use regex_automata::{Input, meta};
3use regex_syntax::ast::{Alternation, Assertion, AssertionKind, Ast, Concat, Flags, Group};
4use serde::{Deserialize, Serialize};
5use serde_with::serde_as;
6use thiserror::Error;
7
8use crate::{
9    RegexCaches,
10    ast_utils::{literal_ast, span},
11    scanner::regex_rule::{SharedRegex, get_memoized_regex},
12};
13
/// Upper bound on the number of entries in a single suppression list.
/// Enforced by `validate_suppressions_list`, which is called once per list.
const MAX_SUPPRESSIONS_COUNT: usize = 100;
/// Upper bound on the length of one suppression string.
/// Enforced by `validate_suppressions_list`.
const MAX_SUPPRESSION_LENGTH: usize = 1000;
16
/// User-facing suppression configuration.
///
/// A match is suppressed when its content satisfies any entry of any list.
/// Comparison is case-insensitive (see [`Suppressions::compile`]). Every list
/// is optional when deserializing: a missing field becomes an empty list.
#[serde_as]
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Default)]
pub struct Suppressions {
    /// Suppress a match whose content begins with one of these strings.
    #[serde(default)]
    pub starts_with: Vec<String>,
    /// Suppress a match whose content ends with one of these strings.
    #[serde(default)]
    pub ends_with: Vec<String>,
    /// Suppress a match whose whole content equals one of these strings.
    #[serde(default)]
    pub exact_match: Vec<String>,
}
27
/// Reasons a suppression list is rejected by `validate_suppressions_list`.
#[derive(Debug, PartialEq, Eq, Error)]
pub enum SuppressionValidationError {
    /// A single list contains more than `MAX_SUPPRESSIONS_COUNT` entries.
    #[error("No more than {} suppressions are allowed", MAX_SUPPRESSIONS_COUNT)]
    TooManySuppressions,

    /// A list contains an empty string.
    #[error("Individual suppressions cannot be empty")]
    EmptySuppression,

    /// A suppression exceeds `MAX_SUPPRESSION_LENGTH`.
    #[error(
        "Suppressions cannot be longer than {} characters",
        MAX_SUPPRESSION_LENGTH
    )]
    SuppressionTooLong,

    /// The same string appears more than once within a single list.
    #[error("Duplicate suppressions are not allowed")]
    DuplicateSuppression,
}
45
46pub struct CompiledSuppressions {
47    /// The regex pattern that is used to match against the content of the match.
48    /// We use a regex instead of a set of Vec<String> with all the suppressions because:
49    /// * We get case insensitivity for free
50    /// * Lower-casing the content of the match means re-allocating the strings each time a match is found - which is more expensive than a regex
51    /// * Nathan had benchmarked it and found that regex was faster than lower-casing in the context of included keywords
52    pub suppressions_pattern: Option<SharedRegex>,
53}
54
55impl CompiledSuppressions {
56    pub fn should_match_be_suppressed(
57        &self,
58        match_content: &str,
59        regex_caches: &mut RegexCaches,
60    ) -> bool {
61        if let Some(suppressions) = &self.suppressions_pattern {
62            suppressions
63                .search_half_with(
64                    &mut regex_caches.get(suppressions).cache,
65                    &Input::new(match_content).earliest(true),
66                )
67                .is_some()
68        } else {
69            false
70        }
71    }
72}
73
74fn validate_suppressions_list(suppressions: &[String]) -> Result<(), SuppressionValidationError> {
75    if suppressions.len() > MAX_SUPPRESSIONS_COUNT {
76        return Err(SuppressionValidationError::TooManySuppressions);
77    }
78    if AHashSet::from_iter(suppressions).len() != suppressions.len() {
79        return Err(SuppressionValidationError::DuplicateSuppression);
80    }
81    for suppression in suppressions {
82        if suppression.len() > MAX_SUPPRESSION_LENGTH {
83            return Err(SuppressionValidationError::SuppressionTooLong);
84        }
85        if suppression.is_empty() {
86            return Err(SuppressionValidationError::EmptySuppression);
87        }
88    }
89    Ok(())
90}
91
92impl Suppressions {
93    pub fn compile(&self) -> Result<Option<CompiledSuppressions>, SuppressionValidationError> {
94        validate_suppressions_list(&self.starts_with)?;
95        validate_suppressions_list(&self.ends_with)?;
96        validate_suppressions_list(&self.exact_match)?;
97        if let Some(suppressions_ast) = compile_suppressions_pattern(self) {
98            let pattern = suppressions_ast.to_string();
99            let mut builder = meta::Regex::builder();
100            let regex_builder = builder
101                .syntax(regex_automata::util::syntax::Config::default().case_insensitive(true));
102
103            #[allow(clippy::result_large_err)]
104            let suppressions_regex =
105                get_memoized_regex(&pattern, |p| regex_builder.build(p)).unwrap();
106            Ok(Some(CompiledSuppressions {
107                suppressions_pattern: Some(suppressions_regex),
108            }))
109        } else {
110            Ok(None)
111        }
112    }
113}
114
115fn compile_suppressions_pattern(config: &Suppressions) -> Option<Ast> {
116    let mut asts = vec![];
117    asts.extend(suppressions_ast(&config.starts_with, true, false));
118    asts.extend(suppressions_ast(&config.ends_with, false, true));
119    asts.extend(suppressions_ast(&config.exact_match, true, true));
120    if asts.is_empty() {
121        None
122    } else {
123        Some(Ast::Alternation(Alternation { span: span(), asts }))
124    }
125}
126
127fn suppressions_ast(suppressions: &[String], start_anchor: bool, end_anchor: bool) -> Vec<Ast> {
128    let mut asts = vec![];
129    for suppression in suppressions {
130        asts.push(suppression_ast(suppression, start_anchor, end_anchor));
131    }
132    asts
133}
134
135fn suppression_ast(suppression: &str, start_anchor: bool, end_anchor: bool) -> Ast {
136    let mut asts = vec![];
137    if start_anchor {
138        asts.push(Ast::Assertion(Assertion {
139            span: span(),
140            kind: AssertionKind::StartLine,
141        }));
142    }
143    for c in suppression.chars() {
144        asts.push(Ast::Literal(literal_ast(c)));
145    }
146    if end_anchor {
147        asts.push(Ast::Assertion(Assertion {
148            span: span(),
149            kind: AssertionKind::EndLine,
150        }));
151    }
152
153    Ast::Group(Group {
154        span: span(),
155        kind: regex_syntax::ast::GroupKind::NonCapturing(Flags {
156            span: span(),
157            items: vec![],
158        }),
159        ast: Box::new(Ast::Concat(Concat { span: span(), asts })),
160    })
161}
162
#[cfg(test)]
mod test {

    use super::*;

    /// End-to-end check: compile a configuration and verify which match
    /// contents end up suppressed.
    #[test]
    fn test_suppression_correctly_suppresses_correctly() {
        let compiled_config = Suppressions {
            starts_with: vec!["mary".to_string()],
            ends_with: vec!["@datadoghq.com".to_string()],
            exact_match: vec!["nathan@yahoo.com".to_string()],
        }
        .compile()
        .unwrap()
        .unwrap();
        let mut caches = RegexCaches::new();

        // (match content, expected suppression decision)
        let cases = [
            ("mary@datadoghq.com", true),
            ("nathan@yahoo.com", true),
            ("john@datadoghq.com", true),
            ("john@yahoo.com", false),
            ("john mary john", false),
            ("mary john john", true),
        ];
        for (content, expected) in cases {
            assert_eq!(
                compiled_config.should_match_be_suppressed(content, &mut caches),
                expected,
                "unexpected suppression decision for {:?}",
                content
            );
        }
    }

    /// The generated pattern lists prefix, suffix and exact suppressions in
    /// that order, each in its own non-capturing group with escaped literals.
    #[test]
    fn test_suppressions_ast_is_built_properly() {
        let config = Suppressions {
            starts_with: vec!["mary".to_string(), "john".to_string()],
            ends_with: vec!["@datadoghq.com".to_string()],
            exact_match: vec!["nathan@yahoo.com".to_string()],
        };
        let pattern = compile_suppressions_pattern(&config).unwrap().to_string();
        assert_eq!(
            pattern,
            r"(?:^mary)|(?:^john)|(?:@datadoghq\.com$)|(?:^nathan@yahoo\.com$)"
        );
    }
}
198}