dd_sds/scanner/
suppression.rs1use ahash::AHashSet;
2use regex_automata::{Input, meta};
3use regex_syntax::ast::{Alternation, Assertion, AssertionKind, Ast, Concat, Flags, Group};
4use serde::{Deserialize, Serialize};
5use serde_with::serde_as;
6use thiserror::Error;
7
8use crate::{
9 RegexCaches,
10 ast_utils::{literal_ast, span},
11 scanner::regex_rule::{SharedRegex, get_memoized_regex},
12};
13
14const MAX_SUPPRESSIONS_COUNT: usize = 30;
15const MAX_SUPPRESSION_LENGTH: usize = 1000;
16
17#[serde_as]
18#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Default)]
19pub struct Suppressions {
20 #[serde(default)]
21 pub starts_with: Vec<String>,
22 #[serde(default)]
23 pub ends_with: Vec<String>,
24 #[serde(default)]
25 pub exact_match: Vec<String>,
26}
27
28#[derive(Debug, PartialEq, Eq, Error)]
29pub enum SuppressionValidationError {
30 #[error("No more than {} suppressions are allowed", MAX_SUPPRESSIONS_COUNT)]
31 TooManySuppressions,
32
33 #[error("Individual suppressions cannot be empty")]
34 EmptySuppression,
35
36 #[error(
37 "Suppressions cannot be longer than {} characters",
38 MAX_SUPPRESSION_LENGTH
39 )]
40 SuppressionTooLong,
41
42 #[error("Duplicate suppressions are not allowed")]
43 DuplicateSuppression,
44}
45
46pub struct CompiledSuppressions {
47 pub suppressions_pattern: Option<SharedRegex>,
53}
54
55impl CompiledSuppressions {
56 pub fn should_match_be_suppressed(
57 &self,
58 match_content: &str,
59 regex_caches: &mut RegexCaches,
60 ) -> bool {
61 if let Some(suppressions) = &self.suppressions_pattern {
62 suppressions
63 .search_half_with(
64 &mut regex_caches.get(suppressions).cache,
65 &Input::new(match_content).earliest(true),
66 )
67 .is_some()
68 } else {
69 false
70 }
71 }
72}
73
74fn validate_suppressions_list(suppressions: &[String]) -> Result<(), SuppressionValidationError> {
75 if suppressions.len() > MAX_SUPPRESSIONS_COUNT {
76 return Err(SuppressionValidationError::TooManySuppressions);
77 }
78 if AHashSet::from_iter(suppressions).len() != suppressions.len() {
79 return Err(SuppressionValidationError::DuplicateSuppression);
80 }
81 for suppression in suppressions {
82 if suppression.len() > MAX_SUPPRESSION_LENGTH {
83 return Err(SuppressionValidationError::SuppressionTooLong);
84 }
85 if suppression.is_empty() {
86 return Err(SuppressionValidationError::EmptySuppression);
87 }
88 }
89 Ok(())
90}
91
92impl Suppressions {
93 pub fn compile(&self) -> Result<Option<CompiledSuppressions>, SuppressionValidationError> {
94 validate_suppressions_list(&self.starts_with)?;
95 validate_suppressions_list(&self.ends_with)?;
96 validate_suppressions_list(&self.exact_match)?;
97 if let Some(suppressions_ast) = compile_suppressions_pattern(self) {
98 let pattern = suppressions_ast.to_string();
99 let mut builder = meta::Regex::builder();
100 let regex_builder = builder
101 .syntax(regex_automata::util::syntax::Config::default().case_insensitive(true));
102
103 let suppressions_regex =
104 get_memoized_regex(&pattern, |p| regex_builder.build(p)).unwrap();
105 Ok(Some(CompiledSuppressions {
106 suppressions_pattern: Some(suppressions_regex),
107 }))
108 } else {
109 Ok(None)
110 }
111 }
112}
113
114fn compile_suppressions_pattern(config: &Suppressions) -> Option<Ast> {
115 let mut asts = vec![];
116 asts.extend(suppressions_ast(&config.starts_with, true, false));
117 asts.extend(suppressions_ast(&config.ends_with, false, true));
118 asts.extend(suppressions_ast(&config.exact_match, true, true));
119 if asts.is_empty() {
120 None
121 } else {
122 Some(Ast::Alternation(Alternation { span: span(), asts }))
123 }
124}
125
126fn suppressions_ast(suppressions: &[String], start_anchor: bool, end_anchor: bool) -> Vec<Ast> {
127 let mut asts = vec![];
128 for suppression in suppressions {
129 asts.push(suppression_ast(suppression, start_anchor, end_anchor));
130 }
131 asts
132}
133
134fn suppression_ast(suppression: &str, start_anchor: bool, end_anchor: bool) -> Ast {
135 let mut asts = vec![];
136 if start_anchor {
137 asts.push(Ast::Assertion(Assertion {
138 span: span(),
139 kind: AssertionKind::StartLine,
140 }));
141 }
142 for c in suppression.chars() {
143 asts.push(Ast::Literal(literal_ast(c)));
144 }
145 if end_anchor {
146 asts.push(Ast::Assertion(Assertion {
147 span: span(),
148 kind: AssertionKind::EndLine,
149 }));
150 }
151
152 Ast::Group(Group {
153 span: span(),
154 kind: regex_syntax::ast::GroupKind::NonCapturing(Flags {
155 span: span(),
156 items: vec![],
157 }),
158 ast: Box::new(Ast::Concat(Concat { span: span(), asts })),
159 })
160}
161
#[cfg(test)]
mod test {

    use super::*;

    /// Shorthand for turning string literals into an owned list.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    #[test]
    fn test_suppression_correctly_suppresses_correctly() {
        let compiled_config = Suppressions {
            starts_with: owned(&["mary"]),
            ends_with: owned(&["@datadoghq.com"]),
            exact_match: owned(&["nathan@yahoo.com"]),
        }
        .compile()
        .unwrap()
        .unwrap();
        let mut caches = RegexCaches::new();

        // (matched content, whether it is expected to be suppressed)
        let cases = [
            ("mary@datadoghq.com", true),
            ("nathan@yahoo.com", true),
            ("john@datadoghq.com", true),
            ("john@yahoo.com", false),
            ("john mary john", false),
            ("mary john john", true),
        ];
        for &(content, expected) in &cases {
            assert_eq!(
                compiled_config.should_match_be_suppressed(content, &mut caches),
                expected,
                "unexpected suppression decision for {:?}",
                content
            );
        }
    }

    #[test]
    fn test_suppressions_ast_is_built_properly() {
        let config = Suppressions {
            starts_with: owned(&["mary", "john"]),
            ends_with: owned(&["@datadoghq.com"]),
            exact_match: owned(&["nathan@yahoo.com"]),
        };

        let rendered = compile_suppressions_pattern(&config).unwrap().to_string();

        assert_eq!(
            rendered,
            r"(?:^mary)|(?:^john)|(?:@datadoghq\.com$)|(?:^nathan@yahoo\.com$)"
        );
    }
}