dd_sds/scanner/regex_rule/
config.rs1use crate::proximity_keywords::compile_keywords_proximity_config;
2use crate::scanner::config::RuleConfig;
3use crate::scanner::metrics::RuleMetrics;
4use crate::scanner::regex_rule::compiled::RegexCompiledRule;
5use crate::scanner::regex_rule::regex_store::get_memoized_regex;
6use crate::secondary_validation::Validator;
7use crate::validation::validate_and_create_regex;
8use crate::{CompiledRule, CreateScannerError, Labels};
9use serde::{Deserialize, Serialize};
10use serde_with::serde_as;
11use serde_with::DefaultOnNull;
12use std::sync::Arc;
13use strum::{AsRefStr, EnumIter};
14
/// Look-ahead character count given to a [`ProximityKeywordsConfig`] that has to be
/// created implicitly because the rule had none (see
/// `RegexRuleConfig::with_included_keywords`).
15pub const DEFAULT_KEYWORD_LOOKAHEAD: usize = 30;
16
/// Serializable configuration of a single regex-based rule.
///
/// Its `RuleConfig` implementation in this file turns it into a `RegexCompiledRule`.
17#[serde_as]
18#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
19pub struct RegexRuleConfig {
    /// Regex pattern source; it is validated and compiled when the rule is compiled.
20 pub pattern: String,
    /// Optional proximity-keyword settings; when `None`, the compiled rule gets no
    /// included or excluded keywords.
21 pub proximity_keywords: Option<ProximityKeywordsConfig>,
    /// Optional secondary validator, handed to the compiled rule as a `Validator`.
22 pub validator: Option<SecondaryValidator>,
    /// Rule-level labels, combined with the scanner labels when the rule is compiled.
    /// When deserializing, a missing field (`#[serde(default)]`) or an explicit `null`
    /// (`DefaultOnNull`) both produce the default labels.
23 #[serde_as(deserialize_as = "DefaultOnNull")]
24 #[serde(default)]
25 pub labels: Labels,
26}
27
28impl RegexRuleConfig {
29 pub fn new(pattern: &str) -> Self {
30 #[allow(deprecated)]
31 Self {
32 pattern: pattern.to_owned(),
33 proximity_keywords: None,
34 validator: None,
35 labels: Labels::default(),
36 }
37 }
38
39 pub fn with_pattern(&self, pattern: &str) -> Self {
40 self.mutate_clone(|x| x.pattern = pattern.to_string())
41 }
42
43 pub fn with_proximity_keywords(&self, proximity_keywords: ProximityKeywordsConfig) -> Self {
44 self.mutate_clone(|x| x.proximity_keywords = Some(proximity_keywords))
45 }
46
47 pub fn with_labels(&self, labels: Labels) -> Self {
48 self.mutate_clone(|x| x.labels = labels)
49 }
50
51 pub fn build(&self) -> Arc<dyn RuleConfig> {
52 Arc::new(self.clone())
53 }
54
55 fn mutate_clone(&self, modify: impl FnOnce(&mut Self)) -> Self {
56 let mut clone = self.clone();
57 modify(&mut clone);
58 clone
59 }
60
61 pub fn with_included_keywords(
62 &self,
63 keywords: impl IntoIterator<Item = impl AsRef<str>>,
64 ) -> Self {
65 let mut this = self.clone();
66 let mut config = self.get_or_create_proximity_keywords_config();
67 config.included_keywords = keywords
68 .into_iter()
69 .map(|x| x.as_ref().to_string())
70 .collect::<Vec<_>>();
71 this.proximity_keywords = Some(config);
72 this
73 }
74
75 pub fn with_validator(&self, validator: Option<SecondaryValidator>) -> Self {
76 let mut this = self.clone();
77 this.validator = validator;
78 this
79 }
80
81 fn get_or_create_proximity_keywords_config(&self) -> ProximityKeywordsConfig {
82 self.proximity_keywords
83 .clone()
84 .unwrap_or_else(|| ProximityKeywordsConfig {
85 look_ahead_character_count: DEFAULT_KEYWORD_LOOKAHEAD,
86 included_keywords: vec![],
87 excluded_keywords: vec![],
88 })
89 }
90}
91
92impl RuleConfig for RegexRuleConfig {
93 fn convert_to_compiled_rule(
94 &self,
95 rule_index: usize,
96 scanner_labels: Labels,
97 ) -> Result<Box<dyn CompiledRule>, CreateScannerError> {
98 let regex = get_memoized_regex(&self.pattern, validate_and_create_regex)?;
99
100 let rule_labels = scanner_labels.clone_with_labels(self.labels.clone());
101
102 let (included_keywords, excluded_keywords) = self
103 .proximity_keywords
104 .as_ref()
105 .map(|config| compile_keywords_proximity_config(config, &rule_labels))
106 .unwrap_or(Ok((None, None)))?;
107
108 Ok(Box::new(RegexCompiledRule {
109 rule_index,
110 regex,
111 included_keywords,
112 excluded_keywords,
113 validator: self
114 .validator
115 .clone()
116 .map(|x| Arc::new(x) as Arc<dyn Validator>),
117 metrics: RuleMetrics::new(&rule_labels),
118 }))
119 }
120}
121
/// Proximity-keyword settings attached to a [`RegexRuleConfig`].
///
/// Only `look_ahead_character_count` is required when deserializing; both keyword
/// lists fall back to an empty `Vec` when the field is missing or `null`.
122#[serde_as]
123#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
124pub struct ProximityKeywordsConfig {
    // NOTE(review): presumably the number of characters around a match that are
    // searched for keywords — confirm against `compile_keywords_proximity_config`.
125 pub look_ahead_character_count: usize,
126
    /// Included keywords; a missing or `null` field deserializes to an empty list.
127 #[serde_as(deserialize_as = "DefaultOnNull")]
128 #[serde(default)]
129 pub included_keywords: Vec<String>,
130
    /// Excluded keywords; a missing or `null` field deserializes to an empty list.
131 #[serde_as(deserialize_as = "DefaultOnNull")]
132 #[serde(default)]
133 pub excluded_keywords: Vec<String>,
134}
135
/// Secondary validators that can be attached to a [`RegexRuleConfig`].
///
/// Serialized in serde's internally tagged form, i.e. `{"type": "<VariantName>"}`,
/// so variant names are part of the wire format.
///
/// Variants must stay sorted by name: a unit test in this file compares the
/// declaration order with the `sort()`-ed list of `AsRefStr` names (plain `String`
/// ordering, which is why `Mod11_10checksum` comes before `Mod11_2checksum`).
136#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, EnumIter, AsRefStr)]
137#[serde(tag = "type")]
138pub enum SecondaryValidator {
139 AbaRtnChecksum,
140 BrazilianCnpjChecksum,
141 BrazilianCpfChecksum,
142 BtcChecksum,
143 BulgarianEGNChecksum,
144 ChineseIdChecksum,
145 CoordinationNumberChecksum,
146 CzechPersonalIdentificationNumberChecksum,
147 CzechTaxIdentificationNumberChecksum,
148 DutchBsnChecksum,
149 DutchPassportChecksum,
150 EthereumChecksum,
151 FinnishHetuChecksum,
152 FranceNifChecksum,
153 FranceSsnChecksum,
154 GermanIdsChecksum,
155 GermanSvnrChecksum,
156 GithubTokenChecksum,
157 GreekTinChecksum,
158 HungarianTinChecksum,
159 IbanChecker,
160 IrishPpsChecksum,
161 ItalianNationalIdChecksum,
162 JwtExpirationChecker,
163 LatviaNationalIdChecksum,
164 LithuanianPersonalIdentificationNumberChecksum,
165 LuhnChecksum,
166 LuxembourgIndividualNINChecksum,
167 Mod11_10checksum,
168 Mod11_2checksum,
169 Mod1271_36Checksum,
170 Mod27_26checksum,
171 Mod37_2checksum,
172 Mod37_36checksum,
173 Mod661_26checksum,
174 Mod97_10checksum,
175 MoneroAddress,
176 NhsCheckDigit,
177 NirChecksum,
178 PolishNationalIdChecksum,
179 PolishNipChecksum,
180 PortugueseTaxIdChecksum,
181 RodneCisloNumberChecksum,
182 RomanianPersonalNumericCode,
183 SlovenianPINChecksum,
184 SpanishDniChecksum,
185 SpanishNussChecksum,
186 SwedenPINChecksum,
187}
188
#[cfg(test)]
mod test {
    use crate::{AwsType, CustomHttpConfig, MatchValidationType, RootRuleConfig};
    use strum::IntoEnumIterator;

    use super::*;

    /// `with_pattern` replaces the pattern passed to `new`.
    #[test]
    fn should_override_pattern() {
        let overridden = RegexRuleConfig::new("123").with_pattern("456");
        assert_eq!(overridden.pattern, "456");
    }

    /// `new` leaves every optional setting unset and uses empty labels.
    #[test]
    #[allow(deprecated)]
    fn should_have_default() {
        let expected = RegexRuleConfig {
            pattern: "123".to_string(),
            proximity_keywords: None,
            validator: None,
            labels: Labels::empty(),
        };
        assert_eq!(RegexRuleConfig::new("123"), expected);
    }

    /// Missing and `null` keyword lists both deserialize to empty vectors.
    #[test]
    fn proximity_keywords_should_have_default() {
        let expected = ProximityKeywordsConfig {
            look_ahead_character_count: 0,
            included_keywords: vec![],
            excluded_keywords: vec![],
        };

        let json_configs = [
            r#"{"look_ahead_character_count": 0}"#,
            r#"{"look_ahead_character_count": 0, "excluded_keywords": null, "included_keywords": null}"#,
        ];
        for json_config in json_configs {
            let parsed: ProximityKeywordsConfig = serde_json::from_str(json_config).unwrap();
            assert_eq!(parsed, expected);
        }
    }

    /// The third-party active checker set on a root rule config is stored and returned
    /// by its getter, for both the custom HTTP and the AWS validation types.
    #[test]
    #[allow(deprecated)]
    fn test_third_party_active_checker() {
        let http_config = CustomHttpConfig::default().with_endpoint("http://test.com".to_string());

        let http_validation = MatchValidationType::CustomHttp(http_config.clone());
        let http_rule = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(http_validation.clone());
        assert_eq!(
            http_rule.third_party_active_checker,
            Some(http_validation.clone())
        );
        assert_eq!(http_rule.match_validation_type, None);
        assert_eq!(
            http_rule.get_third_party_active_checker(),
            Some(&http_validation)
        );

        let aws_validation = MatchValidationType::Aws(AwsType::AwsId);
        let aws_rule = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(aws_validation.clone());
        assert_eq!(
            aws_rule.third_party_active_checker,
            Some(aws_validation.clone())
        );
        assert_eq!(
            aws_rule.get_third_party_active_checker(),
            Some(&aws_validation)
        );

        let rebuilt_rule = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(MatchValidationType::CustomHttp(http_config.clone()));
        let expected_checker = MatchValidationType::CustomHttp(http_config.clone());
        assert_eq!(
            rebuilt_rule.get_third_party_active_checker(),
            Some(&expected_checker)
        );
    }

    /// Iterating over the enum yields the expected validators.
    #[test]
    fn test_secondary_validator_enum_iter() {
        let all_validators: Vec<SecondaryValidator> = SecondaryValidator::iter().collect();
        let must_be_present = [
            SecondaryValidator::GithubTokenChecksum,
            SecondaryValidator::JwtExpirationChecker,
        ];
        for validator in &must_be_present {
            assert!(all_validators.contains(validator));
        }
    }

    /// The declaration order of the variants must match their sorted names.
    #[test]
    fn test_secondary_validator_are_sorted() {
        let declared_names: Vec<String> = SecondaryValidator::iter()
            .map(|a| a.as_ref().to_string())
            .collect();
        let mut alphabetical_names = declared_names.clone();
        alphabetical_names.sort();
        assert_eq!(alphabetical_names, declared_names, "Secondary validators should be sorted by alphabetical order, but it's not the case, expected order:");
    }
}