dd_sds/scanner/regex_rule/
config.rs1use crate::proximity_keywords::compile_keywords_proximity_config;
2use crate::scanner::config::RuleConfig;
3use crate::scanner::metrics::RuleMetrics;
4use crate::scanner::regex_rule::compiled::RegexCompiledRule;
5use crate::scanner::regex_rule::regex_store::get_memoized_regex;
6use crate::secondary_validation::jwt_claims_validator::JwtClaimsValidatorConfig;
7use crate::validation::validate_and_create_regex;
8use crate::{CompiledRule, CreateScannerError, Labels};
9use serde::{Deserialize, Serialize};
10use serde_with::DefaultOnNull;
11use serde_with::serde_as;
12use std::sync::Arc;
13use strum::{AsRefStr, EnumIter};
14
/// Look-ahead character count given to a [`ProximityKeywordsConfig`] that is
/// created implicitly because a rule has no proximity-keywords configuration yet
/// (see `RegexRuleConfig::with_included_keywords`).
pub const DEFAULT_KEYWORD_LOOKAHEAD: usize = 30;
16
/// Configuration of a regex-based scanning rule.
///
/// The configuration is turned into a `RegexCompiledRule` by its
/// `RuleConfig::convert_to_compiled_rule` implementation.
#[serde_as]
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
pub struct RegexRuleConfig {
    /// Source of the regex pattern; handed to `get_memoized_regex` at compile time.
    pub pattern: String,
    /// Optional proximity-keywords settings. When `None`, the compiled rule gets
    /// neither included nor excluded keywords.
    pub proximity_keywords: Option<ProximityKeywordsConfig>,
    /// Optional secondary validator, compiled with `SecondaryValidator::compile`
    /// when the rule is compiled.
    pub validator: Option<SecondaryValidator>,
    /// Labels of this rule, combined with the scanner's labels through
    /// `Labels::clone_with_labels` at compile time. A missing or `null` value
    /// deserializes to `Labels::default()`.
    #[serde_as(deserialize_as = "DefaultOnNull")]
    #[serde(default)]
    pub labels: Labels,
}
27
28impl RegexRuleConfig {
29 pub fn new(pattern: &str) -> Self {
30 #[allow(deprecated)]
31 Self {
32 pattern: pattern.to_owned(),
33 proximity_keywords: None,
34 validator: None,
35 labels: Labels::default(),
36 }
37 }
38
39 pub fn with_pattern(&self, pattern: &str) -> Self {
40 self.mutate_clone(|x| x.pattern = pattern.to_string())
41 }
42
43 pub fn with_proximity_keywords(&self, proximity_keywords: ProximityKeywordsConfig) -> Self {
44 self.mutate_clone(|x| x.proximity_keywords = Some(proximity_keywords))
45 }
46
47 pub fn with_labels(&self, labels: Labels) -> Self {
48 self.mutate_clone(|x| x.labels = labels)
49 }
50
51 pub fn build(&self) -> Arc<dyn RuleConfig> {
52 Arc::new(self.clone())
53 }
54
55 fn mutate_clone(&self, modify: impl FnOnce(&mut Self)) -> Self {
56 let mut clone = self.clone();
57 modify(&mut clone);
58 clone
59 }
60
61 pub fn with_included_keywords(
62 &self,
63 keywords: impl IntoIterator<Item = impl AsRef<str>>,
64 ) -> Self {
65 let mut this = self.clone();
66 let mut config = self.get_or_create_proximity_keywords_config();
67 config.included_keywords = keywords
68 .into_iter()
69 .map(|x| x.as_ref().to_string())
70 .collect::<Vec<_>>();
71 this.proximity_keywords = Some(config);
72 this
73 }
74
75 pub fn with_validator(&self, validator: Option<SecondaryValidator>) -> Self {
76 let mut this = self.clone();
77 this.validator = validator;
78 this
79 }
80
81 fn get_or_create_proximity_keywords_config(&self) -> ProximityKeywordsConfig {
82 self.proximity_keywords
83 .clone()
84 .unwrap_or_else(|| ProximityKeywordsConfig {
85 look_ahead_character_count: DEFAULT_KEYWORD_LOOKAHEAD,
86 included_keywords: vec![],
87 excluded_keywords: vec![],
88 })
89 }
90}
91
92impl RuleConfig for RegexRuleConfig {
93 fn convert_to_compiled_rule(
94 &self,
95 rule_index: usize,
96 scanner_labels: Labels,
97 ) -> Result<Box<dyn CompiledRule>, CreateScannerError> {
98 let regex = get_memoized_regex(&self.pattern, validate_and_create_regex)?;
99
100 let rule_labels = scanner_labels.clone_with_labels(self.labels.clone());
101
102 let (included_keywords, excluded_keywords) = self
103 .proximity_keywords
104 .as_ref()
105 .map(|config| compile_keywords_proximity_config(config, &rule_labels))
106 .unwrap_or(Ok((None, None)))?;
107
108 Ok(Box::new(RegexCompiledRule {
109 rule_index,
110 regex,
111 included_keywords,
112 excluded_keywords,
113 validator: self.validator.clone().map(|x| x.compile()),
114 metrics: RuleMetrics::new(&rule_labels),
115 }))
116 }
117}
118
/// Proximity-keywords settings of a regex rule.
///
/// Compiled by `compile_keywords_proximity_config` into the included and excluded
/// keyword data of the compiled rule.
#[serde_as]
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
pub struct ProximityKeywordsConfig {
    /// Required when deserializing (it has no serde default).
    // NOTE(review): presumably the number of characters around a match that are
    // searched for keywords — confirm against the `proximity_keywords` module.
    pub look_ahead_character_count: usize,

    /// Included keywords; a missing or `null` value deserializes to an empty list.
    #[serde_as(deserialize_as = "DefaultOnNull")]
    #[serde(default)]
    pub included_keywords: Vec<String>,

    /// Excluded keywords; a missing or `null` value deserializes to an empty list.
    #[serde_as(deserialize_as = "DefaultOnNull")]
    #[serde(default)]
    pub excluded_keywords: Vec<String>,
}
132
/// Secondary validators that can be attached to a regex rule through
/// `RegexRuleConfig::validator`.
///
/// Serialized as an internally tagged enum: the variant name is stored under the
/// `type` key.
///
/// Keep the variants in alphabetical order by name: the
/// `test_secondary_validator_are_sorted` test in this file compares the
/// declaration order (as yielded by `EnumIter`) against the sorted names.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, EnumIter, AsRefStr)]
#[serde(tag = "type")]
pub enum SecondaryValidator {
    AbaRtnChecksum,
    BrazilianCnpjChecksum,
    BrazilianCpfChecksum,
    BtcChecksum,
    BulgarianEGNChecksum,
    ChineseIdChecksum,
    CoordinationNumberChecksum,
    CzechPersonalIdentificationNumberChecksum,
    CzechTaxIdentificationNumberChecksum,
    DutchBsnChecksum,
    DutchPassportChecksum,
    EthereumChecksum,
    FinnishHetuChecksum,
    FranceNifChecksum,
    FranceSsnChecksum,
    GermanIdsChecksum,
    GermanSvnrChecksum,
    GithubTokenChecksum,
    GreekTinChecksum,
    HungarianTinChecksum,
    IbanChecker,
    IrishPpsChecksum,
    ItalianNationalIdChecksum,
    /// The only variant carrying data: the configuration of the JWT claims validator.
    JwtClaimsValidator { config: JwtClaimsValidatorConfig },
    JwtExpirationChecker,
    LatviaNationalIdChecksum,
    LithuanianPersonalIdentificationNumberChecksum,
    LuhnChecksum,
    LuxembourgIndividualNINChecksum,
    Mod11_10checksum,
    Mod11_2checksum,
    Mod1271_36Checksum,
    Mod27_26checksum,
    Mod37_2checksum,
    Mod37_36checksum,
    Mod661_26checksum,
    Mod97_10checksum,
    MoneroAddress,
    NhsCheckDigit,
    NirChecksum,
    PolishNationalIdChecksum,
    PolishNipChecksum,
    PortugueseTaxIdChecksum,
    RodneCisloNumberChecksum,
    RomanianPersonalNumericCode,
    SlovenianPINChecksum,
    SpanishDniChecksum,
    SpanishNussChecksum,
    SwedenPINChecksum,
}
186
#[cfg(test)]
mod test {
    use crate::{AwsType, CustomHttpConfig, MatchValidationType, RootRuleConfig};
    use strum::IntoEnumIterator;

    use super::*;

    /// `with_pattern` replaces the pattern passed to `new`.
    #[test]
    fn should_override_pattern() {
        let overridden = RegexRuleConfig::new("123").with_pattern("456");
        assert_eq!(overridden.pattern, "456");
    }

    /// `new` leaves every optional field unset and the labels empty.
    #[test]
    #[allow(deprecated)]
    fn should_have_default() {
        let expected = RegexRuleConfig {
            pattern: "123".to_string(),
            proximity_keywords: None,
            validator: None,
            labels: Labels::empty(),
        };
        let rule_config = RegexRuleConfig::new("123");
        assert_eq!(rule_config, expected);
    }

    /// Missing and `null` keyword lists both deserialize to empty lists.
    #[test]
    fn proximity_keywords_should_have_default() {
        let expected = ProximityKeywordsConfig {
            look_ahead_character_count: 0,
            included_keywords: vec![],
            excluded_keywords: vec![],
        };

        let json_configs = [
            r#"{"look_ahead_character_count": 0}"#,
            r#"{"look_ahead_character_count": 0, "excluded_keywords": null, "included_keywords": null}"#,
        ];
        for json_config in json_configs {
            let parsed: ProximityKeywordsConfig = serde_json::from_str(json_config).unwrap();
            assert_eq!(parsed, expected);
        }
    }

    /// The third-party active checker set on a root rule config is stored and
    /// returned by its getter, for both custom HTTP and AWS validation types.
    #[test]
    #[allow(deprecated)]
    fn test_third_party_active_checker() {
        let http_config = CustomHttpConfig::default().with_endpoint("http://test.com".to_string());

        let custom_http = MatchValidationType::CustomHttp(http_config.clone());
        let with_custom_http = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(custom_http.clone());

        assert_eq!(
            with_custom_http.third_party_active_checker,
            Some(custom_http.clone())
        );
        assert_eq!(with_custom_http.match_validation_type, None);
        assert_eq!(
            with_custom_http.get_third_party_active_checker(),
            Some(&custom_http)
        );

        let aws = MatchValidationType::Aws(AwsType::AwsId);
        let with_aws = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(aws.clone());

        assert_eq!(with_aws.third_party_active_checker, Some(aws.clone()));
        assert_eq!(with_aws.get_third_party_active_checker(), Some(&aws));

        let rebuilt = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(MatchValidationType::CustomHttp(http_config.clone()));

        assert_eq!(
            rebuilt.get_third_party_active_checker(),
            Some(&MatchValidationType::CustomHttp(http_config.clone()))
        );
    }

    /// Iterating the enum yields the unit variants checked here.
    #[test]
    fn test_secondary_validator_enum_iter() {
        let validators: Vec<SecondaryValidator> = SecondaryValidator::iter().collect();
        let expected_members = [
            SecondaryValidator::GithubTokenChecksum,
            SecondaryValidator::JwtExpirationChecker,
        ];
        for member in &expected_members {
            assert!(validators.contains(member));
        }
    }

    /// The declaration order of the validators matches their sorted names.
    #[test]
    fn test_secondary_validator_are_sorted() {
        let validator_names: Vec<String> = SecondaryValidator::iter()
            .map(|validator| validator.as_ref().to_string())
            .collect();
        let sorted_validator_names = {
            let mut names = validator_names.clone();
            names.sort();
            names
        };
        assert_eq!(
            sorted_validator_names, validator_names,
            "Secondary validators should be sorted by alphabetical order, but it's not the case, expected order:"
        );
    }

    /// Serializing the same JWT claims config twice gives the same string, with
    /// the claim names appearing in sorted order.
    #[test]
    fn test_jwt_claims_validator_config_serialization_order() {
        use crate::secondary_validation::jwt_claims_validator::ClaimRequirement;
        use std::collections::BTreeMap;

        // Entries are deliberately listed out of alphabetical order.
        let required_claims = [
            ("zzz".to_string(), ClaimRequirement::Present),
            (
                "aaa".to_string(),
                ClaimRequirement::ExactValue("test".to_string()),
            ),
            (
                "mmm".to_string(),
                ClaimRequirement::RegexMatch(r"^test.*".to_string()),
            ),
        ]
        .into_iter()
        .collect::<BTreeMap<_, _>>();

        let config = JwtClaimsValidatorConfig {
            required_claims,
            required_headers: BTreeMap::new(),
        };

        let serialized1 = serde_json::to_string(&config).unwrap();
        let serialized2 = serde_json::to_string(&config).unwrap();

        assert_eq!(serialized1, serialized2, "Serialization should be stable");

        let position_of = |claim: &str| serialized1.find(claim).unwrap();
        assert!(position_of("aaa") < position_of("mmm"));
        assert!(position_of("mmm") < position_of("zzz"));
    }
}