1use crate::proximity_keywords::compile_keywords_proximity_config;
2use crate::scanner::config::RuleConfig;
3use crate::scanner::metrics::RuleMetrics;
4use crate::scanner::regex_rule::compiled::RegexCompiledRule;
5use crate::scanner::regex_rule::regex_store::get_memoized_regex;
6use crate::validation::validate_and_create_regex;
7use crate::{CompiledRule, CreateScannerError, Labels};
8use serde::{Deserialize, Serialize};
9use serde_with::DefaultOnNull;
10use serde_with::serde_as;
11use std::sync::Arc;
12use strum::{AsRefStr, EnumIter};
13
/// Value given to `look_ahead_character_count` when
/// [`RegexRuleConfig::with_included_keywords`] has to create a
/// [`ProximityKeywordsConfig`] because none was configured yet.
pub const DEFAULT_KEYWORD_LOOKAHEAD: usize = 30;
15
/// Serializable configuration of a regex-based rule.
///
/// It is turned into a compiled rule through its [`RuleConfig`] implementation.
#[serde_as]
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
pub struct RegexRuleConfig {
    /// Source text of the regular expression; it is validated and compiled when
    /// the rule itself is compiled.
    pub pattern: String,
    /// Optional proximity keyword settings, compiled together with the rule when set.
    pub proximity_keywords: Option<ProximityKeywordsConfig>,
    /// Optional secondary validator, compiled and attached to the compiled rule when set.
    pub validator: Option<SecondaryValidator>,
    /// Labels of this rule, combined with the scanner labels through
    /// `clone_with_labels` at compile time.
    ///
    /// Deserializes to the default value when the field is missing or `null`.
    #[serde_as(deserialize_as = "DefaultOnNull")]
    #[serde(default)]
    pub labels: Labels,
    /// Optional capture group name, handed unchanged to the compiled rule.
    ///
    /// NOTE(review): presumably names the group of `pattern` whose span is
    /// reported as the match — confirm against the compiled rule.
    pub pattern_capture_group: Option<String>,
}
27
28impl RegexRuleConfig {
29 pub fn new(pattern: &str) -> Self {
30 #[allow(deprecated)]
31 Self {
32 pattern: pattern.to_owned(),
33 proximity_keywords: None,
34 validator: None,
35 labels: Labels::default(),
36 pattern_capture_group: None,
37 }
38 }
39
40 pub fn with_pattern(&self, pattern: &str) -> Self {
41 self.mutate_clone(|x| x.pattern = pattern.to_string())
42 }
43
44 pub fn with_proximity_keywords(&self, proximity_keywords: ProximityKeywordsConfig) -> Self {
45 self.mutate_clone(|x| x.proximity_keywords = Some(proximity_keywords))
46 }
47
48 pub fn with_labels(&self, labels: Labels) -> Self {
49 self.mutate_clone(|x| x.labels = labels)
50 }
51
52 pub fn with_pattern_capture_group(&self, pattern_capture_group: &str) -> Self {
53 self.mutate_clone(|x| x.pattern_capture_group = Some(pattern_capture_group.to_string()))
54 }
55
56 pub fn build(&self) -> Arc<dyn RuleConfig> {
57 Arc::new(self.clone())
58 }
59
60 fn mutate_clone(&self, modify: impl FnOnce(&mut Self)) -> Self {
61 let mut clone = self.clone();
62 modify(&mut clone);
63 clone
64 }
65
66 pub fn with_included_keywords(
67 &self,
68 keywords: impl IntoIterator<Item = impl AsRef<str>>,
69 ) -> Self {
70 let mut this = self.clone();
71 let mut config = self.get_or_create_proximity_keywords_config();
72 config.included_keywords = keywords
73 .into_iter()
74 .map(|x| x.as_ref().to_string())
75 .collect::<Vec<_>>();
76 this.proximity_keywords = Some(config);
77 this
78 }
79
80 pub fn with_validator(&self, validator: Option<SecondaryValidator>) -> Self {
81 let mut this = self.clone();
82 this.validator = validator;
83 this
84 }
85
86 fn get_or_create_proximity_keywords_config(&self) -> ProximityKeywordsConfig {
87 self.proximity_keywords
88 .clone()
89 .unwrap_or_else(|| ProximityKeywordsConfig {
90 look_ahead_character_count: DEFAULT_KEYWORD_LOOKAHEAD,
91 included_keywords: vec![],
92 excluded_keywords: vec![],
93 })
94 }
95}
96
97impl RuleConfig for RegexRuleConfig {
98 fn convert_to_compiled_rule(
99 &self,
100 rule_index: usize,
101 scanner_labels: Labels,
102 ) -> Result<Box<dyn CompiledRule>, CreateScannerError> {
103 let regex = get_memoized_regex(&self.pattern, validate_and_create_regex)?;
104
105 let rule_labels = scanner_labels.clone_with_labels(self.labels.clone());
106
107 let (included_keywords, excluded_keywords) = self
108 .proximity_keywords
109 .as_ref()
110 .map(|config| compile_keywords_proximity_config(config, &rule_labels))
111 .unwrap_or(Ok((None, None)))?;
112
113 Ok(Box::new(RegexCompiledRule {
114 rule_index,
115 regex,
116 included_keywords,
117 excluded_keywords,
118 validator: self.validator.clone().map(|x| x.compile()),
119 metrics: RuleMetrics::new(&rule_labels),
120 pattern_capture_group: self.pattern_capture_group.clone(),
121 }))
122 }
123}
124
/// Serializable proximity keyword settings of a regex rule.
///
/// Only `look_ahead_character_count` is required when deserializing; both
/// keyword lists fall back to an empty list when they are missing or `null`.
#[serde_as]
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
pub struct ProximityKeywordsConfig {
    /// Look-ahead size, counted in characters as the name states.
    ///
    /// NOTE(review): the exact window semantics live in the proximity keyword
    /// compiler, not in this file — confirm there.
    pub look_ahead_character_count: usize,

    /// Included keywords; empty when missing or `null` in the input.
    #[serde_as(deserialize_as = "DefaultOnNull")]
    #[serde(default)]
    pub included_keywords: Vec<String>,

    /// Excluded keywords; empty when missing or `null` in the input.
    #[serde_as(deserialize_as = "DefaultOnNull")]
    #[serde(default)]
    pub excluded_keywords: Vec<String>,
}
138
/// Secondary validators that can be attached to a [`RegexRuleConfig`].
///
/// Serialized as an internally tagged enum: the variant name is stored under
/// the `type` key.
///
/// Variants must stay sorted by name: the `test_secondary_validator_are_sorted`
/// test compares the declaration order with the sorted list of variant names.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, EnumIter, AsRefStr)]
#[serde(tag = "type")]
pub enum SecondaryValidator {
    AbaRtnChecksum,
    BrazilianCnpjChecksum,
    BrazilianCpfChecksum,
    BtcChecksum,
    BulgarianEGNChecksum,
    ChineseIdChecksum,
    CoordinationNumberChecksum,
    CzechPersonalIdentificationNumberChecksum,
    CzechTaxIdentificationNumberChecksum,
    DutchBsnChecksum,
    DutchPassportChecksum,
    EntropyCheck,
    EthereumChecksum,
    FinnishHetuChecksum,
    FranceNifChecksum,
    FranceSsnChecksum,
    GermanIdsChecksum,
    GermanSvnrChecksum,
    GithubTokenChecksum,
    GreekTinChecksum,
    HungarianTinChecksum,
    IbanChecker,
    IrishPpsChecksum,
    ItalianNationalIdChecksum,
    /// The only variant carrying data: its settings are stored under `config`.
    JwtClaimsValidator { config: JwtClaimsValidatorConfig },
    JwtExpirationChecker,
    LatviaNationalIdChecksum,
    LithuanianPersonalIdentificationNumberChecksum,
    LuhnChecksum,
    LuxembourgIndividualNINChecksum,
    Mod11_10checksum,
    Mod11_2checksum,
    Mod1271_36Checksum,
    Mod27_26checksum,
    Mod37_2checksum,
    Mod37_36checksum,
    Mod661_26checksum,
    Mod97_10checksum,
    MoneroAddress,
    NhsCheckDigit,
    NirChecksum,
    PolishNationalIdChecksum,
    PolishNipChecksum,
    PortugueseTaxIdChecksum,
    RodneCisloNumberChecksum,
    RomanianPersonalNumericCode,
    SlovenianPINChecksum,
    SpanishDniChecksum,
    SpanishNussChecksum,
    SwedenPINChecksum,
}
193
/// Requirement attached to a single entry of [`JwtClaimsValidatorConfig`].
///
/// Serialized as an adjacently tagged enum: the variant name is stored under
/// `type` and its payload, when there is one, under `config`.
///
/// NOTE(review): the variant descriptions below follow the variant names; the
/// actual checks are implemented by the validator, outside this file.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
#[serde(tag = "type", content = "config")]
pub enum ClaimRequirement {
    /// No payload; presumably the entry only has to be present.
    Present,
    /// Carries a string; presumably the value the entry must equal exactly.
    ExactValue(String),
    /// Carries a string; presumably a regex pattern the entry's value must match.
    RegexMatch(String),
}
204
/// Settings carried by [`SecondaryValidator::JwtClaimsValidator`].
///
/// Both maps default to empty when missing from the input. They are `BTreeMap`s,
/// so entries are serialized in key order and the output is stable.
#[derive(Serialize, Deserialize, Default, Clone, Debug, PartialEq)]
pub struct JwtClaimsValidatorConfig {
    /// Requirements keyed by header name.
    #[serde(default)]
    pub required_headers: std::collections::BTreeMap<String, ClaimRequirement>,
    /// Requirements keyed by claim name.
    #[serde(default)]
    pub required_claims: std::collections::BTreeMap<String, ClaimRequirement>,
}
212
#[cfg(test)]
mod test {
    use crate::{AwsType, CustomHttpConfig, MatchValidationType, RootRuleConfig};
    use std::collections::BTreeMap;
    use strum::IntoEnumIterator;

    use super::*;

    /// `with_pattern` replaces the pattern given to `new`.
    #[test]
    fn should_override_pattern() {
        let overridden = RegexRuleConfig::new("123").with_pattern("456");
        assert_eq!(overridden.pattern, "456");
    }

    /// `new` leaves every optional setting unset.
    #[test]
    #[allow(deprecated)]
    fn should_have_default() {
        let expected = RegexRuleConfig {
            pattern: "123".to_string(),
            proximity_keywords: None,
            validator: None,
            labels: Labels::empty(),
            pattern_capture_group: None,
        };

        assert_eq!(RegexRuleConfig::new("123"), expected);
    }

    /// `with_pattern_capture_group` only sets the capture group.
    #[test]
    fn should_use_capture_group() {
        let expected = RegexRuleConfig {
            pattern: "hey (?<capture_group>world)".to_string(),
            proximity_keywords: None,
            validator: None,
            labels: Labels::empty(),
            pattern_capture_group: Some("capture_group".to_string()),
        };

        let actual = RegexRuleConfig::new("hey (?<capture_group>world)")
            .with_pattern_capture_group("capture_group");

        assert_eq!(actual, expected);
    }

    /// Missing and `null` keyword lists both deserialize to empty lists.
    #[test]
    fn proximity_keywords_should_have_default() {
        let expected = ProximityKeywordsConfig {
            look_ahead_character_count: 0,
            included_keywords: vec![],
            excluded_keywords: vec![],
        };

        let json_inputs = [
            r#"{"look_ahead_character_count": 0}"#,
            r#"{"look_ahead_character_count": 0, "excluded_keywords": null, "included_keywords": null}"#,
        ];

        for json_config in json_inputs {
            let parsed: ProximityKeywordsConfig = serde_json::from_str(json_config).unwrap();
            assert_eq!(parsed, expected);
        }
    }

    /// The third-party active checker set on a root rule is stored and returned by the getter.
    #[test]
    #[allow(deprecated)]
    fn test_third_party_active_checker() {
        let http_config = CustomHttpConfig::default().with_endpoint("http://test.com".to_string());

        // Custom HTTP checker.
        let http_validation = MatchValidationType::CustomHttp(http_config.clone());
        let http_rule = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(http_validation.clone());

        assert_eq!(
            http_rule.third_party_active_checker,
            Some(http_validation.clone())
        );
        assert_eq!(http_rule.match_validation_type, None);
        assert_eq!(
            http_rule.get_third_party_active_checker(),
            Some(&http_validation)
        );

        // AWS checker.
        let aws_validation = MatchValidationType::Aws(AwsType::AwsId);
        let aws_rule = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(aws_validation.clone());

        assert_eq!(
            aws_rule.third_party_active_checker,
            Some(aws_validation.clone())
        );
        assert_eq!(
            aws_rule.get_third_party_active_checker(),
            Some(&aws_validation)
        );

        // Custom HTTP checker built inline, read back through the getter only.
        let inline_rule = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(MatchValidationType::CustomHttp(http_config.clone()));

        assert_eq!(
            inline_rule.get_third_party_active_checker(),
            Some(&MatchValidationType::CustomHttp(http_config.clone()))
        );
    }

    /// Iterating over the enum yields the expected unit variants.
    #[test]
    fn test_secondary_validator_enum_iter() {
        let validators: Vec<SecondaryValidator> = SecondaryValidator::iter().collect();

        let expected_variants = [
            SecondaryValidator::GithubTokenChecksum,
            SecondaryValidator::JwtExpirationChecker,
        ];
        for expected in &expected_variants {
            assert!(validators.contains(expected));
        }
    }

    /// Variants are declared in sorted name order.
    #[test]
    fn test_secondary_validator_are_sorted() {
        let declared_names: Vec<String> = SecondaryValidator::iter()
            .map(|validator| validator.as_ref().to_string())
            .collect();

        let sorted_names = {
            let mut names = declared_names.clone();
            names.sort();
            names
        };

        assert_eq!(
            sorted_names, declared_names,
            "Secondary validators should be sorted by alphabetical order, but it's not the case, expected order:"
        );
    }

    /// Serialization is stable and follows key order, whatever the insertion order.
    #[test]
    fn test_jwt_claims_validator_config_serialization_order() {
        // Keys are deliberately listed out of order.
        let required_claims = BTreeMap::from([
            ("zzz".to_string(), ClaimRequirement::Present),
            (
                "aaa".to_string(),
                ClaimRequirement::ExactValue("test".to_string()),
            ),
            (
                "mmm".to_string(),
                ClaimRequirement::RegexMatch(r"^test.*".to_string()),
            ),
        ]);

        let config = JwtClaimsValidatorConfig {
            required_claims,
            required_headers: BTreeMap::new(),
        };

        let serialized1 = serde_json::to_string(&config).unwrap();
        let serialized2 = serde_json::to_string(&config).unwrap();

        assert_eq!(serialized1, serialized2, "Serialization should be stable");

        let position_of = |key: &str| serialized1.find(key).unwrap();
        assert!(position_of("aaa") < position_of("mmm"));
        assert!(position_of("mmm") < position_of("zzz"));
    }
}