dd_sds/scanner/regex_rule/
config.rs1use crate::proximity_keywords::compile_keywords_proximity_config;
2use crate::scanner::config::RuleConfig;
3use crate::scanner::metrics::RuleMetrics;
4use crate::scanner::regex_rule::compiled::RegexCompiledRule;
5use crate::scanner::regex_rule::regex_store::get_memoized_regex;
6use crate::validation::validate_and_create_regex;
7use crate::{CompiledRule, CreateScannerError, Labels};
8use serde::{Deserialize, Serialize};
9use serde_with::serde_as;
10use serde_with::DefaultOnNull;
11use std::sync::Arc;
12use strum::{AsRefStr, EnumIter};
13
/// Value used for `look_ahead_character_count` when a [`ProximityKeywordsConfig`] has to be
/// created implicitly (see `RegexRuleConfig::with_included_keywords`).
pub const DEFAULT_KEYWORD_LOOKAHEAD: usize = 30;
15
16#[serde_as]
17#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
18pub struct RegexRuleConfig {
19 pub pattern: String,
20 pub proximity_keywords: Option<ProximityKeywordsConfig>,
21 pub validator: Option<SecondaryValidator>,
22 #[serde_as(deserialize_as = "DefaultOnNull")]
23 #[serde(default)]
24 pub labels: Labels,
25}
26
27impl RegexRuleConfig {
28 pub fn new(pattern: &str) -> Self {
29 #[allow(deprecated)]
30 Self {
31 pattern: pattern.to_owned(),
32 proximity_keywords: None,
33 validator: None,
34 labels: Labels::default(),
35 }
36 }
37
38 pub fn with_pattern(&self, pattern: &str) -> Self {
39 self.mutate_clone(|x| x.pattern = pattern.to_string())
40 }
41
42 pub fn with_proximity_keywords(&self, proximity_keywords: ProximityKeywordsConfig) -> Self {
43 self.mutate_clone(|x| x.proximity_keywords = Some(proximity_keywords))
44 }
45
46 pub fn with_labels(&self, labels: Labels) -> Self {
47 self.mutate_clone(|x| x.labels = labels)
48 }
49
50 pub fn build(&self) -> Arc<dyn RuleConfig> {
51 Arc::new(self.clone())
52 }
53
54 fn mutate_clone(&self, modify: impl FnOnce(&mut Self)) -> Self {
55 let mut clone = self.clone();
56 modify(&mut clone);
57 clone
58 }
59
60 pub fn with_included_keywords(
61 &self,
62 keywords: impl IntoIterator<Item = impl AsRef<str>>,
63 ) -> Self {
64 let mut this = self.clone();
65 let mut config = self.get_or_create_proximity_keywords_config();
66 config.included_keywords = keywords
67 .into_iter()
68 .map(|x| x.as_ref().to_string())
69 .collect::<Vec<_>>();
70 this.proximity_keywords = Some(config);
71 this
72 }
73
74 pub fn with_validator(&self, validator: Option<SecondaryValidator>) -> Self {
75 let mut this = self.clone();
76 this.validator = validator;
77 this
78 }
79
80 fn get_or_create_proximity_keywords_config(&self) -> ProximityKeywordsConfig {
81 self.proximity_keywords
82 .clone()
83 .unwrap_or_else(|| ProximityKeywordsConfig {
84 look_ahead_character_count: DEFAULT_KEYWORD_LOOKAHEAD,
85 included_keywords: vec![],
86 excluded_keywords: vec![],
87 })
88 }
89}
90
91impl RuleConfig for RegexRuleConfig {
92 fn convert_to_compiled_rule(
93 &self,
94 rule_index: usize,
95 scanner_labels: Labels,
96 ) -> Result<Box<dyn CompiledRule>, CreateScannerError> {
97 let regex = get_memoized_regex(&self.pattern, validate_and_create_regex)?;
98
99 let rule_labels = scanner_labels.clone_with_labels(self.labels.clone());
100
101 let (included_keywords, excluded_keywords) = self
102 .proximity_keywords
103 .as_ref()
104 .map(|config| compile_keywords_proximity_config(config, &rule_labels))
105 .unwrap_or(Ok((None, None)))?;
106
107 Ok(Box::new(RegexCompiledRule {
108 rule_index,
109 regex,
110 included_keywords,
111 excluded_keywords,
112 validator: self.validator.clone().map(|x| x.compile()),
113 metrics: RuleMetrics::new(&rule_labels),
114 }))
115 }
116}
117
118#[serde_as]
119#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
120pub struct ProximityKeywordsConfig {
121 pub look_ahead_character_count: usize,
122
123 #[serde_as(deserialize_as = "DefaultOnNull")]
124 #[serde(default)]
125 pub included_keywords: Vec<String>,
126
127 #[serde_as(deserialize_as = "DefaultOnNull")]
128 #[serde(default)]
129 pub excluded_keywords: Vec<String>,
130}
131
132#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
133pub enum ClaimRequirement {
134 Present,
136 ExactValue(String),
138 RegexMatch(String),
140}
141
142#[serde_as]
143#[derive(Serialize, Deserialize, Default, Clone, Debug, PartialEq)]
144pub struct JwtClaimsCheckerConfig {
145 #[serde_as(deserialize_as = "DefaultOnNull")]
146 #[serde(default)]
147 pub required_claims: std::collections::HashMap<String, ClaimRequirement>,
148}
149
150#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, EnumIter, AsRefStr)]
151#[serde(tag = "type")]
152pub enum SecondaryValidator {
153 AbaRtnChecksum,
154 BrazilianCnpjChecksum,
155 BrazilianCpfChecksum,
156 BtcChecksum,
157 BulgarianEGNChecksum,
158 ChineseIdChecksum,
159 CoordinationNumberChecksum,
160 CzechPersonalIdentificationNumberChecksum,
161 CzechTaxIdentificationNumberChecksum,
162 DutchBsnChecksum,
163 DutchPassportChecksum,
164 EthereumChecksum,
165 FinnishHetuChecksum,
166 FranceNifChecksum,
167 FranceSsnChecksum,
168 GermanIdsChecksum,
169 GermanSvnrChecksum,
170 GithubTokenChecksum,
171 GreekTinChecksum,
172 HungarianTinChecksum,
173 IbanChecker,
174 IrishPpsChecksum,
175 ItalianNationalIdChecksum,
176 JwtClaimsChecker { config: JwtClaimsCheckerConfig },
177 JwtExpirationChecker,
178 LatviaNationalIdChecksum,
179 LithuanianPersonalIdentificationNumberChecksum,
180 LuhnChecksum,
181 LuxembourgIndividualNINChecksum,
182 Mod11_10checksum,
183 Mod11_2checksum,
184 Mod1271_36Checksum,
185 Mod27_26checksum,
186 Mod37_2checksum,
187 Mod37_36checksum,
188 Mod661_26checksum,
189 Mod97_10checksum,
190 MoneroAddress,
191 NhsCheckDigit,
192 NirChecksum,
193 PolishNationalIdChecksum,
194 PolishNipChecksum,
195 PortugueseTaxIdChecksum,
196 RodneCisloNumberChecksum,
197 RomanianPersonalNumericCode,
198 SlovenianPINChecksum,
199 SpanishDniChecksum,
200 SpanishNussChecksum,
201 SwedenPINChecksum,
202}
203
#[cfg(test)]
mod test {
    use crate::{AwsType, CustomHttpConfig, MatchValidationType, RootRuleConfig};
    use strum::IntoEnumIterator;

    use super::*;

    // `with_pattern` must replace the pattern on the returned copy.
    #[test]
    fn should_override_pattern() {
        let rule_config = RegexRuleConfig::new("123").with_pattern("456");
        assert_eq!(rule_config.pattern, "456");
    }

    // `new` must leave every optional setting unset and use empty labels.
    #[test]
    #[allow(deprecated)]
    fn should_have_default() {
        let rule_config = RegexRuleConfig::new("123");
        assert_eq!(
            rule_config,
            RegexRuleConfig {
                pattern: "123".to_string(),
                proximity_keywords: None,
                validator: None,
                labels: Labels::empty(),
            }
        );
    }

    // Keyword lists must default to empty both when absent and when explicitly `null`.
    #[test]
    fn proximity_keywords_should_have_default() {
        let json_config = r#"{"look_ahead_character_count": 0}"#;
        let test: ProximityKeywordsConfig = serde_json::from_str(json_config).unwrap();
        assert_eq!(
            test,
            ProximityKeywordsConfig {
                look_ahead_character_count: 0,
                included_keywords: vec![],
                excluded_keywords: vec![]
            }
        );

        let json_config = r#"{"look_ahead_character_count": 0, "excluded_keywords": null, "included_keywords": null}"#;
        let test: ProximityKeywordsConfig = serde_json::from_str(json_config).unwrap();
        assert_eq!(
            test,
            ProximityKeywordsConfig {
                look_ahead_character_count: 0,
                included_keywords: vec![],
                excluded_keywords: vec![]
            }
        );
    }

    // Setting a third-party active checker must store it, leave `match_validation_type`
    // unset, and expose it through `get_third_party_active_checker`.
    // NOTE(review): `allow(deprecated)` is presumably needed for `match_validation_type`;
    // confirm against `RootRuleConfig`.
    #[test]
    #[allow(deprecated)]
    fn test_third_party_active_checker() {
        let http_config = CustomHttpConfig::default().with_endpoint("http://test.com".to_string());
        let validation_type = MatchValidationType::CustomHttp(http_config.clone());
        let rule_config = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(validation_type.clone());

        assert_eq!(
            rule_config.third_party_active_checker,
            Some(validation_type.clone())
        );
        assert_eq!(rule_config.match_validation_type, None);
        assert_eq!(
            rule_config.get_third_party_active_checker(),
            Some(&validation_type)
        );

        let aws_type = AwsType::AwsId;
        let validation_type2 = MatchValidationType::Aws(aws_type);
        let rule_config = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(validation_type2.clone());

        assert_eq!(
            rule_config.third_party_active_checker,
            Some(validation_type2.clone())
        );
        assert_eq!(
            rule_config.get_third_party_active_checker(),
            Some(&validation_type2)
        );

        let rule_config = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(MatchValidationType::CustomHttp(http_config.clone()));

        assert_eq!(
            rule_config.get_third_party_active_checker(),
            Some(&MatchValidationType::CustomHttp(http_config.clone()))
        );
    }

    // The derived iterator must yield the validators checked below.
    #[test]
    fn test_secondary_validator_enum_iter() {
        let validators: Vec<SecondaryValidator> = SecondaryValidator::iter().collect();
        assert!(validators.contains(&SecondaryValidator::GithubTokenChecksum));
        assert!(validators.contains(&SecondaryValidator::JwtExpirationChecker));
    }

    // The enum variants must be declared in alphabetical order of their names.
    #[test]
    fn test_secondary_validator_are_sorted() {
        let validator_names: Vec<String> = SecondaryValidator::iter()
            .map(|a| a.as_ref().to_string())
            .collect();
        let mut sorted_validator_names = validator_names.clone();
        sorted_validator_names.sort();
        // The failure message spells out the expected order so the fix can be copied from it.
        assert_eq!(
            sorted_validator_names,
            validator_names,
            "Secondary validators should be sorted by alphabetical order, but it's not the case, expected order: {:?}",
            sorted_validator_names
        );
    }
}