dd_sds/scanner/regex_rule/
config.rs1use crate::proximity_keywords::compile_keywords_proximity_config;
2use crate::scanner::config::RuleConfig;
3use crate::scanner::metrics::RuleMetrics;
4use crate::scanner::regex_rule::compiled::RegexCompiledRule;
5use crate::scanner::regex_rule::regex_store::get_memoized_regex;
6use crate::secondary_validation::Validator;
7use crate::validation::validate_and_create_regex;
8use crate::{CompiledRule, CreateScannerError, Labels};
9use serde::{Deserialize, Serialize};
10use serde_with::serde_as;
11use serde_with::DefaultOnNull;
12use std::sync::Arc;
13use strum::EnumIter;
14
/// Default value for [`ProximityKeywordsConfig::look_ahead_character_count`].
///
/// Used when a proximity-keywords configuration has to be created implicitly,
/// e.g. by [`RegexRuleConfig::with_included_keywords`] on a rule that has none yet.
pub const DEFAULT_KEYWORD_LOOKAHEAD: usize = 30;
16
17#[serde_as]
18#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
19pub struct RegexRuleConfig {
20 pub pattern: String,
21 pub proximity_keywords: Option<ProximityKeywordsConfig>,
22 pub validator: Option<SecondaryValidator>,
23 #[serde_as(deserialize_as = "DefaultOnNull")]
24 #[serde(default)]
25 pub labels: Labels,
26}
27
28impl RegexRuleConfig {
29 pub fn new(pattern: &str) -> Self {
30 #[allow(deprecated)]
31 Self {
32 pattern: pattern.to_owned(),
33 proximity_keywords: None,
34 validator: None,
35 labels: Labels::default(),
36 }
37 }
38
39 pub fn with_pattern(&self, pattern: &str) -> Self {
40 self.mutate_clone(|x| x.pattern = pattern.to_string())
41 }
42
43 pub fn with_proximity_keywords(&self, proximity_keywords: ProximityKeywordsConfig) -> Self {
44 self.mutate_clone(|x| x.proximity_keywords = Some(proximity_keywords))
45 }
46
47 pub fn with_labels(&self, labels: Labels) -> Self {
48 self.mutate_clone(|x| x.labels = labels)
49 }
50
51 pub fn build(&self) -> Arc<dyn RuleConfig> {
52 Arc::new(self.clone())
53 }
54
55 fn mutate_clone(&self, modify: impl FnOnce(&mut Self)) -> Self {
56 let mut clone = self.clone();
57 modify(&mut clone);
58 clone
59 }
60
61 pub fn with_included_keywords(
62 &self,
63 keywords: impl IntoIterator<Item = impl AsRef<str>>,
64 ) -> Self {
65 let mut this = self.clone();
66 let mut config = self.get_or_create_proximity_keywords_config();
67 config.included_keywords = keywords
68 .into_iter()
69 .map(|x| x.as_ref().to_string())
70 .collect::<Vec<_>>();
71 this.proximity_keywords = Some(config);
72 this
73 }
74
75 pub fn with_validator(&self, validator: Option<SecondaryValidator>) -> Self {
76 let mut this = self.clone();
77 this.validator = validator;
78 this
79 }
80
81 fn get_or_create_proximity_keywords_config(&self) -> ProximityKeywordsConfig {
82 self.proximity_keywords
83 .clone()
84 .unwrap_or_else(|| ProximityKeywordsConfig {
85 look_ahead_character_count: DEFAULT_KEYWORD_LOOKAHEAD,
86 included_keywords: vec![],
87 excluded_keywords: vec![],
88 })
89 }
90}
91
92impl RuleConfig for RegexRuleConfig {
93 fn convert_to_compiled_rule(
94 &self,
95 rule_index: usize,
96 scanner_labels: Labels,
97 ) -> Result<Box<dyn CompiledRule>, CreateScannerError> {
98 let regex = get_memoized_regex(&self.pattern, validate_and_create_regex)?;
99
100 let rule_labels = scanner_labels.clone_with_labels(self.labels.clone());
101
102 let (included_keywords, excluded_keywords) = self
103 .proximity_keywords
104 .as_ref()
105 .map(|config| compile_keywords_proximity_config(config, &rule_labels))
106 .unwrap_or(Ok((None, None)))?;
107
108 Ok(Box::new(RegexCompiledRule {
109 rule_index,
110 regex,
111 included_keywords,
112 excluded_keywords,
113 validator: self
114 .validator
115 .clone()
116 .map(|x| Arc::new(x) as Arc<dyn Validator>),
117 metrics: RuleMetrics::new(&rule_labels),
118 }))
119 }
120}
121
122#[serde_as]
123#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
124pub struct ProximityKeywordsConfig {
125 pub look_ahead_character_count: usize,
126
127 #[serde_as(deserialize_as = "DefaultOnNull")]
128 #[serde(default)]
129 pub included_keywords: Vec<String>,
130
131 #[serde_as(deserialize_as = "DefaultOnNull")]
132 #[serde(default)]
133 pub excluded_keywords: Vec<String>,
134}
135
136#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, EnumIter)]
137#[serde(tag = "type")]
138pub enum SecondaryValidator {
139 AbaRtnChecksum,
140 BrazilianCpfChecksum,
141 BrazilianCnpjChecksum,
142 ChineseIdChecksum,
143 GithubTokenChecksum,
144 IbanChecker,
145 JwtExpirationChecker,
146 LuhnChecksum,
147 NhsCheckDigit,
148 NirChecksum,
149 PolishNationalIdChecksum,
150 LuxembourgIndividualNINChecksum,
151 FranceSsnChecksum,
152}
153
#[cfg(test)]
mod test {
    use super::*;
    use crate::{AwsType, CustomHttpConfig, MatchValidationType, RootRuleConfig};
    use strum::IntoEnumIterator;

    /// `with_pattern` replaces the pattern given to `new`.
    #[test]
    fn should_override_pattern() {
        let overridden = RegexRuleConfig::new("123").with_pattern("456");
        assert_eq!(overridden.pattern, "456");
    }

    /// `new` fills every field other than the pattern with its empty value.
    #[test]
    #[allow(deprecated)]
    fn should_have_default() {
        let expected = RegexRuleConfig {
            pattern: "123".to_string(),
            proximity_keywords: None,
            validator: None,
            labels: Labels::empty(),
        };
        assert_eq!(RegexRuleConfig::new("123"), expected);
    }

    /// Missing and `null` keyword lists both deserialize to empty lists.
    #[test]
    fn proximity_keywords_should_have_default() {
        let expected = ProximityKeywordsConfig {
            look_ahead_character_count: 0,
            included_keywords: vec![],
            excluded_keywords: vec![],
        };

        let json_inputs = [
            r#"{"look_ahead_character_count": 0}"#,
            r#"{"look_ahead_character_count": 0, "excluded_keywords": null, "included_keywords": null}"#,
        ];
        for json_config in json_inputs {
            let parsed: ProximityKeywordsConfig = serde_json::from_str(json_config).unwrap();
            assert_eq!(parsed, expected);
        }
    }

    /// The third-party active checker set on a root rule config is readable
    /// both through the field and through its getter.
    #[test]
    #[allow(deprecated)]
    fn test_third_party_active_checker() {
        let http_config = CustomHttpConfig::default().with_endpoint("http://test.com".to_string());

        // Custom HTTP checker: stored in the new field only.
        let http_checker = MatchValidationType::CustomHttp(http_config.clone());
        let with_http = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(http_checker.clone());

        assert_eq!(
            with_http.third_party_active_checker.as_ref(),
            Some(&http_checker)
        );
        assert_eq!(with_http.match_validation_type, None);
        assert_eq!(
            with_http.get_third_party_active_checker(),
            Some(&http_checker)
        );

        // AWS checker.
        let aws_checker = MatchValidationType::Aws(AwsType::AwsId);
        let with_aws = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(aws_checker.clone());

        assert_eq!(
            with_aws.third_party_active_checker.as_ref(),
            Some(&aws_checker)
        );
        assert_eq!(
            with_aws.get_third_party_active_checker(),
            Some(&aws_checker)
        );

        // Checker built inline from the same HTTP configuration.
        let inline_checker = MatchValidationType::CustomHttp(http_config.clone());
        let with_inline = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(inline_checker.clone());

        assert_eq!(
            with_inline.get_third_party_active_checker(),
            Some(&inline_checker)
        );
    }

    /// The derived iterator yields the checked validator variants.
    #[test]
    fn test_secondary_validator_enum_iter() {
        let validators: Vec<SecondaryValidator> = SecondaryValidator::iter().collect();

        let expected_variants = [
            SecondaryValidator::GithubTokenChecksum,
            SecondaryValidator::JwtExpirationChecker,
        ];
        for expected in &expected_variants {
            assert!(validators.contains(expected));
        }
    }
}