dd_sds/scanner/regex_rule/
config.rs

1use crate::proximity_keywords::compile_keywords_proximity_config;
2use crate::scanner::config::RuleConfig;
3use crate::scanner::metrics::RuleMetrics;
4use crate::scanner::regex_rule::compiled::RegexCompiledRule;
5use crate::scanner::regex_rule::regex_store::get_memoized_regex;
6use crate::secondary_validation::Validator;
7use crate::validation::validate_and_create_regex;
8use crate::{CompiledRule, CreateScannerError, Labels};
9use serde::{Deserialize, Serialize};
10use serde_with::serde_as;
11use serde_with::DefaultOnNull;
12use std::sync::Arc;
13use strum::{AsRefStr, EnumIter};
14
/// Look-ahead character count given to a [`ProximityKeywordsConfig`] that has to be
/// created from scratch (for example by `RegexRuleConfig::with_included_keywords`
/// when the rule has no proximity keywords configured yet).
pub const DEFAULT_KEYWORD_LOOKAHEAD: usize = 30;
16
17#[serde_as]
18#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
19pub struct RegexRuleConfig {
20    pub pattern: String,
21    pub proximity_keywords: Option<ProximityKeywordsConfig>,
22    pub validator: Option<SecondaryValidator>,
23    #[serde_as(deserialize_as = "DefaultOnNull")]
24    #[serde(default)]
25    pub labels: Labels,
26}
27
28impl RegexRuleConfig {
29    pub fn new(pattern: &str) -> Self {
30        #[allow(deprecated)]
31        Self {
32            pattern: pattern.to_owned(),
33            proximity_keywords: None,
34            validator: None,
35            labels: Labels::default(),
36        }
37    }
38
39    pub fn with_pattern(&self, pattern: &str) -> Self {
40        self.mutate_clone(|x| x.pattern = pattern.to_string())
41    }
42
43    pub fn with_proximity_keywords(&self, proximity_keywords: ProximityKeywordsConfig) -> Self {
44        self.mutate_clone(|x| x.proximity_keywords = Some(proximity_keywords))
45    }
46
47    pub fn with_labels(&self, labels: Labels) -> Self {
48        self.mutate_clone(|x| x.labels = labels)
49    }
50
51    pub fn build(&self) -> Arc<dyn RuleConfig> {
52        Arc::new(self.clone())
53    }
54
55    fn mutate_clone(&self, modify: impl FnOnce(&mut Self)) -> Self {
56        let mut clone = self.clone();
57        modify(&mut clone);
58        clone
59    }
60
61    pub fn with_included_keywords(
62        &self,
63        keywords: impl IntoIterator<Item = impl AsRef<str>>,
64    ) -> Self {
65        let mut this = self.clone();
66        let mut config = self.get_or_create_proximity_keywords_config();
67        config.included_keywords = keywords
68            .into_iter()
69            .map(|x| x.as_ref().to_string())
70            .collect::<Vec<_>>();
71        this.proximity_keywords = Some(config);
72        this
73    }
74
75    pub fn with_validator(&self, validator: Option<SecondaryValidator>) -> Self {
76        let mut this = self.clone();
77        this.validator = validator;
78        this
79    }
80
81    fn get_or_create_proximity_keywords_config(&self) -> ProximityKeywordsConfig {
82        self.proximity_keywords
83            .clone()
84            .unwrap_or_else(|| ProximityKeywordsConfig {
85                look_ahead_character_count: DEFAULT_KEYWORD_LOOKAHEAD,
86                included_keywords: vec![],
87                excluded_keywords: vec![],
88            })
89    }
90}
91
92impl RuleConfig for RegexRuleConfig {
93    fn convert_to_compiled_rule(
94        &self,
95        rule_index: usize,
96        scanner_labels: Labels,
97    ) -> Result<Box<dyn CompiledRule>, CreateScannerError> {
98        let regex = get_memoized_regex(&self.pattern, validate_and_create_regex)?;
99
100        let rule_labels = scanner_labels.clone_with_labels(self.labels.clone());
101
102        let (included_keywords, excluded_keywords) = self
103            .proximity_keywords
104            .as_ref()
105            .map(|config| compile_keywords_proximity_config(config, &rule_labels))
106            .unwrap_or(Ok((None, None)))?;
107
108        Ok(Box::new(RegexCompiledRule {
109            rule_index,
110            regex,
111            included_keywords,
112            excluded_keywords,
113            validator: self
114                .validator
115                .clone()
116                .map(|x| Arc::new(x) as Arc<dyn Validator>),
117            metrics: RuleMetrics::new(&rule_labels),
118        }))
119    }
120}
121
122#[serde_as]
123#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
124pub struct ProximityKeywordsConfig {
125    pub look_ahead_character_count: usize,
126
127    #[serde_as(deserialize_as = "DefaultOnNull")]
128    #[serde(default)]
129    pub included_keywords: Vec<String>,
130
131    #[serde_as(deserialize_as = "DefaultOnNull")]
132    #[serde(default)]
133    pub excluded_keywords: Vec<String>,
134}
135
136#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, EnumIter, AsRefStr)]
137#[serde(tag = "type")]
138pub enum SecondaryValidator {
139    AbaRtnChecksum,
140    BrazilianCnpjChecksum,
141    BrazilianCpfChecksum,
142    BtcChecksum,
143    BulgarianEGNChecksum,
144    ChineseIdChecksum,
145    CoordinationNumberChecksum,
146    CzechPersonalIdentificationNumberChecksum,
147    CzechTaxIdentificationNumberChecksum,
148    DutchBsnChecksum,
149    DutchPassportChecksum,
150    EthereumChecksum,
151    FinnishHetuChecksum,
152    FranceNifChecksum,
153    FranceSsnChecksum,
154    GermanIdsChecksum,
155    GermanSvnrChecksum,
156    GithubTokenChecksum,
157    GreekTinChecksum,
158    HungarianTinChecksum,
159    IbanChecker,
160    IrishPpsChecksum,
161    ItalianNationalIdChecksum,
162    JwtExpirationChecker,
163    LatviaNationalIdChecksum,
164    LithuanianPersonalIdentificationNumberChecksum,
165    LuhnChecksum,
166    LuxembourgIndividualNINChecksum,
167    Mod11_10checksum,
168    Mod11_2checksum,
169    Mod1271_36Checksum,
170    Mod27_26checksum,
171    Mod37_2checksum,
172    Mod37_36checksum,
173    Mod661_26checksum,
174    Mod97_10checksum,
175    MoneroAddress,
176    NhsCheckDigit,
177    NirChecksum,
178    PolishNationalIdChecksum,
179    PolishNipChecksum,
180    PortugueseTaxIdChecksum,
181    RodneCisloNumberChecksum,
182    RomanianPersonalNumericCode,
183    SlovenianPINChecksum,
184    SpanishDniChecksum,
185    SpanishNussChecksum,
186    SwedenPINChecksum,
187}
188
#[cfg(test)]
mod test {
    use crate::{AwsType, CustomHttpConfig, MatchValidationType, RootRuleConfig};
    use strum::IntoEnumIterator;

    use super::*;

    /// `with_pattern` replaces the pattern given to `new`.
    #[test]
    fn should_override_pattern() {
        let overridden = RegexRuleConfig::new("123").with_pattern("456");
        assert_eq!(overridden.pattern, "456");
    }

    /// `new` leaves every optional part of the config unset.
    #[test]
    #[allow(deprecated)]
    fn should_have_default() {
        let created = RegexRuleConfig::new("123");
        let expected = RegexRuleConfig {
            pattern: "123".to_string(),
            proximity_keywords: None,
            validator: None,
            labels: Labels::empty(),
        };
        assert_eq!(created, expected);
    }

    /// Missing and `null` keyword lists both deserialize to empty lists.
    #[test]
    fn proximity_keywords_should_have_default() {
        let expected = ProximityKeywordsConfig {
            look_ahead_character_count: 0,
            included_keywords: vec![],
            excluded_keywords: vec![],
        };

        let json_configs = [
            r#"{"look_ahead_character_count": 0}"#,
            r#"{"look_ahead_character_count": 0, "excluded_keywords": null, "included_keywords": null}"#,
        ];

        for json_config in json_configs {
            let parsed: ProximityKeywordsConfig = serde_json::from_str(json_config).unwrap();
            assert_eq!(parsed, expected);
        }
    }

    #[test]
    #[allow(deprecated)]
    fn test_third_party_active_checker() {
        // Setting the checker with a custom HTTP configuration fills
        // `third_party_active_checker` and leaves `match_validation_type` unset.
        let http_config = CustomHttpConfig::default().with_endpoint("http://test.com".to_string());
        let custom_http_checker = MatchValidationType::CustomHttp(http_config.clone());
        let rule_config = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(custom_http_checker.clone());

        assert_eq!(
            rule_config.third_party_active_checker,
            Some(custom_http_checker.clone())
        );
        assert_eq!(rule_config.match_validation_type, None);
        assert_eq!(
            rule_config.get_third_party_active_checker(),
            Some(&custom_http_checker)
        );

        // Same check with an AWS checker, again set through `third_party_active_checker`.
        let aws_checker = MatchValidationType::Aws(AwsType::AwsId);
        let rule_config = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(aws_checker.clone());

        assert_eq!(
            rule_config.third_party_active_checker,
            Some(aws_checker.clone())
        );
        assert_eq!(
            rule_config.get_third_party_active_checker(),
            Some(&aws_checker)
        );

        // `get_third_party_active_checker` returns the checker built inline from the
        // same HTTP configuration.
        let rule_config = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(MatchValidationType::CustomHttp(http_config.clone()));

        assert_eq!(
            rule_config.get_third_party_active_checker(),
            Some(&MatchValidationType::CustomHttp(http_config.clone()))
        );
    }

    /// `SecondaryValidator` can be iterated and the iteration yields known variants.
    #[test]
    fn test_secondary_validator_enum_iter() {
        let all_validators: Vec<SecondaryValidator> = SecondaryValidator::iter().collect();
        // Spot-check a couple of variants rather than the whole list.
        assert!(all_validators.contains(&SecondaryValidator::GithubTokenChecksum));
        assert!(all_validators.contains(&SecondaryValidator::JwtExpirationChecker));
    }

    /// The declaration order of `SecondaryValidator` must match the sorted order of
    /// the variant names.
    #[test]
    fn test_secondary_validator_are_sorted() {
        let declared_names: Vec<String> = SecondaryValidator::iter()
            .map(|validator| validator.as_ref().to_string())
            .collect();
        let mut alphabetical_names = declared_names.clone();
        alphabetical_names.sort();
        assert_eq!(alphabetical_names, declared_names, "Secondary validators should be sorted by alphabetical order, but it's not the case, expected order:");
    }
}