dd_sds/scanner/regex_rule/
config.rs

1use crate::proximity_keywords::compile_keywords_proximity_config;
2use crate::scanner::config::RuleConfig;
3use crate::scanner::metrics::RuleMetrics;
4use crate::scanner::regex_rule::compiled::RegexCompiledRule;
5use crate::scanner::regex_rule::regex_store::get_memoized_regex;
6use crate::validation::validate_and_create_regex;
7use crate::{CompiledRule, CreateScannerError, Labels};
8use serde::{Deserialize, Serialize};
9use serde_with::serde_as;
10use serde_with::DefaultOnNull;
11use std::sync::Arc;
12use strum::{AsRefStr, EnumIter};
13
/// Look-ahead character count assigned to a [`ProximityKeywordsConfig`] that has
/// to be created from scratch, i.e. when `RegexRuleConfig::with_included_keywords`
/// is called on a configuration that has no proximity keywords yet.
pub const DEFAULT_KEYWORD_LOOKAHEAD: usize = 30;
15
16#[serde_as]
17#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
18pub struct RegexRuleConfig {
19    pub pattern: String,
20    pub proximity_keywords: Option<ProximityKeywordsConfig>,
21    pub validator: Option<SecondaryValidator>,
22    #[serde_as(deserialize_as = "DefaultOnNull")]
23    #[serde(default)]
24    pub labels: Labels,
25}
26
27impl RegexRuleConfig {
28    pub fn new(pattern: &str) -> Self {
29        #[allow(deprecated)]
30        Self {
31            pattern: pattern.to_owned(),
32            proximity_keywords: None,
33            validator: None,
34            labels: Labels::default(),
35        }
36    }
37
38    pub fn with_pattern(&self, pattern: &str) -> Self {
39        self.mutate_clone(|x| x.pattern = pattern.to_string())
40    }
41
42    pub fn with_proximity_keywords(&self, proximity_keywords: ProximityKeywordsConfig) -> Self {
43        self.mutate_clone(|x| x.proximity_keywords = Some(proximity_keywords))
44    }
45
46    pub fn with_labels(&self, labels: Labels) -> Self {
47        self.mutate_clone(|x| x.labels = labels)
48    }
49
50    pub fn build(&self) -> Arc<dyn RuleConfig> {
51        Arc::new(self.clone())
52    }
53
54    fn mutate_clone(&self, modify: impl FnOnce(&mut Self)) -> Self {
55        let mut clone = self.clone();
56        modify(&mut clone);
57        clone
58    }
59
60    pub fn with_included_keywords(
61        &self,
62        keywords: impl IntoIterator<Item = impl AsRef<str>>,
63    ) -> Self {
64        let mut this = self.clone();
65        let mut config = self.get_or_create_proximity_keywords_config();
66        config.included_keywords = keywords
67            .into_iter()
68            .map(|x| x.as_ref().to_string())
69            .collect::<Vec<_>>();
70        this.proximity_keywords = Some(config);
71        this
72    }
73
74    pub fn with_validator(&self, validator: Option<SecondaryValidator>) -> Self {
75        let mut this = self.clone();
76        this.validator = validator;
77        this
78    }
79
80    fn get_or_create_proximity_keywords_config(&self) -> ProximityKeywordsConfig {
81        self.proximity_keywords
82            .clone()
83            .unwrap_or_else(|| ProximityKeywordsConfig {
84                look_ahead_character_count: DEFAULT_KEYWORD_LOOKAHEAD,
85                included_keywords: vec![],
86                excluded_keywords: vec![],
87            })
88    }
89}
90
91impl RuleConfig for RegexRuleConfig {
92    fn convert_to_compiled_rule(
93        &self,
94        rule_index: usize,
95        scanner_labels: Labels,
96    ) -> Result<Box<dyn CompiledRule>, CreateScannerError> {
97        let regex = get_memoized_regex(&self.pattern, validate_and_create_regex)?;
98
99        let rule_labels = scanner_labels.clone_with_labels(self.labels.clone());
100
101        let (included_keywords, excluded_keywords) = self
102            .proximity_keywords
103            .as_ref()
104            .map(|config| compile_keywords_proximity_config(config, &rule_labels))
105            .unwrap_or(Ok((None, None)))?;
106
107        Ok(Box::new(RegexCompiledRule {
108            rule_index,
109            regex,
110            included_keywords,
111            excluded_keywords,
112            validator: self.validator.clone().map(|x| x.compile()),
113            metrics: RuleMetrics::new(&rule_labels),
114        }))
115    }
116}
117
118#[serde_as]
119#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
120pub struct ProximityKeywordsConfig {
121    pub look_ahead_character_count: usize,
122
123    #[serde_as(deserialize_as = "DefaultOnNull")]
124    #[serde(default)]
125    pub included_keywords: Vec<String>,
126
127    #[serde_as(deserialize_as = "DefaultOnNull")]
128    #[serde(default)]
129    pub excluded_keywords: Vec<String>,
130}
131
132#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
133pub enum ClaimRequirement {
134    /// Just check that the claim exists
135    Present,
136    /// Check that the claim exists and has an exact value
137    ExactValue(String),
138    /// Check that the claim exists and matches a regex pattern
139    RegexMatch(String),
140}
141
142#[serde_as]
143#[derive(Serialize, Deserialize, Default, Clone, Debug, PartialEq)]
144pub struct JwtClaimsCheckerConfig {
145    #[serde_as(deserialize_as = "DefaultOnNull")]
146    #[serde(default)]
147    pub required_claims: std::collections::HashMap<String, ClaimRequirement>,
148}
149
150#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, EnumIter, AsRefStr)]
151#[serde(tag = "type")]
152pub enum SecondaryValidator {
153    AbaRtnChecksum,
154    BrazilianCnpjChecksum,
155    BrazilianCpfChecksum,
156    BtcChecksum,
157    BulgarianEGNChecksum,
158    ChineseIdChecksum,
159    CoordinationNumberChecksum,
160    CzechPersonalIdentificationNumberChecksum,
161    CzechTaxIdentificationNumberChecksum,
162    DutchBsnChecksum,
163    DutchPassportChecksum,
164    EthereumChecksum,
165    FinnishHetuChecksum,
166    FranceNifChecksum,
167    FranceSsnChecksum,
168    GermanIdsChecksum,
169    GermanSvnrChecksum,
170    GithubTokenChecksum,
171    GreekTinChecksum,
172    HungarianTinChecksum,
173    IbanChecker,
174    IrishPpsChecksum,
175    ItalianNationalIdChecksum,
176    JwtClaimsChecker { config: JwtClaimsCheckerConfig },
177    JwtExpirationChecker,
178    LatviaNationalIdChecksum,
179    LithuanianPersonalIdentificationNumberChecksum,
180    LuhnChecksum,
181    LuxembourgIndividualNINChecksum,
182    Mod11_10checksum,
183    Mod11_2checksum,
184    Mod1271_36Checksum,
185    Mod27_26checksum,
186    Mod37_2checksum,
187    Mod37_36checksum,
188    Mod661_26checksum,
189    Mod97_10checksum,
190    MoneroAddress,
191    NhsCheckDigit,
192    NirChecksum,
193    PolishNationalIdChecksum,
194    PolishNipChecksum,
195    PortugueseTaxIdChecksum,
196    RodneCisloNumberChecksum,
197    RomanianPersonalNumericCode,
198    SlovenianPINChecksum,
199    SpanishDniChecksum,
200    SpanishNussChecksum,
201    SwedenPINChecksum,
202}
203
#[cfg(test)]
mod test {
    use crate::{AwsType, CustomHttpConfig, MatchValidationType, RootRuleConfig};
    use strum::IntoEnumIterator;

    use super::*;

    /// `with_pattern` replaces the pattern passed to `new`.
    #[test]
    fn should_override_pattern() {
        let overridden = RegexRuleConfig::new("123").with_pattern("456");
        assert_eq!(overridden.pattern, "456");
    }

    /// `new` leaves every optional setting unset and the labels empty.
    #[test]
    #[allow(deprecated)]
    fn should_have_default() {
        let expected = RegexRuleConfig {
            pattern: "123".to_string(),
            proximity_keywords: None,
            validator: None,
            labels: Labels::empty(),
        };
        assert_eq!(RegexRuleConfig::new("123"), expected);
    }

    /// Keyword lists deserialize to empty vectors both when the fields are
    /// absent and when they are explicitly `null`.
    #[test]
    fn proximity_keywords_should_have_default() {
        let inputs = [
            r#"{"look_ahead_character_count": 0}"#,
            r#"{"look_ahead_character_count": 0, "excluded_keywords": null, "included_keywords": null}"#,
        ];

        for json_config in inputs {
            let parsed: ProximityKeywordsConfig = serde_json::from_str(json_config).unwrap();
            assert_eq!(
                parsed,
                ProximityKeywordsConfig {
                    look_ahead_character_count: 0,
                    included_keywords: vec![],
                    excluded_keywords: vec![]
                }
            );
        }
    }

    #[test]
    #[allow(deprecated)]
    fn test_third_party_active_checker() {
        // A custom HTTP checker set through `third_party_active_checker` is
        // stored in the new field only; `match_validation_type` stays unset.
        let http_config = CustomHttpConfig::default().with_endpoint("http://test.com".to_string());
        let http_checker = MatchValidationType::CustomHttp(http_config.clone());
        let with_http = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(http_checker.clone());

        assert_eq!(
            with_http.third_party_active_checker,
            Some(http_checker.clone())
        );
        assert_eq!(with_http.match_validation_type, None);
        assert_eq!(
            with_http.get_third_party_active_checker(),
            Some(&http_checker)
        );

        // The same setter and getter round-trip an AWS checker.
        let aws_checker = MatchValidationType::Aws(AwsType::AwsId);
        let with_aws = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(aws_checker.clone());

        assert_eq!(
            with_aws.third_party_active_checker,
            Some(aws_checker.clone())
        );
        assert_eq!(
            with_aws.get_third_party_active_checker(),
            Some(&aws_checker)
        );

        // The getter returns a checker equal to one rebuilt from the same
        // HTTP configuration.
        let rebuilt_checker = MatchValidationType::CustomHttp(http_config.clone());
        let with_rebuilt = RootRuleConfig::new(RegexRuleConfig::new("123"))
            .third_party_active_checker(rebuilt_checker.clone());

        assert_eq!(
            with_rebuilt.get_third_party_active_checker(),
            Some(&rebuilt_checker)
        );
    }

    /// The derived iterator yields the validator variants (spot-checks two).
    #[test]
    fn test_secondary_validator_enum_iter() {
        let all_validators: Vec<SecondaryValidator> = SecondaryValidator::iter().collect();
        for expected in [
            SecondaryValidator::GithubTokenChecksum,
            SecondaryValidator::JwtExpirationChecker,
        ] {
            assert!(all_validators.contains(&expected));
        }
    }

    /// Variant names, in declaration order, must already be sorted.
    #[test]
    fn test_secondary_validator_are_sorted() {
        let declared_names: Vec<String> = SecondaryValidator::iter()
            .map(|validator| validator.as_ref().to_string())
            .collect();
        let sorted_names = {
            let mut names = declared_names.clone();
            names.sort();
            names
        };
        assert_eq!(sorted_names, declared_names, "Secondary validators should be sorted by alphabetical order, but it's not the case, expected order:");
    }
}