dd_sds/scanner/regex_rule/
config.rs

1use crate::proximity_keywords::compile_keywords_proximity_config;
2use crate::scanner::config::RuleConfig;
3use crate::scanner::metrics::RuleMetrics;
4use crate::scanner::regex_rule::compiled::RegexCompiledRule;
5use crate::scanner::regex_rule::regex_store::get_memoized_regex;
6use crate::secondary_validation::Validator;
7use crate::validation::validate_and_create_regex;
8use crate::{CompiledRule, CreateScannerError, Labels};
9use serde::{Deserialize, Serialize};
10use serde_with::serde_as;
11use serde_with::DefaultOnNull;
12use std::sync::Arc;
13use strum::EnumIter;
14
/// Look-ahead character count given to a `ProximityKeywordsConfig` that
/// `RegexRuleConfig::with_included_keywords` has to create because the rule
/// had no proximity-keyword configuration yet.
15pub const DEFAULT_KEYWORD_LOOKAHEAD: usize = 30;
16
17#[serde_as]
18#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
19pub struct RegexRuleConfig {
20    pub pattern: String,
21    pub proximity_keywords: Option<ProximityKeywordsConfig>,
22    pub validator: Option<SecondaryValidator>,
23    #[serde_as(deserialize_as = "DefaultOnNull")]
24    #[serde(default)]
25    pub labels: Labels,
26}
27
28impl RegexRuleConfig {
29    pub fn new(pattern: &str) -> Self {
30        #[allow(deprecated)]
31        Self {
32            pattern: pattern.to_owned(),
33            proximity_keywords: None,
34            validator: None,
35            labels: Labels::default(),
36        }
37    }
38
39    pub fn with_pattern(&self, pattern: &str) -> Self {
40        self.mutate_clone(|x| x.pattern = pattern.to_string())
41    }
42
43    pub fn with_proximity_keywords(&self, proximity_keywords: ProximityKeywordsConfig) -> Self {
44        self.mutate_clone(|x| x.proximity_keywords = Some(proximity_keywords))
45    }
46
47    pub fn with_labels(&self, labels: Labels) -> Self {
48        self.mutate_clone(|x| x.labels = labels)
49    }
50
51    pub fn build(&self) -> Arc<dyn RuleConfig> {
52        Arc::new(self.clone())
53    }
54
55    fn mutate_clone(&self, modify: impl FnOnce(&mut Self)) -> Self {
56        let mut clone = self.clone();
57        modify(&mut clone);
58        clone
59    }
60
61    pub fn with_included_keywords(
62        &self,
63        keywords: impl IntoIterator<Item = impl AsRef<str>>,
64    ) -> Self {
65        let mut this = self.clone();
66        let mut config = self.get_or_create_proximity_keywords_config();
67        config.included_keywords = keywords
68            .into_iter()
69            .map(|x| x.as_ref().to_string())
70            .collect::<Vec<_>>();
71        this.proximity_keywords = Some(config);
72        this
73    }
74
75    pub fn with_validator(&self, validator: Option<SecondaryValidator>) -> Self {
76        let mut this = self.clone();
77        this.validator = validator;
78        this
79    }
80
81    fn get_or_create_proximity_keywords_config(&self) -> ProximityKeywordsConfig {
82        self.proximity_keywords
83            .clone()
84            .unwrap_or_else(|| ProximityKeywordsConfig {
85                look_ahead_character_count: DEFAULT_KEYWORD_LOOKAHEAD,
86                included_keywords: vec![],
87                excluded_keywords: vec![],
88            })
89    }
90}
91
92impl RuleConfig for RegexRuleConfig {
93    fn convert_to_compiled_rule(
94        &self,
95        rule_index: usize,
96        scanner_labels: Labels,
97    ) -> Result<Box<dyn CompiledRule>, CreateScannerError> {
98        let regex = get_memoized_regex(&self.pattern, validate_and_create_regex)?;
99
100        let rule_labels = scanner_labels.clone_with_labels(self.labels.clone());
101
102        let (included_keywords, excluded_keywords) = self
103            .proximity_keywords
104            .as_ref()
105            .map(|config| compile_keywords_proximity_config(config, &rule_labels))
106            .unwrap_or(Ok((None, None)))?;
107
108        Ok(Box::new(RegexCompiledRule {
109            rule_index,
110            regex,
111            included_keywords,
112            excluded_keywords,
113            validator: self
114                .validator
115                .clone()
116                .map(|x| Arc::new(x) as Arc<dyn Validator>),
117            metrics: RuleMetrics::new(&rule_labels),
118        }))
119    }
120}
121
122#[serde_as]
123#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
124pub struct ProximityKeywordsConfig {
125    pub look_ahead_character_count: usize,
126
127    #[serde_as(deserialize_as = "DefaultOnNull")]
128    #[serde(default)]
129    pub included_keywords: Vec<String>,
130
131    #[serde_as(deserialize_as = "DefaultOnNull")]
132    #[serde(default)]
133    pub excluded_keywords: Vec<String>,
134}
135
136#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, EnumIter)]
137#[serde(tag = "type")]
138pub enum SecondaryValidator {
139    AbaRtnChecksum,
140    BrazilianCpfChecksum,
141    BrazilianCnpjChecksum,
142    ChineseIdChecksum,
143    GithubTokenChecksum,
144    IbanChecker,
145    JwtExpirationChecker,
146    LuhnChecksum,
147    NhsCheckDigit,
148    NirChecksum,
149    PolishNationalIdChecksum,
150    LuxembourgIndividualNINChecksum,
151    FranceSsnChecksum,
152}
153
#[cfg(test)]
mod test {
    use super::*;
    use crate::{AwsType, CustomHttpConfig, MatchValidationType, RootRuleConfig};
    use strum::IntoEnumIterator;

    /// `with_pattern` replaces the pattern passed to `new`.
    #[test]
    fn should_override_pattern() {
        let overridden = RegexRuleConfig::new("123").with_pattern("456");
        assert_eq!(overridden.pattern, "456");
    }

    /// `new` only sets the pattern; everything else is empty.
    #[test]
    #[allow(deprecated)]
    fn should_have_default() {
        let expected = RegexRuleConfig {
            pattern: "123".to_string(),
            proximity_keywords: None,
            validator: None,
            labels: Labels::empty(),
        };
        assert_eq!(RegexRuleConfig::new("123"), expected);
    }

    /// Keyword lists deserialize to empty vectors whether they are missing or
    /// explicitly `null`.
    #[test]
    fn proximity_keywords_should_have_default() {
        let parse = |json_config: &str| -> ProximityKeywordsConfig {
            serde_json::from_str(json_config).unwrap()
        };
        let expected = ProximityKeywordsConfig {
            look_ahead_character_count: 0,
            included_keywords: vec![],
            excluded_keywords: vec![],
        };

        // Keyword lists absent.
        assert_eq!(parse(r#"{"look_ahead_character_count": 0}"#), expected);

        // Keyword lists explicitly null.
        assert_eq!(
            parse(
                r#"{"look_ahead_character_count": 0, "excluded_keywords": null, "included_keywords": null}"#
            ),
            expected
        );
    }

    /// `third_party_active_checker` stores the checker it is given and
    /// `get_third_party_active_checker` returns it.
    #[test]
    #[allow(deprecated)]
    fn test_third_party_active_checker() {
        let new_root_rule = || RootRuleConfig::new(RegexRuleConfig::new("123"));
        let http_config = CustomHttpConfig::default().with_endpoint("http://test.com".to_string());

        // Setting only `third_party_active_checker` leaves `match_validation_type` unset.
        let http_checker = MatchValidationType::CustomHttp(http_config.clone());
        let rule_config = new_root_rule().third_party_active_checker(http_checker.clone());
        assert_eq!(
            rule_config.third_party_active_checker,
            Some(http_checker.clone())
        );
        assert_eq!(rule_config.match_validation_type, None);
        assert_eq!(
            rule_config.get_third_party_active_checker(),
            Some(&http_checker)
        );

        // The same round trip with an AWS checker.
        let aws_checker = MatchValidationType::Aws(AwsType::AwsId);
        let rule_config = new_root_rule().third_party_active_checker(aws_checker.clone());
        assert_eq!(
            rule_config.third_party_active_checker,
            Some(aws_checker.clone())
        );
        assert_eq!(
            rule_config.get_third_party_active_checker(),
            Some(&aws_checker)
        );

        // The getter's result equals a freshly built `CustomHttp` checker.
        let rule_config = new_root_rule()
            .third_party_active_checker(MatchValidationType::CustomHttp(http_config.clone()));
        assert_eq!(
            rule_config.get_third_party_active_checker(),
            Some(&MatchValidationType::CustomHttp(http_config))
        );
    }

    /// Iterating `SecondaryValidator` yields (at least) the spot-checked variants.
    #[test]
    fn test_secondary_validator_enum_iter() {
        let validators: Vec<SecondaryValidator> = SecondaryValidator::iter().collect();
        for expected in [
            SecondaryValidator::GithubTokenChecksum,
            SecondaryValidator::JwtExpirationChecker,
        ]
        .iter()
        {
            assert!(validators.contains(expected));
        }
    }
}