1use crate::encoding::Encoding;
2use crate::event::Event;
3
4use crate::match_validation::{
5 config::InternalMatchValidationType, config::MatchValidationType, match_status::MatchStatus,
6 match_validator::MatchValidator,
7};
8use rayon::prelude::*;
9
10use error::{MatchValidationError, MatchValidatorCreationError};
11
12use crate::observability::labels::Labels;
13use crate::rule_match::{InternalRuleMatch, RuleMatch};
14use crate::scoped_ruleset::{ContentVisitor, ExclusionCheck, ScopedRuleSet};
15pub use crate::secondary_validation::Validator;
16use crate::{CreateScannerError, EncodeIndices, MatchAction, Path, RegexValidationError};
17use std::ops::Deref;
18use std::sync::Arc;
19
20use self::metrics::ScannerMetrics;
21use crate::scanner::config::RuleConfig;
22use crate::scanner::regex_rule::compiled::RegexCompiledRule;
23use crate::scanner::regex_rule::{access_regex_caches, RegexCaches};
24use crate::scanner::scope::Scope;
25pub use crate::scanner::shared_data::SharedData;
26use crate::stats::GLOBAL_STATS;
27use ahash::{AHashMap, AHashSet};
28use regex_automata::Match;
29use serde::{Deserialize, Serialize};
30use serde_with::serde_as;
31
32pub mod config;
33pub mod error;
34pub mod metrics;
35pub mod regex_rule;
36pub mod scope;
37pub mod shared_data;
38pub mod shared_pool;
39
40#[cfg(test)]
41mod test;
42
/// Byte range of a single match inside the scanned string.
///
/// The scanner copies `start` / `end` into the `utf8_start` / `utf8_end`
/// fields of its internal match records and uses them as a `start..end`
/// slice of the content, so `end` is exclusive.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct StringMatch {
    /// Offset of the first byte of the match.
    pub start: usize,
    /// Offset one past the last byte of the match.
    pub end: usize,
}
47
48pub trait MatchEmitter<T = ()> {
49 fn emit(&mut self, string_match: StringMatch) -> T;
50}
51
52impl<F, T> MatchEmitter<T> for F
55where
56 F: FnMut(StringMatch) -> T,
57{
58 fn emit(&mut self, string_match: StringMatch) -> T {
59 (self)(string_match)
61 }
62}
63
64#[serde_as]
65#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
66pub struct RootRuleConfig<T> {
67 pub match_action: MatchAction,
68 #[serde(default)]
69 pub scope: Scope,
70 #[deprecated(note = "Use `third_party_active_checker` instead")]
71 match_validation_type: Option<MatchValidationType>,
72 third_party_active_checker: Option<MatchValidationType>,
73 #[serde(flatten)]
74 pub inner: T,
75}
76
77impl<T> RootRuleConfig<T>
78where
79 T: RuleConfig + 'static,
80{
81 pub fn new_dyn(inner: T) -> RootRuleConfig<Arc<dyn RuleConfig>> {
82 RootRuleConfig::new(Arc::new(inner) as Arc<dyn RuleConfig>)
83 }
84
85 pub fn into_dyn(self) -> RootRuleConfig<Arc<dyn RuleConfig>> {
86 self.map_inner(|x| Arc::new(x) as Arc<dyn RuleConfig>)
87 }
88}
89
90impl<T> RootRuleConfig<T> {
91 pub fn new(inner: T) -> Self {
92 #[allow(deprecated)]
93 Self {
94 match_action: MatchAction::None,
95 scope: Scope::all(),
96 match_validation_type: None,
97 third_party_active_checker: None,
98 inner,
99 }
100 }
101
102 pub fn map_inner<U>(self, func: impl FnOnce(T) -> U) -> RootRuleConfig<U> {
103 #[allow(deprecated)]
104 RootRuleConfig {
105 match_action: self.match_action,
106 scope: self.scope,
107 match_validation_type: self.match_validation_type,
108 third_party_active_checker: self.third_party_active_checker,
109 inner: func(self.inner),
110 }
111 }
112
113 pub fn match_action(mut self, action: MatchAction) -> Self {
114 self.match_action = action;
115 self
116 }
117
118 pub fn scope(mut self, scope: Scope) -> Self {
119 self.scope = scope;
120 self
121 }
122
123 pub fn third_party_active_checker(
124 mut self,
125 match_validation_type: MatchValidationType,
126 ) -> Self {
127 self.third_party_active_checker = Some(match_validation_type);
128 self
129 }
130
131 fn get_third_party_active_checker(&self) -> Option<&MatchValidationType> {
132 #[allow(deprecated)]
133 self.third_party_active_checker
134 .as_ref()
135 .or(self.match_validation_type.as_ref())
136 }
137}
138
139impl<T> Deref for RootRuleConfig<T> {
140 type Target = T;
141
142 fn deref(&self) -> &Self::Target {
143 &self.inner
144 }
145}
146pub struct RootCompiledRule {
147 pub inner: Box<dyn CompiledRule>,
148 pub scope: Scope,
149 pub match_action: MatchAction,
150 pub match_validation_type: Option<MatchValidationType>,
151}
152
153impl RootCompiledRule {
154 pub fn internal_match_validation_type(&self) -> Option<InternalMatchValidationType> {
155 self.match_validation_type
156 .as_ref()
157 .map(|x| x.get_internal_match_validation_type())
158 }
159}
160
161impl Deref for RootCompiledRule {
162 type Target = dyn CompiledRule;
163
164 fn deref(&self) -> &Self::Target {
165 self.inner.as_ref()
166 }
167}
168
169pub trait CompiledRule: Send + Sync {
171 fn init_per_scanner_data(&self, _per_scanner_data: &mut SharedData) {
172 }
174
175 fn init_per_string_data(&self, _labels: &Labels, _per_string_data: &mut SharedData) {
176 }
178
179 fn init_per_event_data(&self, _per_event_data: &mut SharedData) {
180 }
182
183 #[allow(clippy::too_many_arguments)]
184 fn get_string_matches(
185 &self,
186 content: &str,
187 path: &Path,
188 regex_caches: &mut RegexCaches,
189 per_string_data: &mut SharedData,
190 per_scanner_data: &SharedData,
191 per_event_data: &mut SharedData,
192 exclusion_check: &ExclusionCheck<'_>,
193 excluded_matches: &mut AHashSet<String>,
194 match_emitter: &mut dyn MatchEmitter,
195 wildcard_indices: Option<&Vec<(usize, usize)>>,
196 );
197
198 #[allow(clippy::too_many_arguments)]
203 fn has_string_match(
204 &self,
205 content: &str,
206 path: &Path,
207 regex_caches: &mut RegexCaches,
208 per_string_data: &mut SharedData,
209 per_scanner_data: &SharedData,
210 per_event_data: &mut SharedData,
211 exclusion_check: &ExclusionCheck<'_>,
212 excluded_matches: &mut AHashSet<String>,
213 wildcard_indices: Option<&Vec<(usize, usize)>>,
214 ) -> bool {
215 let mut found_match = false;
216 let mut match_emitter = |_| found_match = true;
217 self.get_string_matches(
218 content,
219 path,
220 regex_caches,
221 per_string_data,
222 per_scanner_data,
223 per_event_data,
224 exclusion_check,
225 excluded_matches,
226 &mut match_emitter,
227 wildcard_indices,
228 );
229 found_match
230 }
231
232 fn should_exclude_multipass_v0(&self) -> bool {
235 false
237 }
238
239 fn on_excluded_match_multipass_v0(&self) {
240 }
242}
243
244impl<T> RuleConfig for Box<T>
245where
246 T: RuleConfig + ?Sized,
247{
248 fn convert_to_compiled_rule(
249 &self,
250 rule_index: usize,
251 labels: Labels,
252 ) -> Result<Box<dyn CompiledRule>, CreateScannerError> {
253 self.as_ref().convert_to_compiled_rule(rule_index, labels)
254 }
255}
256
/// Feature switches a [`Scanner`] is built with.
#[derive(Debug, PartialEq, Clone)]
struct ScannerFeatures {
    /// Forwarded to `ScopedRuleSet::with_implicit_index_wildcards` when the
    /// scanner is built.
    pub add_implicit_index_wildcards: bool,
    /// Enables the filter that drops matches whose text is in the set of
    /// excluded matches, for rules that opt in via
    /// `should_exclude_multipass_v0`.
    pub multipass_v0_enabled: bool,
    /// When set, the matched text is copied into `RuleMatch::match_value`;
    /// also required by `Scanner::validate_matches`.
    pub return_matches: bool,
    /// When set, rules whose compilation fails because the regex matches the
    /// empty string are skipped instead of failing scanner creation.
    pub skip_rules_with_regex_matching_empty_string: bool,
}
266
267impl Default for ScannerFeatures {
268 fn default() -> Self {
269 Self {
270 add_implicit_index_wildcards: false,
271 multipass_v0_enabled: true,
272 return_matches: false,
273 skip_rules_with_regex_matching_empty_string: false,
274 }
275 }
276}
277
278pub struct ScanOptions {
279 pub blocked_rules_idx: Vec<usize>,
282 pub wildcarded_indices: AHashMap<Path<'static>, Vec<(usize, usize)>>,
284}
285
286impl Default for ScanOptions {
287 fn default() -> Self {
288 Self {
289 blocked_rules_idx: vec![],
290 wildcarded_indices: AHashMap::new(),
291 }
292 }
293}
294
295pub struct ScanOptionBuilder {
296 blocked_rules_idx: Vec<usize>,
297 wildcarded_indices: AHashMap<Path<'static>, Vec<(usize, usize)>>,
298}
299
300impl ScanOptionBuilder {
301 pub fn new() -> Self {
302 Self {
303 blocked_rules_idx: vec![],
304 wildcarded_indices: AHashMap::new(),
305 }
306 }
307
308 pub fn with_blocked_rules_idx(mut self, blocked_rules_idx: Vec<usize>) -> Self {
309 self.blocked_rules_idx = blocked_rules_idx;
310 self
311 }
312
313 pub fn with_wildcarded_indices(
314 mut self,
315 wildcarded_indices: AHashMap<Path<'static>, Vec<(usize, usize)>>,
316 ) -> Self {
317 self.wildcarded_indices = wildcarded_indices;
318 self
319 }
320
321 pub fn build(self) -> ScanOptions {
322 ScanOptions {
323 blocked_rules_idx: self.blocked_rules_idx,
324 wildcarded_indices: self.wildcarded_indices,
325 }
326 }
327}
328
329pub struct Scanner {
330 rules: Vec<RootCompiledRule>,
331 scoped_ruleset: ScopedRuleSet,
332 scanner_features: ScannerFeatures,
333 metrics: ScannerMetrics,
334 labels: Labels,
335 match_validators_per_type: AHashMap<InternalMatchValidationType, Box<dyn MatchValidator>>,
336 per_scanner_data: SharedData,
337}
338
339impl Scanner {
340 pub fn builder(rules: &[RootRuleConfig<Arc<dyn RuleConfig>>]) -> ScannerBuilder {
341 ScannerBuilder::new(rules)
342 }
343
344 pub fn scan_with_options<E: Event>(
345 &self,
346 event: &mut E,
347 options: ScanOptions,
348 ) -> Vec<RuleMatch> {
349 let mut rule_matches_list = vec![];
352
353 let mut excluded_matches = AHashSet::new();
354
355 let start = std::time::Instant::now();
357 access_regex_caches(|regex_caches| {
358 self.scoped_ruleset.visit_string_rule_combinations(
359 event,
360 ScannerContentVisitor {
361 scanner: self,
362 regex_caches,
363 rule_matches: &mut rule_matches_list,
364 blocked_rules: &options.blocked_rules_idx,
365 excluded_matches: &mut excluded_matches,
366 per_event_data: SharedData::new(),
367 wildcarded_indexes: &options.wildcarded_indices,
368 },
369 );
370 });
371
372 let mut output_rule_matches = vec![];
373
374 for (path, rule_matches) in &mut rule_matches_list {
375 event.visit_string_mut(path, |content| {
377 if self.scanner_features.multipass_v0_enabled {
378 rule_matches.retain(|rule_match| {
381 if self.rules[rule_match.rule_index]
382 .inner
383 .should_exclude_multipass_v0()
384 {
385 let is_false_positive = excluded_matches
386 .contains(&content[rule_match.utf8_start..rule_match.utf8_end]);
387 if is_false_positive && self.scanner_features.multipass_v0_enabled {
388 self.rules[rule_match.rule_index].on_excluded_match_multipass_v0();
389 }
390 !is_false_positive
391 } else {
392 true
393 }
394 });
395 }
396
397 self.sort_and_remove_overlapping_rules::<E::Encoding>(rule_matches);
398
399 let will_mutate = rule_matches
400 .iter()
401 .any(|rule_match| self.rules[rule_match.rule_index].match_action.is_mutating());
402
403 self.apply_match_actions(content, path, rule_matches, &mut output_rule_matches);
404
405 will_mutate
406 });
407 }
408 self.metrics
410 .duration_ns
411 .increment(start.elapsed().as_nanos() as u64);
412 self.metrics.num_scanned_events.increment(1);
414 self.metrics
416 .match_count
417 .increment(output_rule_matches.len() as u64);
418
419 output_rule_matches
420 }
421
422 pub fn scan<E: Event>(&self, event: &mut E) -> Vec<RuleMatch> {
426 self.scan_with_options(event, ScanOptions::default())
427 }
428
429 pub fn validate_matches(
430 &self,
431 rule_matches: &mut Vec<RuleMatch>,
432 ) -> Result<(), MatchValidationError> {
433 if !self.scanner_features.return_matches {
434 return Err(MatchValidationError::NoMatchValidationType);
435 }
436 let mut match_validator_rule_match_per_type = AHashMap::new();
438
439 let mut validated_rule_matches = vec![];
440
441 for mut rule_match in rule_matches.drain(..) {
442 let rule = &self.rules[rule_match.rule_index];
443 if let Some(match_validation_type) = rule.internal_match_validation_type() {
444 match_validator_rule_match_per_type
445 .entry(match_validation_type)
446 .or_insert_with(Vec::new)
447 .push(rule_match)
448 } else {
449 rule_match.match_status.merge(MatchStatus::NotAvailable);
451 validated_rule_matches.push(rule_match);
452 }
453 }
454
455 match_validator_rule_match_per_type.par_iter_mut().for_each(
456 |(match_validation_type, matches_per_type)| {
457 let match_validator = self.match_validators_per_type.get(match_validation_type);
458 if let Some(match_validator) = match_validator {
459 match_validator
460 .as_ref()
461 .validate(matches_per_type, &self.rules)
462 }
463 },
464 );
465
466 for (_, mut matches) in match_validator_rule_match_per_type {
468 validated_rule_matches.append(&mut matches);
469 }
470
471 validated_rule_matches.sort_by_key(|rule_match| rule_match.start_index);
473 *rule_matches = validated_rule_matches;
474 Ok(())
475 }
476
477 fn apply_match_actions<E: Encoding>(
480 &self,
481 content: &mut String,
482 path: &Path<'static>,
483 rule_matches: &mut [InternalRuleMatch<E>],
484 output_rule_matches: &mut Vec<RuleMatch>,
485 ) {
486 let mut utf8_byte_delta: isize = 0;
487 let mut custom_index_delta: <E>::IndexShift = <E>::zero_shift();
488
489 for rule_match in rule_matches {
490 output_rule_matches.push(self.apply_match_actions_for_string::<E>(
491 content,
492 path.clone(),
493 rule_match,
494 &mut utf8_byte_delta,
495 &mut custom_index_delta,
496 ));
497 }
498 }
499
500 fn apply_match_actions_for_string<E: Encoding>(
502 &self,
503 content: &mut String,
504 path: Path<'static>,
505 rule_match: &InternalRuleMatch<E>,
506 utf8_byte_delta: &mut isize,
508
509 custom_index_delta: &mut <E>::IndexShift,
511 ) -> RuleMatch {
512 let rule = &self.rules[rule_match.rule_index];
513
514 let custom_start =
515 (<E>::get_index(&rule_match.custom_start, rule_match.utf8_start) as isize
516 + <E>::get_shift(custom_index_delta, *utf8_byte_delta)) as usize;
517
518 let mut matched_content_copy = None;
519
520 if self.scanner_features.return_matches {
521 let mutated_utf8_match_start =
523 (rule_match.utf8_start as isize + *utf8_byte_delta) as usize;
524 let mutated_utf8_match_end = (rule_match.utf8_end as isize + *utf8_byte_delta) as usize;
525
526 debug_assert!(content.is_char_boundary(mutated_utf8_match_start));
528 debug_assert!(content.is_char_boundary(mutated_utf8_match_end));
529
530 let matched_content = &content[mutated_utf8_match_start..mutated_utf8_match_end];
531 matched_content_copy = Some(matched_content.to_string());
532 }
533
534 if rule.match_action.is_mutating() {
535 let mutated_utf8_match_start =
536 (rule_match.utf8_start as isize + *utf8_byte_delta) as usize;
537 let mutated_utf8_match_end = (rule_match.utf8_end as isize + *utf8_byte_delta) as usize;
538
539 debug_assert!(content.is_char_boundary(mutated_utf8_match_start));
541 debug_assert!(content.is_char_boundary(mutated_utf8_match_end));
542
543 let matched_content = &content[mutated_utf8_match_start..mutated_utf8_match_end];
544 if let Some(replacement) = rule.match_action.get_replacement(matched_content) {
545 let before_replacement = &matched_content[replacement.start..replacement.end];
546
547 <E>::adjust_shift(
549 custom_index_delta,
550 before_replacement,
551 &replacement.replacement,
552 );
553 *utf8_byte_delta +=
554 replacement.replacement.len() as isize - before_replacement.len() as isize;
555
556 let replacement_start = mutated_utf8_match_start + replacement.start;
557 let replacement_end = mutated_utf8_match_start + replacement.end;
558 content.replace_range(replacement_start..replacement_end, &replacement.replacement);
559 }
560 }
561
562 let shift_offset = <E>::get_shift(custom_index_delta, *utf8_byte_delta);
563 let custom_end = (<E>::get_index(&rule_match.custom_end, rule_match.utf8_end) as isize
564 + shift_offset) as usize;
565
566 let rule = &self.rules[rule_match.rule_index];
567
568 let match_status: MatchStatus = if rule.match_validation_type.is_some() {
569 MatchStatus::NotChecked
570 } else {
571 MatchStatus::NotAvailable
572 };
573
574 RuleMatch {
575 rule_index: rule_match.rule_index,
576 path,
577 replacement_type: rule.match_action.replacement_type(),
578 start_index: custom_start,
579 end_index_exclusive: custom_end,
580 shift_offset,
581 match_value: matched_content_copy,
582 match_status,
583 }
584 }
585
586 fn sort_and_remove_overlapping_rules<E: Encoding>(
587 &self,
588 rule_matches: &mut Vec<InternalRuleMatch<E>>,
589 ) {
590 rule_matches.sort_unstable_by(|a, b| {
594 let ord = self.rules[a.rule_index]
596 .match_action
597 .is_mutating()
598 .cmp(&self.rules[b.rule_index].match_action.is_mutating())
599 .reverse();
600
601 let ord = ord.then(a.utf8_start.cmp(&b.utf8_start));
603
604 let ord = ord.then(a.len().cmp(&b.len()).reverse());
606
607 let ord = ord.then(a.rule_index.cmp(&b.rule_index));
609
610 ord.reverse()
612 });
613
614 let mut retained_rules: Vec<InternalRuleMatch<E>> = vec![];
615
616 'rule_matches: while let Some(rule_match) = rule_matches.pop() {
617 if self.rules[rule_match.rule_index].match_action.is_mutating() {
618 if let Some(last) = retained_rules.last() {
620 if last.utf8_end > rule_match.utf8_start {
621 continue;
622 }
623 }
624 } else {
625 for retained_rule in &retained_rules {
628 if retained_rule.utf8_start < rule_match.utf8_end
629 && retained_rule.utf8_end > rule_match.utf8_start
630 {
631 continue 'rule_matches;
632 }
633 }
634 };
635 retained_rules.push(rule_match);
636 }
637
638 retained_rules.sort_unstable_by_key(|rule_match| rule_match.utf8_start);
640
641 *rule_matches = retained_rules;
642 }
643}
644
645impl Drop for Scanner {
646 fn drop(&mut self) {
647 let stats = &*GLOBAL_STATS;
648 stats.scanner_deletions.increment(1);
649 stats.decrement_total_scanners();
650 }
651}
652
653#[derive(Default)]
654pub struct ScannerBuilder<'a> {
655 rules: &'a [RootRuleConfig<Arc<dyn RuleConfig>>],
656 labels: Labels,
657 scanner_features: ScannerFeatures,
658}
659
660impl ScannerBuilder<'_> {
661 pub fn new(rules: &[RootRuleConfig<Arc<dyn RuleConfig>>]) -> ScannerBuilder {
662 ScannerBuilder {
663 rules,
664 labels: Labels::empty(),
665 scanner_features: ScannerFeatures::default(),
666 }
667 }
668
669 pub fn labels(mut self, labels: Labels) -> Self {
670 self.labels = labels;
671 self
672 }
673
674 pub fn with_implicit_wildcard_indexes_for_scopes(mut self, value: bool) -> Self {
675 self.scanner_features.add_implicit_index_wildcards = value;
676 self
677 }
678
679 pub fn with_return_matches(mut self, value: bool) -> Self {
680 self.scanner_features.return_matches = value;
681 self
682 }
683
684 pub fn with_multipass_v0(mut self, value: bool) -> Self {
688 self.scanner_features.multipass_v0_enabled = value;
689 self
690 }
691
692 pub fn with_skip_rules_with_regex_matching_empty_string(mut self, value: bool) -> Self {
693 self.scanner_features
694 .skip_rules_with_regex_matching_empty_string = value;
695 self
696 }
697
698 pub fn build(self) -> Result<Scanner, CreateScannerError> {
699 let mut match_validators_per_type = AHashMap::new();
700
701 for rule in self.rules.iter() {
702 if let Some(match_validation_type) = &rule.get_third_party_active_checker() {
703 if match_validation_type.can_create_match_validator() {
704 let internal_type = match_validation_type.get_internal_match_validation_type();
705 let match_validator = match_validation_type.into_match_validator();
706 if let Ok(match_validator) = match_validator {
707 if !match_validators_per_type.contains_key(&internal_type) {
708 match_validators_per_type.insert(internal_type, match_validator);
709 }
710 } else {
711 return Err(CreateScannerError::InvalidMatchValidator(
712 MatchValidatorCreationError::InternalError,
713 ));
714 }
715 }
716 }
717 }
718
719 let compiled_rules = self
720 .rules
721 .iter()
722 .enumerate()
723 .filter_map(|(rule_index, config)| {
724 let inner = match config.convert_to_compiled_rule(rule_index, self.labels.clone()) {
725 Ok(inner) => Ok(inner),
726 Err(err) => {
727 if self
728 .scanner_features
729 .skip_rules_with_regex_matching_empty_string
730 && err
731 == CreateScannerError::InvalidRegex(
732 RegexValidationError::MatchesEmptyString,
733 )
734 {
735 #[allow(clippy::print_stdout)]
737 {
738 println!("skipping rule that matches empty string: rule_index={}, labels={:?}", rule_index, self.labels.clone());
739 }
740 return None;
741 } else {
742 Err(err)
743 }
744 }
745 };
746 Some((config, inner))
747 })
748 .map(|(config, inner)| {
749 config.match_action.validate()?;
750 Ok(RootCompiledRule {
751 inner: inner?,
752 scope: config.scope.clone(),
753 match_action: config.match_action.clone(),
754 match_validation_type: config.get_third_party_active_checker().cloned(),
755 })
756 })
757 .collect::<Result<Vec<RootCompiledRule>, CreateScannerError>>()?;
758
759 let mut per_scanner_data = SharedData::new();
760
761 compiled_rules.iter().for_each(|rule| {
762 rule.init_per_scanner_data(&mut per_scanner_data);
763 });
764
765 let scoped_ruleset = ScopedRuleSet::new(
766 &compiled_rules
767 .iter()
768 .map(|rule| rule.scope.clone())
769 .collect::<Vec<_>>(),
770 )
771 .with_implicit_index_wildcards(self.scanner_features.add_implicit_index_wildcards);
772
773 {
774 let stats = &*GLOBAL_STATS;
775 stats.scanner_creations.increment(1);
776 stats.increment_total_scanners();
777 }
778
779 Ok(Scanner {
780 rules: compiled_rules,
781 scoped_ruleset,
782 scanner_features: self.scanner_features,
783 metrics: ScannerMetrics::new(&self.labels),
784 match_validators_per_type,
785 labels: self.labels,
786 per_scanner_data,
787 })
788 }
789}
790
791struct ScannerContentVisitor<'a, E: Encoding> {
792 scanner: &'a Scanner,
793 regex_caches: &'a mut RegexCaches,
794 rule_matches: &'a mut Vec<(crate::Path<'static>, Vec<InternalRuleMatch<E>>)>,
795 blocked_rules: &'a Vec<usize>,
798 excluded_matches: &'a mut AHashSet<String>,
799 per_event_data: SharedData,
800 wildcarded_indexes: &'a AHashMap<Path<'static>, Vec<(usize, usize)>>,
801}
802
803impl<'a, E: Encoding> ContentVisitor<'a> for ScannerContentVisitor<'a, E> {
804 fn visit_content<'b>(
805 &'b mut self,
806 path: &Path<'a>,
807 content: &str,
808 mut rule_visitor: crate::scoped_ruleset::RuleIndexVisitor,
809 exclusion_check: ExclusionCheck<'b>,
810 ) -> bool {
811 let mut path_rules_matches = vec![];
813
814 let mut per_string_data = SharedData::new();
816 let wildcard_indices_per_path = self.wildcarded_indexes.get(path);
817
818 rule_visitor.visit_rule_indices(|rule_index| {
819 if self.blocked_rules.contains(&rule_index) {
820 return;
821 }
822 let rule = &self.scanner.rules[rule_index];
823 {
824 let mut emitter = |rule_match: StringMatch| {
826 assert_ne!(rule_match.start, rule_match.end, "empty match detected");
829
830 path_rules_matches.push(InternalRuleMatch {
831 rule_index,
832 utf8_start: rule_match.start,
833 utf8_end: rule_match.end,
834 custom_start: E::zero_index(),
835 custom_end: E::zero_index(),
836 });
837 };
838
839 rule.init_per_string_data(&self.scanner.labels, &mut per_string_data);
840
841 rule.init_per_event_data(&mut self.per_event_data);
843
844 rule.get_string_matches(
845 content,
846 path,
847 self.regex_caches,
848 &mut per_string_data,
849 &self.scanner.per_scanner_data,
850 &mut self.per_event_data,
851 &exclusion_check,
852 self.excluded_matches,
853 &mut emitter,
854 wildcard_indices_per_path,
855 );
856 }
857 });
858
859 path_rules_matches.sort_unstable_by_key(|rule_match| rule_match.utf8_start);
861
862 E::calculate_indices(
863 content,
864 path_rules_matches
865 .iter_mut()
866 .map(|rule_match: &mut InternalRuleMatch<E>| EncodeIndices {
867 utf8_start: rule_match.utf8_start,
868 utf8_end: rule_match.utf8_end,
869 custom_start: &mut rule_match.custom_start,
870 custom_end: &mut rule_match.custom_end,
871 }),
872 );
873
874 let has_match = !path_rules_matches.is_empty();
877
878 if has_match {
879 self.rule_matches
880 .push((path.into_static(), path_rules_matches));
881 }
882
883 has_match
884 }
885}
886
887fn get_next_regex_start(content: &str, regex_match: &Match) -> Option<usize> {
889 if let Some((i, _)) = content[regex_match.start()..].char_indices().nth(1) {
891 Some(regex_match.start() + i)
892 } else {
893 None
895 }
896}
897
898fn is_false_positive_match(
899 regex_match: &Match,
900 rule: &RegexCompiledRule,
901 content: &str,
902 check_excluded_keywords: bool,
903) -> bool {
904 if check_excluded_keywords {
905 if let Some(excluded_keywords) = &rule.excluded_keywords {
906 if excluded_keywords.is_false_positive_match(content, regex_match.start()) {
907 return true;
908 }
909 }
910 }
911
912 if let Some(validator) = rule.validator.as_ref() {
913 if !validator.is_valid_match(&content[regex_match.range()]) {
914 return true;
915 };
916 }
917 false
918}