1use crate::encoding::Encoding;
2use crate::event::Event;
3
4use crate::match_validation::{
5 config::InternalMatchValidationType, config::MatchValidationType, match_status::MatchStatus,
6 match_validator::MatchValidator,
7};
8use rayon::prelude::*;
9
10use error::{MatchValidationError, MatchValidatorCreationError};
11
12use crate::observability::labels::Labels;
13use crate::rule_match::{InternalRuleMatch, RuleMatch};
14use crate::scoped_ruleset::{ContentVisitor, ExclusionCheck, ScopedRuleSet};
15pub use crate::secondary_validation::Validator;
16use crate::{CreateScannerError, EncodeIndices, MatchAction, Path};
17use std::ops::Deref;
18use std::sync::Arc;
19
20use self::metrics::ScannerMetrics;
21use crate::scanner::config::RuleConfig;
22use crate::scanner::regex_rule::compiled::RegexCompiledRule;
23use crate::scanner::regex_rule::{access_regex_caches, RegexCaches};
24use crate::scanner::scope::Scope;
25pub use crate::scanner::shared_data::SharedData;
26use crate::stats::GLOBAL_STATS;
27use ahash::{AHashMap, AHashSet};
28use regex_automata::Match;
29use serde::{Deserialize, Serialize};
30use serde_with::serde_as;
31
32pub mod config;
33pub mod error;
34pub mod metrics;
35pub mod regex_rule;
36pub mod scope;
37pub mod shared_data;
38pub mod shared_pool;
39
40#[cfg(test)]
41mod test;
42
/// Location of one match inside a string, as a `start`/`end` pair of offsets.
///
/// NOTE(review): the scanner copies these into `utf8_start` / `utf8_end`, so they are
/// presumably UTF-8 byte offsets with an exclusive `end` — confirm against rule implementations.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct StringMatch {
    /// Offset at which the match begins.
    pub start: usize,
    /// Offset at which the match ends.
    pub end: usize,
}
47
48pub trait MatchEmitter<T = ()> {
49 fn emit(&mut self, string_match: StringMatch) -> T;
50}
51
52impl<F, T> MatchEmitter<T> for F
55where
56 F: FnMut(StringMatch) -> T,
57{
58 fn emit(&mut self, string_match: StringMatch) -> T {
59 (self)(string_match)
61 }
62}
63
64#[serde_as]
65#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
66pub struct RootRuleConfig<T> {
67 pub match_action: MatchAction,
68 #[serde(default)]
69 pub scope: Scope,
70 #[deprecated(note = "Use `third_party_active_checker` instead")]
71 match_validation_type: Option<MatchValidationType>,
72 third_party_active_checker: Option<MatchValidationType>,
73 #[serde(flatten)]
74 pub inner: T,
75}
76
77impl<T> RootRuleConfig<T>
78where
79 T: RuleConfig + 'static,
80{
81 pub fn new_dyn(inner: T) -> RootRuleConfig<Arc<dyn RuleConfig>> {
82 RootRuleConfig::new(Arc::new(inner) as Arc<dyn RuleConfig>)
83 }
84
85 pub fn into_dyn(self) -> RootRuleConfig<Arc<dyn RuleConfig>> {
86 self.map_inner(|x| Arc::new(x) as Arc<dyn RuleConfig>)
87 }
88}
89
90impl<T> RootRuleConfig<T> {
91 pub fn new(inner: T) -> Self {
92 #[allow(deprecated)]
93 Self {
94 match_action: MatchAction::None,
95 scope: Scope::all(),
96 match_validation_type: None,
97 third_party_active_checker: None,
98 inner,
99 }
100 }
101
102 pub fn map_inner<U>(self, func: impl FnOnce(T) -> U) -> RootRuleConfig<U> {
103 #[allow(deprecated)]
104 RootRuleConfig {
105 match_action: self.match_action,
106 scope: self.scope,
107 match_validation_type: self.match_validation_type,
108 third_party_active_checker: self.third_party_active_checker,
109 inner: func(self.inner),
110 }
111 }
112
113 pub fn match_action(mut self, action: MatchAction) -> Self {
114 self.match_action = action;
115 self
116 }
117
118 pub fn scope(mut self, scope: Scope) -> Self {
119 self.scope = scope;
120 self
121 }
122
123 pub fn third_party_active_checker(
124 mut self,
125 match_validation_type: MatchValidationType,
126 ) -> Self {
127 self.third_party_active_checker = Some(match_validation_type);
128 self
129 }
130
131 fn get_third_party_active_checker(&self) -> Option<&MatchValidationType> {
132 #[allow(deprecated)]
133 self.third_party_active_checker
134 .as_ref()
135 .or(self.match_validation_type.as_ref())
136 }
137}
138
139impl<T> Deref for RootRuleConfig<T> {
140 type Target = T;
141
142 fn deref(&self) -> &Self::Target {
143 &self.inner
144 }
145}
146pub struct RootCompiledRule {
147 pub inner: Box<dyn CompiledRule>,
148 pub scope: Scope,
149 pub match_action: MatchAction,
150 pub match_validation_type: Option<MatchValidationType>,
151}
152
153impl RootCompiledRule {
154 pub fn internal_match_validation_type(&self) -> Option<InternalMatchValidationType> {
155 self.match_validation_type
156 .as_ref()
157 .map(|x| x.get_internal_match_validation_type())
158 }
159}
160
161impl Deref for RootCompiledRule {
162 type Target = dyn CompiledRule;
163
164 fn deref(&self) -> &Self::Target {
165 self.inner.as_ref()
166 }
167}
168
169pub trait CompiledRule: Send + Sync {
171 fn init_per_scanner_data(&self, _per_scanner_data: &mut SharedData) {
172 }
174
175 fn init_per_string_data(&self, _labels: &Labels, _per_string_data: &mut SharedData) {
176 }
178
179 fn init_per_event_data(&self, _per_event_data: &mut SharedData) {
180 }
182
183 #[allow(clippy::too_many_arguments)]
184 fn get_string_matches(
185 &self,
186 content: &str,
187 path: &Path,
188 regex_caches: &mut RegexCaches,
189 per_string_data: &mut SharedData,
190 per_scanner_data: &SharedData,
191 per_event_data: &mut SharedData,
192 exclusion_check: &ExclusionCheck<'_>,
193 excluded_matches: &mut AHashSet<String>,
194 match_emitter: &mut dyn MatchEmitter,
195 wildcard_indices: Option<&Vec<(usize, usize)>>,
196 );
197
198 #[allow(clippy::too_many_arguments)]
203 fn has_string_match(
204 &self,
205 content: &str,
206 path: &Path,
207 regex_caches: &mut RegexCaches,
208 per_string_data: &mut SharedData,
209 per_scanner_data: &SharedData,
210 per_event_data: &mut SharedData,
211 exclusion_check: &ExclusionCheck<'_>,
212 excluded_matches: &mut AHashSet<String>,
213 wildcard_indices: Option<&Vec<(usize, usize)>>,
214 ) -> bool {
215 let mut found_match = false;
216 let mut match_emitter = |_| found_match = true;
217 self.get_string_matches(
218 content,
219 path,
220 regex_caches,
221 per_string_data,
222 per_scanner_data,
223 per_event_data,
224 exclusion_check,
225 excluded_matches,
226 &mut match_emitter,
227 wildcard_indices,
228 );
229 found_match
230 }
231
232 fn should_exclude_multipass_v0(&self) -> bool {
235 false
237 }
238
239 fn on_excluded_match_multipass_v0(&self) {
240 }
242}
243
244impl<T> RuleConfig for Box<T>
245where
246 T: RuleConfig + ?Sized,
247{
248 fn convert_to_compiled_rule(
249 &self,
250 rule_index: usize,
251 labels: Labels,
252 ) -> Result<Box<dyn CompiledRule>, CreateScannerError> {
253 self.as_ref().convert_to_compiled_rule(rule_index, labels)
254 }
255}
256
/// Feature switches of a [`Scanner`], set through the scanner builder.
#[derive(Clone, Debug, PartialEq)]
struct ScannerFeatures {
    /// Forwarded to the scoped rule set as its "implicit index wildcards" setting.
    pub add_implicit_index_wildcards: bool,
    /// Enables dropping matches whose text is in the excluded-matches set, for rules that
    /// opt in.
    pub multipass_v0_enabled: bool,
    /// When set, the matched text is copied into each returned match.
    pub return_matches: bool,
}

impl Default for ScannerFeatures {
    /// Multipass v0 is the only feature enabled by default.
    fn default() -> Self {
        Self {
            multipass_v0_enabled: true,
            add_implicit_index_wildcards: false,
            return_matches: false,
        }
    }
}
273
274pub struct ScanOptions {
275 pub blocked_rules_idx: Vec<usize>,
278 pub wildcarded_indices: AHashMap<Path<'static>, Vec<(usize, usize)>>,
280}
281
282impl Default for ScanOptions {
283 fn default() -> Self {
284 Self {
285 blocked_rules_idx: vec![],
286 wildcarded_indices: AHashMap::new(),
287 }
288 }
289}
290
291pub struct ScanOptionBuilder {
292 blocked_rules_idx: Vec<usize>,
293 wildcarded_indices: AHashMap<Path<'static>, Vec<(usize, usize)>>,
294}
295
296impl ScanOptionBuilder {
297 pub fn new() -> Self {
298 Self {
299 blocked_rules_idx: vec![],
300 wildcarded_indices: AHashMap::new(),
301 }
302 }
303
304 pub fn with_blocked_rules_idx(mut self, blocked_rules_idx: Vec<usize>) -> Self {
305 self.blocked_rules_idx = blocked_rules_idx;
306 self
307 }
308
309 pub fn with_wildcarded_indices(
310 mut self,
311 wildcarded_indices: AHashMap<Path<'static>, Vec<(usize, usize)>>,
312 ) -> Self {
313 self.wildcarded_indices = wildcarded_indices;
314 self
315 }
316
317 pub fn build(self) -> ScanOptions {
318 ScanOptions {
319 blocked_rules_idx: self.blocked_rules_idx,
320 wildcarded_indices: self.wildcarded_indices,
321 }
322 }
323}
324
325pub struct Scanner {
326 rules: Vec<RootCompiledRule>,
327 scoped_ruleset: ScopedRuleSet,
328 scanner_features: ScannerFeatures,
329 metrics: ScannerMetrics,
330 labels: Labels,
331 match_validators_per_type: AHashMap<InternalMatchValidationType, Box<dyn MatchValidator>>,
332 per_scanner_data: SharedData,
333}
334
335impl Scanner {
336 pub fn builder(rules: &[RootRuleConfig<Arc<dyn RuleConfig>>]) -> ScannerBuilder {
337 ScannerBuilder::new(rules)
338 }
339
340 pub fn scan_with_options<E: Event>(
341 &self,
342 event: &mut E,
343 options: ScanOptions,
344 ) -> Vec<RuleMatch> {
345 let mut rule_matches_list = vec![];
348
349 let mut excluded_matches = AHashSet::new();
350
351 let start = std::time::Instant::now();
353 access_regex_caches(|regex_caches| {
354 self.scoped_ruleset.visit_string_rule_combinations(
355 event,
356 ScannerContentVisitor {
357 scanner: self,
358 regex_caches,
359 rule_matches: &mut rule_matches_list,
360 blocked_rules: &options.blocked_rules_idx,
361 excluded_matches: &mut excluded_matches,
362 per_event_data: SharedData::new(),
363 wildcarded_indexes: &options.wildcarded_indices,
364 },
365 );
366 });
367
368 let mut output_rule_matches = vec![];
369
370 for (path, rule_matches) in &mut rule_matches_list {
371 event.visit_string_mut(path, |content| {
373 if self.scanner_features.multipass_v0_enabled {
374 rule_matches.retain(|rule_match| {
377 if self.rules[rule_match.rule_index]
378 .inner
379 .should_exclude_multipass_v0()
380 {
381 let is_false_positive = excluded_matches
382 .contains(&content[rule_match.utf8_start..rule_match.utf8_end]);
383 if is_false_positive && self.scanner_features.multipass_v0_enabled {
384 self.rules[rule_match.rule_index].on_excluded_match_multipass_v0();
385 }
386 !is_false_positive
387 } else {
388 true
389 }
390 });
391 }
392
393 self.sort_and_remove_overlapping_rules::<E::Encoding>(rule_matches);
394
395 let will_mutate = rule_matches
396 .iter()
397 .any(|rule_match| self.rules[rule_match.rule_index].match_action.is_mutating());
398
399 self.apply_match_actions(content, path, rule_matches, &mut output_rule_matches);
400
401 will_mutate
402 });
403 }
404 self.metrics
406 .duration_ns
407 .increment(start.elapsed().as_nanos() as u64);
408 self.metrics.num_scanned_events.increment(1);
410 self.metrics
412 .match_count
413 .increment(output_rule_matches.len() as u64);
414
415 output_rule_matches
416 }
417
418 pub fn scan<E: Event>(&self, event: &mut E) -> Vec<RuleMatch> {
422 self.scan_with_options(event, ScanOptions::default())
423 }
424
425 pub fn validate_matches(
426 &self,
427 rule_matches: &mut Vec<RuleMatch>,
428 ) -> Result<(), MatchValidationError> {
429 if !self.scanner_features.return_matches {
430 return Err(MatchValidationError::NoMatchValidationType);
431 }
432 let mut match_validator_rule_match_per_type = AHashMap::new();
434
435 let mut validated_rule_matches = vec![];
436
437 for mut rule_match in rule_matches.drain(..) {
438 let rule = &self.rules[rule_match.rule_index];
439 if let Some(match_validation_type) = rule.internal_match_validation_type() {
440 match_validator_rule_match_per_type
441 .entry(match_validation_type)
442 .or_insert_with(Vec::new)
443 .push(rule_match)
444 } else {
445 rule_match.match_status.merge(MatchStatus::NotAvailable);
447 validated_rule_matches.push(rule_match);
448 }
449 }
450
451 match_validator_rule_match_per_type.par_iter_mut().for_each(
452 |(match_validation_type, matches_per_type)| {
453 let match_validator = self.match_validators_per_type.get(match_validation_type);
454 if let Some(match_validator) = match_validator {
455 match_validator
456 .as_ref()
457 .validate(matches_per_type, &self.rules)
458 }
459 },
460 );
461
462 for (_, mut matches) in match_validator_rule_match_per_type {
464 validated_rule_matches.append(&mut matches);
465 }
466
467 validated_rule_matches.sort_by_key(|rule_match| rule_match.start_index);
469 *rule_matches = validated_rule_matches;
470 Ok(())
471 }
472
473 fn apply_match_actions<E: Encoding>(
476 &self,
477 content: &mut String,
478 path: &Path<'static>,
479 rule_matches: &mut [InternalRuleMatch<E>],
480 output_rule_matches: &mut Vec<RuleMatch>,
481 ) {
482 let mut utf8_byte_delta: isize = 0;
483 let mut custom_index_delta: <E>::IndexShift = <E>::zero_shift();
484
485 for rule_match in rule_matches {
486 output_rule_matches.push(self.apply_match_actions_for_string::<E>(
487 content,
488 path.clone(),
489 rule_match,
490 &mut utf8_byte_delta,
491 &mut custom_index_delta,
492 ));
493 }
494 }
495
496 fn apply_match_actions_for_string<E: Encoding>(
498 &self,
499 content: &mut String,
500 path: Path<'static>,
501 rule_match: &InternalRuleMatch<E>,
502 utf8_byte_delta: &mut isize,
504
505 custom_index_delta: &mut <E>::IndexShift,
507 ) -> RuleMatch {
508 let rule = &self.rules[rule_match.rule_index];
509
510 let custom_start =
511 (<E>::get_index(&rule_match.custom_start, rule_match.utf8_start) as isize
512 + <E>::get_shift(custom_index_delta, *utf8_byte_delta)) as usize;
513
514 let mut matched_content_copy = None;
515
516 if self.scanner_features.return_matches {
517 let mutated_utf8_match_start =
519 (rule_match.utf8_start as isize + *utf8_byte_delta) as usize;
520 let mutated_utf8_match_end = (rule_match.utf8_end as isize + *utf8_byte_delta) as usize;
521
522 debug_assert!(content.is_char_boundary(mutated_utf8_match_start));
524 debug_assert!(content.is_char_boundary(mutated_utf8_match_end));
525
526 let matched_content = &content[mutated_utf8_match_start..mutated_utf8_match_end];
527 matched_content_copy = Some(matched_content.to_string());
528 }
529
530 if rule.match_action.is_mutating() {
531 let mutated_utf8_match_start =
532 (rule_match.utf8_start as isize + *utf8_byte_delta) as usize;
533 let mutated_utf8_match_end = (rule_match.utf8_end as isize + *utf8_byte_delta) as usize;
534
535 debug_assert!(content.is_char_boundary(mutated_utf8_match_start));
537 debug_assert!(content.is_char_boundary(mutated_utf8_match_end));
538
539 let matched_content = &content[mutated_utf8_match_start..mutated_utf8_match_end];
540 if let Some(replacement) = rule.match_action.get_replacement(matched_content) {
541 let before_replacement = &matched_content[replacement.start..replacement.end];
542
543 <E>::adjust_shift(
545 custom_index_delta,
546 before_replacement,
547 &replacement.replacement,
548 );
549 *utf8_byte_delta +=
550 replacement.replacement.len() as isize - before_replacement.len() as isize;
551
552 let replacement_start = mutated_utf8_match_start + replacement.start;
553 let replacement_end = mutated_utf8_match_start + replacement.end;
554 content.replace_range(replacement_start..replacement_end, &replacement.replacement);
555 }
556 }
557
558 let shift_offset = <E>::get_shift(custom_index_delta, *utf8_byte_delta);
559 let custom_end = (<E>::get_index(&rule_match.custom_end, rule_match.utf8_end) as isize
560 + shift_offset) as usize;
561
562 let rule = &self.rules[rule_match.rule_index];
563
564 let match_status: MatchStatus = if rule.match_validation_type.is_some() {
565 MatchStatus::NotChecked
566 } else {
567 MatchStatus::NotAvailable
568 };
569
570 RuleMatch {
571 rule_index: rule_match.rule_index,
572 path,
573 replacement_type: rule.match_action.replacement_type(),
574 start_index: custom_start,
575 end_index_exclusive: custom_end,
576 shift_offset,
577 match_value: matched_content_copy,
578 match_status,
579 }
580 }
581
582 fn sort_and_remove_overlapping_rules<E: Encoding>(
583 &self,
584 rule_matches: &mut Vec<InternalRuleMatch<E>>,
585 ) {
586 rule_matches.sort_unstable_by(|a, b| {
590 let ord = self.rules[a.rule_index]
592 .match_action
593 .is_mutating()
594 .cmp(&self.rules[b.rule_index].match_action.is_mutating())
595 .reverse();
596
597 let ord = ord.then(a.utf8_start.cmp(&b.utf8_start));
599
600 let ord = ord.then(a.len().cmp(&b.len()).reverse());
602
603 let ord = ord.then(a.rule_index.cmp(&b.rule_index));
605
606 ord.reverse()
608 });
609
610 let mut retained_rules: Vec<InternalRuleMatch<E>> = vec![];
611
612 'rule_matches: while let Some(rule_match) = rule_matches.pop() {
613 if self.rules[rule_match.rule_index].match_action.is_mutating() {
614 if let Some(last) = retained_rules.last() {
616 if last.utf8_end > rule_match.utf8_start {
617 continue;
618 }
619 }
620 } else {
621 for retained_rule in &retained_rules {
624 if retained_rule.utf8_start < rule_match.utf8_end
625 && retained_rule.utf8_end > rule_match.utf8_start
626 {
627 continue 'rule_matches;
628 }
629 }
630 };
631 retained_rules.push(rule_match);
632 }
633
634 retained_rules.sort_unstable_by_key(|rule_match| rule_match.utf8_start);
636
637 *rule_matches = retained_rules;
638 }
639}
640
641impl Drop for Scanner {
642 fn drop(&mut self) {
643 let stats = &*GLOBAL_STATS;
644 stats.scanner_deletions.increment(1);
645 stats.decrement_total_scanners();
646 }
647}
648
649#[derive(Default)]
650pub struct ScannerBuilder<'a> {
651 rules: &'a [RootRuleConfig<Arc<dyn RuleConfig>>],
652 labels: Labels,
653 scanner_features: ScannerFeatures,
654}
655
656impl ScannerBuilder<'_> {
657 pub fn new(rules: &[RootRuleConfig<Arc<dyn RuleConfig>>]) -> ScannerBuilder {
658 ScannerBuilder {
659 rules,
660 labels: Labels::empty(),
661 scanner_features: ScannerFeatures::default(),
662 }
663 }
664
665 pub fn labels(mut self, labels: Labels) -> Self {
666 self.labels = labels;
667 self
668 }
669
670 pub fn with_implicit_wildcard_indexes_for_scopes(mut self, value: bool) -> Self {
671 self.scanner_features.add_implicit_index_wildcards = value;
672 self
673 }
674
675 pub fn with_return_matches(mut self, value: bool) -> Self {
676 self.scanner_features.return_matches = value;
677 self
678 }
679
680 pub fn with_multipass_v0(mut self, value: bool) -> Self {
684 self.scanner_features.multipass_v0_enabled = value;
685 self
686 }
687
688 pub fn build(self) -> Result<Scanner, CreateScannerError> {
689 let mut scanner_features = self.scanner_features.clone();
690 let mut match_validators_per_type = AHashMap::new();
691
692 for rule in self.rules.iter() {
693 if let Some(match_validation_type) = &rule.get_third_party_active_checker() {
694 if match_validation_type.can_create_match_validator() {
695 let internal_type = match_validation_type.get_internal_match_validation_type();
696 let match_validator = match_validation_type.into_match_validator();
697 if let Ok(match_validator) = match_validator {
698 if !match_validators_per_type.contains_key(&internal_type) {
699 match_validators_per_type.insert(internal_type, match_validator);
700 scanner_features.return_matches = true;
704 }
705 } else {
706 return Err(CreateScannerError::InvalidMatchValidator(
707 MatchValidatorCreationError::InternalError,
708 ));
709 }
710 }
711 }
712 }
713
714 let compiled_rules = self
715 .rules
716 .iter()
717 .enumerate()
718 .map(|(rule_index, config)| {
719 let inner = config.convert_to_compiled_rule(rule_index, self.labels.clone())?;
720 config.match_action.validate()?;
721 Ok(RootCompiledRule {
722 inner,
723 scope: config.scope.clone(),
724 match_action: config.match_action.clone(),
725 match_validation_type: config.get_third_party_active_checker().cloned(),
726 })
727 })
728 .collect::<Result<Vec<RootCompiledRule>, CreateScannerError>>()?;
729
730 let mut per_scanner_data = SharedData::new();
731
732 compiled_rules.iter().for_each(|rule| {
733 rule.init_per_scanner_data(&mut per_scanner_data);
734 });
735
736 let scoped_ruleset = ScopedRuleSet::new(
737 &compiled_rules
738 .iter()
739 .map(|rule| rule.scope.clone())
740 .collect::<Vec<_>>(),
741 )
742 .with_implicit_index_wildcards(self.scanner_features.add_implicit_index_wildcards);
743
744 {
745 let stats = &*GLOBAL_STATS;
746 stats.scanner_creations.increment(1);
747 stats.increment_total_scanners();
748 }
749
750 Ok(Scanner {
751 rules: compiled_rules,
752 scoped_ruleset,
753 scanner_features,
754 metrics: ScannerMetrics::new(&self.labels),
755 match_validators_per_type,
756 labels: self.labels,
757 per_scanner_data,
758 })
759 }
760}
761
762struct ScannerContentVisitor<'a, E: Encoding> {
763 scanner: &'a Scanner,
764 regex_caches: &'a mut RegexCaches,
765 rule_matches: &'a mut Vec<(crate::Path<'static>, Vec<InternalRuleMatch<E>>)>,
766 blocked_rules: &'a Vec<usize>,
769 excluded_matches: &'a mut AHashSet<String>,
770 per_event_data: SharedData,
771 wildcarded_indexes: &'a AHashMap<Path<'static>, Vec<(usize, usize)>>,
772}
773
774impl<'a, E: Encoding> ContentVisitor<'a> for ScannerContentVisitor<'a, E> {
775 fn visit_content<'b>(
776 &'b mut self,
777 path: &Path<'a>,
778 content: &str,
779 mut rule_visitor: crate::scoped_ruleset::RuleIndexVisitor,
780 exclusion_check: ExclusionCheck<'b>,
781 ) -> bool {
782 let mut path_rules_matches = vec![];
784
785 let mut per_string_data = SharedData::new();
787 let wildcard_indices_per_path = self.wildcarded_indexes.get(path);
788
789 rule_visitor.visit_rule_indices(|rule_index| {
790 if self.blocked_rules.contains(&rule_index) {
791 return;
792 }
793 let rule = &self.scanner.rules[rule_index];
794 {
795 let mut emitter = |rule_match: StringMatch| {
797 path_rules_matches.push(InternalRuleMatch {
798 rule_index,
799 utf8_start: rule_match.start,
800 utf8_end: rule_match.end,
801 custom_start: E::zero_index(),
802 custom_end: E::zero_index(),
803 });
804 };
805
806 rule.init_per_string_data(&self.scanner.labels, &mut per_string_data);
807
808 rule.init_per_event_data(&mut self.per_event_data);
810
811 rule.get_string_matches(
812 content,
813 path,
814 self.regex_caches,
815 &mut per_string_data,
816 &self.scanner.per_scanner_data,
817 &mut self.per_event_data,
818 &exclusion_check,
819 self.excluded_matches,
820 &mut emitter,
821 wildcard_indices_per_path,
822 );
823 }
824 });
825
826 path_rules_matches.sort_unstable_by_key(|rule_match| rule_match.utf8_start);
828
829 E::calculate_indices(
830 content,
831 path_rules_matches
832 .iter_mut()
833 .map(|rule_match: &mut InternalRuleMatch<E>| EncodeIndices {
834 utf8_start: rule_match.utf8_start,
835 utf8_end: rule_match.utf8_end,
836 custom_start: &mut rule_match.custom_start,
837 custom_end: &mut rule_match.custom_end,
838 }),
839 );
840
841 let has_match = !path_rules_matches.is_empty();
844
845 if has_match {
846 self.rule_matches
847 .push((path.into_static(), path_rules_matches));
848 }
849
850 has_match
851 }
852}
853
854fn get_next_regex_start(content: &str, regex_match: &Match) -> Option<usize> {
856 if let Some((i, _)) = content[regex_match.start()..].char_indices().nth(1) {
858 Some(regex_match.start() + i)
859 } else {
860 None
862 }
863}
864
865fn is_false_positive_match(
866 regex_match: &Match,
867 rule: &RegexCompiledRule,
868 content: &str,
869 check_excluded_keywords: bool,
870) -> bool {
871 if check_excluded_keywords {
872 if let Some(excluded_keywords) = &rule.excluded_keywords {
873 if excluded_keywords.is_false_positive_match(content, regex_match.start()) {
874 return true;
875 }
876 }
877 }
878
879 if let Some(validator) = rule.validator.as_ref() {
880 if !validator.is_valid_match(&content[regex_match.range()]) {
881 return true;
882 };
883 }
884 false
885}