1use crate::encoding::Encoding;
2use crate::event::Event;
3
4use crate::match_validation::{
5 config::InternalMatchValidationType, config::MatchValidationType, match_status::MatchStatus,
6 match_validator::MatchValidator,
7};
8
9use error::{MatchValidationError, MatchValidatorCreationError};
10
11use crate::observability::labels::Labels;
12use crate::rule_match::{InternalRuleMatch, RuleMatch};
13use crate::scoped_ruleset::{ContentVisitor, ExclusionCheck, ScopedRuleSet};
14pub use crate::secondary_validation::Validator;
15use crate::{
16 CreateScannerError, EncodeIndices, MatchAction, Path, RegexValidationError, ScannerError,
17};
18use std::ops::Deref;
19use std::sync::Arc;
20
21use self::metrics::ScannerMetrics;
22use crate::match_validation::match_validator::RAYON_THREAD_POOL;
23use crate::scanner::config::RuleConfig;
24use crate::scanner::regex_rule::compiled::RegexCompiledRule;
25use crate::scanner::regex_rule::{access_regex_caches, RegexCaches};
26use crate::scanner::scope::Scope;
27pub use crate::scanner::shared_data::SharedData;
28use crate::stats::GLOBAL_STATS;
29use ahash::{AHashMap, AHashSet};
30use regex_automata::Match;
31use serde::{Deserialize, Serialize};
32use serde_with::serde_as;
33
34pub mod config;
35pub mod error;
36pub mod metrics;
37pub mod regex_rule;
38pub mod scope;
39pub mod shared_data;
40pub mod shared_pool;
41
42#[cfg(test)]
43mod test;
44
/// A `start..end` span reported by a rule for the string being scanned.
///
/// The scanner stores these values as the UTF-8 start and end offsets of the
/// resulting rule match.
pub struct StringMatch {
    pub start: usize,
    pub end: usize,
}

/// Receiver for the [`StringMatch`] values a rule produces.
///
/// `T` is whatever the emitter hands back for each match (`()` by default).
pub trait MatchEmitter<T = ()> {
    /// Consumes one match and returns the emitter's result for it.
    fn emit(&mut self, string_match: StringMatch) -> T;
}

/// Lets any `FnMut(StringMatch) -> T` closure be used directly as an emitter.
impl<Callback, Output> MatchEmitter<Output> for Callback
where
    Callback: FnMut(StringMatch) -> Output,
{
    fn emit(&mut self, string_match: StringMatch) -> Output {
        (*self)(string_match)
    }
}
65
66#[serde_as]
67#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
68pub struct RootRuleConfig<T> {
69 pub match_action: MatchAction,
70 #[serde(default)]
71 pub scope: Scope,
72 #[deprecated(note = "Use `third_party_active_checker` instead")]
73 match_validation_type: Option<MatchValidationType>,
74 third_party_active_checker: Option<MatchValidationType>,
75 #[serde(flatten)]
76 pub inner: T,
77}
78
79impl<T> RootRuleConfig<T>
80where
81 T: RuleConfig + 'static,
82{
83 pub fn new_dyn(inner: T) -> RootRuleConfig<Arc<dyn RuleConfig>> {
84 RootRuleConfig::new(Arc::new(inner) as Arc<dyn RuleConfig>)
85 }
86
87 pub fn into_dyn(self) -> RootRuleConfig<Arc<dyn RuleConfig>> {
88 self.map_inner(|x| Arc::new(x) as Arc<dyn RuleConfig>)
89 }
90}
91
92impl<T> RootRuleConfig<T> {
93 pub fn new(inner: T) -> Self {
94 #[allow(deprecated)]
95 Self {
96 match_action: MatchAction::None,
97 scope: Scope::all(),
98 match_validation_type: None,
99 third_party_active_checker: None,
100 inner,
101 }
102 }
103
104 pub fn map_inner<U>(self, func: impl FnOnce(T) -> U) -> RootRuleConfig<U> {
105 #[allow(deprecated)]
106 RootRuleConfig {
107 match_action: self.match_action,
108 scope: self.scope,
109 match_validation_type: self.match_validation_type,
110 third_party_active_checker: self.third_party_active_checker,
111 inner: func(self.inner),
112 }
113 }
114
115 pub fn match_action(mut self, action: MatchAction) -> Self {
116 self.match_action = action;
117 self
118 }
119
120 pub fn scope(mut self, scope: Scope) -> Self {
121 self.scope = scope;
122 self
123 }
124
125 pub fn third_party_active_checker(
126 mut self,
127 match_validation_type: MatchValidationType,
128 ) -> Self {
129 self.third_party_active_checker = Some(match_validation_type);
130 self
131 }
132
133 fn get_third_party_active_checker(&self) -> Option<&MatchValidationType> {
134 #[allow(deprecated)]
135 self.third_party_active_checker
136 .as_ref()
137 .or(self.match_validation_type.as_ref())
138 }
139}
140
141impl<T> Deref for RootRuleConfig<T> {
142 type Target = T;
143
144 fn deref(&self) -> &Self::Target {
145 &self.inner
146 }
147}
148pub struct RootCompiledRule {
149 pub inner: Box<dyn CompiledRule>,
150 pub scope: Scope,
151 pub match_action: MatchAction,
152 pub match_validation_type: Option<MatchValidationType>,
153}
154
155impl RootCompiledRule {
156 pub fn internal_match_validation_type(&self) -> Option<InternalMatchValidationType> {
157 self.match_validation_type
158 .as_ref()
159 .map(|x| x.get_internal_match_validation_type())
160 }
161}
162
163impl Deref for RootCompiledRule {
164 type Target = dyn CompiledRule;
165
166 fn deref(&self) -> &Self::Target {
167 self.inner.as_ref()
168 }
169}
170
171pub struct StringMatchesCtx<'a> {
172 pub regex_caches: &'a mut RegexCaches,
173 pub exclusion_check: &'a ExclusionCheck<'a>,
174 pub excluded_matches: &'a mut AHashSet<String>,
175 pub match_emitter: &'a mut dyn MatchEmitter,
176 pub wildcard_indices: Option<&'a Vec<(usize, usize)>>,
177
178 pub per_string_data: &'a mut SharedData,
180 pub per_scanner_data: &'a SharedData,
181 pub per_event_data: &'a mut SharedData,
182}
183
184pub trait CompiledRule: Send + Sync {
186 fn init_per_scanner_data(&self, _per_scanner_data: &mut SharedData) {
187 }
189
190 fn init_per_string_data(&self, _labels: &Labels, _per_string_data: &mut SharedData) {
191 }
193
194 fn init_per_event_data(&self, _per_event_data: &mut SharedData) {
195 }
197
198 fn get_string_matches(
199 &self,
200 content: &str,
201 path: &Path,
202 ctx: &mut StringMatchesCtx<'_>,
203 ) -> Result<(), ScannerError>;
204
205 #[allow(clippy::too_many_arguments)]
210 fn has_string_match(
211 &self,
212 content: &str,
213 path: &Path,
214 ctx: &mut StringMatchesCtx<'_>,
215 ) -> Result<bool, ScannerError> {
216 let mut found_match = false;
217
218 let mut match_emitter = |_| found_match = true;
219
220 let mut new_ctx = StringMatchesCtx {
221 match_emitter: &mut match_emitter,
222 regex_caches: ctx.regex_caches,
223 exclusion_check: ctx.exclusion_check,
224 excluded_matches: ctx.excluded_matches,
225 wildcard_indices: ctx.wildcard_indices,
226 per_string_data: ctx.per_string_data,
227 per_scanner_data: ctx.per_scanner_data,
228 per_event_data: ctx.per_event_data,
229 };
230
231 self.get_string_matches(content, path, &mut new_ctx)
232 .map(|_| found_match)
233 }
234
235 fn should_exclude_multipass_v0(&self) -> bool {
238 false
240 }
241
242 fn on_excluded_match_multipass_v0(&self) {
243 }
245}
246
247impl<T> RuleConfig for Box<T>
248where
249 T: RuleConfig + ?Sized,
250{
251 fn convert_to_compiled_rule(
252 &self,
253 rule_index: usize,
254 labels: Labels,
255 ) -> Result<Box<dyn CompiledRule>, CreateScannerError> {
256 self.as_ref().convert_to_compiled_rule(rule_index, labels)
257 }
258}
259
/// Feature switches selected through `ScannerBuilder`.
#[derive(Debug, PartialEq, Clone)]
struct ScannerFeatures {
    /// Forwarded to `ScopedRuleSet::with_implicit_index_wildcards`.
    pub add_implicit_index_wildcards: bool,
    /// Enables dropping matches whose text is in the scan's excluded-matches set.
    pub multipass_v0_enabled: bool,
    /// When set, the matched text is copied into `RuleMatch::match_value`;
    /// `Scanner::validate_matches` also requires it.
    pub return_matches: bool,
    /// When set, rules that fail to compile because their regex matches the
    /// empty string are skipped instead of failing the scanner build.
    pub skip_rules_with_regex_matching_empty_string: bool,
}
269
270impl Default for ScannerFeatures {
271 fn default() -> Self {
272 Self {
273 add_implicit_index_wildcards: false,
274 multipass_v0_enabled: true,
275 return_matches: false,
276 skip_rules_with_regex_matching_empty_string: false,
277 }
278 }
279}
280
281pub struct ScanOptions {
282 pub blocked_rules_idx: Vec<usize>,
285 pub wildcarded_indices: AHashMap<Path<'static>, Vec<(usize, usize)>>,
287}
288
289impl Default for ScanOptions {
290 fn default() -> Self {
291 Self {
292 blocked_rules_idx: vec![],
293 wildcarded_indices: AHashMap::new(),
294 }
295 }
296}
297
298pub struct ScanOptionBuilder {
299 blocked_rules_idx: Vec<usize>,
300 wildcarded_indices: AHashMap<Path<'static>, Vec<(usize, usize)>>,
301}
302
303impl ScanOptionBuilder {
304 pub fn new() -> Self {
305 Self {
306 blocked_rules_idx: vec![],
307 wildcarded_indices: AHashMap::new(),
308 }
309 }
310
311 pub fn with_blocked_rules_idx(mut self, blocked_rules_idx: Vec<usize>) -> Self {
312 self.blocked_rules_idx = blocked_rules_idx;
313 self
314 }
315
316 pub fn with_wildcarded_indices(
317 mut self,
318 wildcarded_indices: AHashMap<Path<'static>, Vec<(usize, usize)>>,
319 ) -> Self {
320 self.wildcarded_indices = wildcarded_indices;
321 self
322 }
323
324 pub fn build(self) -> ScanOptions {
325 ScanOptions {
326 blocked_rules_idx: self.blocked_rules_idx,
327 wildcarded_indices: self.wildcarded_indices,
328 }
329 }
330}
331
332pub struct Scanner {
333 rules: Vec<RootCompiledRule>,
334 scoped_ruleset: ScopedRuleSet,
335 scanner_features: ScannerFeatures,
336 metrics: ScannerMetrics,
337 labels: Labels,
338 match_validators_per_type: AHashMap<InternalMatchValidationType, Box<dyn MatchValidator>>,
339 per_scanner_data: SharedData,
340}
341
342impl Scanner {
343 pub fn builder(rules: &[RootRuleConfig<Arc<dyn RuleConfig>>]) -> ScannerBuilder {
344 ScannerBuilder::new(rules)
345 }
346
347 fn record_metrics(&self, output_rule_matches: &[RuleMatch], start: std::time::Instant) {
348 self.metrics
350 .duration_ns
351 .increment(start.elapsed().as_nanos() as u64);
352 self.metrics.num_scanned_events.increment(1);
354 self.metrics
356 .match_count
357 .increment(output_rule_matches.len() as u64);
358 }
359
360 pub fn scan_with_options<E: Event>(
361 &self,
362 event: &mut E,
363 options: ScanOptions,
364 ) -> Result<Vec<RuleMatch>, ScannerError> {
365 let mut rule_matches_list = vec![];
368
369 let mut excluded_matches = AHashSet::new();
370
371 let start = std::time::Instant::now();
373 let result = access_regex_caches(|regex_caches| {
374 self.scoped_ruleset.visit_string_rule_combinations(
375 event,
376 ScannerContentVisitor {
377 scanner: self,
378 regex_caches,
379 rule_matches: &mut rule_matches_list,
380 blocked_rules: &options.blocked_rules_idx,
381 excluded_matches: &mut excluded_matches,
382 per_event_data: SharedData::new(),
383 wildcarded_indexes: &options.wildcarded_indices,
384 },
385 )
386 });
387
388 if let Err(e) = result {
391 self.record_metrics(&[], start);
392 return Err(e);
393 }
394
395 let mut output_rule_matches = vec![];
396
397 for (path, rule_matches) in &mut rule_matches_list {
398 event.visit_string_mut(path, |content| {
400 if self.scanner_features.multipass_v0_enabled {
401 rule_matches.retain(|rule_match| {
404 if self.rules[rule_match.rule_index]
405 .inner
406 .should_exclude_multipass_v0()
407 {
408 let is_false_positive = excluded_matches
409 .contains(&content[rule_match.utf8_start..rule_match.utf8_end]);
410 if is_false_positive && self.scanner_features.multipass_v0_enabled {
411 self.rules[rule_match.rule_index].on_excluded_match_multipass_v0();
412 }
413 !is_false_positive
414 } else {
415 true
416 }
417 });
418 }
419
420 self.sort_and_remove_overlapping_rules::<E::Encoding>(rule_matches);
421
422 let will_mutate = rule_matches
423 .iter()
424 .any(|rule_match| self.rules[rule_match.rule_index].match_action.is_mutating());
425
426 self.apply_match_actions(content, path, rule_matches, &mut output_rule_matches);
427
428 will_mutate
429 });
430 }
431
432 self.record_metrics(&output_rule_matches, start);
433
434 Ok(output_rule_matches)
435 }
436
437 pub fn scan<E: Event>(&self, event: &mut E) -> Result<Vec<RuleMatch>, ScannerError> {
441 self.scan_with_options(event, ScanOptions::default())
442 }
443
444 pub fn validate_matches(
445 &self,
446 rule_matches: &mut Vec<RuleMatch>,
447 ) -> Result<(), MatchValidationError> {
448 if !self.scanner_features.return_matches {
449 return Err(MatchValidationError::NoMatchValidationType);
450 }
451 let mut match_validator_rule_match_per_type = AHashMap::new();
453
454 let mut validated_rule_matches = vec![];
455
456 for mut rule_match in rule_matches.drain(..) {
457 let rule = &self.rules[rule_match.rule_index];
458 if let Some(match_validation_type) = rule.internal_match_validation_type() {
459 match_validator_rule_match_per_type
460 .entry(match_validation_type)
461 .or_insert_with(Vec::new)
462 .push(rule_match)
463 } else {
464 rule_match.match_status.merge(MatchStatus::NotAvailable);
466 validated_rule_matches.push(rule_match);
467 }
468 }
469
470 RAYON_THREAD_POOL.install(|| {
471 use rayon::prelude::*;
472
473 match_validator_rule_match_per_type.par_iter_mut().for_each(
474 |(match_validation_type, matches_per_type)| {
475 let match_validator = self.match_validators_per_type.get(match_validation_type);
476 if let Some(match_validator) = match_validator {
477 match_validator
478 .as_ref()
479 .validate(matches_per_type, &self.rules)
480 }
481 },
482 );
483 });
484
485 for (_, mut matches) in match_validator_rule_match_per_type {
487 validated_rule_matches.append(&mut matches);
488 }
489
490 validated_rule_matches.sort_by_key(|rule_match| rule_match.start_index);
492 *rule_matches = validated_rule_matches;
493 Ok(())
494 }
495
496 fn apply_match_actions<E: Encoding>(
499 &self,
500 content: &mut String,
501 path: &Path<'static>,
502 rule_matches: &mut [InternalRuleMatch<E>],
503 output_rule_matches: &mut Vec<RuleMatch>,
504 ) {
505 let mut utf8_byte_delta: isize = 0;
506 let mut custom_index_delta: <E>::IndexShift = <E>::zero_shift();
507
508 for rule_match in rule_matches {
509 output_rule_matches.push(self.apply_match_actions_for_string::<E>(
510 content,
511 path.clone(),
512 rule_match,
513 &mut utf8_byte_delta,
514 &mut custom_index_delta,
515 ));
516 }
517 }
518
519 fn apply_match_actions_for_string<E: Encoding>(
521 &self,
522 content: &mut String,
523 path: Path<'static>,
524 rule_match: &InternalRuleMatch<E>,
525 utf8_byte_delta: &mut isize,
527
528 custom_index_delta: &mut <E>::IndexShift,
530 ) -> RuleMatch {
531 let rule = &self.rules[rule_match.rule_index];
532
533 let custom_start =
534 (<E>::get_index(&rule_match.custom_start, rule_match.utf8_start) as isize
535 + <E>::get_shift(custom_index_delta, *utf8_byte_delta)) as usize;
536
537 let mut matched_content_copy = None;
538
539 if self.scanner_features.return_matches {
540 let mutated_utf8_match_start =
542 (rule_match.utf8_start as isize + *utf8_byte_delta) as usize;
543 let mutated_utf8_match_end = (rule_match.utf8_end as isize + *utf8_byte_delta) as usize;
544
545 debug_assert!(content.is_char_boundary(mutated_utf8_match_start));
547 debug_assert!(content.is_char_boundary(mutated_utf8_match_end));
548
549 let matched_content = &content[mutated_utf8_match_start..mutated_utf8_match_end];
550 matched_content_copy = Some(matched_content.to_string());
551 }
552
553 if rule.match_action.is_mutating() {
554 let mutated_utf8_match_start =
555 (rule_match.utf8_start as isize + *utf8_byte_delta) as usize;
556 let mutated_utf8_match_end = (rule_match.utf8_end as isize + *utf8_byte_delta) as usize;
557
558 debug_assert!(content.is_char_boundary(mutated_utf8_match_start));
560 debug_assert!(content.is_char_boundary(mutated_utf8_match_end));
561
562 let matched_content = &content[mutated_utf8_match_start..mutated_utf8_match_end];
563 if let Some(replacement) = rule.match_action.get_replacement(matched_content) {
564 let before_replacement = &matched_content[replacement.start..replacement.end];
565
566 <E>::adjust_shift(
568 custom_index_delta,
569 before_replacement,
570 &replacement.replacement,
571 );
572 *utf8_byte_delta +=
573 replacement.replacement.len() as isize - before_replacement.len() as isize;
574
575 let replacement_start = mutated_utf8_match_start + replacement.start;
576 let replacement_end = mutated_utf8_match_start + replacement.end;
577 content.replace_range(replacement_start..replacement_end, &replacement.replacement);
578 }
579 }
580
581 let shift_offset = <E>::get_shift(custom_index_delta, *utf8_byte_delta);
582 let custom_end = (<E>::get_index(&rule_match.custom_end, rule_match.utf8_end) as isize
583 + shift_offset) as usize;
584
585 let rule = &self.rules[rule_match.rule_index];
586
587 let match_status: MatchStatus = if rule.match_validation_type.is_some() {
588 MatchStatus::NotChecked
589 } else {
590 MatchStatus::NotAvailable
591 };
592
593 RuleMatch {
594 rule_index: rule_match.rule_index,
595 path,
596 replacement_type: rule.match_action.replacement_type(),
597 start_index: custom_start,
598 end_index_exclusive: custom_end,
599 shift_offset,
600 match_value: matched_content_copy,
601 match_status,
602 }
603 }
604
605 fn sort_and_remove_overlapping_rules<E: Encoding>(
606 &self,
607 rule_matches: &mut Vec<InternalRuleMatch<E>>,
608 ) {
609 rule_matches.sort_unstable_by(|a, b| {
613 let ord = self.rules[a.rule_index]
615 .match_action
616 .is_mutating()
617 .cmp(&self.rules[b.rule_index].match_action.is_mutating())
618 .reverse();
619
620 let ord = ord.then(a.utf8_start.cmp(&b.utf8_start));
622
623 let ord = ord.then(a.len().cmp(&b.len()).reverse());
625
626 let ord = ord.then(a.rule_index.cmp(&b.rule_index));
628
629 ord.reverse()
631 });
632
633 let mut retained_rules: Vec<InternalRuleMatch<E>> = vec![];
634
635 'rule_matches: while let Some(rule_match) = rule_matches.pop() {
636 if self.rules[rule_match.rule_index].match_action.is_mutating() {
637 if let Some(last) = retained_rules.last() {
639 if last.utf8_end > rule_match.utf8_start {
640 continue;
641 }
642 }
643 } else {
644 for retained_rule in &retained_rules {
647 if retained_rule.utf8_start < rule_match.utf8_end
648 && retained_rule.utf8_end > rule_match.utf8_start
649 {
650 continue 'rule_matches;
651 }
652 }
653 };
654 retained_rules.push(rule_match);
655 }
656
657 retained_rules.sort_unstable_by_key(|rule_match| rule_match.utf8_start);
659
660 *rule_matches = retained_rules;
661 }
662}
663
664impl Drop for Scanner {
665 fn drop(&mut self) {
666 let stats = &*GLOBAL_STATS;
667 stats.scanner_deletions.increment(1);
668 stats.decrement_total_scanners();
669 }
670}
671
672#[derive(Default)]
673pub struct ScannerBuilder<'a> {
674 rules: &'a [RootRuleConfig<Arc<dyn RuleConfig>>],
675 labels: Labels,
676 scanner_features: ScannerFeatures,
677}
678
679impl ScannerBuilder<'_> {
680 pub fn new(rules: &[RootRuleConfig<Arc<dyn RuleConfig>>]) -> ScannerBuilder {
681 ScannerBuilder {
682 rules,
683 labels: Labels::empty(),
684 scanner_features: ScannerFeatures::default(),
685 }
686 }
687
688 pub fn labels(mut self, labels: Labels) -> Self {
689 self.labels = labels;
690 self
691 }
692
693 pub fn with_implicit_wildcard_indexes_for_scopes(mut self, value: bool) -> Self {
694 self.scanner_features.add_implicit_index_wildcards = value;
695 self
696 }
697
698 pub fn with_return_matches(mut self, value: bool) -> Self {
699 self.scanner_features.return_matches = value;
700 self
701 }
702
703 pub fn with_multipass_v0(mut self, value: bool) -> Self {
707 self.scanner_features.multipass_v0_enabled = value;
708 self
709 }
710
711 pub fn with_skip_rules_with_regex_matching_empty_string(mut self, value: bool) -> Self {
712 self.scanner_features
713 .skip_rules_with_regex_matching_empty_string = value;
714 self
715 }
716
717 pub fn build(self) -> Result<Scanner, CreateScannerError> {
718 let mut match_validators_per_type = AHashMap::new();
719
720 for rule in self.rules.iter() {
721 if let Some(match_validation_type) = &rule.get_third_party_active_checker() {
722 if match_validation_type.can_create_match_validator() {
723 let internal_type = match_validation_type.get_internal_match_validation_type();
724 let match_validator = match_validation_type.into_match_validator();
725 if let Ok(match_validator) = match_validator {
726 if !match_validators_per_type.contains_key(&internal_type) {
727 match_validators_per_type.insert(internal_type, match_validator);
728 }
729 } else {
730 return Err(CreateScannerError::InvalidMatchValidator(
731 MatchValidatorCreationError::InternalError,
732 ));
733 }
734 }
735 }
736 }
737
738 let compiled_rules = self
739 .rules
740 .iter()
741 .enumerate()
742 .filter_map(|(rule_index, config)| {
743 let inner = match config.convert_to_compiled_rule(rule_index, self.labels.clone()) {
744 Ok(inner) => Ok(inner),
745 Err(err) => {
746 if self
747 .scanner_features
748 .skip_rules_with_regex_matching_empty_string
749 && err
750 == CreateScannerError::InvalidRegex(
751 RegexValidationError::MatchesEmptyString,
752 )
753 {
754 #[allow(clippy::print_stdout)]
756 {
757 println!("skipping rule that matches empty string: rule_index={}, labels={:?}", rule_index, self.labels.clone());
758 }
759 return None;
760 } else {
761 Err(err)
762 }
763 }
764 };
765 Some((config, inner))
766 })
767 .map(|(config, inner)| {
768 config.match_action.validate()?;
769 Ok(RootCompiledRule {
770 inner: inner?,
771 scope: config.scope.clone(),
772 match_action: config.match_action.clone(),
773 match_validation_type: config.get_third_party_active_checker().cloned(),
774 })
775 })
776 .collect::<Result<Vec<RootCompiledRule>, CreateScannerError>>()?;
777
778 let mut per_scanner_data = SharedData::new();
779
780 compiled_rules.iter().for_each(|rule| {
781 rule.init_per_scanner_data(&mut per_scanner_data);
782 });
783
784 let scoped_ruleset = ScopedRuleSet::new(
785 &compiled_rules
786 .iter()
787 .map(|rule| rule.scope.clone())
788 .collect::<Vec<_>>(),
789 )
790 .with_implicit_index_wildcards(self.scanner_features.add_implicit_index_wildcards);
791
792 {
793 let stats = &*GLOBAL_STATS;
794 stats.scanner_creations.increment(1);
795 stats.increment_total_scanners();
796 }
797
798 Ok(Scanner {
799 rules: compiled_rules,
800 scoped_ruleset,
801 scanner_features: self.scanner_features,
802 metrics: ScannerMetrics::new(&self.labels),
803 match_validators_per_type,
804 labels: self.labels,
805 per_scanner_data,
806 })
807 }
808}
809
810struct ScannerContentVisitor<'a, E: Encoding> {
811 scanner: &'a Scanner,
812 regex_caches: &'a mut RegexCaches,
813 rule_matches: &'a mut Vec<(crate::Path<'static>, Vec<InternalRuleMatch<E>>)>,
814 blocked_rules: &'a Vec<usize>,
817 excluded_matches: &'a mut AHashSet<String>,
818 per_event_data: SharedData,
819 wildcarded_indexes: &'a AHashMap<Path<'static>, Vec<(usize, usize)>>,
820}
821
822impl<'a, E: Encoding> ContentVisitor<'a> for ScannerContentVisitor<'a, E> {
823 fn visit_content<'b>(
824 &'b mut self,
825 path: &Path<'a>,
826 content: &str,
827 mut rule_visitor: crate::scoped_ruleset::RuleIndexVisitor,
828 exclusion_check: ExclusionCheck<'b>,
829 ) -> Result<bool, ScannerError> {
830 let mut path_rules_matches = vec![];
832
833 let mut per_string_data = SharedData::new();
835 let wildcard_indices_per_path = self.wildcarded_indexes.get(path);
836
837 rule_visitor.visit_rule_indices(|rule_index| {
838 if self.blocked_rules.contains(&rule_index) {
839 return Ok(());
840 }
841 let rule = &self.scanner.rules[rule_index];
842 {
843 let mut emitter = |rule_match: StringMatch| {
845 assert_ne!(rule_match.start, rule_match.end, "empty match detected");
848
849 path_rules_matches.push(InternalRuleMatch {
850 rule_index,
851 utf8_start: rule_match.start,
852 utf8_end: rule_match.end,
853 custom_start: E::zero_index(),
854 custom_end: E::zero_index(),
855 });
856 };
857
858 rule.init_per_string_data(&self.scanner.labels, &mut per_string_data);
859
860 rule.init_per_event_data(&mut self.per_event_data);
862
863 let mut ctx = StringMatchesCtx {
864 regex_caches: self.regex_caches,
865 exclusion_check: &exclusion_check,
866 excluded_matches: self.excluded_matches,
867 match_emitter: &mut emitter,
868 wildcard_indices: wildcard_indices_per_path,
869 per_string_data: &mut per_string_data,
870 per_scanner_data: &self.scanner.per_scanner_data,
871 per_event_data: &mut self.per_event_data,
872 };
873
874 rule.get_string_matches(content, path, &mut ctx)?;
875 }
876 Ok(())
877 })?;
878
879 path_rules_matches.sort_unstable_by_key(|rule_match| rule_match.utf8_start);
881
882 E::calculate_indices(
883 content,
884 path_rules_matches
885 .iter_mut()
886 .map(|rule_match: &mut InternalRuleMatch<E>| EncodeIndices {
887 utf8_start: rule_match.utf8_start,
888 utf8_end: rule_match.utf8_end,
889 custom_start: &mut rule_match.custom_start,
890 custom_end: &mut rule_match.custom_end,
891 }),
892 );
893
894 let has_match = !path_rules_matches.is_empty();
897
898 if has_match {
899 self.rule_matches
900 .push((path.into_static(), path_rules_matches));
901 }
902
903 Ok(has_match)
904 }
905}
906
907fn get_next_regex_start(content: &str, regex_match: &Match) -> Option<usize> {
909 if let Some((i, _)) = content[regex_match.start()..].char_indices().nth(1) {
911 Some(regex_match.start() + i)
912 } else {
913 None
915 }
916}
917
918fn is_false_positive_match(
919 regex_match: &Match,
920 rule: &RegexCompiledRule,
921 content: &str,
922 check_excluded_keywords: bool,
923) -> bool {
924 if check_excluded_keywords {
925 if let Some(excluded_keywords) = &rule.excluded_keywords {
926 if excluded_keywords.is_false_positive_match(content, regex_match.start()) {
927 return true;
928 }
929 }
930 }
931
932 if let Some(validator) = rule.validator.as_ref() {
933 if !validator.is_valid_match(&content[regex_match.range()]) {
934 return true;
935 };
936 }
937 false
938}