1use crate::normalization::rust_regex_adapter::{convert_to_rust_regex, QUANTIFIER_LIMIT};
2use crate::parser::error::ParseError;
3use regex_automata::meta::{self};
4use thiserror::Error;
5
6#[derive(Debug, PartialEq, Eq, Error)]
7pub enum RegexValidationError {
8 #[error("Invalid regex syntax")]
9 InvalidSyntax,
10
11 #[error("The regex pattern is nested too deeply")]
12 ExceededDepthLimit,
13
14 #[error("The regex has exceeded the complexity limit (try simplifying the regex)")]
15 TooComplex,
16
17 #[error("Regex patterns are not allowed to match an empty string")]
18 MatchesEmptyString,
19
20 #[error("Regex quantifier is too high. Max is {}", QUANTIFIER_LIMIT)]
21 ExceededQuantifierLimit,
22}
23
24impl From<ParseError> for RegexValidationError {
25 fn from(err: ParseError) -> Self {
26 match err {
27 ParseError::InvalidSyntax => Self::InvalidSyntax,
28 ParseError::ExceededDepthLimit => Self::ExceededDepthLimit,
29 ParseError::ExceededQuantifierLimit => Self::ExceededQuantifierLimit,
30 }
31 }
32}
33
34const REGEX_COMPLEXITY_LIMIT: usize = 1_000_000;
35
36pub fn validate_regex(input: &str) -> Result<(), RegexValidationError> {
38 validate_and_create_regex(input).map(|_| ())
41}
42
43pub fn get_regex_complexity_estimate_very_slow(input: &str) -> Result<usize, RegexValidationError> {
44 let converted_pattern = convert_to_rust_regex(input)?;
48
49 let mut low = 1;
50 let mut high = 10 * REGEX_COMPLEXITY_LIMIT;
53 while low < high {
54 let mid = low + (high - low) / 2;
55 if is_regex_within_complexity_limit(&converted_pattern, mid)? {
56 high = mid;
57 } else {
58 low = mid + 1;
59 }
60 }
61 Ok(low)
62}
63
64pub fn validate_and_create_regex(input: &str) -> Result<meta::Regex, RegexValidationError> {
65 let converted_pattern = convert_to_rust_regex(input)?;
67 let regex = build_regex(&converted_pattern, REGEX_COMPLEXITY_LIMIT)?;
68 if regex.is_match("") {
69 return Err(RegexValidationError::MatchesEmptyString);
70 }
71 Ok(regex)
72}
73
74fn is_regex_within_complexity_limit(
75 converted_pattern: &str,
76 complexity_limit: usize,
77) -> Result<bool, RegexValidationError> {
78 match build_regex(converted_pattern, complexity_limit) {
79 Ok(_) => Ok(true),
80 Err(err) => match err {
81 RegexValidationError::TooComplex => Ok(false),
82 _ => Err(err),
83 },
84 }
85}
86
87fn build_regex(
88 converted_pattern: &str,
89 complexity_limit: usize,
90) -> Result<meta::Regex, RegexValidationError> {
91 meta::Builder::new()
92 .configure(
93 meta::Config::new()
94 .nfa_size_limit(Some(complexity_limit))
95 .hybrid_cache_capacity(2 * (1 << 20)),
97 )
98 .syntax(
99 regex_automata::util::syntax::Config::default()
100 .dot_matches_new_line(false)
101 .unicode(true),
102 )
103 .build(converted_pattern)
104 .map_err(|regex_err| {
105 if regex_err.size_limit().is_some() {
106 RegexValidationError::TooComplex
107 } else {
108 RegexValidationError::InvalidSyntax
110 }
111 })
112}
113
114#[cfg(test)]
115mod test {
116 use crate::validation::{
117 get_regex_complexity_estimate_very_slow, validate_and_create_regex, validate_regex,
118 RegexValidationError,
119 };
120
121 #[test]
122 fn pattern_matching_empty_string_is_invalid() {
123 assert_eq!(
125 validate_regex(""),
126 Err(RegexValidationError::MatchesEmptyString)
127 );
128 assert_eq!(
129 validate_regex("a|"),
131 Err(RegexValidationError::MatchesEmptyString)
132 );
133
134 assert!(validate_regex("(a|)b").is_ok(),);
136 }
137
138 #[test]
139 fn too_complex_pattern_is_rejected() {
140 assert_eq!(
141 validate_regex(".{1000}{1000}"),
142 Err(RegexValidationError::TooComplex)
143 );
144 }
145
146 #[test]
147 fn high_repetition_pattern_is_rejected() {
148 assert_eq!(
149 validate_regex(".{10000}"),
150 Err(RegexValidationError::ExceededQuantifierLimit)
151 );
152 }
153
154 #[test]
155 fn test_invalid_range_quantifiers() {
156 assert_eq!(
157 validate_regex(".{100,1}"),
158 Err(RegexValidationError::InvalidSyntax)
159 );
160 }
161
162 #[test]
163 fn dot_new_line() {
164 let regex = validate_and_create_regex(".").unwrap();
165 assert!(!regex.is_match("\n"));
166 }
167
168 #[test]
169 fn test_complexity() {
170 assert_eq!(get_regex_complexity_estimate_very_slow("x"), Ok(1));
175 assert_eq!(get_regex_complexity_estimate_very_slow("x{1,10}"), Ok(920));
176 assert_eq!(get_regex_complexity_estimate_very_slow("."), Ok(1144));
177 assert_eq!(
178 get_regex_complexity_estimate_very_slow(".{1,10}"),
179 Ok(10536)
180 );
181 assert_eq!(
182 get_regex_complexity_estimate_very_slow(".{1,1000}"),
183 Ok(1_040_136)
184 );
185 }
186}