1use crate::normalization::rust_regex_adapter::{
2 convert_to_rust_regex, convert_to_rust_regex_ast, QUANTIFIER_LIMIT,
3};
4use crate::parser::error::ParseError;
5use moka::sync::Cache;
6use once_cell::sync::Lazy;
7use regex_automata::meta::{self};
8use regex_syntax::ast::Ast;
9use regex_syntax::hir::translate::Translator;
10use thiserror::Error;
11
12static REGEX_CACHE: Lazy<Cache<String, Result<(), RegexValidationError>>> =
13 Lazy::new(|| Cache::new(1000));
14
15#[derive(Debug, PartialEq, Eq, Error, Clone)]
16pub enum RegexValidationError {
17 #[error("Invalid regex syntax")]
18 InvalidSyntax,
19
20 #[error("The regex pattern is nested too deeply")]
21 ExceededDepthLimit,
22
23 #[error("The regex has exceeded the complexity limit (try simplifying the regex)")]
24 TooComplex,
25
26 #[error("Regex patterns are not allowed to match an empty string")]
27 MatchesEmptyString,
28
29 #[error("Regex quantifier is too high. Max is {}", QUANTIFIER_LIMIT)]
30 ExceededQuantifierLimit,
31}
32
33impl From<ParseError> for RegexValidationError {
34 fn from(err: ParseError) -> Self {
35 match err {
36 ParseError::InvalidSyntax => Self::InvalidSyntax,
37 ParseError::ExceededDepthLimit => Self::ExceededDepthLimit,
38 ParseError::ExceededQuantifierLimit => Self::ExceededQuantifierLimit,
39 }
40 }
41}
42
43const REGEX_COMPLEXITY_LIMIT: usize = 1_000_000;
44
45pub fn validate_regex(input: &str) -> Result<(), RegexValidationError> {
47 if let Some(cache_hit) = REGEX_CACHE.get(input) {
50 return cache_hit;
51 }
52
53 let result = validate_and_create_regex(input).map(|_| ());
54
55 REGEX_CACHE.insert(input.to_owned(), result.clone());
56 result
57}
58
59pub fn get_regex_complexity_estimate_very_slow(input: &str) -> Result<usize, RegexValidationError> {
60 let converted_pattern = convert_to_rust_regex(input)?;
64
65 let mut low = 1;
66 let mut high = 10 * REGEX_COMPLEXITY_LIMIT;
69 while low < high {
70 let mid = low + (high - low) / 2;
71 if is_regex_within_complexity_limit(&converted_pattern, mid)? {
72 high = mid;
73 } else {
74 low = mid + 1;
75 }
76 }
77 Ok(low)
78}
79
80pub fn validate_and_create_regex(input: &str) -> Result<meta::Regex, RegexValidationError> {
81 let ast = convert_to_rust_regex_ast(input)?;
83 let pattern = ast.to_string();
84
85 check_minimum_match_length(&ast, &pattern)?;
87
88 let regex = build_regex(&pattern.to_string(), REGEX_COMPLEXITY_LIMIT)?;
89 Ok(regex)
90}
91
92fn check_minimum_match_length(ast: &Ast, ast_str: &str) -> Result<(), RegexValidationError> {
93 let hir = Translator::new()
94 .translate(ast_str, ast)
95 .map_err(|_| RegexValidationError::InvalidSyntax)?;
96 match hir.properties().minimum_len() {
97 None => {
98 Err(RegexValidationError::InvalidSyntax)
100 }
101 Some(min_length) => {
102 if min_length == 0 {
103 Err(RegexValidationError::MatchesEmptyString)
104 } else {
105 Ok(())
106 }
107 }
108 }
109}
110
111fn is_regex_within_complexity_limit(
112 converted_pattern: &str,
113 complexity_limit: usize,
114) -> Result<bool, RegexValidationError> {
115 match build_regex(converted_pattern, complexity_limit) {
116 Ok(_) => Ok(true),
117 Err(err) => match err {
118 RegexValidationError::TooComplex => Ok(false),
119 _ => Err(err),
120 },
121 }
122}
123
124fn build_regex(
125 converted_pattern: &str,
126 complexity_limit: usize,
127) -> Result<meta::Regex, RegexValidationError> {
128 meta::Builder::new()
129 .configure(
130 meta::Config::new()
131 .nfa_size_limit(Some(complexity_limit))
132 .hybrid_cache_capacity(2 * (1 << 20)),
134 )
135 .syntax(
136 regex_automata::util::syntax::Config::default()
137 .dot_matches_new_line(false)
138 .unicode(true),
139 )
140 .build(converted_pattern)
141 .map_err(|regex_err| {
142 if regex_err.size_limit().is_some() {
143 RegexValidationError::TooComplex
144 } else {
145 RegexValidationError::InvalidSyntax
147 }
148 })
149}
150
151#[cfg(test)]
152mod test {
153 use crate::validation::{
154 get_regex_complexity_estimate_very_slow, validate_and_create_regex, validate_regex,
155 RegexValidationError,
156 };
157
158 #[test]
159 fn pattern_matching_empty_string_is_invalid() {
160 assert_eq!(
162 validate_regex(""),
163 Err(RegexValidationError::MatchesEmptyString)
164 );
165 assert_eq!(
166 validate_regex("a|"),
168 Err(RegexValidationError::MatchesEmptyString)
169 );
170
171 assert!(validate_regex("(a|)b").is_ok(),);
173 }
174
175 #[test]
176 fn too_complex_pattern_is_rejected() {
177 assert_eq!(
178 validate_regex(".{1000}{1000}"),
179 Err(RegexValidationError::TooComplex)
180 );
181 }
182
183 #[test]
184 fn high_repetition_pattern_is_rejected() {
185 assert_eq!(
186 validate_regex(".{10000}"),
187 Err(RegexValidationError::ExceededQuantifierLimit)
188 );
189 }
190
191 #[test]
192 fn test_invalid_range_quantifiers() {
193 assert_eq!(
194 validate_regex(".{100,1}"),
195 Err(RegexValidationError::InvalidSyntax)
196 );
197 }
198
199 #[test]
200 fn dot_new_line() {
201 let regex = validate_and_create_regex(".").unwrap();
202 assert!(!regex.is_match("\n"));
203 }
204
205 #[test]
206 fn test_complexity() {
207 assert_eq!(get_regex_complexity_estimate_very_slow("x"), Ok(1));
212 assert_eq!(get_regex_complexity_estimate_very_slow("x{1,10}"), Ok(920));
213 assert_eq!(get_regex_complexity_estimate_very_slow("."), Ok(1144));
214 assert_eq!(
215 get_regex_complexity_estimate_very_slow(".{1,10}"),
216 Ok(10536)
217 );
218 assert_eq!(
219 get_regex_complexity_estimate_very_slow(".{1,1000}"),
220 Ok(1_040_136)
221 );
222 }
223
224 #[test]
225 fn test_empty_match_without_empty_string() {
226 assert_eq!(
227 validate_regex("\\bx{0,2}\\b"),
228 Err(RegexValidationError::MatchesEmptyString)
229 );
230 }
231}