1use crate::normalization::rust_regex_adapter::{
2 convert_to_rust_regex, convert_to_rust_regex_ast, QUANTIFIER_LIMIT,
3};
4use crate::parser::error::ParseError;
5use regex_automata::meta::{self};
6use regex_syntax::ast::Ast;
7use regex_syntax::hir::translate::Translator;
8use thiserror::Error;
9
10#[derive(Debug, PartialEq, Eq, Error)]
11pub enum RegexValidationError {
12 #[error("Invalid regex syntax")]
13 InvalidSyntax,
14
15 #[error("The regex pattern is nested too deeply")]
16 ExceededDepthLimit,
17
18 #[error("The regex has exceeded the complexity limit (try simplifying the regex)")]
19 TooComplex,
20
21 #[error("Regex patterns are not allowed to match an empty string")]
22 MatchesEmptyString,
23
24 #[error("Regex quantifier is too high. Max is {}", QUANTIFIER_LIMIT)]
25 ExceededQuantifierLimit,
26}
27
28impl From<ParseError> for RegexValidationError {
29 fn from(err: ParseError) -> Self {
30 match err {
31 ParseError::InvalidSyntax => Self::InvalidSyntax,
32 ParseError::ExceededDepthLimit => Self::ExceededDepthLimit,
33 ParseError::ExceededQuantifierLimit => Self::ExceededQuantifierLimit,
34 }
35 }
36}
37
38const REGEX_COMPLEXITY_LIMIT: usize = 1_000_000;
39
40pub fn validate_regex(input: &str) -> Result<(), RegexValidationError> {
42 validate_and_create_regex(input).map(|_| ())
45}
46
47pub fn get_regex_complexity_estimate_very_slow(input: &str) -> Result<usize, RegexValidationError> {
48 let converted_pattern = convert_to_rust_regex(input)?;
52
53 let mut low = 1;
54 let mut high = 10 * REGEX_COMPLEXITY_LIMIT;
57 while low < high {
58 let mid = low + (high - low) / 2;
59 if is_regex_within_complexity_limit(&converted_pattern, mid)? {
60 high = mid;
61 } else {
62 low = mid + 1;
63 }
64 }
65 Ok(low)
66}
67
68pub fn validate_and_create_regex(input: &str) -> Result<meta::Regex, RegexValidationError> {
69 let ast = convert_to_rust_regex_ast(input)?;
71 let pattern = ast.to_string();
72
73 check_minimum_match_length(&ast, &pattern)?;
75
76 let regex = build_regex(&pattern.to_string(), REGEX_COMPLEXITY_LIMIT)?;
77 Ok(regex)
78}
79
80fn check_minimum_match_length(ast: &Ast, ast_str: &str) -> Result<(), RegexValidationError> {
81 let hir = Translator::new()
82 .translate(ast_str, ast)
83 .map_err(|_| RegexValidationError::InvalidSyntax)?;
84 match hir.properties().minimum_len() {
85 None => {
86 Err(RegexValidationError::InvalidSyntax)
88 }
89 Some(min_length) => {
90 if min_length == 0 {
91 Err(RegexValidationError::MatchesEmptyString)
92 } else {
93 Ok(())
94 }
95 }
96 }
97}
98
99fn is_regex_within_complexity_limit(
100 converted_pattern: &str,
101 complexity_limit: usize,
102) -> Result<bool, RegexValidationError> {
103 match build_regex(converted_pattern, complexity_limit) {
104 Ok(_) => Ok(true),
105 Err(err) => match err {
106 RegexValidationError::TooComplex => Ok(false),
107 _ => Err(err),
108 },
109 }
110}
111
112fn build_regex(
113 converted_pattern: &str,
114 complexity_limit: usize,
115) -> Result<meta::Regex, RegexValidationError> {
116 meta::Builder::new()
117 .configure(
118 meta::Config::new()
119 .nfa_size_limit(Some(complexity_limit))
120 .hybrid_cache_capacity(2 * (1 << 20)),
122 )
123 .syntax(
124 regex_automata::util::syntax::Config::default()
125 .dot_matches_new_line(false)
126 .unicode(true),
127 )
128 .build(converted_pattern)
129 .map_err(|regex_err| {
130 if regex_err.size_limit().is_some() {
131 RegexValidationError::TooComplex
132 } else {
133 RegexValidationError::InvalidSyntax
135 }
136 })
137}
138
139#[cfg(test)]
140mod test {
141 use crate::validation::{
142 get_regex_complexity_estimate_very_slow, validate_and_create_regex, validate_regex,
143 RegexValidationError,
144 };
145
146 #[test]
147 fn pattern_matching_empty_string_is_invalid() {
148 assert_eq!(
150 validate_regex(""),
151 Err(RegexValidationError::MatchesEmptyString)
152 );
153 assert_eq!(
154 validate_regex("a|"),
156 Err(RegexValidationError::MatchesEmptyString)
157 );
158
159 assert!(validate_regex("(a|)b").is_ok(),);
161 }
162
163 #[test]
164 fn too_complex_pattern_is_rejected() {
165 assert_eq!(
166 validate_regex(".{1000}{1000}"),
167 Err(RegexValidationError::TooComplex)
168 );
169 }
170
171 #[test]
172 fn high_repetition_pattern_is_rejected() {
173 assert_eq!(
174 validate_regex(".{10000}"),
175 Err(RegexValidationError::ExceededQuantifierLimit)
176 );
177 }
178
179 #[test]
180 fn test_invalid_range_quantifiers() {
181 assert_eq!(
182 validate_regex(".{100,1}"),
183 Err(RegexValidationError::InvalidSyntax)
184 );
185 }
186
187 #[test]
188 fn dot_new_line() {
189 let regex = validate_and_create_regex(".").unwrap();
190 assert!(!regex.is_match("\n"));
191 }
192
193 #[test]
194 fn test_complexity() {
195 assert_eq!(get_regex_complexity_estimate_very_slow("x"), Ok(1));
200 assert_eq!(get_regex_complexity_estimate_very_slow("x{1,10}"), Ok(920));
201 assert_eq!(get_regex_complexity_estimate_very_slow("."), Ok(1144));
202 assert_eq!(
203 get_regex_complexity_estimate_very_slow(".{1,10}"),
204 Ok(10536)
205 );
206 assert_eq!(
207 get_regex_complexity_estimate_very_slow(".{1,1000}"),
208 Ok(1_040_136)
209 );
210 }
211
212 #[test]
213 fn test_empty_match_without_empty_string() {
214 assert_eq!(
215 validate_regex("\\bx{0,2}\\b"),
216 Err(RegexValidationError::MatchesEmptyString)
217 );
218 }
219}