substrait_explain/parser/
common.rs

1use std::fmt;
2
3use pest::Parser as PestParser;
4use pest_derive::Parser as PestDeriveParser;
5use thiserror::Error;
6
7use crate::extensions::SimpleExtensions;
8use crate::extensions::simple::MissingReference;
9
10#[derive(PestDeriveParser)]
11#[grammar = "parser/expression_grammar.pest"] // Path relative to src
12pub struct ExpressionParser;
13
14/// An error that occurs when parsing a message within a specific line. Contains
15/// context pointing at that specific error.
16#[derive(Error, Debug, Clone)]
17#[error("{kind} Error parsing {message}:\n{error}")]
18pub struct MessageParseError {
19    pub message: &'static str,
20    pub kind: ErrorKind,
21    #[source]
22    pub error: Box<pest::error::Error<Rule>>,
23}
24
25#[derive(Debug, Clone)]
26pub enum ErrorKind {
27    Syntax,
28    InvalidValue,
29    Lookup(MissingReference),
30}
31
32impl MessageParseError {
33    pub fn syntax(message: &'static str, span: pest::Span, description: impl ToString) -> Self {
34        let error = pest::error::Error::new_from_span(
35            pest::error::ErrorVariant::CustomError {
36                message: description.to_string(),
37            },
38            span,
39        );
40        Self::new(message, ErrorKind::Syntax, Box::new(error))
41    }
42    pub fn invalid(message: &'static str, span: pest::Span, description: impl ToString) -> Self {
43        let error = pest::error::Error::new_from_span(
44            pest::error::ErrorVariant::CustomError {
45                message: description.to_string(),
46            },
47            span,
48        );
49        Self::new(message, ErrorKind::InvalidValue, Box::new(error))
50    }
51
52    pub fn lookup(
53        message: &'static str,
54        missing: MissingReference,
55        span: pest::Span,
56        description: impl ToString,
57    ) -> Self {
58        let error = pest::error::Error::new_from_span(
59            pest::error::ErrorVariant::CustomError {
60                message: description.to_string(),
61            },
62            span,
63        );
64        Self::new(message, ErrorKind::Lookup(missing), Box::new(error))
65    }
66}
67
68impl MessageParseError {
69    pub fn new(
70        message: &'static str,
71        kind: ErrorKind,
72        error: Box<pest::error::Error<Rule>>,
73    ) -> Self {
74        Self {
75            message,
76            kind,
77            error,
78        }
79    }
80}
81
82impl fmt::Display for ErrorKind {
83    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
84        match self {
85            ErrorKind::Syntax => write!(f, "Syntax"),
86            ErrorKind::InvalidValue => write!(f, "Invalid value"),
87            ErrorKind::Lookup(e) => write!(f, "Invalid reference ({e})"),
88        }
89    }
90}
91
92pub fn unwrap_single_pair(pair: pest::iterators::Pair<Rule>) -> pest::iterators::Pair<Rule> {
93    let mut pairs = pair.into_inner();
94    let pair = pairs.next().unwrap();
95    assert_eq!(pairs.next(), None);
96    pair
97}
98
99/// Unescapes a quoted string literal, handling escape sequences.
100///
101/// # Arguments
102/// * `pair` - The pest pair containing the string to unescape (must be Rule::string_literal or Rule::quoted_name).
103///
104/// # Returns
105/// * `String` with the unescaped contents.
106///
107/// # Panics
108/// Panics if the rule is not `string_literal` or `quoted_name` (this should never happen
109/// if the pest grammar is working correctly).
110///
111pub fn unescape_string(pair: pest::iterators::Pair<Rule>) -> String {
112    let s = pair.as_str();
113
114    // Determine opener/closer based on rule type
115    let (opener, closer) = match pair.as_rule() {
116        Rule::string_literal => ('\'', '\''),
117        Rule::quoted_name => ('"', '"'),
118        _ => panic!(
119            "unescape_string called with unexpected rule: {:?}",
120            pair.as_rule()
121        ),
122    };
123
124    let mut result = String::new();
125    let mut chars = s.chars();
126    let first = chars.next().expect("Empty string literal");
127
128    assert_eq!(
129        first, opener,
130        "Expected opening quote '{opener}', got '{first}'"
131    );
132
133    // Skip the opening quote
134    while let Some(c) = chars.next() {
135        match c {
136            c if c == closer => {
137                // Skip the closing quote, and assert that there are no more characters.
138                assert_eq!(
139                    chars.next(),
140                    None,
141                    "Unexpected characters after closing quote"
142                );
143                break;
144            }
145            '\\' => {
146                let next = chars
147                    .next()
148                    .expect("Incomplete escape sequence at end of string");
149                match next {
150                    'n' => result.push('\n'),
151                    't' => result.push('\t'),
152                    'r' => result.push('\r'),
153                    // For all other characters (especially `"`, `'`, and `\`), we just
154                    // push the character.
155                    _ => result.push(next),
156                }
157            }
158            _ => result.push(c),
159        }
160    }
161    result
162}
163
164// A trait for converting a pest::iterators::Pair<Rule> into a Rust type. This
165// is used to convert from the uniformly structured nesting
166// pest::iterators::Pair<Rule> into more structured types.
167pub trait ParsePair: Sized {
168    // The rule that this type is parsed from.
169    fn rule() -> Rule;
170
171    // The name of the protobuf message type that this type corresponds to.
172    fn message() -> &'static str;
173
174    // Parse a single instance of this type from a pest::iterators::Pair<Rule>.
175    // The input must match the rule returned by `rule`; otherwise, a panic is
176    // expected.
177    fn parse_pair(pair: pest::iterators::Pair<Rule>) -> Self;
178
179    fn parse_str(s: &str) -> Result<Self, MessageParseError> {
180        let mut pairs = <ExpressionParser as pest::Parser<Rule>>::parse(Self::rule(), s)
181            .map_err(|e| MessageParseError::new(Self::message(), ErrorKind::Syntax, Box::new(e)))?;
182        assert_eq!(pairs.as_str(), s);
183        let pair = pairs.next().unwrap();
184        assert_eq!(pairs.next(), None);
185        Ok(Self::parse_pair(pair))
186    }
187}
188
189/// A trait for types that can be directly parsed from a string input,
190/// regardless of context.
191pub trait Parse {
192    fn parse(input: &str) -> Result<Self, MessageParseError>
193    where
194        Self: Sized;
195}
196
197impl<T: ParsePair> Parse for T {
198    fn parse(input: &str) -> Result<Self, MessageParseError> {
199        T::parse_str(input)
200    }
201}
202
203/// A trait for types that are parsed from a `pest::iterators::Pair<Rule>` that
204/// depends on the context - e.g. extension lookups or other contextual
205/// information. This is used for types that are not directly parsed from the
206/// grammar, but rather require additional context to parse correctly.
207pub trait ScopedParsePair: Sized {
208    // The rule that this type is parsed from.
209    fn rule() -> Rule;
210
211    // The name of the protobuf message type that this type corresponds to.
212    fn message() -> &'static str;
213
214    // Parse a single instance of this type from a `pest::iterators::Pair<Rule>`.
215    // The input must match the rule returned by `rule`; otherwise, a panic is
216    // expected.
217    fn parse_pair(
218        extensions: &SimpleExtensions,
219        pair: pest::iterators::Pair<Rule>,
220    ) -> Result<Self, MessageParseError>;
221}
222
223pub trait ScopedParse: Sized {
224    fn parse(extensions: &SimpleExtensions, input: &str) -> Result<Self, MessageParseError>
225    where
226        Self: Sized;
227}
228
229impl<T: ScopedParsePair> ScopedParse for T {
230    fn parse(extensions: &SimpleExtensions, input: &str) -> Result<Self, MessageParseError> {
231        let mut pairs = ExpressionParser::parse(Self::rule(), input)
232            .map_err(|e| MessageParseError::new(Self::message(), ErrorKind::Syntax, Box::new(e)))?;
233        assert_eq!(pairs.as_str(), input);
234        let pair = pairs.next().unwrap();
235        assert_eq!(pairs.next(), None);
236        Self::parse_pair(extensions, pair)
237    }
238}
239
240pub fn iter_pairs(pair: pest::iterators::Pairs<'_, Rule>) -> RuleIter<'_> {
241    RuleIter {
242        iter: pair,
243        done: false,
244    }
245}
246
247pub struct RuleIter<'a> {
248    iter: pest::iterators::Pairs<'a, Rule>,
249    // Set to true when done is called, so destructor doesn't panic
250    done: bool,
251}
252
253impl<'a> From<pest::iterators::Pairs<'a, Rule>> for RuleIter<'a> {
254    fn from(iter: pest::iterators::Pairs<'a, Rule>) -> Self {
255        RuleIter { iter, done: false }
256    }
257}
258
259impl<'a> RuleIter<'a> {
260    pub fn peek(&self) -> Option<pest::iterators::Pair<'a, Rule>> {
261        self.iter.peek()
262    }
263
264    // Pop the next pair if it matches the rule. Returns None if not.
265    pub fn try_pop(&mut self, rule: Rule) -> Option<pest::iterators::Pair<'a, Rule>> {
266        match self.peek() {
267            Some(pair) if pair.as_rule() == rule => {
268                self.iter.next();
269                Some(pair)
270            }
271            _ => None,
272        }
273    }
274
275    // Pop the next pair, asserting it matches the given rule. Panics if not.
276    pub fn pop(&mut self, rule: Rule) -> pest::iterators::Pair<'a, Rule> {
277        let pair = self.iter.next().expect("expected another pair");
278        assert_eq!(
279            pair.as_rule(),
280            rule,
281            "expected rule {:?}, got {:?}",
282            rule,
283            pair.as_rule()
284        );
285        pair
286    }
287
288    // Parse the next pair if it matches the rule. Returns None if not.
289    pub fn parse_if_next<T: ParsePair>(&mut self) -> Option<T> {
290        match self.peek() {
291            Some(pair) if pair.as_rule() == T::rule() => {
292                self.iter.next();
293                Some(T::parse_pair(pair))
294            }
295            _ => None,
296        }
297    }
298
299    // Parse the next pair if it matches the rule. Returns None if not.
300    pub fn parse_if_next_scoped<T: ScopedParsePair>(
301        &mut self,
302        extensions: &SimpleExtensions,
303    ) -> Option<Result<T, MessageParseError>> {
304        match self.peek() {
305            Some(pair) if pair.as_rule() == T::rule() => {
306                self.iter.next();
307                Some(T::parse_pair(extensions, pair))
308            }
309            _ => None,
310        }
311    }
312
313    // Parse the next pair, assuming it matches the rule. Panics if not.
314    pub fn parse_next<T: ParsePair>(&mut self) -> T {
315        let pair = self.iter.next().unwrap();
316        T::parse_pair(pair)
317    }
318
319    // Parse the next pair, assuming it matches the rule. Panics if not.
320    pub fn parse_next_scoped<T: ScopedParsePair>(
321        &mut self,
322        extensions: &SimpleExtensions,
323    ) -> Result<T, MessageParseError> {
324        let pair = self.iter.next().unwrap();
325        T::parse_pair(extensions, pair)
326    }
327
328    pub fn done(mut self) {
329        self.done = true;
330        assert_eq!(self.iter.next(), None);
331    }
332}
333
334/// Make sure that the iterator was completely consumed when the iterator is
335/// dropped - that we didn't leave any partially-parsed tokens.
336///
337/// This is not strictly necessary, but it's a good way to catch bugs.
338impl Drop for RuleIter<'_> {
339    fn drop(&mut self) {
340        if self.done || std::thread::panicking() {
341            return;
342        }
343        // If the iterator is not done, something probably went wrong.
344        assert_eq!(self.iter.next(), None);
345    }
346}