Skip to main content

substrait_explain/parser/
common.rs

1use std::fmt;
2
3use pest_derive::Parser as PestDeriveParser;
4use thiserror::Error;
5
6use crate::extensions::SimpleExtensions;
7use crate::extensions::simple::MissingReference;
8
9#[derive(PestDeriveParser)]
10#[grammar = "parser/expression_grammar.pest"] // Path relative to src
11pub(crate) struct ExpressionParser;
12
13/// An error that occurs when parsing a message within a specific line. Contains
14/// context pointing at that specific error.
15#[derive(Error, Debug, Clone)]
16#[error("{kind} Error parsing {message}:\n{error}")]
17pub struct MessageParseError {
18    message: &'static str,
19    kind: ErrorKind,
20    #[source]
21    error: Box<pest::error::Error<Rule>>,
22}
23
24#[derive(Debug, Clone)]
25pub(crate) enum ErrorKind {
26    Syntax,
27    InvalidValue,
28    Lookup(MissingReference),
29}
30
31impl MessageParseError {
32    pub(crate) fn invalid(
33        message: &'static str,
34        span: pest::Span,
35        description: impl ToString,
36    ) -> Self {
37        let error = pest::error::Error::new_from_span(
38            pest::error::ErrorVariant::CustomError {
39                message: description.to_string(),
40            },
41            span,
42        );
43        Self::new(message, ErrorKind::InvalidValue, Box::new(error))
44    }
45
46    pub(crate) fn lookup(
47        message: &'static str,
48        missing: MissingReference,
49        span: pest::Span,
50        description: impl ToString,
51    ) -> Self {
52        let error = pest::error::Error::new_from_span(
53            pest::error::ErrorVariant::CustomError {
54                message: description.to_string(),
55            },
56            span,
57        );
58        Self::new(message, ErrorKind::Lookup(missing), Box::new(error))
59    }
60}
61
62impl MessageParseError {
63    pub(crate) fn new(
64        message: &'static str,
65        kind: ErrorKind,
66        error: Box<pest::error::Error<Rule>>,
67    ) -> Self {
68        Self {
69            message,
70            kind,
71            error,
72        }
73    }
74}
75
76impl fmt::Display for ErrorKind {
77    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78        match self {
79            ErrorKind::Syntax => write!(f, "Syntax"),
80            ErrorKind::InvalidValue => write!(f, "Invalid value"),
81            ErrorKind::Lookup(e) => write!(f, "Invalid reference ({e})"),
82        }
83    }
84}
85
86pub(crate) fn unwrap_single_pair(pair: pest::iterators::Pair<Rule>) -> pest::iterators::Pair<Rule> {
87    let mut pairs = pair.into_inner();
88    let pair = pairs.next().unwrap();
89    assert_eq!(pairs.next(), None);
90    pair
91}
92
93/// Unescapes a quoted string literal, handling escape sequences.
94///
95/// # Arguments
96/// * `pair` - The pest pair containing the string to unescape (must be Rule::string_literal or Rule::quoted_name).
97///
98/// # Returns
99/// * `String` with the unescaped contents.
100///
101/// # Panics
102/// Panics if the rule is not `string_literal` or `quoted_name` (this should never happen
103/// if the pest grammar is working correctly).
104///
105pub(crate) fn unescape_string(pair: pest::iterators::Pair<Rule>) -> String {
106    let s = pair.as_str();
107
108    // Determine opener/closer based on rule type
109    let (opener, closer) = match pair.as_rule() {
110        Rule::string_literal => ('\'', '\''),
111        Rule::quoted_name => ('"', '"'),
112        _ => panic!(
113            "unescape_string called with unexpected rule: {:?}",
114            pair.as_rule()
115        ),
116    };
117
118    let mut result = String::new();
119    let mut chars = s.chars();
120    let first = chars.next().expect("Empty string literal");
121
122    assert_eq!(
123        first, opener,
124        "Expected opening quote '{opener}', got '{first}'"
125    );
126
127    // Skip the opening quote
128    while let Some(c) = chars.next() {
129        match c {
130            c if c == closer => {
131                // Skip the closing quote, and assert that there are no more characters.
132                assert_eq!(
133                    chars.next(),
134                    None,
135                    "Unexpected characters after closing quote"
136                );
137                break;
138            }
139            '\\' => {
140                let next = chars
141                    .next()
142                    .expect("Incomplete escape sequence at end of string");
143                match next {
144                    'n' => result.push('\n'),
145                    't' => result.push('\t'),
146                    'r' => result.push('\r'),
147                    // For all other characters (especially `"`, `'`, and `\`), we just
148                    // push the character.
149                    _ => result.push(next),
150                }
151            }
152            _ => result.push(c),
153        }
154    }
155    result
156}
157
158// A trait for converting a pest::iterators::Pair<Rule> into a Rust type. This
159// is used to convert from the uniformly structured nesting
160// pest::iterators::Pair<Rule> into more structured types.
161pub(crate) trait ParsePair: Sized {
162    // The rule that this type is parsed from.
163    fn rule() -> Rule;
164
165    // The name of the protobuf message type that this type corresponds to.
166    fn message() -> &'static str;
167
168    // Parse a single instance of this type from a pest::iterators::Pair<Rule>.
169    // The input must match the rule returned by `rule`; otherwise, a panic is
170    // expected.
171    fn parse_pair(pair: pest::iterators::Pair<Rule>) -> Self;
172
173    fn parse_str(s: &str) -> Result<Self, MessageParseError> {
174        let mut pairs = <ExpressionParser as pest::Parser<Rule>>::parse(Self::rule(), s)
175            .map_err(|e| MessageParseError::new(Self::message(), ErrorKind::Syntax, Box::new(e)))?;
176        assert_eq!(pairs.as_str(), s);
177        let pair = pairs.next().unwrap();
178        assert_eq!(pairs.next(), None);
179        Ok(Self::parse_pair(pair))
180    }
181}
182
183/// A trait for types that are parsed from a `pest::iterators::Pair<Rule>` that
184/// depends on the context - e.g. extension lookups or other contextual
185/// information. This is used for types that are not directly parsed from the
186/// grammar, but rather require additional context to parse correctly.
187pub(crate) trait ScopedParsePair: Sized {
188    // The rule that this type is parsed from.
189    fn rule() -> Rule;
190
191    // The name of the protobuf message type that this type corresponds to.
192    fn message() -> &'static str;
193
194    // Parse a single instance of this type from a `pest::iterators::Pair<Rule>`.
195    // The input must match the rule returned by `rule`; otherwise, a panic is
196    // expected.
197    fn parse_pair(
198        extensions: &SimpleExtensions,
199        pair: pest::iterators::Pair<Rule>,
200    ) -> Result<Self, MessageParseError>;
201}
202
203pub(crate) fn iter_pairs(pair: pest::iterators::Pairs<'_, Rule>) -> RuleIter<'_> {
204    RuleIter {
205        iter: pair,
206        done: false,
207    }
208}
209
210pub(crate) struct RuleIter<'a> {
211    iter: pest::iterators::Pairs<'a, Rule>,
212    // Set to true when done is called, so destructor doesn't panic
213    done: bool,
214}
215
216impl<'a> From<pest::iterators::Pairs<'a, Rule>> for RuleIter<'a> {
217    fn from(iter: pest::iterators::Pairs<'a, Rule>) -> Self {
218        RuleIter { iter, done: false }
219    }
220}
221
222impl<'a> RuleIter<'a> {
223    pub(crate) fn peek(&self) -> Option<pest::iterators::Pair<'a, Rule>> {
224        self.iter.peek()
225    }
226
227    // Pop the next pair if it matches the rule. Returns None if not.
228    pub(crate) fn try_pop(&mut self, rule: Rule) -> Option<pest::iterators::Pair<'a, Rule>> {
229        match self.peek() {
230            Some(pair) if pair.as_rule() == rule => {
231                self.iter.next();
232                Some(pair)
233            }
234            _ => None,
235        }
236    }
237
238    // Pop the next pair, asserting it matches the given rule. Panics if not.
239    pub(crate) fn pop(&mut self, rule: Rule) -> pest::iterators::Pair<'a, Rule> {
240        let pair = self.iter.next().expect("expected another pair");
241        assert_eq!(
242            pair.as_rule(),
243            rule,
244            "expected rule {:?}, got {:?}",
245            rule,
246            pair.as_rule()
247        );
248        pair
249    }
250
251    // Parse the next pair if it matches the rule. Returns None if not.
252    pub(crate) fn parse_if_next<T: ParsePair>(&mut self) -> Option<T> {
253        match self.peek() {
254            Some(pair) if pair.as_rule() == T::rule() => {
255                self.iter.next();
256                Some(T::parse_pair(pair))
257            }
258            _ => None,
259        }
260    }
261
262    // Parse the next pair if it matches the rule. Returns None if not.
263    pub(crate) fn parse_if_next_scoped<T: ScopedParsePair>(
264        &mut self,
265        extensions: &SimpleExtensions,
266    ) -> Option<Result<T, MessageParseError>> {
267        match self.peek() {
268            Some(pair) if pair.as_rule() == T::rule() => {
269                self.iter.next();
270                Some(T::parse_pair(extensions, pair))
271            }
272            _ => None,
273        }
274    }
275
276    // Parse the next pair, assuming it matches the rule. Panics if not.
277    pub(crate) fn parse_next<T: ParsePair>(&mut self) -> T {
278        let pair = self.iter.next().unwrap();
279        T::parse_pair(pair)
280    }
281
282    // Parse the next pair, assuming it matches the rule. Panics if not.
283    pub(crate) fn parse_next_scoped<T: ScopedParsePair>(
284        &mut self,
285        extensions: &SimpleExtensions,
286    ) -> Result<T, MessageParseError> {
287        let pair = self.iter.next().unwrap();
288        T::parse_pair(extensions, pair)
289    }
290
291    pub(crate) fn done(mut self) {
292        self.done = true;
293        assert_eq!(self.iter.next(), None);
294    }
295}
296
297/// Make sure that the iterator was completely consumed when the iterator is
298/// dropped - that we didn't leave any partially-parsed tokens.
299///
300/// This is not strictly necessary, but it's a good way to catch bugs.
301impl Drop for RuleIter<'_> {
302    fn drop(&mut self) {
303        if self.done || std::thread::panicking() {
304            return;
305        }
306        // If the iterator is not done, something probably went wrong.
307        assert_eq!(self.iter.next(), None);
308    }
309}
310
/// Helpers for unit tests that parse a single grammar fragment from a string.
#[cfg(test)]
pub(crate) mod test_support {
    use pest::Parser as PestParser;

    use super::{ErrorKind, ExpressionParser, MessageParseError, ParsePair, ScopedParsePair};
    use crate::extensions::SimpleExtensions;

    /// Test-only adapter for parsing individual grammar fragments from strings.
    ///
    /// Production parsing goes through [`ParsePair`] and the structural [`Parser`](crate::Parser).
    pub(crate) trait Parse {
        /// Parses `input` as a complete instance of the implementing type.
        fn parse(input: &str) -> Result<Self, MessageParseError>
        where
            Self: Sized;
    }

    /// Every [`ParsePair`] type gets `Parse` by forwarding to `parse_str`.
    impl<T> Parse for T
    where
        T: ParsePair,
    {
        fn parse(input: &str) -> Result<Self, MessageParseError> {
            <T as ParsePair>::parse_str(input)
        }
    }

    /// Test-only adapter for parsing context-dependent grammar fragments from strings.
    ///
    /// Production parsing goes through [`ScopedParsePair`] and the structural
    /// [`Parser`](crate::Parser).
    pub(crate) trait ScopedParse: Sized {
        /// Parses `input` as a complete instance of the implementing type,
        /// using `extensions` as context.
        fn parse(extensions: &SimpleExtensions, input: &str) -> Result<Self, MessageParseError>;
    }

    /// Every [`ScopedParsePair`] type gets `ScopedParse`.
    ///
    /// Pest failures become `ErrorKind::Syntax` errors. Panics if the parse
    /// does not span all of `input` or yields more than one top-level pair.
    impl<T> ScopedParse for T
    where
        T: ScopedParsePair,
    {
        fn parse(extensions: &SimpleExtensions, input: &str) -> Result<Self, MessageParseError> {
            let rule = <T as ScopedParsePair>::rule();
            let mut pairs = match ExpressionParser::parse(rule, input) {
                Ok(pairs) => pairs,
                Err(e) => {
                    return Err(MessageParseError::new(
                        <T as ScopedParsePair>::message(),
                        ErrorKind::Syntax,
                        Box::new(e),
                    ));
                }
            };
            assert_eq!(pairs.as_str(), input);
            let pair = pairs.next().unwrap();
            assert_eq!(pairs.next(), None);
            <T as ScopedParsePair>::parse_pair(extensions, pair)
        }
    }
}
354}