substrait_explain/parser/
structural.rs

1//! Parser for the structural part of the Substrait file format.
2//!
3//! This is the overall parser for parsing the text format. It is responsible
4//! for tracking which section of the file we are currently parsing, and parsing
5//! each line separately.
6
7use std::fmt;
8
9use substrait::proto::rel::RelType;
10use substrait::proto::{
11    AggregateRel, FetchRel, FilterRel, JoinRel, Plan, PlanRel, ProjectRel, ReadRel, Rel, RelRoot,
12    SortRel, plan_rel,
13};
14use thiserror::Error;
15
16use crate::extensions::{SimpleExtensions, simple};
17use crate::parser::common::{MessageParseError, ParsePair};
18use crate::parser::expressions::Name;
19use crate::parser::extensions::{ExtensionParseError, ExtensionParser};
20use crate::parser::{ErrorKind, ExpressionParser, RelationParsePair, Rule, unwrap_single_pair};
21
/// Header line that opens the plan section of the text format.
pub const PLAN_HEADER: &str = "=== Plan";
23
/// A single input line split into its indentation depth and its content.
///
/// Field `0` is the number of complete two-space indents at the start of the
/// line; field `1` is the line with those indents and any trailing whitespace
/// removed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IndentedLine<'a>(pub usize, pub &'a str);

impl<'a> From<&'a str> for IndentedLine<'a> {
    fn from(line: &'a str) -> Self {
        let content = line.trim_end();
        let leading_spaces = content.bytes().take_while(|&b| b == b' ').count();

        // Only whole two-space indents are stripped; with an odd number of
        // leading spaces, the last one stays at the front of the content.
        let depth = leading_spaces / 2;

        IndentedLine(depth, &content[depth * 2..])
    }
}
48
49#[derive(Debug, Clone, Error)]
50pub enum ParseError {
51    #[error("Error parsing extension on line {0}: {1}")]
52    Extension(ParseContext, #[source] ExtensionParseError),
53    #[error("Error parsing plan on {0}: {1}")]
54    Plan(ParseContext, #[source] MessageParseError),
55    #[error("Error parsing section header on line {0}: {1}")]
56    Initial(ParseContext, #[source] MessageParseError),
57    #[error("Error parsing relation: {0}")]
58    Relation(ParseContext, #[source] MessageParseError),
59}
60
61/// Represents a line in the [`Plan`] tree structure before it's converted to a
62/// relation. This allows us to build the tree structure first, then convert to
63/// relations with proper parent-child relationships.
64#[derive(Debug, Clone)]
65pub struct LineNode<'a> {
66    pub pair: pest::iterators::Pair<'a, Rule>,
67    pub line_no: i64,
68    pub children: Vec<LineNode<'a>>,
69}
70
71impl<'a> LineNode<'a> {
72    pub fn context(&self) -> ParseContext {
73        ParseContext {
74            line_no: self.line_no,
75            line: self.pair.as_str().to_string(),
76        }
77    }
78
79    pub fn parse(line: &'a str, line_no: i64) -> Result<Self, ParseError> {
80        // Parse the line immediately to catch syntax errors
81        let mut pairs: pest::iterators::Pairs<'a, Rule> =
82            <ExpressionParser as pest::Parser<Rule>>::parse(Rule::relation, line).map_err(|e| {
83                ParseError::Plan(
84                    ParseContext {
85                        line_no,
86                        line: line.to_string(),
87                    },
88                    MessageParseError::new("relation", ErrorKind::InvalidValue, Box::new(e)),
89                )
90            })?;
91
92        let pair = pairs.next().unwrap();
93        assert!(pairs.next().is_none()); // Should be exactly one pair
94
95        Ok(Self {
96            pair,
97            line_no,
98            children: Vec::new(),
99        })
100    }
101
102    /// Parse the root relation of a plan, at depth 0.
103    pub fn parse_root(line: &'a str, line_no: i64) -> Result<Self, ParseError> {
104        // Parse the line as a top-level relation (either root_relation or regular relation)
105        let mut pairs: pest::iterators::Pairs<'a, Rule> =
106            <ExpressionParser as pest::Parser<Rule>>::parse(Rule::top_level_relation, line)
107                .map_err(|e| {
108                    ParseError::Plan(
109                        ParseContext::new(line_no, line.to_string()),
110                        MessageParseError::new(
111                            "top_level_relation",
112                            crate::parser::ErrorKind::Syntax,
113                            Box::new(e),
114                        ),
115                    )
116                })?;
117
118        let pair = pairs.next().unwrap();
119        assert!(pairs.next().is_none());
120
121        // Get the inner pair, which is either a root relation or a regular relation
122        let inner_pair = unwrap_single_pair(pair);
123
124        Ok(Self {
125            pair: inner_pair,
126            line_no,
127            children: Vec::new(),
128        })
129    }
130}
131
132/// Helper function to get the number of input fields from a relation.
133/// This is needed for Project relations to calculate output mapping indices.
134fn get_input_field_count(rel: &Rel) -> usize {
135    match &rel.rel_type {
136        Some(RelType::Read(read_rel)) => {
137            // For Read relations, count the fields in the base schema
138            read_rel
139                .base_schema
140                .as_ref()
141                .and_then(|schema| schema.r#struct.as_ref())
142                .map(|struct_| struct_.types.len())
143                .unwrap_or(0)
144        }
145        Some(RelType::Filter(filter_rel)) => {
146            // For Filter relations, get the count from the input
147            filter_rel
148                .input
149                .as_ref()
150                .map(|input| get_input_field_count(input))
151                .unwrap_or(0)
152        }
153        Some(RelType::Project(project_rel)) => {
154            // For Project relations, get the count from the input
155            project_rel
156                .input
157                .as_ref()
158                .map(|input| get_input_field_count(input))
159                .unwrap_or(0)
160        }
161        _ => 0,
162    }
163}
164
/// The section of the input the parser is currently in.
#[derive(Copy, Clone, Debug)]
pub enum State {
    /// The initial state, before any section header has been parsed.
    Initial,
    /// Inside the extensions section, after its header.
    Extensions,
    /// Inside the plan section, after its header.
    Plan,
}

impl fmt::Display for State {
    /// Writes the variant name, the same text the derived `Debug` produces.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            State::Initial => "Initial",
            State::Extensions => "Extensions",
            State::Plan => "Plan",
        };
        f.write_str(name)
    }
}
180
/// Where in the input an error occurred: the line number and the line's text.
#[derive(Debug, Clone)]
pub struct ParseContext {
    /// Line number of the line being parsed.
    pub line_no: i64,
    /// Text of the line being parsed.
    pub line: String,
}

impl ParseContext {
    /// Create a context for `line` at `line_no`.
    pub fn new(line_no: i64, line: String) -> Self {
        ParseContext { line_no, line }
    }
}

impl fmt::Display for ParseContext {
    /// Formats as `line <line_no> ('<line>')`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let ParseContext { line_no, line } = self;
        write!(f, "line {line_no} ('{line}')")
    }
}
198
199// An in-progress tree builder, building the tree of relations.
200#[derive(Debug, Clone, Default)]
201pub struct TreeBuilder<'a> {
202    // Current tree of nodes being built. These have been successfully parsed
203    // into Pest pairs, but have not yet been converted to substrait plans.
204    current: Option<LineNode<'a>>,
205    // Completed trees that have been built.
206    completed: Vec<LineNode<'a>>,
207}
208
209impl<'a> TreeBuilder<'a> {
210    /// Traverse down the tree, always taking the last child at each level, until reaching the specified depth.
211    pub fn get_at_depth(&mut self, depth: usize) -> Option<&mut LineNode<'a>> {
212        let mut node = self.current.as_mut()?;
213        for _ in 0..depth {
214            node = node.children.last_mut()?;
215        }
216        Some(node)
217    }
218
219    pub fn add_line(&mut self, depth: usize, node: LineNode<'a>) -> Result<(), ParseError> {
220        if depth == 0 {
221            if let Some(prev) = self.current.take() {
222                self.completed.push(prev)
223            }
224            self.current = Some(node);
225            return Ok(());
226        }
227
228        let parent = match self.get_at_depth(depth - 1) {
229            None => {
230                return Err(ParseError::Plan(
231                    node.context(),
232                    MessageParseError::invalid(
233                        "relation",
234                        node.pair.as_span(),
235                        format!("No parent found for depth {depth}"),
236                    ),
237                ));
238            }
239            Some(parent) => parent,
240        };
241
242        parent.children.push(node.clone());
243        Ok(())
244    }
245
246    /// End of input - move any remaining nodes from stack to completed and
247    /// return any trees in progress. Resets the builder to its initial state
248    /// (empty)
249    pub fn finish(&mut self) -> Vec<LineNode<'a>> {
250        // Move any remaining nodes from stack to completed
251        if let Some(node) = self.current.take() {
252            self.completed.push(node);
253        }
254        std::mem::take(&mut self.completed)
255    }
256}
257
258// Relation parsing component - handles converting LineNodes to Relations
259#[derive(Debug, Clone, Default)]
260pub struct RelationParser<'a> {
261    tree: TreeBuilder<'a>,
262}
263
264impl<'a> RelationParser<'a> {
265    pub fn parse_line(&mut self, line: IndentedLine<'a>, line_no: i64) -> Result<(), ParseError> {
266        let IndentedLine(depth, line) = line;
267
268        // Use parse_root for depth 0 (top-level relations), parse for other depths
269        let node = if depth == 0 {
270            LineNode::parse_root(line, line_no)?
271        } else {
272            LineNode::parse(line, line_no)?
273        };
274
275        self.tree.add_line(depth, node)
276    }
277
278    /// Parse a relation from a Pest pair of rule 'relation' into a Substrait
279    /// Rel.
280    //
281    // Clippy says a Vec<Box<…>> is unnecessary, as the Vec is already on the
282    // heap, but this is what the protobuf requires so we allow it here
283    #[allow(clippy::vec_box)]
284    fn parse_relation(
285        &self,
286        extensions: &SimpleExtensions,
287        line_no: i64,
288        pair: pest::iterators::Pair<Rule>,
289        child_relations: Vec<Box<substrait::proto::Rel>>,
290        input_field_count: usize,
291    ) -> Result<substrait::proto::Rel, ParseError> {
292        assert_eq!(pair.as_rule(), Rule::relation);
293        let p = unwrap_single_pair(pair);
294
295        let (e, l, p, c, ic) = (extensions, line_no, p, child_relations, input_field_count);
296
297        match p.as_rule() {
298            Rule::read_relation => self.parse_rel::<ReadRel>(e, l, p, c, ic),
299            Rule::filter_relation => self.parse_rel::<FilterRel>(e, l, p, c, ic),
300            Rule::project_relation => self.parse_rel::<ProjectRel>(e, l, p, c, ic),
301            Rule::aggregate_relation => self.parse_rel::<AggregateRel>(e, l, p, c, ic),
302            Rule::sort_relation => self.parse_rel::<SortRel>(e, l, p, c, ic),
303            Rule::fetch_relation => self.parse_rel::<FetchRel>(e, l, p, c, ic),
304            Rule::join_relation => self.parse_rel::<JoinRel>(e, l, p, c, ic),
305            _ => todo!(),
306        }
307    }
308
309    /// Parse a specific relation type from a Pest pair of matching rule into a
310    /// Substrait Rel.
311    //
312    // Clippy says a Vec<Box<…>> is unnecessary, as the Vec is already on the
313    // heap, but this is what the protobuf requires so we allow it here
314    #[allow(clippy::vec_box)]
315    fn parse_rel<T: RelationParsePair>(
316        &self,
317        extensions: &SimpleExtensions,
318        line_no: i64,
319        pair: pest::iterators::Pair<Rule>,
320        child_relations: Vec<Box<substrait::proto::Rel>>,
321        input_field_count: usize,
322    ) -> Result<substrait::proto::Rel, ParseError> {
323        assert_eq!(pair.as_rule(), T::rule());
324
325        let line = pair.as_str();
326        let rel_type =
327            T::parse_pair_with_context(extensions, pair, child_relations, input_field_count);
328
329        match rel_type {
330            Ok(rel) => Ok(rel.into_rel()),
331            Err(e) => Err(ParseError::Plan(
332                ParseContext::new(line_no, line.to_string()),
333                e,
334            )),
335        }
336    }
337
338    /// Convert a given LineNode into a Substrait Rel. Also recursively builds children.
339    fn build_rel(
340        &self,
341        extensions: &SimpleExtensions,
342        node: LineNode,
343    ) -> Result<substrait::proto::Rel, ParseError> {
344        // Parse children first to get their output schemas
345        let child_relations = node
346            .children
347            .into_iter()
348            .map(|c| self.build_rel(extensions, c).map(Box::new))
349            .collect::<Result<Vec<Box<Rel>>, ParseError>>()?;
350
351        // Get the input field count from all the children
352        let input_field_count = child_relations
353            .iter()
354            .map(|r| get_input_field_count(r.as_ref()))
355            .reduce(|a, b| a + b)
356            .unwrap_or(0);
357
358        // Parse this node using the stored pair
359        self.parse_relation(
360            extensions,
361            node.line_no,
362            node.pair,
363            child_relations,
364            input_field_count,
365        )
366    }
367
368    /// Build a tree of relations from a LineNode, with the root in the form of
369    /// a PlanRel - the root type in a Substrait Plan.
370    fn build_plan_rel(
371        &self,
372        extensions: &SimpleExtensions,
373        mut node: LineNode,
374    ) -> Result<PlanRel, ParseError> {
375        // Plain relations are allowed as root relations, they just don't have names.
376        if node.pair.as_rule() == Rule::relation {
377            let rel = self.build_rel(extensions, node)?;
378            return Ok(PlanRel {
379                rel_type: Some(plan_rel::RelType::Rel(rel)),
380            });
381        }
382
383        // Otherwise, it must be a root relation.
384        assert_eq!(node.pair.as_rule(), Rule::root_relation);
385        let context = node.context();
386        let span = node.pair.as_span();
387
388        // Parse the column names
389        let column_names_pair = unwrap_single_pair(node.pair);
390        assert_eq!(column_names_pair.as_rule(), Rule::root_name_list);
391
392        let names: Vec<String> = column_names_pair
393            .into_inner()
394            .map(|name_pair| {
395                assert_eq!(name_pair.as_rule(), Rule::name);
396                Name::parse_pair(name_pair).0
397            })
398            .collect();
399
400        let child = match node.children.len() {
401            1 => self.build_rel(extensions, node.children.pop().unwrap())?,
402            n => {
403                return Err(ParseError::Plan(
404                    context,
405                    MessageParseError::invalid(
406                        "root_relation",
407                        span,
408                        format!("Root relation must have exactly one child, found {n}"),
409                    ),
410                ));
411            }
412        };
413
414        let rel_root = RelRoot {
415            names,
416            input: Some(child),
417        };
418
419        Ok(PlanRel {
420            rel_type: Some(plan_rel::RelType::Root(rel_root)),
421        })
422    }
423
424    /// Build all the trees we have into `PlanRel`s.
425    fn build(mut self, extensions: &SimpleExtensions) -> Result<Vec<PlanRel>, ParseError> {
426        let nodes = self.tree.finish();
427        nodes
428            .into_iter()
429            .map(|n| self.build_plan_rel(extensions, n))
430            .collect::<Result<Vec<PlanRel>, ParseError>>()
431    }
432}
433
434/// A parser for Substrait query plans in text format.
435///
436/// The `Parser` converts human-readable Substrait text format into Substrait
437/// protobuf plans. It handles both the extensions section (which defines
438/// functions, types, etc.) and the plan section (which defines the actual query
439/// structure).
440///
441/// ## Usage
442///
443/// The simplest entry point is the static `parse()` method:
444///
445/// ```rust
446/// use substrait_explain::parser::Parser;
447///
448/// let plan_text = r#"
449/// === Plan
450/// Root[c, d]
451///   Project[$1, 42]
452///     Read[schema.table => a:i64, b:string?]
453/// "#;
454///
455/// let plan = Parser::parse(plan_text).unwrap();
456/// ```
457///
458/// ## Input Format
459///
460/// The parser expects input in the following format:
461///
462/// ```text
463/// === Extensions
464/// URIs:
465///   @  1: https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml
466/// Functions:
467///   # 10 @  1: add
468/// === Plan
469/// Root[columns]
470///   Relation[arguments => columns]
471///     ChildRelation[arguments => columns]
472/// ```
473///
474/// - **Extensions section** (optional): Defines URIs and function/type declarations
475/// - **Plan section** (required): Defines the query structure with indented relations
476///
477/// ## Error Handling
478///
479/// The parser provides detailed error information including:
480/// - Line number where the error occurred
481/// - The actual line content that failed to parse
482/// - Specific error type and description
483///
484/// ```rust
485/// use substrait_explain::parser::Parser;
486///
487/// let invalid_plan = r#"
488/// === Plan
489/// InvalidRelation[invalid syntax]
490/// "#;
491///
492/// match Parser::parse(invalid_plan) {
493///     Ok(plan) => println!("Successfully parsed"),
494///     Err(e) => eprintln!("Parse error: {}", e),
495/// }
496/// ```
497///
498/// ## Supported Relations
499///
/// The parser currently supports the following relations:
/// - `Read[table => columns]` - Read from a table
/// - `Project[expressions]` - Project columns/expressions
/// - `Filter[condition => columns]` - Filter rows
/// - `Root[columns]` - Root relation with output columns
/// - Aggregate, Sort, Fetch, and Join relations
506///
507/// ## Extensions Support
508///
509/// The parser fully supports Substrait Simple Extensions, allowing you to:
510/// - Define custom functions with URIs and anchors
511/// - Reference functions by name in expressions
512/// - Use custom types and type variations
513///
514/// ```rust
515/// use substrait_explain::parser::Parser;
516///
517/// let plan_with_extensions = r#"
518/// === Extensions
519/// URIs:
520///   @  1: https://example.com/functions.yaml
521/// Functions:
522///   ## 10 @  1: my_custom_function
523/// === Plan
524/// Root[result]
525///   Project[my_custom_function($0, $1)]
526///     Read[table => col1:i32, col2:i32]
527/// "#;
528///
529/// let plan = Parser::parse(plan_with_extensions).unwrap();
530/// ```
531///
532/// ## Performance
533///
534/// The parser is designed for efficiency:
535/// - Single-pass parsing with minimal allocations
536/// - Early error detection and reporting
537/// - Memory-efficient tree building
538///
539/// ## Thread Safety
540///
541/// `Parser` instances are not thread-safe and should not be shared between threads.
542/// However, the static `parse()` method is safe to call from multiple threads.
543#[derive(Debug)]
544pub struct Parser<'a> {
545    line_no: i64,
546    state: State,
547    extension_parser: ExtensionParser,
548    relation_parser: RelationParser<'a>,
549}
550impl<'a> Default for Parser<'a> {
551    fn default() -> Self {
552        Self {
553            line_no: 1,
554            state: State::Initial,
555            extension_parser: ExtensionParser::default(),
556            relation_parser: RelationParser::default(),
557        }
558    }
559}
560
561impl<'a> Parser<'a> {
562    /// Parse a Substrait plan from text format.
563    ///
564    /// This is the main entry point for parsing well-formed plans.
565    /// Returns a clear error if parsing fails.
566    ///
567    /// The input should be in the Substrait text format, which consists of:
568    /// - An optional extensions section starting with "=== Extensions"
569    /// - A plan section starting with "=== Plan"
570    /// - Indented relation definitions
571    ///
572    /// # Example
573    /// ```rust
574    /// use substrait_explain::parser::Parser;
575    ///
576    /// let plan_text = r#"
577    /// === Plan
578    /// Root[c, d]
579    ///   Project[$1, 42]
580    ///     Read[schema.table => a:i64, b:string?]
581    /// "#;
582    ///
583    /// let plan = Parser::parse(plan_text).unwrap();
584    /// ```
585    ///
586    /// # Errors
587    ///
588    /// Returns a [`ParseError`] if the input cannot be parsed. The error includes
589    /// the line number and content where parsing failed, along with a description
590    /// of what went wrong.
591    pub fn parse(input: &'a str) -> Result<Plan, ParseError> {
592        let mut parser = Self::default();
593
594        for line in input.lines() {
595            if line.trim().is_empty() {
596                parser.line_no += 1;
597                continue;
598            }
599
600            parser.parse_line(line)?;
601            parser.line_no += 1;
602        }
603
604        parser.build_plan()
605    }
606
607    /// Parse a single line of input, updating the parser state.
608    fn parse_line(&mut self, line: &'a str) -> Result<(), ParseError> {
609        let indented_line = IndentedLine::from(line);
610        let line_no = self.line_no;
611        let ctx = || ParseContext {
612            line_no,
613            line: line.to_string(),
614        };
615
616        match self.state {
617            State::Initial => self.parse_initial(indented_line),
618            State::Extensions => self
619                .parse_extensions(indented_line)
620                .map_err(|e| ParseError::Extension(ctx(), e)),
621            State::Plan => self.parse_plan_line(indented_line),
622        }
623    }
624
625    /// Parse the initial line(s) of the input, which is either a blank line or
626    /// the extensions or plan header.
627    fn parse_initial(&mut self, line: IndentedLine) -> Result<(), ParseError> {
628        match line {
629            IndentedLine(0, l) if l.trim().is_empty() => {}
630            IndentedLine(0, simple::EXTENSIONS_HEADER) => {
631                self.state = State::Extensions;
632            }
633            IndentedLine(0, PLAN_HEADER) => {
634                self.state = State::Plan;
635            }
636            IndentedLine(n, l) => {
637                return Err(ParseError::Initial(
638                    ParseContext::new(n as i64, l.to_string()),
639                    MessageParseError::invalid(
640                        "initial",
641                        pest::Span::new(l, 0, l.len()).expect("Invalid span?!"),
642                        format!("Unknown initial line: {l:?}"),
643                    ),
644                ));
645            }
646        }
647        if line.1.trim().is_empty() {
648            // Blank line - do nothing
649            return Ok(());
650        }
651
652        if line == IndentedLine(0, simple::EXTENSIONS_HEADER) {
653            self.state = State::Extensions;
654            return Ok(());
655        }
656        if line == IndentedLine(0, PLAN_HEADER) {
657            self.state = State::Plan;
658            return Ok(());
659        }
660        todo!()
661    }
662
663    /// Parse a single line from the extensions section of the input, updating
664    /// the parser state.
665    fn parse_extensions(&mut self, line: IndentedLine<'_>) -> Result<(), ExtensionParseError> {
666        if line == IndentedLine(0, PLAN_HEADER) {
667            self.state = State::Plan;
668            return Ok(());
669        }
670        self.extension_parser.parse_line(line)
671    }
672
673    /// Parse a single line from the plan section of the input, updating the
674    /// parser state.
675    fn parse_plan_line(&mut self, line: IndentedLine<'a>) -> Result<(), ParseError> {
676        self.relation_parser.parse_line(line, self.line_no)
677    }
678
679    /// Build the plan from the parser state.
680    fn build_plan(self) -> Result<Plan, ParseError> {
681        let Parser {
682            relation_parser,
683            extension_parser,
684            ..
685        } = self;
686
687        let extensions = extension_parser.extensions();
688
689        // Parse the tree into relations
690        let root_relations = relation_parser.build(extensions)?;
691
692        // Build the final plan
693        Ok(Plan {
694            extension_uris: extensions.to_extension_uris(),
695            extensions: extensions.to_extension_declarations(),
696            relations: root_relations,
697            ..Default::default()
698        })
699    }
700}
701
702#[cfg(test)]
703mod tests {
704    use substrait::proto::extensions::simple_extension_declaration::MappingType;
705
706    use super::*;
707    use crate::extensions::simple::ExtensionKind;
708    use crate::parser::extensions::ExtensionParserState;
709
710    #[test]
711    fn test_parse_basic_block() {
712        let mut expected_extensions = SimpleExtensions::new();
713        expected_extensions
714            .add_extension_uri("/uri/common".to_string(), 1)
715            .unwrap();
716        expected_extensions
717            .add_extension_uri("/uri/specific_funcs".to_string(), 2)
718            .unwrap();
719        expected_extensions
720            .add_extension(ExtensionKind::Function, 1, 10, "func_a".to_string())
721            .unwrap();
722        expected_extensions
723            .add_extension(ExtensionKind::Function, 2, 11, "func_b_special".to_string())
724            .unwrap();
725        expected_extensions
726            .add_extension(ExtensionKind::Type, 1, 20, "SomeType".to_string())
727            .unwrap();
728        expected_extensions
729            .add_extension(ExtensionKind::TypeVariation, 2, 30, "VarX".to_string())
730            .unwrap();
731
732        let mut parser = ExtensionParser::default();
733        let input_block = r#"
734URIs:
735  @  1: /uri/common
736  @  2: /uri/specific_funcs
737Functions:
738  # 10 @  1: func_a
739  # 11 @  2: func_b_special
740Types:
741  # 20 @  1: SomeType
742Type Variations:
743  # 30 @  2: VarX
744"#;
745
746        for line_str in input_block.trim().lines() {
747            parser
748                .parse_line(IndentedLine::from(line_str))
749                .unwrap_or_else(|e| panic!("Failed to parse line \'{line_str}\': {e:?}"));
750        }
751
752        assert_eq!(*parser.extensions(), expected_extensions);
753
754        let extensions_str = parser.extensions().to_string("  ");
755        // The writer adds the header; the ExtensionParser does not parse the
756        // header, so we add it here for comparison.
757        let expected_str = format!(
758            "{}\n{}",
759            simple::EXTENSIONS_HEADER,
760            input_block.trim_start()
761        );
762        assert_eq!(extensions_str.trim(), expected_str.trim());
763        // Check final state after all lines are processed.
764        // The last significant line in input_block is a TypeVariation declaration.
765        assert_eq!(
766            parser.state(),
767            ExtensionParserState::ExtensionDeclarations(ExtensionKind::TypeVariation)
768        );
769
770        // Check that a subsequent blank line correctly resets state to Extensions.
771        parser.parse_line(IndentedLine(0, "")).unwrap();
772        assert_eq!(parser.state(), ExtensionParserState::Extensions);
773    }
774
775    /// Test that we can parse a larger extensions block and it matches the input.
776    #[test]
777    fn test_parse_complete_extension_block() {
778        let mut parser = ExtensionParser::default();
779        let input_block = r#"
780URIs:
781  @  1: /uri/common
782  @  2: /uri/specific_funcs
783  @  3: /uri/types_lib
784  @  4: /uri/variations_lib
785Functions:
786  # 10 @  1: func_a
787  # 11 @  2: func_b_special
788  # 12 @  1: func_c_common
789Types:
790  # 20 @  1: CommonType
791  # 21 @  3: LibraryType
792  # 22 @  1: AnotherCommonType
793Type Variations:
794  # 30 @  4: VarX
795  # 31 @  4: VarY
796"#;
797
798        for line_str in input_block.trim().lines() {
799            parser
800                .parse_line(IndentedLine::from(line_str))
801                .unwrap_or_else(|e| panic!("Failed to parse line \'{line_str}\': {e:?}"));
802        }
803
804        let extensions_str = parser.extensions().to_string("  ");
805        // The writer adds the header; the ExtensionParser does not parse the
806        // header, so we add it here for comparison.
807        let expected_str = format!(
808            "{}\n{}",
809            simple::EXTENSIONS_HEADER,
810            input_block.trim_start()
811        );
812        assert_eq!(extensions_str.trim(), expected_str.trim());
813    }
814
815    #[test]
816    fn test_parse_relation_tree() {
817        // Example plan with a Project, a Filter, and a Read, nested by indentation
818        let plan = r#"=== Plan
819Project[$0, $1, 42, 84]
820  Filter[$2 => $0, $1]
821    Read[my.table => a:i32, b:string?, c:boolean]
822"#;
823        let mut parser = Parser::default();
824        for line in plan.lines() {
825            parser.parse_line(line).unwrap();
826        }
827
828        // Complete the current tree to convert it to relations
829        let plan = parser.build_plan().unwrap();
830
831        let root_rel = &plan.relations[0].rel_type;
832        let first_rel = match root_rel {
833            Some(plan_rel::RelType::Rel(rel)) => rel,
834            _ => panic!("Expected Rel type, got {root_rel:?}"),
835        };
836        // Root should be Project
837        let project = match &first_rel.rel_type {
838            Some(RelType::Project(p)) => p,
839            other => panic!("Expected Project at root, got {other:?}"),
840        };
841
842        // Check that Project has Filter as input
843        assert!(project.input.is_some());
844        let filter_input = project.input.as_ref().unwrap();
845
846        // Check that Filter has Read as input
847        match &filter_input.rel_type {
848            Some(RelType::Filter(_)) => {
849                match &filter_input.rel_type {
850                    Some(RelType::Filter(filter)) => {
851                        assert!(filter.input.is_some());
852                        let read_input = filter.input.as_ref().unwrap();
853
854                        // Check that Read has no input (it's a leaf)
855                        match &read_input.rel_type {
856                            Some(RelType::Read(_)) => {}
857                            other => panic!("Expected Read relation, got {other:?}"),
858                        }
859                    }
860                    other => panic!("Expected Filter relation, got {other:?}"),
861                }
862            }
863            other => panic!("Expected Filter relation, got {other:?}"),
864        }
865    }
866
867    #[test]
868    fn test_parse_root_relation() {
869        // Test a plan with a Root relation
870        let plan = r#"=== Plan
871Root[result]
872  Project[$0, $1]
873    Read[my.table => a:i32, b:string?]
874"#;
875        let mut parser = Parser::default();
876        for line in plan.lines() {
877            parser.parse_line(line).unwrap();
878        }
879
880        let plan = parser.build_plan().unwrap();
881
882        // Check that we have exactly one relation
883        assert_eq!(plan.relations.len(), 1);
884
885        let root_rel = &plan.relations[0].rel_type;
886        let rel_root = match root_rel {
887            Some(plan_rel::RelType::Root(rel_root)) => rel_root,
888            other => panic!("Expected Root type, got {other:?}"),
889        };
890
891        // Check that the root has the correct name
892        assert_eq!(rel_root.names, vec!["result"]);
893
894        // Check that the root has a Project as input
895        let project_input = match &rel_root.input {
896            Some(rel) => rel,
897            None => panic!("Root should have an input"),
898        };
899
900        let project = match &project_input.rel_type {
901            Some(RelType::Project(p)) => p,
902            other => panic!("Expected Project as root input, got {other:?}"),
903        };
904
905        // Check that Project has Read as input
906        let read_input = match &project.input {
907            Some(rel) => rel,
908            None => panic!("Project should have an input"),
909        };
910
911        match &read_input.rel_type {
912            Some(RelType::Read(_)) => {}
913            other => panic!("Expected Read relation, got {other:?}"),
914        }
915    }
916
917    #[test]
918    fn test_parse_root_relation_no_names() {
919        // Test a plan with a Root relation with no names
920        let plan = r#"=== Plan
921Root[]
922  Project[$0, $1]
923    Read[my.table => a:i32, b:string?]
924"#;
925        let mut parser = Parser::default();
926        for line in plan.lines() {
927            parser.parse_line(line).unwrap();
928        }
929
930        let plan = parser.build_plan().unwrap();
931
932        let root_rel = &plan.relations[0].rel_type;
933        let rel_root = match root_rel {
934            Some(plan_rel::RelType::Root(rel_root)) => rel_root,
935            other => panic!("Expected Root type, got {other:?}"),
936        };
937
938        // Check that the root has no names
939        assert_eq!(rel_root.names, Vec::<String>::new());
940    }
941
942    #[test]
943    fn test_parse_full_plan() {
944        // Test a complete Substrait plan with extensions and relations
945        let input = r#"
946=== Extensions
947URIs:
948  @  1: /uri/common
949  @  2: /uri/specific_funcs
950Functions:
951  # 10 @  1: func_a
952  # 11 @  2: func_b_special
953Types:
954  # 20 @  1: SomeType
955Type Variations:
956  # 30 @  2: VarX
957
958=== Plan
959Project[$0, $1, 42, 84]
960  Filter[$2 => $0, $1]
961    Read[my.table => a:i32, b:string?, c:boolean]
962"#;
963
964        let plan = Parser::parse(input).unwrap();
965
966        // Verify the plan structure
967        assert_eq!(plan.extension_uris.len(), 2);
968        assert_eq!(plan.extensions.len(), 4);
969        assert_eq!(plan.relations.len(), 1);
970
971        // Verify extension URIs
972        let uri1 = &plan.extension_uris[0];
973        assert_eq!(uri1.extension_uri_anchor, 1);
974        assert_eq!(uri1.uri, "/uri/common");
975
976        let uri2 = &plan.extension_uris[1];
977        assert_eq!(uri2.extension_uri_anchor, 2);
978        assert_eq!(uri2.uri, "/uri/specific_funcs");
979
980        // Verify extensions
981        let func1 = &plan.extensions[0];
982        match &func1.mapping_type {
983            Some(MappingType::ExtensionFunction(f)) => {
984                assert_eq!(f.function_anchor, 10);
985                assert_eq!(f.extension_uri_reference, 1);
986                assert_eq!(f.name, "func_a");
987            }
988            other => panic!("Expected ExtensionFunction, got {other:?}"),
989        }
990
991        let func2 = &plan.extensions[1];
992        match &func2.mapping_type {
993            Some(MappingType::ExtensionFunction(f)) => {
994                assert_eq!(f.function_anchor, 11);
995                assert_eq!(f.extension_uri_reference, 2);
996                assert_eq!(f.name, "func_b_special");
997            }
998            other => panic!("Expected ExtensionFunction, got {other:?}"),
999        }
1000
1001        let type1 = &plan.extensions[2];
1002        match &type1.mapping_type {
1003            Some(MappingType::ExtensionType(t)) => {
1004                assert_eq!(t.type_anchor, 20);
1005                assert_eq!(t.extension_uri_reference, 1);
1006                assert_eq!(t.name, "SomeType");
1007            }
1008            other => panic!("Expected ExtensionType, got {other:?}"),
1009        }
1010
1011        let var1 = &plan.extensions[3];
1012        match &var1.mapping_type {
1013            Some(MappingType::ExtensionTypeVariation(v)) => {
1014                assert_eq!(v.type_variation_anchor, 30);
1015                assert_eq!(v.extension_uri_reference, 2);
1016                assert_eq!(v.name, "VarX");
1017            }
1018            other => panic!("Expected ExtensionTypeVariation, got {other:?}"),
1019        }
1020
1021        // Verify the relation tree structure
1022        let root_rel = &plan.relations[0];
1023        match &root_rel.rel_type {
1024            Some(plan_rel::RelType::Rel(rel)) => {
1025                match &rel.rel_type {
1026                    Some(RelType::Project(project)) => {
1027                        // Verify Project relation
1028                        assert_eq!(project.expressions.len(), 2); // 42 and 84
1029                        println!("Project input: {:?}", project.input.is_some());
1030                        assert!(project.input.is_some()); // Should have Filter as input
1031
1032                        // Check the Filter input
1033                        let filter_input = project.input.as_ref().unwrap();
1034                        match &filter_input.rel_type {
1035                            Some(RelType::Filter(filter)) => {
1036                                println!("Filter input: {:?}", filter.input.is_some());
1037                                assert!(filter.input.is_some()); // Should have Read as input
1038
1039                                // Check the Read input
1040                                let read_input = filter.input.as_ref().unwrap();
1041                                match &read_input.rel_type {
1042                                    Some(RelType::Read(read)) => {
1043                                        // Verify Read relation
1044                                        let schema = read.base_schema.as_ref().unwrap();
1045                                        assert_eq!(schema.names.len(), 3);
1046                                        assert_eq!(schema.names[0], "a");
1047                                        assert_eq!(schema.names[1], "b");
1048                                        assert_eq!(schema.names[2], "c");
1049
1050                                        let struct_ = schema.r#struct.as_ref().unwrap();
1051                                        assert_eq!(struct_.types.len(), 3);
1052                                    }
1053                                    other => panic!("Expected Read relation, got {other:?}"),
1054                                }
1055                            }
1056                            other => panic!("Expected Filter relation, got {other:?}"),
1057                        }
1058                    }
1059                    other => panic!("Expected Project relation, got {other:?}"),
1060                }
1061            }
1062            other => panic!("Expected Rel type, got {other:?}"),
1063        }
1064    }
1065}