substrait_explain/parser/
structural.rs

1//! Parser for the structural part of the Substrait file format.
2//!
3//! This is the overall parser for parsing the text format. It is responsible
4//! for tracking which section of the file we are currently parsing, and parsing
5//! each line separately.
6
7use std::fmt;
8
9use substrait::proto::rel::RelType;
10use substrait::proto::{
11    AggregateRel, FetchRel, FilterRel, JoinRel, Plan, PlanRel, ProjectRel, ReadRel, Rel, RelRoot,
12    SortRel, plan_rel,
13};
14
15use crate::extensions::{ExtensionRegistry, SimpleExtensions, simple};
16use crate::parser::common::{MessageParseError, ParsePair};
17use crate::parser::errors::{ParseContext, ParseError, ParseResult};
18use crate::parser::expressions::Name;
19use crate::parser::extensions::{ExtensionInvocation, ExtensionParseError, ExtensionParser};
20use crate::parser::relations::RelationParsingContext;
21use crate::parser::{ErrorKind, ExpressionParser, RelationParsePair, Rule, unwrap_single_pair};
22
23pub const PLAN_HEADER: &str = "=== Plan";
24
/// A single input line split into its indentation depth and its content.
///
/// Field `0` is the number of complete two-space indents at the start of the
/// line; field `1` is the line with those indents and any trailing whitespace
/// removed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IndentedLine<'a>(pub usize, pub &'a str);

impl<'a> From<&'a str> for IndentedLine<'a> {
    /// Strip trailing whitespace, count the leading spaces, and remove every
    /// complete pair of them. With an odd number of leading spaces, the one
    /// left over stays at the front of the returned text.
    fn from(line: &'a str) -> Self {
        let line = line.trim_end();
        let leading_spaces = line.bytes().take_while(|&b| b == b' ').count();
        let indents = leading_spaces / 2;

        // The skipped prefix consists solely of ASCII spaces, so the slice
        // boundary always falls on a character boundary.
        IndentedLine(indents, &line[indents * 2..])
    }
}
49
/// Represents a line in the [`Plan`] tree structure before it's converted to a
/// relation. This allows us to build the tree structure first, then convert to
/// relations with proper parent-child relationships.
#[derive(Debug, Clone)]
pub struct LineNode<'a> {
    /// The Pest pair produced by parsing this line's text.
    pub pair: pest::iterators::Pair<'a, Rule>,
    /// The line number supplied by the caller when this line was parsed.
    pub line_no: i64,
    /// Nodes attached beneath this one. Empty when the node is first parsed.
    pub children: Vec<LineNode<'a>>,
}
59
60impl<'a> LineNode<'a> {
61    pub fn context(&self) -> ParseContext {
62        ParseContext {
63            line_no: self.line_no,
64            line: self.pair.as_str().to_string(),
65        }
66    }
67
68    pub fn parse(line: &'a str, line_no: i64) -> Result<Self, ParseError> {
69        // Parse the line immediately to catch syntax errors
70        let mut pairs: pest::iterators::Pairs<'a, Rule> =
71            <ExpressionParser as pest::Parser<Rule>>::parse(Rule::relation, line).map_err(|e| {
72                ParseError::Plan(
73                    ParseContext {
74                        line_no,
75                        line: line.to_string(),
76                    },
77                    MessageParseError::new("relation", ErrorKind::InvalidValue, Box::new(e)),
78                )
79            })?;
80
81        let pair = pairs.next().unwrap();
82        assert!(pairs.next().is_none()); // Should be exactly one pair
83
84        Ok(Self {
85            pair,
86            line_no,
87            children: Vec::new(),
88        })
89    }
90
91    /// Parse the root relation of a plan, at depth 0.
92    pub fn parse_root(line: &'a str, line_no: i64) -> Result<Self, ParseError> {
93        // Parse the line as a top-level relation (either root_relation or regular relation)
94        let mut pairs: pest::iterators::Pairs<'a, Rule> = <ExpressionParser as pest::Parser<
95            Rule,
96        >>::parse(
97            Rule::top_level_relation, line
98        )
99        .map_err(|e| {
100            ParseError::Plan(
101                ParseContext::new(line_no, line.to_string()),
102                MessageParseError::new("top_level_relation", ErrorKind::Syntax, Box::new(e)),
103            )
104        })?;
105
106        let pair = pairs.next().unwrap();
107        assert!(pairs.next().is_none());
108
109        // Get the inner pair, which is either a root relation or a regular relation
110        let inner_pair = unwrap_single_pair(pair);
111
112        Ok(Self {
113            pair: inner_pair,
114            line_no,
115            children: Vec::new(),
116        })
117    }
118}
119
120/// Helper function to get the number of input fields from a relation.
121/// This is needed for Project relations to calculate output mapping indices.
122fn get_input_field_count(rel: &Rel) -> usize {
123    match &rel.rel_type {
124        Some(RelType::Read(read_rel)) => {
125            // For Read relations, count the fields in the base schema
126            read_rel
127                .base_schema
128                .as_ref()
129                .and_then(|schema| schema.r#struct.as_ref())
130                .map(|struct_| struct_.types.len())
131                .unwrap_or(0)
132        }
133        Some(RelType::Filter(filter_rel)) => {
134            // For Filter relations, get the count from the input
135            filter_rel
136                .input
137                .as_ref()
138                .map(|input| get_input_field_count(input))
139                .unwrap_or(0)
140        }
141        Some(RelType::Project(project_rel)) => {
142            // For Project relations, get the count from the input
143            project_rel
144                .input
145                .as_ref()
146                .map(|input| get_input_field_count(input))
147                .unwrap_or(0)
148        }
149        _ => 0,
150    }
151}
152
/// Which section of the input the parser is currently in.
#[derive(Copy, Clone, Debug)]
pub enum State {
    /// The initial state, before we have parsed any lines.
    Initial,
    /// The extensions section, after parsing the header and any other Extension lines.
    Extensions,
    /// The plan section, after parsing the header and any other Plan lines.
    Plan,
}

impl fmt::Display for State {
    /// Write the variant name, exactly as the derived `Debug` output spells it.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            State::Initial => "Initial",
            State::Extensions => "Extensions",
            State::Plan => "Plan",
        };
        f.write_str(name)
    }
}
168
// An in-progress tree builder, building the tree of relations.
//
// Depth-0 lines start a new tree; deeper lines are attached beneath the most
// recently added node one level up.
#[derive(Debug, Clone, Default)]
pub struct TreeBuilder<'a> {
    // Current tree of nodes being built. These have been successfully parsed
    // into Pest pairs, but have not yet been converted to substrait plans.
    // `None` until the first depth-0 line has been added.
    current: Option<LineNode<'a>>,
    // Completed trees that have been built, in the order they were added.
    completed: Vec<LineNode<'a>>,
}
178
179impl<'a> TreeBuilder<'a> {
180    /// Traverse down the tree, always taking the last child at each level, until reaching the specified depth.
181    pub fn get_at_depth(&mut self, depth: usize) -> Option<&mut LineNode<'a>> {
182        let mut node = self.current.as_mut()?;
183        for _ in 0..depth {
184            node = node.children.last_mut()?;
185        }
186        Some(node)
187    }
188
189    pub fn add_line(&mut self, depth: usize, node: LineNode<'a>) -> Result<(), ParseError> {
190        if depth == 0 {
191            if let Some(prev) = self.current.take() {
192                self.completed.push(prev)
193            }
194            self.current = Some(node);
195            return Ok(());
196        }
197
198        let parent = match self.get_at_depth(depth - 1) {
199            None => {
200                return Err(ParseError::Plan(
201                    node.context(),
202                    MessageParseError::invalid(
203                        "relation",
204                        node.pair.as_span(),
205                        format!("No parent found for depth {depth}"),
206                    ),
207                ));
208            }
209            Some(parent) => parent,
210        };
211
212        parent.children.push(node.clone());
213        Ok(())
214    }
215
216    /// End of input - move any remaining nodes from stack to completed and
217    /// return any trees in progress. Resets the builder to its initial state
218    /// (empty)
219    pub fn finish(&mut self) -> Vec<LineNode<'a>> {
220        // Move any remaining nodes from stack to completed
221        if let Some(node) = self.current.take() {
222            self.completed.push(node);
223        }
224        std::mem::take(&mut self.completed)
225    }
226}
227
// Relation parsing component - handles converting LineNodes to Relations
#[derive(Debug, Clone, Default)]
pub struct RelationParser<'a> {
    // The line trees collected so far; consumed when the relations are built.
    tree: TreeBuilder<'a>,
}
233
234impl<'a> RelationParser<'a> {
235    pub fn parse_line(&mut self, line: IndentedLine<'a>, line_no: i64) -> Result<(), ParseError> {
236        let IndentedLine(depth, line) = line;
237
238        // Use parse_root for depth 0 (top-level relations), parse for other depths
239        let node = if depth == 0 {
240            LineNode::parse_root(line, line_no)?
241        } else {
242            LineNode::parse(line, line_no)?
243        };
244
245        self.tree.add_line(depth, node)
246    }
247
248    /// Parse a relation from a Pest pair of rule 'relation' into a Substrait
249    /// Rel.
250    //
251    // Clippy says a Vec<Box<…>> is unnecessary, as the Vec is already on the
252    // heap, but this is what the protobuf requires so we allow it here
253    #[allow(clippy::vec_box)]
254    fn parse_relation(
255        &self,
256        extensions: &SimpleExtensions,
257        registry: &ExtensionRegistry,
258        line_no: i64,
259        pair: pest::iterators::Pair<Rule>,
260        child_relations: Vec<Box<substrait::proto::Rel>>,
261        input_field_count: usize,
262    ) -> Result<substrait::proto::Rel, ParseError> {
263        assert_eq!(pair.as_rule(), Rule::relation);
264        let p = unwrap_single_pair(pair);
265
266        let (e, r, l, p_inner, cr, ic) = (
267            extensions,
268            registry,
269            line_no,
270            p,
271            child_relations,
272            input_field_count,
273        );
274
275        match p_inner.as_rule() {
276            Rule::read_relation => self.parse_rel::<ReadRel>(e, l, p_inner, cr, ic),
277            Rule::filter_relation => self.parse_rel::<FilterRel>(e, l, p_inner, cr, ic),
278            Rule::project_relation => self.parse_rel::<ProjectRel>(e, l, p_inner, cr, ic),
279            Rule::aggregate_relation => self.parse_rel::<AggregateRel>(e, l, p_inner, cr, ic),
280            Rule::sort_relation => self.parse_rel::<SortRel>(e, l, p_inner, cr, ic),
281            Rule::fetch_relation => self.parse_rel::<FetchRel>(e, l, p_inner, cr, ic),
282            Rule::join_relation => self.parse_rel::<JoinRel>(e, l, p_inner, cr, ic),
283            Rule::extension_relation => self.parse_extension_relation(e, r, l, p_inner, cr),
284            _ => todo!(),
285        }
286    }
287
288    /// Parse a specific relation type.
289    // Box is needed because Rel is a large enum and we need to pass ownership
290    // through the RelationParsePair trait, which requires Box<Rel>.
291    #[allow(clippy::vec_box)]
292    fn parse_rel<T: RelationParsePair>(
293        &self,
294        extensions: &SimpleExtensions,
295        line_no: i64,
296        pair: pest::iterators::Pair<Rule>,
297        child_relations: Vec<Box<substrait::proto::Rel>>,
298        input_field_count: usize,
299    ) -> Result<substrait::proto::Rel, ParseError> {
300        assert_eq!(pair.as_rule(), T::rule());
301
302        let line = pair.as_str();
303
304        let rel_type =
305            T::parse_pair_with_context(extensions, pair, child_relations, input_field_count);
306
307        match rel_type {
308            Ok(rel) => Ok(rel.into_rel()),
309            Err(e) => Err(ParseError::Plan(
310                ParseContext::new(line_no, line.to_string()),
311                e,
312            )),
313        }
314    }
315
316    /// Parse extension relations.
317    /// Extension relations need the full parsing context for registry lookups and warning handling.
318    // Box is needed because Rel is a large enum and we need to pass ownership
319    // through the RelationParsePair trait, which requires Box<Rel>.
320    #[allow(clippy::vec_box)]
321    fn parse_extension_relation(
322        &self,
323        extensions: &SimpleExtensions,
324        registry: &ExtensionRegistry,
325        line_no: i64,
326        pair: pest::iterators::Pair<Rule>,
327        child_relations: Vec<Box<substrait::proto::Rel>>,
328    ) -> Result<substrait::proto::Rel, ParseError> {
329        assert_eq!(pair.as_rule(), Rule::extension_relation);
330
331        let line = pair.as_str();
332        let pair_span = pair.as_span();
333
334        // Parse extension invocation, which includes the user-provided name
335        let ExtensionInvocation {
336            name,
337            args: extension_args,
338        } = ExtensionInvocation::parse_pair(pair);
339
340        // Validate child count matches relation type
341        let child_count = child_relations.len();
342        extension_args
343            .relation_type
344            .validate_child_count(child_count)
345            .map_err(|e| {
346                ParseError::Plan(
347                    ParseContext::new(line_no, line.to_string()),
348                    MessageParseError::invalid("extension_relation", pair_span, e),
349                )
350            })?;
351
352        let context = RelationParsingContext {
353            extensions,
354            registry,
355            line_no,
356            line,
357        };
358
359        let detail = context.resolve_extension_detail(&name, &extension_args)?;
360
361        extension_args
362            .relation_type
363            .create_rel(detail, child_relations)
364            .map_err(|e| {
365                ParseError::Plan(
366                    ParseContext::new(line_no, line.to_string()),
367                    MessageParseError::invalid("extension_relation", pair_span, e),
368                )
369            })
370    }
371
372    /// Convert a LineNode into a Substrait Rel.
373    fn build_rel(
374        &self,
375        extensions: &SimpleExtensions,
376        registry: &ExtensionRegistry,
377        node: LineNode,
378    ) -> Result<substrait::proto::Rel, ParseError> {
379        // Parse children first to get their output schemas
380        let child_relations = node
381            .children
382            .into_iter()
383            .map(|c| self.build_rel(extensions, registry, c).map(Box::new))
384            .collect::<Result<Vec<Box<Rel>>, ParseError>>()?;
385
386        // Get the input field count from all the children
387        let input_field_count = child_relations
388            .iter()
389            .map(|r| get_input_field_count(r.as_ref()))
390            .reduce(|a, b| a + b)
391            .unwrap_or(0);
392
393        // Parse this node using the stored pair
394        self.parse_relation(
395            extensions,
396            registry,
397            node.line_no,
398            node.pair,
399            child_relations,
400            input_field_count,
401        )
402    }
403
404    /// Build a tree of relations.
405    fn build_plan_rel(
406        &self,
407        extensions: &SimpleExtensions,
408        registry: &ExtensionRegistry,
409        mut node: LineNode,
410    ) -> Result<PlanRel, ParseError> {
411        // Plain relations are allowed as root relations, they just don't have names.
412        if node.pair.as_rule() == Rule::relation {
413            let rel = self.build_rel(extensions, registry, node)?;
414            return Ok(PlanRel {
415                rel_type: Some(plan_rel::RelType::Rel(rel)),
416            });
417        }
418
419        // Otherwise, it must be a root relation.
420        assert_eq!(node.pair.as_rule(), Rule::root_relation);
421        let context = node.context();
422        let span = node.pair.as_span();
423
424        // Parse the column names
425        let column_names_pair = unwrap_single_pair(node.pair);
426        assert_eq!(column_names_pair.as_rule(), Rule::root_name_list);
427
428        let names: Vec<String> = column_names_pair
429            .into_inner()
430            .map(|name_pair| {
431                assert_eq!(name_pair.as_rule(), Rule::name);
432                Name::parse_pair(name_pair).0
433            })
434            .collect();
435
436        let child = match node.children.len() {
437            1 => self.build_rel(extensions, registry, node.children.pop().unwrap())?,
438            n => {
439                return Err(ParseError::Plan(
440                    context,
441                    MessageParseError::invalid(
442                        "root_relation",
443                        span,
444                        format!("Root relation must have exactly one child, found {n}"),
445                    ),
446                ));
447            }
448        };
449
450        let rel_root = RelRoot {
451            names,
452            input: Some(child),
453        };
454
455        Ok(PlanRel {
456            rel_type: Some(plan_rel::RelType::Root(rel_root)),
457        })
458    }
459
460    /// Build all the trees.
461    fn build(
462        mut self,
463        extensions: &SimpleExtensions,
464        registry: &ExtensionRegistry,
465    ) -> Result<Vec<PlanRel>, ParseError> {
466        let nodes = self.tree.finish();
467        nodes
468            .into_iter()
469            .map(|n| self.build_plan_rel(extensions, registry, n))
470            .collect::<Result<Vec<PlanRel>, ParseError>>()
471    }
472}
473
474/// A parser for Substrait query plans in text format.
475///
476/// The `Parser` converts human-readable Substrait text format into Substrait
477/// protobuf plans. It handles both the extensions section (which defines
478/// functions, types, etc.) and the plan section (which defines the actual query
479/// structure).
480///
481/// ## Usage
482///
483/// The simplest entry point is the static `parse()` method:
484///
485/// ```rust
486/// use substrait_explain::parser::Parser;
487///
488/// let plan_text = r#"
489/// === Plan
490/// Root[c, d]
491///   Project[$1, 42]
492///     Read[schema.table => a:i64, b:string?]
493/// "#;
494///
495/// let plan = Parser::parse(plan_text).unwrap();
496/// ```
497///
498/// ## Input Format
499///
500/// The parser expects input in the following format:
501///
502/// ```text
503/// === Extensions
504/// URNs:
505///   @  1: https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml
506/// Functions:
507///   # 10 @  1: add
508/// === Plan
509/// Root[columns]
510///   Relation[arguments => columns]
511///     ChildRelation[arguments => columns]
512/// ```
513///
514/// - **Extensions section** (optional): Defines URNs and function/type declarations
515/// - **Plan section** (required): Defines the query structure with indented relations
516///
517/// ## Error Handling
518///
519/// The parser provides detailed error information including:
520/// - Line number where the error occurred
521/// - The actual line content that failed to parse
522/// - Specific error type and description
523///
524/// ```rust
525/// use substrait_explain::parser::Parser;
526///
527/// let invalid_plan = r#"
528/// === Plan
529/// InvalidRelation[invalid syntax]
530/// "#;
531///
532/// match Parser::parse(invalid_plan) {
533///     Ok(plan) => println!("Successfully parsed"),
534///     Err(e) => eprintln!("Parse error: {}", e),
535/// }
536/// ```
537///
538/// ## Supported Relations
539///
540/// The parser supports all standard Substrait relations:
541/// - `Read[table => columns]` - Read from a table
542/// - `Project[expressions]` - Project columns/expressions
543/// - `Filter[condition => columns]` - Filter rows
544/// - `Root[columns]` - Root relation with output columns
545/// - And more...
546///
547/// ## Extensions Support
548///
549/// The parser fully supports Substrait Simple Extensions, allowing you to:
550/// - Define custom functions with URNs and anchors
551/// - Reference functions by name in expressions
552/// - Use custom types and type variations
553///
554/// ```rust
555/// use substrait_explain::parser::Parser;
556///
557/// let plan_with_extensions = r#"
558/// === Extensions
559/// URNs:
560///   @  1: https://example.com/functions.yaml
561/// Functions:
562///   ## 10 @  1: my_custom_function
563/// === Plan
564/// Root[result]
565///   Project[my_custom_function($0, $1)]
566///     Read[table => col1:i32, col2:i32]
567/// "#;
568///
569/// let plan = Parser::parse(plan_with_extensions).unwrap();
570/// ```
571///
572/// ## Performance
573///
574/// The parser is designed for efficiency:
575/// - Single-pass parsing with minimal allocations
576/// - Early error detection and reporting
577/// - Memory-efficient tree building
578///
579/// ## Thread Safety
580///
581/// `Parser` instances are not thread-safe and should not be shared between threads.
582/// However, the static `parse()` method is safe to call from multiple threads.
#[derive(Debug)]
pub struct Parser<'a> {
    // Number of the input line being processed; starts at 1 and is advanced
    // once per input line by `parse_plan`.
    line_no: i64,
    // Which section of the input the parser is currently in.
    state: State,
    // Parses lines of the extensions section and holds the resulting extensions.
    extension_parser: ExtensionParser,
    // Registry handed to the relation parser when building relations.
    extension_registry: ExtensionRegistry,
    // Collects the plan lines into trees and converts them to relations.
    relation_parser: RelationParser<'a>,
}
591impl<'a> Default for Parser<'a> {
592    fn default() -> Self {
593        Self::new()
594    }
595}
596
597impl<'a> Parser<'a> {
598    /// Parse a Substrait plan from text format.
599    ///
600    /// This is the main entry point for parsing.
601    ///
602    /// The input should be in the Substrait text format, which consists of:
603    /// - An optional extensions section starting with "=== Extensions"
604    /// - A plan section starting with "=== Plan"
605    /// - Indented relation definitions
606    ///
607    /// # Examples
608    ///
609    /// Simple parsing:
610    /// ```rust
611    /// use substrait_explain::parser::Parser;
612    ///
613    /// let plan_text = r#"
614    /// === Plan
615    /// Root[result]
616    ///   Read[table => col:i32]
617    /// "#;
618    ///
619    /// let plan = Parser::parse(plan_text).unwrap();
620    /// assert_eq!(plan.relations.len(), 1);
621    /// ```
622    ///
623    /// # Errors
624    ///
625    /// Returns a [`ParseError`] if the input cannot be parsed.
626    pub fn parse(input: &str) -> ParseResult {
627        Self::new().parse_plan(input)
628    }
629
630    /// Create a new parser with default configuration.
631    pub fn new() -> Self {
632        Self {
633            line_no: 1,
634            state: State::Initial,
635            extension_parser: ExtensionParser::default(),
636            extension_registry: ExtensionRegistry::new(),
637            relation_parser: RelationParser::default(),
638        }
639    }
640
641    /// Configure the parser to use the specified extension registry.
642    pub fn with_extension_registry(mut self, registry: ExtensionRegistry) -> Self {
643        self.extension_registry = registry;
644        self
645    }
646
647    /// Parse a Substrait plan with the current parser configuration.
648    pub fn parse_plan(mut self, input: &'a str) -> ParseResult {
649        for line in input.lines() {
650            if line.trim().is_empty() {
651                self.line_no += 1;
652                continue;
653            }
654
655            self.parse_line(line)?;
656            self.line_no += 1;
657        }
658
659        let plan = self.build_plan()?;
660        Ok(plan)
661    }
662
663    /// Parse a single line of input.
664    fn parse_line(&mut self, line: &'a str) -> Result<(), ParseError> {
665        let indented_line = IndentedLine::from(line);
666        let line_no = self.line_no;
667        let ctx = || ParseContext {
668            line_no,
669            line: line.to_string(),
670        };
671
672        match self.state {
673            State::Initial => self.parse_initial(indented_line),
674            State::Extensions => self
675                .parse_extensions(indented_line)
676                .map_err(|e| ParseError::Extension(ctx(), e)),
677            State::Plan => {
678                let IndentedLine(depth, line_str) = indented_line;
679
680                // Parse the line
681                let node = if depth == 0 {
682                    LineNode::parse_root(line_str, line_no)?
683                } else {
684                    LineNode::parse(line_str, line_no)?
685                };
686
687                self.relation_parser.tree.add_line(depth, node)
688            }
689        }
690    }
691
692    /// Parse the initial line(s) of the input, which is either a blank line or
693    /// the extensions or plan header.
694    fn parse_initial(&mut self, line: IndentedLine) -> Result<(), ParseError> {
695        match line {
696            IndentedLine(0, l) if l.trim().is_empty() => {}
697            IndentedLine(0, simple::EXTENSIONS_HEADER) => {
698                self.state = State::Extensions;
699            }
700            IndentedLine(0, PLAN_HEADER) => {
701                self.state = State::Plan;
702            }
703            IndentedLine(n, l) => {
704                return Err(ParseError::Initial(
705                    ParseContext::new(n as i64, l.to_string()),
706                    MessageParseError::invalid(
707                        "initial",
708                        pest::Span::new(l, 0, l.len()).expect("Invalid span?!"),
709                        format!("Unknown initial line: {l:?}"),
710                    ),
711                ));
712            }
713        }
714        Ok(())
715    }
716
717    /// Parse a single line from the extensions section of the input, updating
718    /// the parser state.
719    fn parse_extensions(&mut self, line: IndentedLine<'_>) -> Result<(), ExtensionParseError> {
720        if line == IndentedLine(0, PLAN_HEADER) {
721            self.state = State::Plan;
722            return Ok(());
723        }
724        self.extension_parser.parse_line(line)
725    }
726
727    /// Build the plan from the parser state with warning collection.
728    fn build_plan(self) -> Result<Plan, ParseError> {
729        let Parser {
730            relation_parser,
731            extension_parser,
732            extension_registry,
733            ..
734        } = self;
735
736        let extensions = extension_parser.extensions();
737
738        // Parse the tree into relations
739        let root_relations = relation_parser.build(extensions, &extension_registry)?;
740
741        // Build the final plan
742        Ok(Plan {
743            extension_urns: extensions.to_extension_urns(),
744            extensions: extensions.to_extension_declarations(),
745            relations: root_relations,
746            ..Default::default()
747        })
748    }
749}
750
751#[cfg(test)]
752mod tests {
753    use substrait::proto::extensions::simple_extension_declaration::MappingType;
754
755    use super::*;
756    use crate::extensions::simple::ExtensionKind;
757    use crate::parser::extensions::ExtensionParserState;
758
759    #[test]
760    fn test_parse_basic_block() {
761        let mut expected_extensions = SimpleExtensions::new();
762        expected_extensions
763            .add_extension_urn("/urn/common".to_string(), 1)
764            .unwrap();
765        expected_extensions
766            .add_extension_urn("/urn/specific_funcs".to_string(), 2)
767            .unwrap();
768        expected_extensions
769            .add_extension(ExtensionKind::Function, 1, 10, "func_a".to_string())
770            .unwrap();
771        expected_extensions
772            .add_extension(ExtensionKind::Function, 2, 11, "func_b_special".to_string())
773            .unwrap();
774        expected_extensions
775            .add_extension(ExtensionKind::Type, 1, 20, "SomeType".to_string())
776            .unwrap();
777        expected_extensions
778            .add_extension(ExtensionKind::TypeVariation, 2, 30, "VarX".to_string())
779            .unwrap();
780
781        let mut parser = ExtensionParser::default();
782        let input_block = r#"
783URNs:
784  @  1: /urn/common
785  @  2: /urn/specific_funcs
786Functions:
787  # 10 @  1: func_a
788  # 11 @  2: func_b_special
789Types:
790  # 20 @  1: SomeType
791Type Variations:
792  # 30 @  2: VarX
793"#;
794
795        for line_str in input_block.trim().lines() {
796            parser
797                .parse_line(IndentedLine::from(line_str))
798                .unwrap_or_else(|e| panic!("Failed to parse line \'{line_str}\': {e:?}"));
799        }
800
801        assert_eq!(*parser.extensions(), expected_extensions);
802
803        let extensions_str = parser.extensions().to_string("  ");
804        // The writer adds the header; the ExtensionParser does not parse the
805        // header, so we add it here for comparison.
806        let expected_str = format!(
807            "{}\n{}",
808            simple::EXTENSIONS_HEADER,
809            input_block.trim_start()
810        );
811        assert_eq!(extensions_str.trim(), expected_str.trim());
812        // Check final state after all lines are processed.
813        // The last significant line in input_block is a TypeVariation declaration.
814        assert_eq!(
815            parser.state(),
816            ExtensionParserState::ExtensionDeclarations(ExtensionKind::TypeVariation)
817        );
818
819        // Check that a subsequent blank line correctly resets state to Extensions.
820        parser.parse_line(IndentedLine(0, "")).unwrap();
821        assert_eq!(parser.state(), ExtensionParserState::Extensions);
822    }
823
824    /// Test that we can parse a larger extensions block and it matches the input.
825    #[test]
826    fn test_parse_complete_extension_block() {
827        let mut parser = ExtensionParser::default();
828        let input_block = r#"
829URNs:
830  @  1: /urn/common
831  @  2: /urn/specific_funcs
832  @  3: /urn/types_lib
833  @  4: /urn/variations_lib
834Functions:
835  # 10 @  1: func_a
836  # 11 @  2: func_b_special
837  # 12 @  1: func_c_common
838Types:
839  # 20 @  1: CommonType
840  # 21 @  3: LibraryType
841  # 22 @  1: AnotherCommonType
842Type Variations:
843  # 30 @  4: VarX
844  # 31 @  4: VarY
845"#;
846
847        for line_str in input_block.trim().lines() {
848            parser
849                .parse_line(IndentedLine::from(line_str))
850                .unwrap_or_else(|e| panic!("Failed to parse line \'{line_str}\': {e:?}"));
851        }
852
853        let extensions_str = parser.extensions().to_string("  ");
854        // The writer adds the header; the ExtensionParser does not parse the
855        // header, so we add it here for comparison.
856        let expected_str = format!(
857            "{}\n{}",
858            simple::EXTENSIONS_HEADER,
859            input_block.trim_start()
860        );
861        assert_eq!(extensions_str.trim(), expected_str.trim());
862    }
863
864    #[test]
865    fn test_parse_relation_tree() {
866        // Example plan with a Project, a Filter, and a Read, nested by indentation
867        let plan = r#"=== Plan
868Project[$0, $1, 42, 84]
869  Filter[$2 => $0, $1]
870    Read[my.table => a:i32, b:string?, c:boolean]
871"#;
872        let mut parser = Parser::default();
873        for line in plan.lines() {
874            parser.parse_line(line).unwrap();
875        }
876
877        // Complete the current tree to convert it to relations
878        let plan = parser.build_plan().unwrap();
879
880        let root_rel = &plan.relations[0].rel_type;
881        let first_rel = match root_rel {
882            Some(plan_rel::RelType::Rel(rel)) => rel,
883            _ => panic!("Expected Rel type, got {root_rel:?}"),
884        };
885        // Root should be Project
886        let project = match &first_rel.rel_type {
887            Some(RelType::Project(p)) => p,
888            other => panic!("Expected Project at root, got {other:?}"),
889        };
890
891        // Check that Project has Filter as input
892        assert!(project.input.is_some());
893        let filter_input = project.input.as_ref().unwrap();
894
895        // Check that Filter has Read as input
896        match &filter_input.rel_type {
897            Some(RelType::Filter(_)) => {
898                match &filter_input.rel_type {
899                    Some(RelType::Filter(filter)) => {
900                        assert!(filter.input.is_some());
901                        let read_input = filter.input.as_ref().unwrap();
902
903                        // Check that Read has no input (it's a leaf)
904                        match &read_input.rel_type {
905                            Some(RelType::Read(_)) => {}
906                            other => panic!("Expected Read relation, got {other:?}"),
907                        }
908                    }
909                    other => panic!("Expected Filter relation, got {other:?}"),
910                }
911            }
912            other => panic!("Expected Filter relation, got {other:?}"),
913        }
914    }
915
916    #[test]
917    fn test_parse_root_relation() {
918        // Test a plan with a Root relation
919        let plan = r#"=== Plan
920Root[result]
921  Project[$0, $1]
922    Read[my.table => a:i32, b:string?]
923"#;
924        let mut parser = Parser::default();
925        for line in plan.lines() {
926            parser.parse_line(line).unwrap();
927        }
928
929        let plan = parser.build_plan().unwrap();
930
931        // Check that we have exactly one relation
932        assert_eq!(plan.relations.len(), 1);
933
934        let root_rel = &plan.relations[0].rel_type;
935        let rel_root = match root_rel {
936            Some(plan_rel::RelType::Root(rel_root)) => rel_root,
937            other => panic!("Expected Root type, got {other:?}"),
938        };
939
940        // Check that the root has the correct name
941        assert_eq!(rel_root.names, vec!["result"]);
942
943        // Check that the root has a Project as input
944        let project_input = match &rel_root.input {
945            Some(rel) => rel,
946            None => panic!("Root should have an input"),
947        };
948
949        let project = match &project_input.rel_type {
950            Some(RelType::Project(p)) => p,
951            other => panic!("Expected Project as root input, got {other:?}"),
952        };
953
954        // Check that Project has Read as input
955        let read_input = match &project.input {
956            Some(rel) => rel,
957            None => panic!("Project should have an input"),
958        };
959
960        match &read_input.rel_type {
961            Some(RelType::Read(_)) => {}
962            other => panic!("Expected Read relation, got {other:?}"),
963        }
964    }
965
966    #[test]
967    fn test_parse_root_relation_no_names() {
968        // Test a plan with a Root relation with no names
969        let plan = r#"=== Plan
970Root[]
971  Project[$0, $1]
972    Read[my.table => a:i32, b:string?]
973"#;
974        let mut parser = Parser::default();
975        for line in plan.lines() {
976            parser.parse_line(line).unwrap();
977        }
978
979        let plan = parser.build_plan().unwrap();
980
981        let root_rel = &plan.relations[0].rel_type;
982        let rel_root = match root_rel {
983            Some(plan_rel::RelType::Root(rel_root)) => rel_root,
984            other => panic!("Expected Root type, got {other:?}"),
985        };
986
987        // Check that the root has no names
988        assert_eq!(rel_root.names, Vec::<String>::new());
989    }
990
991    #[test]
992    fn test_parse_full_plan() {
993        // Test a complete Substrait plan with extensions and relations
994        let input = r#"
995=== Extensions
996URNs:
997  @  1: /urn/common
998  @  2: /urn/specific_funcs
999Functions:
1000  # 10 @  1: func_a
1001  # 11 @  2: func_b_special
1002Types:
1003  # 20 @  1: SomeType
1004Type Variations:
1005  # 30 @  2: VarX
1006
1007=== Plan
1008Project[$0, $1, 42, 84]
1009  Filter[$2 => $0, $1]
1010    Read[my.table => a:i32, b:string?, c:boolean]
1011"#;
1012
1013        let plan = Parser::parse(input).unwrap();
1014
1015        // Verify the plan structure
1016        assert_eq!(plan.extension_urns.len(), 2);
1017        assert_eq!(plan.extensions.len(), 4);
1018        assert_eq!(plan.relations.len(), 1);
1019
1020        // Verify extension URIs
1021        let urn1 = &plan.extension_urns[0];
1022        assert_eq!(urn1.extension_urn_anchor, 1);
1023        assert_eq!(urn1.urn, "/urn/common");
1024
1025        let urn2 = &plan.extension_urns[1];
1026        assert_eq!(urn2.extension_urn_anchor, 2);
1027        assert_eq!(urn2.urn, "/urn/specific_funcs");
1028
1029        // Verify extensions
1030        let func1 = &plan.extensions[0];
1031        match &func1.mapping_type {
1032            Some(MappingType::ExtensionFunction(f)) => {
1033                assert_eq!(f.function_anchor, 10);
1034                assert_eq!(f.extension_urn_reference, 1);
1035                assert_eq!(f.name, "func_a");
1036            }
1037            other => panic!("Expected ExtensionFunction, got {other:?}"),
1038        }
1039
1040        let func2 = &plan.extensions[1];
1041        match &func2.mapping_type {
1042            Some(MappingType::ExtensionFunction(f)) => {
1043                assert_eq!(f.function_anchor, 11);
1044                assert_eq!(f.extension_urn_reference, 2);
1045                assert_eq!(f.name, "func_b_special");
1046            }
1047            other => panic!("Expected ExtensionFunction, got {other:?}"),
1048        }
1049
1050        let type1 = &plan.extensions[2];
1051        match &type1.mapping_type {
1052            Some(MappingType::ExtensionType(t)) => {
1053                assert_eq!(t.type_anchor, 20);
1054                assert_eq!(t.extension_urn_reference, 1);
1055                assert_eq!(t.name, "SomeType");
1056            }
1057            other => panic!("Expected ExtensionType, got {other:?}"),
1058        }
1059
1060        let var1 = &plan.extensions[3];
1061        match &var1.mapping_type {
1062            Some(MappingType::ExtensionTypeVariation(v)) => {
1063                assert_eq!(v.type_variation_anchor, 30);
1064                assert_eq!(v.extension_urn_reference, 2);
1065                assert_eq!(v.name, "VarX");
1066            }
1067            other => panic!("Expected ExtensionTypeVariation, got {other:?}"),
1068        }
1069
1070        // Verify the relation tree structure
1071        let root_rel = &plan.relations[0];
1072        match &root_rel.rel_type {
1073            Some(plan_rel::RelType::Rel(rel)) => {
1074                match &rel.rel_type {
1075                    Some(RelType::Project(project)) => {
1076                        // Verify Project relation
1077                        assert_eq!(project.expressions.len(), 2); // 42 and 84
1078                        assert!(project.input.is_some()); // Should have Filter as input
1079
1080                        // Check the Filter input
1081                        let filter_input = project.input.as_ref().unwrap();
1082                        match &filter_input.rel_type {
1083                            Some(RelType::Filter(filter)) => {
1084                                assert!(filter.input.is_some()); // Should have Read as input
1085
1086                                // Check the Read input
1087                                let read_input = filter.input.as_ref().unwrap();
1088                                match &read_input.rel_type {
1089                                    Some(RelType::Read(read)) => {
1090                                        // Verify Read relation
1091                                        let schema = read.base_schema.as_ref().unwrap();
1092                                        assert_eq!(schema.names.len(), 3);
1093                                        assert_eq!(schema.names[0], "a");
1094                                        assert_eq!(schema.names[1], "b");
1095                                        assert_eq!(schema.names[2], "c");
1096
1097                                        let struct_ = schema.r#struct.as_ref().unwrap();
1098                                        assert_eq!(struct_.types.len(), 3);
1099                                    }
1100                                    other => panic!("Expected Read relation, got {other:?}"),
1101                                }
1102                            }
1103                            other => panic!("Expected Filter relation, got {other:?}"),
1104                        }
1105                    }
1106                    other => panic!("Expected Project relation, got {other:?}"),
1107                }
1108            }
1109            other => panic!("Expected Rel type, got {other:?}"),
1110        }
1111    }
1112}