substrait_explain/
cli.rs

1use std::fs;
2use std::io::{self, Read, Write};
3use std::process::ExitCode;
4
5use anyhow::{Context, Result};
6use clap::{Parser, Subcommand};
7use prost::Message;
8
9use crate::extensions::ExtensionRegistry;
10use crate::{FormatError, OutputOptions, Visibility, format_with_registry, parse};
11
/// The outcome of a CLI operation.
///
/// Distinguishes between complete success and "soft failures" like formatting
/// issues where output was still written but there were problems.
///
/// Hard failures are not represented here: the CLI functions in this file
/// report those through the `Err` side of the `Result` wrapping an `Outcome`.
#[derive(Debug)]
pub enum Outcome {
    /// Operation completed successfully with no issues.
    Success,
    /// Output was written, but there were formatting issues.
    ///
    /// Carries the collected formatting errors; `Cli::run_with_extensions`
    /// prints each one to stderr and maps this variant to a failing exit code.
    HadFormattingIssues(Vec<FormatError>),
}
23
// Top-level argument parser for the `substrait-explain` command line.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments into help text, and the help text is set explicitly via `about`.
#[derive(Parser)]
#[command(name = "substrait-explain")]
#[command(about = "A CLI for parsing and formatting Substrait query plans")]
#[command(version)]
pub struct Cli {
    // The subcommand selected on the command line (`convert` or `validate`).
    #[command(subcommand)]
    pub command: Commands,
}
32
33impl Cli {
34    /// Run the CLI and return an exit code.
35    ///
36    /// Errors are printed to stderr.
37    pub fn run(self) -> ExitCode {
38        self.run_with_extensions(ExtensionRegistry::default())
39    }
40
41    /// Run the CLI with a custom extension registry and return an exit code.
42    ///
43    /// Use this when embedding the CLI in a binary that registers custom
44    /// extension relation types:
45    ///
46    /// ```rust,ignore
47    /// let mut registry = ExtensionRegistry::new();
48    /// registry.register_relation::<MyCustomScan>().unwrap();
49    /// Cli::parse().run_with_extensions(registry)
50    /// ```
51    pub fn run_with_extensions(self, registry: ExtensionRegistry) -> ExitCode {
52        match self.run_inner(&registry) {
53            Ok(Outcome::Success) => ExitCode::SUCCESS,
54            Ok(Outcome::HadFormattingIssues(errors)) => {
55                eprintln!("Formatting issues:");
56                for error in errors {
57                    eprintln!("  {error}");
58                }
59                ExitCode::FAILURE
60            }
61            Err(e) => {
62                eprintln!("Error: {e:?}");
63                ExitCode::FAILURE
64            }
65        }
66    }
67
68    fn run_inner(self, registry: &ExtensionRegistry) -> Result<Outcome> {
69        match &self.command {
70            Commands::Convert {
71                input,
72                output,
73                from,
74                to,
75                show_literal_types,
76                show_expression_types,
77                verbose,
78            } => {
79                let reader = get_reader(input)
80                    .with_context(|| format!("Failed to open input file: {input}"))?;
81                let writer = get_writer(output)
82                    .with_context(|| format!("Failed to create output file: {output}"))?;
83                let options =
84                    self.create_output_options(*show_literal_types, *show_expression_types);
85                let from_format = self.resolve_input_format(from, input)?;
86                let to_format = self.resolve_output_format(to, output)?;
87                self.run_convert_with_io(
88                    reader,
89                    writer,
90                    &from_format,
91                    &to_format,
92                    &options,
93                    *verbose,
94                    registry,
95                )
96            }
97
98            Commands::Validate {
99                input,
100                output,
101                verbose,
102            } => {
103                let reader = get_reader(input)
104                    .with_context(|| format!("Failed to open input file: {input}"))?;
105                let writer = get_writer(output)
106                    .with_context(|| format!("Failed to create output file: {output}"))?;
107                self.run_validate_with_io(reader, writer, *verbose, registry)
108            }
109        }
110    }
111
112    /// Run CLI with provided readers and writers for testing
113    pub fn run_with_io<R: Read, W: Write>(
114        &self,
115        reader: R,
116        writer: W,
117        registry: &ExtensionRegistry,
118    ) -> Result<Outcome> {
119        match &self.command {
120            Commands::Convert {
121                input,
122                output,
123                from,
124                to,
125                show_literal_types,
126                show_expression_types,
127                verbose,
128                ..
129            } => {
130                let options =
131                    self.create_output_options(*show_literal_types, *show_expression_types);
132                let from_format = self.resolve_input_format(from, input)?;
133                let to_format = self.resolve_output_format(to, output)?;
134                self.run_convert_with_io(
135                    reader,
136                    writer,
137                    &from_format,
138                    &to_format,
139                    &options,
140                    *verbose,
141                    registry,
142                )
143            }
144
145            Commands::Validate { verbose, .. } => {
146                self.run_validate_with_io(reader, writer, *verbose, registry)
147            }
148        }
149    }
150
151    fn create_output_options(
152        &self,
153        show_literal_types: bool,
154        show_expression_types: bool,
155    ) -> OutputOptions {
156        let mut options = OutputOptions::default();
157
158        if show_literal_types {
159            options.literal_types = Visibility::Always;
160        }
161
162        if show_expression_types {
163            options.fn_types = true;
164        }
165
166        options
167    }
168
169    fn resolve_input_format(&self, format: &Option<Format>, input_path: &str) -> Result<Format> {
170        match format {
171            Some(fmt) => Ok(fmt.clone()),
172            None => Format::from_extension(input_path).ok_or_else(|| {
173                anyhow::anyhow!(
174                    "Could not auto-detect input format from file extension. \
175                     Please specify format explicitly with -f/--from. \
176                     Supported formats: text, json, yaml, protobuf/proto/pb"
177                )
178            }),
179        }
180    }
181
182    fn resolve_output_format(&self, format: &Option<Format>, output_path: &str) -> Result<Format> {
183        match format {
184            Some(fmt) => Ok(fmt.clone()),
185            None => Format::from_extension(output_path).ok_or_else(|| {
186                anyhow::anyhow!(
187                    "Could not auto-detect output format from file extension. \
188                     Please specify format explicitly with -t/--to. \
189                     Supported formats: text, json, yaml, protobuf/proto/pb"
190                )
191            }),
192        }
193    }
194
195    // TODO: this could use a refactor; the too_many_arguments tells us
196    // something useful here. We could perhaps add a type containing (registry,
197    // formats, options) or something
198    #[allow(clippy::too_many_arguments)]
199    fn run_convert_with_io<R: Read, W: Write>(
200        &self,
201        reader: R,
202        writer: W,
203        from: &Format,
204        to: &Format,
205        options: &OutputOptions,
206        verbose: bool,
207        registry: &ExtensionRegistry,
208    ) -> Result<Outcome> {
209        // Read input based on format
210        let plan = from.read_plan(reader, registry).with_context(|| {
211            format!(
212                "Failed to parse input as {} format",
213                format!("{from:?}").to_lowercase()
214            )
215        })?;
216
217        // Write output based on format
218        let outcome = to
219            .write_plan(writer, &plan, options, registry)
220            .with_context(|| {
221                format!(
222                    "Failed to write output as {} format",
223                    format!("{to:?}").to_lowercase()
224                )
225            })?;
226
227        if verbose && matches!(outcome, Outcome::Success) {
228            eprintln!("Successfully converted from {from:?} to {to:?}");
229        }
230
231        Ok(outcome)
232    }
233
234    fn run_validate_with_io<R: Read, W: Write>(
235        &self,
236        reader: R,
237        writer: W,
238        verbose: bool,
239        registry: &ExtensionRegistry,
240    ) -> Result<Outcome> {
241        let input_text = read_text_input(reader)?;
242
243        // Parse text to protobuf
244        let plan =
245            parse(&input_text).with_context(|| "Failed to parse input as Substrait text format")?;
246
247        // Format back to text
248        let (output_text, errors) =
249            format_with_registry(&plan, &OutputOptions::default(), registry);
250
251        // Write output first (best-effort)
252        write_text_output(writer, &output_text)?;
253
254        if verbose && errors.is_empty() {
255            eprintln!("Successfully validated plan");
256        }
257
258        // Return outcome based on whether there were formatting issues
259        if errors.is_empty() {
260            Ok(Outcome::Success)
261        } else {
262            Ok(Outcome::HadFormattingIssues(errors))
263        }
264    }
265}
266
// Subcommands accepted by the CLI.
//
// NOTE: clap's derive renders the `///` doc comments below as `--help` text for
// each subcommand and argument, so editing them changes user-visible output.
#[derive(Subcommand)]
pub enum Commands {
    /// Convert between different Substrait plan formats
    ///
    /// Format auto-detection:
    ///   If -f/--from or -t/--to are not specified, formats will be auto-detected
    ///   from file extensions:
    ///     .substrait, .txt    -> text format
    ///     .json               -> json format
    ///     .yaml, .yml         -> yaml format
    ///     .pb, .proto, .protobuf -> protobuf format
    ///
    /// Plan formats:
    ///   text     - Human-readable Substrait text format
    ///   json     - JSON serialized protobuf
    ///   yaml     - YAML serialized protobuf
    ///   protobuf - Binary protobuf format
    Convert {
        /// Input file (use - for stdin)
        #[arg(short, long, default_value = "-")]
        input: String,
        /// Output file (use - for stdout)
        #[arg(short, long, default_value = "-")]
        output: String,
        /// Input format: text, json, yaml, protobuf/proto/pb (auto-detected from file extension if not specified)
        #[arg(short = 'f', long)]
        from: Option<Format>,
        /// Output format: text, json, yaml, protobuf/proto/pb (auto-detected from file extension if not specified)
        #[arg(short = 't', long)]
        to: Option<Format>,
        /// Show literal types (text output only)
        #[arg(long)]
        show_literal_types: bool,
        /// Show expression types (text output only)
        #[arg(long)]
        show_expression_types: bool,
        /// Verbose output
        #[arg(short, long)]
        verbose: bool,
    },
    /// Validate text format by parsing and formatting (roundtrip test)
    Validate {
        /// Input file (use - for stdin)
        #[arg(short, long, default_value = "-")]
        input: String,
        /// Output file (use - for stdout)
        #[arg(short, long, default_value = "-")]
        output: String,
        /// Verbose output
        #[arg(short, long)]
        verbose: bool,
    },
}
320
/// Plan serialization formats the CLI can read and write.
///
/// A format is either named explicitly (parsed through `FromStr`) or
/// auto-detected from a file extension via `Format::from_extension`.
#[derive(Clone, Debug, PartialEq)]
pub enum Format {
    /// Human-readable Substrait text format.
    Text,
    /// JSON serialized protobuf.
    Json,
    /// YAML serialized protobuf.
    Yaml,
    /// Binary protobuf format.
    Protobuf,
}
328
329impl std::str::FromStr for Format {
330    type Err = String;
331
332    fn from_str(s: &str) -> Result<Self, Self::Err> {
333        match s.to_lowercase().as_str() {
334            "text" => Ok(Format::Text),
335            "json" => Ok(Format::Json),
336            "yaml" => Ok(Format::Yaml),
337            "protobuf" | "proto" | "pb" => Ok(Format::Protobuf),
338            _ => Err(format!(
339                "Invalid format: '{s}'. Supported formats: text, json, yaml, protobuf/proto/pb"
340            )),
341        }
342    }
343}
344
345impl Format {
346    /// Detect format from file extension
347    pub fn from_extension(path: &str) -> Option<Format> {
348        if path == "-" {
349            return None; // stdin/stdout - no extension
350        }
351
352        let extension = std::path::Path::new(path)
353            .extension()
354            .and_then(|ext| ext.to_str())
355            .map(|ext| ext.to_lowercase());
356
357        match extension.as_deref() {
358            Some("substrait") | Some("txt") => Some(Format::Text),
359            Some("json") => Some(Format::Json),
360            Some("yaml") | Some("yml") => Some(Format::Yaml),
361            Some("pb") | Some("proto") | Some("protobuf") => Some(Format::Protobuf),
362            _ => None,
363        }
364    }
365
366    pub fn read_plan<R: Read>(
367        &self,
368        reader: R,
369        registry: &ExtensionRegistry,
370    ) -> Result<substrait::proto::Plan> {
371        match self {
372            Format::Text => {
373                let input_text = read_text_input(reader)?;
374                Ok(parse(&input_text)?)
375            }
376            Format::Json => {
377                let input_text = read_text_input(reader)?;
378                let pool = crate::json::build_descriptor_pool(&registry.descriptors())?;
379                crate::json::parse_json(&input_text, &pool)
380            }
381            Format::Yaml => {
382                #[cfg(feature = "serde")]
383                {
384                    let input_text = read_text_input(reader)?;
385                    Ok(serde_yaml::from_str(&input_text)?)
386                }
387                #[cfg(not(feature = "serde"))]
388                {
389                    Err("YAML support requires the 'serde' feature. Install with: cargo install substrait-explain --features cli,serde".into())
390                }
391            }
392            Format::Protobuf => {
393                let input_bytes = read_binary_input(reader)?;
394                Ok(substrait::proto::Plan::decode(&input_bytes[..])?)
395            }
396        }
397    }
398
399    pub fn write_plan<W: Write>(
400        &self,
401        writer: W,
402        plan: &substrait::proto::Plan,
403        options: &OutputOptions,
404        registry: &ExtensionRegistry,
405    ) -> Result<Outcome> {
406        match self {
407            Format::Text => {
408                let (text, errors) = format_with_registry(plan, options, registry);
409
410                // Write output first (best-effort)
411                write_text_output(writer, &text)?;
412
413                // Return outcome based on whether there were formatting issues
414                if errors.is_empty() {
415                    Ok(Outcome::Success)
416                } else {
417                    Ok(Outcome::HadFormattingIssues(errors))
418                }
419            }
420            Format::Json => {
421                #[cfg(feature = "serde")]
422                {
423                    let json = serde_json::to_string_pretty(plan)?;
424                    write_text_output(writer, &json)?;
425                    Ok(Outcome::Success)
426                }
427                #[cfg(not(feature = "serde"))]
428                {
429                    Err("JSON support requires the 'serde' feature. Install with: cargo install substrait-explain --features cli,serde".into())
430                }
431            }
432            Format::Yaml => {
433                #[cfg(feature = "serde")]
434                {
435                    let yaml = serde_yaml::to_string(plan)?;
436                    write_text_output(writer, &yaml)?;
437                    Ok(Outcome::Success)
438                }
439                #[cfg(not(feature = "serde"))]
440                {
441                    Err("YAML support requires the 'serde' feature. Install with: cargo install substrait-explain --features cli,serde".into())
442                }
443            }
444            Format::Protobuf => {
445                let bytes = plan.encode_to_vec();
446                write_binary_output(writer, &bytes)?;
447                Ok(Outcome::Success)
448            }
449        }
450    }
451}
452
453/// Read text input from reader
454fn read_text_input<R: Read>(mut reader: R) -> Result<String> {
455    let mut buffer = String::new();
456    reader.read_to_string(&mut buffer)?;
457    Ok(buffer)
458}
459
460/// Read binary input from reader
461fn read_binary_input<R: Read>(mut reader: R) -> Result<Vec<u8>> {
462    let mut buffer = Vec::new();
463    reader.read_to_end(&mut buffer)?;
464    Ok(buffer)
465}
466
467/// Write text output to writer
468fn write_text_output<W: Write>(mut writer: W, content: &str) -> Result<()> {
469    writer.write_all(content.as_bytes())?;
470    Ok(())
471}
472
473/// Write binary output to writer
474fn write_binary_output<W: Write>(mut writer: W, content: &[u8]) -> Result<()> {
475    writer.write_all(content)?;
476    Ok(())
477}
478
479/// Helper function to get reader from file path (or stdin if "-")
480fn get_reader(path: &str) -> Result<Box<dyn Read>> {
481    if path == "-" {
482        Ok(Box::new(io::stdin()))
483    } else {
484        Ok(Box::new(fs::File::open(path)?))
485    }
486}
487
488/// Helper function to get writer from file path (or stdout if "-")
489fn get_writer(path: &str) -> Result<Box<dyn Write>> {
490    if path == "-" {
491        Ok(Box::new(io::stdout()))
492    } else {
493        Ok(Box::new(fs::File::create(path)?))
494    }
495}
496
497#[cfg(test)]
498mod tests {
499    use std::io::Cursor;
500
501    use substrait::proto::expression::RexType;
502    use substrait::proto::plan_rel;
503    use substrait::proto::rel::RelType;
504
505    use super::*;
506
    /// Minimal text plan with no extensions section: a root over a projection
    /// over a two-column read.
    const BASIC_PLAN: &str = r#"=== Plan
Root[result]
  Project[$0, $1]
    Read[data => a:i64, b:string]
"#;

    /// Text plan with an extensions section declaring the `gt` function, which
    /// the plan's filter then calls.
    const PLAN_WITH_EXTENSIONS: &str = r#"=== Extensions
URNs:
  @  1: https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml
Functions:
  # 10 @  1: gt

=== Plan
Root[result]
  Filter[gt($2, 100) => $0, $1, $2]
    Project[$0, $1, $2]
      Read[data => a:i64, b:string, c:i32]
"#;
525
526    #[test]
527    fn test_convert_text_to_text() {
528        let input = Cursor::new(BASIC_PLAN);
529        let mut output = Vec::new();
530
531        let cli = Cli {
532            command: Commands::Convert {
533                input: "input.substrait".to_string(),
534                output: "output.substrait".to_string(),
535                from: Some(Format::Text),
536                to: Some(Format::Text),
537                show_literal_types: false,
538                show_expression_types: false,
539                verbose: false,
540            },
541        };
542
543        cli.run_with_io(input, &mut output, &ExtensionRegistry::default())
544            .unwrap();
545
546        let output_content = String::from_utf8(output).unwrap();
547        assert!(output_content.contains("=== Plan"));
548        assert!(output_content.contains("Root[result]"));
549        assert!(output_content.contains("Project[$0, $1]"));
550        assert!(output_content.contains("Read[data => a:i64, b:string]"));
551    }
552
553    #[test]
554    fn test_convert_text_to_json() {
555        let input = Cursor::new(BASIC_PLAN);
556        let mut output = Vec::new();
557
558        let cli = Cli {
559            command: Commands::Convert {
560                input: "input.substrait".to_string(),
561                output: "output.json".to_string(),
562                from: Some(Format::Text),
563                to: Some(Format::Json),
564                show_literal_types: false,
565                show_expression_types: false,
566                verbose: false,
567            },
568        };
569
570        cli.run_with_io(input, &mut output, &ExtensionRegistry::default())
571            .unwrap();
572
573        let output_content = String::from_utf8(output).unwrap();
574        assert!(output_content.contains("\"relations\""));
575        assert!(output_content.contains("\"root\""));
576        assert!(output_content.contains("\"project\""));
577        assert!(output_content.contains("\"read\""));
578    }
579
580    #[test]
581    fn test_convert_json_to_text() {
582        // First convert text to JSON
583        let input = Cursor::new(BASIC_PLAN);
584        let mut json_output = Vec::new();
585
586        let cli_to_json = Cli {
587            command: Commands::Convert {
588                input: "input.substrait".to_string(),
589                output: "output.json".to_string(),
590                from: Some(Format::Text),
591                to: Some(Format::Json),
592                show_literal_types: false,
593                show_expression_types: false,
594                verbose: false,
595            },
596        };
597
598        cli_to_json
599            .run_with_io(input, &mut json_output, &ExtensionRegistry::default())
600            .unwrap();
601
602        // Now convert JSON back to text
603        let json_input = Cursor::new(json_output);
604        let mut text_output = Vec::new();
605
606        let cli_to_text = Cli {
607            command: Commands::Convert {
608                input: "input.json".to_string(),
609                output: "output.substrait".to_string(),
610                from: Some(Format::Json),
611                to: Some(Format::Text),
612                show_literal_types: false,
613                show_expression_types: false,
614                verbose: false,
615            },
616        };
617
618        cli_to_text
619            .run_with_io(json_input, &mut text_output, &ExtensionRegistry::default())
620            .unwrap();
621
622        let output_content = String::from_utf8(text_output).unwrap();
623        assert!(output_content.contains("=== Plan"));
624        assert!(output_content.contains("Root[result]"));
625    }
626
627    #[test]
628    fn test_convert_with_protobuf_output() {
629        let input = Cursor::new(BASIC_PLAN);
630        let mut output = Vec::new();
631
632        let cli = Cli {
633            command: Commands::Convert {
634                input: "input.substrait".to_string(),
635                output: "output.pb".to_string(),
636                from: Some(Format::Text),
637                to: Some(Format::Protobuf),
638                show_literal_types: false,
639                show_expression_types: false,
640                verbose: false,
641            },
642        };
643
644        cli.run_with_io(input, &mut output, &ExtensionRegistry::default())
645            .unwrap();
646
647        // Protobuf output should be binary, so we just check that it's not empty
648        assert!(!output.is_empty());
649
650        // Should not contain readable text
651        let output_string = String::from_utf8_lossy(&output);
652        assert!(!output_string.contains("=== Plan"));
653    }
654
655    #[test]
656    fn test_validate_command() {
657        let input = Cursor::new(BASIC_PLAN);
658        let mut output = Vec::new();
659
660        let cli = Cli {
661            command: Commands::Validate {
662                input: String::new(),
663                output: String::new(),
664                verbose: false,
665            },
666        };
667
668        cli.run_with_io(input, &mut output, &ExtensionRegistry::default())
669            .unwrap();
670
671        let output_content = String::from_utf8(output).unwrap();
672        assert!(output_content.contains("=== Plan"));
673        assert!(output_content.contains("Root[result]"));
674        assert!(output_content.contains("Project[$0, $1]"));
675        assert!(output_content.contains("Read[data => a:i64, b:string]"));
676    }
677
678    #[test]
679    fn test_validate_with_extensions() {
680        let input = Cursor::new(PLAN_WITH_EXTENSIONS);
681        let mut output = Vec::new();
682
683        let cli = Cli {
684            command: Commands::Validate {
685                input: String::new(),
686                output: String::new(),
687                verbose: false,
688            },
689        };
690
691        cli.run_with_io(input, &mut output, &ExtensionRegistry::default())
692            .unwrap();
693
694        let output_content = String::from_utf8(output).unwrap();
695        assert!(output_content.contains("=== Extensions"));
696        assert!(output_content.contains("=== Plan"));
697        assert!(output_content.contains("Root[result]"));
698        assert!(output_content.contains("Filter[gt($2, 100)"));
699    }
700
701    #[test]
702    fn test_convert_with_formatting_options() {
703        let input = Cursor::new(BASIC_PLAN);
704        let mut output = Vec::new();
705
706        let cli = Cli {
707            command: Commands::Convert {
708                input: "input.substrait".to_string(),
709                output: "output.substrait".to_string(),
710                from: Some(Format::Text),
711                to: Some(Format::Text),
712                show_literal_types: true,
713                show_expression_types: true,
714                verbose: false,
715            },
716        };
717
718        cli.run_with_io(input, &mut output, &ExtensionRegistry::default())
719            .unwrap();
720
721        let output_content = String::from_utf8(output).unwrap();
722        assert!(output_content.contains("=== Plan"));
723        assert!(output_content.contains("Root[result]"));
724    }
725
726    #[test]
727    fn test_auto_detect_from_extension() {
728        // Test auto-detection of text format
729        assert_eq!(Format::from_extension("plan.substrait"), Some(Format::Text));
730        assert_eq!(Format::from_extension("plan.txt"), Some(Format::Text));
731
732        // Test auto-detection of JSON format
733        assert_eq!(Format::from_extension("plan.json"), Some(Format::Json));
734
735        // Test auto-detection of YAML format
736        assert_eq!(Format::from_extension("plan.yaml"), Some(Format::Yaml));
737        assert_eq!(Format::from_extension("plan.yml"), Some(Format::Yaml));
738
739        // Test auto-detection of protobuf format
740        assert_eq!(Format::from_extension("plan.pb"), Some(Format::Protobuf));
741        assert_eq!(Format::from_extension("plan.proto"), Some(Format::Protobuf));
742        assert_eq!(
743            Format::from_extension("plan.protobuf"),
744            Some(Format::Protobuf)
745        );
746
747        // Test unknown extensions
748        assert_eq!(Format::from_extension("plan.unknown"), None);
749        assert_eq!(Format::from_extension("plan"), None);
750
751        // Test stdin/stdout
752        assert_eq!(Format::from_extension("-"), None);
753    }
754
755    #[test]
756    fn test_convert_with_auto_detection() {
757        let input = Cursor::new(BASIC_PLAN);
758        let mut output = Vec::new();
759
760        let cli = Cli {
761            command: Commands::Convert {
762                input: "input.substrait".to_string(),
763                output: "output.json".to_string(),
764                from: None, // Auto-detect from extension
765                to: None,   // Auto-detect from extension
766                show_literal_types: false,
767                show_expression_types: false,
768                verbose: false,
769            },
770        };
771
772        cli.run_with_io(input, &mut output, &ExtensionRegistry::default())
773            .unwrap();
774
775        let output_content = String::from_utf8(output).unwrap();
776        assert!(output_content.contains("\"relations\""));
777        assert!(output_content.contains("\"root\""));
778        assert!(output_content.contains("\"project\""));
779        assert!(output_content.contains("\"read\""));
780    }
781
782    #[test]
783    fn test_auto_detection_error_unknown_input_extension() {
784        let input = Cursor::new(BASIC_PLAN);
785        let mut output = Vec::new();
786
787        let cli = Cli {
788            command: Commands::Convert {
789                input: "input.unknown".to_string(),
790                output: "output.json".to_string(),
791                from: None, // Should fail auto-detection
792                to: None,
793                show_literal_types: false,
794                show_expression_types: false,
795                verbose: false,
796            },
797        };
798
799        let result = cli.run_with_io(input, &mut output, &ExtensionRegistry::default());
800        assert!(result.is_err());
801        assert!(
802            result
803                .unwrap_err()
804                .to_string()
805                .contains("Could not auto-detect input format")
806        );
807    }
808
809    #[test]
810    fn test_auto_detection_error_unknown_output_extension() {
811        let input = Cursor::new(BASIC_PLAN);
812        let mut output = Vec::new();
813
814        let cli = Cli {
815            command: Commands::Convert {
816                input: "input.substrait".to_string(),
817                output: "output.unknown".to_string(),
818                from: None,
819                to: None, // Should fail auto-detection
820                show_literal_types: false,
821                show_expression_types: false,
822                verbose: false,
823            },
824        };
825
826        let result = cli.run_with_io(input, &mut output, &ExtensionRegistry::default());
827        assert!(result.is_err());
828        assert!(
829            result
830                .unwrap_err()
831                .to_string()
832                .contains("Could not auto-detect output format")
833        );
834    }
835
836    #[test]
837    fn test_explicit_format_overrides_auto_detection() {
838        let input = Cursor::new(BASIC_PLAN);
839        let mut output = Vec::new();
840
841        let cli = Cli {
842            command: Commands::Convert {
843                input: "input.json".to_string(), // Would auto-detect as JSON
844                output: "output.pb".to_string(), // Would auto-detect as Protobuf
845                from: Some(Format::Text),        // Explicit override
846                to: Some(Format::Text),          // Explicit override
847                show_literal_types: false,
848                show_expression_types: false,
849                verbose: false,
850            },
851        };
852
853        cli.run_with_io(input, &mut output, &ExtensionRegistry::default())
854            .unwrap();
855
856        let output_content = String::from_utf8(output).unwrap();
857        assert!(output_content.contains("=== Plan"));
858        assert!(output_content.contains("Root[result]"));
859    }
860
861    #[test]
862    fn test_protobuf_roundtrip() {
863        // Convert text to protobuf
864        let input = Cursor::new(BASIC_PLAN);
865        let mut protobuf_output = Vec::new();
866
867        let cli_to_protobuf = Cli {
868            command: Commands::Convert {
869                input: "input.substrait".to_string(),
870                output: "output.pb".to_string(),
871                from: Some(Format::Text),
872                to: Some(Format::Protobuf),
873                show_literal_types: false,
874                show_expression_types: false,
875                verbose: false,
876            },
877        };
878
879        cli_to_protobuf
880            .run_with_io(input, &mut protobuf_output, &ExtensionRegistry::default())
881            .unwrap();
882
883        // Convert protobuf back to text
884        let protobuf_input = Cursor::new(protobuf_output);
885        let mut text_output = Vec::new();
886
887        let cli_to_text = Cli {
888            command: Commands::Convert {
889                input: "input.pb".to_string(),
890                output: "output.substrait".to_string(),
891                from: Some(Format::Protobuf),
892                to: Some(Format::Text),
893                show_literal_types: false,
894                show_expression_types: false,
895                verbose: false,
896            },
897        };
898
899        cli_to_text
900            .run_with_io(
901                protobuf_input,
902                &mut text_output,
903                &ExtensionRegistry::default(),
904            )
905            .unwrap();
906
907        let output_content = String::from_utf8(text_output).unwrap();
908        assert!(output_content.contains("=== Plan"));
909        assert!(output_content.contains("Root[result]"));
910        assert!(output_content.contains("Read[data => a:i64, b:string]"));
911    }
912
913    /// Creates a plan with an invalid function reference that will cause formatting errors.
914    fn make_plan_with_invalid_function_ref() -> substrait::proto::Plan {
915        const VALID_PLAN: &str = r#"=== Extensions
916URNs:
917  @  1: https://github.com/substrait-io/substrait/blob/main/extensions/functions_comparison.yaml
918Functions:
919  # 10 @  1: equal
920
921=== Plan
922Root[result]
923  Filter[equal($0, 42:i32) => $0]
924    Read[data => a:i32]
925"#;
926
927        let mut plan = parse(VALID_PLAN).expect("Failed to parse valid plan");
928
929        // Navigate to the function and corrupt its reference
930        let rel_root = plan.relations.first_mut().unwrap();
931        let plan_rel::RelType::Root(root) = rel_root.rel_type.as_mut().unwrap() else {
932            panic!("Expected Root relation");
933        };
934        let rel = root.input.as_mut().unwrap();
935        let RelType::Filter(filter) = rel.rel_type.as_mut().unwrap() else {
936            panic!("Expected Filter relation");
937        };
938        let condition = filter.condition.as_mut().unwrap();
939        let RexType::ScalarFunction(func) = condition.rex_type.as_mut().unwrap() else {
940            panic!("Expected ScalarFunction");
941        };
942        func.function_reference = 999; // Invalid - doesn't exist in extensions
943
944        plan
945    }
946
947    #[test]
948    fn test_write_plan_reports_formatting_issues() {
949        let plan = make_plan_with_invalid_function_ref();
950        let mut output = Vec::new();
951
952        let result = Format::Text.write_plan(
953            &mut output,
954            &plan,
955            &OutputOptions::default(),
956            &ExtensionRegistry::default(),
957        );
958
959        // Should succeed but report formatting issues
960        let outcome = result.expect("write_plan should not return hard error");
961        assert!(
962            matches!(outcome, Outcome::HadFormattingIssues(ref errors) if !errors.is_empty()),
963            "Expected HadFormattingIssues with errors, got {outcome:?}"
964        );
965        // Output should still be written (best-effort formatting)
966        assert!(
967            !output.is_empty(),
968            "Output should be written even with issues"
969        );
970    }
971}