1pub mod saluki_keys;
12pub mod schema_gen;
13pub mod smoke_test_support;
14
15use std::collections::HashSet;
16use std::path::{Path, PathBuf};
17
18use indexmap::{IndexMap, IndexSet};
19use serde::Deserialize;
20
21use crate::smoke_test_support::ConfigurationStruct;
22
/// Deserialized contents of a schema overlay YAML document.
///
/// The document has two top-level sections, `inventory` and `excluded`. Both are stored in
/// `IndexMap`s, so the key order of the YAML document is retained.
#[derive(Debug, Clone, Deserialize)]
pub struct SchemaOverlay {
    /// Support entries keyed by configuration key (nested schema keys are dot-joined).
    pub inventory: IndexMap<String, KnownEntry>,
    /// Keys of the `excluded` section, each mapped to a free-form string
    /// (presumably the reason for excluding the key -- confirm against the overlay file).
    pub excluded: IndexMap<String, String>,
}
32
/// Support status recorded for one inventory key.
///
/// Internally tagged on the `support` field; the accepted tag values are `full`, `partial`,
/// `none` and `unknown`.
#[derive(Debug, Clone, Deserialize)]
#[serde(tag = "support", rename_all = "snake_case")]
pub enum KnownEntry {
    /// `support: full`.
    Full(FullSupport),
    /// `support: partial`.
    Partial(PartialSupport),
    /// `support: none` (explicitly renamed; the variant name itself is not used as the tag).
    #[serde(rename = "none")]
    Unsupported(Unsupported),
    /// `support: unknown`.
    Unknown(UnknownSupport),
}
47
/// Payload of a `support: full` inventory entry.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct FullSupport {
    /// Pipelines the key applies to; an empty list is rejected during deserialization.
    pub pipelines: PipelineAffinity,
    /// Short description; overlay validation caps its length at 50.
    pub description: String,
    /// Optional documentation text; `None` when the field is absent.
    #[serde(default)]
    pub documentation: Option<String>,
    /// Optional issue reference (presumably a tracker link or id -- confirm).
    #[serde(default)]
    pub issue: Option<String>,
    /// Test metadata; required for this variant.
    pub test_support: TestSupport,
}
65
/// Payload of a `support: partial` inventory entry.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct PartialSupport {
    /// Pipelines the key applies to; an empty list is rejected during deserialization.
    pub pipelines: PipelineAffinity,
    /// Short description; overlay validation caps its length at 50.
    pub description: String,
    /// Documentation text; unlike the other variants this field is required.
    pub documentation: String,
    /// Defaults to `false` when absent (presumably controls whether a warning is emitted for
    /// the key -- confirm with the consumers of this flag).
    #[serde(default)]
    pub warn: bool,
    /// Optional issue reference (presumably a tracker link or id -- confirm).
    #[serde(default)]
    pub issue: Option<String>,
    /// Test metadata; required for this variant.
    pub test_support: TestSupport,
}
85
/// Payload of a `support: none` inventory entry.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct Unsupported {
    /// Pipelines the key applies to; an empty list is rejected during deserialization.
    pub pipelines: PipelineAffinity,
    /// Short description; overlay validation caps its length at 50.
    pub description: String,
    /// Optional documentation text; `None` when the field is absent.
    #[serde(default)]
    pub documentation: Option<String>,
    /// Required severity level.
    pub severity: Severity,
    /// Required flag; when `true`, overlay validation also requires `issue` to be set.
    pub planned: bool,
    /// Optional issue reference; mandatory when `planned` is `true`.
    #[serde(default)]
    pub issue: Option<String>,
}
105
/// Payload of a `support: unknown` inventory entry; every field is optional.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct UnknownSupport {
    /// Optional short description; when present, overlay validation caps its length at 50.
    #[serde(default)]
    pub description: Option<String>,
    /// Optional severity level.
    #[serde(default)]
    pub severity: Option<Severity>,
    /// Optional issue reference (presumably a tracker link or id -- confirm).
    #[serde(default)]
    pub issue: Option<String>,
}
120
/// Test metadata attached to `full` and `partial` inventory entries.
///
/// NOTE(review): the precise meaning of most fields is defined by the test tooling that
/// consumes this struct, which is not visible here; the hedged descriptions need confirming.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct TestSupport {
    /// Optional list of strings (presumably environment variable names to use instead of a
    /// derived one -- confirm).
    #[serde(default)]
    pub env_var_override: Option<Vec<String>>,
    /// Extra YAML paths; defaults to empty. Overlay validation rejects duplicates within a
    /// single entry.
    #[serde(default)]
    pub additional_yaml_paths: Vec<String>,
    /// Optional value type (presumably replaces the type taken from the schema -- confirm).
    #[serde(default)]
    pub value_type_override: Option<ValueType>,
    /// Required set of configuration structs; overlay validation rejects an empty set.
    pub used_by: IndexSet<ConfigurationStruct>,
    /// Optional string (presumably a JSON test value -- confirm).
    #[serde(default)]
    pub test_json: Option<String>,
    /// Extra string-to-string attributes; defaults to empty.
    #[serde(default)]
    pub additional_attributes: IndexMap<String, String>,
}
147
/// Severity level, written in YAML as `low`, `medium` or `high`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Severity {
    /// `low`
    Low,
    /// `medium`
    Medium,
    /// `high`
    High,
}
156
/// A single named pipeline, written in YAML as `dogstatsd`, `checks`, `otlp` or `traces`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Pipeline {
    /// `dogstatsd`; renamed explicitly because the automatic snake_case conversion of the
    /// variant name would insert underscores.
    #[serde(rename = "dogstatsd")]
    DogStatsD,
    /// `checks`
    Checks,
    /// `otlp`
    Otlp,
    /// `traces`
    Traces,
}
167
/// Which pipelines a configuration key applies to.
///
/// Deserialized from a non-empty list of names: the one-element list `[cross_cutting]`
/// becomes [`PipelineAffinity::CrossCutting`]; a list of pipeline names becomes
/// [`PipelineAffinity::Pipelines`]. Mixing `cross_cutting` with other names is an error.
#[derive(Debug, Clone)]
pub enum PipelineAffinity {
    /// The list consisted solely of `cross_cutting`.
    CrossCutting,
    /// The named pipelines, in the order they were listed.
    Pipelines(Vec<Pipeline>),
}
180
181impl<'de> serde::Deserialize<'de> for PipelineAffinity {
182 fn deserialize<D: serde::Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
183 #[derive(Deserialize, PartialEq)]
184 #[serde(rename_all = "snake_case")]
185 enum Token {
186 CrossCutting,
187 #[serde(rename = "dogstatsd")]
188 DogStatsD,
189 Checks,
190 Otlp,
191 Traces,
192 }
193
194 let tokens: Vec<Token> = Vec::deserialize(d)?;
195
196 if tokens.is_empty() {
197 return Err(serde::de::Error::custom("pipelines must be non-empty"));
198 }
199
200 let has_cc = tokens.iter().any(|t| t == &Token::CrossCutting);
201
202 if has_cc && tokens.len() > 1 {
203 return Err(serde::de::Error::custom(
204 "cross_cutting must appear alone in pipelines list",
205 ));
206 }
207
208 if has_cc {
209 return Ok(PipelineAffinity::CrossCutting);
210 }
211
212 let pipelines = tokens
213 .into_iter()
214 .map(|t| match t {
215 Token::DogStatsD => Pipeline::DogStatsD,
216 Token::Checks => Pipeline::Checks,
217 Token::Otlp => Pipeline::Otlp,
218 Token::Traces => Pipeline::Traces,
219 Token::CrossCutting => unreachable!(),
220 })
221 .collect();
222
223 Ok(PipelineAffinity::Pipelines(pipelines))
224 }
225}
226
/// Value type name, written in YAML as `boolean`, `integer`, `float`, `string` or
/// `string_list`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ValueType {
    /// `boolean`
    Boolean,
    /// `integer`
    Integer,
    /// `float`
    Float,
    /// `string`
    String,
    /// `string_list`
    StringList,
}
237
/// Locations of the two files consumed by [`SchemaOverlay::load`].
///
/// Derives `Debug` and `Clone` so the paths can be logged and reused by callers.
#[derive(Debug, Clone)]
pub struct Files {
    /// Path of the schema YAML file the overlay is validated against.
    pub schema: PathBuf,
    /// Path of the overlay YAML file.
    pub overlay: PathBuf,
}
245
246impl Default for Files {
247 fn default() -> Self {
248 let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
249 .join("..")
250 .join("config")
251 .join("schema");
252 let schema = dir.join("core").join("core_schema.yaml");
253 let overlay = dir.join("schema_overlay.yaml");
254 Files { schema, overlay }
255 }
256}
257
258impl SchemaOverlay {
259 pub fn load(files: Files) -> Result<Self, Error> {
260 let loaded = Self::from_file(&files.overlay)?;
261 loaded.validate(&files.schema)?;
262 Ok(loaded)
263 }
264
265 fn from_yaml(s: &str) -> Result<Self, Error> {
266 let yaml: serde_yaml::Value = serde_yaml::from_str(s).map_err(Error::Yaml)?;
267 Self::lint_yaml(&yaml)?;
268 serde_yaml::from_value(yaml).map_err(Error::Yaml)
269 }
270
271 fn from_file(path: &Path) -> Result<Self, Error> {
272 let contents = std::fs::read_to_string(path).map_err(|e| Error::Io((path.into(), e)))?;
273 Self::from_yaml(&contents)
274 }
275
276 fn validate(&self, core_schema: &Path) -> Result<(), Error> {
277 self.validate_keys_match(core_schema)?;
278 self.validate_entries()?;
279 Ok(())
280 }
281
282 fn lint_yaml(yaml: &serde_yaml::Value) -> Result<(), Error> {
285 let mapping = yaml
286 .as_mapping()
287 .ok_or_else(|| Error::Validation("overlay must be a YAML mapping".to_string()))?;
288
289 let section_names: Vec<&str> = mapping.keys().filter_map(|k| k.as_str()).collect();
290
291 for required in ["inventory", "excluded"] {
292 if !section_names.contains(&required) {
293 return Err(Error::Validation(format!(
294 "overlay missing required section '{}'",
295 required
296 )));
297 }
298 }
299
300 let pos_known = section_names.iter().position(|&k| k == "inventory").unwrap();
301 let pos_ignored = section_names.iter().position(|&k| k == "excluded").unwrap();
302
303 if pos_known >= pos_ignored {
304 return Err(Error::Validation(
305 "sections must appear in order: known, ignored".to_string(),
306 ));
307 }
308
309 for section_name in ["inventory", "excluded"] {
310 if let Some(section) = yaml.get(section_name).and_then(|v| v.as_mapping()) {
311 let mut prev = "";
312 for key in section.keys().filter_map(|k| k.as_str()) {
313 if key < prev {
314 return Err(Error::Validation(format!(
315 "{}: key '{}' is out of alphabetical order (after '{}')",
316 section_name, key, prev
317 )));
318 }
319 prev = key;
320 }
321 }
322 }
323
324 Ok(())
325 }
326
327 fn validate_keys_match(&self, core_schema: &Path) -> Result<(), Error> {
329 let schema_keys = Self::schema_keys(core_schema)?;
330
331 for key in self.excluded.keys() {
332 if self.inventory.contains_key(key.as_str()) {
333 return Err(Error::Validation(format!(
334 "key '{}' appears in more than one overlay section",
335 key
336 )));
337 }
338 }
339
340 for key in self.inventory.keys().chain(self.excluded.keys()) {
341 if !schema_keys.contains(key.as_str()) {
342 return Err(Error::Validation(format!(
343 "overlay key '{}' is not present in the schema",
344 key
345 )));
346 }
347 }
348
349 let overlay_keys: HashSet<&str> = self
350 .inventory
351 .keys()
352 .chain(self.excluded.keys())
353 .map(|s| s.as_str())
354 .collect();
355 for key in &schema_keys {
356 if !overlay_keys.contains(key.as_str()) {
357 return Err(Error::Validation(format!(
358 "schema key '{}' is not covered by the overlay",
359 key
360 )));
361 }
362 }
363
364 Ok(())
365 }
366
367 fn schema_keys(schema_path: &Path) -> Result<HashSet<String>, Error> {
368 let schema = load_resolved_schema(schema_path)?;
369 let props = schema
370 .get("properties")
371 .and_then(|v| v.as_mapping())
372 .ok_or_else(|| Error::Validation("schema missing 'properties' section".to_string()))?;
373 let mut keys = HashSet::new();
374 Self::collect_schema_keys(props, "", &mut keys);
375 Ok(keys)
376 }
377
378 fn collect_schema_keys(props: &serde_yaml::Mapping, prefix: &str, keys: &mut HashSet<String>) {
379 for (k, v) in props {
380 if let Some(name) = k.as_str() {
381 let full_key = if prefix.is_empty() {
382 name.to_string()
383 } else {
384 format!("{}.{}", prefix, name)
385 };
386 if let Some(sub_props) = v.get("properties").and_then(|p| p.as_mapping()) {
389 Self::collect_schema_keys(sub_props, &full_key, keys);
390 } else {
391 keys.insert(full_key);
392 }
393 }
394 }
395 }
396
397 fn validate_entries(&self) -> Result<(), Error> {
400 for (key, entry) in &self.inventory {
401 match entry {
402 KnownEntry::Full(f) => {
403 if f.test_support.used_by.is_empty() {
404 return Err(Error::Validation(format!(
405 "full key '{}': used_by must be non-empty",
406 key
407 )));
408 }
409 if f.description.len() > 50 {
410 return Err(Error::Validation(format!(
411 "full key '{}': description exceeds 50 chars ({} chars)",
412 key,
413 f.description.len()
414 )));
415 }
416 let mut seen: HashSet<&str> = HashSet::new();
417 for path in &f.test_support.additional_yaml_paths {
418 if !seen.insert(path.as_str()) {
419 return Err(Error::Validation(format!(
420 "full key '{}': duplicate additional_yaml_path '{}'",
421 key, path
422 )));
423 }
424 }
425 }
426 KnownEntry::Partial(p) => {
427 if p.test_support.used_by.is_empty() {
428 return Err(Error::Validation(format!(
429 "partial key '{}': used_by must be non-empty",
430 key
431 )));
432 }
433 if p.description.len() > 50 {
434 return Err(Error::Validation(format!(
435 "partial key '{}': description exceeds 50 chars ({} chars)",
436 key,
437 p.description.len()
438 )));
439 }
440 let mut seen: HashSet<&str> = HashSet::new();
441 for path in &p.test_support.additional_yaml_paths {
442 if !seen.insert(path.as_str()) {
443 return Err(Error::Validation(format!(
444 "partial key '{}': duplicate additional_yaml_path '{}'",
445 key, path
446 )));
447 }
448 }
449 }
450 KnownEntry::Unsupported(u) => {
451 if u.description.len() > 50 {
452 return Err(Error::Validation(format!(
453 "unsupported key '{}': description exceeds 50 chars ({} chars)",
454 key,
455 u.description.len()
456 )));
457 }
458 if u.planned && u.issue.is_none() {
459 return Err(Error::Validation(format!(
460 "unsupported key '{}': planned requires an issue",
461 key
462 )));
463 }
464 }
465 KnownEntry::Unknown(u) => {
466 if let Some(desc) = &u.description {
467 if desc.len() > 50 {
468 return Err(Error::Validation(format!(
469 "unknown key '{}': description exceeds 50 chars ({} chars)",
470 key,
471 desc.len()
472 )));
473 }
474 }
475 }
476 }
477 }
478 Ok(())
479 }
480}
481
482fn read_yaml(path: &Path) -> Result<serde_yaml::Value, Error> {
484 let contents = std::fs::read_to_string(path).map_err(|e| Error::Io((path.into(), e)))?;
485 serde_yaml::from_str(&contents).map_err(Error::Yaml)
486}
487
488pub(crate) fn load_resolved_schema(schema_path: &Path) -> Result<serde_yaml::Value, Error> {
500 let schema_dir = schema_path.parent().unwrap_or_else(|| Path::new("."));
501 let mut doc = read_yaml(schema_path)?;
502 resolve_refs(&mut doc, schema_dir)?;
503 Ok(doc)
504}
505
506fn resolve_refs(value: &mut serde_yaml::Value, schema_dir: &Path) -> Result<(), Error> {
509 if let Some(map) = value.as_mapping_mut() {
510 if let Some(ref_path) = map.get("$ref").and_then(|v| v.as_str()) {
511 let ref_file = schema_dir.join(ref_path);
512 let mut ref_doc = read_yaml(&ref_file)?;
513 resolve_refs(&mut ref_doc, schema_dir)?;
514 *value = ref_doc;
515 return Ok(());
516 }
517 for (_k, v) in map.iter_mut() {
518 resolve_refs(v, schema_dir)?;
519 }
520 }
521 Ok(())
522}
523
/// Summary of the overlay rules, appended to every validation error message.
///
/// Section names match the actual top-level keys of the overlay (`inventory`, `excluded`).
const VALIDATION_RULES: &str = "\n\
    \n\
    Rules that must hold in schema_overlay.yaml:\n\
    - Every core_schema.yaml key appears in exactly one section (inventory / excluded).\n\
    - No key appears in more than one section.\n\
    - Sections appear in order: inventory, excluded.\n\
    - Keys within each section are sorted alphabetically.\n\
    - full entries: pipelines non-empty, used_by non-empty, description <= 50 chars.\n\
    - partial entries: pipelines non-empty, used_by non-empty, description <= 50 chars, documentation required.\n\
    - unsupported entries: pipelines non-empty, description <= 50 chars, planned+issue consistent.\n\
    - unknown entries: description <= 50 chars (when present).\n\
    - additional_yaml_paths: no duplicates within a single entry.\n\
    Fix: edit lib/datadog-agent/config/schema/schema_overlay.yaml.";
537
/// Errors produced while loading or validating the overlay and schema files.
#[derive(Debug)]
pub enum Error {
    /// A file could not be read; carries the path and the underlying I/O error.
    Io((PathBuf, std::io::Error)),
    /// A YAML document failed to parse or deserialize.
    Yaml(serde_yaml::Error),
    /// A lint or validation rule was violated; carries the specific message.
    Validation(String),
}
545
546impl std::fmt::Display for Error {
547 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
548 match self {
549 Error::Io(e) => write!(f, "Error reading {}: {}", e.0.display(), e.1),
550 Error::Yaml(e) => write!(f, "YAML parse error in overlay: {e}"),
551 Error::Validation(s) => write!(f, "schema_overlay.yaml validation failed: {s}{VALIDATION_RULES}"),
552 }
553 }
554}
555
556impl std::error::Error for Error {
557 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
558 match self {
559 Error::Io(e) => Some(&e.1),
560 Error::Yaml(e) => Some(e),
561 Error::Validation(_) => None,
562 }
563 }
564}
565
#[cfg(test)]
mod tests {
    use super::*;

    /// Loads the fixture schema and overlay under `test/` and checks the inventory size.
    #[test]
    fn overlay_loads() {
        let test_files = Files {
            schema: Path::new(env!("CARGO_MANIFEST_DIR"))
                .join("test")
                .join("fake_schema.yaml"),
            overlay: Path::new(env!("CARGO_MANIFEST_DIR"))
                .join("test")
                .join("fake_overlay.yaml"),
        };
        let validated = SchemaOverlay::load(test_files).unwrap();
        assert_eq!(validated.inventory.len(), 18);
    }

    /// A lone `cross_cutting` deserializes to `PipelineAffinity::CrossCutting`.
    #[test]
    fn pipeline_affinity_cross_cutting() {
        let yaml = "pipelines: [cross_cutting]";
        #[derive(Deserialize)]
        struct W {
            pipelines: PipelineAffinity,
        }
        let w: W = serde_yaml::from_str(yaml).unwrap();
        assert!(matches!(w.pipelines, PipelineAffinity::CrossCutting));
    }

    /// Several pipeline names deserialize to `Pipelines`, keeping the listed order.
    #[test]
    fn pipeline_affinity_multi() {
        let yaml = "pipelines: [dogstatsd, traces]";
        #[derive(Deserialize)]
        struct W {
            pipelines: PipelineAffinity,
        }
        let w: W = serde_yaml::from_str(yaml).unwrap();
        if let PipelineAffinity::Pipelines(ps) = w.pipelines {
            assert_eq!(ps.len(), 2);
            assert!(matches!(ps[0], Pipeline::DogStatsD));
            assert!(matches!(ps[1], Pipeline::Traces));
        } else {
            panic!("expected Pipelines");
        }
    }

    /// `cross_cutting` combined with another pipeline is a deserialization error.
    #[test]
    fn pipeline_affinity_cross_cutting_must_be_alone() {
        let yaml = "pipelines: [cross_cutting, dogstatsd]";
        #[derive(Deserialize)]
        #[allow(dead_code)]
        struct W {
            pipelines: PipelineAffinity,
        }
        assert!(serde_yaml::from_str::<W>(yaml).is_err());
    }

    /// Writes `schema` and `overlay` into a temporary directory and runs
    /// `SchemaOverlay::load` on the two files.
    fn load_from_strs(schema: &str, overlay: &str) -> Result<SchemaOverlay, Error> {
        let dir = tempfile::tempdir().unwrap();
        let schema_path = dir.path().join("schema.yaml");
        let overlay_path = dir.path().join("overlay.yaml");
        std::fs::write(&schema_path, schema).unwrap();
        std::fs::write(&overlay_path, overlay).unwrap();
        SchemaOverlay::load(Files {
            schema: schema_path,
            overlay: overlay_path,
        })
    }

    /// A schema key that appears in neither overlay section is reported.
    #[test]
    fn validation_rejects_schema_key_missing_from_overlay() {
        let schema = "\
properties:
  key_a:
    type: string
  key_b:
    type: string
";
        let overlay = "\
inventory:
  key_a:
    support: full
    pipelines: [cross_cutting]
    description: \"Key A\"
    test_support:
      used_by: [ForwarderConfiguration]
excluded: {}
";
        let err = load_from_strs(schema, overlay).unwrap_err();
        assert!(
            err.to_string().contains("schema key 'key_b' is not covered"),
            "unexpected error: {err}"
        );
    }

    /// An overlay key that the schema does not define is reported.
    #[test]
    fn validation_rejects_overlay_key_absent_from_schema() {
        let schema = "\
properties:
  key_a:
    type: string
";
        let overlay = "\
inventory:
  key_a:
    support: full
    pipelines: [cross_cutting]
    description: \"Key A\"
    test_support:
      used_by: [ForwarderConfiguration]
excluded:
  key_b: \"not in schema\"
";
        let err = load_from_strs(schema, overlay).unwrap_err();
        assert!(
            err.to_string().contains("overlay key 'key_b' is not present"),
            "unexpected error: {err}"
        );
    }

    /// A key listed under both `inventory` and `excluded` is reported.
    #[test]
    fn validation_rejects_key_in_two_sections() {
        let schema = "\
properties:
  key_a:
    type: string
  key_b:
    type: string
";
        let overlay = "\
inventory:
  key_a:
    support: full
    pipelines: [cross_cutting]
    description: \"Key A\"
    test_support:
      used_by: [ForwarderConfiguration]
excluded:
  key_a: \"duplicate\"
  key_b: \"ok\"
";
        let err = load_from_strs(schema, overlay).unwrap_err();
        assert!(
            err.to_string()
                .contains("key 'key_a' appears in more than one overlay section"),
            "unexpected error: {err}"
        );
    }

    /// A `$ref` to a sibling file is inlined, and the referenced properties are reported
    /// under the referencing key's dotted prefix.
    #[test]
    fn schema_ref_is_resolved_and_keys_namespaced() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(
            dir.path().join("sub.yaml"),
            "properties:\n  enabled:\n    type: boolean\n",
        )
        .unwrap();
        let schema_path = dir.path().join("schema.yaml");
        std::fs::write(&schema_path, "properties:\n  feature:\n    $ref: sub.yaml\n").unwrap();

        let keys = SchemaOverlay::schema_keys(&schema_path).unwrap();
        assert_eq!(
            keys,
            HashSet::from(["feature.enabled".to_string()]),
            "unexpected keys: {keys:?}"
        );
    }

    /// A `$ref` to a file that does not exist surfaces as `Error::Io` naming that file.
    #[test]
    fn missing_schema_ref_reports_io_error() {
        let dir = tempfile::tempdir().unwrap();
        let schema_path = dir.path().join("schema.yaml");
        std::fs::write(&schema_path, "properties:\n  feature:\n    $ref: does_not_exist.yaml\n").unwrap();

        let err = SchemaOverlay::schema_keys(&schema_path).unwrap_err();
        assert!(matches!(err, Error::Io(_)), "expected Io error, got: {err}");
        assert!(
            err.to_string().contains("does_not_exist.yaml"),
            "error should name the missing file: {err}"
        );
    }

    /// Inventory keys that are not in ascending order are rejected by the lint step.
    #[test]
    fn validation_rejects_unsorted_inventory_keys() {
        let schema = "\
properties:
  key_a:
    type: string
  key_b:
    type: string
";
        let overlay = "\
inventory:
  key_b:
    support: full
    pipelines: [cross_cutting]
    description: \"Key B\"
    test_support:
      used_by: [ForwarderConfiguration]
  key_a:
    support: full
    pipelines: [cross_cutting]
    description: \"Key A\"
    test_support:
      used_by: [ForwarderConfiguration]
excluded: {}
";
        let err = load_from_strs(schema, overlay).unwrap_err();
        assert!(
            err.to_string().contains("out of alphabetical order"),
            "unexpected error: {err}"
        );
    }
}