234 lines
7.5 KiB
Python
Executable File
234 lines
7.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
YAML Validation Script for MXCP Files
|
|
|
|
Validates YAML files against their corresponding JSON schemas.
|
|
Automatically detects the file type and applies the appropriate schema.
|
|
|
|
Usage:
|
|
python validate_yaml.py <path-to-yaml-file>
|
|
python validate_yaml.py --all # Validate all YAML files in project templates
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Dict, Optional, Tuple
|
|
|
|
try:
|
|
import yaml
|
|
from jsonschema import Draft7Validator, RefResolver, ValidationError
|
|
except ImportError:
|
|
print("Error: Required packages not installed.")
|
|
print("Please install: pip install pyyaml jsonschema")
|
|
sys.exit(1)
|
|
|
|
|
|
class YAMLValidator:
|
|
"""Validates MXCP YAML files against JSON schemas."""
|
|
|
|
def __init__(self, schemas_dir: Path):
|
|
self.schemas_dir = schemas_dir
|
|
self.schemas = self._load_schemas()
|
|
|
|
def _load_schemas(self) -> Dict[str, dict]:
|
|
"""Load all JSON schemas from the schemas directory."""
|
|
schemas = {}
|
|
for schema_file in self.schemas_dir.glob("*-schema-*.json"):
|
|
with open(schema_file, "r") as f:
|
|
schema_name = schema_file.stem
|
|
schemas[schema_name] = json.load(f)
|
|
return schemas
|
|
|
|
def _detect_yaml_type(self, yaml_data: dict, file_path: Path) -> Optional[str]:
|
|
"""Detect the type of YAML file based on its content and filename."""
|
|
filename = file_path.name.lower()
|
|
|
|
# Check for mxcp-site.yml
|
|
if filename == "mxcp-site.yml":
|
|
return "mxcp-site-schema-1"
|
|
|
|
# Check for config.yml
|
|
if filename == "config.yml":
|
|
return "mxcp-config-schema-1"
|
|
|
|
# Check for mxcp version field (required in all MXCP files)
|
|
if "mxcp" not in yaml_data:
|
|
return None
|
|
|
|
# Detect by top-level keys
|
|
if "tool" in yaml_data:
|
|
return "tool-schema-1"
|
|
elif "resource" in yaml_data:
|
|
return "resource-schema-1"
|
|
elif "prompt" in yaml_data:
|
|
return "prompt-schema-1"
|
|
elif "suite" in yaml_data and "tests" in yaml_data:
|
|
return "eval-schema-1"
|
|
elif "project" in yaml_data:
|
|
return "mxcp-site-schema-1"
|
|
elif "projects" in yaml_data:
|
|
return "mxcp-config-schema-1"
|
|
|
|
return None
|
|
|
|
def validate_file(self, file_path: Path) -> Tuple[bool, Optional[str]]:
|
|
"""
|
|
Validate a single YAML file.
|
|
|
|
Returns:
|
|
Tuple of (is_valid, error_message)
|
|
"""
|
|
try:
|
|
# Load YAML file
|
|
with open(file_path, "r") as f:
|
|
yaml_data = yaml.safe_load(f)
|
|
|
|
if yaml_data is None:
|
|
return False, "Empty YAML file"
|
|
|
|
# Detect YAML type
|
|
schema_name = self._detect_yaml_type(yaml_data, file_path)
|
|
if schema_name is None:
|
|
return False, "Could not detect YAML file type (missing 'mxcp' field or unknown structure)"
|
|
|
|
if schema_name not in self.schemas:
|
|
return False, f"Schema '{schema_name}' not found in schemas directory"
|
|
|
|
schema = self.schemas[schema_name]
|
|
|
|
# Create resolver for $ref resolution
|
|
schema_uri = f"file://{self.schemas_dir.resolve()}/"
|
|
resolver = RefResolver(schema_uri, schema)
|
|
|
|
# Validate
|
|
validator = Draft7Validator(schema, resolver=resolver)
|
|
errors = list(validator.iter_errors(yaml_data))
|
|
|
|
if errors:
|
|
error_messages = []
|
|
for error in errors:
|
|
path = " -> ".join(str(p) for p in error.path) if error.path else "root"
|
|
error_messages.append(f" At {path}: {error.message}")
|
|
return False, "\n".join(error_messages)
|
|
|
|
return True, None
|
|
|
|
except yaml.YAMLError as e:
|
|
return False, f"YAML parsing error: {e}"
|
|
except Exception as e:
|
|
return False, f"Unexpected error: {e}"
|
|
|
|
def _should_skip_file(self, file_path: Path) -> bool:
|
|
"""Determine if a file should be skipped during validation."""
|
|
# Skip files in virtual environments
|
|
if ".venv" in file_path.parts or "venv" in file_path.parts:
|
|
return True
|
|
|
|
# Skip dbt-specific configuration files (not MXCP files)
|
|
dbt_files = {"dbt_project.yml", "profiles.yml", "sample_profiles.yml", "packages.yml"}
|
|
if file_path.name in dbt_files:
|
|
return True
|
|
|
|
# Skip dbt model schema files (sources.yml, schema.yml in models/)
|
|
if file_path.name in {"sources.yml", "schema.yml"} and "models" in file_path.parts:
|
|
return True
|
|
|
|
# Skip seed schema files
|
|
if file_path.name == "schema.yml" and "seeds" in file_path.parts:
|
|
return True
|
|
|
|
return False
|
|
|
|
def validate_directory(self, directory: Path, pattern: str = "**/*.yml") -> Dict[str, Tuple[bool, Optional[str]]]:
|
|
"""
|
|
Validate all YAML files in a directory.
|
|
|
|
Returns:
|
|
Dictionary mapping file paths to validation results
|
|
"""
|
|
results = {}
|
|
yaml_files = list(directory.glob(pattern))
|
|
|
|
# Also check for .yaml extension
|
|
yaml_files.extend(directory.glob(pattern.replace(".yml", ".yaml")))
|
|
|
|
for yaml_file in yaml_files:
|
|
if not self._should_skip_file(yaml_file):
|
|
results[str(yaml_file)] = self.validate_file(yaml_file)
|
|
|
|
return results
|
|
|
|
|
|
def main():
|
|
# Determine base directory (assume script is in mxcp-expert/scripts/)
|
|
script_dir = Path(__file__).parent
|
|
base_dir = script_dir.parent
|
|
schemas_dir = base_dir / "assets" / "schemas"
|
|
|
|
if not schemas_dir.exists():
|
|
print(f"Error: Schemas directory not found at {schemas_dir}")
|
|
sys.exit(1)
|
|
|
|
validator = YAMLValidator(schemas_dir)
|
|
|
|
# Check command line arguments
|
|
if len(sys.argv) < 2:
|
|
print("Usage: python validate_yaml.py <path-to-yaml-file>")
|
|
print(" python validate_yaml.py --all")
|
|
sys.exit(1)
|
|
|
|
if sys.argv[1] == "--all":
|
|
# Validate all YAML files in project templates
|
|
templates_dir = base_dir / "assets" / "project-templates"
|
|
if not templates_dir.exists():
|
|
print(f"Error: Project templates directory not found at {templates_dir}")
|
|
sys.exit(1)
|
|
|
|
print(f"Validating all YAML files in {templates_dir}...")
|
|
print("-" * 80)
|
|
|
|
results = validator.validate_directory(templates_dir)
|
|
|
|
# Print results
|
|
valid_count = 0
|
|
invalid_count = 0
|
|
|
|
for file_path, (is_valid, error_msg) in results.items():
|
|
rel_path = Path(file_path).relative_to(base_dir)
|
|
if is_valid:
|
|
print(f"✓ {rel_path}")
|
|
valid_count += 1
|
|
else:
|
|
print(f"✗ {rel_path}")
|
|
print(f" Error: {error_msg}")
|
|
print()
|
|
invalid_count += 1
|
|
|
|
print("-" * 80)
|
|
print(f"Results: {valid_count} valid, {invalid_count} invalid")
|
|
|
|
if invalid_count > 0:
|
|
sys.exit(1)
|
|
|
|
else:
|
|
# Validate single file
|
|
file_path = Path(sys.argv[1])
|
|
if not file_path.exists():
|
|
print(f"Error: File not found: {file_path}")
|
|
sys.exit(1)
|
|
|
|
print(f"Validating {file_path}...")
|
|
is_valid, error_msg = validator.validate_file(file_path)
|
|
|
|
if is_valid:
|
|
print("✓ Valid")
|
|
else:
|
|
print(f"✗ Invalid")
|
|
print(f"Error: {error_msg}")
|
|
sys.exit(1)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|