Initial commit
This commit is contained in:
233
skills/mxcp-expert/scripts/validate_yaml.py
Executable file
233
skills/mxcp-expert/scripts/validate_yaml.py
Executable file
@@ -0,0 +1,233 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
YAML Validation Script for MXCP Files
|
||||
|
||||
Validates YAML files against their corresponding JSON schemas.
|
||||
Automatically detects the file type and applies the appropriate schema.
|
||||
|
||||
Usage:
|
||||
python validate_yaml.py <path-to-yaml-file>
|
||||
python validate_yaml.py --all # Validate all YAML files in project templates
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, Tuple
|
||||
|
||||
try:
|
||||
import yaml
|
||||
from jsonschema import Draft7Validator, RefResolver, ValidationError
|
||||
except ImportError:
|
||||
print("Error: Required packages not installed.")
|
||||
print("Please install: pip install pyyaml jsonschema")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
class YAMLValidator:
    """Validates MXCP YAML files against JSON schemas.

    All ``*-schema-*.json`` files found directly inside ``schemas_dir`` are
    loaded at construction time and keyed by their file stem (for example
    ``tool-schema-1``). Each YAML file is then matched to one of those
    schemas by its filename or its top-level keys.
    """

    def __init__(self, schemas_dir: Path):
        """Remember ``schemas_dir`` and eagerly load every schema in it."""
        self.schemas_dir = schemas_dir
        self.schemas = self._load_schemas()

    def _load_schemas(self) -> Dict[str, dict]:
        """Load all JSON schemas from the schemas directory.

        Returns:
            Mapping of schema file stem to the parsed JSON document.
        """
        schemas = {}
        # Sorted so the load order does not depend on the filesystem.
        for schema_file in sorted(self.schemas_dir.glob("*-schema-*.json")):
            # Explicit encoding: the platform default is not always UTF-8.
            with open(schema_file, "r", encoding="utf-8") as f:
                schemas[schema_file.stem] = json.load(f)
        return schemas

    def _detect_yaml_type(self, yaml_data: dict, file_path: Path) -> Optional[str]:
        """Detect the type of YAML file based on its content and filename.

        Args:
            yaml_data: Parsed YAML document (expected to be a mapping).
            file_path: Path of the file the document was read from.

        Returns:
            The schema name to validate against, or ``None`` when the type
            cannot be determined.
        """
        filename = file_path.name.lower()

        # Well-known filenames identify the schema regardless of content.
        if filename == "mxcp-site.yml":
            return "mxcp-site-schema-1"
        if filename == "config.yml":
            return "mxcp-config-schema-1"

        # Key-based detection only makes sense for mappings. Without this
        # guard a top-level list such as ["mxcp", "tool"] would be detected
        # as a tool definition, and a scalar root would raise TypeError.
        if not isinstance(yaml_data, dict):
            return None

        # The 'mxcp' version field is required in all MXCP files.
        if "mxcp" not in yaml_data:
            return None

        # Detect by top-level keys; the order of the checks is significant.
        if "tool" in yaml_data:
            return "tool-schema-1"
        if "resource" in yaml_data:
            return "resource-schema-1"
        if "prompt" in yaml_data:
            return "prompt-schema-1"
        if "suite" in yaml_data and "tests" in yaml_data:
            return "eval-schema-1"
        if "project" in yaml_data:
            return "mxcp-site-schema-1"
        if "projects" in yaml_data:
            return "mxcp-config-schema-1"

        return None

    def validate_file(self, file_path: Path) -> Tuple[bool, Optional[str]]:
        """
        Validate a single YAML file.

        Args:
            file_path: Path of the YAML file to validate.

        Returns:
            Tuple of (is_valid, error_message). ``error_message`` is ``None``
            when the file is valid. No exception derived from ``Exception``
            escapes this method; failures are reported through the message.
        """
        try:
            # Load YAML file (explicit encoding rather than the locale default).
            with open(file_path, "r", encoding="utf-8") as f:
                yaml_data = yaml.safe_load(f)

            if yaml_data is None:
                return False, "Empty YAML file"

            # Detect YAML type
            schema_name = self._detect_yaml_type(yaml_data, file_path)
            if schema_name is None:
                return False, "Could not detect YAML file type (missing 'mxcp' field or unknown structure)"

            if schema_name not in self.schemas:
                return False, f"Schema '{schema_name}' not found in schemas directory"

            schema = self.schemas[schema_name]

            # Base URI for $ref resolution. as_uri() yields a well-formed,
            # percent-encoded file URI on every platform; the trailing slash
            # makes relative references resolve inside the schemas directory.
            schema_uri = self.schemas_dir.resolve().as_uri() + "/"
            resolver = RefResolver(schema_uri, schema)

            # Validate, collecting every error instead of stopping at the first.
            validator = Draft7Validator(schema, resolver=resolver)
            errors = list(validator.iter_errors(yaml_data))

            if errors:
                error_messages = []
                for error in errors:
                    path = " -> ".join(str(p) for p in error.path) if error.path else "root"
                    error_messages.append(f"  At {path}: {error.message}")
                return False, "\n".join(error_messages)

            return True, None

        except yaml.YAMLError as e:
            return False, f"YAML parsing error: {e}"
        except Exception as e:
            # Deliberate catch-all: one unreadable file must be reported as a
            # failed result rather than abort a whole directory run.
            return False, f"Unexpected error: {e}"

    def _should_skip_file(self, file_path: Path) -> bool:
        """Determine if a file should be skipped during validation."""
        # Skip files in virtual environments
        if ".venv" in file_path.parts or "venv" in file_path.parts:
            return True

        # Skip dbt-specific configuration files (not MXCP files)
        dbt_files = {"dbt_project.yml", "profiles.yml", "sample_profiles.yml", "packages.yml"}
        if file_path.name in dbt_files:
            return True

        # Skip dbt model schema files (sources.yml, schema.yml in models/)
        if file_path.name in {"sources.yml", "schema.yml"} and "models" in file_path.parts:
            return True

        # Skip seed schema files
        if file_path.name == "schema.yml" and "seeds" in file_path.parts:
            return True

        return False

    def validate_directory(self, directory: Path, pattern: str = "**/*.yml") -> Dict[str, Tuple[bool, Optional[str]]]:
        """
        Validate all YAML files in a directory.

        Args:
            directory: Root directory to search.
            pattern: Glob pattern, relative to ``directory``. The same pattern
                with ``.yml`` replaced by ``.yaml`` is searched as well.

        Returns:
            Dictionary mapping file paths to validation results, in sorted
            path order.
        """
        patterns = [pattern]
        # Also check for .yaml extension. Only add the variant when it really
        # differs, so the same pattern is never globbed and validated twice.
        yaml_variant = pattern.replace(".yml", ".yaml")
        if yaml_variant != pattern:
            patterns.append(yaml_variant)

        # A set removes paths matched by more than one pattern.
        candidates = {path for glob_pattern in patterns for path in directory.glob(glob_pattern)}

        results = {}
        # Sorted iteration keeps the result order reproducible across runs.
        for yaml_file in sorted(candidates):
            # A directory whose name ends in .yml/.yaml is not a YAML document.
            if not yaml_file.is_file() or self._should_skip_file(yaml_file):
                continue
            results[str(yaml_file)] = self.validate_file(yaml_file)

        return results
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    With ``--all`` as the first argument, validates every YAML file under
    ``assets/project-templates`` and prints a per-file report plus a summary.
    Otherwise the first argument is treated as the path of a single YAML file
    to validate. Exits with status 1 on a usage error, a missing directory or
    file, or any validation failure.
    """
    # Determine base directory (assume script is in mxcp-expert/scripts/)
    script_dir = Path(__file__).parent
    base_dir = script_dir.parent
    schemas_dir = base_dir / "assets" / "schemas"

    if not schemas_dir.exists():
        print(f"Error: Schemas directory not found at {schemas_dir}")
        sys.exit(1)

    validator = YAMLValidator(schemas_dir)

    # Check command line arguments
    if len(sys.argv) < 2:
        print("Usage: python validate_yaml.py <path-to-yaml-file>")
        print("       python validate_yaml.py --all")
        sys.exit(1)

    if sys.argv[1] == "--all":
        # Validate all YAML files in project templates
        templates_dir = base_dir / "assets" / "project-templates"
        if not templates_dir.exists():
            print(f"Error: Project templates directory not found at {templates_dir}")
            sys.exit(1)

        print(f"Validating all YAML files in {templates_dir}...")
        print("-" * 80)

        results = validator.validate_directory(templates_dir)

        # Print results
        valid_count = 0
        invalid_count = 0

        for file_path, (is_valid, error_msg) in results.items():
            # Report paths relative to the skill root to keep lines short.
            rel_path = Path(file_path).relative_to(base_dir)
            if is_valid:
                print(f"✓ {rel_path}")
                valid_count += 1
            else:
                print(f"✗ {rel_path}")
                print(f"  Error: {error_msg}")
                print()
                invalid_count += 1

        print("-" * 80)
        print(f"Results: {valid_count} valid, {invalid_count} invalid")

        if invalid_count > 0:
            sys.exit(1)

    else:
        # Validate single file
        file_path = Path(sys.argv[1])
        # is_file() rather than exists(): a directory argument would pass an
        # exists() check and only fail later with an opaque "Unexpected error".
        if not file_path.is_file():
            print(f"Error: File not found: {file_path}")
            sys.exit(1)

        print(f"Validating {file_path}...")
        is_valid, error_msg = validator.validate_file(file_path)

        if is_valid:
            print("✓ Valid")
        else:
            print("✗ Invalid")
            print(f"Error: {error_msg}")
            sys.exit(1)
|
||||
|
||||
|
||||
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user