Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:49:50 +08:00
commit adc4b2be25
147 changed files with 24716 additions and 0 deletions

View File

@@ -0,0 +1,233 @@
#!/usr/bin/env python3
"""
YAML Validation Script for MXCP Files
Validates YAML files against their corresponding JSON schemas.
Automatically detects the file type and applies the appropriate schema.
Usage:
python validate_yaml.py <path-to-yaml-file>
python validate_yaml.py --all # Validate all YAML files in project templates
"""
import json
import sys
from pathlib import Path
from typing import Dict, Optional, Tuple
try:
import yaml
from jsonschema import Draft7Validator, RefResolver, ValidationError
except ImportError:
print("Error: Required packages not installed.")
print("Please install: pip install pyyaml jsonschema")
sys.exit(1)
class YAMLValidator:
    """Validate MXCP YAML files against JSON schemas.

    Schemas are loaded once, at construction time, from ``schemas_dir`` and
    are keyed by file stem (for example ``tool-schema-1``). Each YAML file is
    matched to a schema by its filename or by its top-level keys.
    """

    def __init__(self, schemas_dir: Path):
        """Load every schema found in *schemas_dir*.

        Args:
            schemas_dir: Directory containing ``*-schema-*.json`` files.
        """
        self.schemas_dir = schemas_dir
        self.schemas = self._load_schemas()

    def _load_schemas(self) -> Dict[str, dict]:
        """Load all JSON schemas from the schemas directory.

        Returns:
            Mapping of schema name (file name without ``.json``) to the
            parsed schema document.

        Raises:
            OSError: If a schema file cannot be read.
            ValueError: If a schema file is not valid JSON.
        """
        schemas = {}
        for schema_file in sorted(self.schemas_dir.glob("*-schema-*.json")):
            # Read as UTF-8 explicitly so loading does not depend on the
            # platform's default encoding.
            with open(schema_file, "r", encoding="utf-8") as f:
                schemas[schema_file.stem] = json.load(f)
        return schemas

    def _detect_yaml_type(self, yaml_data: object, file_path: Path) -> Optional[str]:
        """Detect the type of YAML file based on its content and filename.

        Args:
            yaml_data: The parsed YAML document. Normally a mapping, but any
                value ``yaml.safe_load`` can produce is tolerated.
            file_path: Path of the file the document was loaded from.

        Returns:
            The schema name to validate against, or ``None`` when the type
            cannot be determined.
        """
        filename = file_path.name.lower()

        # Well-known filenames identify their schema regardless of content.
        if filename == "mxcp-site.yml":
            return "mxcp-site-schema-1"
        if filename == "config.yml":
            return "mxcp-config-schema-1"

        # Only a mapping has top-level keys. Without this guard a scalar
        # document raises TypeError on ``in`` (numbers) or is matched by
        # substring (strings), e.g. the text "mxcp tool" looked like a tool.
        if not isinstance(yaml_data, dict):
            return None

        # The mxcp version field is required in all MXCP files.
        if "mxcp" not in yaml_data:
            return None

        # Detect by top-level keys; the first match wins.
        if "tool" in yaml_data:
            return "tool-schema-1"
        if "resource" in yaml_data:
            return "resource-schema-1"
        if "prompt" in yaml_data:
            return "prompt-schema-1"
        if "suite" in yaml_data and "tests" in yaml_data:
            return "eval-schema-1"
        if "project" in yaml_data:
            return "mxcp-site-schema-1"
        if "projects" in yaml_data:
            return "mxcp-config-schema-1"
        return None

    def validate_file(self, file_path: Path) -> Tuple[bool, Optional[str]]:
        """Validate a single YAML file.

        This method does not raise for problems with the file itself: read
        errors, YAML syntax errors and schema violations are all reported
        through the returned message.

        Args:
            file_path: Path of the YAML file to validate.

        Returns:
            Tuple of (is_valid, error_message). ``error_message`` is ``None``
            when the file is valid; otherwise it describes the failure, with
            one line per schema violation.
        """
        try:
            # Read as UTF-8 explicitly rather than relying on the platform's
            # default encoding.
            with open(file_path, "r", encoding="utf-8") as f:
                yaml_data = yaml.safe_load(f)

            if yaml_data is None:
                return False, "Empty YAML file"

            schema_name = self._detect_yaml_type(yaml_data, file_path)
            if schema_name is None:
                return False, "Could not detect YAML file type (missing 'mxcp' field or unknown structure)"
            if schema_name not in self.schemas:
                return False, f"Schema '{schema_name}' not found in schemas directory"
            schema = self.schemas[schema_name]

            # Base URI for resolving relative $ref entries against sibling
            # schema files. Path.as_uri() yields a well-formed file:// URI on
            # every platform and percent-encodes characters such as spaces.
            # NOTE(review): RefResolver is deprecated in recent jsonschema
            # releases; it is kept to avoid adding a new dependency.
            schema_uri = self.schemas_dir.resolve().as_uri() + "/"
            resolver = RefResolver(schema_uri, schema)

            validator = Draft7Validator(schema, resolver=resolver)
            errors = list(validator.iter_errors(yaml_data))
            if errors:
                error_messages = []
                for error in errors:
                    path = " -> ".join(str(p) for p in error.path) if error.path else "root"
                    error_messages.append(f" At {path}: {error.message}")
                return False, "\n".join(error_messages)
            return True, None
        except yaml.YAMLError as e:
            return False, f"YAML parsing error: {e}"
        except Exception as e:
            # Deliberate catch-all: callers rely on getting a result tuple
            # for every file instead of an exception.
            return False, f"Unexpected error: {e}"

    def _should_skip_file(self, file_path: Path) -> bool:
        """Determine if a file should be skipped during validation.

        Args:
            file_path: Path of the candidate file. Every component of the
                path is inspected, so callers should pass a path relative to
                the directory being scanned.

        Returns:
            ``True`` for files that are not MXCP files (virtual-environment
            contents and dbt configuration/schema files), ``False`` otherwise.
        """
        parts = file_path.parts
        name = file_path.name

        # Skip files in virtual environments.
        if ".venv" in parts or "venv" in parts:
            return True

        # Skip dbt-specific configuration files (not MXCP files).
        dbt_files = {"dbt_project.yml", "profiles.yml", "sample_profiles.yml", "packages.yml"}
        if name in dbt_files:
            return True

        # Skip dbt model schema files (sources.yml, schema.yml in models/).
        if name in {"sources.yml", "schema.yml"} and "models" in parts:
            return True

        # Skip seed schema files.
        if name == "schema.yml" and "seeds" in parts:
            return True
        return False

    def validate_directory(self, directory: Path, pattern: str = "**/*.yml") -> Dict[str, Tuple[bool, Optional[str]]]:
        """Validate all YAML files in a directory.

        Args:
            directory: Root directory to search.
            pattern: Glob pattern, relative to *directory*. The same pattern
                with ``.yml`` replaced by ``.yaml`` is searched as well.

        Returns:
            Dictionary mapping file paths (as strings, in sorted order) to
            the ``(is_valid, error_message)`` result of ``validate_file``.
        """
        # Also check for the .yaml extension, but only when that produces a
        # different pattern; otherwise the same files would be globbed twice.
        patterns = [pattern]
        yaml_variant = pattern.replace(".yml", ".yaml")
        if yaml_variant != pattern:
            patterns.append(yaml_variant)

        candidates = set()
        for glob_pattern in patterns:
            candidates.update(directory.glob(glob_pattern))

        results = {}
        # Sorted so the report order does not depend on the filesystem.
        for yaml_file in sorted(candidates):
            # A directory whose name matches the glob is not a YAML file.
            if not yaml_file.is_file():
                continue
            # Apply skip rules to the path below `directory` only, so that an
            # ancestor folder named e.g. "venv" or "models" outside the
            # scanned tree cannot hide every file.
            if self._should_skip_file(yaml_file.relative_to(directory)):
                continue
            results[str(yaml_file)] = self.validate_file(yaml_file)
        return results
def main():
    """Command-line entry point.

    Usage:
        python validate_yaml.py <path-to-yaml-file>
        python validate_yaml.py --all

    With ``--all``, every YAML file under ``assets/project-templates`` is
    validated and a summary is printed. Otherwise the single file given as
    the first argument is validated.

    Exits with status 1 on a usage error, a missing directory or file, or
    when any validated file is invalid. Returns normally (status 0)
    otherwise.
    """
    # Determine base directory (assume script is in mxcp-expert/scripts/).
    script_dir = Path(__file__).parent
    base_dir = script_dir.parent
    schemas_dir = base_dir / "assets" / "schemas"
    if not schemas_dir.exists():
        print(f"Error: Schemas directory not found at {schemas_dir}")
        sys.exit(1)

    validator = YAMLValidator(schemas_dir)

    # Check command line arguments.
    if len(sys.argv) < 2:
        print("Usage: python validate_yaml.py <path-to-yaml-file>")
        print(" python validate_yaml.py --all")
        sys.exit(1)

    if sys.argv[1] == "--all":
        # Validate all YAML files in project templates.
        templates_dir = base_dir / "assets" / "project-templates"
        if not templates_dir.exists():
            print(f"Error: Project templates directory not found at {templates_dir}")
            sys.exit(1)

        print(f"Validating all YAML files in {templates_dir}...")
        print("-" * 80)
        results = validator.validate_directory(templates_dir)

        valid_count = 0
        invalid_count = 0
        for file_path, (is_valid, error_msg) in results.items():
            rel_path = Path(file_path).relative_to(base_dir)
            # Prefix each line with the same markers used in single-file
            # mode; without them valid and invalid entries look identical.
            if is_valid:
                print(f"✓ {rel_path}")
                valid_count += 1
            else:
                print(f"✗ {rel_path}")
                print(f" Error: {error_msg}")
                print()
                invalid_count += 1

        print("-" * 80)
        print(f"Results: {valid_count} valid, {invalid_count} invalid")
        if invalid_count > 0:
            sys.exit(1)
    else:
        # Validate single file.
        file_path = Path(sys.argv[1])
        if not file_path.exists():
            print(f"Error: File not found: {file_path}")
            sys.exit(1)
        # Reject directories up front instead of surfacing the OS error from
        # open() as an "Unexpected error".
        if not file_path.is_file():
            print(f"Error: Not a file: {file_path}")
            sys.exit(1)

        print(f"Validating {file_path}...")
        is_valid, error_msg = validator.validate_file(file_path)
        if is_valid:
            print("✓ Valid")
        else:
            print("✗ Invalid")
            print(f"Error: {error_msg}")
            sys.exit(1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()