Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:26:08 +08:00
commit 8f22ddf339
295 changed files with 59710 additions and 0 deletions

View File

@@ -0,0 +1,669 @@
#!/usr/bin/env python3
"""
Registry Validation Skill for Betty Framework
Validates registry files for schema compliance, consistency, and completeness.
Provides dry-run mode to test changes before committing.
Usage:
python registry_validate.py --registry_files '["registry/skills.json"]'
"""
import argparse
import json
import logging
import os
import sys
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Set, Tuple
try:
    from pydantic import ValidationError
except ImportError:
    print("Error: pydantic not installed. Run: pip install pydantic", file=sys.stderr)
    sys.exit(1)

# Add parent directory to path so the `betty` package resolves when this
# script is executed directly from the skill directory.
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

try:
    from betty.models import SkillManifest
except ImportError:
    # Fallback stub when betty.models is unavailable. It must accept
    # arbitrary keyword arguments so that `SkillManifest(**skill)` degrades
    # to a no-op instead of raising TypeError (a bare `pass` class rejects
    # all constructor arguments). It performs no validation.
    class SkillManifest:
        def __init__(self, **kwargs: Any) -> None:
            pass

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
class RegistryValidator:
    """Validates Betty Framework registry files.

    Accumulates errors, warnings, and suggestions across every file checked
    by this instance; the ``validate_*_registry`` methods return per-file
    summary dictionaries (counts and a ``valid`` flag is added by the caller).
    """

    def __init__(self) -> None:
        self.errors: List[Dict[str, Any]] = []
        self.warnings: List[Dict[str, Any]] = []
        self.suggestions: List[str] = []
        # Cache of registry/artifact_types.json, keyed by artifact type name.
        self.artifact_types_cache: Dict[str, Any] = {}

    def add_error(self, file: str, item: str, field: str, message: str) -> None:
        """Record and log a validation error for ``item.field`` in ``file``."""
        self.errors.append({
            'file': file,
            'item': item,
            'field': field,
            'message': message,
            'severity': 'error'
        })
        logger.error(f"{file} - {item}.{field}: {message}")

    def add_warning(self, file: str, item: str, field: str, message: str) -> None:
        """Record and log a validation warning for ``item.field`` in ``file``."""
        self.warnings.append({
            'file': file,
            'item': item,
            'field': field,
            'message': message,
            'severity': 'warning'
        })
        logger.warning(f"⚠️ {file} - {item}.{field}: {message}")

    def add_suggestion(self, suggestion: str) -> None:
        """Record an improvement suggestion."""
        self.suggestions.append(suggestion)

    def load_artifact_types(self, artifact_types_path: str = "registry/artifact_types.json") -> Dict[str, Any]:
        """Load the artifact types registry, caching the result.

        Returns:
            Mapping of artifact type name -> definition. Empty dict when the
            file is missing or contains invalid JSON (logged, not raised).
        """
        if self.artifact_types_cache:
            return self.artifact_types_cache
        try:
            with open(artifact_types_path, 'r') as f:
                registry = json.load(f)
            self.artifact_types_cache = {
                artifact['name']: artifact
                for artifact in registry.get('artifact_types', [])
            }
            return self.artifact_types_cache
        except FileNotFoundError:
            logger.warning(f"Artifact types registry not found: {artifact_types_path}")
            return {}
        except json.JSONDecodeError as e:
            logger.error(f"Invalid JSON in artifact types registry: {e}")
            return {}

    def validate_json_syntax(self, file_path: str) -> Tuple[bool, Any]:
        """Load ``file_path`` as JSON.

        Returns:
            ``(ok, data)`` — ``data`` is None and an error is recorded when
            the file is missing or not valid JSON.
        """
        try:
            with open(file_path, 'r') as f:
                data = json.load(f)
            return True, data
        except FileNotFoundError:
            self.add_error(file_path, 'file', 'existence', 'File not found')
            return False, None
        except json.JSONDecodeError as e:
            self.add_error(file_path, 'file', 'syntax', f'Invalid JSON: {e}')
            return False, None

    def validate_skill(self, skill: Dict[str, Any], file_path: str) -> None:
        """Validate a single skill entry against the SkillManifest schema."""
        skill_name = skill.get('name', '<unknown>')
        # Check required fields
        required_fields = ['name', 'version', 'description', 'inputs', 'outputs', 'status']
        for field in required_fields:
            if field not in skill:
                self.add_error(file_path, skill_name, field, f'Missing required field: {field}')
        # Only run Pydantic validation when the real SkillManifest model is
        # available. The import-failure stub has no pydantic field metadata,
        # so we detect a genuine model via its pydantic attributes (v2
        # `model_validate` / v1 `__fields__`). The previous check
        # `SkillManifest != type` was always true, so the stub was
        # instantiated and produced spurious warnings.
        if hasattr(SkillManifest, 'model_validate') or hasattr(SkillManifest, '__fields__'):
            try:
                SkillManifest(**skill)
            except ValidationError as e:
                for error in e.errors():
                    field = '.'.join(str(loc) for loc in error['loc'])
                    self.add_error(file_path, skill_name, field, error['msg'])
            except Exception as e:
                self.add_warning(file_path, skill_name, 'validation', f'Could not validate with Pydantic: {e}')

    def validate_artifact_references(
        self,
        skill: Dict[str, Any],
        file_path: str,
        artifact_types: Dict[str, Any]
    ) -> None:
        """Check that every artifact type a skill produces/consumes is registered."""
        skill_name = skill.get('name', '<unknown>')
        if 'artifact_metadata' not in skill:
            return
        metadata = skill['artifact_metadata']
        # Validate produces (always dict entries with a 'type' key)
        for artifact in metadata.get('produces', []):
            artifact_type = artifact.get('type')
            if artifact_type and artifact_type not in artifact_types:
                self.add_error(
                    file_path,
                    skill_name,
                    'artifact_metadata.produces.type',
                    f'Unknown artifact type: {artifact_type}'
                )
        # Validate consumes (entries may be dicts or bare type strings)
        for artifact in metadata.get('consumes', []):
            if isinstance(artifact, dict):
                artifact_type = artifact.get('type')
            else:
                artifact_type = artifact  # String format
            if artifact_type and artifact_type not in artifact_types:
                self.add_error(
                    file_path,
                    skill_name,
                    'artifact_metadata.consumes.type',
                    f'Unknown artifact type: {artifact_type}'
                )

    def validate_file_paths(self, skill: Dict[str, Any], file_path: str) -> None:
        """Verify that files referenced by the skill exist on disk.

        Missing entrypoint handlers are errors; missing produce schemas are
        warnings (paths are relative to the current working directory).
        """
        skill_name = skill.get('name', '<unknown>')
        # Check entrypoint handlers
        for entrypoint in skill.get('entrypoints', []):
            handler = entrypoint.get('handler')
            if handler:
                handler_path = f"skills/{skill_name}/{handler}"
                if not os.path.exists(handler_path):
                    self.add_error(
                        file_path,
                        skill_name,
                        'entrypoints.handler',
                        f'Handler file not found: {handler_path}'
                    )
        # Check schemas in artifact_metadata
        if 'artifact_metadata' in skill:
            for artifact in skill['artifact_metadata'].get('produces', []):
                schema = artifact.get('schema')
                if schema and not os.path.exists(schema):
                    self.add_warning(
                        file_path,
                        skill_name,
                        'artifact_metadata.produces.schema',
                        f'Schema file not found: {schema}'
                    )

    def detect_duplicates(self, items: List[Dict[str, Any]], file_path: str, item_type: str) -> None:
        """Record an error for every repeated (name, version) pair in ``items``."""
        seen: Set[str] = set()
        for item in items:
            name = item.get('name', '<unknown>')
            version = item.get('version', '<unknown>')
            key = f"{name}:{version}"
            if key in seen:
                self.add_error(
                    file_path,
                    name,
                    'name+version',
                    f'Duplicate {item_type}: {key}'
                )
            seen.add(key)

    def detect_circular_dependencies(self, skills: List[Dict[str, Any]], file_path: str) -> None:
        """Detect circular dependencies between skills and record them as errors."""
        # Build dependency graph: skill name -> set of skill-like dependencies.
        graph: Dict[str, Set[str]] = defaultdict(set)
        for skill in skills:
            skill_name = skill.get('name')
            if not skill_name:
                continue
            for dep in skill.get('dependencies', []):
                if '/' not in dep:  # Not a package path; might be a skill dependency
                    graph[skill_name].add(dep)

        visited: Set[str] = set()
        rec_stack: Set[str] = set()
        cycles: List[List[str]] = []

        def dfs(node: str, path: List[str]) -> None:
            # Invariant: rec_stack always mirrors `path`, so a neighbor found
            # in rec_stack is guaranteed to lie on the current path and
            # `path.index(neighbor)` cannot raise. (The previous version
            # returned early without cleaning rec_stack, which made later
            # traversals crash on `path.index` for stale entries.)
            visited.add(node)
            rec_stack.add(node)
            path.append(node)
            for neighbor in graph.get(node, ()):
                if neighbor in rec_stack:
                    cycle_start = path.index(neighbor)
                    cycles.append(path[cycle_start:] + [neighbor])
                elif neighbor not in visited:
                    dfs(neighbor, path)
            rec_stack.remove(node)
            path.pop()

        for skill_name in list(graph.keys()):
            if skill_name not in visited:
                dfs(skill_name, [])

        for cycle in cycles:
            # Arrow-join so the report reads "a -> b -> a" instead of "aba".
            cycle_str = ' -> '.join(cycle)
            self.add_error(
                file_path,
                cycle[0],
                'dependencies',
                f'Circular dependency: {cycle_str}'
            )

    def validate_skills_registry(
        self,
        file_path: str,
        check_file_paths: bool,
        check_artifacts: bool,
        check_dependencies: bool
    ) -> Dict[str, Any]:
        """Validate a skills.json registry file.

        Returns:
            Summary dict with ``valid`` (JSON loaded ok), ``skill_count``,
            and artifact/test coverage counts.
        """
        logger.info(f"Validating skills registry: {file_path}")
        # Load and validate JSON
        valid, data = self.validate_json_syntax(file_path)
        if not valid:
            return {'valid': False, 'skill_count': 0}
        skills = data.get('skills', [])
        # Load artifact types if needed
        artifact_types: Dict[str, Any] = {}
        if check_artifacts:
            artifact_types = self.load_artifact_types()
        # Detect duplicates
        self.detect_duplicates(skills, file_path, 'skill')
        # Validate each skill
        skills_with_artifacts = 0
        skills_with_tests = 0
        for skill in skills:
            # Basic validation
            self.validate_skill(skill, file_path)
            # Count artifact_metadata regardless of check_artifacts so the
            # coverage suggestion below is accurate even when reference
            # checking is disabled (previously only counted when enabled).
            if 'artifact_metadata' in skill:
                skills_with_artifacts += 1
                if check_artifacts:
                    self.validate_artifact_references(skill, file_path, artifact_types)
            # File path validation
            if check_file_paths:
                self.validate_file_paths(skill, file_path)
            # Check for tests
            skill_name = skill.get('name', '')
            test_path = f"skills/{skill_name}/test_{skill_name.replace('.', '_')}.py"
            if os.path.exists(test_path):
                skills_with_tests += 1
        # Check circular dependencies
        if check_dependencies:
            self.detect_circular_dependencies(skills, file_path)
        # Generate coverage suggestions (thresholds: below half of all skills)
        if skills_with_tests < len(skills) / 2:
            self.add_suggestion(f"{len(skills) - skills_with_tests} skills missing test coverage")
        if skills_with_artifacts < len(skills) / 2:
            self.add_suggestion(f"{len(skills) - skills_with_artifacts} skills missing artifact_metadata")
        return {
            'valid': True,
            'skill_count': len(skills),
            'skills_with_artifacts': skills_with_artifacts,
            'skills_with_tests': skills_with_tests
        }

    def validate_agents_registry(self, file_path: str) -> Dict[str, Any]:
        """Validate an agents.json registry file (required fields + duplicates)."""
        logger.info(f"Validating agents registry: {file_path}")
        # Load and validate JSON
        valid, data = self.validate_json_syntax(file_path)
        if not valid:
            return {'valid': False, 'agent_count': 0}
        agents = data.get('agents', [])
        # Detect duplicates
        self.detect_duplicates(agents, file_path, 'agent')
        # Validate each agent
        for agent in agents:
            agent_name = agent.get('name', '<unknown>')
            required_fields = ['name', 'version', 'description', 'reasoning_mode']
            for field in required_fields:
                if field not in agent:
                    self.add_error(file_path, agent_name, field, f'Missing required field: {field}')
        return {
            'valid': True,
            'agent_count': len(agents)
        }

    def validate_artifact_types_registry(self, file_path: str) -> Dict[str, Any]:
        """Validate an artifact_types.json registry file (required fields + duplicates)."""
        logger.info(f"Validating artifact types registry: {file_path}")
        # Load and validate JSON
        valid, data = self.validate_json_syntax(file_path)
        if not valid:
            return {'valid': False, 'artifact_type_count': 0}
        artifact_types = data.get('artifact_types', [])
        # Detect duplicates
        self.detect_duplicates(artifact_types, file_path, 'artifact type')
        # Validate each artifact type
        for artifact_type in artifact_types:
            type_name = artifact_type.get('name', '<unknown>')
            required_fields = ['name', 'file_pattern', 'content_type']
            for field in required_fields:
                if field not in artifact_type:
                    self.add_error(file_path, type_name, field, f'Missing required field: {field}')
        return {
            'valid': True,
            'artifact_type_count': len(artifact_types)
        }
def validate_registries(
    registry_files: List[str],
    check_file_paths: bool = True,
    check_artifacts: bool = True,
    check_dependencies: bool = True,
    strict_mode: bool = False,
    output_format: str = "detailed"
) -> Dict[str, Any]:
    """Run validation over a list of Betty Framework registry files.

    Args:
        registry_files: Registry file paths; the registry kind is inferred
            from the filename (skills.json / agents.json /
            artifact_types.json). Unrecognized paths are skipped with a
            warning.
        check_file_paths: Verify that referenced files exist on disk.
        check_artifacts: Validate artifact type references.
        check_dependencies: Check for circular skill dependencies.
        strict_mode: Treat warnings as validation failures.
        output_format: Accepted for interface compatibility ("json",
            "summary", "detailed"); formatting itself is done elsewhere.

    Returns:
        Dict with overall ``valid`` flag, per-file ``validation_results``,
        aggregated ``errors``/``warnings``/``suggestions``, and ``stats``.
    """
    validator = RegistryValidator()
    per_file: Dict[str, Any] = {}
    started = datetime.now(timezone.utc)

    for path in registry_files:
        # Dispatch on registry kind inferred from the filename.
        if 'skills.json' in path:
            outcome = validator.validate_skills_registry(
                path, check_file_paths, check_artifacts, check_dependencies
            )
        elif 'agents.json' in path:
            outcome = validator.validate_agents_registry(path)
        elif 'artifact_types.json' in path:
            outcome = validator.validate_artifact_types_registry(path)
        else:
            logger.warning(f"Unknown registry type: {path}")
            continue

        outcome['validated_at'] = datetime.now(timezone.utc).isoformat()

        # Attach only the findings that belong to this file.
        errs = [e for e in validator.errors if e['file'] == path]
        warns = [w for w in validator.warnings if w['file'] == path]
        outcome['errors'] = errs
        outcome['warnings'] = warns
        outcome['valid'] = not errs and (not strict_mode or not warns)
        per_file[path] = outcome

    # Timing stats.
    elapsed_ms = int((datetime.now(timezone.utc) - started).total_seconds() * 1000)
    stats: Dict[str, Any] = {'validation_time_ms': elapsed_ms}

    # Aggregate entity counts across files (skill_count -> total_skills, ...).
    for outcome in per_file.values():
        for count_key in ('skill_count', 'agent_count', 'artifact_type_count'):
            if count_key in outcome:
                total_key = 'total_' + count_key.replace('_count', 's')
                stats[total_key] = stats.get(total_key, 0) + outcome[count_key]

    # Overall validity: every file valid, and no warnings at all in strict mode.
    overall = all(outcome['valid'] for outcome in per_file.values())
    if strict_mode and validator.warnings:
        overall = False

    return {
        'valid': overall,
        'validation_results': per_file,
        'errors': validator.errors,
        'warnings': validator.warnings,
        'suggestions': validator.suggestions,
        'stats': stats
    }
def format_output(result: Dict[str, Any], output_format: str) -> str:
    """Render a validation result as a report string.

    Args:
        result: Output dictionary from ``validate_registries()``.
        output_format: "json" (raw JSON dump), "summary" (counts only), or
            anything else for the detailed per-finding report.

    Returns:
        The formatted report text.
    """
    if output_format == "json":
        return json.dumps(result, indent=2)

    if output_format == "summary":
        lines = ["=== Registry Validation Summary ===\n"]
        for file_path, file_result in result['validation_results'].items():
            status = "✅ Valid" if file_result['valid'] else "❌ Invalid"
            error_count = len(file_result.get('errors', []))
            warning_count = len(file_result.get('warnings', []))
            lines.append(f"{status} {file_path}")
            if 'skill_count' in file_result:
                lines.append(f"  Skills: {file_result['skill_count']}")
            if 'agent_count' in file_result:
                lines.append(f"  Agents: {file_result['agent_count']}")
            if 'artifact_type_count' in file_result:
                lines.append(f"  Artifact Types: {file_result['artifact_type_count']}")
            if error_count:
                lines.append(f"  Errors: {error_count}")
            if warning_count:
                lines.append(f"  Warnings: {warning_count}")
            lines.append("")
        overall = "✅ PASSED" if result['valid'] else "❌ FAILED"
        lines.append(f"Overall: {overall}")
        lines.append(f"Validation time: {result['stats']['validation_time_ms']}ms")
        return '\n'.join(lines)

    # Detailed report (default for any other format value).
    lines = ["=== Registry Validation Report ===\n"]
    for file_path, file_result in result['validation_results'].items():
        status = "✅ Valid" if file_result['valid'] else "❌ Invalid"
        lines.append(f"{file_path}: {status}")
        # Fix: error lines previously had no marker or indentation, which was
        # inconsistent with the warning lines directly below them.
        for error in file_result.get('errors', []):
            lines.append(f"  ❌ {error['item']}.{error['field']}: {error['message']}")
        for warning in file_result.get('warnings', []):
            lines.append(f"  ⚠️ {warning['item']}.{warning['field']}: {warning['message']}")
        lines.append("")
    if result['suggestions']:
        lines.append("Suggestions:")
        for suggestion in result['suggestions']:
            lines.append(f"  💡 {suggestion}")
        lines.append("")
    overall = "✅ PASSED" if result['valid'] else "❌ FAILED"
    lines.append(f"Overall: {overall}")
    lines.append(f"Time: {result['stats']['validation_time_ms']}ms")
    return '\n'.join(lines)
def _parse_bool(text: str) -> bool:
    """argparse type converter: case-insensitive 'true' -> True, else False."""
    return text.lower() == 'true'


def _build_parser() -> argparse.ArgumentParser:
    """Construct the CLI argument parser for the registry.validate skill."""
    parser = argparse.ArgumentParser(
        description="Validate Betty Framework registry files"
    )
    parser.add_argument(
        '--registry_files',
        type=str,
        required=True,
        help='JSON array of registry file paths (e.g., \'["registry/skills.json"]\')'
    )
    parser.add_argument(
        '--check_file_paths',
        type=_parse_bool,
        default=True,
        help='Whether to verify referenced files exist (default: true)'
    )
    parser.add_argument(
        '--check_artifacts',
        type=_parse_bool,
        default=True,
        help='Whether to validate artifact type references (default: true)'
    )
    parser.add_argument(
        '--check_dependencies',
        type=_parse_bool,
        default=True,
        help='Whether to check for circular dependencies (default: true)'
    )
    parser.add_argument(
        '--strict_mode',
        type=_parse_bool,
        default=False,
        help='Whether to treat warnings as errors (default: false)'
    )
    parser.add_argument(
        '--output_format',
        type=str,
        default='detailed',
        choices=['json', 'summary', 'detailed'],
        help='Output format (default: detailed)'
    )
    parser.add_argument(
        '--output',
        type=str,
        help='Output file path for validation report (optional)'
    )
    return parser


def main():
    """Main entry point for registry.validate skill."""
    cli = _build_parser().parse_args()
    try:
        # registry_files arrives as a JSON-encoded array string.
        registry_files = json.loads(cli.registry_files)
        if not isinstance(registry_files, list):
            logger.error("registry_files must be a JSON array")
            sys.exit(1)
        logger.info(f"Validating {len(registry_files)} registry files...")

        result = validate_registries(
            registry_files=registry_files,
            check_file_paths=cli.check_file_paths,
            check_artifacts=cli.check_artifacts,
            check_dependencies=cli.check_dependencies,
            strict_mode=cli.strict_mode,
            output_format=cli.output_format
        )

        # Mirror validity into skill-protocol status fields for JSON output.
        result['ok'] = result['valid']
        result['status'] = 'success' if result['valid'] else 'validation_failed'

        report = format_output(result, cli.output_format)

        # Optionally persist the report; JSON format saves the raw result.
        if cli.output:
            destination = Path(cli.output)
            destination.parent.mkdir(parents=True, exist_ok=True)
            if cli.output_format == 'json':
                with open(destination, 'w') as f:
                    json.dump(result, f, indent=2)
            else:
                with open(destination, 'w') as f:
                    f.write(report)
            logger.info(f"Validation report saved to: {destination}")
            result['validation_report_path'] = str(destination)

        print(report)
        # Exit non-zero when validation failed (CI-friendly).
        sys.exit(0 if result['valid'] else 1)
    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON in registry_files parameter: {e}")
        print(json.dumps({
            'ok': False,
            'status': 'error',
            'error': f'Invalid JSON: {str(e)}'
        }, indent=2))
        sys.exit(1)
    except Exception as e:
        logger.error(f"Unexpected error: {e}", exc_info=True)
        print(json.dumps({
            'ok': False,
            'status': 'error',
            'error': str(e)
        }, indent=2))
        sys.exit(1)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,104 @@
# Skill manifest for registry.validate (Betty Framework).
name: registry.validate
version: 0.1.0
description: >
  Validates Betty Framework registry files (skills.json, agents.json,
  artifact_types.json) for schema compliance, consistency, and completeness.
  Provides dry-run mode to test changes before committing. Checks JSON syntax,
  Pydantic model compliance, artifact type references, duplicate detection,
  file path existence, and circular dependencies.
# CLI/skill inputs; only registry_files is mandatory.
inputs:
  - name: registry_files
    type: array
    required: true
    description: List of registry file paths to validate (e.g., ["registry/skills.json", "registry/agents.json"])
  - name: check_file_paths
    type: boolean
    required: false
    default: true
    description: Whether to verify referenced files exist on filesystem
  - name: check_artifacts
    type: boolean
    required: false
    default: true
    description: Whether to validate artifact type references against artifact_types.json
  - name: check_dependencies
    type: boolean
    required: false
    default: true
    description: Whether to check for circular dependencies between skills/agents
  - name: strict_mode
    type: boolean
    required: false
    default: false
    description: Whether to treat warnings as errors (fail on warnings)
  - name: output_format
    type: string
    required: false
    default: detailed
    description: Output format - "json", "summary", or "detailed"
# Fields of the validation result returned by the skill.
outputs:
  - name: valid
    type: boolean
    description: Whether all registry files passed validation
  - name: validation_results
    type: object
    description: Detailed validation results for each registry file
  - name: errors
    type: array
    description: List of errors found across all registries
  - name: warnings
    type: array
    description: List of warnings found across all registries
  - name: suggestions
    type: array
    description: List of improvement suggestions
  - name: stats
    type: object
    description: Statistics about registries (counts, validation time, etc.)
artifact_metadata:
  produces:
    - type: validation-report
      file_pattern: "*.validation.json"
      content_type: application/json
      schema: schemas/validation-report.json
      description: Registry validation results with errors, warnings, and suggestions
  consumes: []
entrypoints:
  - command: /registry/validate
    handler: registry_validate.py
    runtime: python
    # NOTE(review): nesting of this permissions list under the entrypoint is
    # reconstructed from a whitespace-mangled source — confirm against the
    # original manifest layout.
    permissions:
      - filesystem:read
status: active
tags:
  - registry
  - validation
  - quality
  - ci-cd
  - testing
  - infrastructure
# Third-party Python packages the handler requires.
dependencies:
  - pydantic
  - jsonschema
  - pyyaml
# Skill-level permissions (read-only filesystem access).
permissions:
  - filesystem:read