Initial commit

2025-11-29 18:26:08 +08:00
commit 8f22ddf339
295 changed files with 59710 additions and 0 deletions
--- a/skills/docs.validate.skilldocs/skill_docs_validate.py
+++ b/skills/docs.validate.skilldocs/skill_docs_validate.py
@@ -0,0 +1,516 @@
+#!/usr/bin/env python3
+"""
+Validate SKILL.md documentation against skill.yaml manifests.
+
+This skill ensures that SKILL.md files contain all required sections and that
+documented fields match the corresponding skill.yaml manifest.
+"""
+
+import json
+import sys
+import os
+import re
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Set
+
+import yaml
+
+
+from betty.errors import BettyError, SkillValidationError
+from betty.logging_utils import setup_logger
+from betty.validation import validate_path, ValidationError
+from betty.telemetry_integration import telemetry_tracked
+
+# Module-level logger obtained from betty.logging_utils.setup_logger, which is
+# handed this module's __name__.
+logger = setup_logger(__name__)
+
+# Canonical section names that every SKILL.md is expected to provide.
+REQUIRED_HEADERS = [
+    "Overview", "Purpose", "Usage", "Inputs",
+    "Outputs", "Examples", "Integration", "Errors",
+]
+
+# Accepted spellings for each canonical section, keyed by the canonical name.
+HEADER_VARIATIONS = {
+    "Overview": [
+        "Overview", "## Overview",
+        "Description",
+    ],
+    "Purpose": [
+        "Purpose", "## Purpose",
+    ],
+    "Usage": [
+        "Usage", "## Usage",
+        "How to Use", "## How to Use",
+    ],
+    "Inputs": [
+        "Inputs", "## Inputs",
+        "Parameters", "## Parameters",
+    ],
+    "Outputs": [
+        "Outputs", "## Outputs",
+        "Output", "## Output",
+    ],
+    "Examples": [
+        "Examples", "## Examples",
+        "Example", "## Example",
+    ],
+    "Integration": [
+        "Integration", "## Integration",
+        "Integration with Hooks", "## Integration with Hooks",
+        "Use in Workflows", "## Use in Workflows",
+    ],
+    "Errors": [
+        "Errors", "## Errors",
+        "Error Codes", "## Error Codes",
+        "Exit Codes", "## Exit Codes",
+        "Common Errors", "## Common Errors",
+    ],
+}
+
+
+class ValidationIssue:
+    """A single finding (error or warning) produced during validation.
+
+    Stores the issue category, a human-readable message, a severity label
+    (``"error"`` unless overridden) and, optionally, the affected file and a
+    suggested remedy.
+    """
+
+    def __init__(self, issue_type: str, message: str, severity: str = "error",
+                 file_path: Optional[str] = None, suggestion: Optional[str] = None):
+        self.issue_type, self.message, self.severity = issue_type, message, severity
+        self.file_path, self.suggestion = file_path, suggestion
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a JSON-serializable dict of this issue.
+
+        ``file`` and ``suggestion`` are included only when they hold a
+        truthy value.
+        """
+        payload: Dict[str, Any] = dict(
+            type=self.issue_type,
+            message=self.message,
+            severity=self.severity,
+        )
+        optional_fields = (
+            ("file", self.file_path),
+            ("suggestion", self.suggestion),
+        )
+        payload.update({key: value for key, value in optional_fields if value})
+        return payload
+
+
+class SkillDocsValidator:
+    """Validates SKILL.md documentation against skill.yaml manifest.
+
+    Given a skill directory, loads ``skill.yaml`` and ``SKILL.md`` and runs up
+    to two checks: presence of the required section headers, and parity
+    between the manifest (name, inputs, outputs, status, tags) and the
+    documentation text. Findings accumulate in ``errors`` and ``warnings``.
+
+    Args:
+        skill_path: Directory expected to contain skill.yaml and SKILL.md.
+        check_headers: Run the required-header check when True.
+        check_manifest_parity: Run the manifest/documentation parity check
+            when True.
+    """
+
+    # Markdown heading line: one to six '#' characters, whitespace, title.
+    _HEADER_PATTERN = re.compile(r'^#{1,6}\s+(.+)$', re.MULTILINE)
+    # Manifest status values that do not trigger an "invalid_status" warning.
+    _KNOWN_STATUSES = ("active", "deprecated", "experimental")
+
+    def __init__(self, skill_path: str, check_headers: bool = True,
+                 check_manifest_parity: bool = True):
+        self.skill_path = Path(skill_path)
+        self.check_headers = check_headers
+        self.check_manifest_parity = check_manifest_parity
+        self.errors: List[ValidationIssue] = []
+        self.warnings: List[ValidationIssue] = []
+        self.manifest: Optional[Dict[str, Any]] = None
+        self.skill_md_content: Optional[str] = None
+
+    def validate(self) -> Dict[str, Any]:
+        """Run all enabled validation checks.
+
+        Any exception raised while loading or checking is logged and
+        recorded as a ``validation_error`` issue instead of propagating.
+
+        Returns:
+            A dict with keys ``valid``, ``skill_name``, ``skill_path``,
+            ``errors``, ``warnings`` and ``checks_run``.
+        """
+        try:
+            self._load_files()
+
+            if self.check_headers:
+                self._validate_headers()
+
+            if self.check_manifest_parity:
+                self._validate_manifest_parity()
+        except Exception as exc:
+            logger.error("Validation failed: %s", exc)
+            self.errors.append(ValidationIssue(
+                "validation_error",
+                str(exc),
+                severity="error"
+            ))
+
+        return self._build_result()
+
+    def _build_result(self) -> Dict[str, Any]:
+        """Assemble the result dict from the current validator state."""
+        name = self.manifest.get("name") if isinstance(self.manifest, dict) else None
+        return {
+            "valid": not self.errors,
+            # Always a non-empty string so consumers can measure/format it.
+            "skill_name": str(name) if name else "unknown",
+            "skill_path": str(self.skill_path),
+            "errors": [e.to_dict() for e in self.errors],
+            "warnings": [w.to_dict() for w in self.warnings],
+            "checks_run": {
+                "headers": self.check_headers,
+                "manifest_parity": self.check_manifest_parity
+            }
+        }
+
+    def _load_files(self) -> None:
+        """Load skill.yaml and SKILL.md files.
+
+        A missing or empty SKILL.md is recorded as an error issue rather than
+        raised, so the manifest stays available to the caller.
+
+        Raises:
+            SkillValidationError: If the skill path is invalid or not a
+                directory, skill.yaml is missing, unparseable or not a YAML
+                mapping, or SKILL.md cannot be read.
+        """
+        try:
+            validate_path(str(self.skill_path), must_exist=True)
+        except ValidationError as exc:
+            raise SkillValidationError(f"Invalid skill path: {exc}") from exc
+
+        if not self.skill_path.is_dir():
+            raise SkillValidationError(f"Skill path is not a directory: {self.skill_path}")
+
+        # Load skill.yaml
+        manifest_path = self.skill_path / "skill.yaml"
+        if not manifest_path.exists():
+            raise SkillValidationError(f"skill.yaml not found at {manifest_path}")
+
+        try:
+            with open(manifest_path, 'r', encoding='utf-8') as f:
+                manifest = yaml.safe_load(f)
+        except Exception as exc:
+            raise SkillValidationError(f"Failed to parse skill.yaml: {exc}") from exc
+
+        # safe_load yields None for an empty document and a list/scalar for
+        # non-mapping YAML; every later check relies on dict access.
+        if not isinstance(manifest, dict):
+            raise SkillValidationError(
+                "skill.yaml must contain a YAML mapping, "
+                f"got {type(manifest).__name__}"
+            )
+        self.manifest = manifest
+
+        # Load SKILL.md
+        skill_md_path = self.skill_path / "SKILL.md"
+        if not skill_md_path.exists():
+            self.errors.append(ValidationIssue(
+                "missing_file",
+                "SKILL.md file not found",
+                severity="error",
+                file_path=str(skill_md_path),
+                suggestion="Create SKILL.md documentation file"
+            ))
+            self.skill_md_content = ""
+            return
+
+        try:
+            with open(skill_md_path, 'r', encoding='utf-8') as f:
+                self.skill_md_content = f.read()
+        except Exception as exc:
+            raise SkillValidationError(f"Failed to read SKILL.md: {exc}") from exc
+
+        # The content checks skip falsy content, so without this an empty
+        # SKILL.md would be reported as fully valid.
+        if not self.skill_md_content.strip():
+            self.errors.append(ValidationIssue(
+                "empty_file",
+                "SKILL.md file is empty",
+                severity="error",
+                file_path=str(skill_md_path),
+                suggestion="Add documentation content to SKILL.md"
+            ))
+
+    def _validate_headers(self) -> None:
+        """Validate that SKILL.md contains all required headers.
+
+        A required header counts as present when any of its accepted
+        variations (with '#' characters removed) occurs, case-insensitively,
+        as a substring of some heading line. Does nothing when there is no
+        documentation content.
+        """
+        if not self.skill_md_content:
+            return
+
+        found_headers = {
+            match.group(1).strip().lower()
+            for match in self._HEADER_PATTERN.finditer(self.skill_md_content)
+        }
+
+        for required_header in REQUIRED_HEADERS:
+            variations = HEADER_VARIATIONS.get(required_header, [required_header])
+            # Variations may carry a markdown prefix ("## Usage"); compare on
+            # the bare title text.
+            needles = {v.replace("#", "").strip().lower() for v in variations}
+
+            if any(needle in header for needle in needles for header in found_headers):
+                continue
+
+            self.errors.append(ValidationIssue(
+                "missing_header",
+                f"Required header '{required_header}' not found in SKILL.md",
+                severity="error",
+                file_path="SKILL.md",
+                suggestion=f"Add a '## {required_header}' section to SKILL.md"
+            ))
+
+    def _validate_manifest_parity(self) -> None:
+        """Validate that SKILL.md matches skill.yaml manifest fields.
+
+        Warns when the skill name is absent from the documentation, when the
+        status is not one of the known values, or when no tags are defined;
+        delegates input and output coverage to the dedicated helpers. Does
+        nothing when the manifest or the documentation content is empty.
+        """
+        if not self.manifest or not self.skill_md_content:
+            return
+
+        # Check skill name (coerced to str so a non-string YAML value cannot
+        # break the substring test)
+        skill_name = self.manifest.get("name", "")
+        if skill_name and str(skill_name) not in self.skill_md_content:
+            self.warnings.append(ValidationIssue(
+                "missing_skill_name",
+                f"Skill name '{skill_name}' not found in SKILL.md",
+                severity="warning",
+                suggestion=f"Include the skill name '{skill_name}' in the documentation"
+            ))
+
+        # Check inputs
+        manifest_inputs = self.manifest.get("inputs", [])
+        if manifest_inputs:
+            self._validate_inputs_documented(manifest_inputs)
+
+        # Check outputs
+        manifest_outputs = self.manifest.get("outputs", [])
+        if manifest_outputs:
+            self._validate_outputs_documented(manifest_outputs)
+
+        # Check status
+        status = self.manifest.get("status", "")
+        if status and status not in self._KNOWN_STATUSES:
+            self.warnings.append(ValidationIssue(
+                "invalid_status",
+                f"Manifest status '{status}' is not a standard value",
+                severity="warning",
+                suggestion="Use 'active', 'deprecated', or 'experimental'"
+            ))
+
+        # Check tags
+        if not self.manifest.get("tags", []):
+            self.warnings.append(ValidationIssue(
+                "missing_tags",
+                "No tags defined in skill.yaml manifest",
+                severity="warning",
+                suggestion="Add relevant tags to improve skill discoverability"
+            ))
+
+    @staticmethod
+    def _spec_name(spec: Any, kind: str, separator: str) -> str:
+        """Extract the declared name from one manifest input/output entry.
+
+        String entries are cut at the first ``separator`` and stripped; dict
+        entries use their ``name`` key. Any other type is logged and yields
+        an empty string.
+        """
+        if isinstance(spec, str):
+            return spec.split(separator)[0].strip()
+        if isinstance(spec, dict):
+            name = spec.get("name", "")
+            return str(name) if name else ""
+        logger.warning("Unexpected %s format: %s", kind, type(spec))
+        return ""
+
+    def _is_mentioned(self, *needles: str) -> bool:
+        """Return True when any needle occurs verbatim in SKILL.md."""
+        content = self.skill_md_content or ""
+        return any(needle in content for needle in needles)
+
+    def _validate_inputs_documented(self, inputs: List[Any]) -> None:
+        """Validate that all manifest inputs are documented in SKILL.md.
+
+        Entries may be strings such as ``"input_name (optional)"`` or dicts
+        with a ``name`` key. An input is considered documented when its name,
+        or its ``--dashed-name`` CLI form, appears anywhere in the text;
+        otherwise an ``undocumented_input`` error is recorded.
+        """
+        for input_spec in inputs:
+            input_name = self._spec_name(input_spec, "input", "(")
+            if not input_name:
+                continue
+
+            # Backticked forms contain the bare forms, so two needles cover
+            # input_name, `input_name`, --input-name and `--input-name`.
+            cli_flag = f"--{input_name.replace('_', '-')}"
+            if self._is_mentioned(input_name, cli_flag):
+                continue
+
+            self.errors.append(ValidationIssue(
+                "undocumented_input",
+                f"Input '{input_name}' from manifest not documented in SKILL.md",
+                severity="error",
+                file_path="SKILL.md",
+                suggestion=f"Document the '{input_name}' input in the Inputs section"
+            ))
+
+    def _validate_outputs_documented(self, outputs: List[Any]) -> None:
+        """Validate that all manifest outputs are documented in SKILL.md.
+
+        Entries may be strings such as ``"output_name.json"`` (everything
+        from the first dot on is dropped) or dicts with a ``name`` key. An
+        output whose name does not appear in the text yields an
+        ``undocumented_output`` warning.
+        """
+        for output_spec in outputs:
+            output_name = self._spec_name(output_spec, "output", ".")
+            if not output_name:
+                continue
+
+            # Backticked or quoted occurrences contain the bare name.
+            if self._is_mentioned(output_name):
+                continue
+
+            self.warnings.append(ValidationIssue(
+                "undocumented_output",
+                f"Output '{output_name}' from manifest not documented in SKILL.md",
+                severity="warning",
+                file_path="SKILL.md",
+                suggestion=f"Document the '{output_name}' output in the Outputs section"
+            ))
+
+
+def build_response(ok: bool, skill_path: str, errors: Optional[List[Dict[str, Any]]] = None,
+                   warnings: Optional[List[Dict[str, Any]]] = None,
+                   details: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+    """Wrap a validation outcome in the standard response envelope.
+
+    ``status`` is ``"valid"`` when ``ok`` is truthy and ``"invalid"``
+    otherwise; falsy ``errors``, ``warnings`` and ``details`` are replaced by
+    fresh empty containers.
+    """
+    if ok:
+        status = "valid"
+    else:
+        status = "invalid"
+
+    response: Dict[str, Any] = {"ok": ok, "status": status, "skill_path": skill_path}
+    response["errors"] = errors if errors else []
+    response["warnings"] = warnings if warnings else []
+    response["details"] = details if details else {}
+    return response
+
+
+def print_summary_table(results: List[Dict[str, Any]]) -> None:
+    """Print a summary table of validation results.
+
+    Writes a banner, one row per result (skill name, VALID/INVALID, error
+    count, warning count) and a totals line to stdout. A missing or empty
+    ``skill_name`` is shown as ``unknown``.
+
+    Args:
+        results: Result dicts carrying ``skill_name``, ``valid``, ``errors``
+            and ``warnings``; absent keys fall back to "unknown", False and
+            empty lists respectively.
+    """
+    banner = "=" * 80
+    print("\n" + banner)
+    print("SKILL DOCUMENTATION VALIDATION SUMMARY")
+    print(banner)
+
+    if not results:
+        print("No skills validated.")
+        return
+
+    # Resolve display names once so the column width and the printed rows
+    # agree, and so a None name cannot break len() or string formatting.
+    names = [str(result.get("skill_name") or "unknown") for result in results]
+    name_width = max(len("Skill Name"), *(len(name) for name in names))
+
+    # Print header
+    print(f"{'Skill Name':<{name_width}}  {'Status':<8}  {'Errors':<8}  {'Warnings':<8}")
+    print("-" * 80)
+
+    total_errors = 0
+    total_warnings = 0
+    valid_count = 0
+
+    for skill_name, result in zip(names, results):
+        valid = result.get("valid", False)
+        error_count = len(result.get("errors") or [])
+        warning_count = len(result.get("warnings") or [])
+        status = "VALID" if valid else "INVALID"
+
+        print(f"{skill_name:<{name_width}}  {status:<8}  {error_count:<8}  {warning_count:<8}")
+
+        total_errors += error_count
+        total_warnings += warning_count
+        if valid:
+            valid_count += 1
+
+    # Print summary
+    print("-" * 80)
+    print(f"Total: {len(results)} skills | Valid: {valid_count} | "
+          f"Total Errors: {total_errors} | Total Warnings: {total_warnings}")
+    print(banner + "\n")
+
+
+# Option flags understood by the CLI; any other argument is positional.
+_CLI_FLAGS = ("--summary", "--no-headers", "--no-manifest-parity")
+
+
+def _print_error_response(skill_path: str, issue_type: str, message: str) -> None:
+    """Print a failed response envelope carrying a single error issue."""
+    response = build_response(
+        False,
+        skill_path,
+        errors=[{
+            "type": issue_type,
+            "message": message,
+            "severity": "error"
+        }]
+    )
+    print(json.dumps(response, indent=2))
+
+
+def _print_result_response(result: Dict[str, Any], skill_path: str) -> None:
+    """Print one validator result wrapped in the standard response envelope."""
+    response = build_response(
+        result["valid"],
+        skill_path,
+        errors=result["errors"],
+        warnings=result["warnings"],
+        details=result
+    )
+    print(json.dumps(response, indent=2))
+
+
+@telemetry_tracked(skill_name="docs.validate.skill_docs", caller="cli")
+def main(argv: Optional[List[str]] = None) -> int:
+    """Entry point for CLI execution.
+
+    Validates a single skill when the given directory contains skill.yaml;
+    otherwise validates every immediate subdirectory that contains one.
+    Output is JSON by default, or a text table with ``--summary``.
+
+    Args:
+        argv: Command-line arguments without the program name. ``None``
+            means "use ``sys.argv[1:]``". The skill path is the first
+            argument that is not one of the recognised flags, so flags may
+            precede it.
+
+    Returns:
+        0 when every validated skill is valid; 1 on any invalid skill, usage
+        error, bad path, empty batch or unexpected exception.
+    """
+    # Only substitute the process arguments when the caller passed nothing;
+    # an explicit empty list is a (usage-error) request in its own right.
+    if argv is None:
+        argv = sys.argv[1:]
+
+    positionals = [arg for arg in argv if arg not in _CLI_FLAGS]
+    if not positionals:
+        _print_error_response(
+            "",
+            "usage_error",
+            "Usage: skill_docs_validate.py <skill_path> [--summary] [--no-headers] [--no-manifest-parity]"
+        )
+        return 1
+
+    skill_path = positionals[0]
+    summary_mode = "--summary" in argv
+    check_headers = "--no-headers" not in argv
+    check_manifest_parity = "--no-manifest-parity" not in argv
+
+    try:
+        skill_dir = Path(skill_path)
+
+        # Single skill validation
+        if (skill_dir / "skill.yaml").exists():
+            result = SkillDocsValidator(
+                skill_path,
+                check_headers=check_headers,
+                check_manifest_parity=check_manifest_parity
+            ).validate()
+
+            if summary_mode:
+                print_summary_table([result])
+            else:
+                _print_result_response(result, skill_path)
+
+            return 0 if result["valid"] else 1
+
+        # Batch validation - the path must be a directory of skill directories
+        if not skill_dir.is_dir():
+            _print_error_response(
+                skill_path,
+                "invalid_path",
+                f"Path is not a directory: {skill_path}"
+            )
+            return 1
+
+        results = [
+            SkillDocsValidator(
+                str(subdir),
+                check_headers=check_headers,
+                check_manifest_parity=check_manifest_parity
+            ).validate()
+            for subdir in sorted(skill_dir.iterdir())
+            if subdir.is_dir() and (subdir / "skill.yaml").exists()
+        ]
+
+        if not results:
+            _print_error_response(
+                skill_path,
+                "no_skills_found",
+                f"No skills found in directory: {skill_path}"
+            )
+            return 1
+
+        if summary_mode:
+            print_summary_table(results)
+        else:
+            # One JSON document per skill, each followed by a blank line
+            for result in results:
+                _print_result_response(result, result["skill_path"])
+                print()
+
+        # Return error if any skill is invalid
+        return 1 if any(not r["valid"] for r in results) else 0
+
+    except Exception as exc:
+        # Top-level CLI boundary: report the failure as JSON instead of a
+        # traceback, keeping the full details in the log.
+        logger.error("Validation failed: %s", exc, exc_info=True)
+        _print_error_response(skill_path, "validation_error", str(exc))
+        return 1
+
+
+# Script entry point: call main() with the command-line arguments (program
+# name excluded) and use its integer return value as the process exit status.
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))