517 lines
18 KiB
Python
Executable File
517 lines
18 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Validate SKILL.md documentation against skill.yaml manifests.
|
|
|
|
This skill ensures that SKILL.md files contain all required sections and that
|
|
documented fields match the corresponding skill.yaml manifest.
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
import os
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional, Set
|
|
|
|
import yaml
|
|
|
|
|
|
from betty.errors import BettyError, SkillValidationError
|
|
from betty.logging_utils import setup_logger
|
|
from betty.validation import validate_path, ValidationError
|
|
from betty.telemetry_integration import telemetry_tracked
|
|
|
|
logger = setup_logger(__name__)
|
|
|
|
# Required headers in SKILL.md
|
|
REQUIRED_HEADERS = [
|
|
"Overview",
|
|
"Purpose",
|
|
"Usage",
|
|
"Inputs",
|
|
"Outputs",
|
|
"Examples",
|
|
"Integration",
|
|
"Errors"
|
|
]
|
|
|
|
# Alternative header variations that are acceptable
|
|
HEADER_VARIATIONS = {
|
|
"Overview": ["Overview", "## Overview", "Description"],
|
|
"Purpose": ["Purpose", "## Purpose"],
|
|
"Usage": ["Usage", "## Usage", "How to Use", "## How to Use"],
|
|
"Inputs": ["Inputs", "## Inputs", "Parameters", "## Parameters"],
|
|
"Outputs": ["Outputs", "## Outputs", "Output", "## Output"],
|
|
"Examples": ["Examples", "## Examples", "Example", "## Example"],
|
|
"Integration": ["Integration", "## Integration", "Integration with Hooks", "## Integration with Hooks", "Use in Workflows", "## Use in Workflows"],
|
|
"Errors": ["Errors", "## Errors", "Error Codes", "## Error Codes", "Exit Codes", "## Exit Codes", "Common Errors", "## Common Errors"]
|
|
}
|
|
|
|
|
|
class ValidationIssue:
|
|
"""Represents a validation issue (error or warning)."""
|
|
|
|
def __init__(self, issue_type: str, message: str, severity: str = "error",
|
|
file_path: Optional[str] = None, suggestion: Optional[str] = None):
|
|
self.issue_type = issue_type
|
|
self.message = message
|
|
self.severity = severity
|
|
self.file_path = file_path
|
|
self.suggestion = suggestion
|
|
|
|
def to_dict(self) -> Dict[str, Any]:
|
|
"""Convert to dictionary for JSON serialization."""
|
|
result = {
|
|
"type": self.issue_type,
|
|
"message": self.message,
|
|
"severity": self.severity
|
|
}
|
|
if self.file_path:
|
|
result["file"] = self.file_path
|
|
if self.suggestion:
|
|
result["suggestion"] = self.suggestion
|
|
return result
|
|
|
|
|
|
class SkillDocsValidator:
|
|
"""Validates SKILL.md documentation against skill.yaml manifest."""
|
|
|
|
def __init__(self, skill_path: str, check_headers: bool = True,
|
|
check_manifest_parity: bool = True):
|
|
self.skill_path = Path(skill_path)
|
|
self.check_headers = check_headers
|
|
self.check_manifest_parity = check_manifest_parity
|
|
self.errors: List[ValidationIssue] = []
|
|
self.warnings: List[ValidationIssue] = []
|
|
self.manifest: Optional[Dict[str, Any]] = None
|
|
self.skill_md_content: Optional[str] = None
|
|
|
|
def validate(self) -> Dict[str, Any]:
|
|
"""Run all validation checks."""
|
|
try:
|
|
# Load manifest and documentation
|
|
self._load_files()
|
|
|
|
# Run checks
|
|
if self.check_headers:
|
|
self._validate_headers()
|
|
|
|
if self.check_manifest_parity:
|
|
self._validate_manifest_parity()
|
|
|
|
# Build response
|
|
return {
|
|
"valid": len(self.errors) == 0,
|
|
"skill_name": self.manifest.get("name") if self.manifest else "unknown",
|
|
"skill_path": str(self.skill_path),
|
|
"errors": [e.to_dict() for e in self.errors],
|
|
"warnings": [w.to_dict() for w in self.warnings],
|
|
"checks_run": {
|
|
"headers": self.check_headers,
|
|
"manifest_parity": self.check_manifest_parity
|
|
}
|
|
}
|
|
|
|
except Exception as exc:
|
|
logger.error("Validation failed: %s", exc)
|
|
self.errors.append(ValidationIssue(
|
|
"validation_error",
|
|
str(exc),
|
|
severity="error"
|
|
))
|
|
return {
|
|
"valid": False,
|
|
"skill_name": "unknown",
|
|
"skill_path": str(self.skill_path),
|
|
"errors": [e.to_dict() for e in self.errors],
|
|
"warnings": [w.to_dict() for w in self.warnings],
|
|
"checks_run": {
|
|
"headers": self.check_headers,
|
|
"manifest_parity": self.check_manifest_parity
|
|
}
|
|
}
|
|
|
|
def _load_files(self) -> None:
|
|
"""Load skill.yaml and SKILL.md files."""
|
|
# Validate skill path
|
|
try:
|
|
validate_path(str(self.skill_path), must_exist=True)
|
|
except ValidationError as exc:
|
|
raise SkillValidationError(f"Invalid skill path: {exc}") from exc
|
|
|
|
if not self.skill_path.is_dir():
|
|
raise SkillValidationError(f"Skill path is not a directory: {self.skill_path}")
|
|
|
|
# Load skill.yaml
|
|
manifest_path = self.skill_path / "skill.yaml"
|
|
if not manifest_path.exists():
|
|
raise SkillValidationError(f"skill.yaml not found at {manifest_path}")
|
|
|
|
try:
|
|
with open(manifest_path, 'r', encoding='utf-8') as f:
|
|
self.manifest = yaml.safe_load(f)
|
|
except Exception as exc:
|
|
raise SkillValidationError(f"Failed to parse skill.yaml: {exc}") from exc
|
|
|
|
# Load SKILL.md
|
|
skill_md_path = self.skill_path / "SKILL.md"
|
|
if not skill_md_path.exists():
|
|
self.errors.append(ValidationIssue(
|
|
"missing_file",
|
|
"SKILL.md file not found",
|
|
severity="error",
|
|
file_path=str(skill_md_path),
|
|
suggestion="Create SKILL.md documentation file"
|
|
))
|
|
self.skill_md_content = ""
|
|
return
|
|
|
|
try:
|
|
with open(skill_md_path, 'r', encoding='utf-8') as f:
|
|
self.skill_md_content = f.read()
|
|
except Exception as exc:
|
|
raise SkillValidationError(f"Failed to read SKILL.md: {exc}") from exc
|
|
|
|
def _validate_headers(self) -> None:
|
|
"""Validate that SKILL.md contains all required headers."""
|
|
if not self.skill_md_content:
|
|
return
|
|
|
|
# Extract all headers from the markdown
|
|
header_pattern = re.compile(r'^#{1,6}\s+(.+)$', re.MULTILINE)
|
|
found_headers = set()
|
|
|
|
for match in header_pattern.finditer(self.skill_md_content):
|
|
header_text = match.group(1).strip()
|
|
found_headers.add(header_text)
|
|
|
|
# Check each required header
|
|
for required_header in REQUIRED_HEADERS:
|
|
variations = HEADER_VARIATIONS.get(required_header, [required_header])
|
|
|
|
# Check if any variation exists
|
|
found = False
|
|
for variation in variations:
|
|
# Remove markdown prefix for comparison
|
|
clean_variation = variation.replace("#", "").strip()
|
|
if any(clean_variation.lower() in h.lower() for h in found_headers):
|
|
found = True
|
|
break
|
|
|
|
if not found:
|
|
self.errors.append(ValidationIssue(
|
|
"missing_header",
|
|
f"Required header '{required_header}' not found in SKILL.md",
|
|
severity="error",
|
|
file_path="SKILL.md",
|
|
suggestion=f"Add a '## {required_header}' section to SKILL.md"
|
|
))
|
|
|
|
def _validate_manifest_parity(self) -> None:
|
|
"""Validate that SKILL.md matches skill.yaml manifest fields."""
|
|
if not self.manifest or not self.skill_md_content:
|
|
return
|
|
|
|
# Check skill name
|
|
skill_name = self.manifest.get("name", "")
|
|
if skill_name and skill_name not in self.skill_md_content:
|
|
self.warnings.append(ValidationIssue(
|
|
"missing_skill_name",
|
|
f"Skill name '{skill_name}' not found in SKILL.md",
|
|
severity="warning",
|
|
suggestion=f"Include the skill name '{skill_name}' in the documentation"
|
|
))
|
|
|
|
# Check inputs
|
|
manifest_inputs = self.manifest.get("inputs", [])
|
|
if manifest_inputs:
|
|
self._validate_inputs_documented(manifest_inputs)
|
|
|
|
# Check outputs
|
|
manifest_outputs = self.manifest.get("outputs", [])
|
|
if manifest_outputs:
|
|
self._validate_outputs_documented(manifest_outputs)
|
|
|
|
# Check status
|
|
status = self.manifest.get("status", "")
|
|
if status and status not in ["active", "deprecated", "experimental"]:
|
|
self.warnings.append(ValidationIssue(
|
|
"invalid_status",
|
|
f"Manifest status '{status}' is not a standard value",
|
|
severity="warning",
|
|
suggestion="Use 'active', 'deprecated', or 'experimental'"
|
|
))
|
|
|
|
# Check tags
|
|
tags = self.manifest.get("tags", [])
|
|
if not tags:
|
|
self.warnings.append(ValidationIssue(
|
|
"missing_tags",
|
|
"No tags defined in skill.yaml manifest",
|
|
severity="warning",
|
|
suggestion="Add relevant tags to improve skill discoverability"
|
|
))
|
|
|
|
def _validate_inputs_documented(self, inputs: List[Any]) -> None:
|
|
"""Validate that all manifest inputs are documented in SKILL.md."""
|
|
for input_spec in inputs:
|
|
# Handle both string format and dict format
|
|
if isinstance(input_spec, str):
|
|
# Simple string format: "input_name (optional)"
|
|
input_name = input_spec.split("(")[0].strip()
|
|
elif isinstance(input_spec, dict):
|
|
# Dictionary format with name, type, description
|
|
input_name = input_spec.get("name", "")
|
|
else:
|
|
logger.warning("Unexpected input format: %s", type(input_spec))
|
|
continue
|
|
|
|
if not input_name:
|
|
continue
|
|
|
|
# Check if input is mentioned in the documentation
|
|
# Look for various patterns: input_name, --input-name, `input_name`, etc.
|
|
patterns = [
|
|
input_name,
|
|
f"`{input_name}`",
|
|
f"--{input_name.replace('_', '-')}",
|
|
f"`--{input_name.replace('_', '-')}`"
|
|
]
|
|
|
|
found = any(pattern in self.skill_md_content for pattern in patterns)
|
|
|
|
if not found:
|
|
self.errors.append(ValidationIssue(
|
|
"undocumented_input",
|
|
f"Input '{input_name}' from manifest not documented in SKILL.md",
|
|
severity="error",
|
|
file_path="SKILL.md",
|
|
suggestion=f"Document the '{input_name}' input in the Inputs section"
|
|
))
|
|
|
|
def _validate_outputs_documented(self, outputs: List[Any]) -> None:
|
|
"""Validate that all manifest outputs are documented in SKILL.md."""
|
|
for output_spec in outputs:
|
|
# Handle both string format and dict format
|
|
if isinstance(output_spec, str):
|
|
# Simple string format: "output_name.json" or just "output_name"
|
|
output_name = output_spec.split(".")[0].strip()
|
|
elif isinstance(output_spec, dict):
|
|
# Dictionary format with name, type, description
|
|
output_name = output_spec.get("name", "")
|
|
else:
|
|
logger.warning("Unexpected output format: %s", type(output_spec))
|
|
continue
|
|
|
|
if not output_name:
|
|
continue
|
|
|
|
# Check if output is mentioned in the documentation
|
|
patterns = [
|
|
output_name,
|
|
f"`{output_name}`",
|
|
f'"{output_name}"'
|
|
]
|
|
|
|
found = any(pattern in self.skill_md_content for pattern in patterns)
|
|
|
|
if not found:
|
|
self.warnings.append(ValidationIssue(
|
|
"undocumented_output",
|
|
f"Output '{output_name}' from manifest not documented in SKILL.md",
|
|
severity="warning",
|
|
file_path="SKILL.md",
|
|
suggestion=f"Document the '{output_name}' output in the Outputs section"
|
|
))
|
|
|
|
|
|
def build_response(ok: bool, skill_path: str, errors: Optional[List[Dict[str, Any]]] = None,
|
|
warnings: Optional[List[Dict[str, Any]]] = None,
|
|
details: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
|
"""Build standard response format."""
|
|
return {
|
|
"ok": ok,
|
|
"status": "valid" if ok else "invalid",
|
|
"skill_path": skill_path,
|
|
"errors": errors or [],
|
|
"warnings": warnings or [],
|
|
"details": details or {}
|
|
}
|
|
|
|
|
|
def print_summary_table(results: List[Dict[str, Any]]) -> None:
|
|
"""Print a summary table of validation results."""
|
|
print("\n" + "="*80)
|
|
print("SKILL DOCUMENTATION VALIDATION SUMMARY")
|
|
print("="*80)
|
|
|
|
if not results:
|
|
print("No skills validated.")
|
|
return
|
|
|
|
# Calculate column widths
|
|
max_name_len = max(len(r.get("skill_name", "")) for r in results)
|
|
max_name_len = max(max_name_len, len("Skill Name"))
|
|
|
|
# Print header
|
|
print(f"{'Skill Name':<{max_name_len}} {'Status':<8} {'Errors':<8} {'Warnings':<8}")
|
|
print("-" * 80)
|
|
|
|
# Print each result
|
|
total_errors = 0
|
|
total_warnings = 0
|
|
valid_count = 0
|
|
|
|
for result in results:
|
|
skill_name = result.get("skill_name", "unknown")
|
|
valid = result.get("valid", False)
|
|
error_count = len(result.get("errors", []))
|
|
warning_count = len(result.get("warnings", []))
|
|
|
|
status = "VALID" if valid else "INVALID"
|
|
status_color = status
|
|
|
|
print(f"{skill_name:<{max_name_len}} {status:<8} {error_count:<8} {warning_count:<8}")
|
|
|
|
total_errors += error_count
|
|
total_warnings += warning_count
|
|
if valid:
|
|
valid_count += 1
|
|
|
|
# Print summary
|
|
print("-" * 80)
|
|
print(f"Total: {len(results)} skills | Valid: {valid_count} | "
|
|
f"Total Errors: {total_errors} | Total Warnings: {total_warnings}")
|
|
print("="*80 + "\n")
|
|
|
|
|
|
@telemetry_tracked(skill_name="docs.validate.skill_docs", caller="cli")
|
|
def main(argv: Optional[List[str]] = None) -> int:
|
|
"""Entry point for CLI execution."""
|
|
argv = argv or sys.argv[1:]
|
|
|
|
# Parse arguments
|
|
if len(argv) == 0:
|
|
response = build_response(
|
|
False,
|
|
"",
|
|
errors=[{
|
|
"type": "usage_error",
|
|
"message": "Usage: skill_docs_validate.py <skill_path> [--summary] [--no-headers] [--no-manifest-parity]",
|
|
"severity": "error"
|
|
}]
|
|
)
|
|
print(json.dumps(response, indent=2))
|
|
return 1
|
|
|
|
skill_path = argv[0]
|
|
summary_mode = "--summary" in argv
|
|
check_headers = "--no-headers" not in argv
|
|
check_manifest_parity = "--no-manifest-parity" not in argv
|
|
|
|
try:
|
|
# Handle batch validation of multiple skills if path is parent directory
|
|
skill_dir = Path(skill_path)
|
|
|
|
# Check if this is a single skill or batch validation
|
|
if (skill_dir / "skill.yaml").exists():
|
|
# Single skill validation
|
|
validator = SkillDocsValidator(
|
|
skill_path,
|
|
check_headers=check_headers,
|
|
check_manifest_parity=check_manifest_parity
|
|
)
|
|
result = validator.validate()
|
|
|
|
if summary_mode:
|
|
print_summary_table([result])
|
|
else:
|
|
response = build_response(
|
|
result["valid"],
|
|
skill_path,
|
|
errors=result["errors"],
|
|
warnings=result["warnings"],
|
|
details=result
|
|
)
|
|
print(json.dumps(response, indent=2))
|
|
|
|
return 0 if result["valid"] else 1
|
|
|
|
else:
|
|
# Batch validation - check if directory contains skill subdirectories
|
|
if not skill_dir.is_dir():
|
|
response = build_response(
|
|
False,
|
|
skill_path,
|
|
errors=[{
|
|
"type": "invalid_path",
|
|
"message": f"Path is not a directory: {skill_path}",
|
|
"severity": "error"
|
|
}]
|
|
)
|
|
print(json.dumps(response, indent=2))
|
|
return 1
|
|
|
|
# Find all skill directories
|
|
results = []
|
|
for subdir in sorted(skill_dir.iterdir()):
|
|
if subdir.is_dir() and (subdir / "skill.yaml").exists():
|
|
validator = SkillDocsValidator(
|
|
str(subdir),
|
|
check_headers=check_headers,
|
|
check_manifest_parity=check_manifest_parity
|
|
)
|
|
result = validator.validate()
|
|
results.append(result)
|
|
|
|
if not results:
|
|
response = build_response(
|
|
False,
|
|
skill_path,
|
|
errors=[{
|
|
"type": "no_skills_found",
|
|
"message": f"No skills found in directory: {skill_path}",
|
|
"severity": "error"
|
|
}]
|
|
)
|
|
print(json.dumps(response, indent=2))
|
|
return 1
|
|
|
|
# Output results
|
|
if summary_mode:
|
|
print_summary_table(results)
|
|
else:
|
|
# Print full JSON for each skill
|
|
for result in results:
|
|
response = build_response(
|
|
result["valid"],
|
|
result["skill_path"],
|
|
errors=result["errors"],
|
|
warnings=result["warnings"],
|
|
details=result
|
|
)
|
|
print(json.dumps(response, indent=2))
|
|
print() # Blank line between results
|
|
|
|
# Return error if any skill is invalid
|
|
any_invalid = any(not r["valid"] for r in results)
|
|
return 1 if any_invalid else 0
|
|
|
|
except Exception as exc:
|
|
logger.error("Validation failed: %s", exc, exc_info=True)
|
|
response = build_response(
|
|
False,
|
|
skill_path,
|
|
errors=[{
|
|
"type": "validation_error",
|
|
"message": str(exc),
|
|
"severity": "error"
|
|
}]
|
|
)
|
|
print(json.dumps(response, indent=2))
|
|
return 1
|
|
|
|
|
|
if __name__ == "__main__":
|
|
sys.exit(main(sys.argv[1:]))
|