Initial commit
This commit is contained in:
516
skills/docs.validate.skilldocs/skill_docs_validate.py
Executable file
516
skills/docs.validate.skilldocs/skill_docs_validate.py
Executable file
@@ -0,0 +1,516 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
Validate SKILL.md documentation against skill.yaml manifests.

This skill ensures that SKILL.md files contain all required sections and that
documented fields match the corresponding skill.yaml manifest.
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
from betty.errors import BettyError, SkillValidationError
|
||||
from betty.logging_utils import setup_logger
|
||||
from betty.validation import validate_path, ValidationError
|
||||
from betty.telemetry_integration import telemetry_tracked
|
||||
|
||||
# Module-level logger shared by the validator class and the CLI entry point;
# obtained from betty's setup_logger helper.
logger = setup_logger(__name__)
|
||||
|
||||
# Sections every SKILL.md must contain. Each entry is the canonical name that
# is reported when the section is missing.
REQUIRED_HEADERS = [
    "Overview",
    "Purpose",
    "Usage",
    "Inputs",
    "Outputs",
    "Examples",
    "Integration",
    "Errors",
]

# Accepted spellings for each required section, keyed by canonical name.
# Entries may carry a leading "## " Markdown prefix; the header check strips
# '#' characters before comparing, so the prefixed and bare forms are
# equivalent there.
HEADER_VARIATIONS = {
    "Overview": ["Overview", "## Overview", "Description"],
    "Purpose": ["Purpose", "## Purpose"],
    "Usage": ["Usage", "## Usage", "How to Use", "## How to Use"],
    "Inputs": ["Inputs", "## Inputs", "Parameters", "## Parameters"],
    "Outputs": ["Outputs", "## Outputs", "Output", "## Output"],
    "Examples": ["Examples", "## Examples", "Example", "## Example"],
    "Integration": [
        "Integration",
        "## Integration",
        "Integration with Hooks",
        "## Integration with Hooks",
        "Use in Workflows",
        "## Use in Workflows",
    ],
    "Errors": [
        "Errors",
        "## Errors",
        "Error Codes",
        "## Error Codes",
        "Exit Codes",
        "## Exit Codes",
        "Common Errors",
        "## Common Errors",
    ],
}
|
||||
|
||||
|
||||
class ValidationIssue:
    """Represents a validation issue (error or warning).

    Attributes:
        issue_type: Machine-readable category, e.g. ``"missing_header"``.
        message: Human-readable description of the problem.
        severity: Severity label; callers in this module use ``"error"``
            and ``"warning"``.
        file_path: Optional path of the file the issue refers to.
        suggestion: Optional hint on how to resolve the issue.
    """

    def __init__(self, issue_type: str, message: str, severity: str = "error",
                 file_path: Optional[str] = None, suggestion: Optional[str] = None):
        self.issue_type = issue_type
        self.message = message
        self.severity = severity
        self.file_path = file_path
        self.suggestion = suggestion

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(issue_type={self.issue_type!r}, "
            f"message={self.message!r}, severity={self.severity!r}, "
            f"file_path={self.file_path!r}, suggestion={self.suggestion!r})"
        )

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization.

        ``type``, ``message`` and ``severity`` are always present. ``file``
        and ``suggestion`` are included only when the corresponding attribute
        is truthy, so empty strings and ``None`` are omitted.
        """
        result: Dict[str, Any] = {
            "type": self.issue_type,
            "message": self.message,
            "severity": self.severity,
        }
        if self.file_path:
            result["file"] = self.file_path
        if self.suggestion:
            result["suggestion"] = self.suggestion
        return result
|
||||
|
||||
|
||||
class SkillDocsValidator:
    """Validates SKILL.md documentation against skill.yaml manifest.

    Findings are accumulated on ``errors`` and ``warnings`` as
    ``ValidationIssue`` objects. ``validate()`` loads both files, runs the
    enabled checks and returns a JSON-serializable summary.
    """

    # Opening/closing line of a fenced code block: up to three spaces of
    # indentation followed by a run of at least three backticks or tildes.
    _FENCE_PATTERN = re.compile(r"^ {0,3}(`{3,}|~{3,})(.*)$")
    # Header syntax recognised by the check: 1-6 '#' at column 0, whitespace,
    # then the header text.
    _HEADER_PATTERN = re.compile(r"^#{1,6}\s+(.+)$")
    # Manifest status values that do not trigger an "invalid_status" warning.
    _STANDARD_STATUSES = ("active", "deprecated", "experimental")

    def __init__(self, skill_path: str, check_headers: bool = True,
                 check_manifest_parity: bool = True):
        """Store the skill directory and which checks ``validate()`` runs.

        Args:
            skill_path: Directory expected to hold skill.yaml and SKILL.md.
            check_headers: Run the required-header check.
            check_manifest_parity: Run the manifest/documentation parity check.
        """
        self.skill_path = Path(skill_path)
        self.check_headers = check_headers
        self.check_manifest_parity = check_manifest_parity
        self.errors: List[ValidationIssue] = []
        self.warnings: List[ValidationIssue] = []
        self.manifest: Optional[Dict[str, Any]] = None
        self.skill_md_content: Optional[str] = None
        # True once a missing SKILL.md has been reported, so later checks can
        # tell "file absent" (already an error) from "file present but empty".
        self._skill_md_missing = False

    def validate(self) -> Dict[str, Any]:
        """Run all validation checks.

        Returns:
            A dict with ``valid``, ``skill_name``, ``skill_path``, ``errors``,
            ``warnings`` and ``checks_run``. Any exception raised while
            loading or checking is logged, recorded as a ``validation_error``
            issue and reflected in ``valid`` being False; it is not re-raised.
        """
        try:
            self._load_files()
            if self.check_headers:
                self._validate_headers()
            if self.check_manifest_parity:
                self._validate_manifest_parity()
        except Exception as exc:  # boundary: report the failure instead of crashing
            logger.error("Validation failed: %s", exc)
            self.errors.append(ValidationIssue(
                "validation_error",
                str(exc),
                severity="error"
            ))
        return self._build_result()

    def _build_result(self) -> Dict[str, Any]:
        """Assemble the summary dict returned by ``validate()``."""
        if isinstance(self.manifest, dict) and self.manifest:
            # May be None when the manifest has no "name" key.
            skill_name = self.manifest.get("name")
        else:
            skill_name = "unknown"
        return {
            "valid": not self.errors,
            "skill_name": skill_name,
            "skill_path": str(self.skill_path),
            "errors": [issue.to_dict() for issue in self.errors],
            "warnings": [issue.to_dict() for issue in self.warnings],
            "checks_run": {
                "headers": self.check_headers,
                "manifest_parity": self.check_manifest_parity
            }
        }

    def _load_files(self) -> None:
        """Load skill.yaml and SKILL.md files.

        Raises:
            SkillValidationError: If the skill path is rejected by
                ``validate_path`` or is not a directory, if skill.yaml is
                missing, unparsable or not a mapping, or if SKILL.md exists
                but cannot be read. A missing SKILL.md is not raised; it is
                recorded as a ``missing_file`` error instead.
        """
        try:
            validate_path(str(self.skill_path), must_exist=True)
        except ValidationError as exc:
            raise SkillValidationError(f"Invalid skill path: {exc}") from exc

        if not self.skill_path.is_dir():
            raise SkillValidationError(f"Skill path is not a directory: {self.skill_path}")

        manifest_path = self.skill_path / "skill.yaml"
        if not manifest_path.exists():
            raise SkillValidationError(f"skill.yaml not found at {manifest_path}")

        try:
            with open(manifest_path, 'r', encoding='utf-8') as f:
                manifest = yaml.safe_load(f)
        except Exception as exc:
            raise SkillValidationError(f"Failed to parse skill.yaml: {exc}") from exc

        # An empty file parses to None and is tolerated (parity checks are
        # then skipped); any other non-mapping document cannot be queried
        # with .get() and is rejected with an explicit message.
        if manifest is not None and not isinstance(manifest, dict):
            raise SkillValidationError(
                f"skill.yaml must contain a mapping at the top level, "
                f"got {type(manifest).__name__}"
            )
        self.manifest = manifest

        skill_md_path = self.skill_path / "SKILL.md"
        if not skill_md_path.exists():
            self.errors.append(ValidationIssue(
                "missing_file",
                "SKILL.md file not found",
                severity="error",
                file_path=str(skill_md_path),
                suggestion="Create SKILL.md documentation file"
            ))
            self.skill_md_content = ""
            self._skill_md_missing = True
            return

        try:
            with open(skill_md_path, 'r', encoding='utf-8') as f:
                self.skill_md_content = f.read()
        except Exception as exc:
            raise SkillValidationError(f"Failed to read SKILL.md: {exc}") from exc

    @classmethod
    def _extract_headers(cls, markdown: str) -> Set[str]:
        """Return the text of every Markdown header outside fenced code blocks.

        Lines inside ``` or ~~~ fences are skipped so that '#' comments in
        code samples are not mistaken for section headers.
        """
        headers: Set[str] = set()
        open_fence = ""  # the fence run that opened the current block, if any
        for line in markdown.splitlines():
            fence = cls._FENCE_PATTERN.match(line)
            if fence:
                run, info = fence.group(1), fence.group(2)
                if not open_fence:
                    open_fence = run
                elif (run[0] == open_fence[0] and len(run) >= len(open_fence)
                        and not info.strip()):
                    # A closing fence uses the same character, is at least as
                    # long as the opener, and carries no info string.
                    open_fence = ""
                continue
            if open_fence:
                continue
            header = cls._HEADER_PATTERN.match(line)
            if header:
                headers.add(header.group(1).strip())
        return headers

    def _validate_headers(self) -> None:
        """Validate that SKILL.md contains all required headers.

        A required header counts as present when any of its accepted
        variations occurs, case-insensitively, as a substring of some header
        in the document. Nothing is checked when SKILL.md was reported
        missing or has not been loaded; an existing but empty SKILL.md is
        checked and therefore reports every header as missing.
        """
        if self._skill_md_missing or self.skill_md_content is None:
            return

        found_headers = [h.lower() for h in self._extract_headers(self.skill_md_content)]

        for required_header in REQUIRED_HEADERS:
            variations = HEADER_VARIATIONS.get(required_header, [required_header])
            # Variations may be written with a Markdown prefix ("## Usage");
            # drop the '#' characters before comparing.
            needles = {variation.replace("#", "").strip().lower() for variation in variations}
            if any(needle in header for needle in needles for header in found_headers):
                continue

            self.errors.append(ValidationIssue(
                "missing_header",
                f"Required header '{required_header}' not found in SKILL.md",
                severity="error",
                file_path="SKILL.md",
                suggestion=f"Add a '## {required_header}' section to SKILL.md"
            ))

    def _validate_manifest_parity(self) -> None:
        """Validate that SKILL.md matches skill.yaml manifest fields.

        Skipped when either the manifest or the documentation is empty.
        Undocumented inputs are errors; a missing skill name, undocumented
        outputs, a non-standard status and missing tags are warnings.
        """
        if not self.manifest or not self.skill_md_content:
            return

        # Check skill name (coerced to str so a numeric YAML name cannot
        # break the substring test).
        skill_name = self.manifest.get("name", "")
        if skill_name and str(skill_name) not in self.skill_md_content:
            self.warnings.append(ValidationIssue(
                "missing_skill_name",
                f"Skill name '{skill_name}' not found in SKILL.md",
                severity="warning",
                suggestion=f"Include the skill name '{skill_name}' in the documentation"
            ))

        # Check inputs
        manifest_inputs = self.manifest.get("inputs", [])
        if manifest_inputs:
            self._validate_inputs_documented(manifest_inputs)

        # Check outputs
        manifest_outputs = self.manifest.get("outputs", [])
        if manifest_outputs:
            self._validate_outputs_documented(manifest_outputs)

        # Check status
        status = self.manifest.get("status", "")
        if status and status not in self._STANDARD_STATUSES:
            self.warnings.append(ValidationIssue(
                "invalid_status",
                f"Manifest status '{status}' is not a standard value",
                severity="warning",
                suggestion="Use 'active', 'deprecated', or 'experimental'"
            ))

        # Check tags
        tags = self.manifest.get("tags", [])
        if not tags:
            self.warnings.append(ValidationIssue(
                "missing_tags",
                "No tags defined in skill.yaml manifest",
                severity="warning",
                suggestion="Add relevant tags to improve skill discoverability"
            ))

    @staticmethod
    def _spec_name(spec: Any, separator: str, label: str) -> str:
        """Extract the name from one manifest input/output entry.

        String entries are cut at the first ``separator`` and stripped; dict
        entries use their ``name`` key. Any other type is logged and yields
        an empty string, which callers treat as "skip this entry".
        """
        if isinstance(spec, str):
            return spec.split(separator)[0].strip()
        if isinstance(spec, dict):
            name = spec.get("name", "")
            return str(name) if name else ""
        logger.warning("Unexpected %s format: %s", label, type(spec))
        return ""

    def _validate_inputs_documented(self, inputs: List[Any]) -> None:
        """Validate that all manifest inputs are documented in SKILL.md.

        An input counts as documented when its name, or its CLI flag form
        (``--name-with-dashes``), occurs anywhere in the document. Backticked
        spellings need no separate test because they contain the plain name.
        Each undocumented input is recorded as an error.
        """
        if isinstance(inputs, str):
            # A scalar "inputs: foo" would otherwise be iterated per character.
            inputs = [inputs]
        content = self.skill_md_content or ""

        for input_spec in inputs:
            # String format is "input_name (optional)"; dict format has "name".
            input_name = self._spec_name(input_spec, "(", "input")
            if not input_name:
                continue

            cli_flag = f"--{input_name.replace('_', '-')}"
            if input_name in content or cli_flag in content:
                continue

            self.errors.append(ValidationIssue(
                "undocumented_input",
                f"Input '{input_name}' from manifest not documented in SKILL.md",
                severity="error",
                file_path="SKILL.md",
                suggestion=f"Document the '{input_name}' input in the Inputs section"
            ))

    def _validate_outputs_documented(self, outputs: List[Any]) -> None:
        """Validate that all manifest outputs are documented in SKILL.md.

        An output counts as documented when its name occurs anywhere in the
        document; backticked or quoted spellings contain the plain name and
        so need no separate test. Each undocumented output is recorded as a
        warning.
        """
        if isinstance(outputs, str):
            # A scalar "outputs: foo" would otherwise be iterated per character.
            outputs = [outputs]
        content = self.skill_md_content or ""

        for output_spec in outputs:
            # String format is "output_name.json" or just "output_name";
            # dict format has "name".
            output_name = self._spec_name(output_spec, ".", "output")
            if not output_name or output_name in content:
                continue

            self.warnings.append(ValidationIssue(
                "undocumented_output",
                f"Output '{output_name}' from manifest not documented in SKILL.md",
                severity="warning",
                file_path="SKILL.md",
                suggestion=f"Document the '{output_name}' output in the Outputs section"
            ))
|
||||
|
||||
|
||||
def build_response(ok: bool, skill_path: str, errors: Optional[List[Dict[str, Any]]] = None,
                   warnings: Optional[List[Dict[str, Any]]] = None,
                   details: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Build standard response format.

    Args:
        ok: Whether validation succeeded; also selects the ``status`` text.
        skill_path: Path the response refers to.
        errors: Serialized error issues; a falsy value becomes ``[]``.
        warnings: Serialized warning issues; a falsy value becomes ``[]``.
        details: Extra payload; a falsy value becomes ``{}``.

    Returns:
        A dict with the keys ``ok``, ``status`` (``"valid"`` or
        ``"invalid"``), ``skill_path``, ``errors``, ``warnings`` and
        ``details``.
    """
    return {
        "ok": ok,
        "status": "valid" if ok else "invalid",
        "skill_path": skill_path,
        "errors": errors or [],
        "warnings": warnings or [],
        "details": details or {},
    }
|
||||
|
||||
|
||||
def print_summary_table(results: List[Dict[str, Any]]) -> None:
    """Print a summary table of validation results.

    Each result contributes one row with its skill name, VALID/INVALID
    status, and error and warning counts, followed by a totals line. A
    missing or ``None`` ``skill_name`` is shown as ``unknown``, and missing
    or ``None`` ``errors``/``warnings`` count as zero.

    Args:
        results: Result dicts with ``skill_name``, ``valid``, ``errors`` and
            ``warnings`` keys, as returned by ``SkillDocsValidator.validate``.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("SKILL DOCUMENTATION VALIDATION SUMMARY")
    print(rule)

    if not results:
        print("No skills validated.")
        return

    # Normalise names once: a manifest without "name" yields None, which
    # supports neither len() nor a width format spec.
    names = [str(result.get("skill_name") or "unknown") for result in results]
    name_width = max(len(name) for name in ["Skill Name", *names])

    print(f"{'Skill Name':<{name_width}} {'Status':<8} {'Errors':<8} {'Warnings':<8}")
    print("-" * 80)

    total_errors = 0
    total_warnings = 0
    valid_count = 0

    for skill_name, result in zip(names, results):
        valid = result.get("valid", False)
        error_count = len(result.get("errors") or [])
        warning_count = len(result.get("warnings") or [])
        status = "VALID" if valid else "INVALID"

        print(f"{skill_name:<{name_width}} {status:<8} {error_count:<8} {warning_count:<8}")

        total_errors += error_count
        total_warnings += warning_count
        if valid:
            valid_count += 1

    print("-" * 80)
    print(f"Total: {len(results)} skills | Valid: {valid_count} | "
          f"Total Errors: {total_errors} | Total Warnings: {total_warnings}")
    print(rule + "\n")
|
||||
|
||||
|
||||
_USAGE_MESSAGE = (
    "Usage: skill_docs_validate.py <skill_path> [--summary] [--no-headers] [--no-manifest-parity]"
)


def _print_error_response(skill_path: str, error_type: str, message: str) -> int:
    """Print a failed JSON response carrying one error; return exit code 1."""
    response = build_response(
        False,
        skill_path,
        errors=[{
            "type": error_type,
            "message": message,
            "severity": "error"
        }]
    )
    print(json.dumps(response, indent=2))
    return 1


def _print_result_json(result: Dict[str, Any], skill_path: str) -> None:
    """Print one validation result as a standard JSON response."""
    response = build_response(
        result["valid"],
        skill_path,
        errors=result["errors"],
        warnings=result["warnings"],
        details=result
    )
    print(json.dumps(response, indent=2))


def _find_skill_dirs(parent: Path) -> List[Path]:
    """Return the direct subdirectories of *parent* containing skill.yaml, sorted."""
    return [
        subdir for subdir in sorted(parent.iterdir())
        if subdir.is_dir() and (subdir / "skill.yaml").exists()
    ]


@telemetry_tracked(skill_name="docs.validate.skill_docs", caller="cli")
def main(argv: Optional[List[str]] = None) -> int:
    """Entry point for CLI execution.

    The first argument that is not a flag is the skill path. If that
    directory contains skill.yaml it is validated as a single skill;
    otherwise each direct subdirectory containing skill.yaml is validated.

    Flags:
        --summary: Print a summary table instead of JSON.
        --no-headers: Skip the required-header check.
        --no-manifest-parity: Skip the manifest parity check.

    Args:
        argv: Command-line arguments without the program name. ``None``
            means ``sys.argv[1:]``; an explicit empty list is honoured and
            produces the usage error.

    Returns:
        0 when every validated skill is valid, otherwise 1 (including usage
        errors, an invalid path, no skills found, or an unexpected failure).
    """
    if argv is None:
        argv = sys.argv[1:]

    # Accept the path in any position so flags may precede it.
    positional = [arg for arg in argv if not arg.startswith("-")]
    if not positional:
        return _print_error_response("", "usage_error", _USAGE_MESSAGE)

    skill_path = positional[0]
    summary_mode = "--summary" in argv
    check_headers = "--no-headers" not in argv
    check_manifest_parity = "--no-manifest-parity" not in argv

    def validate_one(path: str) -> Dict[str, Any]:
        return SkillDocsValidator(
            path,
            check_headers=check_headers,
            check_manifest_parity=check_manifest_parity
        ).validate()

    try:
        skill_dir = Path(skill_path)
        is_single_skill = (skill_dir / "skill.yaml").exists()

        if is_single_skill:
            results = [validate_one(skill_path)]
        else:
            # Batch validation: the path must be a directory of skill folders.
            if not skill_dir.is_dir():
                return _print_error_response(
                    skill_path, "invalid_path", f"Path is not a directory: {skill_path}"
                )
            results = [validate_one(str(subdir)) for subdir in _find_skill_dirs(skill_dir)]
            if not results:
                return _print_error_response(
                    skill_path, "no_skills_found", f"No skills found in directory: {skill_path}"
                )

        if summary_mode:
            print_summary_table(results)
        elif is_single_skill:
            # Single-skill output echoes the path exactly as it was given.
            _print_result_json(results[0], skill_path)
        else:
            for result in results:
                _print_result_json(result, result["skill_path"])
                print()  # Blank line between results

        return 0 if all(result["valid"] for result in results) else 1

    except Exception as exc:  # top-level boundary: report as JSON, exit non-zero
        logger.error("Validation failed: %s", exc, exc_info=True)
        return _print_error_response(skill_path, "validation_error", str(exc))
|
||||
|
||||
|
||||
# Run the CLI when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
|
||||
Reference in New Issue
Block a user