Initial commit

2025-11-29 18:26:08 +08:00
commit 8f22ddf339
295 changed files with 59710 additions and 0 deletions
--- a/skills/workflow.validate/workflow_validate.py
+++ b/skills/workflow.validate/workflow_validate.py
@@ -0,0 +1,422 @@
+#!/usr/bin/env python3
+"""workflow_validate.py – Implementation of the workflow.validate Skill."""
+
+import json
+import os
+import sys
+from typing import Any, Dict, List, Optional
+from datetime import datetime, timezone
+
+import yaml
+from pydantic import ValidationError as PydanticValidationError
+
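+# NOTE: no sys.path adjustment is performed here, so the `betty` package must
+# already be importable (installed, or project root on PYTHONPATH) when this
+# file is executed directly.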
+
+from betty.errors import SkillValidationError, WorkflowError  # noqa: E402
+from betty.logging_utils import setup_logger  # noqa: E402
+from betty.validation import ValidationError, validate_path  # noqa: E402
+from betty.telemetry_integration import telemetry_tracked  # noqa: E402
+from betty.models import WorkflowDefinition  # noqa: E402
+from betty.config import REGISTRY_DIR  # noqa: E402
+from betty.versioning import satisfies  # noqa: E402
+
+# Module-level logger obtained from the project's logging helper.
+logger = setup_logger(__name__)
+
+# Top-level keys that must be present in a workflow document.
+REQUIRED_FIELDS = ["steps"]
+# Steps can have either 'skill' or 'agent' (not both)
+# For skill steps: 'skill', 'version', and 'args' are required
+# For agent steps: 'agent' is required, 'input' is optional
+
+# Skills registry JSON file consulted when resolving version constraints.
+SKILLS_REGISTRY_FILE = os.path.join(REGISTRY_DIR, "skills.json")
+# Directory that receives the lockfiles written after successful resolution.
+LOCKFILE_DIR = os.path.join(REGISTRY_DIR, "runs")
+
+
+def build_response(ok: bool, path: str, errors: Optional[List[str]] = None, details: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+    response: Dict[str, Any] = {
+        "ok": ok,
+        "status": "success" if ok else "failed",
+        "errors": errors or [],
+        "path": path,
+    }
+
+    if details is not None:
+        response["details"] = details
+
+    return response
+
+
+def _load_workflow(path: str) -> Dict[str, Any]:
+    """Load a workflow YAML file into a dictionary."""
+    try:
+        with open(path, "r", encoding="utf-8") as handle:
+            data = yaml.safe_load(handle)
+    except FileNotFoundError as exc:
+        raise WorkflowError(f"Workflow file not found: {path}") from exc
+    except yaml.YAMLError as exc:
+        raise SkillValidationError(f"Invalid YAML syntax: {exc}") from exc
+
+    if data is None:
+        return {}
+
+    if not isinstance(data, dict):
+        raise SkillValidationError("Workflow root must be a mapping")
+
+    return data
+
+
+def _validate_required_fields(data: Dict[str, Any]) -> List[str]:
+    """Validate presence of required top-level workflow fields."""
+    errors: List[str] = []
+    for field in REQUIRED_FIELDS:
+        if field not in data:
+            errors.append(f"Missing required field: {field}")
+    return errors
+
+
+def _load_skills_registry() -> Dict[str, Any]:
+    """Load the skills registry from disk."""
+    try:
+        if not os.path.exists(SKILLS_REGISTRY_FILE):
+            logger.warning(f"Skills registry not found at {SKILLS_REGISTRY_FILE}")
+            return {"skills": []}
+
+        with open(SKILLS_REGISTRY_FILE, 'r') as f:
+            return json.load(f)
+    except Exception as e:
+        logger.error(f"Failed to load skills registry: {e}")
+        return {"skills": []}
+
+
+def _resolve_skill_version(skill_name: str, version_constraint: str, registry: Dict[str, Any]) -> Optional[str]:
+    """
+    Resolve a skill version from the registry that satisfies the constraint.
+
+    Args:
+        skill_name: Name of the skill to resolve
+        version_constraint: Version constraint (e.g., ">=1.0.0 <2.0.0")
+        registry: Skills registry data
+
+    Returns:
+        Resolved version string, or None if no matching version found
+    """
+    matching_versions = []
+
+    for skill in registry.get("skills", []):
+        if skill.get("name") == skill_name:
+            skill_version = skill.get("version")
+            if skill_version and satisfies(skill_version, version_constraint):
+                matching_versions.append(skill_version)
+
+    if not matching_versions:
+        return None
+
+    # Return the latest version that satisfies the constraint
+    # (assuming versions are stored in order, or we could sort them)
+    return matching_versions[-1]
+
+
+def _validate_steps(steps: Any) -> List[str]:
+    """Validate the steps section of the workflow."""
+    errors: List[str] = []
+
+    if not isinstance(steps, list):
+        errors.append("`steps` must be a list")
+        return errors
+
+    for index, step in enumerate(steps, start=1):
+        if not isinstance(step, dict):
+            errors.append(f"Step {index} must be a mapping")
+            continue
+
+        # Check if step has skill or agent field
+        has_skill = "skill" in step
+        has_agent = "agent" in step
+
+        if not has_skill and not has_agent:
+            errors.append(f"Step {index} must have either 'skill' or 'agent' field")
+            continue
+
+        if has_skill and has_agent:
+            errors.append(f"Step {index} cannot have both 'skill' and 'agent' fields")
+            continue
+
+        # Validate skill steps
+        if has_skill:
+            skill_value = step.get("skill")
+            if not isinstance(skill_value, str):
+                errors.append(f"Step {index} 'skill' must be a string")
+
+            # version field is required for skill steps
+            if "version" not in step:
+                errors.append(f"Step {index} missing 'version' constraint (required for skill steps)")
+            else:
+                version_value = step.get("version")
+                if not isinstance(version_value, str):
+                    errors.append(f"Step {index} 'version' must be a string")
+
+            # args field is required for skill steps
+            if "args" not in step:
+                errors.append(f"Step {index} missing 'args' field (required for skill steps)")
+            else:
+                args_value = step.get("args")
+                if not isinstance(args_value, list):
+                    errors.append(f"Step {index} 'args' must be a list")
+
+        # Validate agent steps
+        if has_agent:
+            agent_value = step.get("agent")
+            if not isinstance(agent_value, str):
+                errors.append(f"Step {index} 'agent' must be a string")
+
+            # input field is optional for agent steps, but if present must be a string
+            input_value = step.get("input")
+            if input_value is not None and not isinstance(input_value, str):
+                errors.append(f"Step {index} 'input' must be a string")
+
+    return errors
+
+
+def _validate_with_pydantic(data: Dict[str, Any]) -> List[str]:
+    """
+    Validate workflow data using Pydantic schema.
+
+    Args:
+        data: Workflow data dictionary
+
+    Returns:
+        List of validation errors (empty if valid)
+    """
+    errors: List[str] = []
+
+    try:
+        # Attempt Pydantic validation
+        WorkflowDefinition.model_validate(data)
+        logger.info("Pydantic schema validation passed")
+    except PydanticValidationError as exc:
+        logger.warning("Pydantic schema validation failed")
+        # Convert Pydantic errors to human-readable messages
+        for error in exc.errors():
+            field = ".".join(str(loc) for loc in error["loc"])
+            message = error["msg"]
+            error_type = error["type"]
+            errors.append(f"Schema validation error at '{field}': {message} (type: {error_type})")
+
+    return errors
+
+
+def _resolve_versions_and_create_lockfile(
+    workflow_name: str,
+    workflow_data: Dict[str, Any],
+    registry: Dict[str, Any]
+) -> Dict[str, Any]:
+    """
+    Resolve skill versions from registry and create a lockfile.
+
+    Args:
+        workflow_name: Name of the workflow
+        workflow_data: Workflow definition data
+        registry: Skills registry data
+
+    Returns:
+        Dictionary with resolved versions and lockfile path
+
+    Raises:
+        WorkflowError: If version resolution fails
+    """
+    resolved = []
+    errors = []
+
+    for index, step in enumerate(workflow_data.get("steps", []), start=1):
+        if "skill" in step:
+            skill_name = step.get("skill")
+            version_constraint = step.get("version")
+
+            if skill_name and version_constraint:
+                resolved_version = _resolve_skill_version(skill_name, version_constraint, registry)
+
+                if resolved_version:
+                    resolved.append({
+                        "skill": skill_name,
+                        "version": resolved_version,
+                        "constraint": version_constraint
+                    })
+                else:
+                    errors.append(
+                        f"Step {index}: No version of skill '{skill_name}' "
+                        f"satisfies constraint '{version_constraint}'"
+                    )
+
+    if errors:
+        raise WorkflowError(
+            f"Version resolution failed for workflow '{workflow_name}':\n" +
+            "\n".join(f"  - {err}" for err in errors)
+        )
+
+    # Create lockfile
+    timestamp = datetime.now(timezone.utc).isoformat()
+    lockfile_data = {
+        "workflow": workflow_name,
+        "timestamp": timestamp,
+        "resolved": resolved
+    }
+
+    # Ensure lockfile directory exists
+    os.makedirs(LOCKFILE_DIR, exist_ok=True)
+
+    # Generate lockfile name
+    lockfile_name = f"{timestamp.replace(':', '-').replace('.', '-')}.lock.json"
+    lockfile_path = os.path.join(LOCKFILE_DIR, lockfile_name)
+
+    # Write lockfile
+    try:
+        with open(lockfile_path, 'w') as f:
+            json.dump(lockfile_data, f, indent=2)
+        logger.info(f"Lockfile created at {lockfile_path}")
+    except Exception as e:
+        logger.error(f"Failed to create lockfile: {e}")
+        raise WorkflowError(f"Failed to create lockfile: {e}")
+
+    return {
+        "resolved": resolved,
+        "lockfile_path": lockfile_path,
+        "lockfile_data": lockfile_data
+    }
+
+
+def validate_workflow(path: str) -> Dict[str, Any]:
+    """
+    Validate a workflow definition file.
+
+    Validates workflow structure, version constraints, and resolves skill versions
+    from the registry. On success, creates a lockfile under registry/runs/.
+
+    Args:
+        path: Path to workflow YAML file
+
+    Returns:
+        Validation result dictionary
+
+    Raises:
+        SkillValidationError: If validation fails
+        WorkflowError: If version resolution fails
+    """
+    try:
+        validate_path(path, must_exist=True)
+    except ValidationError as exc:
+        raise SkillValidationError(str(exc)) from exc
+
+    workflow_data = _load_workflow(path)
+
+    errors: List[str] = []
+
+    # First, validate with Pydantic schema
+    schema_errors = _validate_with_pydantic(workflow_data)
+    errors.extend(schema_errors)
+
+    # Then run existing validation for more specific checks
+    errors.extend(_validate_required_fields(workflow_data))
+    errors.extend(_validate_steps(workflow_data.get("steps", [])))
+
+    if errors:
+        status = "failed"
+        result = {
+            "valid": False,
+            "errors": errors,
+            "status": status,
+            "path": path,
+        }
+        return result
+
+    # If validation passed, resolve versions and create lockfile
+    workflow_name = workflow_data.get("name", os.path.basename(path).replace(".yaml", ""))
+    registry = _load_skills_registry()
+
+    try:
+        lockfile_info = _resolve_versions_and_create_lockfile(workflow_name, workflow_data, registry)
+
+        result = {
+            "valid": True,
+            "errors": [],
+            "status": "validated",
+            "path": path,
+            "lockfile": lockfile_info["lockfile_path"],
+            "resolved_versions": lockfile_info["resolved"],
+        }
+    except WorkflowError as e:
+        # Version resolution failed
+        result = {
+            "valid": False,
+            "errors": [str(e)],
+            "status": "failed",
+            "path": path,
+        }
+
+    return result
+
+
+@telemetry_tracked(skill_name="workflow.validate", caller="cli")
+def main(argv: Optional[List[str]] = None) -> int:
+    """Entry point for CLI execution."""
+    argv = argv or sys.argv[1:]
+
+    if len(argv) != 1:
+        message = "Usage: workflow_validate.py <workflow.yaml>"
+        logger.error(message)
+        response = build_response(
+            False,
+            path="",
+            errors=[message],
+            details={"error": {"error": "UsageError", "message": message, "details": {}}},
+        )
+        print(json.dumps(response, indent=2))
+        return 1
+
+    workflow_path = argv[0]
+
+    try:
+        result = validate_workflow(workflow_path)
+
+        # Check if there are schema validation errors
+        has_schema_errors = any("Schema validation error" in err for err in result.get("errors", []))
+
+        details = result.copy()
+        if not result.get("valid", False) and has_schema_errors:
+            details["error"] = {
+                "type": "SchemaError",
+                "error": "SchemaError",
+                "message": "Workflow schema validation failed",
+                "details": {"errors": result.get("errors", [])}
+            }
+
+        response = build_response(
+            result.get("valid", False),
+            path=result.get("path", workflow_path),
+            errors=result.get("errors", []),
+            details=details,
+        )
+        print(json.dumps(response, indent=2))
+        return 0 if response["ok"] else 1
+    except (SkillValidationError, WorkflowError) as exc:
+        logger.error("Validation failed: %s", exc)
+        response = build_response(
+            False,
+            path=workflow_path,
+            errors=[str(exc)],
+            details={"error": {"error": type(exc).__name__, "message": str(exc), "details": {}}},
+        )
+        print(json.dumps(response, indent=2))
+        return 1
+    except Exception as exc:  # pragma: no cover - unexpected failures
+        logger.exception("Unexpected error during workflow validation")
+        response = build_response(
+            False,
+            path=workflow_path,
+            errors=[str(exc)],
+            details={"error": {"error": type(exc).__name__, "message": str(exc)}},
+        )
+        print(json.dumps(response, indent=2))
+        return 1
+
+
+# Run the CLI and use main()'s return value as the process exit status.
+if __name__ == "__main__":  # pragma: no cover - CLI entry point
+    sys.exit(main(sys.argv[1:]))