Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:26:08 +08:00
commit 8f22ddf339
295 changed files with 59710 additions and 0 deletions

View File

@@ -0,0 +1,422 @@
#!/usr/bin/env python3
"""workflow_validate.py Implementation of the workflow.validate Skill."""
import json
import os
import sys
from typing import Any, Dict, List, Optional
from datetime import datetime, timezone
import yaml
from pydantic import ValidationError as PydanticValidationError
# Ensure project root on path for betty imports when executed directly
from betty.errors import SkillValidationError, WorkflowError # noqa: E402
from betty.logging_utils import setup_logger # noqa: E402
from betty.validation import ValidationError, validate_path # noqa: E402
from betty.telemetry_integration import telemetry_tracked # noqa: E402
from betty.models import WorkflowDefinition # noqa: E402
from betty.config import REGISTRY_DIR # noqa: E402
from betty.versioning import satisfies # noqa: E402
logger = setup_logger(__name__)
REQUIRED_FIELDS = ["steps"]
# Steps can have either 'skill' or 'agent' (not both)
# For skill steps: 'skill', 'version', and 'args' are required
# For agent steps: 'agent' is required, 'input' is optional
SKILLS_REGISTRY_FILE = os.path.join(REGISTRY_DIR, "skills.json")
LOCKFILE_DIR = os.path.join(REGISTRY_DIR, "runs")
def build_response(ok: bool, path: str, errors: Optional[List[str]] = None, details: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
response: Dict[str, Any] = {
"ok": ok,
"status": "success" if ok else "failed",
"errors": errors or [],
"path": path,
}
if details is not None:
response["details"] = details
return response
def _load_workflow(path: str) -> Dict[str, Any]:
"""Load a workflow YAML file into a dictionary."""
try:
with open(path, "r", encoding="utf-8") as handle:
data = yaml.safe_load(handle)
except FileNotFoundError as exc:
raise WorkflowError(f"Workflow file not found: {path}") from exc
except yaml.YAMLError as exc:
raise SkillValidationError(f"Invalid YAML syntax: {exc}") from exc
if data is None:
return {}
if not isinstance(data, dict):
raise SkillValidationError("Workflow root must be a mapping")
return data
def _validate_required_fields(data: Dict[str, Any]) -> List[str]:
"""Validate presence of required top-level workflow fields."""
errors: List[str] = []
for field in REQUIRED_FIELDS:
if field not in data:
errors.append(f"Missing required field: {field}")
return errors
def _load_skills_registry() -> Dict[str, Any]:
"""Load the skills registry from disk."""
try:
if not os.path.exists(SKILLS_REGISTRY_FILE):
logger.warning(f"Skills registry not found at {SKILLS_REGISTRY_FILE}")
return {"skills": []}
with open(SKILLS_REGISTRY_FILE, 'r') as f:
return json.load(f)
except Exception as e:
logger.error(f"Failed to load skills registry: {e}")
return {"skills": []}
def _resolve_skill_version(skill_name: str, version_constraint: str, registry: Dict[str, Any]) -> Optional[str]:
"""
Resolve a skill version from the registry that satisfies the constraint.
Args:
skill_name: Name of the skill to resolve
version_constraint: Version constraint (e.g., ">=1.0.0 <2.0.0")
registry: Skills registry data
Returns:
Resolved version string, or None if no matching version found
"""
matching_versions = []
for skill in registry.get("skills", []):
if skill.get("name") == skill_name:
skill_version = skill.get("version")
if skill_version and satisfies(skill_version, version_constraint):
matching_versions.append(skill_version)
if not matching_versions:
return None
# Return the latest version that satisfies the constraint
# (assuming versions are stored in order, or we could sort them)
return matching_versions[-1]
def _validate_steps(steps: Any) -> List[str]:
"""Validate the steps section of the workflow."""
errors: List[str] = []
if not isinstance(steps, list):
errors.append("`steps` must be a list")
return errors
for index, step in enumerate(steps, start=1):
if not isinstance(step, dict):
errors.append(f"Step {index} must be a mapping")
continue
# Check if step has skill or agent field
has_skill = "skill" in step
has_agent = "agent" in step
if not has_skill and not has_agent:
errors.append(f"Step {index} must have either 'skill' or 'agent' field")
continue
if has_skill and has_agent:
errors.append(f"Step {index} cannot have both 'skill' and 'agent' fields")
continue
# Validate skill steps
if has_skill:
skill_value = step.get("skill")
if not isinstance(skill_value, str):
errors.append(f"Step {index} 'skill' must be a string")
# version field is required for skill steps
if "version" not in step:
errors.append(f"Step {index} missing 'version' constraint (required for skill steps)")
else:
version_value = step.get("version")
if not isinstance(version_value, str):
errors.append(f"Step {index} 'version' must be a string")
# args field is required for skill steps
if "args" not in step:
errors.append(f"Step {index} missing 'args' field (required for skill steps)")
else:
args_value = step.get("args")
if not isinstance(args_value, list):
errors.append(f"Step {index} 'args' must be a list")
# Validate agent steps
if has_agent:
agent_value = step.get("agent")
if not isinstance(agent_value, str):
errors.append(f"Step {index} 'agent' must be a string")
# input field is optional for agent steps, but if present must be a string
input_value = step.get("input")
if input_value is not None and not isinstance(input_value, str):
errors.append(f"Step {index} 'input' must be a string")
return errors
def _validate_with_pydantic(data: Dict[str, Any]) -> List[str]:
"""
Validate workflow data using Pydantic schema.
Args:
data: Workflow data dictionary
Returns:
List of validation errors (empty if valid)
"""
errors: List[str] = []
try:
# Attempt Pydantic validation
WorkflowDefinition.model_validate(data)
logger.info("Pydantic schema validation passed")
except PydanticValidationError as exc:
logger.warning("Pydantic schema validation failed")
# Convert Pydantic errors to human-readable messages
for error in exc.errors():
field = ".".join(str(loc) for loc in error["loc"])
message = error["msg"]
error_type = error["type"]
errors.append(f"Schema validation error at '{field}': {message} (type: {error_type})")
return errors
def _resolve_versions_and_create_lockfile(
workflow_name: str,
workflow_data: Dict[str, Any],
registry: Dict[str, Any]
) -> Dict[str, Any]:
"""
Resolve skill versions from registry and create a lockfile.
Args:
workflow_name: Name of the workflow
workflow_data: Workflow definition data
registry: Skills registry data
Returns:
Dictionary with resolved versions and lockfile path
Raises:
WorkflowError: If version resolution fails
"""
resolved = []
errors = []
for index, step in enumerate(workflow_data.get("steps", []), start=1):
if "skill" in step:
skill_name = step.get("skill")
version_constraint = step.get("version")
if skill_name and version_constraint:
resolved_version = _resolve_skill_version(skill_name, version_constraint, registry)
if resolved_version:
resolved.append({
"skill": skill_name,
"version": resolved_version,
"constraint": version_constraint
})
else:
errors.append(
f"Step {index}: No version of skill '{skill_name}' "
f"satisfies constraint '{version_constraint}'"
)
if errors:
raise WorkflowError(
f"Version resolution failed for workflow '{workflow_name}':\n" +
"\n".join(f" - {err}" for err in errors)
)
# Create lockfile
timestamp = datetime.now(timezone.utc).isoformat()
lockfile_data = {
"workflow": workflow_name,
"timestamp": timestamp,
"resolved": resolved
}
# Ensure lockfile directory exists
os.makedirs(LOCKFILE_DIR, exist_ok=True)
# Generate lockfile name
lockfile_name = f"{timestamp.replace(':', '-').replace('.', '-')}.lock.json"
lockfile_path = os.path.join(LOCKFILE_DIR, lockfile_name)
# Write lockfile
try:
with open(lockfile_path, 'w') as f:
json.dump(lockfile_data, f, indent=2)
logger.info(f"Lockfile created at {lockfile_path}")
except Exception as e:
logger.error(f"Failed to create lockfile: {e}")
raise WorkflowError(f"Failed to create lockfile: {e}")
return {
"resolved": resolved,
"lockfile_path": lockfile_path,
"lockfile_data": lockfile_data
}
def validate_workflow(path: str) -> Dict[str, Any]:
"""
Validate a workflow definition file.
Validates workflow structure, version constraints, and resolves skill versions
from the registry. On success, creates a lockfile under registry/runs/.
Args:
path: Path to workflow YAML file
Returns:
Validation result dictionary
Raises:
SkillValidationError: If validation fails
WorkflowError: If version resolution fails
"""
try:
validate_path(path, must_exist=True)
except ValidationError as exc:
raise SkillValidationError(str(exc)) from exc
workflow_data = _load_workflow(path)
errors: List[str] = []
# First, validate with Pydantic schema
schema_errors = _validate_with_pydantic(workflow_data)
errors.extend(schema_errors)
# Then run existing validation for more specific checks
errors.extend(_validate_required_fields(workflow_data))
errors.extend(_validate_steps(workflow_data.get("steps", [])))
if errors:
status = "failed"
result = {
"valid": False,
"errors": errors,
"status": status,
"path": path,
}
return result
# If validation passed, resolve versions and create lockfile
workflow_name = workflow_data.get("name", os.path.basename(path).replace(".yaml", ""))
registry = _load_skills_registry()
try:
lockfile_info = _resolve_versions_and_create_lockfile(workflow_name, workflow_data, registry)
result = {
"valid": True,
"errors": [],
"status": "validated",
"path": path,
"lockfile": lockfile_info["lockfile_path"],
"resolved_versions": lockfile_info["resolved"],
}
except WorkflowError as e:
# Version resolution failed
result = {
"valid": False,
"errors": [str(e)],
"status": "failed",
"path": path,
}
return result
@telemetry_tracked(skill_name="workflow.validate", caller="cli")
def main(argv: Optional[List[str]] = None) -> int:
"""Entry point for CLI execution."""
argv = argv or sys.argv[1:]
if len(argv) != 1:
message = "Usage: workflow_validate.py <workflow.yaml>"
logger.error(message)
response = build_response(
False,
path="",
errors=[message],
details={"error": {"error": "UsageError", "message": message, "details": {}}},
)
print(json.dumps(response, indent=2))
return 1
workflow_path = argv[0]
try:
result = validate_workflow(workflow_path)
# Check if there are schema validation errors
has_schema_errors = any("Schema validation error" in err for err in result.get("errors", []))
details = result.copy()
if not result.get("valid", False) and has_schema_errors:
details["error"] = {
"type": "SchemaError",
"error": "SchemaError",
"message": "Workflow schema validation failed",
"details": {"errors": result.get("errors", [])}
}
response = build_response(
result.get("valid", False),
path=result.get("path", workflow_path),
errors=result.get("errors", []),
details=details,
)
print(json.dumps(response, indent=2))
return 0 if response["ok"] else 1
except (SkillValidationError, WorkflowError) as exc:
logger.error("Validation failed: %s", exc)
response = build_response(
False,
path=workflow_path,
errors=[str(exc)],
details={"error": {"error": type(exc).__name__, "message": str(exc), "details": {}}},
)
print(json.dumps(response, indent=2))
return 1
except Exception as exc: # pragma: no cover - unexpected failures
logger.exception("Unexpected error during workflow validation")
response = build_response(
False,
path=workflow_path,
errors=[str(exc)],
details={"error": {"error": type(exc).__name__, "message": str(exc)}},
)
print(json.dumps(response, indent=2))
return 1
if __name__ == "__main__": # pragma: no cover - CLI entry point
sys.exit(main(sys.argv[1:]))