gh-epieczko-betty/skills/docs.sync.readme/readme_sync.py

#!/usr/bin/env python3
"""
readme_sync.py - Implementation of the docs.sync.readme Skill
Regenerates the top-level README.md to reflect all current registered skills and agents.
"""

import os
import sys
import json
import re
from typing import Dict, Any, List, Optional
from datetime import datetime, timezone
from pathlib import Path


from betty.config import BASE_DIR, REGISTRY_FILE, AGENTS_REGISTRY_FILE
from betty.logging_utils import setup_logger

# Module-level logger shared by every function in this file.
logger = setup_logger(__name__)


def load_registry(registry_path: str) -> Dict[str, Any]:
    """
    Load a JSON registry file.

    Args:
        registry_path: Path to registry JSON file

    Returns:
        Parsed registry data. If the file does not exist, an empty registry
        is returned instead: ``{"skills": []}`` when the path contains
        "skills", otherwise ``{"agents": []}``.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    try:
        # Decode as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding.
        with open(registry_path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        logger.warning(f"Registry file not found: {registry_path}")
        # str() keeps the substring check working when a pathlib.Path is passed.
        if "skills" in str(registry_path):
            return {"skills": []}
        return {"agents": []}
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse JSON from {registry_path}: {e}")
        raise


def categorize_skills(skills: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
    """
    Categorize skills by their tags into foundation, api, infrastructure, and governance groups.

    Only skills whose status is "active" are considered, and skills whose
    name starts with "test." are skipped. The first matching rule wins:

    1. api            - tagged api/openapi/asyncapi, or name starts with "api."
    2. infrastructure - tagged agents/command/hook/policy/plugin/registry
    3. governance     - tagged governance/policy/audit
    4. foundation     - name starts with "skill.", "registry." or "workflow."
    5. infrastructure - everything else

    Because rule 2 is checked before rule 3, a skill tagged "policy" always
    lands in infrastructure.

    Args:
        skills: List of skill dictionaries from registry

    Returns:
        Dictionary mapping category names to lists of skills. Within each
        category, skills are de-duplicated by name (first occurrence wins)
        and sorted by name. A missing or null name is treated as "".
    """
    api_tags = ("api", "openapi", "asyncapi")
    infrastructure_tags = ("agents", "command", "hook", "policy", "plugin", "registry")
    governance_tags = ("governance", "policy", "audit")
    foundation_prefixes = ("skill.", "registry.", "workflow.")

    def skill_name(skill: Dict[str, Any]) -> str:
        # Tolerate a missing or null "name" everywhere, including the
        # de-duplication and sort steps below.
        return skill.get("name") or ""

    categories: Dict[str, List[Dict[str, Any]]] = {
        "foundation": [],
        "api": [],
        "infrastructure": [],
        "governance": [],
    }

    for skill in skills:
        # Only include active skills
        if skill.get("status") != "active":
            continue

        name = skill_name(skill)

        # Skip test skills
        if name.startswith("test."):
            continue

        # A registry entry may carry "tags": null; treat it like no tags.
        tags = skill.get("tags") or []

        # Categorize based on tags or name patterns
        if any(tag in api_tags for tag in tags) or name.startswith("api."):
            bucket = "api"
        elif any(tag in infrastructure_tags for tag in tags):
            bucket = "infrastructure"
        elif any(tag in governance_tags for tag in tags):
            bucket = "governance"
        elif name.startswith(foundation_prefixes):
            bucket = "foundation"
        else:
            # Default to infrastructure if unclear
            bucket = "infrastructure"

        categories[bucket].append(skill)

    # Remove duplicates (first occurrence wins) and sort by name
    for category in list(categories):
        first_by_name: Dict[str, Dict[str, Any]] = {}
        for skill in categories[category]:
            first_by_name.setdefault(skill_name(skill), skill)
        categories[category] = sorted(first_by_name.values(), key=skill_name)

    return categories


def format_skill_table(skills: List[Dict[str, Any]]) -> str:
    """
    Format a list of skills as a markdown table.

    Each row shows the skill name in bold and the first line of its
    description with runs of whitespace collapsed to single spaces.
    Pipe characters in either cell are backslash-escaped so they cannot be
    mistaken for column separators.

    Args:
        skills: List of skill dictionaries

    Returns:
        Markdown table string. An empty list yields a table with a single
        placeholder row.
    """
    def cell(text: str) -> str:
        # Normalise already-escaped pipes first so they are not escaped twice.
        return text.replace("\\|", "|").replace("|", "\\|")

    lines = ["| Skill | Purpose |", "|--------|----------|"]

    if not skills:
        lines.append("| _(No skills in this category)_ | |")
        return "\n".join(lines)

    for skill in skills:
        name = skill.get("name") or ""
        # Get first line of description only; a null description counts as empty
        desc = (skill.get("description") or "").strip().split("\n")[0]
        # Clean up description (remove extra whitespace)
        desc = " ".join(desc.split())

        lines.append(f"| **{cell(name)}** | {cell(desc)} |")

    return "\n".join(lines)


def format_agents_docs(agents: List[Dict[str, Any]]) -> str:
    """
    Render the registered agents as a markdown bullet list.

    Each bullet links to ``agents/<name>/README.md`` and is followed by the
    first line of the agent's description with whitespace collapsed.

    Args:
        agents: List of agent dictionaries

    Returns:
        Markdown list of agent links, or a placeholder when there are no agents
    """
    if not agents:
        return "_(No agents registered)_"

    def bullet(entry: Dict[str, Any]) -> str:
        agent_name = entry.get("name", "")
        # Keep only the text before the first newline of the stripped description.
        first_line = entry.get("description", "").strip().partition("\n")[0]
        summary = " ".join(first_line.split())
        return f"* [{agent_name}](agents/{agent_name}/README.md) — {summary}"

    return "\n".join(bullet(entry) for entry in agents)


def update_readme_section(
    content: str,
    section_marker: str,
    end_marker: str,
    new_content: str
) -> str:
    """
    Update a section of the README between two markers.

    Everything between the first occurrence of ``section_marker`` and the
    next line consisting solely of ``end_marker`` is replaced. Both markers
    are kept. If either marker cannot be found, a warning is logged and the
    content is returned unchanged.

    Args:
        content: Full README content
        section_marker: Start marker (e.g., "## 🧩 Current Core Skills")
        end_marker: End marker (e.g., "---")
        new_content: New content to insert between markers

    Returns:
        Updated README content
    """
    # Find section start
    section_start = content.find(section_marker)
    if section_start == -1:
        logger.warning(f"Section marker not found: {section_marker}")
        return content

    # Find section end after the start - look for the end marker on its own line
    search_start = section_start + len(section_marker)
    end_marker_pattern = f"\n{end_marker}\n"
    section_end = content.find(end_marker_pattern, search_start)

    if section_end == -1:
        logger.warning(f"End marker not found after {section_marker}")
        return content

    before = content[:section_start]
    after = content[section_end + 1:]  # +1 so `after` starts at the end marker itself

    # Always leave a blank line before the end marker. In Markdown, a text
    # line directly followed by "---" is a setext heading, which would turn
    # the last line of the section into a heading.
    body = new_content.rstrip("\n")
    section = section_marker + "\n\n"
    if body:
        section += body + "\n\n"

    return before + section + after


def generate_skills_section(categories: Dict[str, List[Dict[str, Any]]]) -> str:
    """
    Build the markdown body of the skills section.

    The body is an intro sentence, one heading plus table per non-empty
    category (foundation, api, infrastructure, governance, in that order),
    and a closing sentence.

    Args:
        categories: Dictionary of categorized skills

    Returns:
        Markdown content for skills section
    """
    # (category key, heading) pairs in the order they appear in the README.
    section_titles = (
        ("foundation", "### Foundation Skills"),
        ("api", "### API Development Skills"),
        ("infrastructure", "### Infrastructure Skills"),
        ("governance", "### Governance Skills"),
    )

    parts = [
        "Betty's self-referential \"kernel\" of skills bootstraps the rest of the system:",
        "",
    ]

    for key, heading in section_titles:
        members = categories[key]
        # Empty categories are left out entirely rather than shown as empty tables.
        if not members:
            continue
        parts += [heading, "", format_skill_table(members), ""]

    parts.append("These skills form the baseline for an **AI-native SDLC** where creation, validation, registration, and orchestration are themselves skills.")

    return "\n".join(parts)


def update_agents_section(content: str, agents: List[Dict[str, Any]]) -> str:
    """
    Update the Agents Documentation section.

    The section runs from the "### Agents Documentation" heading to the next
    line starting with "##" (or to the end of the content). The agent list
    replaces everything after the intro sentence "Each agent has a
    `README.md` in its directory:" when that sentence is present inside the
    section; otherwise it replaces everything after the heading.

    Args:
        content: Full README content
        agents: List of active agents

    Returns:
        Updated README content, or the content unchanged if the heading is
        not found
    """
    heading = "### Agents Documentation"
    intro = "Each agent has a `README.md` in its directory:"

    agents_docs = format_agents_docs(agents)

    # Find the "### Agents Documentation" section
    section_start = content.find(heading)
    if section_start == -1:
        logger.warning("Agents Documentation section not found")
        return content
    heading_end = section_start + len(heading)

    # Find the next ### or ## to determine section end. Searching from the
    # end of the heading (not one character past it) also catches a heading
    # that immediately follows this one.
    next_section = content.find("\n##", heading_end)
    if next_section == -1:
        next_section = len(content)

    # Look for the intro sentence only inside this section; a match in a
    # later section would make `before` and `after` overlap.
    intro_start = content.find(intro, heading_end, next_section)
    if intro_start == -1:
        # No intro sentence: keep the heading line and put the list below it.
        before = content[:heading_end] + "\n"
    else:
        before = content[:intro_start + len(intro)]

    after = content[next_section:]

    return before + "\n" + agents_docs + "\n\n" + after


def generate_readme(
    skills_data: Dict[str, Any],
    agents_data: Dict[str, Any]
) -> tuple[str, Dict[str, Any]]:
    """
    Generate updated README.md content.

    Reads the current README.md from BASE_DIR, regenerates the skills section
    and the agents documentation list, and returns the result without
    writing it.

    Args:
        skills_data: Parsed skills.json
        agents_data: Parsed agents.json

    Returns:
        Tuple of (updated_readme_content, report_dict). The report holds the
        per-category skill counts, the total skill count, the agent count
        and a UTC timestamp.

    Raises:
        FileNotFoundError: If README.md does not exist under BASE_DIR.
    """
    readme_path = os.path.join(BASE_DIR, "README.md")

    # Read current README. UTF-8 is explicit because the section heading
    # searched for below contains an emoji; with a non-UTF-8 locale encoding
    # the heading would not be found.
    try:
        with open(readme_path, encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        logger.error(f"README.md not found at {readme_path}")
        raise

    # Categorize skills
    skills = skills_data.get("skills", [])
    categories = categorize_skills(skills)

    # Get agents that are active or still in draft, sorted by name.
    # .get() keeps a registry entry without a name from raising KeyError.
    agents = sorted(
        (a for a in agents_data.get("agents", []) if a.get("status") in ("active", "draft")),
        key=lambda a: a.get("name") or "",
    )

    # Generate new skills section
    skills_section = generate_skills_section(categories)

    # Update skills section
    content = update_readme_section(
        content,
        "## 🧩 Current Core Skills",
        "---",
        skills_section
    )

    # Update agents section
    content = update_agents_section(content, agents)

    # Generate report
    skills_by_category = {
        "foundation": len(categories["foundation"]),
        "api": len(categories["api"]),
        "infrastructure": len(categories["infrastructure"]),
        "governance": len(categories["governance"])
    }
    report = {
        "skills_by_category": skills_by_category,
        "total_skills": sum(len(members) for members in categories.values()),
        "agents_count": len(agents),
        "timestamp": datetime.now(timezone.utc).isoformat()
    }

    return content, report


def main():
    """
    Main CLI entry point.

    Loads both registries, regenerates README.md, writes a sync report next
    to this skill, prints a JSON result to stdout and exits with status 0.
    On any failure the error is logged, a JSON failure result is printed and
    the process exits with status 1.
    """
    logger.info("Starting README.md sync from registries...")

    try:
        # Load registries
        logger.info("Loading registry files...")
        skills_data = load_registry(REGISTRY_FILE)
        agents_data = load_registry(AGENTS_REGISTRY_FILE)

        # Generate updated README
        logger.info("Generating updated README content...")
        readme_content, report = generate_readme(skills_data, agents_data)

        # Write README with the same explicit UTF-8 encoding it was read
        # with, so non-ASCII text round-trips unchanged.
        readme_path = os.path.join(BASE_DIR, "README.md")
        with open(readme_path, 'w', encoding="utf-8") as f:
            f.write(readme_content)

        logger.info("✅ Updated README.md")
        counts = report['skills_by_category']
        logger.info(f"   - Foundation skills: {counts['foundation']}")
        logger.info(f"   - API skills: {counts['api']}")
        logger.info(f"   - Infrastructure skills: {counts['infrastructure']}")
        logger.info(f"   - Governance skills: {counts['governance']}")
        logger.info(f"   - Total active skills: {report['total_skills']}")
        logger.info(f"   - Agents: {report['agents_count']}")

        # Write report
        report_path = os.path.join(BASE_DIR, "skills", "docs.sync.readme", "sync_report.json")
        with open(report_path, 'w', encoding="utf-8") as f:
            json.dump(report, f, indent=2)

        result = {
            "ok": True,
            "status": "success",
            "readme_path": readme_path,
            "report": report
        }

        print(json.dumps(result, indent=2))
        # SystemExit is not an Exception subclass, so the handler below
        # does not intercept this successful exit.
        sys.exit(0)

    except Exception as e:
        # Top-level boundary: report the failure as JSON and a non-zero exit code.
        logger.error(f"Failed to sync README: {e}")
        result = {
            "ok": False,
            "status": "failed",
            "error": str(e)
        }
        print(json.dumps(result, indent=2))
        sys.exit(1)


# Run the sync only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()