gh-shakes-tzd-contextune/hooks/context_preserver.py

#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.10"
# dependencies = ["pyyaml>=6.0"]
# ///
"""
Enhanced Context Preservation Hook (PreCompact) with Checkpoint Pattern

Implements checkpoint pattern for long sessions:
1. Extracts COMPLETED plans to .plans/ (permanent storage)
2. Preserves IN-PROGRESS work to scratch_pad.md (ephemeral transfer)
3. Enables compact-after-plan workflow

Benefits:
- Each plan becomes a checkpoint (permanent)
- Compaction clears old discussion (reduces bloat)
- Working context stays focused on current plan
- Cumulative documentation without context pollution
"""

import json
import sys
import re
from pathlib import Path
from datetime import datetime
from typing import Optional, List
import yaml

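# Put this hook's directory on sys.path so sibling modules (e.g.
# session_end_extractor) can be imported; nothing is imported from them in
# the code visible here.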
sys.path.insert(0, str(Path(__file__).parent))

# High-value patterns for in-progress work.
# main() searches the last assistant message for each regex (re.IGNORECASE)
# and writes it to scratch_pad.md only when at least three distinct patterns
# match.
HIGH_VALUE_PATTERNS = [
    r'## Architecture',
    r'## Implementation',
    r'## Task \d+:',
    r'## Solution:',
    r'```yaml',
    r'```python',
    r'Option \d+:',
    r'Let me design',
    r'Enhanced schema:',
    r'task-\d+\.md',
]

# Plan completion markers.
# is_completed_plan() counts every match of these regexes (re.IGNORECASE);
# its callers treat a total of 2 or more as a completed plan.
PLAN_COMPLETION_MARKERS = [
    r'\*\*Type:\*\* (Design|Plan|Architecture)',
    r'\*\*Status:\*\* (Complete|Ready)',
    r'## Success Criteria',
    r'## Task Breakdown',
    r'Ready for: /ctx:plan',
    r'Ready for: /ctx:execute',
]

def read_full_transcript(transcript_path: str) -> List[dict]:
    """
    Read the full conversation transcript from a JSONL file.

    Blank lines, lines that are not valid JSON, and JSON values that are not
    objects are skipped, so a single corrupt line does not discard the rest
    of the transcript.

    Args:
        transcript_path: Path to transcript JSONL file

    Returns:
        List of conversation entries (dicts) in file order, or an empty list
        if the file cannot be opened or read.
    """
    entries: List[dict] = []
    try:
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(transcript_path, 'r', encoding='utf-8') as f:
            for line_number, line in enumerate(f, start=1):
                if not line.strip():
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError as e:
                    print(
                        f"DEBUG: Skipping malformed transcript line {line_number}: {e}",
                        file=sys.stderr,
                    )
                    continue
                # Downstream code calls .get() on every entry, so keep dicts only.
                if isinstance(entry, dict):
                    entries.append(entry)
    except Exception as e:
        # Best-effort hook: report the failure and fall back to "no transcript".
        print(f"DEBUG: Failed to read transcript: {e}", file=sys.stderr)
        return []
    return entries

def extract_assistant_text(entry: dict) -> Optional[str]:
    """
    Extract text from an assistant message entry.

    Args:
        entry: One transcript entry.

    Returns:
        The message text, or None when the entry is not an assistant entry,
        its 'message' is not a dict, or its 'content' is neither a string nor
        a list. For list content the 'text' values of all blocks whose type
        is 'text' are joined with single spaces (possibly yielding '').
    """
    if not isinstance(entry, dict) or entry.get('type') != 'assistant':
        return None

    message = entry.get('message', {})
    if not isinstance(message, dict):
        return None

    content = message.get('content', [])

    # Content is either a plain string or a list of typed blocks.
    if isinstance(content, str):
        return content

    if isinstance(content, list):
        texts = []
        for block in content:
            # Ignore malformed blocks instead of raising AttributeError.
            if not isinstance(block, dict) or block.get('type') != 'text':
                continue
            text = block.get('text', '')
            # A non-string text (e.g. null) would break str.join.
            if isinstance(text, str):
                texts.append(text)
        return ' '.join(texts)

    return None

def is_completed_plan(text: str) -> int:
    """
    Score how strongly a text looks like a COMPLETED plan.

    Every case-insensitive match of every PLAN_COMPLETION_MARKERS regex adds
    one to the score; callers treat a score of 2 or more as a completed plan.

    Returns:
        Total number of marker matches (0 for empty or missing text).
    """
    if not text:
        return 0

    return sum(
        len(re.findall(marker, text, re.IGNORECASE))
        for marker in PLAN_COMPLETION_MARKERS
    )

def extract_plans_from_transcript(transcript: List[dict]) -> List[dict]:
    """
    Collect every assistant message that qualifies as a completed plan.

    Args:
        transcript: Full conversation transcript

    Returns:
        One dict per plan, in transcript order, with the keys
        index, timestamp, content and completion_score.
    """
    found: List[dict] = []

    for position, record in enumerate(transcript):
        body = extract_assistant_text(record)
        # Entries without assistant text can never be plans.
        score = is_completed_plan(body) if body else 0

        # Fewer than two completion markers: not a completed plan.
        if score < 2:
            continue

        found.append({
            'index': position,
            'timestamp': record.get('timestamp', ''),
            'content': body,
            'completion_score': score,
        })

    return found

def extract_yaml_blocks(content: str) -> List[dict]:
    """
    Extract and parse fenced ```yaml blocks from markdown text.

    Blocks that fail to parse, parse to an empty value, or parse to anything
    other than a mapping (e.g. a bare list or scalar) are skipped so that the
    result really is a list of dicts.

    Args:
        content: Markdown text to scan.

    Returns:
        Parsed non-empty mappings, in order of appearance.
    """
    # Tolerate trailing spaces after the fence marker and CRLF line endings.
    yaml_blocks = re.findall(r'```yaml[ \t]*\r?\n(.*?)```', content, re.DOTALL)
    parsed = []

    for block in yaml_blocks:
        try:
            data = yaml.safe_load(block)
        except yaml.YAMLError:
            continue
        # Callers use dict operations on each item; drop lists and scalars.
        if isinstance(data, dict) and data:
            parsed.append(data)

    return parsed

def extract_title(content: str) -> Optional[str]:
    """
    Extract the first level-1 markdown heading (# Title).

    Only spaces or tabs may separate the '#' from the title, so a bare '#'
    line does not pick up the following line as its title.

    Args:
        content: Markdown text to scan.

    Returns:
        The heading text with surrounding whitespace removed, or None when
        there is no such heading.
    """
    match = re.search(r'^#[ \t]+(.+?)[ \t]*$', content, re.MULTILINE)
    if not match:
        return None
    # strip() also removes a trailing '\r' left by CRLF line endings.
    return match.group(1).strip() or None

def sanitize_topic(title: str) -> str:
    """
    Convert a title to a filesystem-safe slug.

    The title is lower-cased, characters other than word characters,
    whitespace and hyphens are removed, runs of whitespace/hyphens collapse
    to a single hyphen, and the result is limited to 50 characters with no
    leading or trailing hyphen.

    Args:
        title: Human-readable title.

    Returns:
        A non-empty slug; 'untitled-plan' when nothing usable remains, so the
        slug can always be used as a directory name.
    """
    slug = re.sub(r'[^\w\s-]', '', title.lower())
    slug = re.sub(r'[-\s]+', '-', slug).strip('-')
    # Truncation can leave a dangling hyphen at the cut point.
    slug = slug[:50].rstrip('-')
    return slug or 'untitled-plan'

def save_plan_to_disk(project_root: Path, plan: dict, session_id: str) -> bool:
    """
    Save a completed plan to the .plans/ directory.

    Writes the plan text to .plans/<topic-slug>/design.md. For every fenced
    YAML mapping in the plan that has a list under 'tasks', each task dict is
    also written to .plans/<topic-slug>/tasks/<task-id>.md as YAML front
    matter followed by a heading and the task description.

    Args:
        project_root: Project root directory
        plan: Plan dict; its 'content' entry holds the plan text
        session_id: Current session ID (not used when writing the files)

    Returns:
        True when everything was written; False if any step raised (the
        error is reported on stderr and files written so far are left in
        place).
    """
    try:
        content = plan['content']

        # Derive the directory name from the plan's first heading.
        plan_title = extract_title(content) or 'untitled-plan'
        # An empty slug would put design.md directly into .plans/.
        topic_slug = sanitize_topic(plan_title) or 'untitled-plan'

        plans_dir = project_root / '.plans' / topic_slug
        plans_dir.mkdir(parents=True, exist_ok=True)

        # Explicit UTF-8: plan text may contain non-ASCII characters.
        design_file = plans_dir / 'design.md'
        with open(design_file, 'w', encoding='utf-8') as f:
            f.write(content)

        print(f"DEBUG: ✅ Checkpoint: Saved plan to {design_file}", file=sys.stderr)

        # Extract and save tasks if present
        task_count = 0

        for yaml_data in extract_yaml_blocks(content):
            # A YAML block may parse to a list or scalar; only mappings have tasks.
            if not isinstance(yaml_data, dict):
                continue
            tasks = yaml_data.get('tasks')
            if not isinstance(tasks, list):
                continue

            tasks_dir = plans_dir / 'tasks'
            tasks_dir.mkdir(exist_ok=True)

            for task in tasks:
                if not isinstance(task, dict):
                    continue

                fallback_id = f'task-{task_count + 1}'
                task_id = task.get('id')
                if task_id is None:
                    task_id = fallback_id

                # The id comes from transcript text: strip path separators and
                # other unsafe characters so the file stays inside tasks_dir.
                file_stem = re.sub(r'[^\w.\- ]+', '-', str(task_id)).strip('. -') or fallback_id
                task_file = tasks_dir / f"{file_stem}.md"

                with open(task_file, 'w', encoding='utf-8') as f:
                    # YAML frontmatter
                    f.write('---\n')
                    yaml.dump(task, f, default_flow_style=False, sort_keys=False)
                    f.write('---\n\n')

                    # Task body
                    task_title = task.get('title', 'Untitled')
                    f.write(f"# {task_id}: {task_title}\n\n")

                    # A null or non-string description must not abort the save.
                    description = task.get('description')
                    if description is None:
                        description = '(To be filled in)\n'
                    f.write(str(description))

                task_count += 1

        if task_count:
            print(f"DEBUG: ✅ Checkpoint: Saved {task_count} task files", file=sys.stderr)

        return True

    except Exception as e:
        # Best-effort: a failed checkpoint must not break the hook.
        print(f"DEBUG: Failed to save plan: {e}", file=sys.stderr)
        return False

def extract_last_message_for_scratch_pad(transcript_path: str) -> Optional[str]:
    """
    Extract the text of the last assistant message for scratch_pad.

    The transcript is scanned from the end. Lines that are not valid JSON,
    are not JSON objects, or are not assistant entries with string or list
    content are skipped. The first usable assistant entry decides the result.

    Args:
        transcript_path: Path to transcript JSONL file

    Returns:
        The message text (list content: 'text' blocks joined with spaces),
        or None when that message has no non-blank text, no assistant entry
        exists, or the file cannot be read.
    """
    try:
        with open(transcript_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        for line in reversed(lines):
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                continue

            # A valid JSON line need not be an object; skip instead of failing.
            if not isinstance(entry, dict) or entry.get('type') != 'assistant':
                continue

            message = entry.get('message', {})
            if not isinstance(message, dict):
                continue

            content = message.get('content', [])
            if isinstance(content, str):
                # Plain-string content, as also accepted by extract_assistant_text.
                text = content
            elif isinstance(content, list):
                text = ' '.join(
                    block['text']
                    for block in content
                    if isinstance(block, dict)
                    and block.get('type') == 'text'
                    and isinstance(block.get('text'), str)
                )
            else:
                continue

            return text if text.strip() else None

        return None

    except Exception as e:
        # Best-effort: report and let the caller carry on without a message.
        print(f"DEBUG: Failed to extract last message: {e}", file=sys.stderr)
        return None

def write_scratch_pad(project_root: Path, content: str, session_id: str) -> None:
    """
    Write in-progress work to scratch_pad.md in the project root.

    The file is overwritten. It starts with a short header (session ID,
    local timestamp, origin) followed by a horizontal rule and the content.

    Args:
        project_root: Directory that receives scratch_pad.md
        content: Text to preserve
        session_id: Current session ID, recorded in the header

    Raises:
        OSError: If the file cannot be written.
    """
    scratch_pad = project_root / 'scratch_pad.md'

    header = (
        "# In-Progress Context from Compaction\n\n"
        f"**Session ID:** {session_id}\n"
        f"**Preserved:** {datetime.now().isoformat()}\n"
        "**Auto-extracted by:** PreCompact hook\n\n"
        "---\n\n"
    )

    # Explicit UTF-8 so non-ASCII content does not fail under other locales.
    with open(scratch_pad, 'w', encoding='utf-8') as f:
        f.write(header)
        f.write(content)

    print("DEBUG: ✅ Preserved in-progress work to scratch_pad.md", file=sys.stderr)

def _find_project_root(transcript_path: str) -> Path:
    """Return the nearest ancestor of the transcript's directory that holds .git or pyproject.toml, else the cwd."""
    candidate = Path(transcript_path).parent
    # Stops before the filesystem root itself, which is never tested.
    while candidate.parent != candidate:
        if (candidate / '.git').exists() or (candidate / 'pyproject.toml').exists():
            return candidate
        candidate = candidate.parent
    return Path.cwd()


def main():
    """
    Enhanced PreCompact hook with checkpoint pattern.

    Reads the hook payload (JSON) from stdin, then:
    1. Extracts COMPLETED plans to .plans/ (checkpoints)
    2. Preserves IN-PROGRESS work to scratch_pad.md when the last assistant
       message is not a completed plan and matches at least three
       HIGH_VALUE_PATTERNS
    3. Enables compact-after-plan workflow

    Progress is reported on stderr. Whatever happens, {"continue": true} is
    printed to stdout and the process exits with status 0.
    """
    try:
        hook_data = json.loads(sys.stdin.read())

        transcript_path = hook_data.get('transcript_path', '')
        session_id = hook_data.get('session_id', 'unknown')
        trigger = hook_data.get('trigger', 'unknown')

        print(f"DEBUG: PreCompact checkpoint triggered ({trigger})", file=sys.stderr)

        if not transcript_path or not Path(transcript_path).exists():
            print("DEBUG: Transcript not found", file=sys.stderr)
            output = {"continue": True}
            print(json.dumps(output))
            # SystemExit is not an Exception, so the handler below lets it through.
            sys.exit(0)

        project_root = _find_project_root(transcript_path)
        print(f"DEBUG: Project root: {project_root}", file=sys.stderr)

        # Step 1: Extract COMPLETED plans (checkpoint pattern)
        print("DEBUG: Scanning for completed plans...", file=sys.stderr)
        transcript = read_full_transcript(transcript_path)
        completed_plans = extract_plans_from_transcript(transcript)

        print(f"DEBUG: Found {len(completed_plans)} completed plans", file=sys.stderr)

        plans_saved = sum(
            1 for plan in completed_plans
            if save_plan_to_disk(project_root, plan, session_id)
        )

        if plans_saved:
            print(f"DEBUG: 🎯 Checkpoint: {plans_saved} completed plans saved to .plans/", file=sys.stderr)

        # Step 2: Preserve IN-PROGRESS work to scratch_pad.md
        last_message = extract_last_message_for_scratch_pad(transcript_path)
        # Tracks whether scratch_pad.md was actually written, for the summary.
        scratch_saved = False

        if last_message:
            # Check if last message is in-progress work (not a completed plan)
            completion_score = is_completed_plan(last_message)

            if completion_score < 2:
                # In-progress work - save only if it looks valuable enough
                pattern_count = sum(
                    1 for pattern in HIGH_VALUE_PATTERNS
                    if re.search(pattern, last_message, re.IGNORECASE)
                )

                if pattern_count >= 3:
                    write_scratch_pad(project_root, last_message, session_id)
                    scratch_saved = True
                    print(f"DEBUG: ✅ Preserved in-progress work ({pattern_count} patterns)", file=sys.stderr)
            else:
                print("DEBUG: Last message is completed plan (already extracted)", file=sys.stderr)

        # Summary
        print("DEBUG: 📋 Checkpoint Summary:", file=sys.stderr)
        print(f"DEBUG:   Completed plans: {plans_saved} saved to .plans/", file=sys.stderr)
        print(f"DEBUG:   In-progress work: {'saved to scratch_pad.md' if scratch_saved else 'none'}", file=sys.stderr)

    except Exception as e:
        # Top-level boundary: log everything, never block compaction.
        print(f"DEBUG: Checkpoint failed: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)

    # Always continue
    output = {"continue": True}
    print(json.dumps(output))
    sys.exit(0)

# Run the hook only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()