Initial commit
This commit is contained in:
363
hooks/context_preserver.py
Executable file
363
hooks/context_preserver.py
Executable file
@@ -0,0 +1,363 @@
|
||||
#!/usr/bin/env -S uv run --script
|
||||
# /// script
|
||||
# requires-python = ">=3.10"
|
||||
# dependencies = ["pyyaml>=6.0"]
|
||||
# ///
|
||||
"""
|
||||
Enhanced Context Preservation Hook (PreCompact) with Checkpoint Pattern
|
||||
|
||||
Implements checkpoint pattern for long sessions:
|
||||
1. Extracts COMPLETED plans to .plans/ (permanent storage)
|
||||
2. Preserves IN-PROGRESS work to scratch_pad.md (ephemeral transfer)
|
||||
3. Enables compact-after-plan workflow
|
||||
|
||||
Benefits:
|
||||
- Each plan becomes a checkpoint (permanent)
|
||||
- Compaction clears old discussion (reduces bloat)
|
||||
- Working context stays focused on current plan
|
||||
- Cumulative documentation without context pollution
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import re
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Optional, List
|
||||
import yaml
|
||||
|
||||
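# Put this script's directory on sys.path so sibling hook modules (e.g. session_end_extractor) are importable; note that nothing is actually imported from them in this file.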
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
# High-value patterns for in-progress work
|
||||
# Regexes that hint a message contains in-progress design work.
# main() counts how many of these match the last assistant message
# (case-insensitively) before deciding to write scratch_pad.md.
HIGH_VALUE_PATTERNS = [
    # Markdown section headings
    r'## Architecture',
    r'## Implementation',
    r'## Task \d+:',
    r'## Solution:',
    # Fenced code blocks
    r'```yaml',
    r'```python',
    # Design-discussion phrases and task-file references
    r'Option \d+:',
    r'Let me design',
    r'Enhanced schema:',
    r'task-\d+\.md',
]
|
||||
|
||||
# Plan completion markers
|
||||
# Regexes whose occurrences are counted by is_completed_plan(); a total of
# two or more is treated elsewhere in this file as "completed plan".
PLAN_COMPLETION_MARKERS = [
    # Bold metadata fields
    r'\*\*Type:\*\* (Design|Plan|Architecture)',
    r'\*\*Status:\*\* (Complete|Ready)',
    # Section headings
    r'## Success Criteria',
    r'## Task Breakdown',
    # Hand-off lines
    r'Ready for: /ctx:plan',
    r'Ready for: /ctx:execute',
]
|
||||
|
||||
def read_full_transcript(transcript_path: str) -> List[dict]:
    """
    Read the full conversation transcript from a JSONL file.

    Blank lines, lines that are not valid JSON, and JSON values that are not
    objects are skipped individually, so one corrupt line no longer discards
    the whole transcript. The file is decoded as UTF-8 with undecodable bytes
    replaced rather than relying on the platform default encoding.

    Args:
        transcript_path: Path to transcript JSONL file

    Returns:
        List of conversation entries (dicts) in file order. An empty list is
        returned (and the error logged to stderr) if the file cannot be read.
    """
    entries: List[dict] = []
    skipped = 0

    try:
        with open(transcript_path, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                if not line.strip():
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    skipped += 1
                    continue
                # Downstream code calls entry.get(...), so keep objects only.
                if isinstance(entry, dict):
                    entries.append(entry)
                else:
                    skipped += 1
    except (OSError, TypeError, ValueError) as e:
        # Best-effort hook: never raise, just report and return nothing.
        print(f"DEBUG: Failed to read transcript: {e}", file=sys.stderr)
        return []

    if skipped:
        print(f"DEBUG: Skipped {skipped} unparseable transcript lines", file=sys.stderr)

    return entries
|
||||
|
||||
def extract_assistant_text(entry: dict) -> Optional[str]:
    """
    Extract the text of an assistant message entry.

    Handles both content formats: a plain string, or a list of content
    blocks, in which case the ``text`` of every ``type == 'text'`` block is
    joined with single spaces. Malformed blocks (non-dict items, non-string
    ``text`` values) are tolerated instead of raising.

    Args:
        entry: One transcript entry

    Returns:
        The message text, or None if the entry is not an assistant message
        or its content has an unexpected shape.
    """
    if not isinstance(entry, dict) or entry.get('type') != 'assistant':
        return None

    message = entry.get('message', {})
    if not isinstance(message, dict):
        return None

    content = message.get('content', [])

    # Handle both formats
    if isinstance(content, str):
        return content

    if isinstance(content, list):
        parts = []
        for block in content:
            # Skip anything that is not a text block (tool calls, raw strings, None).
            if not isinstance(block, dict) or block.get('type') != 'text':
                continue
            text = block.get('text', '')
            parts.append(text if isinstance(text, str) else '')
        return ' '.join(parts)

    return None
|
||||
|
||||
def is_completed_plan(text: str) -> int:
    """
    Score how strongly *text* looks like a COMPLETED plan.

    Every PLAN_COMPLETION_MARKERS pattern is searched case-insensitively and
    each occurrence adds one to the score, so a marker that appears twice
    counts twice. Callers in this file treat a score of 2 or more as a
    completed plan.

    Returns:
        Total number of marker occurrences (0 for empty text).
    """
    if not text:
        return 0

    return sum(
        len(re.findall(marker, text, re.IGNORECASE))
        for marker in PLAN_COMPLETION_MARKERS
    )
|
||||
|
||||
def extract_plans_from_transcript(transcript: List[dict]) -> List[dict]:
    """
    Collect every assistant message that scores as a completed plan.

    Args:
        transcript: Full conversation transcript

    Returns:
        One dict per qualifying message, in transcript order, with keys
        ``index`` (position in the transcript), ``timestamp`` (entry value
        or ''), ``content`` (message text) and ``completion_score``.
    """
    found = []

    for position, record in enumerate(transcript):
        body = extract_assistant_text(record)
        # Non-assistant or empty messages can never be plans.
        score = is_completed_plan(body) if body else 0

        # Fewer than 2 completion markers: not a plan.
        if score < 2:
            continue

        found.append(dict(
            index=position,
            timestamp=record.get('timestamp', ''),
            content=body,
            completion_score=score,
        ))

    return found
|
||||
|
||||
def extract_yaml_blocks(content: str) -> List[dict]:
    """
    Extract and parse fenced ```yaml blocks from markdown text.

    Only blocks whose top-level value parses to a non-empty mapping are
    returned. ``yaml.safe_load`` can also yield strings, lists or scalars,
    which would violate the ``List[dict]`` contract and break callers that
    index the result by key. Blocks that fail to parse are skipped.

    Args:
        content: Markdown text to scan

    Returns:
        Parsed YAML mappings, in order of appearance.
    """
    if not content:
        return []

    # Allow trailing spaces after the fence tag and CRLF line endings.
    yaml_blocks = re.findall(r'```yaml[ \t]*\r?\n(.*?)```', content, re.DOTALL)
    parsed = []

    for block in yaml_blocks:
        try:
            data = yaml.safe_load(block)
        except yaml.YAMLError:
            continue
        if isinstance(data, dict) and data:
            parsed.append(data)

    return parsed
|
||||
|
||||
def extract_title(content: str) -> Optional[str]:
    """
    Extract the first level-1 markdown heading (``# Title``) from *content*.

    The whitespace after ``#`` must be spaces or tabs on the same line, so a
    bare ``#`` line does not capture the following line as the title.

    Note: any line starting with ``# `` matches, including comment lines
    inside fenced code blocks that precede the real heading.

    Returns:
        The heading text with surrounding whitespace removed, or None if
        there is no such heading.
    """
    if not content:
        return None

    match = re.search(r'^#[ \t]+(\S.*)$', content, re.MULTILINE)
    return match.group(1).strip() if match else None
|
||||
|
||||
def sanitize_topic(title: str) -> str:
    """
    Convert a title to a filesystem-safe slug.

    Lowercases the title, drops everything except word characters,
    whitespace and hyphens, collapses runs of whitespace/hyphens into a
    single hyphen, truncates to 50 characters and trims leading/trailing
    hyphens.

    Returns:
        The slug, or 'untitled-plan' when nothing usable remains (e.g. the
        title was empty or all punctuation), so callers never build a path
        from an empty directory name.
    """
    slug = re.sub(r'[^\w\s-]', '', title.lower())
    slug = re.sub(r'[-\s]+', '-', slug)
    # Trim after truncating so a cut at a word boundary leaves no dangling '-'.
    slug = slug[:50].strip('-')
    return slug or 'untitled-plan'
|
||||
|
||||
def save_plan_to_disk(project_root: Path, plan: dict, session_id: str) -> bool:
    """
    Save a completed plan under ``<project_root>/.plans/<topic-slug>/``.

    Writes the plan text to ``design.md`` (overwriting any existing file)
    and, for every YAML block whose top-level mapping has a ``tasks`` list,
    writes one ``tasks/<task-id>.md`` per task dict: YAML front matter
    followed by a heading and the task description.

    Task ids come from the plan's YAML, so they are reduced to a safe
    file-name component before being used in a path.

    Args:
        project_root: Project root directory
        plan: Plan dict; only ``plan['content']`` is read
        session_id: Current session ID (not used by this function)

    Returns:
        True on success; False if any step raised (the error is logged to
        stderr, and files written before the failure are left in place).
    """
    try:
        content = plan['content']

        # Extract title and create topic slug. The extra fallback covers a
        # title that sanitizes to an empty slug, which would otherwise put
        # design.md directly in .plans/.
        title = extract_title(content) or 'untitled-plan'
        topic_slug = sanitize_topic(title) or 'untitled-plan'

        # Create .plans directory
        plans_dir = project_root / '.plans' / topic_slug
        plans_dir.mkdir(parents=True, exist_ok=True)

        # Write design.md
        design_file = plans_dir / 'design.md'
        with open(design_file, 'w', encoding='utf-8') as f:
            f.write(content)

        print(f"DEBUG: ✅ Checkpoint: Saved plan to {design_file}", file=sys.stderr)

        # Extract and save tasks if present
        task_count = 0

        for yaml_data in extract_yaml_blocks(content):
            if not isinstance(yaml_data, dict):
                continue
            tasks = yaml_data.get('tasks')
            if not isinstance(tasks, list):
                continue

            tasks_dir = plans_dir / 'tasks'
            tasks_dir.mkdir(exist_ok=True)

            for task in tasks:
                if not isinstance(task, dict):
                    continue

                fallback_id = f'task-{task_count + 1}'
                task_id = task.get('id', fallback_id)

                # Strip path separators and other unsafe characters so a
                # crafted id such as '../../x' cannot escape tasks_dir.
                safe_id = re.sub(r'[^\w.-]+', '-', str(task_id)).strip('-.')
                task_file = tasks_dir / f"{safe_id or fallback_id}.md"

                with open(task_file, 'w', encoding='utf-8') as f:
                    # YAML frontmatter
                    f.write('---\n')
                    yaml.dump(task, f, default_flow_style=False, sort_keys=False)
                    f.write('---\n\n')

                    # Task body
                    task_title = task.get('title', 'Untitled')
                    f.write(f"# {task_id}: {task_title}\n\n")

                    description = task.get('description')
                    if description is None:
                        description = '(To be filled in)\n'
                    # YAML may give a list/number here; write() needs a str.
                    f.write(str(description))

                task_count += 1

        if task_count:
            print(f"DEBUG: ✅ Checkpoint: Saved {task_count} task files", file=sys.stderr)

        return True

    except Exception as e:
        # Best-effort hook: report the failure and let the caller carry on.
        print(f"DEBUG: Failed to save plan: {e}", file=sys.stderr)
        return False
|
||||
|
||||
def extract_last_message_for_scratch_pad(transcript_path: str) -> Optional[str]:
    """
    Extract the text of the last assistant message in the transcript.

    Scans the JSONL file from the end and stops at the first assistant entry
    whose message content is a string or a list of blocks. Both formats are
    accepted; for a list, the ``text`` of each ``type == 'text'`` block is
    joined with spaces. Unparseable lines, non-object entries and malformed
    blocks are skipped rather than aborting the scan.

    Args:
        transcript_path: Path to transcript JSONL file

    Returns:
        The message text, or None if that message has no non-whitespace
        text, no assistant message exists, or the file cannot be read.
    """
    try:
        with open(transcript_path, 'r', encoding='utf-8', errors='replace') as f:
            lines = f.readlines()
    except (OSError, TypeError, ValueError) as e:
        print(f"DEBUG: Failed to extract last message: {e}", file=sys.stderr)
        return None

    for line in reversed(lines):
        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            continue

        if not isinstance(entry, dict) or entry.get('type') != 'assistant':
            continue

        message = entry.get('message', {})
        if not isinstance(message, dict):
            continue

        content = message.get('content', [])
        if isinstance(content, str):
            text = content
        elif isinstance(content, list):
            text = ' '.join(
                block['text']
                for block in content
                if isinstance(block, dict)
                and block.get('type') == 'text'
                and isinstance(block.get('text'), str)
            )
        else:
            continue

        # Only the most recent assistant message is considered; if it has
        # no text (e.g. only tool calls) there is nothing to preserve.
        return text if text.strip() else None

    return None
|
||||
|
||||
def write_scratch_pad(project_root: Path, content: str, session_id: str) -> None:
    """
    Write in-progress work to ``<project_root>/scratch_pad.md``.

    The file is overwritten. It starts with a short header (session id,
    local ISO timestamp, origin note) and a horizontal rule, followed by
    *content* verbatim. It is written as UTF-8 so emoji and other non-ASCII
    text do not depend on the platform's default encoding.

    Args:
        project_root: Directory in which scratch_pad.md is created
        content: Text to preserve
        session_id: Current session ID, recorded in the header

    Raises:
        OSError: If the file cannot be written.
    """
    scratch_pad = project_root / 'scratch_pad.md'

    header = (
        "# In-Progress Context from Compaction\n\n"
        f"**Session ID:** {session_id}\n"
        f"**Preserved:** {datetime.now().isoformat()}\n"
        "**Auto-extracted by:** PreCompact hook\n\n"
        "---\n\n"
    )

    with open(scratch_pad, 'w', encoding='utf-8') as f:
        f.write(header)
        f.write(content)

    print("DEBUG: ✅ Preserved in-progress work to scratch_pad.md", file=sys.stderr)
|
||||
|
||||
def main():
    """
    Enhanced PreCompact hook with checkpoint pattern.

    Reads the hook payload (JSON) from stdin and then:

    1. Extracts COMPLETED plans to .plans/ (checkpoints)
    2. Preserves IN-PROGRESS work to scratch_pad.md, when the last
       assistant message is not a completed plan and matches at least 3
       HIGH_VALUE_PATTERNS
    3. Enables compact-after-plan workflow

    All diagnostics go to stderr. Whatever happens, ``{"continue": true}``
    is printed to stdout and the process exits with status 0, so a failure
    here never blocks compaction.
    """
    try:
        hook_data = json.loads(sys.stdin.read())

        transcript_path = hook_data.get('transcript_path', '')
        session_id = hook_data.get('session_id', 'unknown')
        trigger = hook_data.get('trigger', 'unknown')

        print(f"DEBUG: PreCompact checkpoint triggered ({trigger})", file=sys.stderr)

        if not transcript_path or not Path(transcript_path).exists():
            print("DEBUG: Transcript not found", file=sys.stderr)
            output = {"continue": True}
            print(json.dumps(output))
            sys.exit(0)

        # Find project root: walk up from the transcript's directory until a
        # .git or pyproject.toml is found, falling back to the current
        # working directory.
        # NOTE(review): the search starts at the transcript's location, not
        # at cwd - confirm the transcript actually lives inside the project.
        project_root = Path.cwd()
        temp_root = Path(transcript_path).parent
        while temp_root.parent != temp_root:
            if (temp_root / '.git').exists() or (temp_root / 'pyproject.toml').exists():
                project_root = temp_root
                break
            temp_root = temp_root.parent

        print(f"DEBUG: Project root: {project_root}", file=sys.stderr)

        # Step 1: Extract COMPLETED plans (checkpoint pattern)
        print("DEBUG: Scanning for completed plans...", file=sys.stderr)
        transcript = read_full_transcript(transcript_path)
        completed_plans = extract_plans_from_transcript(transcript)

        print(f"DEBUG: Found {len(completed_plans)} completed plans", file=sys.stderr)

        plans_saved = 0
        for plan in completed_plans:
            if save_plan_to_disk(project_root, plan, session_id):
                plans_saved += 1

        if plans_saved:
            print(f"DEBUG: 🎯 Checkpoint: {plans_saved} completed plans saved to .plans/", file=sys.stderr)

        # Step 2: Preserve IN-PROGRESS work to scratch_pad.md
        last_message = extract_last_message_for_scratch_pad(transcript_path)
        # Tracks whether scratch_pad.md was really written, so the summary
        # below does not claim a save that never happened.
        scratch_pad_saved = False

        if last_message:
            # Check if last message is in-progress work (not a completed plan)
            completion_score = is_completed_plan(last_message)

            if completion_score < 2:
                # In-progress work - save to scratch_pad if it looks valuable
                pattern_count = sum(
                    1 for p in HIGH_VALUE_PATTERNS
                    if re.search(p, last_message, re.IGNORECASE)
                )

                if pattern_count >= 3:
                    write_scratch_pad(project_root, last_message, session_id)
                    scratch_pad_saved = True
                    print(f"DEBUG: ✅ Preserved in-progress work ({pattern_count} patterns)", file=sys.stderr)
            else:
                print("DEBUG: Last message is completed plan (already extracted)", file=sys.stderr)

        # Summary
        print("DEBUG: 📋 Checkpoint Summary:", file=sys.stderr)
        print(f"DEBUG: Completed plans: {plans_saved} saved to .plans/", file=sys.stderr)
        print(f"DEBUG: In-progress work: {'saved to scratch_pad.md' if scratch_pad_saved else 'none'}", file=sys.stderr)

    except Exception as e:
        # sys.exit() above raises SystemExit, which is not an Exception
        # subclass, so the early exit is not swallowed here.
        print(f"DEBUG: Checkpoint failed: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)

    # Always continue
    output = {"continue": True}
    print(json.dumps(output))
    sys.exit(0)
|
||||
|
||||
# Script entry point: run the hook only when executed directly, not on import.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user