Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:56:10 +08:00
commit 400ca062d1
48 changed files with 18674 additions and 0 deletions

363
hooks/context_preserver.py Executable file
View File

@@ -0,0 +1,363 @@
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.10"
# dependencies = ["pyyaml>=6.0"]
# ///
"""
Enhanced Context Preservation Hook (PreCompact) with Checkpoint Pattern
Implements checkpoint pattern for long sessions:
1. Extracts COMPLETED plans to .plans/ (permanent storage)
2. Preserves IN-PROGRESS work to scratch_pad.md (ephemeral transfer)
3. Enables compact-after-plan workflow
Benefits:
- Each plan becomes a checkpoint (permanent)
- Compaction clears old discussion (reduces bloat)
- Working context stays focused on current plan
- Cumulative documentation without context pollution
"""
import json
import sys
import re
from pathlib import Path
from datetime import datetime
from typing import Optional, List
import yaml
# Import extraction functions from session_end_extractor
sys.path.insert(0, str(Path(__file__).parent))
# High-value patterns for in-progress work
HIGH_VALUE_PATTERNS = [
r'## Architecture',
r'## Implementation',
r'## Task \d+:',
r'## Solution:',
r'```yaml',
r'```python',
r'Option \d+:',
r'Let me design',
r'Enhanced schema:',
r'task-\d+\.md',
]
# Plan completion markers
PLAN_COMPLETION_MARKERS = [
r'\*\*Type:\*\* (Design|Plan|Architecture)',
r'\*\*Status:\*\* (Complete|Ready)',
r'## Success Criteria',
r'## Task Breakdown',
r'Ready for: /ctx:plan',
r'Ready for: /ctx:execute',
]
def read_full_transcript(transcript_path: str) -> List[dict]:
"""
Read full conversation transcript.
Args:
transcript_path: Path to transcript JSONL file
Returns:
List of conversation entries
"""
try:
with open(transcript_path, 'r') as f:
return [json.loads(line) for line in f if line.strip()]
except Exception as e:
print(f"DEBUG: Failed to read transcript: {e}", file=sys.stderr)
return []
def extract_assistant_text(entry: dict) -> Optional[str]:
"""Extract text from assistant message entry."""
if entry.get('type') != 'assistant':
return None
message = entry.get('message', {})
if not isinstance(message, dict):
return None
content = message.get('content', [])
# Handle both formats
if isinstance(content, str):
return content
elif isinstance(content, list):
return ' '.join(
block.get('text', '')
for block in content
if block.get('type') == 'text'
)
return None
def is_completed_plan(text: str) -> int:
"""
Detect if text contains a COMPLETED plan.
Returns count of completion markers (≥2 = completed plan)
"""
if not text:
return 0
count = 0
for pattern in PLAN_COMPLETION_MARKERS:
matches = re.findall(pattern, text, re.IGNORECASE)
count += len(matches)
return count
def extract_plans_from_transcript(transcript: List[dict]) -> List[dict]:
"""
Extract all completed plans from conversation.
Args:
transcript: Full conversation transcript
Returns:
List of {index, timestamp, content, completion_score} dicts
"""
plans = []
for i, entry in enumerate(transcript):
text = extract_assistant_text(entry)
if not text:
continue
completion_score = is_completed_plan(text)
# Require ≥2 completion markers for a plan
if completion_score >= 2:
plans.append({
'index': i,
'timestamp': entry.get('timestamp', ''),
'content': text,
'completion_score': completion_score
})
return plans
def extract_yaml_blocks(content: str) -> List[dict]:
"""Extract and parse YAML blocks."""
yaml_blocks = re.findall(r'```yaml\n(.*?)```', content, re.DOTALL)
parsed = []
for block in yaml_blocks:
try:
data = yaml.safe_load(block)
if data:
parsed.append(data)
except yaml.YAMLError:
continue
return parsed
def extract_title(content: str) -> Optional[str]:
"""Extract title from markdown (# Title)."""
match = re.search(r'^#\s+(.+?)$', content, re.MULTILINE)
return match.group(1).strip() if match else None
def sanitize_topic(title: str) -> str:
"""Convert title to filesystem-safe slug."""
slug = re.sub(r'[^\w\s-]', '', title.lower())
slug = re.sub(r'[-\s]+', '-', slug)
return slug[:50]
def save_plan_to_disk(project_root: Path, plan: dict, session_id: str) -> bool:
"""
Save completed plan to .plans/ directory.
Args:
project_root: Project root directory
plan: Plan dict with content
session_id: Current session ID
Returns:
bool indicating success
"""
try:
content = plan['content']
# Extract title and create topic slug
title = extract_title(content) or 'untitled-plan'
topic_slug = sanitize_topic(title)
# Create .plans directory
plans_dir = project_root / '.plans' / topic_slug
plans_dir.mkdir(parents=True, exist_ok=True)
# Write design.md
design_file = plans_dir / 'design.md'
with open(design_file, 'w') as f:
f.write(content)
print(f"DEBUG: ✅ Checkpoint: Saved plan to {design_file}", file=sys.stderr)
# Extract and save tasks if present
yaml_blocks = extract_yaml_blocks(content)
task_count = 0
for yaml_data in yaml_blocks:
if 'tasks' in yaml_data and isinstance(yaml_data['tasks'], list):
tasks_dir = plans_dir / 'tasks'
tasks_dir.mkdir(exist_ok=True)
for task in yaml_data['tasks']:
if not isinstance(task, dict):
continue
task_id = task.get('id', f'task-{task_count + 1}')
task_file = tasks_dir / f"{task_id}.md"
with open(task_file, 'w') as f:
# YAML frontmatter
f.write('---\n')
yaml.dump(task, f, default_flow_style=False, sort_keys=False)
f.write('---\n\n')
# Task body
title = task.get('title', 'Untitled')
f.write(f"# {task_id}: {title}\n\n")
f.write(task.get('description', '(To be filled in)\n'))
task_count += 1
if task_count:
print(f"DEBUG: ✅ Checkpoint: Saved {task_count} task files", file=sys.stderr)
return True
except Exception as e:
print(f"DEBUG: Failed to save plan: {e}", file=sys.stderr)
return False
def extract_last_message_for_scratch_pad(transcript_path: str) -> Optional[str]:
"""Extract last Claude message for scratch_pad."""
try:
with open(transcript_path, 'r') as f:
lines = f.readlines()
for line in reversed(lines):
try:
entry = json.loads(line)
if entry.get('type') == 'assistant':
message = entry.get('message', {})
if isinstance(message, dict):
content = message.get('content', [])
if isinstance(content, list):
text = ' '.join(
block.get('text', '')
for block in content
if block.get('type') == 'text'
)
return text if text.strip() else None
except json.JSONDecodeError:
continue
return None
except Exception as e:
print(f"DEBUG: Failed to extract last message: {e}", file=sys.stderr)
return None
def write_scratch_pad(project_root: Path, content: str, session_id: str):
"""Write in-progress work to scratch_pad.md."""
scratch_pad = project_root / 'scratch_pad.md'
with open(scratch_pad, 'w') as f:
f.write(f"# In-Progress Context from Compaction\n\n")
f.write(f"**Session ID:** {session_id}\n")
f.write(f"**Preserved:** {datetime.now().isoformat()}\n")
f.write(f"**Auto-extracted by:** PreCompact hook\n\n")
f.write("---\n\n")
f.write(content)
print(f"DEBUG: ✅ Preserved in-progress work to scratch_pad.md", file=sys.stderr)
def main():
"""
Enhanced PreCompact hook with checkpoint pattern.
1. Extracts COMPLETED plans to .plans/ (checkpoints)
2. Preserves IN-PROGRESS work to scratch_pad.md
3. Enables compact-after-plan workflow
"""
try:
hook_data = json.loads(sys.stdin.read())
transcript_path = hook_data.get('transcript_path', '')
session_id = hook_data.get('session_id', 'unknown')
trigger = hook_data.get('trigger', 'unknown')
print(f"DEBUG: PreCompact checkpoint triggered ({trigger})", file=sys.stderr)
if not transcript_path or not Path(transcript_path).exists():
print("DEBUG: Transcript not found", file=sys.stderr)
output = {"continue": True}
print(json.dumps(output))
sys.exit(0)
# Find project root
project_root = Path.cwd()
transcript_dir = Path(transcript_path).parent
temp_root = transcript_dir
while temp_root.parent != temp_root:
if (temp_root / '.git').exists() or (temp_root / 'pyproject.toml').exists():
project_root = temp_root
break
temp_root = temp_root.parent
print(f"DEBUG: Project root: {project_root}", file=sys.stderr)
# Step 1: Extract COMPLETED plans (checkpoint pattern)
print(f"DEBUG: Scanning for completed plans...", file=sys.stderr)
transcript = read_full_transcript(transcript_path)
completed_plans = extract_plans_from_transcript(transcript)
print(f"DEBUG: Found {len(completed_plans)} completed plans", file=sys.stderr)
plans_saved = 0
for plan in completed_plans:
if save_plan_to_disk(project_root, plan, session_id):
plans_saved += 1
if plans_saved:
print(f"DEBUG: 🎯 Checkpoint: {plans_saved} completed plans saved to .plans/", file=sys.stderr)
# Step 2: Preserve IN-PROGRESS work to scratch_pad.md
last_message = extract_last_message_for_scratch_pad(transcript_path)
if last_message:
# Check if last message is in-progress work (not a completed plan)
completion_score = is_completed_plan(last_message)
if completion_score < 2:
# In-progress work - save to scratch_pad
pattern_count = len([p for p in HIGH_VALUE_PATTERNS
if re.search(p, last_message, re.IGNORECASE)])
if pattern_count >= 3:
write_scratch_pad(project_root, last_message, session_id)
print(f"DEBUG: ✅ Preserved in-progress work ({pattern_count} patterns)", file=sys.stderr)
else:
print(f"DEBUG: Last message is completed plan (already extracted)", file=sys.stderr)
# Summary
print(f"DEBUG: 📋 Checkpoint Summary:", file=sys.stderr)
print(f"DEBUG: Completed plans: {plans_saved} saved to .plans/", file=sys.stderr)
print(f"DEBUG: In-progress work: {'saved to scratch_pad.md' if last_message and completion_score < 2 else 'none'}", file=sys.stderr)
except Exception as e:
print(f"DEBUG: Checkpoint failed: {e}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
# Always continue
output = {"continue": True}
print(json.dumps(output))
sys.exit(0)
if __name__ == '__main__':
main()