Initial commit

hooks/user_prompt_submit.py (new executable file, 914 lines)

@@ -0,0 +1,914 @@
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "model2vec>=0.3.0",
#     "semantic-router>=0.1.0",
#     "numpy>=1.24.0",
#     "rapidfuzz>=3.0.0"
# ]
# ///
"""
Contextune UserPromptSubmit Hook

Detects slash commands from natural language prompts using a 3-tier cascade:
1. Keyword matching (0.02ms, 60% coverage)
2. Model2Vec embeddings (0.2ms, 30% coverage)
3. Semantic Router (50ms, 10% coverage)

Uses Claude Code headless mode for interactive prompt analysis and suggestions.

Hook Protocol:
- Input: JSON via stdin with {"prompt": "...", "session_id": "..."}
- Output: JSON via stdout with {"continue": true, "suppressOutput": ...} and,
  when a suggestion is shown, an "additionalContext" string
"""

import json
import re
import subprocess
import sys
from pathlib import Path
from typing import Any

# Add lib directory to Python path
PLUGIN_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(PLUGIN_ROOT / "lib"))

# Import matchers (now using RapidFuzz-based keyword matcher v2!)
from keyword_matcher_v2 import IntentMatch, KeywordMatcherV2 as KeywordMatcher
from model2vec_matcher import Model2VecMatcher
from observability_db import ObservabilityDB
from semantic_router_matcher import SemanticRouterMatcher


class ContextuneDetector:
    """
    3-tier intent detection cascade.

    Uses the existing matchers in order of speed:
    1. KeywordMatcher (always fast)
    2. Model2VecMatcher (if available)
    3. SemanticRouterMatcher (if API key available)
    """

    def __init__(self):
        self._keyword = None
        self._model2vec = None
        self._semantic = None

    def _get_keyword(self):
        if self._keyword is None:
            self._keyword = KeywordMatcher()
        return self._keyword

    def _get_model2vec(self):
        if self._model2vec is None:
            m = Model2VecMatcher()
            self._model2vec = m if m.is_available() else None
        return self._model2vec

    def _get_semantic(self):
        if self._semantic is None:
            m = SemanticRouterMatcher()
            self._semantic = m if m.is_available() else None
        return self._semantic

    def detect(self, text: str) -> IntentMatch | None:
        """Detect intent using the 3-tier cascade."""

        # Tier 1: Keyword (always available)
        result = self._get_keyword().match(text)
        if result:
            return result

        # Tier 2: Model2Vec
        m2v = self._get_model2vec()
        if m2v:
            result = m2v.match(text)
            if result:
                return result

        # Tier 3: Semantic Router
        sem = self._get_semantic()
        if sem:
            result = sem.match(text)
            if result:
                return result

        return None
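
# Usage sketch (hypothetical prompt; actual results depend on the matchers
# shipped in lib/):
#   detector = ContextuneDetector()
#   m = detector.detect("help me plan parallel tasks")
#   if m:
#       print(m.command, m.confidence, m.method, m.latency_ms)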


class ClaudeCodeHaikuEngineer:
    """
    Uses Claude Code headless mode to analyze prompts and provide interactive suggestions.

    Benefits:
    - No separate API key needed (uses existing Claude Code auth)
    - Integrated billing with Claude Code
    - Fast Haiku model for cost optimization
    - Interactive blocking mode for user feedback
    """

    def __init__(self):
        self._claude_available = None

    def is_available(self) -> bool:
        """Check if the Claude Code CLI is available."""
        if self._claude_available is None:
            try:
                result = subprocess.run(
                    ["claude", "--version"], capture_output=True, text=True, timeout=2
                )
                self._claude_available = result.returncode == 0
            except (FileNotFoundError, subprocess.TimeoutExpired):
                self._claude_available = False

        return self._claude_available

    def analyze_and_enhance(
        self,
        prompt: str,
        detected_command: str,
        confidence: float,
        available_commands: list[str],
        timeout: int = 30,
    ) -> dict[str, Any] | None:
        """
        Analyze a prompt using Claude Code headless mode and suggest enhancements.

        Args:
            prompt: User's original prompt
            detected_command: Command detected by the cascade
            confidence: Detection confidence (0-1)
            available_commands: List of all available commands
            timeout: Timeout in seconds

        Returns:
            Dict with analysis results, or None if unavailable/failed
        """
        if not self.is_available():
            return None

        # Build the analysis prompt for Haiku
        analysis_prompt = f"""You are a prompt enhancement assistant for Contextune, a Claude Code plugin.

USER'S PROMPT: "{prompt}"

DETECTED COMMAND: {detected_command}
DETECTION CONFIDENCE: {confidence:.0%}

AVAILABLE ALTERNATIVES:
{chr(10).join(f"- {cmd}" for cmd in available_commands[:10])}

TASK: Analyze the user's prompt and provide:
1. Whether the detected command is the best match (true/false)
2. Alternative commands if better matches exist
3. A brief, helpful suggestion for the user

RESPONSE FORMAT (JSON):
{{
"is_best_match": true/false,
"alternatives": ["command1", "command2"],
"suggestion": "Brief suggestion text"
}}

Be concise. Focus on actionability."""

        try:
            # Call Claude Code headless with the Haiku model
            cmd = [
                "claude",
                "--model",
                "claude-haiku-4-5",
                "-p",
                analysis_prompt,
                "--output-format",
                "json",
                "--allowedTools",
                "",  # No tools needed for this analysis
            ]

            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=timeout
            )

            if result.returncode != 0:
                print(f"DEBUG: Claude Code error: {result.stderr}", file=sys.stderr)
                return None

            # Parse Claude's response - it's nested in a wrapper object
            claude_response = json.loads(result.stdout)
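            # Assumed envelope shape (illustrative; only the "result" field is
            # relied on below, and the exact wrapper may vary by CLI version):
            #   {"result": "```json\n{\"is_best_match\": true, ...}\n```", ...}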

            # Extract the actual result (may be nested in a "result" field)
            if "result" in claude_response:
                result_text = claude_response["result"]
                # The result may contain JSON in a markdown code block
                if "```json" in result_text:
                    # Extract JSON from the markdown code block
                    json_start = result_text.find("```json") + 7
                    json_end = result_text.find("```", json_start)
                    result_text = result_text[json_start:json_end].strip()

                # Parse the extracted JSON
                analysis = json.loads(result_text)
                return analysis
            else:
                # If no "result" field, assume the whole response is the analysis
                return claude_response

        except subprocess.TimeoutExpired:
            print(f"DEBUG: Claude Code timeout after {timeout}s", file=sys.stderr)
            return None
        except json.JSONDecodeError as e:
            print(f"DEBUG: Failed to parse Claude response: {e}", file=sys.stderr)
            print(f"DEBUG: Raw output: {result.stdout[:200]}", file=sys.stderr)
            return None
        except Exception as e:
            print(f"DEBUG: Haiku engineer error: {e}", file=sys.stderr)
            return None


def should_process(prompt: str) -> bool:
    """Check if the prompt needs intent detection."""
    if not prompt or not prompt.strip():
        return False

    # Skip if already a command
    if prompt.strip().startswith("/"):
        return False

    # Skip internal Haiku analysis prompts (prevents a feedback loop)
    if prompt.startswith("You are a prompt enhancement assistant"):
        return False

    # Skip if too short
    if len(prompt.strip().split()) < 3:
        return False

    return True
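
# Illustrative behavior:
#   should_process("/ctx:plan")          -> False  (already a slash command)
#   should_process("fix this")           -> False  (fewer than 3 words)
#   should_process("help me plan tasks") -> True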


def write_detection_for_statusline(match: IntentMatch, prompt: str):
    """Write detection data to the observability DB for the status line to read."""
    try:
        db = ObservabilityDB(".contextune/observability.db")
        db.set_detection(
            command=match.command,
            confidence=match.confidence,
            method=match.method,
            prompt_preview=prompt[:60] + ("..." if len(prompt) > 60 else ""),
            latency_ms=match.latency_ms,
        )

        # Also log matcher performance
        db.log_matcher_performance(match.method, match.latency_ms, success=True)

        print(
            f"DEBUG: Wrote detection to observability DB: {match.command} ({match.confidence:.0%} {match.method})",
            file=sys.stderr,
        )
    except Exception as e:
        # Don't fail the hook if the observability write fails
        print(f"DEBUG: Failed to write to observability DB: {e}", file=sys.stderr)
        # Also log the error
        try:
            db = ObservabilityDB(".contextune/observability.db")
            db.log_error("user_prompt_submit", type(e).__name__, str(e))
        except Exception:
            pass


def clear_detection_statusline():
    """Clear the status line detection (no match found)."""
    try:
        db = ObservabilityDB(".contextune/observability.db")
        db.clear_detection()
        print("DEBUG: Cleared detection from observability DB", file=sys.stderr)
    except Exception as e:
        print(
            f"DEBUG: Failed to clear detection from observability DB: {e}",
            file=sys.stderr,
        )


def get_detection_count() -> int:
    """Get the total number of detections for progressive tips."""
    try:
        db = ObservabilityDB(".contextune/observability.db")
        stats = db.get_stats()
        return stats.get("detections", {}).get("total", 0)
    except Exception:
        pass
    return 0


def increment_detection_count():
    """Increment the detection counter for progressive disclosure."""
    try:
        data_dir = Path.home() / ".claude" / "plugins" / "contextune" / "data"
        data_dir.mkdir(parents=True, exist_ok=True)
        stats_file = data_dir / "detection_stats.json"

        stats = {"total_detections": 0, "by_method": {}, "by_command": {}}
        if stats_file.exists():
            with open(stats_file) as f:
                stats = json.load(f)

        stats["total_detections"] = stats.get("total_detections", 0) + 1

        with open(stats_file, "w") as f:
            json.dump(stats, f, indent=2)
    except Exception:
        pass  # Don't fail the hook if stats tracking fails
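
# Example detection_stats.json after a few detections (illustrative; only
# total_detections is updated by this function):
#   {"total_detections": 3, "by_method": {}, "by_command": {}}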


# Command action descriptions for directive feedback
COMMAND_ACTIONS = {
    # Contextune commands
    "/ctx:design": "design system architecture with structured workflow",
    "/ctx:research": "get fast answers using 3 parallel agents",
    "/ctx:plan": "create parallel development plans",
    "/ctx:execute": "run tasks in parallel worktrees",
    "/ctx:status": "monitor parallel task progress",
    "/ctx:cleanup": "clean up completed worktrees",
    "/ctx:help": "see example-first command guide",
    "/ctx:configure": "enable persistent status bar display",
    "/ctx:stats": "see your time & cost savings",
    "/ctx:verify": "verify and execute detected command with confirmation",
    # Skill-only detections (no commands)
    "skill:ctx:performance": "analyze and optimize parallel workflow performance",
    "skill:ctx:parallel-expert": "get guidance on parallelizing tasks effectively",
    "skill:ctx:help": "discover Contextune features and capabilities",
    "skill:ctx:worktree": "troubleshoot git worktree issues and conflicts",
}

# Skill mapping for reliable Claude execution.
# Maps slash commands AND skill detections to skill names.
# Skills are auto-discovered by Claude Code from: contextune/skills/*/SKILL.md
SKILL_MAPPING = {
    # Commands with skills
    "/ctx:design": "ctx:architect",  # Plugin skill: skills/software-architect
    "/ctx:research": "ctx:researcher",  # Plugin skill: skills/researcher
    # Skills without commands (direct skill suggestions)
    "skill:ctx:performance": "ctx:performance",
    "skill:ctx:parallel-expert": "ctx:parallel-expert",
    "skill:ctx:help": "ctx:help",
    "skill:ctx:worktree": "ctx:worktree",
    # Note: /ctx:plan and /ctx:execute are commands, not skills;
    # they execute workflows directly rather than providing guidance.
}


def create_skill_augmented_prompt(match: IntentMatch, original_prompt: str) -> str:
    """
    Augment the prompt with a skill suggestion for more reliable execution.

    Evidence: skills are invoked more reliably than slash commands because
    they use Claude's native Skill tool (structured, type-safe) vs text expansion.

    Args:
        match: Detected command and confidence
        original_prompt: User's original prompt text

    Returns:
        Augmented prompt that guides Claude to use the skill or command
    """
    if match.command in SKILL_MAPPING:
        skill_name = SKILL_MAPPING[match.command]
        # Strong directive: "You can use your X skill"
        return f"{original_prompt}. You can use your {skill_name} skill to help with this task."
    else:
        # For commands without skills, use directive language
        action = COMMAND_ACTIONS.get(match.command, "complete this request")
        return f"{original_prompt}. Please use the {match.command} command to {action}."


def get_contextual_tip(match: IntentMatch, detection_count: int) -> str | None:
    """Generate a directive contextual tip based on usage patterns."""

    # First-time users (1-3 detections)
    if detection_count <= 3:
        return "New user? Type `/ctx:help` to see all commands with examples"

    # Early users (4-10 detections) - promote the status bar
    elif detection_count <= 10:
        return "Enable persistent detection: Type `/ctx:configure` to set up status bar"

    # Experienced users (11-20) - promote advanced features
    elif detection_count <= 20:
        if match.command.startswith("/ctx:"):
            return "Want parallel workflows? Type `/ctx:plan` to work on multiple tasks simultaneously"
        return f"Blazing fast: {match.latency_ms:.2f}ms detection. Type `/ctx:stats` to see all metrics"

    # Power users (21+) - occasional celebration
    else:
        if detection_count % 10 == 0:  # Every 10th detection
            return f"🎉 {detection_count} detections! Type `/ctx:stats` to see your time & cost savings"
        return None  # No tip for most interactions


def load_available_commands() -> list[str]:
    """Load the list of all available commands for Claude Code."""
    # Return all commands from COMMAND_ACTIONS
    return [cmd for cmd in COMMAND_ACTIONS if cmd.startswith("/")]


def load_available_skills() -> dict[str, str]:
    """
    Load all available skills from the plugin.

    Returns:
        Dict mapping directory names to skill names,
        e.g., {'software-architect': 'ctx:architect'}
    """
    skills_dir = PLUGIN_ROOT / 'skills'

    if not skills_dir.exists():
        return {}

    skill_map = {}

    for skill_dir in skills_dir.iterdir():
        if not skill_dir.is_dir():
            continue

        skill_file = skill_dir / 'SKILL.md'
        if not skill_file.exists():
            continue

        # Read the skill name from the frontmatter
        try:
            with open(skill_file) as f:
                content = f.read()
                match = re.search(r'^name:\s*(.+)$', content, re.MULTILINE)
                if match:
                    skill_name = match.group(1).strip()
                    skill_map[skill_dir.name] = skill_name
        except Exception:
            continue

    return skill_map


def detect_skill_invocation(prompt: str) -> tuple[bool, str]:
    """
    Detect if the user is trying to invoke a skill explicitly.

    Returns:
        (is_skill_invocation, attempted_skill_name)
    """
    patterns = [
        r'use (?:the )?([a-z\-:]+) skill',
        r'with (?:the )?([a-z\-:]+) skill',
        r'([a-z\-:]+) skill to',
        r'activate (?:the )?([a-z\-:]+)',
        r'invoke (?:the )?([a-z\-:]+)',
    ]

    for pattern in patterns:
        match = re.search(pattern, prompt.lower())
        if match:
            return True, match.group(1)

    return False, ''
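
# Illustrative behavior:
#   detect_skill_invocation("use the researcher skill to find docs")
#       -> (True, "researcher")
#   detect_skill_invocation("plan my week")
#       -> (False, "")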


def find_correct_skill_name(attempted_name: str, skill_map: dict[str, str]) -> tuple[str | None, int]:
    """
    Find the correct skill name using fuzzy matching.

    Args:
        attempted_name: What the user tried to use
        skill_map: Directory → skill name mapping

    Returns:
        (correct_name, confidence_score)
    """
    from rapidfuzz import fuzz

    # Exact directory match
    if attempted_name in skill_map:
        return skill_map[attempted_name], 100

    # Exact skill name match (already correct)
    if attempted_name in skill_map.values():
        return attempted_name, 100

    # Fuzzy match
    best_match = None
    best_score = 0

    for directory, skill_name in skill_map.items():
        # Directory name
        score = fuzz.ratio(attempted_name, directory)
        if score > best_score and score > 70:
            best_score = score
            best_match = skill_name

        # Skill name
        score = fuzz.ratio(attempted_name, skill_name)
        if score > best_score and score > 70:
            best_score = score
            best_match = skill_name

        # Skill name without the prefix
        skill_base = skill_name.replace('ctx:', '')
        score = fuzz.ratio(attempted_name, skill_base)
        if score > best_score and score > 70:
            best_score = score
            best_match = skill_name

    return (best_match, best_score) if best_match else (None, 0)
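
# Illustrative behavior, assuming skill_map == {"software-architect": "ctx:architect"}:
#   find_correct_skill_name("architect", skill_map)      -> ("ctx:architect", 100)
#   find_correct_skill_name("unrelated-name", skill_map) -> (None, 0)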


def format_suggestion(match: IntentMatch, detection_count: int = 0) -> str:
    """Format a detection with directive, actionable phrasing."""

    # Get the action description
    action = COMMAND_ACTIONS.get(match.command, "execute this command")

    # Build the directive message
    confidence_pct = int(match.confidence * 100)

    # Primary directive message
    base_msg = (
        f"💡 Type `{match.command}` to {action} ({confidence_pct}% {match.method}"
    )

    # Add latency if fast (show performance)
    if match.latency_ms < 1.0:
        base_msg += f", {match.latency_ms:.2f}ms"

    base_msg += ")"

    # Get a contextual tip
    tip = get_contextual_tip(match, detection_count)

    if tip:
        return f"{base_msg}\n💡 {tip}"

    return base_msg
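
# Example output (illustrative values):
#   💡 Type `/ctx:plan` to create parallel development plans (92% keyword, 0.02ms)
#   💡 New user? Type `/ctx:help` to see all commands with examples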


def format_interactive_suggestion(
    match: IntentMatch, analysis: dict[str, Any] | None, detection_count: int = 0
) -> str:
    """
    Format an interactive suggestion with Haiku analysis.

    Args:
        match: Detected command match
        analysis: Haiku analysis results (optional)
        detection_count: Total detections for contextual tips

    Returns:
        Formatted suggestion message
    """
    # Get the action description
    action = COMMAND_ACTIONS.get(match.command, "execute this command")
    confidence_pct = int(match.confidence * 100)

    # Base detection message
    base_msg = f"🎯 Detected: `{match.command}` ({confidence_pct}% via {match.method})"

    # Add latency if fast
    if match.latency_ms < 1.0:
        base_msg += f"\n⚡ Detection speed: {match.latency_ms:.2f}ms"

    # Add the Haiku analysis if available
    if analysis:
        if not analysis.get("is_best_match", True):
            alternatives = analysis.get("alternatives", [])
            if alternatives:
                base_msg += "\n\n💡 Better alternatives:"
                for alt in alternatives[:3]:
                    alt_action = COMMAND_ACTIONS.get(alt, "execute this command")
                    base_msg += f"\n  • `{alt}` - {alt_action}"

        suggestion = analysis.get("suggestion")
        if suggestion:
            base_msg += f"\n\n💬 Suggestion: {suggestion}"
    else:
        # Fallback without analysis
        base_msg += f"\n\n📝 Action: Type `{match.command}` to {action}"

    # Get a contextual tip
    tip = get_contextual_tip(match, detection_count)
    if tip:
        base_msg += f"\n\n💡 Tip: {tip}"

    return base_msg


def detect_git_workflow(prompt: str) -> tuple[bool, str | None]:
    """
    Detect git workflow patterns in natural language.

    Returns: (is_git_workflow, script_command)
    """
    prompt_lower = prompt.lower()

    # Git workflow patterns
    patterns = [
        # Commit and push
        (r'\b(commit|save|push|commit.*push|push.*commit)\b', '/ctx:git-commit'),
        # Create PR
        (r'\b(create.*pr|make.*pr|open.*pr|pull.*request)\b', None),  # TODO: /ctx:git-pr
        # Merge
        (r'\b(merge|merge.*branch)\b', None),  # TODO: /ctx:git-merge
    ]

    for pattern, command in patterns:
        if re.search(pattern, prompt_lower):
            print(f"DEBUG: Git workflow detected: {pattern} → {command}", file=sys.stderr)
            return True, command

    return False, None
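
# Illustrative behavior:
#   detect_git_workflow("commit and push my changes") -> (True, "/ctx:git-commit")
#   detect_git_workflow("open a pr for this branch")  -> (True, None)   # command still TODO
#   detect_git_workflow("refactor the parser")        -> (False, None)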


def main():
    """Hook entry point."""

    try:
        # Read the hook event from stdin
        event_json = sys.stdin.read()
        event = json.loads(event_json)

        prompt = event.get("prompt", "")

        # DEBUG: Log what we received
        print(
            f"DEBUG: Contextune hook triggered with prompt: '{prompt}'", file=sys.stderr
        )

        # Check if we should process
        if not should_process(prompt):
            print("DEBUG: Skipping prompt (should_process=False)", file=sys.stderr)
            # Pass through unchanged
            response = {"continue": True, "suppressOutput": True}
            print(json.dumps(response))
            return

        print("DEBUG: Processing prompt (should_process=True)", file=sys.stderr)

        # GIT WORKFLOW DETECTION: Check if the user wants a git workflow
        is_git_workflow, git_command = detect_git_workflow(prompt)

        if is_git_workflow and git_command:
            print(f"DEBUG: Git workflow detected, suggesting: {git_command}", file=sys.stderr)

            feedback = f"""⚡ Git Workflow Detected

Your request matches a git workflow pattern.

💡 **Recommended:** Use the deterministic slash command instead:
`{git_command}`

**Benefits:**
- ✅ 93-97% token reduction (~$0.002 vs ~$0.037-0.086)
- ✅ Single command execution
- ✅ Deterministic, tested workflow
- ✅ Auto-detects remote
- ✅ Proper error handling

**Example:**
```
{git_command}
```

Continuing with your original prompt, but consider using the slash command for efficiency."""

            response = {
                "continue": True,
                "additionalContext": feedback,
                "suppressOutput": False,
            }
            print(json.dumps(response))
            return

        # SKILL DETECTION: Check if the user is trying to invoke a skill
        is_skill_invocation, attempted_skill = detect_skill_invocation(prompt)

        if is_skill_invocation:
            print(f"DEBUG: Skill invocation detected: '{attempted_skill}'", file=sys.stderr)

            # Load the available skills
            skill_map = load_available_skills()
            print(f"DEBUG: Loaded {len(skill_map)} skills", file=sys.stderr)

            # Find the correct skill name
            correct_skill, confidence = find_correct_skill_name(attempted_skill, skill_map)

            if correct_skill and confidence > 70:
                if attempted_skill != correct_skill:
                    # Suggest a correction
                    suggestion = f"""💡 Skill Name Correction

Detected: Trying to use '{attempted_skill}' skill
Correct name: '{correct_skill}' (match confidence: {confidence:.0f}%)

Use: {correct_skill}

Available skills:
{chr(10).join(f'  • {name} (directory: {directory})' for directory, name in skill_map.items())}
"""
                    print(f"DEBUG: Suggesting skill name correction: {attempted_skill} → {correct_skill}", file=sys.stderr)

                    response = {
                        "continue": True,
                        "additionalContext": suggestion,
                        "suppressOutput": False,
                    }
                    print(json.dumps(response))
                    return
                else:
                    print(f"DEBUG: Skill name already correct: {correct_skill}", file=sys.stderr)
            else:
                print(f"DEBUG: No matching skill found for '{attempted_skill}'", file=sys.stderr)

        # Initialize the detector
        detector = ContextuneDetector()

        # Detect intent
        match = detector.detect(prompt)

        print(f"DEBUG: Detection result: {match}", file=sys.stderr)

        if match is None or match.confidence < 0.7:
            print(
                "DEBUG: No match or low confidence, passing through", file=sys.stderr
            )
            # Clear the status line detection (no match)
            clear_detection_statusline()
            # No match or low confidence - pass through
            response = {"continue": True, "suppressOutput": True}
            print(json.dumps(response))
            return

        # Write the detection for the status line
        write_detection_for_statusline(match, prompt)

        # Get the current detection count for progressive tips
        detection_count = get_detection_count()

        # Increment the counter
        increment_detection_count()

        print(
            f"DEBUG: Command detected (detection #{detection_count + 1})",
            file=sys.stderr,
        )

        # Initialize the Haiku engineer for interactive analysis
        engineer = ClaudeCodeHaikuEngineer()
        haiku_analysis = None
        haiku_latency_ms = 0.0

        # Selective triggering: only run Haiku for low-confidence or fuzzy/semantic matches.
        # High-confidence exact matches (0.95+) are reliable and don't need Haiku validation.
        should_run_haiku = match.confidence < 0.95 or match.method in ["fuzzy", "semantic"]

        if should_run_haiku:
            print(f"DEBUG: Triggering Haiku analysis (confidence={match.confidence:.2f}, method={match.method})", file=sys.stderr)
        else:
            print(f"DEBUG: Skipping Haiku analysis (high-confidence {match.method} match: {match.confidence:.2f})", file=sys.stderr)

        # Try to get a Haiku analysis for better suggestions (only if needed)
        if should_run_haiku and engineer.is_available():
            print("DEBUG: Running Haiku analysis...", file=sys.stderr)
            available_commands = load_available_commands()

            # Track Haiku analysis latency
            import time

            haiku_start = time.perf_counter()

            haiku_analysis = engineer.analyze_and_enhance(
                prompt=prompt,
                detected_command=match.command,
                confidence=match.confidence,
                available_commands=available_commands,
                timeout=30,
            )

            haiku_latency_ms = (time.perf_counter() - haiku_start) * 1000

            if haiku_analysis:
                print(
                    f"DEBUG: Haiku analysis: {json.dumps(haiku_analysis)}",
                    file=sys.stderr,
                )
            else:
                print("DEBUG: Haiku analysis failed or timed out", file=sys.stderr)
        elif not should_run_haiku:
            print("DEBUG: Haiku analysis skipped (selective triggering)", file=sys.stderr)
        else:
            print(
                "DEBUG: Claude Code CLI not available, skipping Haiku analysis",
                file=sys.stderr,
            )

        # Format the interactive suggestion with Haiku analysis (if available)
        interactive_msg = format_interactive_suggestion(
            match, haiku_analysis, detection_count
        )

        # Determine the best command to use.
        # If Haiku says the detection isn't the best match and suggests
        # alternatives, use the first alternative.
        best_command = match.command
        correction_accepted = False

        if haiku_analysis and not haiku_analysis.get("is_best_match", True):
            alternatives = haiku_analysis.get("alternatives", [])
            if alternatives:
                best_command = alternatives[0]
                correction_accepted = True
                print(
                    f"DEBUG: Haiku suggests using {best_command} instead of {match.command}",
                    file=sys.stderr,
                )

        # Log the correction to the observability DB
        if haiku_analysis:
            try:
                db = ObservabilityDB(".contextune/observability.db")

                # Estimate token counts (rough approximation):
                # the Haiku prompt is ~150 tokens + command list + user prompt
                prompt_tokens = (
                    150 + len(prompt.split()) + len(load_available_commands()) * 5
                )
                # The response is typically ~50-100 tokens
                completion_tokens = (
                    50 + len(str(haiku_analysis.get("suggestion", ""))) // 4
                )

                db.log_correction(
                    original_command=match.command,
                    corrected_command=best_command,
                    original_confidence=match.confidence,
                    correction_accepted=correction_accepted,
                    model_name="haiku-4-5",
                    reasoning=haiku_analysis.get("suggestion", ""),
                    prompt_tokens=prompt_tokens,
                    completion_tokens=completion_tokens,
                    latency_ms=haiku_latency_ms,
                    session_id=event.get("session_id", ""),
                    prompt_preview=prompt[:100],
                )
                print(
                    f"DEBUG: Logged correction to observability DB (accepted={correction_accepted})",
                    file=sys.stderr,
                )
            except Exception as e:
                print(f"DEBUG: Failed to log correction: {e}", file=sys.stderr)

        # AUGMENT MODE: Build an augmented prompt with the best command
        # (potentially corrected by Haiku) to guide Claude more reliably.
        print("DEBUG: Augmenting prompt for Claude", file=sys.stderr)

        if best_command != match.command:
            # Use Haiku's suggested command
            action = COMMAND_ACTIONS.get(best_command, "complete this request")
            if best_command in SKILL_MAPPING:
                skill_name = SKILL_MAPPING[best_command]
                augmented_prompt = f"{prompt}. You can use your {skill_name} skill to help with this task."
            else:
                augmented_prompt = (
                    f"{prompt}. Please use the {best_command} command to {action}."
                )
        else:
            # Use the original detection
            augmented_prompt = create_skill_augmented_prompt(match, prompt)
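
        # Note: in suggest-only mode (below) the augmented prompt is prepared but
        # not injected; only the suggestion text is attached as additionalContext.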

        # SUGGEST-ONLY MODE: Show the detection but let the user decide.
        # Don't auto-execute - just provide a helpful suggestion.
        print(
            f"DEBUG: Using suggest-only mode - showing suggestion for {best_command}",
            file=sys.stderr,
        )

        response = {
            "continue": True,
            "additionalContext": interactive_msg,
            "suppressOutput": False,
        }

        print(f"DEBUG: Response: {json.dumps(response)}", file=sys.stderr)
        print(json.dumps(response))

    except Exception as e:
        # Log the error but don't block Claude
        import traceback

        print(f"Contextune error: {e}", file=sys.stderr)
        print(f"DEBUG: Traceback: {traceback.format_exc()}", file=sys.stderr)
        response = {"continue": True, "suppressOutput": True}
        print(json.dumps(response))


if __name__ == "__main__":
    main()