Initial commit

2025-11-30 08:56:10 +08:00
commit 400ca062d1
48 changed files with 18674 additions and 0 deletions
--- a/hooks/tool_cost_tracker.py
+++ b/hooks/tool_cost_tracker.py
@@ -0,0 +1,206 @@
+#!/usr/bin/env -S uv run --script
+# /// script
+# requires-python = ">=3.10"
+# dependencies = []
+# ///
+
+"""
+PostToolUse hook to track actual costs vs. estimates.
+
+Compares routing decisions with actual token usage to:
+1. Validate routing decisions
+2. Track cumulative costs
+3. Calculate actual Haiku vs Sonnet savings
+4. Feed data to weekly review
+"""
+
+import json
+import sys
+from pathlib import Path
+from typing import Any
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from lib.observability_db import ObservabilityDB
+
+
+class CostTracker:
+    """
+    Track estimated tool costs and compare them with the cheaper model.
+
+    Cost model (USD per 1K tokens):
+    - Sonnet input: $0.003
+    - Sonnet output: $0.015
+    - Haiku input: $0.00025
+    - Haiku output: $0.00125
+
+    The same token estimate is charged at both the input and the output
+    rate, so the reported figures are rough approximations, not billing data.
+    """
+
+    SONNET_INPUT_COST = 0.003
+    SONNET_OUTPUT_COST = 0.015
+    HAIKU_INPUT_COST = 0.00025
+    HAIKU_OUTPUT_COST = 0.00125
+
+    def __init__(self):
+        """Open the observability database used by ``log_cost_metrics``."""
+        self.db = ObservabilityDB()
+
+    def track_tool_usage(
+        self,
+        tool_name: str,
+        tool_params: dict[str, Any],
+        result: Any,
+        model_used: str = "sonnet",  # "sonnet" or "haiku"
+    ) -> dict[str, Any]:
+        """
+        Estimate the cost of one tool call and the savings Haiku would offer.
+
+        Args:
+            tool_name: Name of tool used; selects the token heuristic.
+            tool_params: Tool parameters. Accepted for interface
+                compatibility; not used in the calculation.
+            result: Tool result/output the token estimate is derived from.
+            model_used: Which model executed the tool. Any value containing
+                "sonnet" (case-insensitive) is priced as Sonnet; everything
+                else is priced as Haiku.
+
+        Returns:
+            Cost analysis dictionary with the keys ``tool``, ``model``,
+            ``estimated_tokens``, ``actual_cost``, ``potential_savings`` and
+            ``efficiency`` ("optimal" or "suboptimal").
+        """
+
+        estimated_tokens = self._estimate_tokens(tool_name, result)
+        kilo_tokens = estimated_tokens / 1000
+
+        # Match on a substring so "Sonnet" or a full model id such as
+        # "claude-3-5-sonnet" is not silently priced at Haiku rates.
+        is_sonnet = "sonnet" in str(model_used).lower()
+
+        if is_sonnet:
+            input_cost = kilo_tokens * self.SONNET_INPUT_COST
+            output_cost = kilo_tokens * self.SONNET_OUTPUT_COST
+        else:  # haiku
+            input_cost = kilo_tokens * self.HAIKU_INPUT_COST
+            output_cost = kilo_tokens * self.HAIKU_OUTPUT_COST
+        total_cost = input_cost + output_cost
+
+        # Savings are only possible when the more expensive model was used.
+        if is_sonnet:
+            haiku_cost = kilo_tokens * (
+                self.HAIKU_INPUT_COST + self.HAIKU_OUTPUT_COST
+            )
+            potential_savings = total_cost - haiku_cost
+        else:
+            potential_savings = 0.0  # Already using cheapest model
+
+        return {
+            "tool": tool_name,
+            "model": model_used,
+            "estimated_tokens": estimated_tokens,
+            "actual_cost": total_cost,
+            "potential_savings": potential_savings,
+            "efficiency": "optimal" if potential_savings <= 0 else "suboptimal",
+        }
+
+    @staticmethod
+    def _count_lines(result: Any) -> int:
+        """
+        Count text lines in a tool result, descending into containers.
+
+        Strings contribute one line per newline, plus one for a final line
+        that has no trailing newline. Dict values and list/tuple items are
+        counted recursively. Any other object falls back to the number of
+        newlines in its ``str()`` form.
+        """
+        if isinstance(result, str):
+            unterminated = 1 if result and not result.endswith("\n") else 0
+            return result.count("\n") + unterminated
+        if isinstance(result, dict):
+            return sum(CostTracker._count_lines(v) for v in result.values())
+        if isinstance(result, (list, tuple)):
+            return sum(CostTracker._count_lines(v) for v in result)
+        return str(result).count("\n")
+
+    def _estimate_tokens(self, tool_name: str, result: Any) -> int:
+        """
+        Estimate tokens from tool result.
+
+        Rough heuristics:
+        - Read: ~2 tokens per line
+        - Bash: ~0.5 tokens per char
+        - Grep: ~1 token per match (one match per line)
+        - Other: ~4 chars per token
+        """
+
+        # Line-based heuristics count lines on the raw values: json.dumps
+        # escapes newlines, so counting "\n" in the serialised form of a
+        # dict result would always yield zero.
+        if tool_name == "Read":
+            return self._count_lines(result) * 2
+        if tool_name == "Grep":
+            return self._count_lines(result)
+
+        # Length-based heuristics work on a flat string form of the result.
+        if isinstance(result, dict):
+            # default=str keeps estimation from failing on values that are
+            # not JSON-serialisable.
+            result_str = json.dumps(result, default=str)
+        else:
+            result_str = str(result)
+
+        if tool_name == "Bash":
+            return len(result_str) // 2
+        # Generic: ~4 chars per token
+        return len(result_str) // 4
+
+    def log_cost_metrics(self, cost_analysis: dict[str, Any]):
+        """
+        Log cost metrics to observability database.
+
+        Args:
+            cost_analysis: Dictionary as returned by ``track_tool_usage``.
+                A missing expected key raises ``KeyError``.
+        """
+
+        self.db.log_performance_metric(
+            component="cost_tracker",
+            operation="tool_cost",
+            # No latency is measured here; the metric row only carries
+            # the cost metadata.
+            latency_ms=0.0,
+            metadata={
+                "tool": cost_analysis["tool"],
+                "model": cost_analysis["model"],
+                "tokens": cost_analysis["estimated_tokens"],
+                "cost": cost_analysis["actual_cost"],
+                "savings": cost_analysis["potential_savings"],
+                "efficiency": cost_analysis["efficiency"],
+            },
+        )
+
+
+def main():
+    """
+    Main entry point for PostToolUse hook.
+
+    Reads one JSON object from stdin, estimates the cost of the tool call,
+    logs it to the observability database and prints a JSON response to
+    stdout. The response always contains ``"continue": True``; it also
+    carries ``"additionalContext"`` when the potential savings exceed $0.01.
+    Any failure is logged on a best-effort basis and never blocks the tool.
+    """
+    try:
+        # Read hook input from stdin
+        hook_data: dict[str, Any] = json.load(sys.stdin)
+
+        # Two payload shapes are accepted. The nested one
+        # ({"tool": {"name", "parameters"}, "result"}) takes precedence;
+        # the flat keys "tool_name", "tool_input" and "tool_response" are a
+        # fallback. "or {}" also covers an explicit null "tool" entry.
+        tool: dict[str, Any] = hook_data.get("tool") or {}
+        tool_name: str = tool.get("name") or hook_data.get("tool_name") or ""
+        tool_params: dict[str, Any] = (
+            tool.get("parameters") or hook_data.get("tool_input") or {}
+        )
+        if "result" in hook_data:
+            result: Any = hook_data["result"]
+        else:
+            result = hook_data.get("tool_response", {})
+
+        # Detect which model was used
+        # For now, assume Sonnet (can be enhanced with actual detection)
+        model_used = "sonnet"
+
+        # Track cost
+        tracker = CostTracker()
+        cost_analysis = tracker.track_tool_usage(
+            tool_name, tool_params, result, model_used
+        )
+
+        # Log to database
+        tracker.log_cost_metrics(cost_analysis)
+
+        # Generate feedback if significant savings possible
+        if cost_analysis["potential_savings"] > 0.01:  # $0.01 threshold
+            feedback = "\n".join(
+                [
+                    "💰 **Cost Optimization Opportunity**",
+                    "",
+                    f"Tool: `{tool_name}`",
+                    f'Current cost: ${cost_analysis["actual_cost"]:.4f}',
+                    f'Potential savings: ${cost_analysis["potential_savings"]:.4f}',
+                    "",
+                    "This operation could be delegated to Haiku for cost efficiency.",
+                ]
+            )
+
+            # NOTE(review): the consuming harness may expect this text under
+            # a different key than top-level "additionalContext" — confirm.
+            output = {"continue": True, "additionalContext": feedback}
+        else:
+            output = {"continue": True}
+
+        print(json.dumps(output))
+
+    except Exception as e:
+        # Log error but don't block
+        try:
+            db = ObservabilityDB()
+            db.log_error(
+                component="cost_tracker",
+                message=str(e),
+                error_type=type(e).__name__,
+            )
+        except Exception as log_error:
+            # Best effort only: report on stderr so the failure is not
+            # completely invisible, but never raise from the hook.
+            print(
+                f"cost_tracker: failed to log error ({log_error!r}); "
+                f"original error: {e!r}",
+                file=sys.stderr,
+            )
+
+        # Always continue
+        print(json.dumps({"continue": True}))
+
+
+# Run the hook only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()