---
name: observability-logging
description: Automatically applies when adding logging and observability. Ensures structured logging, OpenTelemetry tracing, LLM-specific metrics (tokens, cost, latency), and proper log correlation.
category: observability
---

# Observability and Logging Patterns

When instrumenting AI/LLM applications, follow these patterns for comprehensive observability.

**Trigger Keywords**: logging, observability, tracing, monitoring, metrics, OpenTelemetry, structured logs, correlation ID, request tracking, telemetry

**Agent Integration**: Used by `ml-system-architect`, `llm-app-engineer`, `mlops-ai-engineer`, `performance-and-cost-engineer-llm`

## ✅ Correct Pattern: Structured Logging

```python
import json
import logging
from datetime import datetime, timezone
from typing import Any, Dict


class StructuredLogger:
    """Structured JSON logger for AI applications."""

    def __init__(self, name: str):
        self.logger = logging.getLogger(name)
        self._setup_handler()

    def _setup_handler(self):
        """Configure JSON formatting."""
        handler = logging.StreamHandler()
        handler.setFormatter(JSONFormatter())
        self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)

    def info(self, message: str, **extra: Any):
        """Log info with structured data."""
        # Nest fields under a single known attribute: `extra` kwargs become
        # individual LogRecord attributes, so one key is easy to recover.
        self.logger.info(message, extra={"structured": self._enrich(extra)})

    def error(self, message: str, **extra: Any):
        """Log error with structured data."""
        self.logger.error(
            message, extra={"structured": self._enrich(extra)}, exc_info=True
        )

    def _enrich(self, extra: Dict[str, Any]) -> Dict[str, Any]:
        """Add standard fields to log entry."""
        return {
            **extra,
            "service": "llm-service",  # timestamp is stamped by the formatter
        }


class JSONFormatter(logging.Formatter):
    """Format logs as JSON."""

    def format(self, record: logging.LogRecord) -> str:
        """Format log record as JSON."""
        log_data = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
        }

        # Merge fields passed via `extra={"structured": ...}`
        structured = getattr(record, "structured", None)
        if structured:
            log_data.update(structured)

        # Add exception if present
        if record.exc_info:
            log_data["exception"] = self.formatException(record.exc_info)

        return json.dumps(log_data)


# Usage
logger = StructuredLogger(__name__)

logger.info(
    "LLM request started",
    request_id="req_123",
    model="claude-sonnet-4",
    prompt_length=150,
)
```

## LLM-Specific Logging

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class LLMMetrics:
    """Metrics for LLM call."""
    request_id: str
    model: str
    input_tokens: int
    output_tokens: int
    total_tokens: int
    duration_ms: float
    cost_usd: float
    prompt_length: int
    response_length: int
    success: bool
    error: Optional[str] = None


class LLMLogger:
    """Specialized logger for LLM operations."""

    def __init__(self):
        self.logger = StructuredLogger("llm")

    async def log_llm_call(
        self,
        request_id: str,
        model: str,
        prompt: str,
        response: str,
        usage: dict,
        duration_ms: float,
        success: bool = True,
        error: Optional[str] = None
    ):
        """
        Log complete LLM interaction with metrics.

        Args:
            request_id: Unique request identifier
            model: Model name
            prompt: Input prompt (will be truncated/redacted)
            response: Model response (will be truncated)
            usage: Token usage dict
            duration_ms: Request duration in milliseconds
            success: Whether request succeeded
            error: Error message if failed
        """
        # Calculate cost
        cost = self._estimate_cost(
            model,
            usage["input_tokens"],
            usage["output_tokens"]
        )

        # Create metrics
        metrics = LLMMetrics(
            request_id=request_id,
            model=model,
            input_tokens=usage["input_tokens"],
            output_tokens=usage["output_tokens"],
            total_tokens=usage["total_tokens"],
            duration_ms=duration_ms,
            cost_usd=cost,
            prompt_length=len(prompt),
            response_length=len(response),
            success=success,
            error=error
        )

        # Log with structured data
        self.logger.info(
            "LLM call completed" if success else "LLM call failed",
            **vars(metrics),
            # Redact sensitive content, keep prefix for debugging
            prompt_preview=self._redact(prompt[:100]),
            response_preview=self._redact(response[:100]) if response else None
        )

    def _redact(self, text: str) -> str:
        """Redact PII from logs."""
        # Implement PII redaction
        return text  # TODO: Add actual redaction

    def _estimate_cost(
        self,
        model: str,
        input_tokens: int,
        output_tokens: int
    ) -> float:
        """Estimate cost in USD."""
        # Pricing table; fall back to Sonnet rates for unknown models
        pricing = {
            "claude-sonnet-4-20250514": {
                "input": 3.00 / 1_000_000,
                "output": 15.00 / 1_000_000
            }
        }
        rates = pricing.get(model, pricing["claude-sonnet-4-20250514"])
        return (input_tokens * rates["input"]) + (output_tokens * rates["output"])
```
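
The `_redact` hook above is left as a TODO. As a minimal sketch of what it could delegate to (the two regexes are illustrative only and far from exhaustive; production systems should use a vetted PII library or the `pii-redaction` skill):

```python
import re

# Illustrative patterns only -- real PII detection needs more than two regexes.
_EMAIL_RE = re.compile(r"[\w.+-]+@[\w-]+\.[\w.-]+")
_PHONE_RE = re.compile(r"\+?\d[\d\s().-]{7,}\d")


def redact_pii(text: str) -> str:
    """Replace obvious email addresses and phone numbers with placeholders."""
    text = _EMAIL_RE.sub("[EMAIL]", text)
    return _PHONE_RE.sub("[PHONE]", text)
```

`LLMLogger._redact` can then simply return `redact_pii(text)`.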

## Request Correlation with Context

```python
import uuid
from contextvars import ContextVar
from typing import Any, Dict, Optional

# Context variable for request ID
request_id_ctx: ContextVar[Optional[str]] = ContextVar("request_id", default=None)


class RequestContext:
    """Manage request context for distributed tracing."""

    @staticmethod
    def set_request_id(request_id: Optional[str] = None) -> str:
        """Set request ID for current context."""
        if request_id is None:
            request_id = f"req_{uuid.uuid4().hex[:12]}"
        request_id_ctx.set(request_id)
        return request_id

    @staticmethod
    def get_request_id() -> Optional[str]:
        """Get current request ID."""
        return request_id_ctx.get()

    @staticmethod
    def clear():
        """Clear request context."""
        request_id_ctx.set(None)


class CorrelatedLogger(StructuredLogger):
    """Logger that automatically includes request context."""

    def _enrich(self, extra: Dict[str, Any]) -> Dict[str, Any]:
        """Add request ID to all logs."""
        enriched = super()._enrich(extra)
        request_id = RequestContext.get_request_id()
        if request_id:
            enriched["request_id"] = request_id
        return enriched


# Usage in FastAPI
from fastapi import FastAPI, Request
from starlette.middleware.base import BaseHTTPMiddleware


class RequestIDMiddleware(BaseHTTPMiddleware):
    """Add request ID to all requests."""

    async def dispatch(self, request: Request, call_next):
        # Honor an incoming X-Request-ID header, else generate a new ID;
        # either way, store it in the context so logs pick it up.
        request_id = RequestContext.set_request_id(request.headers.get("X-Request-ID"))
        request.state.request_id = request_id

        response = await call_next(request)

        # Add to response headers
        response.headers["X-Request-ID"] = request_id

        RequestContext.clear()
        return response


app = FastAPI()
app.add_middleware(RequestIDMiddleware)
```
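
With the middleware installed, every log emitted while handling a request carries the same `request_id` without threading it through call signatures. A brief usage sketch (`run_llm_pipeline` is a hypothetical downstream helper, not part of this skill):

```python
logger = CorrelatedLogger("api")


async def summarize(payload: dict) -> dict:
    logger.info("summarize started")  # carries the middleware's request_id
    summary = await run_llm_pipeline(payload)  # hypothetical; its logs share the same ID
    logger.info("summarize finished", summary_length=len(summary))
    return {"summary": summary}
```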

## OpenTelemetry Integration

```python
from opentelemetry import trace
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor


def setup_tracing(service_name: str = "llm-service"):
    """Configure OpenTelemetry tracing."""
    # Set up tracer provider; the Resource carries the service name
    # so backends can group spans by service.
    provider = TracerProvider(
        resource=Resource.create({"service.name": service_name})
    )
    processor = BatchSpanProcessor(OTLPSpanExporter())
    provider.add_span_processor(processor)
    trace.set_tracer_provider(provider)

    # Get tracer
    return trace.get_tracer(service_name)


tracer = setup_tracing()


class TracedLLMClient:
    """LLM client with distributed tracing."""

    async def complete(self, prompt: str, **kwargs) -> str:
        """Complete with tracing."""
        with tracer.start_as_current_span("llm.complete") as span:
            # Add span attributes
            span.set_attribute("llm.model", self.model)
            span.set_attribute("llm.prompt_length", len(prompt))
            span.set_attribute("llm.max_tokens", kwargs.get("max_tokens", 1024))

            try:
                response = await self._do_complete(prompt, **kwargs)

                # Add response metrics to span
                span.set_attribute("llm.response_length", len(response))
                span.set_attribute("llm.success", True)

                return response

            except Exception as e:
                span.set_attribute("llm.success", False)
                span.set_attribute("llm.error", str(e))
                span.record_exception(e)
                raise


# Instrument FastAPI automatically
from fastapi import FastAPI

app = FastAPI()
FastAPIInstrumentor.instrument_app(app)
```
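
Traces and structured logs become far more useful when they can be joined. A minimal sketch, assuming the SDK is configured as above, that extends the earlier `CorrelatedLogger` to stamp the active trace and span IDs onto every log line:

```python
from typing import Any, Dict

from opentelemetry import trace


class TraceCorrelatedLogger(CorrelatedLogger):
    """Logger that also attaches OpenTelemetry trace/span IDs."""

    def _enrich(self, extra: Dict[str, Any]) -> Dict[str, Any]:
        enriched = super()._enrich(extra)
        ctx = trace.get_current_span().get_span_context()
        if ctx.is_valid:
            # Hex-encode so the IDs match what tracing backends display
            enriched["trace_id"] = format(ctx.trace_id, "032x")
            enriched["span_id"] = format(ctx.span_id, "016x")
        return enriched
```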

## Metrics Collection

```python
import time

from prometheus_client import Counter, Gauge, Histogram

# Define metrics
llm_requests_total = Counter(
    "llm_requests_total",
    "Total LLM requests",
    ["model", "status"]
)

llm_request_duration = Histogram(
    "llm_request_duration_seconds",
    "LLM request duration",
    ["model"]
)

llm_tokens_total = Counter(
    "llm_tokens_total",
    "Total tokens processed",
    ["model", "type"]  # type: input/output
)

llm_cost_total = Counter(
    "llm_cost_usd_total",
    "Total LLM cost in USD",
    ["model"]
)

llm_active_requests = Gauge(
    "llm_active_requests",
    "Currently active LLM requests",
    ["model"]
)


class MeteredLLMClient:
    """LLM client with Prometheus metrics."""

    async def complete(self, prompt: str, **kwargs):
        """Complete with metrics collection; returns the provider response."""
        llm_active_requests.labels(model=self.model).inc()
        start_time = time.time()

        try:
            response = await self._do_complete(prompt, **kwargs)

            # Record success metrics
            duration = time.time() - start_time
            llm_request_duration.labels(model=self.model).observe(duration)
            llm_requests_total.labels(model=self.model, status="success").inc()

            # Token and cost metrics (if usage is available)
            if hasattr(response, "usage"):
                llm_tokens_total.labels(
                    model=self.model,
                    type="input"
                ).inc(response.usage.input_tokens)

                llm_tokens_total.labels(
                    model=self.model,
                    type="output"
                ).inc(response.usage.output_tokens)

                cost = self._estimate_cost(response.usage)
                llm_cost_total.labels(model=self.model).inc(cost)

            return response

        except Exception:
            llm_requests_total.labels(model=self.model, status="error").inc()
            raise

        finally:
            llm_active_requests.labels(model=self.model).dec()
```
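
These metrics only matter if Prometheus can scrape them. For a standalone worker, `prometheus_client`'s built-in HTTP server is the simplest exposure path (the port is illustrative; FastAPI apps more often mount a metrics endpoint or exporter middleware instead):

```python
from prometheus_client import start_http_server

# Serve /metrics on port 8000 for Prometheus to scrape.
start_http_server(8000)
```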

## ❌ Anti-Patterns

```python
# ❌ Unstructured logging
print(f"LLM call took {duration}s")  # Can't parse!
logger.info("Request completed")  # No context!

# ✅ Better: Structured with context
logger.info(
    "LLM request completed",
    duration_seconds=duration,
    model=model,
    tokens=tokens,
    request_id=request_id
)


# ❌ No request correlation
logger.info("Started processing")
# ... other logs ...
logger.info("Finished processing")  # Can't correlate!

# ✅ Better: Use request IDs
RequestContext.set_request_id()
logger.info("Started processing")  # Has request_id
logger.info("Finished processing")  # Same request_id


# ❌ Logging full prompts/responses
logger.info(f"Prompt: {full_prompt}")  # PII leak + huge logs!

# ✅ Better: Log length and preview
logger.info(
    "LLM request",
    prompt_length=len(prompt),
    prompt_preview=redact(prompt[:100])
)


# ❌ No metrics
await llm_complete(prompt)  # No visibility!

# ✅ Better: Track metrics
with metrics.track_llm_call(model):
    result = await llm_complete(prompt)
```
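
The last example calls `metrics.track_llm_call`, which this skill doesn't define. One plausible sketch, built on the Prometheus metrics declared in the Metrics Collection section (the context-manager shape is an assumption; a decorator works equally well):

```python
import time
from contextlib import contextmanager


@contextmanager
def track_llm_call(model: str):
    """Track duration, status, and in-flight count for one LLM call."""
    llm_active_requests.labels(model=model).inc()
    start = time.time()
    try:
        yield
        llm_requests_total.labels(model=model, status="success").inc()
    except Exception:
        llm_requests_total.labels(model=model, status="error").inc()
        raise
    finally:
        llm_request_duration.labels(model=model).observe(time.time() - start)
        llm_active_requests.labels(model=model).dec()
```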

## Best Practices Checklist

- ✅ Use structured (JSON) logging
- ✅ Include request IDs in all logs
- ✅ Track LLM-specific metrics (tokens, cost, latency)
- ✅ Implement distributed tracing (OpenTelemetry)
- ✅ Redact PII from logs
- ✅ Use log levels appropriately (INFO, ERROR)
- ✅ Add context to all log messages
- ✅ Collect metrics in Prometheus format
- ✅ Monitor error rates and latencies
- ✅ Set up alerts for anomalies

## Auto-Apply

When adding observability:

1. Use `StructuredLogger` for JSON logs
2. Add `RequestIDMiddleware` for request correlation
3. Implement LLM-specific logging
4. Set up OpenTelemetry tracing
5. Define Prometheus metrics
6. Redact sensitive data

## Related Skills

- `llm-app-architecture` - For LLM integration
- `pii-redaction` - For data redaction
- `fastapi-patterns` - For middleware
- `async-await-checker` - For async logging