Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:24:01 +08:00
commit 7ca465850c
24 changed files with 5512 additions and 0 deletions

references/api-reference.md
# Claude API Reference
Quick reference for the Anthropic Messages API endpoints and parameters.
## Base URL
```
https://api.anthropic.com/v1
```
## Authentication
All requests require:
```http
x-api-key: YOUR_API_KEY
anthropic-version: 2023-06-01
content-type: application/json
```
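For raw HTTP calls, the headers can be assembled once and reused; the `buildHeaders` helper below is an illustrative sketch, not part of any SDK:

```typescript
// Illustrative helper: builds the three required request headers.
// Keep the key in an environment variable rather than hardcoding it.
function buildHeaders(apiKey: string): Record<string, string> {
  return {
    'x-api-key': apiKey,
    'anthropic-version': '2023-06-01',
    'content-type': 'application/json',
  };
}

// Usage with fetch (any HTTP client works the same way):
// const res = await fetch('https://api.anthropic.com/v1/messages', {
//   method: 'POST',
//   headers: buildHeaders(process.env.ANTHROPIC_API_KEY ?? ''),
//   body: JSON.stringify({ model: '...', max_tokens: 1024, messages: [...] }),
// });
```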
## Endpoints
### POST /messages
Create a message with Claude.
**Request Body:**
```typescript
{
model: string, // Required: "claude-sonnet-4-5-20250929", etc.
max_tokens: number, // Required: Maximum tokens to generate (upper bound varies by model)
messages: Message[], // Required: Conversation history
system?: string | SystemBlock[], // Optional: System prompt
temperature?: number, // Optional: 0-1 (default: 1)
top_p?: number, // Optional: 0-1 (default: 1)
top_k?: number, // Optional: Sampling parameter
stop_sequences?: string[], // Optional: Stop generation at these sequences
stream?: boolean, // Optional: Enable streaming (default: false)
tools?: Tool[], // Optional: Available tools
tool_choice?: ToolChoice, // Optional: Tool selection strategy
metadata?: Metadata // Optional: Request metadata
}
```
**Message Format:**
```typescript
{
role: "user" | "assistant",
content: string | ContentBlock[]
}
```
**Content Block Types:**
```typescript
// Text
{
type: "text",
text: string,
cache_control?: { type: "ephemeral" } // For prompt caching
}
// Image
{
type: "image",
source: {
type: "base64" | "url",
media_type: "image/jpeg" | "image/png" | "image/webp" | "image/gif",
data?: string, // base64 encoded
url?: string // publicly accessible URL
},
cache_control?: { type: "ephemeral" }
}
// Tool use (assistant messages only)
{
type: "tool_use",
id: string,
name: string,
input: object
}
// Tool result (user messages only)
{
type: "tool_result",
tool_use_id: string,
content: string | ContentBlock[],
is_error?: boolean
}
```
**Response:**
```typescript
{
id: string,
type: "message",
role: "assistant",
content: ContentBlock[],
model: string,
stop_reason: "end_turn" | "max_tokens" | "stop_sequence" | "tool_use",
stop_sequence?: string,
usage: {
input_tokens: number,
output_tokens: number,
cache_creation_input_tokens?: number,
cache_read_input_tokens?: number
}
}
```
## Model IDs
| Model | ID | Context Window |
|-------|-----|----------------|
| Claude Sonnet 4.5 | claude-sonnet-4-5-20250929 | 200k tokens |
| Claude 3.7 Sonnet | claude-3-7-sonnet-20250219 | 200k tokens |
| Claude Opus 4 | claude-opus-4-20250514 | 200k tokens |
| Claude 3.5 Haiku | claude-3-5-haiku-20241022 | 200k tokens |
## Tool Definition
```typescript
{
name: string, // Tool identifier
description: string, // What the tool does
input_schema: { // JSON Schema
type: "object",
properties: {
[key: string]: {
type: "string" | "number" | "boolean" | "array" | "object",
description?: string,
enum?: any[]
}
},
required?: string[]
}
}
```
## Streaming
Set `stream: true` in the request. The API returns Server-Sent Events (SSE):
**Event Types:**
- `message_start`: Message begins
- `content_block_start`: Content block begins
- `content_block_delta`: Text or JSON delta
- `content_block_stop`: Content block complete
- `message_delta`: Metadata update
- `message_stop`: Message complete
- `ping`: Keep-alive
**Event Format:**
```
event: message_start
data: {"type":"message_start","message":{...}}
event: content_block_delta
data: {"type":"content_block_delta","delta":{"type":"text_delta","text":"Hello"}}
event: message_stop
data: {"type":"message_stop"}
```
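The SDKs parse these frames for you; purely to illustrate the format, a minimal parser for the simple `event:`/`data:` pairs shown above might look like this (real SSE also allows comments, `id` fields, and multi-line data):

```typescript
interface SseEvent {
  event: string;
  data: unknown;
}

// Minimal sketch: pairs each "event:" line with the following "data:" line.
function parseSse(raw: string): SseEvent[] {
  const events: SseEvent[] = [];
  let current: string | null = null;
  for (const line of raw.split('\n')) {
    if (line.startsWith('event: ')) {
      current = line.slice(7).trim();
    } else if (line.startsWith('data: ') && current !== null) {
      events.push({ event: current, data: JSON.parse(line.slice(6)) });
      current = null;
    }
  }
  return events;
}
```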
## Error Responses
```typescript
{
type: "error",
error: {
type: string, // Error type identifier
message: string // Human-readable description
}
}
```
**Common Error Types:**
- `invalid_request_error` (400)
- `authentication_error` (401)
- `permission_error` (403)
- `not_found_error` (404)
- `rate_limit_error` (429)
- `api_error` (500)
- `overloaded_error` (529)
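One practical use of this table is deciding which failures to retry; the helper below is a sketch based only on the status codes listed above:

```typescript
// 429 (rate limit), 500 (api_error), and 529 (overloaded) are transient
// and worth retrying with backoff; the 4xx client errors are not.
function isRetryable(status: number): boolean {
  return status === 429 || status === 500 || status === 529;
}
```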
## Rate Limit Headers
Response includes:
```
anthropic-ratelimit-requests-limit: 50
anthropic-ratelimit-requests-remaining: 49
anthropic-ratelimit-requests-reset: 2025-10-25T12:00:00Z
anthropic-ratelimit-tokens-limit: 50000
anthropic-ratelimit-tokens-remaining: 49500
anthropic-ratelimit-tokens-reset: 2025-10-25T12:01:00Z
retry-after: 60 // Only on 429 errors
```
## SDK Installation
```bash
# TypeScript/JavaScript
npm install @anthropic-ai/sdk
# Python
pip install anthropic
# Java
# See https://github.com/anthropics/anthropic-sdk-java
# Go
go get github.com/anthropics/anthropic-sdk-go
```
## Official Documentation
- **API Reference**: https://docs.claude.com/en/api/messages
- **SDK Documentation**: https://github.com/anthropics/anthropic-sdk-typescript
- **Rate Limits**: https://docs.claude.com/en/api/rate-limits
- **Errors**: https://docs.claude.com/en/api/errors

# Prompt Caching Guide
Complete guide to using prompt caching for cost optimization.
## Overview
Prompt caching reduces costs by up to 90% and latency by up to 85% by caching frequently used context.
### Benefits
- **Cost Savings**: Cache reads = 10% of input token price
- **Latency Reduction**: 85% faster time to first token
- **Use Cases**: Long documents, codebases, system instructions, conversation history
### Pricing
| Operation | Cost (per MTok) | vs Regular Input |
|-----------|-----------------|------------------|
| Regular input | $3 | 100% |
| Cache write | $3.75 | 125% |
| Cache read | $0.30 | 10% |
**Example**: 100k tokens cached, used 10 times
- Without caching: 100k × $3/MTok × 10 = $3.00
- With caching: (100k × $3.75/MTok) + (100k × $0.30/MTok × 9) = $0.375 + $0.27 = $0.645
- **Savings: $2.355 (78.5%)**
## Requirements
### Minimum Cacheable Content
- **Claude 3.5 Sonnet**: 1,024 tokens minimum
- **Claude 3.5 Haiku**: 2,048 tokens minimum
- **Claude 3.7 Sonnet**: 1,024 tokens minimum
### Cache Lifetime
- **Default**: 5 minutes
- **Extended**: 1 hour (configurable)
- Refreshes on each use
### Cache Matching
Cache hits require:
- **Identical content** (byte-for-byte)
- **Same position** in request
- **Within TTL** (5 min or 1 hour)
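The extended 1-hour lifetime is opted into per block. As a sketch (assuming the extended-TTL beta is enabled on your account; at the time of writing this also required an `anthropic-beta: extended-cache-ttl-2025-04-11` header, so verify against the current docs):

```typescript
{
  type: 'text',
  text: LARGE_SYSTEM_INSTRUCTIONS,
  cache_control: { type: 'ephemeral', ttl: '1h' }, // default ttl is '5m'
}
```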
## Implementation
### Basic System Prompt Caching
```typescript
const message = await anthropic.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
system: [
{
type: 'text',
text: LARGE_SYSTEM_INSTRUCTIONS, // >= 1024 tokens
cache_control: { type: 'ephemeral' },
},
],
messages: [
{ role: 'user', content: 'Your question here' }
],
});
```
### Caching in User Messages
```typescript
const message = await anthropic.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
messages: [
{
role: 'user',
content: [
{
type: 'text',
text: 'Analyze this document:',
},
{
type: 'text',
text: LARGE_DOCUMENT, // >= 1024 tokens
cache_control: { type: 'ephemeral' },
},
{
type: 'text',
text: 'What are the main themes?',
},
],
},
],
});
```
### Multi-Turn Conversation Caching
```typescript
// Turn 1 - Creates cache
const response1 = await anthropic.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
system: [
{
type: 'text',
text: SYSTEM_PROMPT,
cache_control: { type: 'ephemeral' },
},
],
messages: [
{ role: 'user', content: 'Hello!' }
],
});
// Turn 2 - Hits cache (same system prompt)
const response2 = await anthropic.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
system: [
{
type: 'text',
text: SYSTEM_PROMPT, // Identical - cache hit
cache_control: { type: 'ephemeral' },
},
],
messages: [
{ role: 'user', content: 'Hello!' },
{ role: 'assistant', content: response1.content[0].text },
{ role: 'user', content: 'Tell me more' },
],
});
```
### Caching Conversation History
```typescript
const messages = [
{ role: 'user', content: 'Message 1' },
{ role: 'assistant', content: 'Response 1' },
{ role: 'user', content: 'Message 2' },
{ role: 'assistant', content: 'Response 2' },
];
// Cache last assistant message
messages[messages.length - 1] = {
role: 'assistant',
content: [
{
type: 'text',
text: 'Response 2',
cache_control: { type: 'ephemeral' },
},
],
};
messages.push({ role: 'user', content: 'Message 3' });
const response = await anthropic.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
messages,
});
```
## Best Practices
### ✅ Do
- Place `cache_control` on the **last block** of cacheable content
- Cache content >= 1024 tokens (3.5 Sonnet) or >= 2048 tokens (3.5 Haiku)
- Use caching for repeated context (system prompts, documents, code)
- Monitor cache usage in response headers
- Cache conversation history in long chats
### ❌ Don't
- Cache content below minimum token threshold
- Place `cache_control` in the middle of text
- Change cached content (breaks cache matching)
- Cache rarely used content (not cost-effective)
- Expect caching to work across different API keys
## Monitoring Cache Usage
```typescript
const response = await anthropic.messages.create({...});
console.log('Input tokens:', response.usage.input_tokens);
console.log('Cache creation:', response.usage.cache_creation_input_tokens);
console.log('Cache read:', response.usage.cache_read_input_tokens);
console.log('Output tokens:', response.usage.output_tokens);
// First request
// input_tokens: 1000
// cache_creation_input_tokens: 5000
// cache_read_input_tokens: 0
// Subsequent requests (within 5 min)
// input_tokens: 1000
// cache_creation_input_tokens: 0
// cache_read_input_tokens: 5000 // 90% cost savings!
```
## Common Patterns
### Pattern 1: Document Analysis Chatbot
```typescript
const document = fs.readFileSync('./document.txt', 'utf-8'); // 10k tokens
// All requests use same cached document
for (const question of questions) {
const response = await anthropic.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Document:' },
{
type: 'text',
text: document,
cache_control: { type: 'ephemeral' },
},
{ type: 'text', text: `Question: ${question}` },
],
},
],
});
// First request: cache_creation_input_tokens: 10000
// Subsequent: cache_read_input_tokens: 10000 (90% savings)
}
```
### Pattern 2: Code Review with Codebase Context
```typescript
const codebase = await loadCodebase(); // 50k tokens
const review = await anthropic.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 2048,
system: [
{ type: 'text', text: 'You are a code reviewer.' },
{
type: 'text',
text: `Codebase context:\n${codebase}`,
cache_control: { type: 'ephemeral' },
},
],
messages: [
{ role: 'user', content: 'Review this PR: ...' }
],
});
```
### Pattern 3: Customer Support with Knowledge Base
```typescript
const knowledgeBase = await loadKB(); // 20k tokens
// Cache persists across all customer queries
const response = await anthropic.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
system: [
{ type: 'text', text: 'You are a customer support agent.' },
{
type: 'text',
text: knowledgeBase,
cache_control: { type: 'ephemeral' },
},
],
messages: customerConversation,
});
```
## Troubleshooting
### Cache Not Activating
**Problem**: `cache_read_input_tokens` is 0
**Solutions**:
1. Ensure content >= 1024 tokens (or 2048 for Haiku)
2. Verify `cache_control` is on **last block**
3. Check content is byte-for-byte identical
4. Confirm requests within 5-minute window
### Unexpected Cache Misses
**Problem**: Cache hits intermittently
**Solutions**:
1. Ensure content doesn't change (even whitespace)
2. Check TTL hasn't expired
3. Verify using same API key
4. Monitor cache headers in responses
### High Cache Creation Costs
**Problem**: Frequent `cache_creation_input_tokens`
**Solutions**:
1. Increase request frequency (use cache before expiry)
2. Consider if caching is cost-effective (need 2+ uses)
3. Extend cache TTL to 1 hour if supported
## Cost Calculator
```typescript
function calculateCachingSavings(
cachedTokens: number,
uncachedTokens: number,
requestCount: number
): {
withoutCaching: number;
withCaching: number;
savings: number;
savingsPercent: number;
} {
const inputCostPerMTok = 3;
const cacheCostPerMTok = 3.75;
const cacheReadCostPerMTok = 0.3;
const withoutCaching = ((cachedTokens + uncachedTokens) / 1_000_000) *
inputCostPerMTok * requestCount;
const cacheWrite = (cachedTokens / 1_000_000) * cacheCostPerMTok;
const cacheReads = (cachedTokens / 1_000_000) * cacheReadCostPerMTok * (requestCount - 1);
const uncachedInput = (uncachedTokens / 1_000_000) * inputCostPerMTok * requestCount;
const withCaching = cacheWrite + cacheReads + uncachedInput;
const savings = withoutCaching - withCaching;
const savingsPercent = (savings / withoutCaching) * 100;
return { withoutCaching, withCaching, savings, savingsPercent };
}
// Example: 10k cached tokens, 1k uncached, 20 requests
const result = calculateCachingSavings(10000, 1000, 20);
console.log(`Savings: $${result.savings.toFixed(4)} (${result.savingsPercent.toFixed(1)}%)`);
```
## Official Documentation
- **Prompt Caching Guide**: https://docs.claude.com/en/docs/build-with-claude/prompt-caching
- **Pricing**: https://www.anthropic.com/pricing
- **API Reference**: https://docs.claude.com/en/api/messages

references/rate-limits.md
# Rate Limits Guide
Complete guide to Claude API rate limits and how to handle them.
## Overview
The Claude API uses a **token bucket** algorithm for rate limiting:
- Capacity continuously replenishes
- Three types: Requests per minute (RPM), Tokens per minute (TPM), Daily tokens
- Limits vary by account tier and model
## Rate Limit Tiers
| Tier | Requirements | Example Limits (Sonnet 3.5) |
|------|--------------|------------------------------|
| Tier 1 | New account | 50 RPM, 40k TPM |
| Tier 2 | $10 spend | 1000 RPM, 100k TPM |
| Tier 3 | $50 spend | 2000 RPM, 200k TPM |
| Tier 4 | $500 spend | 4000 RPM, 400k TPM |
**Note**: Limits vary by model. Check Console for exact limits.
## Response Headers
Every API response includes:
```
anthropic-ratelimit-requests-limit: 50
anthropic-ratelimit-requests-remaining: 49
anthropic-ratelimit-requests-reset: 2025-10-25T12:00:00Z
anthropic-ratelimit-tokens-limit: 50000
anthropic-ratelimit-tokens-remaining: 49500
anthropic-ratelimit-tokens-reset: 2025-10-25T12:01:00Z
```
On 429 errors:
```
retry-after: 60 // Seconds until retry allowed
```
## Handling Rate Limits
### Basic Exponential Backoff
```typescript
async function withRetry(requestFn, maxRetries = 3) {
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      return await requestFn();
    } catch (error) {
      // Rethrow non-rate-limit errors and the final failed attempt
      if (error.status !== 429 || attempt === maxRetries - 1) {
        throw error;
      }
      const delay = 1000 * Math.pow(2, attempt);
      await new Promise(resolve => setTimeout(resolve, delay));
    }
  }
}
```
### Respecting retry-after Header
```typescript
const retryAfter = error.response?.headers?.['retry-after'];
const delay = retryAfter ? parseInt(retryAfter, 10) * 1000 : baseDelay;
```
## Best Practices
1. **Monitor headers** - Check remaining requests/tokens
2. **Implement backoff** - Exponential delay on 429
3. **Respect retry-after** - Use provided wait time
4. **Batch requests** - Group when possible
5. **Use caching** - Reduce duplicate requests
6. **Upgrade tier** - Scale with usage
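For practice 1, the remaining-quota headers can be read straight off a `fetch` response; `readRateLimits` below is an illustrative helper, not an SDK function:

```typescript
interface RateLimitInfo {
  requestsRemaining: number;
  tokensRemaining: number;
}

// Works with the standard Headers object from fetch responses.
// A missing header falls back to Infinity ("no information").
function readRateLimits(headers: Headers): RateLimitInfo {
  const num = (name: string): number => {
    const value = headers.get(name);
    return value === null ? Infinity : parseInt(value, 10);
  };
  return {
    requestsRemaining: num('anthropic-ratelimit-requests-remaining'),
    tokensRemaining: num('anthropic-ratelimit-tokens-remaining'),
  };
}
```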
## Official Docs
https://docs.claude.com/en/api/rate-limits

# Tool Use Patterns
Common patterns for using tools (function calling) with Claude API.
## Basic Pattern
```typescript
const tools = [{
name: 'get_weather',
description: 'Get current weather',
input_schema: {
type: 'object',
properties: {
location: { type: 'string' }
},
required: ['location']
}
}];
// 1. Send request with tools
const response = await anthropic.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
tools,
messages: [{ role: 'user', content: 'Weather in NYC?' }]
});
// 2. Check if Claude wants to use tools
if (response.stop_reason === 'tool_use') {
for (const block of response.content) {
if (block.type === 'tool_use') {
// 3. Execute tool
const result = await executeToolFunction(block.name, block.input);
// 4. Send result back to Claude in a follow-up user message
const toolResult = {
type: 'tool_result',
tool_use_id: block.id,
content: JSON.stringify(result)
};
}
}
}
```
## Tool Execution Loop
```typescript
async function chatWithTools(userMessage) {
const messages = [{ role: 'user', content: userMessage }];
while (true) {
const response = await anthropic.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
tools,
messages,
});
messages.push({ role: 'assistant', content: response.content });
if (response.stop_reason === 'tool_use') {
// Execute tools and continue
const toolResults = await executeAllTools(response.content);
messages.push({ role: 'user', content: toolResults });
} else {
// Final response
return response.content.find(b => b.type === 'text')?.text;
}
}
}
```
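The `executeAllTools` helper assumed above is not an SDK function; one possible sketch maps each `tool_use` block to a `tool_result` through a local registry of handlers (the `get_weather` handler here is a stand-in):

```typescript
type ToolHandler = (input: unknown) => Promise<unknown>;

// Hypothetical registry mapping tool names to local implementations.
const handlers: Record<string, ToolHandler> = {
  get_weather: async (_input) => ({ temp: 72 }),
};

// Turns the assistant's tool_use blocks into tool_result blocks
// for the next user turn. Failures are reported via is_error.
async function executeAllTools(content: any[]): Promise<any[]> {
  const results = [];
  for (const block of content) {
    if (block.type !== 'tool_use') continue;
    try {
      const result = await handlers[block.name](block.input);
      results.push({
        type: 'tool_result',
        tool_use_id: block.id,
        content: JSON.stringify(result),
      });
    } catch (err) {
      results.push({
        type: 'tool_result',
        tool_use_id: block.id,
        content: String(err),
        is_error: true,
      });
    }
  }
  return results;
}
```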
## With Zod Validation
```typescript
import { betaZodTool } from '@anthropic-ai/sdk/helpers/zod';
import { z } from 'zod';
const weatherTool = betaZodTool({
name: 'get_weather',
inputSchema: z.object({
location: z.string(),
unit: z.enum(['celsius', 'fahrenheit']).optional(),
}),
description: 'Get weather',
run: async (input) => {
return `Weather in ${input.location}: 72°F`;
},
});
// Automatic execution
const finalMessage = await anthropic.beta.messages.toolRunner({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1000,
messages: [{ role: 'user', content: 'Weather in SF?' }],
tools: [weatherTool],
});
```
## Error Handling in Tools
```typescript
{
type: 'tool_result',
tool_use_id: block.id,
content: 'Error: API unavailable',
is_error: true // Mark as error
}
```
## Official Docs
https://docs.claude.com/en/docs/build-with-claude/tool-use

references/top-errors.md
# Top 12 Common Errors and Solutions
Complete reference for troubleshooting Claude API errors.
---
## Error #1: Rate Limit 429 - Too Many Requests
**Error Message:**
```
429 Too Many Requests: Number of request tokens has exceeded your per-minute rate limit
```
**Source**: https://docs.claude.com/en/api/errors
**Why It Happens:**
- Exceeded requests per minute (RPM)
- Exceeded tokens per minute (TPM)
- Exceeded daily token quota
**Solution:**
```typescript
async function handleRateLimit(requestFn, maxRetries = 3) {
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      return await requestFn();
    } catch (error) {
      // Rethrow non-rate-limit errors and the final failed attempt
      if (error.status !== 429 || attempt === maxRetries - 1) {
        throw error;
      }
      const retryAfter = error.response?.headers?.['retry-after'];
      const delay = retryAfter ? parseInt(retryAfter, 10) * 1000 : 1000 * Math.pow(2, attempt);
      console.log(`Retrying in ${delay}ms...`);
      await new Promise(resolve => setTimeout(resolve, delay));
    }
  }
}
```
**Prevention:**
- Implement exponential backoff
- Respect `retry-after` header
- Monitor rate limit headers
- Upgrade account tier for higher limits
---
## Error #2: Streaming SSE Parsing Errors
**Error Message:**
```
Incomplete chunks, malformed events, connection dropped
```
**Source**: Community reports, SDK issues
**Why It Happens:**
- Network interruptions
- Improper SSE event parsing
- Errors occur AFTER initial 200 response
**Solution:**
```typescript
const stream = anthropic.messages.stream({...});
stream
.on('error', (error) => {
console.error('Stream error:', error);
// Implement retry or fallback
})
.on('abort', (error) => {
console.warn('Stream aborted');
})
.on('end', () => {
console.log('Stream completed');
});
await stream.finalMessage();
```
**Prevention:**
- Always implement error event listeners
- Handle stream abortion
- Use SDK helpers (don't parse SSE manually)
- Implement reconnection logic
---
## Error #3: Prompt Caching Not Activating
**Error Message:**
```
High costs despite cache_control blocks
cache_read_input_tokens: 0
```
**Source**: https://docs.claude.com/en/docs/build-with-claude/prompt-caching
**Why It Happens:**
- `cache_control` not on last block
- Content below minimum tokens (1024/2048)
- Content changed (breaks cache match)
- Outside 5-minute TTL
**Solution:**
```typescript
// ❌ Wrong - cache breakpoint placed before trailing content
{
  type: 'text',
  text: DOCUMENT,
  cache_control: { type: 'ephemeral' }, // Trailing block below is not cached
},
{
  type: 'text',
  text: 'Additional text',
}
// ✅ Correct - cache_control at end
{
type: 'text',
text: DOCUMENT + '\n\nAdditional text',
cache_control: { type: 'ephemeral' }, // Correct position
}
```
**Prevention:**
- Place `cache_control` on LAST block
- Ensure content >= 1024 tokens
- Keep cached content identical
- Monitor `cache_read_input_tokens`
---
## Error #4: Tool Use Response Format Errors
**Error Message:**
```
invalid_request_error: tools[0].input_schema is invalid
```
**Source**: API validation
**Why It Happens:**
- Invalid JSON Schema
- Missing required fields
- Incorrect tool_use_id in tool_result
**Solution:**
```typescript
// ✅ Valid tool schema
{
name: 'get_weather',
description: 'Get current weather',
input_schema: {
type: 'object', // Must be 'object'
properties: {
location: {
type: 'string', // Valid JSON Schema types
description: 'City' // Optional but recommended
}
},
required: ['location'] // List required fields
}
}
// ✅ Valid tool result
{
type: 'tool_result',
tool_use_id: block.id, // Must match tool_use id
content: JSON.stringify(result) // Convert to string
}
```
**Prevention:**
- Validate schemas with JSON Schema validator
- Match `tool_use_id` exactly
- Stringify tool results
- Test thoroughly before production
---
## Error #5: Vision Image Format Issues
**Error Message:**
```
invalid_request_error: image source must be base64 or url
```
**Source**: API documentation
**Why It Happens:**
- Unsupported image format
- Incorrect base64 encoding
- Invalid media_type
**Solution:**
```typescript
import fs from 'fs';
const imageData = fs.readFileSync('./image.jpg');
const base64Image = imageData.toString('base64');
// ✅ Correct format
{
type: 'image',
source: {
type: 'base64',
media_type: 'image/jpeg', // Must match actual format
data: base64Image // Pure base64 (no data URI prefix)
}
}
```
**Supported Formats:**
- image/jpeg
- image/png
- image/webp
- image/gif
**Prevention:**
- Validate format before encoding
- Use correct media_type
- Remove data URI prefix if present
- Keep images under 5MB
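Point 3 above (removing a data URI prefix) can be handled with a small helper; this is an illustrative sketch:

```typescript
// Removes a "data:image/...;base64," prefix if present, leaving
// the pure base64 payload the API expects.
function stripDataUri(data: string): string {
  const match = data.match(/^data:image\/[a-z+]+;base64,/);
  return match ? data.slice(match[0].length) : data;
}
```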
---
## Error #6: Token Counting Mismatches
**Error Message:**
```
invalid_request_error: messages: too many tokens
```
**Source**: Token counting differences
**Why It Happens:**
- Not accounting for special tokens
- Formatting adds hidden tokens
- Context window exceeded
**Solution:**
```typescript
// Monitor token usage
const response = await anthropic.messages.create({...});
console.log('Input tokens:', response.usage.input_tokens);
console.log('Output tokens:', response.usage.output_tokens);
console.log('Total:', response.usage.input_tokens + response.usage.output_tokens);
// Check proximity to the context window
const contextWindow = 200000; // Claude 3.5 Sonnet
const total = response.usage.input_tokens + response.usage.output_tokens;
if (total > contextWindow * 0.9) {
  console.warn('Approaching context limit');
}
```
**Prevention:**
- Use official token counter
- Monitor usage headers
- Implement message pruning
- Use prompt caching for long context
---
## Error #7: System Prompt Placement Issues
**Error Message:**
```
System prompt ignored, or invalid_request_error about message roles
```
**Source**: API behavior
**Why It Happens:**
- System instructions sent as a `system`-role message inside `messages` (the Messages API only accepts `user` and `assistant` roles)
- System prompt in wrong format
**Solution:**
```typescript
// ❌ Wrong - "system" is not a valid message role
const message = await anthropic.messages.create({
  messages: [
    { role: 'system', content: 'You are helpful' }, // Rejected
    { role: 'user', content: 'Hello' },
  ],
});
// ✅ Correct - use the top-level system parameter
const message = await anthropic.messages.create({
  system: 'You are helpful',
  messages: [{ role: 'user', content: 'Hello' }],
});
```
**Prevention:**
- Always pass system instructions via the top-level `system` parameter
- Use the system prompt for behavior instructions
- Test system prompt effectiveness
---
## Error #8: Context Window Exceeded
**Error Message:**
```
invalid_request_error: messages: too many tokens (210000 > 200000)
```
**Source**: Model limits
**Why It Happens:**
- Long conversations
- Large documents
- Not pruning message history
**Solution:**
```typescript
function pruneMessages(messages, maxTokens = 150000) {
// Keep most recent messages
let totalTokens = 0;
const prunedMessages = [];
for (let i = messages.length - 1; i >= 0; i--) {
const msgTokens = estimateTokens(messages[i].content);
if (totalTokens + msgTokens > maxTokens) break;
prunedMessages.unshift(messages[i]);
totalTokens += msgTokens;
}
return prunedMessages;
}
```
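The `estimateTokens` helper used above is a placeholder; a crude character-count heuristic (roughly 4 characters per English token, an approximation rather than the real tokenizer) could stand in for it:

```typescript
// Crude approximation: ~4 characters per token for English prose.
// For exact counts, use the official token counting endpoint instead.
function estimateTokens(content: string | { text?: string }[]): number {
  const text = typeof content === 'string'
    ? content
    : content.map((b) => b.text ?? '').join('');
  return Math.ceil(text.length / 4);
}
```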
**Prevention:**
- Implement message history pruning
- Use summarization for old messages
- Use prompt caching
- Choose a model with a context window that fits your workload
---
## Error #9: Extended Thinking on Wrong Model
**Error Message:**
```
No thinking blocks in response
```
**Source**: Model capabilities
**Why It Happens:**
- Using Claude 3.5 Sonnet or earlier (extended thinking not supported)
- The `thinking` parameter not enabled in the request
**Solution:**
```typescript
// ❌ Wrong model - no extended thinking
model: 'claude-3-5-sonnet-20241022'
// ✅ Models that support extended thinking
model: 'claude-3-7-sonnet-20250219'
model: 'claude-opus-4-20250514'
model: 'claude-sonnet-4-5-20250929'
// Also enable it in the request body:
thinking: { type: 'enabled', budget_tokens: 10000 }
```
**Prevention:**
- Verify model capabilities
- Use 3.7 Sonnet or 4.x models and enable the `thinking` parameter
- Document model requirements
---
## Error #10: API Key Exposure in Client Code
**Error Message:**
```
CORS errors, security vulnerability
```
**Source**: Security best practices
**Why It Happens:**
- Making API calls from browser
- API key in client-side code
**Solution:**
```typescript
// ❌ Never do this
const anthropic = new Anthropic({
apiKey: 'sk-ant-...', // Exposed in browser!
});
// ✅ Use server-side endpoint
async function callClaude(messages) {
const response = await fetch('/api/chat', {
method: 'POST',
body: JSON.stringify({ messages }),
});
return response.json();
}
```
**Prevention:**
- Server-side only
- Use environment variables
- Implement authentication
- Never expose API key
---
## Error #11: Rate Limit Tier Confusion
**Error Message:**
```
Lower limits than expected
```
**Source**: Account tier system
**Why It Happens:**
- Not understanding tier progression
- Expecting higher limits without usage history
**Solution:**
- Check current tier in Console
- Tiers auto-scale with usage ($10, $50, $500 spend)
- Monitor rate limit headers
- Contact support for custom limits
**Tier Progression:**
- Tier 1: 50 RPM, 40k TPM
- Tier 2: 1000 RPM, 100k TPM ($10 spend)
- Tier 3: 2000 RPM, 200k TPM ($50 spend)
- Tier 4: 4000 RPM, 400k TPM ($500 spend)
**Prevention:**
- Review tier requirements
- Plan for gradual scale-up
- Implement proper rate limiting
---
## Error #12: Message Batches Beta Headers Missing
**Error Message:**
```
invalid_request_error: unknown parameter: batches
```
**Source**: Beta API requirements
**Why It Happens:**
- Missing `anthropic-beta` header
- Using wrong endpoint
**Solution:**
```typescript
const response = await fetch('https://api.anthropic.com/v1/messages/batches', {
method: 'POST',
headers: {
'x-api-key': API_KEY,
'anthropic-version': '2023-06-01',
'anthropic-beta': 'message-batches-2024-09-24', // Required!
'content-type': 'application/json',
},
body: JSON.stringify({...}),
});
```
**Prevention:**
- Include beta headers for beta features
- Check official docs for header requirements
- Test beta features in development first
---
## Quick Diagnosis Checklist
When encountering errors:
1. ✅ Check error status code (400, 401, 429, 500, etc.)
2. ✅ Read error message carefully
3. ✅ Verify API key is valid and in environment variable
4. ✅ Confirm model ID is correct
5. ✅ Check request format matches API spec
6. ✅ Monitor rate limit headers
7. ✅ Review recent code changes
8. ✅ Test with minimal example
9. ✅ Check official docs for breaking changes
10. ✅ Search GitHub issues for similar problems
---
## Getting Help
**Official Resources:**
- **Errors Reference**: https://docs.claude.com/en/api/errors
- **API Documentation**: https://docs.claude.com/en/api
- **Support**: https://support.claude.com/
**Community:**
- **GitHub Issues**: https://github.com/anthropics/anthropic-sdk-typescript/issues
- **Developer Forum**: https://support.claude.com/
**This Skill:**
- Check other reference files for detailed guides
- Review templates for working examples
- Verify setup checklist in SKILL.md

# Vision Capabilities
Guide to using Claude's vision capabilities for image understanding.
## Supported Formats
- **JPEG** (image/jpeg)
- **PNG** (image/png)
- **WebP** (image/webp)
- **GIF** (image/gif) - non-animated only
**Max size**: 5MB per image
## Basic Usage
```typescript
import fs from 'fs';
const imageData = fs.readFileSync('./image.jpg').toString('base64');
const message = await anthropic.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
messages: [{
role: 'user',
content: [
{
type: 'image',
source: {
type: 'base64',
media_type: 'image/jpeg',
data: imageData
}
},
{
type: 'text',
text: 'What is in this image?'
}
]
}]
});
```
## Multiple Images
```typescript
content: [
{ type: 'text', text: 'Compare these images:' },
{ type: 'image', source: { type: 'base64', media_type: 'image/jpeg', data: img1 } },
{ type: 'image', source: { type: 'base64', media_type: 'image/png', data: img2 } },
{ type: 'text', text: 'What are the differences?' }
]
```
## With Tools
```typescript
const message = await anthropic.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
tools: [searchProductTool],
messages: [{
role: 'user',
content: [
{ type: 'image', source: {...} },
{ type: 'text', text: 'Find similar products' }
]
}]
});
```
## With Prompt Caching
```typescript
{
type: 'image',
source: {
type: 'base64',
media_type: 'image/jpeg',
data: imageData
},
cache_control: { type: 'ephemeral' } // Cache image
}
```
## Validation
```typescript
import fs from 'fs';

function validateImage(path: string) {
  const stats = fs.statSync(path);
  const sizeMB = stats.size / (1024 * 1024);
  if (sizeMB > 5) {
    throw new Error('Image exceeds 5MB');
  }
  const ext = path.split('.').pop()?.toLowerCase() ?? '';
  if (!['jpg', 'jpeg', 'png', 'webp', 'gif'].includes(ext)) {
throw new Error('Unsupported format');
}
return true;
}
```
## Official Docs
https://docs.claude.com/en/docs/build-with-claude/vision