Initial commit
This commit is contained in:
224
references/api-reference.md
Normal file
224
references/api-reference.md
Normal file
@@ -0,0 +1,224 @@
|
||||
# Claude API Reference
|
||||
|
||||
Quick reference for the Anthropic Messages API endpoints and parameters.
|
||||
|
||||
## Base URL
|
||||
|
||||
```
|
||||
https://api.anthropic.com/v1
|
||||
```
|
||||
|
||||
## Authentication
|
||||
|
||||
All requests require:
|
||||
```http
|
||||
x-api-key: YOUR_API_KEY
|
||||
anthropic-version: 2023-06-01
|
||||
content-type: application/json
|
||||
```
|
||||
|
||||
## Endpoints
|
||||
|
||||
### POST /messages
|
||||
|
||||
Create a message with Claude.
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```typescript
|
||||
{
|
||||
model: string, // Required: "claude-sonnet-4-5-20250929", etc.
|
||||
max_tokens: number, // Required: Maximum tokens to generate (1-8192)
|
||||
messages: Message[], // Required: Conversation history
|
||||
system?: string | SystemBlock[], // Optional: System prompt
|
||||
temperature?: number, // Optional: 0-1 (default: 1)
|
||||
top_p?: number, // Optional: 0-1 (default: 1)
|
||||
top_k?: number, // Optional: Sampling parameter
|
||||
stop_sequences?: string[], // Optional: Stop generation at these sequences
|
||||
stream?: boolean, // Optional: Enable streaming (default: false)
|
||||
tools?: Tool[], // Optional: Available tools
|
||||
tool_choice?: ToolChoice, // Optional: Tool selection strategy
|
||||
metadata?: Metadata // Optional: Request metadata
|
||||
}
|
||||
```
|
||||
|
||||
**Message Format:**
|
||||
|
||||
```typescript
|
||||
{
|
||||
role: "user" | "assistant",
|
||||
content: string | ContentBlock[]
|
||||
}
|
||||
```
|
||||
|
||||
**Content Block Types:**
|
||||
|
||||
```typescript
|
||||
// Text
|
||||
{
|
||||
type: "text",
|
||||
text: string,
|
||||
cache_control?: { type: "ephemeral" } // For prompt caching
|
||||
}
|
||||
|
||||
// Image
|
||||
{
|
||||
type: "image",
|
||||
source: {
|
||||
type: "base64" | "url",
|
||||
media_type: "image/jpeg" | "image/png" | "image/webp" | "image/gif",
|
||||
data?: string, // base64 encoded
|
||||
url?: string // publicly accessible URL
|
||||
},
|
||||
cache_control?: { type: "ephemeral" }
|
||||
}
|
||||
|
||||
// Tool use (assistant messages only)
|
||||
{
|
||||
type: "tool_use",
|
||||
id: string,
|
||||
name: string,
|
||||
input: object
|
||||
}
|
||||
|
||||
// Tool result (user messages only)
|
||||
{
|
||||
type: "tool_result",
|
||||
tool_use_id: string,
|
||||
content: string | ContentBlock[],
|
||||
is_error?: boolean
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```typescript
|
||||
{
|
||||
id: string,
|
||||
type: "message",
|
||||
role: "assistant",
|
||||
content: ContentBlock[],
|
||||
model: string,
|
||||
stop_reason: "end_turn" | "max_tokens" | "stop_sequence" | "tool_use",
|
||||
stop_sequence?: string,
|
||||
usage: {
|
||||
input_tokens: number,
|
||||
output_tokens: number,
|
||||
cache_creation_input_tokens?: number,
|
||||
cache_read_input_tokens?: number
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Model IDs
|
||||
|
||||
| Model | ID | Context Window |
|
||||
|-------|-----|----------------|
|
||||
| Claude Sonnet 4.5 | claude-sonnet-4-5-20250929 | 200k tokens |
|
||||
| Claude 3.7 Sonnet | claude-3-7-sonnet-20250219 | 200k tokens |
|
||||
| Claude Opus 4 | claude-opus-4-20250514 | 200k tokens |
|
||||
| Claude 3.5 Haiku | claude-3-5-haiku-20241022 | 200k tokens |
|
||||
|
||||
## Tool Definition
|
||||
|
||||
```typescript
|
||||
{
|
||||
name: string, // Tool identifier
|
||||
description: string, // What the tool does
|
||||
input_schema: { // JSON Schema
|
||||
type: "object",
|
||||
properties: {
|
||||
[key: string]: {
|
||||
type: "string" | "number" | "boolean" | "array" | "object",
|
||||
description?: string,
|
||||
enum?: any[]
|
||||
}
|
||||
},
|
||||
required?: string[]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Streaming
|
||||
|
||||
Set `stream: true` in request. Returns Server-Sent Events (SSE):
|
||||
|
||||
**Event Types:**
|
||||
- `message_start`: Message begins
|
||||
- `content_block_start`: Content block begins
|
||||
- `content_block_delta`: Text or JSON delta
|
||||
- `content_block_stop`: Content block complete
|
||||
- `message_delta`: Metadata update
|
||||
- `message_stop`: Message complete
|
||||
- `ping`: Keep-alive
|
||||
|
||||
**Event Format:**
|
||||
|
||||
```
|
||||
event: message_start
|
||||
data: {"type":"message_start","message":{...}}
|
||||
|
||||
event: content_block_delta
|
||||
data: {"type":"content_block_delta","delta":{"type":"text_delta","text":"Hello"}}
|
||||
|
||||
event: message_stop
|
||||
data: {"type":"message_stop"}
|
||||
```
|
||||
|
||||
## Error Responses
|
||||
|
||||
```typescript
|
||||
{
|
||||
type: "error",
|
||||
error: {
|
||||
type: string, // Error type identifier
|
||||
message: string // Human-readable description
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Common Error Types:**
|
||||
- `invalid_request_error` (400)
|
||||
- `authentication_error` (401)
|
||||
- `permission_error` (403)
|
||||
- `not_found_error` (404)
|
||||
- `rate_limit_error` (429)
|
||||
- `api_error` (500)
|
||||
- `overloaded_error` (529)
|
||||
|
||||
## Rate Limit Headers
|
||||
|
||||
Response includes:
|
||||
|
||||
```
|
||||
anthropic-ratelimit-requests-limit: 50
|
||||
anthropic-ratelimit-requests-remaining: 49
|
||||
anthropic-ratelimit-requests-reset: 2025-10-25T12:00:00Z
|
||||
anthropic-ratelimit-tokens-limit: 50000
|
||||
anthropic-ratelimit-tokens-remaining: 49500
|
||||
anthropic-ratelimit-tokens-reset: 2025-10-25T12:01:00Z
|
||||
retry-after: 60 // Only on 429 errors
|
||||
```
|
||||
|
||||
## SDK Installation
|
||||
|
||||
```bash
|
||||
# TypeScript/JavaScript
|
||||
npm install @anthropic-ai/sdk
|
||||
|
||||
# Python
|
||||
pip install anthropic
|
||||
|
||||
# Java
|
||||
# See https://github.com/anthropics/anthropic-sdk-java
|
||||
|
||||
# Go
|
||||
go get github.com/anthropics/anthropic-sdk-go
|
||||
```
|
||||
|
||||
## Official Documentation
|
||||
|
||||
- **API Reference**: https://docs.claude.com/en/api/messages
|
||||
- **SDK Documentation**: https://github.com/anthropics/anthropic-sdk-typescript
|
||||
- **Rate Limits**: https://docs.claude.com/en/api/rate-limits
|
||||
- **Errors**: https://docs.claude.com/en/api/errors
|
||||
354
references/prompt-caching-guide.md
Normal file
354
references/prompt-caching-guide.md
Normal file
@@ -0,0 +1,354 @@
|
||||
# Prompt Caching Guide
|
||||
|
||||
Complete guide to using prompt caching for cost optimization.
|
||||
|
||||
## Overview
|
||||
|
||||
Prompt caching reduces costs by up to 90% and latency by up to 85% by caching frequently used context.
|
||||
|
||||
### Benefits
|
||||
|
||||
- **Cost Savings**: Cache reads = 10% of input token price
|
||||
- **Latency Reduction**: 85% faster time to first token
|
||||
- **Use Cases**: Long documents, codebases, system instructions, conversation history
|
||||
|
||||
### Pricing
|
||||
|
||||
| Operation | Cost (per MTok) | vs Regular Input |
|
||||
|-----------|-----------------|------------------|
|
||||
| Regular input | $3 | 100% |
|
||||
| Cache write | $3.75 | 125% |
|
||||
| Cache read | $0.30 | 10% |
|
||||
|
||||
**Example**: 100k tokens cached, used 10 times
|
||||
- Without caching: 100k × $3/MTok × 10 = $3.00
|
||||
- With caching: (100k × $3.75/MTok) + (100k × $0.30/MTok × 9) = $0.375 + $0.27 = $0.645
|
||||
- **Savings: $2.355 (78.5%)**
|
||||
|
||||
## Requirements
|
||||
|
||||
### Minimum Cacheable Content
|
||||
|
||||
- **Claude 3.5 Sonnet**: 1,024 tokens minimum
|
||||
- **Claude 3.5 Haiku**: 2,048 tokens minimum
|
||||
- **Claude 3.7 Sonnet**: 1,024 tokens minimum
|
||||
|
||||
### Cache Lifetime
|
||||
|
||||
- **Default**: 5 minutes
|
||||
- **Extended**: 1 hour (configurable)
|
||||
- Refreshes on each use
|
||||
|
||||
### Cache Matching
|
||||
|
||||
Cache hits require:
|
||||
- ✅ **Identical content** (byte-for-byte)
|
||||
- ✅ **Same position** in request
|
||||
- ✅ **Within TTL** (5 min or 1 hour)
|
||||
|
||||
## Implementation
|
||||
|
||||
### Basic System Prompt Caching
|
||||
|
||||
```typescript
|
||||
const message = await anthropic.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
max_tokens: 1024,
|
||||
system: [
|
||||
{
|
||||
type: 'text',
|
||||
text: LARGE_SYSTEM_INSTRUCTIONS, // >= 1024 tokens
|
||||
cache_control: { type: 'ephemeral' },
|
||||
},
|
||||
],
|
||||
messages: [
|
||||
{ role: 'user', content: 'Your question here' }
|
||||
],
|
||||
});
|
||||
```
|
||||
|
||||
### Caching in User Messages
|
||||
|
||||
```typescript
|
||||
const message = await anthropic.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
max_tokens: 1024,
|
||||
messages: [
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{
|
||||
type: 'text',
|
||||
text: 'Analyze this document:',
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
text: LARGE_DOCUMENT, // >= 1024 tokens
|
||||
cache_control: { type: 'ephemeral' },
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
text: 'What are the main themes?',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
```
|
||||
|
||||
### Multi-Turn Conversation Caching
|
||||
|
||||
```typescript
|
||||
// Turn 1 - Creates cache
|
||||
const response1 = await anthropic.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
max_tokens: 1024,
|
||||
system: [
|
||||
{
|
||||
type: 'text',
|
||||
text: SYSTEM_PROMPT,
|
||||
cache_control: { type: 'ephemeral' },
|
||||
},
|
||||
],
|
||||
messages: [
|
||||
{ role: 'user', content: 'Hello!' }
|
||||
],
|
||||
});
|
||||
|
||||
// Turn 2 - Hits cache (same system prompt)
|
||||
const response2 = await anthropic.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
max_tokens: 1024,
|
||||
system: [
|
||||
{
|
||||
type: 'text',
|
||||
text: SYSTEM_PROMPT, // Identical - cache hit
|
||||
cache_control: { type: 'ephemeral' },
|
||||
},
|
||||
],
|
||||
messages: [
|
||||
{ role: 'user', content: 'Hello!' },
|
||||
{ role: 'assistant', content: response1.content[0].text },
|
||||
{ role: 'user', content: 'Tell me more' },
|
||||
],
|
||||
});
|
||||
```
|
||||
|
||||
### Caching Conversation History
|
||||
|
||||
```typescript
|
||||
const messages = [
|
||||
{ role: 'user', content: 'Message 1' },
|
||||
{ role: 'assistant', content: 'Response 1' },
|
||||
{ role: 'user', content: 'Message 2' },
|
||||
{ role: 'assistant', content: 'Response 2' },
|
||||
];
|
||||
|
||||
// Cache last assistant message
|
||||
messages[messages.length - 1] = {
|
||||
role: 'assistant',
|
||||
content: [
|
||||
{
|
||||
type: 'text',
|
||||
text: 'Response 2',
|
||||
cache_control: { type: 'ephemeral' },
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
messages.push({ role: 'user', content: 'Message 3' });
|
||||
|
||||
const response = await anthropic.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
max_tokens: 1024,
|
||||
messages,
|
||||
});
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
### ✅ Do
|
||||
|
||||
- Place `cache_control` on the **last block** of cacheable content
|
||||
- Cache content >= 1024 tokens (3.5 Sonnet) or >= 2048 tokens (3.5 Haiku)
|
||||
- Use caching for repeated context (system prompts, documents, code)
|
||||
- Monitor cache usage in response headers
|
||||
- Cache conversation history in long chats
|
||||
|
||||
### ❌ Don't
|
||||
|
||||
- Cache content below minimum token threshold
|
||||
- Place `cache_control` in the middle of text
|
||||
- Change cached content (breaks cache matching)
|
||||
- Cache rarely used content (not cost-effective)
|
||||
- Expect caching to work across different API keys
|
||||
|
||||
## Monitoring Cache Usage
|
||||
|
||||
```typescript
|
||||
const response = await anthropic.messages.create({...});
|
||||
|
||||
console.log('Input tokens:', response.usage.input_tokens);
|
||||
console.log('Cache creation:', response.usage.cache_creation_input_tokens);
|
||||
console.log('Cache read:', response.usage.cache_read_input_tokens);
|
||||
console.log('Output tokens:', response.usage.output_tokens);
|
||||
|
||||
// First request
|
||||
// input_tokens: 1000
|
||||
// cache_creation_input_tokens: 5000
|
||||
// cache_read_input_tokens: 0
|
||||
|
||||
// Subsequent requests (within 5 min)
|
||||
// input_tokens: 1000
|
||||
// cache_creation_input_tokens: 0
|
||||
// cache_read_input_tokens: 5000 // 90% cost savings!
|
||||
```
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### Pattern 1: Document Analysis Chatbot
|
||||
|
||||
```typescript
|
||||
const document = fs.readFileSync('./document.txt', 'utf-8'); // 10k tokens
|
||||
|
||||
// All requests use same cached document
|
||||
for (const question of questions) {
|
||||
const response = await anthropic.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
max_tokens: 1024,
|
||||
messages: [
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'text', text: 'Document:' },
|
||||
{
|
||||
type: 'text',
|
||||
text: document,
|
||||
cache_control: { type: 'ephemeral' },
|
||||
},
|
||||
{ type: 'text', text: `Question: ${question}` },
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
// First request: cache_creation_input_tokens: 10000
|
||||
// Subsequent: cache_read_input_tokens: 10000 (90% savings)
|
||||
}
|
||||
```
|
||||
|
||||
### Pattern 2: Code Review with Codebase Context
|
||||
|
||||
```typescript
|
||||
const codebase = await loadCodebase(); // 50k tokens
|
||||
|
||||
const review = await anthropic.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
max_tokens: 2048,
|
||||
system: [
|
||||
{ type: 'text', text: 'You are a code reviewer.' },
|
||||
{
|
||||
type: 'text',
|
||||
text: `Codebase context:\n${codebase}`,
|
||||
cache_control: { type: 'ephemeral' },
|
||||
},
|
||||
],
|
||||
messages: [
|
||||
{ role: 'user', content: 'Review this PR: ...' }
|
||||
],
|
||||
});
|
||||
```
|
||||
|
||||
### Pattern 3: Customer Support with Knowledge Base
|
||||
|
||||
```typescript
|
||||
const knowledgeBase = await loadKB(); // 20k tokens
|
||||
|
||||
// Cache persists across all customer queries
|
||||
const response = await anthropic.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
max_tokens: 1024,
|
||||
system: [
|
||||
{ type: 'text', text: 'You are a customer support agent.' },
|
||||
{
|
||||
type: 'text',
|
||||
text: knowledgeBase,
|
||||
cache_control: { type: 'ephemeral' },
|
||||
},
|
||||
],
|
||||
messages: customerConversation,
|
||||
});
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Cache Not Activating
|
||||
|
||||
**Problem**: `cache_read_input_tokens` is 0
|
||||
|
||||
**Solutions**:
|
||||
1. Ensure content >= 1024 tokens (or 2048 for Haiku)
|
||||
2. Verify `cache_control` is on **last block**
|
||||
3. Check content is byte-for-byte identical
|
||||
4. Confirm requests within 5-minute window
|
||||
|
||||
### Unexpected Cache Misses
|
||||
|
||||
**Problem**: Cache hits intermittently
|
||||
|
||||
**Solutions**:
|
||||
1. Ensure content doesn't change (even whitespace)
|
||||
2. Check TTL hasn't expired
|
||||
3. Verify using same API key
|
||||
4. Monitor cache headers in responses
|
||||
|
||||
### High Cache Creation Costs
|
||||
|
||||
**Problem**: Frequent `cache_creation_input_tokens`
|
||||
|
||||
**Solutions**:
|
||||
1. Increase request frequency (use cache before expiry)
|
||||
2. Consider if caching is cost-effective (need 2+ uses)
|
||||
3. Extend cache TTL to 1 hour if supported
|
||||
|
||||
## Cost Calculator
|
||||
|
||||
```typescript
|
||||
function calculateCachingSavings(
|
||||
cachedTokens: number,
|
||||
uncachedTokens: number,
|
||||
requestCount: number
|
||||
): {
|
||||
withoutCaching: number;
|
||||
withCaching: number;
|
||||
savings: number;
|
||||
savingsPercent: number;
|
||||
} {
|
||||
const inputCostPerMTok = 3;
|
||||
const cacheCostPerMTok = 3.75;
|
||||
const cacheReadCostPerMTok = 0.3;
|
||||
|
||||
const withoutCaching = ((cachedTokens + uncachedTokens) / 1_000_000) *
|
||||
inputCostPerMTok * requestCount;
|
||||
|
||||
const cacheWrite = (cachedTokens / 1_000_000) * cacheCostPerMTok;
|
||||
const cacheReads = (cachedTokens / 1_000_000) * cacheReadCostPerMTok * (requestCount - 1);
|
||||
const uncachedInput = (uncachedTokens / 1_000_000) * inputCostPerMTok * requestCount;
|
||||
const withCaching = cacheWrite + cacheReads + uncachedInput;
|
||||
|
||||
const savings = withoutCaching - withCaching;
|
||||
const savingsPercent = (savings / withoutCaching) * 100;
|
||||
|
||||
return { withoutCaching, withCaching, savings, savingsPercent };
|
||||
}
|
||||
|
||||
// Example: 10k cached tokens, 1k uncached, 20 requests
|
||||
const result = calculateCachingSavings(10000, 1000, 20);
|
||||
console.log(`Savings: $${result.savings.toFixed(4)} (${result.savingsPercent.toFixed(1)}%)`);
|
||||
```
|
||||
|
||||
## Official Documentation
|
||||
|
||||
- **Prompt Caching Guide**: https://docs.claude.com/en/docs/build-with-claude/prompt-caching
|
||||
- **Pricing**: https://www.anthropic.com/pricing
|
||||
- **API Reference**: https://docs.claude.com/en/api/messages
|
||||
80
references/rate-limits.md
Normal file
80
references/rate-limits.md
Normal file
@@ -0,0 +1,80 @@
|
||||
# Rate Limits Guide
|
||||
|
||||
Complete guide to Claude API rate limits and how to handle them.
|
||||
|
||||
## Overview
|
||||
|
||||
The Claude API uses a **token bucket algorithm** for rate limiting:
|
||||
- Capacity continuously replenishes
|
||||
- Three types: Requests per minute (RPM), Tokens per minute (TPM), Daily tokens
|
||||
- Limits vary by account tier and model
|
||||
|
||||
## Rate Limit Tiers
|
||||
|
||||
| Tier | Requirements | Example Limits (Sonnet 3.5) |
|
||||
|------|--------------|------------------------------|
|
||||
| Tier 1 | New account | 50 RPM, 40k TPM |
|
||||
| Tier 2 | $10 spend | 1000 RPM, 100k TPM |
|
||||
| Tier 3 | $50 spend | 2000 RPM, 200k TPM |
|
||||
| Tier 4 | $500 spend | 4000 RPM, 400k TPM |
|
||||
|
||||
**Note**: Limits vary by model. Check Console for exact limits.
|
||||
|
||||
## Response Headers
|
||||
|
||||
Every API response includes:
|
||||
|
||||
```
|
||||
anthropic-ratelimit-requests-limit: 50
|
||||
anthropic-ratelimit-requests-remaining: 49
|
||||
anthropic-ratelimit-requests-reset: 2025-10-25T12:00:00Z
|
||||
anthropic-ratelimit-tokens-limit: 50000
|
||||
anthropic-ratelimit-tokens-remaining: 49500
|
||||
anthropic-ratelimit-tokens-reset: 2025-10-25T12:01:00Z
|
||||
```
|
||||
|
||||
On 429 errors:
|
||||
```
|
||||
retry-after: 60 // Seconds until retry allowed
|
||||
```
|
||||
|
||||
## Handling Rate Limits
|
||||
|
||||
### Basic Exponential Backoff
|
||||
|
||||
```typescript
|
||||
async function withRetry(requestFn, maxRetries = 3) {
|
||||
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
||||
try {
|
||||
return await requestFn();
|
||||
} catch (error) {
|
||||
if (error.status === 429) {
|
||||
const delay = 1000 * Math.pow(2, attempt);
|
||||
await new Promise(resolve => setTimeout(resolve, delay));
|
||||
} else {
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Respecting retry-after Header
|
||||
|
||||
```typescript
|
||||
const retryAfter = error.response?.headers?.['retry-after'];
|
||||
const delay = retryAfter ? parseInt(retryAfter) * 1000 : baseDelay;
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Monitor headers** - Check remaining requests/tokens
|
||||
2. **Implement backoff** - Exponential delay on 429
|
||||
3. **Respect retry-after** - Use provided wait time
|
||||
4. **Batch requests** - Group when possible
|
||||
5. **Use caching** - Reduce duplicate requests
|
||||
6. **Upgrade tier** - Scale with usage
|
||||
|
||||
## Official Docs
|
||||
|
||||
https://docs.claude.com/en/api/rate-limits
|
||||
114
references/tool-use-patterns.md
Normal file
114
references/tool-use-patterns.md
Normal file
@@ -0,0 +1,114 @@
|
||||
# Tool Use Patterns
|
||||
|
||||
Common patterns for using tools (function calling) with Claude API.
|
||||
|
||||
## Basic Pattern
|
||||
|
||||
```typescript
|
||||
const tools = [{
|
||||
name: 'get_weather',
|
||||
description: 'Get current weather',
|
||||
input_schema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
location: { type: 'string' }
|
||||
},
|
||||
required: ['location']
|
||||
}
|
||||
}];
|
||||
|
||||
// 1. Send request with tools
|
||||
const response = await anthropic.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
max_tokens: 1024,
|
||||
tools,
|
||||
messages: [{ role: 'user', content: 'Weather in NYC?' }]
|
||||
});
|
||||
|
||||
// 2. Check if Claude wants to use tools
|
||||
if (response.stop_reason === 'tool_use') {
|
||||
for (const block of response.content) {
|
||||
if (block.type === 'tool_use') {
|
||||
// 3. Execute tool
|
||||
const result = await executeToolFunction(block.name, block.input);
|
||||
|
||||
// 4. Return result
|
||||
const toolResult = {
|
||||
type: 'tool_result',
|
||||
tool_use_id: block.id,
|
||||
content: JSON.stringify(result)
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Tool Execution Loop
|
||||
|
||||
```typescript
|
||||
async function chatWithTools(userMessage) {
|
||||
const messages = [{ role: 'user', content: userMessage }];
|
||||
|
||||
while (true) {
|
||||
const response = await anthropic.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
max_tokens: 1024,
|
||||
tools,
|
||||
messages,
|
||||
});
|
||||
|
||||
messages.push({ role: 'assistant', content: response.content });
|
||||
|
||||
if (response.stop_reason === 'tool_use') {
|
||||
// Execute tools and continue
|
||||
const toolResults = await executeAllTools(response.content);
|
||||
messages.push({ role: 'user', content: toolResults });
|
||||
} else {
|
||||
// Final response
|
||||
return response.content.find(b => b.type === 'text')?.text;
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## With Zod Validation
|
||||
|
||||
```typescript
|
||||
import { betaZodTool } from '@anthropic-ai/sdk/helpers/zod';
|
||||
import { z } from 'zod';
|
||||
|
||||
const weatherTool = betaZodTool({
|
||||
name: 'get_weather',
|
||||
inputSchema: z.object({
|
||||
location: z.string(),
|
||||
unit: z.enum(['celsius', 'fahrenheit']).optional(),
|
||||
}),
|
||||
description: 'Get weather',
|
||||
run: async (input) => {
|
||||
return `Weather in ${input.location}: 72°F`;
|
||||
},
|
||||
});
|
||||
|
||||
// Automatic execution
|
||||
const finalMessage = await anthropic.beta.messages.toolRunner({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
max_tokens: 1000,
|
||||
messages: [{ role: 'user', content: 'Weather in SF?' }],
|
||||
tools: [weatherTool],
|
||||
});
|
||||
```
|
||||
|
||||
## Error Handling in Tools
|
||||
|
||||
```typescript
|
||||
{
|
||||
type: 'tool_result',
|
||||
tool_use_id: block.id,
|
||||
content: 'Error: API unavailable',
|
||||
is_error: true // Mark as error
|
||||
}
|
||||
```
|
||||
|
||||
## Official Docs
|
||||
|
||||
https://docs.claude.com/en/docs/build-with-claude/tool-use
|
||||
507
references/top-errors.md
Normal file
507
references/top-errors.md
Normal file
@@ -0,0 +1,507 @@
|
||||
# Top 12 Common Errors and Solutions
|
||||
|
||||
Complete reference for troubleshooting Claude API errors.
|
||||
|
||||
---
|
||||
|
||||
## Error #1: Rate Limit 429 - Too Many Requests
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
429 Too Many Requests: Number of request tokens has exceeded your per-minute rate limit
|
||||
```
|
||||
|
||||
**Source**: https://docs.claude.com/en/api/errors
|
||||
|
||||
**Why It Happens:**
|
||||
- Exceeded requests per minute (RPM)
|
||||
- Exceeded tokens per minute (TPM)
|
||||
- Exceeded daily token quota
|
||||
|
||||
**Solution:**
|
||||
```typescript
|
||||
async function handleRateLimit(requestFn, maxRetries = 3) {
|
||||
for (let attempt = 0; attempt < maxRetries; attempt++) {
|
||||
try {
|
||||
return await requestFn();
|
||||
} catch (error) {
|
||||
if (error.status === 429) {
|
||||
const retryAfter = error.response?.headers?.['retry-after'];
|
||||
const delay = retryAfter ? parseInt(retryAfter) * 1000 : 1000 * Math.pow(2, attempt);
|
||||
console.log(`Retrying in ${delay}ms...`);
|
||||
await new Promise(resolve => setTimeout(resolve, delay));
|
||||
} else {
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Prevention:**
|
||||
- Implement exponential backoff
|
||||
- Respect `retry-after` header
|
||||
- Monitor rate limit headers
|
||||
- Upgrade account tier for higher limits
|
||||
|
||||
---
|
||||
|
||||
## Error #2: Streaming SSE Parsing Errors
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
Incomplete chunks, malformed events, connection dropped
|
||||
```
|
||||
|
||||
**Source**: Community reports, SDK issues
|
||||
|
||||
**Why It Happens:**
|
||||
- Network interruptions
|
||||
- Improper SSE event parsing
|
||||
- Errors occur AFTER initial 200 response
|
||||
|
||||
**Solution:**
|
||||
```typescript
|
||||
const stream = anthropic.messages.stream({...});
|
||||
|
||||
stream
|
||||
.on('error', (error) => {
|
||||
console.error('Stream error:', error);
|
||||
// Implement retry or fallback
|
||||
})
|
||||
.on('abort', (error) => {
|
||||
console.warn('Stream aborted');
|
||||
})
|
||||
.on('end', () => {
|
||||
console.log('Stream completed');
|
||||
});
|
||||
|
||||
await stream.finalMessage();
|
||||
```
|
||||
|
||||
**Prevention:**
|
||||
- Always implement error event listeners
|
||||
- Handle stream abortion
|
||||
- Use SDK helpers (don't parse SSE manually)
|
||||
- Implement reconnection logic
|
||||
|
||||
---
|
||||
|
||||
## Error #3: Prompt Caching Not Activating
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
High costs despite cache_control blocks
|
||||
cache_read_input_tokens: 0
|
||||
```
|
||||
|
||||
**Source**: https://docs.claude.com/en/docs/build-with-claude/prompt-caching
|
||||
|
||||
**Why It Happens:**
|
||||
- `cache_control` not on last block
|
||||
- Content below minimum tokens (1024/2048)
|
||||
- Content changed (breaks cache match)
|
||||
- Outside 5-minute TTL
|
||||
|
||||
**Solution:**
|
||||
```typescript
|
||||
// ❌ Wrong - cache_control not at end
|
||||
{
|
||||
type: 'text',
|
||||
text: DOCUMENT,
|
||||
cache_control: { type: 'ephemeral' }, // Wrong position
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
text: 'Additional text',
|
||||
}
|
||||
|
||||
// ✅ Correct - cache_control at end
|
||||
{
|
||||
type: 'text',
|
||||
text: DOCUMENT + '\n\nAdditional text',
|
||||
cache_control: { type: 'ephemeral' }, // Correct position
|
||||
}
|
||||
```
|
||||
|
||||
**Prevention:**
|
||||
- Place `cache_control` on LAST block
|
||||
- Ensure content >= 1024 tokens
|
||||
- Keep cached content identical
|
||||
- Monitor `cache_read_input_tokens`
|
||||
|
||||
---
|
||||
|
||||
## Error #4: Tool Use Response Format Errors
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
invalid_request_error: tools[0].input_schema is invalid
|
||||
```
|
||||
|
||||
**Source**: API validation
|
||||
|
||||
**Why It Happens:**
|
||||
- Invalid JSON Schema
|
||||
- Missing required fields
|
||||
- Incorrect tool_use_id in tool_result
|
||||
|
||||
**Solution:**
|
||||
```typescript
|
||||
// ✅ Valid tool schema
|
||||
{
|
||||
name: 'get_weather',
|
||||
description: 'Get current weather',
|
||||
input_schema: {
|
||||
type: 'object', // Must be 'object'
|
||||
properties: {
|
||||
location: {
|
||||
type: 'string', // Valid JSON Schema types
|
||||
description: 'City' // Optional but recommended
|
||||
}
|
||||
},
|
||||
required: ['location'] // List required fields
|
||||
}
|
||||
}
|
||||
|
||||
// ✅ Valid tool result
|
||||
{
|
||||
type: 'tool_result',
|
||||
tool_use_id: block.id, // Must match tool_use id
|
||||
content: JSON.stringify(result) // Convert to string
|
||||
}
|
||||
```
|
||||
|
||||
**Prevention:**
|
||||
- Validate schemas with JSON Schema validator
|
||||
- Match `tool_use_id` exactly
|
||||
- Stringify tool results
|
||||
- Test thoroughly before production
|
||||
|
||||
---
|
||||
|
||||
## Error #5: Vision Image Format Issues
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
invalid_request_error: image source must be base64 or url
|
||||
```
|
||||
|
||||
**Source**: API documentation
|
||||
|
||||
**Why It Happens:**
|
||||
- Unsupported image format
|
||||
- Incorrect base64 encoding
|
||||
- Invalid media_type
|
||||
|
||||
**Solution:**
|
||||
```typescript
|
||||
import fs from 'fs';
|
||||
|
||||
const imageData = fs.readFileSync('./image.jpg');
|
||||
const base64Image = imageData.toString('base64');
|
||||
|
||||
// ✅ Correct format
|
||||
{
|
||||
type: 'image',
|
||||
source: {
|
||||
type: 'base64',
|
||||
media_type: 'image/jpeg', // Must match actual format
|
||||
data: base64Image // Pure base64 (no data URI prefix)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Supported Formats:**
|
||||
- image/jpeg
|
||||
- image/png
|
||||
- image/webp
|
||||
- image/gif
|
||||
|
||||
**Prevention:**
|
||||
- Validate format before encoding
|
||||
- Use correct media_type
|
||||
- Remove data URI prefix if present
|
||||
- Keep images under 5MB
|
||||
|
||||
---
|
||||
|
||||
## Error #6: Token Counting Mismatches
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
invalid_request_error: messages: too many tokens
|
||||
```
|
||||
|
||||
**Source**: Token counting differences
|
||||
|
||||
**Why It Happens:**
|
||||
- Not accounting for special tokens
|
||||
- Formatting adds hidden tokens
|
||||
- Context window exceeded
|
||||
|
||||
**Solution:**
|
||||
```typescript
|
||||
// Monitor token usage
|
||||
const response = await anthropic.messages.create({...});
|
||||
|
||||
console.log('Input tokens:', response.usage.input_tokens);
|
||||
console.log('Output tokens:', response.usage.output_tokens);
|
||||
console.log('Total:', response.usage.input_tokens + response.usage.output_tokens);
|
||||
|
||||
// Check against context window
|
||||
const contextWindow = 200000; // Claude 3.5 Sonnet
|
||||
if (response.usage.input_tokens > contextWindow) {
|
||||
console.warn('Approaching context limit');
|
||||
}
|
||||
```
|
||||
|
||||
**Prevention:**
|
||||
- Use official token counter
|
||||
- Monitor usage headers
|
||||
- Implement message pruning
|
||||
- Use prompt caching for long context
|
||||
|
||||
---
|
||||
|
||||
## Error #7: System Prompt Ordering Issues
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
System prompt ignored or overridden
|
||||
```
|
||||
|
||||
**Source**: API behavior
|
||||
|
||||
**Why It Happens:**
|
||||
- System prompt placed after messages
|
||||
- System prompt in wrong format
|
||||
|
||||
**Solution:**
|
||||
```typescript
|
||||
// ❌ Wrong
|
||||
const message = await anthropic.messages.create({
|
||||
messages: [...],
|
||||
system: 'You are helpful', // Wrong - after messages
|
||||
});
|
||||
|
||||
// ✅ Correct
|
||||
const message = await anthropic.messages.create({
|
||||
system: 'You are helpful', // Correct - before messages
|
||||
messages: [...],
|
||||
});
|
||||
```
|
||||
|
||||
**Prevention:**
|
||||
- Always place `system` before `messages`
|
||||
- Use system prompt for behavior instructions
|
||||
- Test system prompt effectiveness
|
||||
|
||||
---
|
||||
|
||||
## Error #8: Context Window Exceeded
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
invalid_request_error: messages: too many tokens (210000 > 200000)
|
||||
```
|
||||
|
||||
**Source**: Model limits
|
||||
|
||||
**Why It Happens:**
|
||||
- Long conversations
|
||||
- Large documents
|
||||
- Not pruning message history
|
||||
|
||||
**Solution:**
|
||||
```typescript
|
||||
function pruneMessages(messages, maxTokens = 150000) {
|
||||
// Keep most recent messages
|
||||
let totalTokens = 0;
|
||||
const prunedMessages = [];
|
||||
|
||||
for (let i = messages.length - 1; i >= 0; i--) {
|
||||
const msgTokens = estimateTokens(messages[i].content);
|
||||
if (totalTokens + msgTokens > maxTokens) break;
|
||||
prunedMessages.unshift(messages[i]);
|
||||
totalTokens += msgTokens;
|
||||
}
|
||||
|
||||
return prunedMessages;
|
||||
}
|
||||
```
|
||||
|
||||
**Prevention:**
|
||||
- Implement message history pruning
|
||||
- Use summarization for old messages
|
||||
- Use prompt caching
|
||||
- Choose a model with a larger context window where available (standard Claude models: 200K tokens)
|
||||
|
||||
---
|
||||
|
||||
## Error #9: Extended Thinking on Wrong Model
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
No thinking blocks in response
|
||||
```
|
||||
|
||||
**Source**: Model capabilities
|
||||
|
||||
**Why It Happens:**
|
||||
- Using Claude 3.5 Sonnet (not supported)
|
||||
- Should use Claude 3.7 Sonnet or Claude 4
|
||||
|
||||
**Solution:**
|
||||
```typescript
|
||||
// ❌ Wrong model - no extended thinking
|
||||
model: 'claude-3-5-sonnet-20241022'
|
||||
|
||||
// ✅ Correct models for extended thinking
|
||||
model: 'claude-3-7-sonnet-20250219' // Has extended thinking
|
||||
model: 'claude-opus-4-20250514' // Has extended thinking
|
||||
```
|
||||
|
||||
**Prevention:**
|
||||
- Verify model capabilities
|
||||
- Use 3.7 Sonnet or 4.x models for extended thinking
|
||||
- Document model requirements
|
||||
|
||||
---
|
||||
|
||||
## Error #10: API Key Exposure in Client Code
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
CORS errors, security vulnerability
|
||||
```
|
||||
|
||||
**Source**: Security best practices
|
||||
|
||||
**Why It Happens:**
|
||||
- Making API calls from browser
|
||||
- API key in client-side code
|
||||
|
||||
**Solution:**
|
||||
```typescript
|
||||
// ❌ Never do this
|
||||
const anthropic = new Anthropic({
|
||||
apiKey: 'sk-ant-...', // Exposed in browser!
|
||||
});
|
||||
|
||||
// ✅ Use server-side endpoint
|
||||
async function callClaude(messages) {
|
||||
const response = await fetch('/api/chat', {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({ messages }),
|
||||
});
|
||||
return response.json();
|
||||
}
|
||||
```
|
||||
|
||||
**Prevention:**
|
||||
- Server-side only
|
||||
- Use environment variables
|
||||
- Implement authentication
|
||||
- Never expose API key
|
||||
|
||||
---
|
||||
|
||||
## Error #11: Rate Limit Tier Confusion
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
Lower limits than expected
|
||||
```
|
||||
|
||||
**Source**: Account tier system
|
||||
|
||||
**Why It Happens:**
|
||||
- Not understanding tier progression
|
||||
- Expecting higher limits without usage history
|
||||
|
||||
**Solution:**
|
||||
- Check current tier in Console
|
||||
- Tiers auto-scale with usage ($10, $50, $500 spend)
|
||||
- Monitor rate limit headers
|
||||
- Contact support for custom limits
|
||||
|
||||
**Tier Progression:**
|
||||
- Tier 1: 50 RPM, 40k TPM
|
||||
- Tier 2: 1000 RPM, 100k TPM ($10 spend)
|
||||
- Tier 3: 2000 RPM, 200k TPM ($50 spend)
|
||||
- Tier 4: 4000 RPM, 400k TPM ($500 spend)
|
||||
|
||||
**Prevention:**
|
||||
- Review tier requirements
|
||||
- Plan for gradual scale-up
|
||||
- Implement proper rate limiting
|
||||
|
||||
---
|
||||
|
||||
## Error #12: Message Batches Beta Headers Missing
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
invalid_request_error: unknown parameter: batches
|
||||
```
|
||||
|
||||
**Source**: Beta API requirements
|
||||
|
||||
**Why It Happens:**
|
||||
- Missing `anthropic-beta` header
|
||||
- Using wrong endpoint
|
||||
|
||||
**Solution:**
|
||||
```typescript
|
||||
const response = await fetch('https://api.anthropic.com/v1/messages/batches', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'x-api-key': API_KEY,
|
||||
'anthropic-version': '2023-06-01',
|
||||
'anthropic-beta': 'message-batches-2024-09-24', // Required!
|
||||
'content-type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({...}),
|
||||
});
|
||||
```
|
||||
|
||||
**Prevention:**
|
||||
- Include beta headers for beta features
|
||||
- Check official docs for header requirements
|
||||
- Test beta features in development first
|
||||
|
||||
---
|
||||
|
||||
## Quick Diagnosis Checklist
|
||||
|
||||
When encountering errors:
|
||||
|
||||
1. ✅ Check error status code (400, 401, 429, 500, etc.)
|
||||
2. ✅ Read error message carefully
|
||||
3. ✅ Verify API key is valid and in environment variable
|
||||
4. ✅ Confirm model ID is correct
|
||||
5. ✅ Check request format matches API spec
|
||||
6. ✅ Monitor rate limit headers
|
||||
7. ✅ Review recent code changes
|
||||
8. ✅ Test with minimal example
|
||||
9. ✅ Check official docs for breaking changes
|
||||
10. ✅ Search GitHub issues for similar problems
|
||||
|
||||
---
|
||||
|
||||
## Getting Help
|
||||
|
||||
**Official Resources:**
|
||||
- **Errors Reference**: https://docs.claude.com/en/api/errors
|
||||
- **API Documentation**: https://docs.claude.com/en/api
|
||||
- **Support**: https://support.claude.com/
|
||||
|
||||
**Community:**
|
||||
- **GitHub Issues**: https://github.com/anthropics/anthropic-sdk-typescript/issues
|
||||
- **Developer Forum**: https://support.claude.com/
|
||||
|
||||
**This Skill:**
|
||||
- Check other reference files for detailed guides
|
||||
- Review templates for working examples
|
||||
- Verify setup checklist in SKILL.md
|
||||
108
references/vision-capabilities.md
Normal file
108
references/vision-capabilities.md
Normal file
@@ -0,0 +1,108 @@
|
||||
# Vision Capabilities
|
||||
|
||||
Guide to using Claude's vision capabilities for image understanding.
|
||||
|
||||
## Supported Formats
|
||||
|
||||
- **JPEG** (image/jpeg)
|
||||
- **PNG** (image/png)
|
||||
- **WebP** (image/webp)
|
||||
- **GIF** (image/gif) - non-animated only
|
||||
|
||||
**Max size**: 5MB per image
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```typescript
|
||||
import fs from 'fs';
|
||||
|
||||
const imageData = fs.readFileSync('./image.jpg').toString('base64');
|
||||
|
||||
const message = await anthropic.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
max_tokens: 1024,
|
||||
messages: [{
|
||||
role: 'user',
|
||||
content: [
|
||||
{
|
||||
type: 'image',
|
||||
source: {
|
||||
type: 'base64',
|
||||
media_type: 'image/jpeg',
|
||||
data: imageData
|
||||
}
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
text: 'What is in this image?'
|
||||
}
|
||||
]
|
||||
}]
|
||||
});
|
||||
```
|
||||
|
||||
## Multiple Images
|
||||
|
||||
```typescript
|
||||
content: [
|
||||
{ type: 'text', text: 'Compare these images:' },
|
||||
{ type: 'image', source: { type: 'base64', media_type: 'image/jpeg', data: img1 } },
|
||||
{ type: 'image', source: { type: 'base64', media_type: 'image/png', data: img2 } },
|
||||
{ type: 'text', text: 'What are the differences?' }
|
||||
]
|
||||
```
|
||||
|
||||
## With Tools
|
||||
|
||||
```typescript
|
||||
const message = await anthropic.messages.create({
|
||||
model: 'claude-sonnet-4-5-20250929',
|
||||
max_tokens: 1024,
|
||||
tools: [searchProductTool],
|
||||
messages: [{
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'image', source: {...} },
|
||||
{ type: 'text', text: 'Find similar products' }
|
||||
]
|
||||
}]
|
||||
});
|
||||
```
|
||||
|
||||
## With Prompt Caching
|
||||
|
||||
```typescript
|
||||
{
|
||||
type: 'image',
|
||||
source: {
|
||||
type: 'base64',
|
||||
media_type: 'image/jpeg',
|
||||
data: imageData
|
||||
},
|
||||
cache_control: { type: 'ephemeral' } // Cache image
|
||||
}
|
||||
```
|
||||
|
||||
## Validation
|
||||
|
||||
```typescript
|
||||
function validateImage(path: string) {
|
||||
const stats = fs.statSync(path);
|
||||
const sizeMB = stats.size / (1024 * 1024);
|
||||
|
||||
if (sizeMB > 5) {
|
||||
throw new Error('Image exceeds 5MB');
|
||||
}
|
||||
|
||||
const ext = path.split('.').pop()?.toLowerCase();
|
||||
  if (!ext || !['jpg', 'jpeg', 'png', 'webp', 'gif'].includes(ext)) {
|
||||
throw new Error('Unsupported format');
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
```
|
||||
|
||||
## Official Docs
|
||||
|
||||
https://docs.claude.com/en/docs/build-with-claude/vision
|
||||
Reference in New Issue
Block a user