Initial commit

Zhongwei Li
2025-11-30 08:24:51 +08:00
commit 8aebb293cd
31 changed files with 7386 additions and 0 deletions

12
.claude-plugin/plugin.json Normal file

@@ -0,0 +1,12 @@
{
"name": "google-gemini-api",
"description": "Integrate Gemini API with correct current SDK (@google/genai v1.27+, NOT deprecated @google/generative-ai). Supports text generation, multimodal (images/video/audio/PDFs), function calling, and thinking mode. 1M input tokens. Use when: integrating Gemini API, implementing multimodal AI, using thinking mode for reasoning, function calling with parallel execution, streaming responses, deploying to Cloudflare Workers, building chat, or troubleshooting SDK deprecation, context window, model not foun",
"version": "1.0.0",
"author": {
"name": "Jeremy Dawes",
"email": "jeremy@jezweb.net"
},
"skills": [
"./"
]
}

3
README.md Normal file

@@ -0,0 +1,3 @@
# google-gemini-api
Integrate Gemini API with correct current SDK (@google/genai v1.27+, NOT deprecated @google/generative-ai). Supports text generation, multimodal (images/video/audio/PDFs), function calling, and thinking mode. 1M input tokens. Use when: integrating Gemini API, implementing multimodal AI, using thinking mode for reasoning, function calling with parallel execution, streaming responses, deploying to Cloudflare Workers, building chat, or troubleshooting SDK deprecation, context window, model not found

2173
SKILL.md Normal file

File diff suppressed because it is too large.

153
plugin.lock.json Normal file

@@ -0,0 +1,153 @@
{
"$schema": "internal://schemas/plugin.lock.v1.json",
"pluginId": "gh:jezweb/claude-skills:skills/google-gemini-api",
"normalized": {
"repo": null,
"ref": "refs/tags/v20251128.0",
"commit": "bb3e9b7656ff919916c1b773f1e5d845bb3c6633",
"treeHash": "b8bbae8bede51e3e5854007aa5c1ab3cb3a7e1050bbca038893d7f48dd558ba6",
"generatedAt": "2025-11-28T10:19:01.568870Z",
"toolVersion": "publish_plugins.py@0.2.0"
},
"origin": {
"remote": "git@github.com:zhongweili/42plugin-data.git",
"branch": "master",
"commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
"repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
},
"manifest": {
"name": "google-gemini-api",
"description": "Integrate Gemini API with correct current SDK (@google/genai v1.27+, NOT deprecated @google/generative-ai). Supports text generation, multimodal (images/video/audio/PDFs), function calling, and thinking mode. 1M input tokens. Use when: integrating Gemini API, implementing multimodal AI, using thinking mode for reasoning, function calling with parallel execution, streaming responses, deploying to Cloudflare Workers, building chat, or troubleshooting SDK deprecation, context window, model not foun",
"version": "1.0.0"
},
"content": {
"files": [
{
"path": "README.md",
"sha256": "1d4a239aa8f7a1d70453fbb0c216b4ebdc01b312dcf5d14325819f73c513439c"
},
{
"path": "SKILL.md",
"sha256": "c17553ecbdb69ac44a1733121a9e728d9575b3d80d40ff72e7ccdf57044bf379"
},
{
"path": "references/thinking-mode-guide.md",
"sha256": "c1c85ddc27b582e0fab36385c0cc50863d2a032c5d03730fadfe10d6f4efe5ab"
},
{
"path": "references/grounding-guide.md",
"sha256": "dd7a2ef2f5e09e090a300caeaebe1bbb333c03496e7e572c6dd7de50549c85c2"
},
{
"path": "references/multimodal-guide.md",
"sha256": "cd2361169dd4d6be8941ca862157878772bd748aea8655fd7ca2b66561234743"
},
{
"path": "references/streaming-patterns.md",
"sha256": "3bd053362626b4646e366da339cd0eadcc7085e48121eb36bafd56d110b6e322"
},
{
"path": "references/context-caching-guide.md",
"sha256": "c967b0d121477ecb89345b13ad68c417d6c18d367e02f081bd84692f9e1d4397"
},
{
"path": "references/top-errors.md",
"sha256": "f4de6f9304496d740bd48dba2722b08160a007e7df70d6c782f14c99447eba79"
},
{
"path": "references/code-execution-patterns.md",
"sha256": "374ecca169795f5d2299bb0d8917a39dc79bef7d18c61330dff07f31624486ea"
},
{
"path": "references/models-guide.md",
"sha256": "8f142fee3071ce4f1a071c2a3966b02e725c01357a0c049d29b236d810ff3858"
},
{
"path": "references/sdk-migration-guide.md",
"sha256": "a61f0c8adb118cdd615cb09bc4f531c2c96e898d820045a8dac3ec48928eaf63"
},
{
"path": "references/function-calling-patterns.md",
"sha256": "5900df715f79137126a43fc9011c680f9f01afdcc515a7c3ecd3e02f6f8062fe"
},
{
"path": "references/generation-config.md",
"sha256": "c7dc7abdb40f16d31d950deacfb45f8bcc1af89671e29d5baa83bddc2e924844"
},
{
"path": "scripts/check-versions.sh",
"sha256": "944f9ad6dd1c8749bf15555c9855c2bf09659c1766c8dad3490cc29fc7626e05"
},
{
"path": ".claude-plugin/plugin.json",
"sha256": "42aee177ecd23e9adf0f9b406bc329405300f2be1fd5d8e721eca1851714a1a1"
},
{
"path": "templates/function-calling-basic.ts",
"sha256": "f1af0c619c93aa97f782899d2e5a0425a72c26a31141ca9cd1e8bf3b9e51579b"
},
{
"path": "templates/thinking-mode.ts",
"sha256": "42d2a63670690e74005948543e3579bdb214844f2c3de5896eb23d3f1efdc3ea"
},
{
"path": "templates/function-calling-parallel.ts",
"sha256": "a15b99eed2c9496a23d118a70893a3c1dc797aced2866ea953adcae34cf1d24f"
},
{
"path": "templates/multimodal-video-audio.ts",
"sha256": "9a709be0fbe5bcf56cea600fc8f228cec5511a3e02e91ce6f2614502b3bbb59f"
},
{
"path": "templates/text-generation-fetch.ts",
"sha256": "3a7cadf7990cc39a58b04b3e2766099235bd3c32ebe71f61910436bbff96bf31"
},
{
"path": "templates/grounding-search.ts",
"sha256": "d0ee315b13d64eeb7896578edddf7efd8f6057ddeac889751a429bcd7014184e"
},
{
"path": "templates/code-execution.ts",
"sha256": "0a7b261c5665fba8bc661627dc9295eb8ed7bb6ea80087899d4287fafd464eaf"
},
{
"path": "templates/streaming-chat.ts",
"sha256": "a8883cf071c083e2b50cddfe4754fd916e6260cc15d021e10082e4aa84179b80"
},
{
"path": "templates/package.json",
"sha256": "cd97bba41f70e8a1b16381fb1c05927d74b3c4afe30f81d3be91167765be1ebb"
},
{
"path": "templates/context-caching.ts",
"sha256": "395418ad8397a7e0a2698cc4308ea1674d3ef00bec3def54a19faefa9bd985e4"
},
{
"path": "templates/text-generation-basic.ts",
"sha256": "8722c5eea9efc8411513ec53b6df45d827d382b4c50fe0dfce21d7af97f9a7f0"
},
{
"path": "templates/cloudflare-worker.ts",
"sha256": "8b026ae94b2797e8b15bfac43442d0f95bcfdd6581863e49e1d199945e93594d"
},
{
"path": "templates/multimodal-image.ts",
"sha256": "88c50979e7f71f670e022accf669e9fa757887b645d7e318d2ba1ac0ffe85d16"
},
{
"path": "templates/streaming-fetch.ts",
"sha256": "9dd0a6bd48d69290787b7f526600a7acb99f47c0d9823cbd81158e358b5d108d"
},
{
"path": "templates/combined-advanced.ts",
"sha256": "37972b62e1e139c0357f5d2a03c1b335879ec472a5c4b47a90422630f485fa89"
}
],
"dirSha256": "b8bbae8bede51e3e5854007aa5c1ab3cb3a7e1050bbca038893d7f48dd558ba6"
},
"security": {
"scannedAt": null,
"scannerVersion": null,
"flags": []
}
}

481
references/code-execution-patterns.md Normal file

@@ -0,0 +1,481 @@
# Code Execution Patterns
Complete guide to using code execution with Google Gemini API for computational tasks, data analysis, and problem-solving.
---
## What is Code Execution?
Code Execution allows Gemini models to generate and execute Python code to solve problems requiring computation, enabling the model to:
- Perform precise mathematical calculations
- Analyze data with pandas/numpy
- Generate charts and visualizations
- Implement algorithms
- Process files and data structures
---
## How It Works
1. **Model receives prompt** requiring computation
2. **Model generates Python code** to solve the problem
3. **Code executes in sandbox** (secure, isolated environment)
4. **Results return to model** for incorporation into response
5. **Model explains results** in natural language
---
## Enabling Code Execution
### Basic Setup (SDK)
```typescript
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash', // Or gemini-2.5-pro
contents: 'Calculate the sum of first 50 prime numbers',
config: {
tools: [{ codeExecution: {} }] // Enable code execution
}
});
```
### Basic Setup (Fetch)
```typescript
const response = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-goog-api-key': env.GEMINI_API_KEY,
},
body: JSON.stringify({
tools: [{ code_execution: {} }],
contents: [{ parts: [{ text: 'Calculate...' }] }]
}),
}
);
```
---
## Available Python Packages
### Standard Library
- `math`, `statistics`, `random`
- `datetime`, `time`, `calendar`
- `json`, `csv`, `re`
- `collections`, `itertools`, `functools`
### Data Science
- `numpy` - numerical computing
- `pandas` - data analysis and manipulation
- `scipy` - scientific computing
### Visualization
- `matplotlib` - plotting and charts
- `seaborn` - statistical visualization
**Note**: This is a **limited sandbox environment** - not all PyPI packages are available.
---
## Response Structure
### Parsing Code Execution Results
```typescript
for (const part of response.candidates[0].content.parts) {
// Inline text
if (part.text) {
console.log('Text:', part.text);
}
// Generated code
if (part.executableCode) {
console.log('Language:', part.executableCode.language); // "PYTHON"
console.log('Code:', part.executableCode.code);
}
// Execution results
if (part.codeExecutionResult) {
console.log('Outcome:', part.codeExecutionResult.outcome); // "OUTCOME_OK" or "OUTCOME_FAILED"
console.log('Output:', part.codeExecutionResult.output);
}
}
```
### Example Response
```json
{
"candidates": [{
"content": {
"parts": [
{ "text": "I'll calculate that for you." },
{
"executableCode": {
"language": "PYTHON",
"code": "primes = []\nnum = 2\nwhile len(primes) < 50:\n if is_prime(num):\n primes.append(num)\n num += 1\nprint(sum(primes))"
}
},
{
"codeExecutionResult": {
"outcome": "OUTCOME_OK",
"output": "5117\n"
}
},
{ "text": "The sum is 5117." }
]
}
}]
}
```
---
## Common Patterns
### 1. Mathematical Calculations
```typescript
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Calculate the 100th Fibonacci number',
config: { tools: [{ codeExecution: {} }] }
});
```
**Prompting Tip**: Use phrases like "generate and run code" or "calculate using code" to explicitly request code execution.
### 2. Data Analysis
```typescript
const prompt = `
Analyze this sales data:
month,revenue,customers
Jan,50000,120
Feb,62000,145
Mar,58000,138
Calculate:
1. Total revenue
2. Average revenue per customer
3. Month-over-month growth rate
Use pandas or numpy for analysis.
`;
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: prompt,
config: { tools: [{ codeExecution: {} }] }
});
```
### 3. Chart Generation
```typescript
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Create a bar chart showing prime number distribution by last digit (0-9) for primes under 100',
config: { tools: [{ codeExecution: {} }] }
});
```
**Note**: Chart image data appears in `codeExecutionResult.output` (base64 encoded in some cases).
### 4. Algorithm Implementation
```typescript
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Implement quicksort and sort this list: [64, 34, 25, 12, 22, 11, 90]. Show the sorted result.',
config: { tools: [{ codeExecution: {} }] }
});
```
### 5. File Processing (In-Memory)
```typescript
const csvData = `name,age,city
Alice,30,NYC
Bob,25,LA
Charlie,35,Chicago`;
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: `Parse this CSV data and calculate average age:\n\n${csvData}`,
config: { tools: [{ codeExecution: {} }] }
});
```
---
## Chat with Code Execution
### Multi-Turn Computational Conversations
```typescript
const chat = await ai.chats.create({
model: 'gemini-2.5-flash',
config: { tools: [{ codeExecution: {} }] }
});
// First turn
let response = await chat.sendMessage('I have a data analysis question');
console.log(response.text);
// Second turn (will use code execution)
response = await chat.sendMessage(`
Calculate statistics for: [12, 15, 18, 22, 25, 28, 30]
- Mean
- Median
- Standard deviation
`);
for (const part of response.candidates[0].content.parts) {
if (part.text) console.log(part.text);
if (part.executableCode) console.log('Code:', part.executableCode.code);
if (part.codeExecutionResult) console.log('Results:', part.codeExecutionResult.output);
}
```
---
## Error Handling
### Checking Execution Outcome
```typescript
for (const part of response.candidates[0].content.parts) {
if (part.codeExecutionResult) {
if (part.codeExecutionResult.outcome === 'OUTCOME_OK') {
console.log('✅ Success:', part.codeExecutionResult.output);
} else if (part.codeExecutionResult.outcome === 'OUTCOME_FAILED') {
console.error('❌ Execution failed:', part.codeExecutionResult.output);
}
}
}
```
### Common Execution Errors
**Timeout**:
```
Error: Execution timed out after 30 seconds
```
**Solution**: Simplify computation or reduce data size.
**Import Error**:
```
ModuleNotFoundError: No module named 'requests'
```
**Solution**: Use only available packages (numpy, pandas, matplotlib, seaborn, scipy).
**Syntax Error**:
```
SyntaxError: invalid syntax
```
**Solution**: Model generated invalid code - try rephrasing prompt or regenerating.
---
## Best Practices
### ✅ Do
1. **Be Explicit**: Use phrases like "generate and run code" to trigger code execution
2. **Provide Data**: Include data directly in prompt for analysis
3. **Specify Output**: Ask for specific calculations or metrics
4. **Use Available Packages**: Stick to numpy, pandas, matplotlib, seaborn, scipy
5. **Check Outcome**: Always verify `outcome === 'OUTCOME_OK'`
### ❌ Don't
1. **Don't expect network access**: code cannot make HTTP requests
2. **Don't rely on the file system**: no persistent file storage between executions
3. **Don't run long computations**: timeout limits apply (~30 seconds)
4. **Don't add external dependencies**: new packages cannot be installed
5. **Don't assume state persistence**: each execution is isolated (no global state)
---
## Limitations
### Sandbox Restrictions
- **No Network Access**: Cannot call external APIs
- **No File I/O**: Cannot read/write to disk (in-memory only)
- **Limited Packages**: Only pre-installed packages available
- **Execution Timeout**: ~30 seconds maximum
- **No State**: Each execution is independent
### Supported Models
**Works with**:
- `gemini-2.5-pro`
- `gemini-2.5-flash`
**Does NOT work with**:
- `gemini-2.5-flash-lite` (no code execution support)
- Gemini 1.5 models (use Gemini 2.5)
---
## Advanced Patterns
### Iterative Analysis
```typescript
const chat = await ai.chats.create({
model: 'gemini-2.5-flash',
config: { tools: [{ codeExecution: {} }] }
});
// Step 1: Initial analysis
let response = await chat.sendMessage('Analyze data: [10, 20, 30, 40, 50]');
// Step 2: Follow-up based on results
response = await chat.sendMessage('Now calculate the variance');
// Step 3: Visualization
response = await chat.sendMessage('Create a histogram of this data');
```
### Combining with Function Calling
```typescript
const weatherFunction = {
name: 'get_current_weather',
description: 'Get weather for a city',
parametersJsonSchema: {
type: 'object',
properties: { city: { type: 'string' } },
required: ['city']
}
};
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Get weather for NYC, LA, Chicago. Calculate the average temperature.',
config: {
tools: [
{ functionDeclarations: [weatherFunction] },
{ codeExecution: {} }
]
}
});
// Model will:
// 1. Call get_current_weather for each city
// 2. Generate code to calculate average
// 3. Return result
```
### Data Transformation Pipeline
```typescript
const prompt = `
Transform this data:
Input: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Pipeline:
1. Filter odd numbers
2. Square each number
3. Calculate sum
4. Return result
Use code to process.
`;
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: prompt,
config: { tools: [{ codeExecution: {} }] }
});
```
---
## Optimization Tips
### 1. Clear Instructions
**❌ Vague**:
```typescript
contents: 'Analyze this data'
```
**✅ Specific**:
```typescript
contents: 'Calculate mean, median, and standard deviation for: [12, 15, 18, 22, 25]'
```
### 2. Provide Complete Data
```typescript
const csvData = `...complete dataset...`;
const prompt = `Analyze this CSV data:\n\n${csvData}\n\nCalculate total revenue.`;
```
### 3. Request Code Explicitly
```typescript
contents: 'Generate and run code to calculate the factorial of 20'
```
### 4. Handle Large Datasets
For large data, consider:
- Sampling (analyze subset)
- Aggregation (group by categories)
- Pagination (process in chunks)
---
## Troubleshooting
### Code Not Executing
**Symptom**: Response has text but no `executableCode`
**Causes**:
1. Code execution not enabled (`tools: [{ codeExecution: {} }]`)
2. Model decided code wasn't necessary
3. Using `gemini-2.5-flash-lite` (doesn't support code execution)
**Solution**: Be explicit in prompt: "Use code to calculate..."
### Timeout Errors
**Symptom**: `OUTCOME_FAILED` with timeout message
**Causes**: Computation too complex or data too large
**Solution**:
- Simplify algorithm
- Reduce data size
- Use more efficient approach
### Import Errors
**Symptom**: `ModuleNotFoundError`
**Causes**: Trying to import unavailable package
**Solution**: Use only available packages (numpy, pandas, matplotlib, seaborn, scipy)
---
## References
- Official Docs: https://ai.google.dev/gemini-api/docs/code-execution
- Templates: See `code-execution.ts` for working examples
- Available Packages: See "Available Python Packages" section above

373
references/context-caching-guide.md Normal file

@@ -0,0 +1,373 @@
# Context Caching Guide
Complete guide to using context caching with Google Gemini API to reduce costs by up to 90%.
---
## What is Context Caching?
Context caching allows you to cache frequently used content (system instructions, large documents, videos) and reuse it across multiple requests, significantly reducing token costs and improving latency.
---
## How It Works
1. **Create a cache** with your repeated content (documents, videos, system instructions)
2. **Set TTL** (time-to-live) for cache expiration
3. **Reference the cache** in subsequent API calls
4. **Pay less** - cached tokens cost ~90% less than regular input tokens
---
## Benefits
### Cost Savings
- **Cached input tokens**: ~90% cheaper than regular tokens
- **Output tokens**: Same price (not cached)
- **Example**: 100K token document cached → ~10K token cost equivalent
### Performance
- **Reduced latency**: Cached content is preprocessed
- **Faster responses**: No need to reprocess large context
- **Consistent results**: Same context every time
### Use Cases
- Large documents analyzed repeatedly
- Long system instructions used across sessions
- Video/audio files queried multiple times
- Consistent conversation context
---
## Cache Creation
### Basic Cache (SDK)
```typescript
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001', // Must use explicit version!
config: {
displayName: 'my-cache',
systemInstruction: 'You are a helpful assistant.',
contents: 'Large document content here...',
ttl: '3600s', // 1 hour
}
});
```
### Cache with Expiration Time
```typescript
// Set specific expiration time (timezone-aware)
const expirationTime = new Date(Date.now() + 2 * 60 * 60 * 1000); // 2 hours from now
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001',
config: {
displayName: 'my-cache',
contents: documentText,
expireTime: expirationTime, // Use expireTime instead of ttl
}
});
```
---
## TTL (Time-To-Live) Guidelines
### Recommended TTL Values
| Use Case | TTL | Reason |
|----------|-----|--------|
| Quick analysis session | 300s (5 min) | Short-lived tasks |
| Extended conversation | 3600s (1 hour) | Standard session length |
| Daily batch processing | 86400s (24 hours) | Reuse across day |
| Long-term analysis | 604800s (7 days) | Maximum allowed |
### TTL vs Expiration Time
**TTL (time-to-live)**:
- Relative duration from cache creation
- Format: `"3600s"` (string with 's' suffix)
- Easy for session-based caching
**Expiration Time**:
- Absolute timestamp
- Must be timezone-aware Date object
- Precise control over cache lifetime
---
## Using a Cache
### Generate Content with Cache (SDK)
```typescript
// Use cache name as model parameter
const response = await ai.models.generateContent({
model: cache.name, // Use cache.name, not original model name
contents: 'Summarize the document'
});
console.log(response.text);
```
### Multiple Queries with Same Cache
```typescript
const queries = [
'What are the key points?',
'Who are the main characters?',
'What is the conclusion?'
];
for (const query of queries) {
const response = await ai.models.generateContent({
model: cache.name,
contents: query
});
console.log(`Q: ${query}`);
console.log(`A: ${response.text}\n`);
}
```
---
## Cache Management
### Update Cache TTL
```typescript
// Extend cache lifetime before it expires
await ai.caches.update({
name: cache.name,
config: {
ttl: '7200s' // Extend to 2 hours
}
});
```
### List All Caches
```typescript
const caches = await ai.caches.list();
caches.forEach(cache => {
console.log(`${cache.displayName}: ${cache.name}`);
console.log(`Expires: ${cache.expireTime}`);
});
```
### Delete Cache
```typescript
// Delete when no longer needed
await ai.caches.delete({ name: cache.name });
```
---
## Advanced Use Cases
### Caching Video Files
```typescript
// 1. Upload video (let: reassigned while polling)
let videoFile = await ai.files.upload({
file: './video.mp4'
});
// 2. Wait for processing
while (videoFile.state === 'PROCESSING') {
await new Promise(resolve => setTimeout(resolve, 2000));
videoFile = await ai.files.get({ name: videoFile.name });
}
// 3. Create cache with video
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001',
config: {
displayName: 'video-cache',
systemInstruction: 'Analyze this video.',
contents: [videoFile],
ttl: '600s'
}
});
// 4. Query video multiple times
const response1 = await ai.models.generateContent({
model: cache.name,
contents: 'What happens in the first minute?'
});
const response2 = await ai.models.generateContent({
model: cache.name,
contents: 'Who are the main people?'
});
```
### Caching with System Instructions
```typescript
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001',
config: {
displayName: 'legal-expert-cache',
systemInstruction: `
You are a legal expert specializing in contract law.
Always cite relevant sections when making claims.
Use clear, professional language.
`,
contents: largeContractDocument,
ttl: '3600s'
}
});
// System instruction is part of cached context
const response = await ai.models.generateContent({
model: cache.name,
contents: 'Is this contract enforceable?'
});
```
---
## Important Notes
### Model Version Requirement
**⚠️ You MUST use explicit version suffixes when creating caches:**
```typescript
// ✅ CORRECT
model: 'gemini-2.5-flash-001'
// ❌ WRONG (will fail)
model: 'gemini-2.5-flash'
```
### Cache Expiration
- Caches are **automatically deleted** after TTL expires
- **Cannot recover** expired caches - must recreate
- Update TTL **before expiration** to extend lifetime
### Cost Calculation
```
Regular request: 100,000 input tokens = 100K token cost
With caching (after cache creation):
- Cached tokens: 100,000 × 0.1 (90% discount) = 10K equivalent cost
- New tokens: 1,000 × 1.0 = 1K cost
- Total: 11K equivalent (89% savings!)
```
### Limitations
- Maximum TTL: 7 days (604800s)
- Cache creation costs same as regular tokens (first time only)
- Subsequent uses get 90% discount
- Only input tokens are cached (output tokens never cached)
---
## Best Practices
### When to Use Caching
**Good Use Cases:**
- Large documents queried repeatedly (legal docs, research papers)
- Video/audio files analyzed with different questions
- Long system instructions used across many requests
- Consistent context in multi-turn conversations
**Bad Use Cases:**
- Single-use content (no benefit)
- Frequently changing content
- Short content (<1000 tokens) - minimal savings
- Content used only once per day (cache might expire)
### Optimization Tips
1. **Cache Early**: Create cache at session start
2. **Extend TTL**: Update before expiration if still needed
3. **Monitor Usage**: Track how often cache is reused
4. **Clean Up**: Delete unused caches to avoid clutter
5. **Combine Features**: Use caching with code execution, grounding for powerful workflows
### Cache Naming
Use descriptive `displayName` for easy identification:
```typescript
// ✅ Good names
displayName: 'financial-report-2024-q3'
displayName: 'legal-contract-acme-corp'
displayName: 'video-analysis-project-x'
// ❌ Vague names
displayName: 'cache1'
displayName: 'test'
```
---
## Troubleshooting
### "Invalid model name" Error
**Problem**: Using `gemini-2.5-flash` instead of `gemini-2.5-flash-001`
**Solution**: Always use explicit version suffix:
```typescript
model: 'gemini-2.5-flash-001' // Correct
```
### Cache Expired Error
**Problem**: Trying to use cache after TTL expired
**Solution**: Check expiration before use or extend TTL proactively:
```typescript
let cache = await ai.caches.get({ name: cacheName });
if (new Date(cache.expireTime) < new Date()) {
// Cache expired, recreate it
cache = await ai.caches.create({ ... });
}
```
### High Costs Despite Caching
**Problem**: Creating new cache for each request
**Solution**: Reuse the same cache across multiple requests:
```typescript
// ❌ Wrong - creates new cache each time
for (const query of queries) {
const cache = await ai.caches.create({ ... }); // Expensive!
const response = await ai.models.generateContent({ model: cache.name, ... });
}
// ✅ Correct - create once, use many times
const cache = await ai.caches.create({ ... }); // Create once
for (const query of queries) {
const response = await ai.models.generateContent({ model: cache.name, ... });
}
```
---
## References
- Official Docs: https://ai.google.dev/gemini-api/docs/caching
- Cost Optimization: See "Cost Optimization" in main SKILL.md
- Templates: See `context-caching.ts` for working examples

59
references/function-calling-patterns.md Normal file

@@ -0,0 +1,59 @@
# Function Calling Patterns
Complete guide to implementing function calling (tool use) with Gemini API.
---
## Basic Pattern
1. Define function declarations
2. Send request with tools
3. Check if model wants to call functions
4. Execute functions
5. Send results back to model
6. Get final response
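
A minimal end-to-end sketch of this loop with the `@google/genai` SDK; the weather function, its schema, and the hard-coded result are illustrative stand-ins, not part of any real API:

```typescript
import { GoogleGenAI } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

// 1. Define the function declaration (illustrative)
const getWeather = {
  name: 'get_weather',
  description: 'Get current weather for a city',
  parametersJsonSchema: {
    type: 'object',
    properties: { city: { type: 'string', description: 'City name' } },
    required: ['city']
  }
};

// 2. Send the request with tools
const question = 'What is the weather in Paris?';
let response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: question,
  config: { tools: [{ functionDeclarations: [getWeather] }] }
});

// 3. Check whether the model requested a function call
const call = response.functionCalls?.[0];
if (call) {
  // 4. Execute the function (stubbed result for illustration)
  const result = { temperature: 18, condition: 'cloudy' };

  // 5. Send the result back as a functionResponse part
  response = await ai.models.generateContent({
    model: 'gemini-2.5-flash',
    contents: [
      { role: 'user', parts: [{ text: question }] },
      { role: 'model', parts: [{ functionCall: call }] },
      { role: 'user', parts: [{ functionResponse: { name: call.name, response: result } }] }
    ],
    config: { tools: [{ functionDeclarations: [getWeather] }] }
  });
}

// 6. Final natural-language answer
console.log(response.text);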
---
## Function Declaration Schema
```typescript
interface FunctionDeclaration {
  name: string;                    // Function name (no spaces)
  description: string;             // What the function does
  parametersJsonSchema: {          // Subset of OpenAPI schema
    type: 'object';
    properties: {
      [paramName: string]: {
        type: 'string' | 'number' | 'boolean' | 'array' | 'object';
        description: string;       // Parameter description
        enum?: string[];           // Optional: allowed values
      };
    };
    required: string[];            // Required parameter names
  };
}
```
---
## Calling Modes
- **AUTO** (default): Model decides when to call
- **ANY**: Force at least one function call
- **NONE**: Disable function calling
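
The mode is set via `toolConfig`. A sketch, assuming the SDK exports the `FunctionCallingConfigMode` enum (present in recent `@google/genai` versions); `allowedFunctionNames` optionally restricts `ANY` to specific functions:

```typescript
import { GoogleGenAI, FunctionCallingConfigMode } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Log a user_signup event.',
  config: {
    tools: [{ functionDeclarations: [/* your declarations */] }],
    toolConfig: {
      functionCallingConfig: {
        mode: FunctionCallingConfigMode.ANY,   // force at least one call
        allowedFunctionNames: ['log_event']    // optional allow-list with ANY
      }
    }
  }
});
```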
---
## Parallel vs Compositional
**Parallel**: Independent functions run simultaneously
**Compositional**: Sequential dependencies (A → B → C)
Gemini automatically detects which pattern to use.
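
For the parallel case, the requested calls are independent and can be executed concurrently. A sketch, assuming a `response` from a prior `generateContent` call with tools (as in the round-trip example above); `dispatch` is a hypothetical router, not an SDK function:

```typescript
// Hypothetical router mapping function names to local implementations
async function dispatch(name: string, args: unknown): Promise<Record<string, unknown>> {
  if (name === 'get_weather') return { temperature: 18, condition: 'cloudy' };
  throw new Error(`Unknown function: ${name}`);
}

// `response` comes from a prior generateContent call with tools configured
const calls = response.functionCalls ?? [];

// Parallel calls are independent, so run them concurrently
const resultParts = await Promise.all(
  calls.map(async call => ({
    functionResponse: { name: call.name, response: await dispatch(call.name, call.args) },
  }))
);

// All results go back to the model in a single follow-up user turn:
// contents.push({ role: 'user', parts: resultParts });
```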
---
## Official Docs
https://ai.google.dev/gemini-api/docs/function-calling

57
references/generation-config.md Normal file

@@ -0,0 +1,57 @@
# Generation Configuration Reference
Complete reference for all generation parameters.
---
## All Parameters
```typescript
config: {
temperature: number, // 0.0-2.0 (default: 1.0)
topP: number, // 0.0-1.0 (default: 0.95)
topK: number, // 1-100+ (default: 40)
maxOutputTokens: number, // 1-65536
stopSequences: string[], // Stop at these strings
responseMimeType: string, // 'text/plain' | 'application/json'
candidateCount: number, // Usually 1
thinkingConfig: {
thinkingBudget: number // Max thinking tokens
}
}
```
---
## Parameter Guidelines
### temperature
- **0.0**: Deterministic, focused
- **1.0**: Balanced (default)
- **2.0**: Very creative, random
### topP (nucleus sampling)
- **0.95**: Default, good balance
- Lower = more focused
### topK
- **40**: Default
- Higher = more diversity
### maxOutputTokens
- Always set this to prevent excessive generation
- Max: 65,536 tokens
---
## Use Cases
**Factual tasks**: temperature=0.0, topP=0.8
**Creative tasks**: temperature=1.2, topP=0.95
**Code generation**: temperature=0.3, topP=0.9
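
As a sketch, applying the factual preset above to a request (the values are guideline numbers, not mandated settings):

```typescript
import { GoogleGenAI } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

// Factual preset: deterministic, bounded output
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'List the seven SI base units.',
  config: {
    temperature: 0.0,
    topP: 0.8,
    maxOutputTokens: 1024,   // always bound output length
    stopSequences: ['END']   // optional early stop
  }
});
console.log(response.text);
```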
---
## Official Docs
https://ai.google.dev/gemini-api/docs/models/generative-models#model-parameters

602
references/grounding-guide.md Normal file

@@ -0,0 +1,602 @@
# Grounding with Google Search Guide
Complete guide to using grounding with Google Search to connect Gemini models to real-time web information, reducing hallucinations and providing verifiable, up-to-date responses.
---
## What is Grounding?
Grounding connects the Gemini model to Google Search, allowing it to:
- Access real-time information beyond training cutoff
- Reduce hallucinations with fact-checked web sources
- Provide citations and source URLs
- Answer questions about current events
- Verify information against the web
---
## How It Works
1. **Model receives query** (e.g., "Who won Euro 2024?")
2. **Model determines** if current information is needed
3. **Performs Google Search** automatically
4. **Processes search results** (web pages, snippets)
5. **Incorporates findings** into response
6. **Provides citations** with source URLs
---
## Two Grounding APIs
### 1. Google Search (`googleSearch`) - Recommended for Gemini 2.5
**Simple, automatic grounding**:
```typescript
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Who won the euro 2024?',
config: {
tools: [{ googleSearch: {} }]
}
});
```
**Features**:
- Simple configuration (empty object)
- Automatic search when model needs current info
- Available on all Gemini 2.5 models
- Recommended for new projects
### 2. Google Search Retrieval (`googleSearchRetrieval`) - Legacy for Gemini 1.5
**Dynamic threshold control**:
```typescript
import { DynamicRetrievalConfigMode } from '@google/genai';
const response = await ai.models.generateContent({
model: 'gemini-1.5-flash',
contents: 'Who won the euro 2024?',
config: {
tools: [{
googleSearchRetrieval: {
dynamicRetrievalConfig: {
mode: DynamicRetrievalConfigMode.MODE_DYNAMIC,
dynamicThreshold: 0.7 // Ground only when predicted benefit score >= 0.7 (API default: 0.3)
}
}
}]
}
});
```
**Features**:
- Control when searches happen via threshold
- Used with Gemini 1.5 models
- More configuration options
**Recommendation**: Use `googleSearch` for Gemini 2.5 models (simpler and newer).
---
## Basic Usage
### SDK Approach (Gemini 2.5)
```typescript
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'What are the latest developments in AI?',
config: {
tools: [{ googleSearch: {} }]
}
});
console.log(response.text);
// Check if grounding was used
if (response.candidates[0].groundingMetadata) {
console.log('✓ Search performed');
console.log('Sources:', response.candidates[0].groundingMetadata.webPages);
} else {
console.log('✓ Answered from model knowledge');
}
```
### Fetch Approach (Cloudflare Workers)
```typescript
const response = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-goog-api-key': env.GEMINI_API_KEY,
},
body: JSON.stringify({
contents: [{ parts: [{ text: 'What are the latest developments in AI?' }] }],
tools: [{ google_search: {} }]
}),
}
);
const data = await response.json();
console.log(data.candidates[0].content.parts[0].text);
```
---
## Grounding Metadata
### Structure
```typescript
{
groundingMetadata: {
// Search queries performed
searchQueries: [
{ text: "euro 2024 winner" }
],
// Web pages retrieved
webPages: [
{
url: "https://example.com/euro-2024",
title: "UEFA Euro 2024 Results",
snippet: "Spain won UEFA Euro 2024..."
}
],
// Citations (inline references)
citations: [
{
startIndex: 42,
endIndex: 47,
uri: "https://example.com/euro-2024"
}
],
// Retrieval queries (alternative search terms)
retrievalQueries: [
{ query: "who won euro 2024 final" }
]
}
}
```
### Accessing Metadata
```typescript
if (response.candidates[0].groundingMetadata) {
const metadata = response.candidates[0].groundingMetadata;
// Display sources
console.log('Sources:');
metadata.webPages?.forEach((page, i) => {
console.log(`${i + 1}. ${page.title}`);
console.log(` ${page.url}`);
});
// Display citations
console.log('\nCitations:');
metadata.citations?.forEach((citation) => {
console.log(`Position ${citation.startIndex}-${citation.endIndex}: ${citation.uri}`);
});
}
```
---
## When to Use Grounding
### ✅ Good Use Cases
**Current Events**:
```typescript
'What happened in the news today?'
'Who won the latest sports championship?'
'What are the current stock prices?'
```
**Recent Developments**:
```typescript
'What are the latest AI breakthroughs?'
'What are recent changes in climate policy?'
```
**Fact-Checking**:
```typescript
'Is this claim true: [claim]?'
'What does the latest research say about [topic]?'
```
**Real-Time Data**:
```typescript
'What is the current weather in Tokyo?'
"What are today's cryptocurrency prices?"
```
### ❌ Not Recommended For
**General Knowledge**:
```typescript
'What is the capital of France?' // Model knows this
'How does photosynthesis work?' // Stable knowledge
```
**Mathematical Calculations**:
```typescript
'What is 15 * 27?' // Use code execution instead
```
**Creative Tasks**:
```typescript
'Write a poem about autumn' // No search needed
```
**Code Generation**:
```typescript
'Write a sorting algorithm' // Internal reasoning sufficient
```
---
## Chat with Grounding
### Multi-Turn Conversations
```typescript
const chat = await ai.chats.create({
model: 'gemini-2.5-flash',
config: {
tools: [{ googleSearch: {} }]
}
});
// First question
let response = await chat.sendMessage('What are the latest quantum computing developments?');
console.log(response.text);
// Display sources
if (response.candidates[0].groundingMetadata) {
const sources = response.candidates[0].groundingMetadata.webPages || [];
console.log(`\nSources: ${sources.length} web pages`);
sources.forEach(s => console.log(`- ${s.title}: ${s.url}`));
}
// Follow-up question
response = await chat.sendMessage('Which company made the biggest breakthrough?');
console.log('\n' + response.text);
```
---
## Combining with Other Features
### Grounding + Function Calling
```typescript
const weatherFunction = {
name: 'get_current_weather',
description: 'Get weather for a location',
parametersJsonSchema: {
type: 'object',
properties: {
location: { type: 'string', description: 'City name' }
},
required: ['location']
}
};
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'What is the weather like in the city that won Euro 2024?',
config: {
tools: [
{ googleSearch: {} }, // For finding Euro 2024 winner
{ functionDeclarations: [weatherFunction] } // For weather lookup
]
}
});
// Model will:
// 1. Use Google Search to find the Euro 2024 winner (Spain)
// 2. Call get_current_weather with a matching city (e.g., Madrid)
// 3. Combine both results in response
```
### Grounding + Code Execution
```typescript
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Find the current stock prices for AAPL, GOOGL, MSFT and calculate their average',
config: {
tools: [
{ googleSearch: {} }, // For current stock prices
{ codeExecution: {} } // For averaging
]
}
});
// Model will:
// 1. Search for current stock prices
// 2. Generate code to calculate average
// 3. Execute code with the found prices
// 4. Return result with citations
```
---
## Checking Grounding Usage
### Determine if Search Was Performed
```typescript
const queries = [
'What is 2+2?', // Should NOT use search
'What happened in the news today?' // Should use search
];
for (const query of queries) {
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: query,
config: { tools: [{ googleSearch: {} }] }
});
console.log(`Query: ${query}`);
console.log(`Search used: ${response.candidates[0].groundingMetadata ? 'YES' : 'NO'}`);
console.log();
}
```
**Output**:
```
Query: What is 2+2?
Search used: NO
Query: What happened in the news today?
Search used: YES
```
---
## Dynamic Retrieval (Gemini 1.5)
### Threshold-Based Grounding
```typescript
const response = await ai.models.generateContent({
model: 'gemini-1.5-flash',
contents: 'Who won the euro 2024?',
config: {
tools: [{
googleSearchRetrieval: {
dynamicRetrievalConfig: {
mode: DynamicRetrievalConfigMode.MODE_DYNAMIC,
dynamicThreshold: 0.7 // Ground only when predicted benefit score >= 0.7
}
}
}]
}
});
if (!response.candidates[0].groundingMetadata) {
console.log('Model answered from its own knowledge (prediction score below threshold)');
} else {
console.log('Search performed (prediction score >= threshold)');
}
```
**How It Works**:
- Model assigns the prompt a prediction score (0.0-1.0) estimating how much it would benefit from grounding
- If score >= threshold → performs search
- If score < threshold → answers from internal knowledge
**Threshold Values**:
- `0.0`: Always search (every prompt is grounded)
- `0.3`: API default, balanced
- `0.7`: Search only for prompts that clearly benefit from grounding
- `1.0`: Effectively never search
---
## Best Practices
### ✅ Do
1. **Check Metadata**: Always verify if grounding was used
```typescript
if (response.candidates[0].groundingMetadata) { ... }
```
2. **Display Citations**: Show sources to users for transparency
```typescript
metadata.webPages.forEach(page => {
console.log(`Source: ${page.title} (${page.url})`);
});
```
3. **Use Specific Queries**: Better search results with clear questions
```typescript
// ✅ Good: "What are Microsoft's Q3 2024 earnings?"
// ❌ Vague: "Tell me about Microsoft"
```
4. **Combine Features**: Use with function calling/code execution for powerful workflows
5. **Handle Missing Metadata**: Not all queries trigger search
```typescript
const sources = response.candidates[0].groundingMetadata?.webPages || [];
```
### ❌ Don't
1. **Don't Assume Search Always Happens**: Model decides when to search
2. **Don't Ignore Citations**: They're crucial for fact-checking
3. **Don't Use for Stable Knowledge**: Waste of resources for unchanging facts
4. **Don't Expect Perfect Coverage**: Not all information is on the web
---
## Cost and Performance
### Cost Considerations
- **Added Latency**: Search takes 1-3 seconds typically
- **Token Costs**: Retrieved content counts as input tokens
- **Rate Limits**: Subject to API rate limits
### Optimization
**Use Dynamic Threshold** (Gemini 1.5):
```typescript
dynamicThreshold: 0.7 // Lower = more searches, higher = fewer searches
```
**Cache Grounding Results** (if appropriate):
```typescript
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001',
config: {
displayName: 'grounding-cache',
tools: [{ googleSearch: {} }],
contents: 'Initial query that triggers search...',
ttl: '3600s'
}
});
// Subsequent queries reuse cached grounding results
```
---
## Troubleshooting
### Grounding Not Working
**Symptom**: No `groundingMetadata` in response
**Causes**:
1. Grounding not enabled: `tools: [{ googleSearch: {} }]`
2. Model decided search wasn't needed (query answerable from knowledge)
3. Google Cloud project not configured (grounding requires GCP)
**Solution**:
- Verify `tools` configuration
- Use queries requiring current information
- Set up Google Cloud project
### Poor Search Quality
**Symptom**: Irrelevant sources or wrong information
**Causes**:
- Vague query
- Search terms ambiguous
- Recent events not yet indexed
**Solution**:
- Make queries more specific
- Include context in prompt
- Verify search queries in metadata
### Citations Missing
**Symptom**: `groundingMetadata` present but no citations
**Explanation**: Citations are **inline references** - they may not always be present if model doesn't directly quote sources.
**Solution**: Check `webPages` instead for full source list
---
## Important Requirements
### Google Cloud Project
**⚠️ Grounding requires a Google Cloud project, not just an API key.**
**Setup**:
1. Create Google Cloud project
2. Enable Generative Language API
3. Configure billing
4. Use API key from that project
**Error if Missing**:
```
Error: Grounding requires Google Cloud project configuration
```
### Model Support
**✅ Supported**:
- All Gemini 2.5 models (`googleSearch`)
- All Gemini 1.5 models (`googleSearchRetrieval`)
**❌ Not Supported**:
- Gemini 1.0 models
---
## Examples
### News Summary
```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: "Summarize today's top 3 technology news headlines",
  config: { tools: [{ googleSearch: {} }] }
});
console.log(response.text);

const metadata = response.candidates[0].groundingMetadata;
metadata?.webPages?.forEach((page, i) => {
  console.log(`${i + 1}. ${page.title}: ${page.url}`);
});
```
### Fact Verification
```typescript
const claim = "The Earth is flat";
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: `Is this claim true: "${claim}"? Use reliable sources to verify.`,
config: { tools: [{ googleSearch: {} }] }
});
console.log(response.text);
```
### Market Research
```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'What are the current trends in electric vehicle adoption in 2024?',
  config: { tools: [{ googleSearch: {} }] }
});
console.log(response.text);

const metadata = response.candidates[0].groundingMetadata;
console.log('\nSources:');
metadata?.webPages?.forEach(page => {
  console.log(`- ${page.title}`);
});
```
---
## References
- Official Docs: https://ai.google.dev/gemini-api/docs/grounding
- Google Search Docs: https://ai.google.dev/gemini-api/docs/google-search
- Templates: See `grounding-search.ts` for working examples
- Combined Features: See `combined-advanced.ts` for integration patterns

289
references/models-guide.md Normal file

@@ -0,0 +1,289 @@
# Gemini Models Guide (2025)
**Last Updated**: 2025-11-19 (Gemini 3 preview release)
---
## Gemini 3 Series (Preview - November 2025)
### gemini-3-pro-preview
**Model ID**: `gemini-3-pro-preview`
**Status**: 🆕 Preview release (November 18, 2025)
**Context Windows**:
- Input: TBD (documentation pending)
- Output: TBD (documentation pending)
**Description**: Google's newest and most intelligent AI model with state-of-the-art reasoning and multimodal understanding. Outperforms Gemini 2.5 Pro on every major AI benchmark.
**Best For**:
- Most complex reasoning tasks
- Advanced multimodal analysis (images, videos, PDFs, audio)
- Benchmark-critical applications
- Cutting-edge projects requiring latest capabilities
- Tasks requiring absolute best quality
**Features**:
- ✅ Enhanced multimodal understanding
- ✅ Function calling
- ✅ Streaming
- ✅ System instructions
- ✅ JSON mode
- ❓ Thinking mode: TBD (documentation pending)
**Knowledge Cutoff**: TBD
**Pricing**: Preview pricing (likely higher than 2.5 Pro)
**⚠️ Preview Status**: Use for evaluation and testing. Consider `gemini-2.5-pro` for production-critical decisions until Gemini 3 reaches stable general availability.
**New Capabilities**:
- Record-breaking benchmark performance
- Enhanced generative UI responses
- Advanced coding capabilities (Google Antigravity integration)
- State-of-the-art multimodal understanding
---
## Current Production Models (Gemini 2.5 - Stable)
### gemini-2.5-pro
**Model ID**: `gemini-2.5-pro`
**Context Windows**:
- Input: 1,048,576 tokens (NOT 2M!)
- Output: 65,536 tokens
**Description**: State-of-the-art thinking model capable of reasoning over complex problems in code, math, and STEM.
**Best For**:
- Complex reasoning tasks
- Advanced code generation and optimization
- Mathematical problem-solving
- Multi-step logical analysis
- STEM applications
**Features**:
- ✅ Thinking mode (enabled by default)
- ✅ Function calling
- ✅ Multimodal (text, images, video, audio, PDFs)
- ✅ Streaming
- ✅ System instructions
- ✅ JSON mode
**Knowledge Cutoff**: January 2025
**Pricing**: Higher cost, use for tasks requiring best quality
---
### gemini-2.5-flash
**Model ID**: `gemini-2.5-flash`
**Context Windows**:
- Input: 1,048,576 tokens
- Output: 65,536 tokens
**Description**: Best price-performance model for large-scale processing, low-latency, and high-volume tasks.
**Best For**:
- General-purpose AI applications
- High-volume API calls
- Agentic workflows
- Cost-sensitive applications
- Production workloads
**Features**:
- ✅ Thinking mode (enabled by default)
- ✅ Function calling
- ✅ Multimodal (text, images, video, audio, PDFs)
- ✅ Streaming
- ✅ System instructions
- ✅ JSON mode
**Knowledge Cutoff**: January 2025
**Pricing**: Best price-performance ratio
**⭐ Recommended**: This is the default choice for most applications
---
### gemini-2.5-flash-lite
**Model ID**: `gemini-2.5-flash-lite`
**Context Windows**:
- Input: 1,048,576 tokens
- Output: 65,536 tokens
**Description**: Most cost-efficient and fastest 2.5 model, optimized for high throughput.
**Best For**:
- High-throughput applications
- Simple text generation
- Cost-critical use cases
- Speed-prioritized workloads
**Features**:
- ✅ Thinking mode (enabled by default)
- ❌ **NO function calling** (critical limitation!)
- ✅ Multimodal (text, images, video, audio, PDFs)
- ✅ Streaming
- ✅ System instructions
- ✅ JSON mode
**Knowledge Cutoff**: January 2025
**Pricing**: Lowest cost
**⚠️ Important**: Flash-Lite does NOT support function calling! Use Flash or Pro if you need tool use.
---
## Model Comparison Matrix
| Feature | Pro | Flash | Flash-Lite |
|---------|-----|-------|------------|
| **Thinking Mode** | ✅ Default ON | ✅ Default ON | ✅ Default ON |
| **Function Calling** | ✅ Yes | ✅ Yes | ❌ **NO** |
| **Multimodal** | ✅ Full | ✅ Full | ✅ Full |
| **Streaming** | ✅ Yes | ✅ Yes | ✅ Yes |
| **Input Tokens** | 1,048,576 | 1,048,576 | 1,048,576 |
| **Output Tokens** | 65,536 | 65,536 | 65,536 |
| **Reasoning Quality** | Best | Good | Basic |
| **Speed** | Moderate | Fast | Fastest |
| **Cost** | Highest | Medium | Lowest |
---
## Previous Generation Models (Still Available)
### Gemini 2.0 Flash
**Model ID**: `gemini-2.0-flash`
**Context**: 1M input / 65K output tokens
**Status**: Previous generation, 2.5 Flash recommended instead
### Gemini 1.5 Pro
**Model ID**: `gemini-1.5-pro`
**Context**: 2M input tokens (this is the ONLY model with 2M!)
**Status**: Older model, 2.5 models recommended
---
## Context Window Clarification
**⚠️ CRITICAL CORRECTION**:
**ACCURATE**: Gemini 2.5 models support **1,048,576 input tokens** (approximately 1 million)
**INACCURATE**: Claiming Gemini 2.5 has 2M token context window
**WHY THIS MATTERS**:
- Gemini 1.5 Pro (older model) had 2M tokens
- Gemini 2.5 models (current) have ~1M tokens
- This is a common mistake that causes confusion!
**This skill prevents this error by providing accurate information.**
---
## Model Selection Guide
### Use gemini-2.5-pro When:
- ✅ Complex reasoning required (math, logic, STEM)
- ✅ Advanced code generation and optimization
- ✅ Multi-step problem-solving
- ✅ Quality is more important than cost
- ✅ Tasks require maximum capability
### Use gemini-2.5-flash When:
- ✅ General-purpose AI applications
- ✅ High-volume production workloads
- ✅ Function calling required
- ✅ Agentic workflows
- ✅ Good balance of cost and quality needed
- ⭐ **Recommended default choice**
### Use gemini-2.5-flash-lite When:
- ✅ Simple text generation only
- ✅ No function calling needed
- ✅ High throughput required
- ✅ Cost is primary concern
- ⚠️ **Only if you don't need function calling!**
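
These rules can be condensed into a small helper; this is a hypothetical convenience encoding the guide above, not part of the SDK:

```typescript
type Needs = {
  complexReasoning?: boolean;  // math, multi-step logic, STEM
  functionCalling?: boolean;   // tool use required
  costCritical?: boolean;      // throughput/cost over quality
};

// Hypothetical helper encoding the selection rules above
function chooseModel(needs: Needs): string {
  if (needs.complexReasoning) return 'gemini-2.5-pro';
  if (needs.costCritical && !needs.functionCalling) return 'gemini-2.5-flash-lite';
  return 'gemini-2.5-flash'; // recommended default
}

chooseModel({ functionCalling: true }); // -> 'gemini-2.5-flash'
```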
---
## Common Mistakes
### ❌ Mistake 1: Using Wrong Model Name
```typescript
// WRONG - old model name
model: 'gemini-1.5-pro'
// CORRECT - current model
model: 'gemini-2.5-flash'
```
### ❌ Mistake 2: Claiming 2M Context for 2.5 Models
```typescript
// WRONG ASSUMPTION
// "Gemini 2.5 has 2M token context window"
// CORRECT
// Gemini 2.5 has 1,048,576 input tokens
// Only Gemini 1.5 Pro (older) had 2M
```
### ❌ Mistake 3: Using Flash-Lite for Function Calling
```typescript
// WRONG - Flash-Lite doesn't support function calling!
model: 'gemini-2.5-flash-lite',
config: {
tools: [{ functionDeclarations: [...] }] // This will FAIL
}
// CORRECT
model: 'gemini-2.5-flash', // or gemini-2.5-pro
config: {
tools: [{ functionDeclarations: [...] }]
}
```
---
## Rate Limits (Free vs Paid)
### Free Tier
- **15 RPM** (requests per minute)
- **1M TPM** (tokens per minute)
- **1,500 RPD** (requests per day)
### Paid Tier
- **360 RPM**
- **4M TPM**
- Unlimited daily requests
**Tip**: Monitor your usage and implement rate limiting to stay within quotas.
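
A minimal backoff sketch for staying under these quotas; the 429/`exhausted` detection is an assumption about how the error surfaces, so adapt it to what your client actually throws:

```typescript
// Minimal exponential-backoff wrapper (sketch; error shape is an assumption)
async function withBackoff<T>(fn: () => Promise<T>, maxRetries = 5): Promise<T> {
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (err: any) {
      const rateLimited = err?.status === 429 || /exhausted/i.test(String(err?.message));
      if (!rateLimited || attempt >= maxRetries) throw err;
      // 1s, 2s, 4s, ... capped at 30s, plus jitter
      const delayMs = Math.min(1000 * 2 ** attempt, 30_000) + Math.random() * 250;
      await new Promise(resolve => setTimeout(resolve, delayMs));
    }
  }
}

// Usage:
// const response = await withBackoff(() =>
//   ai.models.generateContent({ model: 'gemini-2.5-flash', contents: 'Hi' })
// );
```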
---
## Official Documentation
- **Models Overview**: https://ai.google.dev/gemini-api/docs/models
- **Gemini 2.5 Announcement**: https://developers.googleblog.com/en/gemini-2-5-thinking-model-updates/
- **Pricing**: https://ai.google.dev/pricing
---
**Production Tip**: Always use gemini-2.5-flash as your default unless you specifically need Pro's advanced reasoning or want to minimize cost with Flash-Lite (and don't need function calling).

58
references/multimodal-guide.md Normal file

@@ -0,0 +1,58 @@
# Multimodal Guide
Complete guide to using images, video, audio, and PDFs with Gemini API.
---
## Supported Formats
### Images
- JPEG, PNG, WebP, HEIC, HEIF
- Max size: 20MB
### Video
- MP4, MPEG, MOV, AVI, FLV, MPG, WebM, WMV
- Max size: 2GB
- Max length (inline): 2 minutes
### Audio
- MP3, WAV, FLAC, AAC, OGG, OPUS
- Max size: 20MB
### PDFs
- Max size: 30MB
- Text-based PDFs work best
---
## Usage Pattern
```typescript
contents: [
{
parts: [
{ text: 'Your question' },
{
inlineData: {
data: base64EncodedData,
mimeType: 'image/jpeg' // or video/mp4, audio/mp3, application/pdf
}
}
]
}
]
```
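
A complete Node.js sketch using this pattern with a local JPEG (the file path is illustrative):

```typescript
import fs from 'node:fs';
import { GoogleGenAI } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

// Read and base64-encode a local image (path is illustrative)
const imageBase64 = fs.readFileSync('./photo.jpg').toString('base64');

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: [
    {
      parts: [
        { text: 'Describe this image in one sentence.' },
        { inlineData: { data: imageBase64, mimeType: 'image/jpeg' } }
      ]
    }
  ]
});
console.log(response.text);
```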
---
## Best Practices
- Use specific, detailed prompts
- Combine multiple modalities in one request
- For large files (>2GB), use File API (Phase 2)
---
## Official Docs
https://ai.google.dev/gemini-api/docs/vision

235
references/sdk-migration-guide.md Normal file

@@ -0,0 +1,235 @@
# SDK Migration Guide
**From**: `@google/generative-ai` (DEPRECATED)
**To**: `@google/genai` (CURRENT)
**Deadline**: November 30, 2025 (deprecated SDK sunset)
---
## Why Migrate?
The `@google/generative-ai` SDK is deprecated and will stop receiving updates on **November 30, 2025**.
The new `@google/genai` SDK:
- ✅ Works with both Gemini API and Vertex AI
- ✅ Supports Gemini 2.0+ features
- ✅ Better TypeScript support
- ✅ Unified API across platforms
- ✅ Active development and updates
---
## Migration Steps
### 1. Update Package
```bash
# Remove deprecated SDK
npm uninstall @google/generative-ai
# Install current SDK
npm install @google/genai@1.27.0
```
### 2. Update Imports
**Old (DEPRECATED)**:
```typescript
import { GoogleGenerativeAI } from '@google/generative-ai';
const genAI = new GoogleGenerativeAI(apiKey);
const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' });
```
**New (CURRENT)**:
```typescript
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({ apiKey });
// No need to get model separately
```
### 3. Update API Calls
**Old**:
```typescript
const result = await model.generateContent(prompt);
const response = await result.response;
const text = response.text();
```
**New**:
```typescript
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: prompt
});
const text = response.text;
```
### 4. Update Streaming
**Old**:
```typescript
const result = await model.generateContentStream(prompt);
for await (const chunk of result.stream) {
console.log(chunk.text());
}
```
**New**:
```typescript
const response = await ai.models.generateContentStream({
model: 'gemini-2.5-flash',
contents: prompt
});
for await (const chunk of response) {
console.log(chunk.text);
}
```
### 5. Update Chat
**Old**:
```typescript
const chat = model.startChat({
history: []
});
const result = await chat.sendMessage(message);
const response = await result.response;
console.log(response.text());
```
**New**:
```typescript
const chat = await ai.chats.create({
model: 'gemini-2.5-flash',
history: []
});
const response = await chat.sendMessage(message);
console.log(response.text);
```
---
## Complete Before/After Example
### Before (Deprecated SDK)
```typescript
import { GoogleGenerativeAI } from '@google/generative-ai';
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY);
const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' });
// Generate
const result = await model.generateContent('Hello');
const response = await result.response;
console.log(response.text());
// Stream
const streamResult = await model.generateContentStream('Write a story');
for await (const chunk of streamResult.stream) {
console.log(chunk.text());
}
// Chat
const chat = model.startChat();
const chatResult = await chat.sendMessage('Hi');
const chatResponse = await chatResult.response;
console.log(chatResponse.text());
```
### After (Current SDK)
```typescript
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
// Generate
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Hello'
});
console.log(response.text);
// Stream
const streamResponse = await ai.models.generateContentStream({
model: 'gemini-2.5-flash',
contents: 'Write a story'
});
for await (const chunk of streamResponse) {
console.log(chunk.text);
}
// Chat
const chat = await ai.chats.create({ model: 'gemini-2.5-flash' });
const chatResponse = await chat.sendMessage('Hi');
console.log(chatResponse.text);
```
---
## Key Differences
| Aspect | Old SDK | New SDK |
|--------|---------|---------|
| Package | `@google/generative-ai` | `@google/genai` |
| Class | `GoogleGenerativeAI` | `GoogleGenAI` |
| Model Init | `genAI.getGenerativeModel()` | Specify in each call |
| Text Access | `response.text()` (method) | `response.text` (property) |
| Stream Iteration | `result.stream` | Direct iteration |
| Chat Creation | `model.startChat()` | `ai.chats.create()` |
---
## Troubleshooting
### Error: "Cannot find module '@google/generative-ai'"
**Cause**: Old import statement after migration
**Solution**: Update all imports to `@google/genai`
### Error: "Property 'text' does not exist"
**Cause**: Using `response.text()` (method) instead of `response.text` (property)
**Solution**: Remove parentheses: `response.text` not `response.text()`
### Error: "generateContent is not a function"
**Cause**: Trying to call methods on old model object
**Solution**: Use `ai.models.generateContent()` directly
---
## Automated Migration Script
```bash
# Find all files using old SDK
rg "@google/generative-ai" --type ts
# Replace import statements (GNU sed shown; on macOS use `sed -i ''`)
find . -name "*.ts" -exec sed -i 's/@google\/generative-ai/@google\/genai/g' {} +
# Replace class name
find . -name "*.ts" -exec sed -i 's/GoogleGenerativeAI/GoogleGenAI/g' {} +
```
**⚠️ Note**: This script handles imports but NOT API changes. Manual review required!
---
## Official Resources
- **Migration Guide**: https://ai.google.dev/gemini-api/docs/migrate-to-genai
- **New SDK Docs**: https://github.com/googleapis/js-genai
- **Deprecated SDK**: https://github.com/google-gemini/deprecated-generative-ai-js
---
**Deadline Reminder**: November 30, 2025 - Deprecated SDK sunset

81
references/streaming-patterns.md Normal file

@@ -0,0 +1,81 @@
# Streaming Patterns
Complete guide to implementing streaming with Gemini API.
---
## SDK Approach (Async Iteration)
```typescript
const response = await ai.models.generateContentStream({
model: 'gemini-2.5-flash',
contents: 'Write a story'
});
for await (const chunk of response) {
process.stdout.write(chunk.text);
}
```
**Pros**: Simple, automatic parsing
**Cons**: Requires Node.js or compatible runtime
---
## Fetch Approach (SSE Parsing)
```typescript
const response = await fetch(
  // ?alt=sse is required for SSE "data: " lines; the default is a JSON array stream
  'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse',
  { /* ... */ }
);
const reader = response.body!.getReader();
const decoder = new TextDecoder();
let buffer = '';
while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  buffer += decoder.decode(value, { stream: true });
  const lines = buffer.split('\n');
  buffer = lines.pop() || '';
  for (const line of lines) {
    if (!line.startsWith('data: ') || line === 'data: [DONE]') continue;
    const data = JSON.parse(line.slice(6));
    const text = data.candidates?.[0]?.content?.parts?.[0]?.text;
    if (text) process.stdout.write(text);
  }
}
```
**Pros**: Works in any environment
**Cons**: Manual SSE parsing required
---
## SSE Format
```
data: {"candidates":[{"content":{"parts":[{"text":"Hello"}]}}]}
data: {"candidates":[{"content":{"parts":[{"text":" world"}]}}]}
data: [DONE]
```
---
## Best Practices
- Always use the `streamGenerateContent` endpoint with `?alt=sse` for SSE output
- Handle incomplete chunks in buffer
- Skip empty lines and `[DONE]` markers
- Use streaming for better UX on long responses
---
## Official Docs
https://ai.google.dev/gemini-api/docs/streaming

59
references/thinking-mode-guide.md Normal file

@@ -0,0 +1,59 @@
# Thinking Mode Guide
Complete guide to thinking mode in Gemini 2.5 models.
---
## What is Thinking Mode?
Gemini 2.5 models "think" internally before responding, improving accuracy on complex tasks.
**Key Points**:
- ✅ Enabled by default on all 2.5 models (Pro cannot disable it; Flash and Flash-Lite accept `thinkingBudget: 0` to turn it off)
- ✅ Internal (raw thoughts are not returned in the response)
- ✅ Configurable thinking budget
- ✅ Improves reasoning quality
---
## Configuration
```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-pro',
  contents: 'Solve this puzzle step by step: ...',
  config: {
    thinkingConfig: {
      thinkingBudget: 8192 // Max tokens for internal reasoning
    }
  }
});
```
---
## When to Increase Budget
✅ Complex math/logic problems
✅ Multi-step reasoning
✅ Code optimization
✅ Detailed analysis
---
## When Default is Fine
⏺️ Simple questions
⏺️ Creative writing
⏺️ Translation
⏺️ Summarization
---
## Model Comparison
- **gemini-2.5-pro**: Best for complex reasoning
- **gemini-2.5-flash**: Good balance
- **gemini-2.5-flash-lite**: Basic thinking
---
## Official Docs
https://ai.google.dev/gemini-api/docs/thinking

references/top-errors.md Normal file
@@ -0,0 +1,304 @@
# Top Errors and Solutions
22 common Gemini API errors with solutions (Phase 1 + Phase 2).
---
## 1. Using Deprecated SDK
**Error**: `Cannot find module '@google/generative-ai'`
**Cause**: Using old SDK after migration
**Solution**: Install `@google/genai` instead
---
## 2. Wrong Context Window Claims
**Error**: Input exceeds model capacity
**Cause**: Assuming 2M tokens for Gemini 2.5
**Solution**: Gemini 2.5 has 1,048,576 input tokens (NOT 2M!)
---
## 3. Model Not Found
**Error**: `models/gemini-3.0-flash is not found`
**Cause**: Wrong model name
**Solution**: Use: `gemini-2.5-pro`, `gemini-2.5-flash`, or `gemini-2.5-flash-lite`
---
## 4. Function Calling on Flash-Lite
**Error**: Function calling not working
**Cause**: Flash-Lite doesn't support function calling
**Solution**: Use `gemini-2.5-flash` or `gemini-2.5-pro`
---
## 5. Invalid API Key (401)
**Error**: `API key not valid`
**Cause**: Missing or wrong `GEMINI_API_KEY`
**Solution**: Set environment variable correctly
---
## 6. Rate Limit Exceeded (429)
**Error**: `Resource has been exhausted`
**Cause**: Too many requests
**Solution**: Implement exponential backoff
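A minimal retry sketch with exponential backoff and jitter (the `status` field on the thrown error is an assumption; adjust the check to however your client surfaces HTTP 429):
```typescript
// Hedged sketch: retry on 429 with exponential backoff plus jitter.
async function withBackoff<T>(fn: () => Promise<T>, maxRetries = 5): Promise<T> {
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (error: any) {
      const rateLimited = error?.status === 429 || /exhausted/i.test(error?.message ?? '');
      if (!rateLimited || attempt >= maxRetries) throw error;
      const delayMs = Math.min(1000 * 2 ** attempt, 30_000) + Math.random() * 250;
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
}

// Usage: wrap any Gemini call
// const response = await withBackoff(() =>
//   ai.models.generateContent({ model: 'gemini-2.5-flash', contents: 'Hi' })
// );
```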
---
## 7. Streaming Parse Errors
**Error**: Invalid JSON in SSE stream
**Cause**: Incomplete chunk parsing
**Solution**: Use buffer to handle partial chunks
---
## 8. Multimodal Format Errors
**Error**: Invalid base64 or MIME type
**Cause**: Wrong image encoding
**Solution**: Use correct base64 encoding and MIME type
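A short sketch of correct inline-image encoding in Node (the file path is a placeholder):
```typescript
import fs from 'fs';

// Read the file and base64-encode it; the MIME type must match the actual format.
const base64Image = fs.readFileSync('./photo.jpg').toString('base64');
const imagePart = { inlineData: { data: base64Image, mimeType: 'image/jpeg' } };
```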
---
## 9. Context Length Exceeded
**Error**: `Request payload size exceeds the limit`
**Cause**: Input too large
**Solution**: Reduce input size (max 1,048,576 tokens)
---
## 10. Chat Not Working with Fetch
**Error**: No chat helper available
**Cause**: Chat helpers are SDK-only
**Solution**: Manually manage conversation history or use SDK
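A minimal sketch of manual history management with fetch (Node 18+ global fetch; error handling omitted):
```typescript
type Turn = { role: 'user' | 'model'; parts: Array<{ text: string }> };
const history: Turn[] = [];

async function send(message: string): Promise<string> {
  history.push({ role: 'user', parts: [{ text: message }] });
  const res = await fetch(
    'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent',
    {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-goog-api-key': process.env.GEMINI_API_KEY!,
      },
      body: JSON.stringify({ contents: history }),
    }
  );
  const data: any = await res.json();
  const reply = data.candidates?.[0]?.content?.parts?.[0]?.text ?? '';
  history.push({ role: 'model', parts: [{ text: reply }] });
  return reply;
}
```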
---
## 11. Thinking Mode Not Supported
**Error**: Trying to disable thinking mode
**Cause**: Thinking is enabled by default on Gemini 2.5
**Solution**: Configure the budget instead; on Flash/Flash-Lite you can set `thinkingBudget: 0`, but Pro cannot be disabled
---
## 12. Parameter Conflicts
**Error**: Unsupported parameters
**Cause**: Using wrong config options
**Solution**: Use only supported parameters (see generation-config.md)
---
## 13. System Instruction Placement
**Error**: System instruction not working
**Cause**: Placed inside contents array
**Solution**: Place at top level, not in contents
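A sketch of the corrected placement, assuming an initialized `ai` client (the SDK templates in this skill pass `systemInstruction` via `config`):
```typescript
// ✅ Correct: systemInstruction lives in config, not in the contents array
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Summarize this article in two sentences: ...',
  config: {
    systemInstruction: 'You are a concise technical editor.',
  },
});
```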
---
## 14. Token Counting Errors
**Error**: Unexpected token usage
**Cause**: Multimodal inputs use more tokens
**Solution**: Images/video/audio count toward token limit
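To see actual usage up front, `countTokens` can be called before sending (a sketch assuming an initialized `ai` client; multimodal parts are counted the same way):
```typescript
const { totalTokens } = await ai.models.countTokens({
  model: 'gemini-2.5-flash',
  contents: 'Your long prompt here...',
});
if ((totalTokens ?? 0) > 1_048_576) {
  throw new Error(`Input too large: ${totalTokens} tokens`);
}
```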
---
## 15. Parallel Function Call Errors
**Error**: Functions not executing in parallel
**Cause**: Dependencies between functions
**Solution**: Gemini auto-detects; ensure functions are independent
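A sketch of dispatching all returned calls concurrently (`runFunction` is a hypothetical dispatcher you implement; `response` is a prior `generateContent` result):
```typescript
// Collect every functionCall part, run them in parallel, and gather all results.
const callParts = response.candidates[0].content.parts.filter((p: any) => p.functionCall);
const functionResponses = await Promise.all(
  callParts.map(async (p: any) => ({
    functionResponse: {
      name: p.functionCall.name,
      response: await runFunction(p.functionCall.name, p.functionCall.args), // hypothetical
    },
  }))
);
```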
---
## Phase 2 Errors
### 16. Invalid Model Version for Caching
**Error**: `Invalid model name for caching`
**Cause**: Using `gemini-2.5-flash` instead of `gemini-2.5-flash-001`
**Solution**: Must use explicit version suffix when creating caches
```typescript
// ✅ Correct
model: 'gemini-2.5-flash-001'
// ❌ Wrong
model: 'gemini-2.5-flash'
```
**Source**: https://ai.google.dev/gemini-api/docs/caching
---
### 17. Cache Expired or Not Found
**Error**: `Cache not found` or `Cache expired`
**Cause**: Trying to use cache after TTL expiration
**Solution**: Check expiration before use or recreate cache
```typescript
let cache = await ai.caches.get({ name: cacheName });
if (new Date(cache.expireTime) < new Date()) {
// Recreate cache
cache = await ai.caches.create({ ... });
}
```
---
### 18. Cannot Update Expired Cache TTL
**Error**: `Cannot update expired cache`
**Cause**: Trying to extend TTL after cache already expired
**Solution**: Update TTL before expiration or create new cache
```typescript
// Update TTL before expiration
await ai.caches.update({
name: cache.name,
config: { ttl: '7200s' }
});
```
---
### 19. Code Execution Timeout
**Error**: `Execution timed out after 30 seconds` with `OUTCOME_FAILED`
**Cause**: Python code taking too long to execute
**Solution**: Simplify computation or reduce data size
```typescript
// Check outcome before using results
if (part.codeExecutionResult?.outcome === 'OUTCOME_FAILED') {
console.error('Execution failed:', part.codeExecutionResult.output);
}
```
**Source**: https://ai.google.dev/gemini-api/docs/code-execution
---
### 20. Python Package Not Available
**Error**: `ModuleNotFoundError: No module named 'requests'`
**Cause**: Trying to import package not in sandbox
**Solution**: Use only available packages (numpy, pandas, matplotlib, seaborn, scipy)
**Available Packages**:
- Standard library: math, statistics, json, csv, datetime
- Data science: numpy, pandas, scipy
- Visualization: matplotlib, seaborn
---
### 21. Code Execution on Flash-Lite
**Error**: Code execution not working
**Cause**: `gemini-2.5-flash-lite` doesn't support code execution
**Solution**: Use `gemini-2.5-flash` or `gemini-2.5-pro`
```typescript
// ✅ Correct
model: 'gemini-2.5-flash' // Supports code execution
// ❌ Wrong
model: 'gemini-2.5-flash-lite' // NO code execution support
```
---
### 22. Grounding Requires Google Cloud Project
**Error**: `Grounding requires Google Cloud project configuration`
**Cause**: Using API key not associated with GCP project
**Solution**: Set up Google Cloud project and enable Generative Language API
**Steps**:
1. Create Google Cloud project
2. Enable Generative Language API
3. Configure billing
4. Use API key from that project
**Source**: https://ai.google.dev/gemini-api/docs/grounding
---
## Quick Debugging Checklist
### Phase 1 (Core)
- [ ] Using @google/genai (NOT @google/generative-ai)
- [ ] Model name is gemini-2.5-pro/flash/flash-lite
- [ ] API key is set correctly
- [ ] Input under 1,048,576 tokens
- [ ] Not using Flash-Lite for function calling
- [ ] System instruction at top level
- [ ] Streaming endpoint is streamGenerateContent
- [ ] MIME types are correct for multimodal
### Phase 2 (Advanced)
- [ ] Caching: Using explicit model version (e.g., gemini-2.5-flash-001)
- [ ] Caching: Cache not expired (check expireTime)
- [ ] Code Execution: Not using Flash-Lite
- [ ] Code Execution: Using only available Python packages
- [ ] Grounding: Google Cloud project configured
- [ ] Grounding: Checking groundingMetadata for search results

scripts/check-versions.sh Executable file
@@ -0,0 +1,76 @@
#!/bin/bash
# Check @google/genai package version and warn about deprecated SDK
# Usage: ./scripts/check-versions.sh
echo "🔍 Checking Gemini API SDK versions..."
echo ""
# Check if package.json exists
if [ ! -f "package.json" ]; then
echo "❌ No package.json found in current directory"
exit 1
fi
# Check for deprecated SDK
if grep -q "@google/generative-ai" package.json; then
echo "⚠️ WARNING: DEPRECATED SDK DETECTED!"
echo ""
echo " Package: @google/generative-ai"
echo " Status: DEPRECATED (sunset Nov 30, 2025)"
echo ""
echo " Action required:"
echo " 1. npm uninstall @google/generative-ai"
echo " 2. npm install @google/genai@1.27.0"
echo " 3. Update imports (see sdk-migration-guide.md)"
echo ""
fi
# Check for current SDK
if grep -q "@google/genai" package.json; then
# Get installed version
INSTALLED_VERSION=$(npm list @google/genai --depth=0 2>/dev/null | grep @google/genai | sed 's/.*@//' | sed 's/ .*//')
RECOMMENDED_VERSION="1.27.0"
echo "✅ Current SDK installed: @google/genai"
echo " Installed version: $INSTALLED_VERSION"
echo " Recommended version: $RECOMMENDED_VERSION"
echo ""
# Check if version matches recommendation
if [ "$INSTALLED_VERSION" != "$RECOMMENDED_VERSION" ]; then
echo " Consider updating to recommended version:"
echo " npm install @google/genai@$RECOMMENDED_VERSION"
echo ""
fi
else
echo "❌ @google/genai not found in package.json"
echo ""
echo " Install with:"
echo " npm install @google/genai@1.27.0"
echo ""
fi
# Check Node.js version (warn if below the required major version)
NODE_VERSION=$(node -v | sed 's/v//')
REQUIRED_NODE="18.0.0"
echo "Node.js version: $NODE_VERSION"
echo "Required: >= $REQUIRED_NODE"
NODE_MAJOR=${NODE_VERSION%%.*}
if [ "$NODE_MAJOR" -lt "${REQUIRED_NODE%%.*}" ]; then
  echo "⚠️  Node.js $NODE_VERSION is below the required $REQUIRED_NODE"
fi
echo ""
# Summary
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Summary:"
echo ""
if grep -q "@google/generative-ai" package.json; then
echo "❌ Migration needed: Remove deprecated SDK"
elif grep -q "@google/genai" package.json; then
echo "✅ Using current SDK (@google/genai)"
else
echo "❌ Gemini SDK not installed"
fi
echo ""
echo "For migration help, see:"
echo " references/sdk-migration-guide.md"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

@@ -0,0 +1,245 @@
/**
* Complete Cloudflare Worker with Gemini API
*
* Demonstrates:
* - Fetch-based Gemini API integration (no SDK dependencies)
* - Streaming responses to client
* - Multi-turn chat with session storage
* - Error handling for production
* - CORS configuration
*
* Deploy:
* - npx wrangler deploy
* - Set GEMINI_API_KEY in Cloudflare dashboard or wrangler.toml
*/
interface Env {
GEMINI_API_KEY: string;
}
interface ChatMessage {
role: 'user' | 'model';
parts: Array<{ text: string }>;
}
export default {
/**
* Main request handler
*/
async fetch(request: Request, env: Env): Promise<Response> {
// CORS headers
const corsHeaders = {
'Access-Control-Allow-Origin': '*',
'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
'Access-Control-Allow-Headers': 'Content-Type',
};
// Handle preflight
if (request.method === 'OPTIONS') {
return new Response(null, { headers: corsHeaders });
}
const url = new URL(request.url);
try {
// Route: POST /api/chat (non-streaming)
if (url.pathname === '/api/chat' && request.method === 'POST') {
return await handleChat(request, env, corsHeaders);
}
// Route: POST /api/chat/stream (streaming)
if (url.pathname === '/api/chat/stream' && request.method === 'POST') {
return await handleChatStream(request, env, corsHeaders);
}
// Route: GET / (health check)
if (url.pathname === '/' && request.method === 'GET') {
return new Response(
JSON.stringify({ status: 'ok', service: 'Gemini API Worker' }),
{ headers: { ...corsHeaders, 'Content-Type': 'application/json' } }
);
}
// 404 for unknown routes
return new Response('Not Found', { status: 404, headers: corsHeaders });
} catch (error: any) {
return new Response(
JSON.stringify({ error: error.message }),
{ status: 500, headers: { ...corsHeaders, 'Content-Type': 'application/json' } }
);
}
}
};
/**
* Handle non-streaming chat request
*/
async function handleChat(request: Request, env: Env, corsHeaders: any): Promise<Response> {
const { message, history = [] } = await request.json() as {
message: string;
history?: ChatMessage[];
};
// Build contents array with history
const contents: ChatMessage[] = [
...history,
{ role: 'user', parts: [{ text: message }] }
];
// Call Gemini API
const response = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-goog-api-key': env.GEMINI_API_KEY,
},
body: JSON.stringify({ contents }),
}
);
if (!response.ok) {
const errorData = await response.json();
throw new Error(errorData.error?.message || 'Gemini API error');
}
const data = await response.json();
const assistantReply = data.candidates[0]?.content?.parts[0]?.text;
// Return response with updated history
return new Response(
JSON.stringify({
reply: assistantReply,
history: [
...history,
{ role: 'user', parts: [{ text: message }] },
{ role: 'model', parts: [{ text: assistantReply }] }
],
usage: data.usageMetadata
}),
{ headers: { ...corsHeaders, 'Content-Type': 'application/json' } }
);
}
/**
* Handle streaming chat request
*/
async function handleChatStream(request: Request, env: Env, corsHeaders: any): Promise<Response> {
const { message, history = [] } = await request.json() as {
message: string;
history?: ChatMessage[];
};
const contents: ChatMessage[] = [
...history,
{ role: 'user', parts: [{ text: message }] }
];
// Create a TransformStream to stream to client
const { readable, writable } = new TransformStream();
const writer = writable.getWriter();
const encoder = new TextEncoder();
// Start streaming in background
(async () => {
try {
const response = await fetch(
          `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-goog-api-key': env.GEMINI_API_KEY,
},
body: JSON.stringify({ contents }),
}
);
if (!response.ok) {
throw new Error(`HTTP ${response.status}`);
}
if (!response.body) {
throw new Error('Response body is null');
}
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() || '';
for (const line of lines) {
if (line.trim() === '' || line.startsWith('data: [DONE]')) continue;
if (!line.startsWith('data: ')) continue;
try {
const data = JSON.parse(line.slice(6));
const text = data.candidates[0]?.content?.parts[0]?.text;
if (text) {
await writer.write(encoder.encode(text));
}
} catch (e) {
// Skip invalid JSON
}
}
}
await writer.close();
} catch (error: any) {
await writer.write(encoder.encode(`\n\nError: ${error.message}`));
await writer.close();
}
})();
return new Response(readable, {
headers: {
...corsHeaders,
'Content-Type': 'text/plain; charset=utf-8',
'Transfer-Encoding': 'chunked',
},
});
}
/**
* Example Client Usage (JavaScript):
*
* // Non-streaming
* const response = await fetch('https://your-worker.workers.dev/api/chat', {
* method: 'POST',
* headers: { 'Content-Type': 'application/json' },
* body: JSON.stringify({
* message: 'What is quantum computing?',
* history: []
* })
* });
* const data = await response.json();
* console.log(data.reply);
*
* // Streaming
* const response = await fetch('https://your-worker.workers.dev/api/chat/stream', {
* method: 'POST',
* headers: { 'Content-Type': 'application/json' },
* body: JSON.stringify({
* message: 'Write a story',
* history: []
* })
* });
* const reader = response.body.getReader();
* const decoder = new TextDecoder();
* while (true) {
* const { done, value } = await reader.read();
* if (done) break;
* console.log(decoder.decode(value));
* }
*/

templates/code-execution.ts Normal file
@@ -0,0 +1,206 @@
/**
* Google Gemini API - Code Execution Example
*
* Demonstrates how to enable code execution so the model can generate
* and run Python code to solve computational problems.
*
* Features:
* - Basic code execution
* - Data analysis with code
* - Chart generation
* - Error handling for failed execution
*
* Requirements:
* - @google/genai@1.27.0+
* - GEMINI_API_KEY environment variable
*
* Note: Code Execution is NOT available on gemini-2.5-flash-lite
*/
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY! });
async function basicCodeExecution() {
console.log('=== Basic Code Execution Example ===\n');
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents:
'What is the sum of the first 50 prime numbers? Generate and run code for the calculation, and make sure you get all 50.',
config: {
tools: [{ codeExecution: {} }],
},
});
// Parse response parts
console.log('Response parts:\n');
for (const part of response.candidates[0].content.parts) {
if (part.text) {
console.log('📝 Text:', part.text);
}
if (part.executableCode) {
console.log('\n💻 Generated Code:');
console.log(part.executableCode.code);
}
if (part.codeExecutionResult) {
console.log('\n✅ Execution Output:');
console.log(part.codeExecutionResult.output);
}
}
console.log('\n=== Basic Code Execution Complete ===');
}
async function dataAnalysisExample() {
console.log('\n=== Data Analysis Example ===\n');
const prompt = `
Analyze this sales data and calculate:
1. Total revenue
2. Average sale price
3. Best-selling month
4. Month with highest revenue
Use pandas or numpy for analysis.
Data (CSV format):
month,sales,revenue
Jan,150,45000
Feb,200,62000
Mar,175,53000
Apr,220,68000
May,190,58000
`;
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: prompt,
config: {
tools: [{ codeExecution: {} }],
},
});
for (const part of response.candidates[0].content.parts) {
if (part.text) {
console.log('📊 Analysis:', part.text);
}
if (part.executableCode) {
console.log('\n💻 Analysis Code:');
console.log(part.executableCode.code);
}
if (part.codeExecutionResult) {
console.log('\n📈 Results:');
console.log(part.codeExecutionResult.output);
}
}
console.log('\n=== Data Analysis Complete ===');
}
async function chartGenerationExample() {
console.log('\n=== Chart Generation Example ===\n');
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents:
'Create a bar chart showing the distribution of prime numbers under 100 by their last digit. Generate the chart code and describe any patterns you see.',
config: {
tools: [{ codeExecution: {} }],
},
});
for (const part of response.candidates[0].content.parts) {
if (part.text) {
console.log('📊 Chart Description:', part.text);
}
if (part.executableCode) {
console.log('\n📉 Chart Code:');
console.log(part.executableCode.code);
}
if (part.codeExecutionResult) {
console.log('\n✓ Chart generated');
// Note: Image data would be in output
}
}
console.log('\n=== Chart Generation Complete ===');
}
async function chatWithCodeExecution() {
console.log('\n=== Chat with Code Execution Example ===\n');
const chat = await ai.chats.create({
model: 'gemini-2.5-flash',
config: {
tools: [{ codeExecution: {} }],
},
});
// First message
console.log('User: I have a math question for you.');
  let response = await chat.sendMessage({ message: 'I have a math question for you.' });
console.log(`Assistant: ${response.text}\n`);
// Second message (will generate and execute code)
console.log('User: Calculate the Fibonacci sequence up to the 20th number and sum them.');
  response = await chat.sendMessage({
    message: 'Calculate the Fibonacci sequence up to the 20th number and sum them.'
  });
for (const part of response.candidates[0].content.parts) {
if (part.text) {
console.log('Assistant:', part.text);
}
if (part.executableCode) {
console.log('\nCode:');
console.log(part.executableCode.code);
}
if (part.codeExecutionResult) {
console.log('\nOutput:');
console.log(part.codeExecutionResult.output);
}
}
console.log('\n=== Chat with Code Execution Complete ===');
}
async function errorHandlingExample() {
console.log('\n=== Error Handling Example ===\n');
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Write code that divides by zero and see what happens',
config: {
tools: [{ codeExecution: {} }],
},
});
for (const part of response.candidates[0].content.parts) {
if (part.codeExecutionResult) {
if (part.codeExecutionResult.outcome === 'OUTCOME_FAILED') {
console.error('❌ Code execution failed:');
console.error(part.codeExecutionResult.output);
} else {
console.log('✅ Success:');
console.log(part.codeExecutionResult.output);
}
}
}
console.log('\n=== Error Handling Example Complete ===');
}
// Run all examples
async function main() {
await basicCodeExecution();
await dataAnalysisExample();
await chartGenerationExample();
await chatWithCodeExecution();
await errorHandlingExample();
}
main().catch((error) => {
console.error('Error:', error.message);
process.exit(1);
});

@@ -0,0 +1,278 @@
/**
* Google Gemini API - Combined Advanced Features Example
*
* Demonstrates how to use all Phase 2 advanced features together:
* - Context Caching (cost optimization)
* - Code Execution (computational tasks)
* - Grounding with Google Search (real-time information)
*
* Use Case: Financial analysis chatbot that:
* 1. Caches financial data and analysis instructions
* 2. Uses code execution for calculations and data analysis
* 3. Uses grounding for current market information
*
* Requirements:
* - @google/genai@1.27.0+
* - GEMINI_API_KEY environment variable
* - Google Cloud project (for grounding)
*/
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY! });
async function financialAnalysisChatbot() {
console.log('=== Combined Advanced Features: Financial Analysis Chatbot ===\n');
// Step 1: Create cache with financial data and analysis instructions
console.log('Step 1: Creating cache with financial data...');
const financialData = `
Company Financial Data (Q1-Q4 2024):
Revenue:
Q1: $2.5M
Q2: $3.1M
Q3: $2.9M
Q4: $3.8M
Expenses:
Q1: $1.8M
Q2: $2.2M
Q3: $2.1M
Q4: $2.6M
Customer Acquisition:
Q1: 1,200 new customers
Q2: 1,500 new customers
Q3: 1,350 new customers
Q4: 1,800 new customers
`.trim();
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001', // Explicit version required for caching
config: {
displayName: 'financial-analysis-cache',
systemInstruction: `
You are a financial analyst chatbot. You have access to:
1. Company financial data (cached)
2. Code execution for calculations
3. Google Search for current market information
Provide detailed, accurate financial analysis.
`,
contents: financialData,
ttl: '3600s', // Cache for 1 hour
},
});
console.log(`✓ Cache created: ${cache.name}\n`);
// Step 2: Create chat with all advanced features enabled
console.log('Step 2: Creating chat with code execution and grounding...\n');
  const chat = await ai.chats.create({
    model: 'gemini-2.5-flash-001',
    config: {
      cachedContent: cache.name, // Use cached context
      tools: [
        { codeExecution: {} }, // Enable code execution
        { googleSearch: {} }, // Enable grounding
      ],
    },
  });
// Query 1: Analysis requiring code execution (uses cache)
console.log('===========================================');
console.log('Query 1: Calculate year-over-year growth');
console.log('===========================================\n');
  let response = await chat.sendMessage({ message: `
Calculate the following for 2024:
1. Total annual revenue
2. Total annual expenses
3. Net profit
4. Profit margin percentage
5. Quarter-over-quarter revenue growth rates
Use code to perform these calculations.
  ` });
console.log('📊 Financial Analysis:\n');
for (const part of response.candidates[0].content.parts) {
if (part.text) {
console.log(part.text);
}
if (part.executableCode) {
console.log('\n💻 Calculations Code:');
console.log(part.executableCode.code);
}
if (part.codeExecutionResult) {
console.log('\n📈 Results:');
console.log(part.codeExecutionResult.output);
}
}
// Query 2: Current market information (uses grounding)
console.log('\n\n===========================================');
console.log('Query 2: Compare with industry benchmarks');
console.log('===========================================\n');
  response = await chat.sendMessage({ message: `
What are the current industry benchmarks for SaaS companies in terms of:
1. Profit margins
2. Customer acquisition cost trends
3. Growth rate expectations
Use current market data to provide context.
  ` });
console.log('📰 Market Context:\n');
console.log(response.text);
if (response.candidates[0].groundingMetadata) {
console.log('\n✓ Used current market data from:');
const sources = response.candidates[0].groundingMetadata.webPages || [];
sources.slice(0, 3).forEach((source, i) => {
console.log(`${i + 1}. ${source.title}`);
console.log(` ${source.url}`);
});
}
// Query 3: Combined analysis (uses cache + code + grounding)
console.log('\n\n===========================================');
console.log('Query 3: Comprehensive recommendation');
console.log('===========================================\n');
  response = await chat.sendMessage({ message: `
Based on:
1. Our company's financial performance (from cached data)
2. Calculated growth metrics (using code execution)
3. Current industry trends (from search)
Provide a comprehensive recommendation for Q1 2025 strategy.
  ` });
console.log('💡 Strategic Recommendation:\n');
console.log(response.text);
// Show which features were used
console.log('\n\n📋 Features Used in This Query:');
let featuresUsed = [];
// Check for code execution
const hasCode = response.candidates[0].content.parts.some(
(part) => part.executableCode
);
if (hasCode) featuresUsed.push('✓ Code Execution');
// Check for grounding
if (response.candidates[0].groundingMetadata) {
featuresUsed.push('✓ Google Search Grounding');
}
  // Cache is always used because we pass cachedContent in the chat config
featuresUsed.push('✓ Context Caching');
featuresUsed.forEach((feature) => console.log(feature));
// Clean up
console.log('\n\nStep 3: Cleaning up...');
await ai.caches.delete({ name: cache.name });
console.log('✓ Cache deleted');
console.log('\n=== Financial Analysis Chatbot Complete ===');
}
async function researchAssistant() {
console.log('\n\n=== Combined Advanced Features: Research Assistant ===\n');
// Create cache with research paper
console.log('Step 1: Caching research paper...');
const researchPaper = `
Title: Climate Change Impact on Arctic Ecosystems (2024)
Abstract:
This study examines the effects of climate change on Arctic ecosystems
over the past decade...
[Imagine full research paper content here]
Key Findings:
- Temperature increase of 2.3°C in Arctic regions
- 15% reduction in sea ice coverage
- Migration pattern changes in polar species
- etc.
`.trim();
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001',
config: {
displayName: 'research-paper-cache',
systemInstruction: 'You are a research assistant. Analyze papers and provide insights.',
contents: researchPaper,
ttl: '1800s', // 30 minutes
},
});
console.log(`✓ Cache created\n`);
// Create chat with all tools
  const chat = await ai.chats.create({
    model: 'gemini-2.5-flash-001',
    config: {
      cachedContent: cache.name,
      tools: [{ codeExecution: {} }, { googleSearch: {} }],
    },
  });
// Query combining all features
console.log('Query: Comprehensive climate analysis\n');
  const response = await chat.sendMessage({ message: `
1. Calculate the average temperature change per year from the data in the paper (use code)
2. Find the latest climate predictions for the next decade (use search)
3. Synthesize findings from both sources into a summary
  ` });
console.log('Response:\n');
for (const part of response.candidates[0].content.parts) {
if (part.text) console.log(part.text);
if (part.executableCode) {
console.log('\nCode:');
console.log(part.executableCode.code);
}
if (part.codeExecutionResult) {
console.log('\nResults:');
console.log(part.codeExecutionResult.output);
}
}
if (response.candidates[0].groundingMetadata) {
console.log('\nSources:');
const sources = response.candidates[0].groundingMetadata.webPages || [];
sources.slice(0, 2).forEach((s) => console.log(`- ${s.title}`));
}
// Clean up
await ai.caches.delete({ name: cache.name });
console.log('\n=== Research Assistant Complete ===');
}
// Run examples
async function main() {
await financialAnalysisChatbot();
await researchAssistant();
console.log('\n\n✅ All advanced features demonstrated!');
console.log('\nKey Takeaways:');
console.log('- Context Caching: Saves costs by reusing large context');
console.log('- Code Execution: Enables computational analysis');
console.log('- Grounding: Provides real-time, fact-checked information');
console.log('- Combined: Creates powerful, cost-effective AI applications');
}
main().catch((error) => {
console.error('Error:', error.message);
process.exit(1);
});

@@ -0,0 +1,154 @@
/**
* Google Gemini API - Context Caching Example
*
* Demonstrates how to create and use context caching to reduce costs by up to 90%
* when using the same large context (documents, videos, system instructions) repeatedly.
*
* Features:
* - Create cache with large document
* - Use cache for multiple queries
* - Update cache TTL
* - List and delete caches
*
* Requirements:
* - @google/genai@1.27.0+
* - GEMINI_API_KEY environment variable
*
* Note: You must use explicit model version suffixes like 'gemini-2.5-flash-001',
* not just 'gemini-2.5-flash' when creating caches.
*/
import { GoogleGenAI } from '@google/genai';
import fs from 'fs';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY! });
async function contextCachingExample() {
console.log('=== Context Caching Example ===\n');
// Example: Large document content
const largeDocument = `
This is a large legal document that will be analyzed multiple times.
Article 1: ...
Article 2: ...
... (imagine thousands of lines of legal text)
`.trim();
// 1. Create a cache with large content
console.log('1. Creating cache...');
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001', // Must use explicit version suffix!
config: {
displayName: 'legal-doc-cache',
systemInstruction: 'You are an expert legal analyst. Analyze documents and provide clear summaries.',
contents: largeDocument,
ttl: '3600s', // Cache for 1 hour
},
});
console.log(`✓ Cache created: ${cache.name}`);
console.log(` Display name: ${cache.displayName}`);
console.log(` Expires at: ${cache.expireTime}\n`);
// 2. Use cache for multiple queries (saves tokens!)
console.log('2. Using cache for first query...');
  const response1 = await ai.models.generateContent({
    model: 'gemini-2.5-flash-001',
    contents: 'Summarize the key points of Article 1',
    config: { cachedContent: cache.name }, // Reference the cache here
  });
console.log(`Response: ${response1.text}\n`);
console.log('3. Using cache for second query...');
  const response2 = await ai.models.generateContent({
    model: 'gemini-2.5-flash-001',
    contents: 'What are the legal implications of Article 2?',
    config: { cachedContent: cache.name },
  });
console.log(`Response: ${response2.text}\n`);
// 3. Update cache TTL to extend lifetime
console.log('4. Extending cache TTL to 2 hours...');
await ai.caches.update({
name: cache.name,
config: {
ttl: '7200s', // Extend to 2 hours
},
});
console.log('✓ Cache TTL updated\n');
// 4. List all caches
console.log('5. Listing all caches...');
  const pager = await ai.caches.list();
  for await (const c of pager) {
    console.log(`  - ${c.displayName}: ${c.name}`);
  }
console.log();
// 5. Delete cache when done
console.log('6. Deleting cache...');
await ai.caches.delete({ name: cache.name });
console.log('✓ Cache deleted\n');
console.log('=== Context Caching Complete ===');
}
async function videoCachingExample() {
console.log('\n=== Video Caching Example ===\n');
// Upload a video file
console.log('1. Uploading video file...');
  const videoFile = await ai.files.upload({
    file: './example-video.mp4',
    config: { mimeType: 'video/mp4' },
  });
console.log('2. Waiting for video processing...');
let processedFile = videoFile;
  while (processedFile.state === 'PROCESSING') {
await new Promise((resolve) => setTimeout(resolve, 2000));
processedFile = await ai.files.get({ name: videoFile.name });
}
console.log(`✓ Video processed: ${processedFile.uri}\n`);
// Create cache with video
console.log('3. Creating cache with video...');
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001',
config: {
displayName: 'video-analysis-cache',
systemInstruction: 'You are an expert video analyzer.',
contents: [processedFile],
ttl: '300s', // 5 minutes
},
});
console.log(`✓ Cache created: ${cache.name}\n`);
// Use cache for multiple video queries
console.log('4. Query 1: What happens in the first minute?');
  const response1 = await ai.models.generateContent({
    model: 'gemini-2.5-flash-001',
    contents: 'What happens in the first minute?',
    config: { cachedContent: cache.name },
  });
console.log(`Response: ${response1.text}\n`);
console.log('5. Query 2: Describe the main characters');
  const response2 = await ai.models.generateContent({
    model: 'gemini-2.5-flash-001',
    contents: 'Describe the main characters',
    config: { cachedContent: cache.name },
  });
console.log(`Response: ${response2.text}\n`);
// Clean up
await ai.caches.delete({ name: cache.name });
await ai.files.delete({ name: videoFile.name });
console.log('✓ Cache and video file deleted');
console.log('\n=== Video Caching Complete ===');
}
// Run examples
contextCachingExample()
.then(() => videoCachingExample())
.catch((error) => {
console.error('Error:', error.message);
process.exit(1);
});

@@ -0,0 +1,156 @@
/**
* Basic Function Calling with Gemini API
*
* Demonstrates:
* - Defining function declarations (tools)
* - Detecting when model wants to call a function
* - Executing functions and returning results
* - Multi-turn function calling workflow
*
* Prerequisites:
* - npm install @google/genai@1.27.0
* - export GEMINI_API_KEY="..."
*
* ⚠️ IMPORTANT: Gemini 2.5 Flash-Lite does NOT support function calling!
* Use gemini-2.5-flash or gemini-2.5-pro
*/
import { GoogleGenAI, FunctionCallingConfigMode } from '@google/genai';
async function main() {
const ai = new GoogleGenAI({
apiKey: process.env.GEMINI_API_KEY,
});
try {
// Step 1: Define function declarations
const getCurrentWeather = {
name: 'get_current_weather',
description: 'Get the current weather for a specific location',
parametersJsonSchema: {
type: 'object',
properties: {
location: {
type: 'string',
description: 'The city name, e.g. San Francisco, Tokyo, London'
},
unit: {
type: 'string',
enum: ['celsius', 'fahrenheit'],
description: 'Temperature unit'
}
},
required: ['location']
}
};
// Step 2: Make request with tools
console.log('User: What\'s the weather in Tokyo?\n');
const response1 = await ai.models.generateContent({
model: 'gemini-2.5-flash', // ⚠️ NOT flash-lite!
contents: 'What\'s the weather in Tokyo?',
config: {
tools: [
{ functionDeclarations: [getCurrentWeather] }
]
}
});
// Step 3: Check if model wants to call a function
const functionCall = response1.candidates[0]?.content?.parts?.find(
part => part.functionCall
)?.functionCall;
if (!functionCall) {
console.log('Model response (no function call):', response1.text);
return;
}
console.log('Model wants to call function:');
console.log('- Function name:', functionCall.name);
console.log('- Arguments:', JSON.stringify(functionCall.args, null, 2));
console.log('');
// Step 4: Execute the function (your implementation)
console.log('Executing function...\n');
const weatherData = await getCurrentWeatherImpl(
functionCall.args.location,
functionCall.args.unit || 'celsius'
);
console.log('Function result:', JSON.stringify(weatherData, null, 2));
console.log('');
// Step 5: Send function result back to model
const response2 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{ parts: [{ text: 'What\'s the weather in Tokyo?' }] },
response1.candidates[0].content, // Original assistant response with function call
{
parts: [
{
functionResponse: {
name: functionCall.name,
response: weatherData
}
}
]
}
],
config: {
tools: [
{ functionDeclarations: [getCurrentWeather] }
]
}
});
console.log('Model final response:');
console.log(response2.text);
} catch (error: any) {
console.error('Error:', error.message);
}
}
/**
* Mock implementation of weather API
* Replace with actual API call in production
*/
async function getCurrentWeatherImpl(location: string, unit: string) {
// Simulate API call
await new Promise(resolve => setTimeout(resolve, 500));
// Mock data
return {
location,
temperature: unit === 'celsius' ? 22 : 72,
unit,
conditions: 'Partly cloudy',
humidity: 65,
windSpeed: 10
};
}
/**
* Function Calling Modes:
*
* AUTO (default): Model decides whether to call functions
* ANY: Force model to call at least one function
* NONE: Disable function calling for this request
*
* Example with mode:
*
* config: {
* tools: [...],
* toolConfig: {
* functionCallingConfig: {
* mode: FunctionCallingConfigMode.ANY,
* allowedFunctionNames: ['get_current_weather']
* }
* }
* }
*/
main();

@@ -0,0 +1,177 @@
/**
* Parallel Function Calling with Gemini API
*
* Demonstrates:
* - Multiple independent function calls in one request
* - Handling multiple function call responses
* - Compositional (sequential) vs parallel execution
* - Complex multi-step workflows
*
* Prerequisites:
* - npm install @google/genai@1.27.0
* - export GEMINI_API_KEY="..."
*
* ⚠️ IMPORTANT: Only gemini-2.5-flash and gemini-2.5-pro support function calling
*/
import { GoogleGenAI } from '@google/genai';
async function main() {
const ai = new GoogleGenAI({
apiKey: process.env.GEMINI_API_KEY,
});
try {
// Define multiple functions
const getWeather = {
name: 'get_weather',
description: 'Get current weather for a location',
parametersJsonSchema: {
type: 'object',
properties: {
location: { type: 'string', description: 'City name' }
},
required: ['location']
}
};
const getPopulation = {
name: 'get_population',
description: 'Get population of a city',
parametersJsonSchema: {
type: 'object',
properties: {
city: { type: 'string', description: 'City name' }
},
required: ['city']
}
};
const getTimezone = {
name: 'get_timezone',
description: 'Get timezone information for a location',
parametersJsonSchema: {
type: 'object',
properties: {
location: { type: 'string', description: 'City name' }
},
required: ['location']
}
};
// Make request that requires multiple independent functions
console.log('User: What is the weather, population, and timezone of Tokyo?\n');
const response1 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'What is the weather, population, and timezone of Tokyo?',
config: {
tools: [
{ functionDeclarations: [getWeather, getPopulation, getTimezone] }
]
}
});
// Extract all function calls
const functionCalls = response1.candidates[0]?.content?.parts?.filter(
part => part.functionCall
) || [];
console.log(`Model wants to call ${functionCalls.length} functions in parallel:\n`);
// Execute all functions in parallel
const functionResponses = await Promise.all(
functionCalls.map(async (part) => {
const functionCall = part.functionCall!;
console.log(`- Calling ${functionCall.name} with args:`, functionCall.args);
// Execute function
let result;
if (functionCall.name === 'get_weather') {
result = await getWeatherImpl(functionCall.args.location);
} else if (functionCall.name === 'get_population') {
result = await getPopulationImpl(functionCall.args.city);
} else if (functionCall.name === 'get_timezone') {
result = await getTimezoneImpl(functionCall.args.location);
}
return {
functionResponse: {
name: functionCall.name,
response: result
}
};
})
);
console.log('\nAll functions executed.\n');
// Send all function results back to model
const response2 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{ parts: [{ text: 'What is the weather, population, and timezone of Tokyo?' }] },
response1.candidates[0].content,
{ parts: functionResponses }
],
config: {
tools: [
{ functionDeclarations: [getWeather, getPopulation, getTimezone] }
]
}
});
console.log('Model final response:');
console.log(response2.text);
} catch (error: any) {
console.error('Error:', error.message);
}
}
/**
* Mock function implementations
*/
async function getWeatherImpl(location: string) {
await new Promise(resolve => setTimeout(resolve, 300));
return {
location,
temperature: 22,
conditions: 'Sunny',
humidity: 60
};
}
async function getPopulationImpl(city: string) {
await new Promise(resolve => setTimeout(resolve, 300));
return {
city,
population: 13960000,
metropolitan: 37400000
};
}
async function getTimezoneImpl(location: string) {
await new Promise(resolve => setTimeout(resolve, 300));
return {
location,
timezone: 'Asia/Tokyo',
offset: '+09:00'
};
}
/**
* Parallel vs Compositional Function Calling:
*
* PARALLEL: Functions are independent and can run simultaneously
* - Example: "What is the weather AND population of Tokyo?"
* - Model calls get_weather() and get_population() together
*
* COMPOSITIONAL: Functions depend on each other (sequential)
* - Example: "What is the weather at my current location?"
* - Model first calls get_current_location(), then uses result for get_weather()
*
* Gemini automatically determines which pattern to use based on dependencies.
*/
main();

@@ -0,0 +1,271 @@
/**
* Google Gemini API - Grounding with Google Search Example
*
* Demonstrates how to enable grounding to connect the model to real-time
* web information, reducing hallucinations and providing up-to-date responses
* with citations.
*
* Features:
* - Basic grounding with Google Search (Gemini 2.5)
* - Dynamic retrieval with threshold (Gemini 1.5)
* - Chat with grounding
* - Combining grounding with function calling
* - Checking grounding metadata and citations
*
* Requirements:
* - @google/genai@1.27.0+
* - GEMINI_API_KEY environment variable
* - Google Cloud project (grounding requires GCP project, not just API key)
*
* Note: Use `googleSearch` for Gemini 2.5 models (recommended)
* Use `googleSearchRetrieval` for Gemini 1.5 models (legacy)
*/
import { GoogleGenAI, DynamicRetrievalConfigMode } from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY! });
async function basicGrounding() {
console.log('=== Basic Grounding Example (Gemini 2.5) ===\n');
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Who won the euro 2024?',
config: {
tools: [{ googleSearch: {} }],
},
});
console.log('Response:', response.text);
// Check if grounding was used
if (response.candidates[0].groundingMetadata) {
console.log('\n✓ Search was performed!');
console.log('\nGrounding Metadata:');
console.log(JSON.stringify(response.candidates[0].groundingMetadata, null, 2));
} else {
console.log('\n✓ Model answered from its own knowledge (no search needed)');
}
console.log('\n=== Basic Grounding Complete ===');
}
async function dynamicRetrievalExample() {
console.log('\n=== Dynamic Retrieval Example (Gemini 1.5) ===\n');
const response = await ai.models.generateContent({
model: 'gemini-1.5-flash',
contents: 'Who won the euro 2024?',
config: {
tools: [
{
googleSearchRetrieval: {
dynamicRetrievalConfig: {
mode: DynamicRetrievalConfigMode.MODE_DYNAMIC,
dynamicThreshold: 0.7, // Search only if confidence < 70%
},
},
},
],
},
});
console.log('Response:', response.text);
if (response.candidates[0].groundingMetadata) {
console.log('\n✓ Search performed (confidence < 70%)');
} else {
console.log('\n✓ Answered from knowledge (confidence >= 70%)');
}
console.log('\n=== Dynamic Retrieval Complete ===');
}
async function chatWithGrounding() {
console.log('\n=== Chat with Grounding Example ===\n');
const chat = await ai.chats.create({
model: 'gemini-2.5-flash',
config: {
tools: [{ googleSearch: {} }],
},
});
// First message
console.log('User: What are the latest developments in quantum computing?');
  let response = await chat.sendMessage({ message: 'What are the latest developments in quantum computing?' });
console.log(`\nAssistant: ${response.text}`);
// Check and display sources
if (response.candidates[0].groundingMetadata) {
const sources = response.candidates[0].groundingMetadata.webPages || [];
console.log(`\n📚 Sources used: ${sources.length}`);
sources.forEach((source, i) => {
console.log(`${i + 1}. ${source.title}`);
console.log(` ${source.url}`);
});
}
// Follow-up question
console.log('\n\nUser: Which company made the biggest breakthrough?');
  response = await chat.sendMessage({ message: 'Which company made the biggest breakthrough?' });
console.log(`\nAssistant: ${response.text}`);
if (response.candidates[0].groundingMetadata) {
const sources = response.candidates[0].groundingMetadata.webPages || [];
console.log(`\n📚 Sources used: ${sources.length}`);
sources.forEach((source, i) => {
console.log(`${i + 1}. ${source.title} - ${source.url}`);
});
}
console.log('\n=== Chat with Grounding Complete ===');
}
async function groundingWithFunctionCalling() {
console.log('\n=== Grounding + Function Calling Example ===\n');
// Define weather function
const weatherFunction = {
name: 'get_current_weather',
description: 'Get current weather for a location',
parametersJsonSchema: {
type: 'object',
properties: {
location: { type: 'string', description: 'City name' },
unit: { type: 'string', enum: ['celsius', 'fahrenheit'] },
},
required: ['location'],
},
};
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'What is the weather like in the city that won Euro 2024?',
config: {
tools: [{ googleSearch: {} }, { functionDeclarations: [weatherFunction] }],
},
});
console.log('User query: What is the weather like in the city that won Euro 2024?');
console.log('\nModel will:');
console.log('1. Use Google Search to find Euro 2024 winner');
console.log('2. Call get_current_weather function with the city');
console.log('3. Combine both results in response\n');
// Check for function calls
  const functionCallPart = response.candidates[0].content.parts.find((part) => part.functionCall);
  if (functionCallPart) {
    console.log('✓ Function call detected:');
    console.log(`  Function: ${functionCallPart.functionCall?.name}`);
    console.log(`  Arguments:`, functionCallPart.functionCall?.args);
  }
if (response.candidates[0].groundingMetadata) {
console.log('\n✓ Grounding was used');
const sources = response.candidates[0].groundingMetadata.webPages || [];
console.log(` Sources: ${sources.length} web pages`);
}
console.log('\n=== Grounding + Function Calling Complete ===');
}
async function checkingGroundingUsage() {
console.log('\n=== Checking Grounding Usage Example ===\n');
// Query that doesn't need search
console.log('Query 1: What is 2+2? (Should NOT need search)');
const response1 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'What is 2+2?',
config: {
tools: [{ googleSearch: {} }],
},
});
console.log(`Answer: ${response1.text}`);
console.log(
`Grounding used: ${response1.candidates[0].groundingMetadata ? 'YES' : 'NO'}\n`
);
// Query that needs current information
console.log('Query 2: What happened in the news today? (Should need search)');
const response2 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'What are the top news headlines today?',
config: {
tools: [{ googleSearch: {} }],
},
});
console.log(`Answer: ${response2.text}`);
console.log(`Grounding used: ${response2.candidates[0].groundingMetadata ? 'YES' : 'NO'}`);
if (response2.candidates[0].groundingMetadata) {
console.log('\nSearch queries performed:');
const queries = response2.candidates[0].groundingMetadata.searchQueries || [];
queries.forEach((q, i) => {
console.log(`${i + 1}. ${q.text || q}`);
});
}
console.log('\n=== Checking Grounding Usage Complete ===');
}
async function citationsExample() {
console.log('\n=== Citations Example ===\n');
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Tell me about the recent Mars rover discoveries',
config: {
tools: [{ googleSearch: {} }],
},
});
console.log('Response:', response.text);
if (response.candidates[0].groundingMetadata) {
const metadata = response.candidates[0].groundingMetadata;
console.log('\n📚 Web Pages:');
const webPages = metadata.webPages || [];
webPages.forEach((page, i) => {
console.log(`\n${i + 1}. ${page.title}`);
console.log(` URL: ${page.url}`);
if (page.snippet) {
console.log(` Snippet: ${page.snippet.substring(0, 100)}...`);
}
});
console.log('\n🔗 Citations:');
const citations = metadata.citations || [];
citations.forEach((citation, i) => {
console.log(`\n${i + 1}. Position: ${citation.startIndex}-${citation.endIndex}`);
console.log(` Source: ${citation.uri}`);
});
}
console.log('\n=== Citations Example Complete ===');
}
// Run all examples
async function main() {
await basicGrounding();
await dynamicRetrievalExample();
await chatWithGrounding();
await groundingWithFunctionCalling();
await checkingGroundingUsage();
await citationsExample();
}
main().catch((error) => {
console.error('Error:', error.message);
if (error.message.includes('Google Cloud project')) {
console.error(
'\nNote: Grounding requires a Google Cloud project, not just an API key.'
);
}
process.exit(1);
});

@@ -0,0 +1,117 @@
/**
* Multimodal Image Understanding with Gemini API
*
* Demonstrates:
* - Image analysis with vision capabilities
* - Base64 encoding of images
* - Combining text and image inputs
* - Multiple images in one request
*
* Prerequisites:
* - npm install @google/genai@1.27.0
* - export GEMINI_API_KEY="..."
*/
import { GoogleGenAI } from '@google/genai';
import fs from 'fs';
async function main() {
const ai = new GoogleGenAI({
apiKey: process.env.GEMINI_API_KEY,
});
try {
// Example 1: Analyze a single image
console.log('Example 1: Analyze Single Image\n');
// Load image from file
const imagePath = '/path/to/image.jpg'; // Replace with actual path
const imageData = fs.readFileSync(imagePath);
const base64Image = imageData.toString('base64');
const response1 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{
parts: [
{ text: 'Describe this image in detail. What objects, people, or scenes do you see?' },
{
inlineData: {
data: base64Image,
mimeType: 'image/jpeg' // or 'image/png', 'image/webp', etc.
}
}
]
}
]
});
console.log(response1.text);
console.log('\n---\n');
// Example 2: Compare two images
console.log('Example 2: Compare Two Images\n');
const imagePath2 = '/path/to/image2.jpg'; // Replace with actual path
const imageData2 = fs.readFileSync(imagePath2);
const base64Image2 = imageData2.toString('base64');
const response2 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{
parts: [
{ text: 'Compare these two images. What are the similarities and differences?' },
{ inlineData: { data: base64Image, mimeType: 'image/jpeg' } },
{ inlineData: { data: base64Image2, mimeType: 'image/jpeg' } }
]
}
]
});
console.log(response2.text);
console.log('\n---\n');
// Example 3: Specific questions about image
console.log('Example 3: Specific Questions\n');
const response3 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{
parts: [
{ text: 'How many people are in this image? What are they wearing?' },
{ inlineData: { data: base64Image, mimeType: 'image/jpeg' } }
]
}
]
});
console.log(response3.text);
} catch (error: any) {
console.error('Error:', error.message);
if (error.message.includes('ENOENT')) {
console.error('\n⚠ Image file not found. Update the imagePath variable with a valid path.');
}
}
}
/**
* Supported image formats:
* - JPEG (.jpg, .jpeg)
* - PNG (.png)
* - WebP (.webp)
* - HEIC (.heic)
* - HEIF (.heif)
*
* Max size: 20MB per image
*
* Tips:
* - Use specific, detailed prompts for better results
* - You can analyze multiple images in one request
* - gemini-2.5-flash and gemini-2.5-pro both support vision
*/
main();

@@ -0,0 +1,152 @@
/**
* Multimodal Video and Audio Understanding with Gemini API
*
* Demonstrates:
* - Video analysis (what happens in the video)
* - Audio transcription and understanding
* - PDF document parsing
* - Combining multiple modalities
*
* Prerequisites:
* - npm install @google/genai@1.27.0
* - export GEMINI_API_KEY="..."
*/
import { GoogleGenAI } from '@google/genai';
import fs from 'fs';
async function main() {
const ai = new GoogleGenAI({
apiKey: process.env.GEMINI_API_KEY,
});
try {
// Example 1: Analyze video
console.log('Example 1: Video Analysis\n');
const videoPath = '/path/to/video.mp4'; // Replace with actual path
const videoData = fs.readFileSync(videoPath);
const base64Video = videoData.toString('base64');
const response1 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{
parts: [
{ text: 'Describe what happens in this video. Summarize the key events.' },
{
inlineData: {
data: base64Video,
mimeType: 'video/mp4'
}
}
]
}
]
});
console.log(response1.text);
console.log('\n---\n');
// Example 2: Transcribe and analyze audio
console.log('Example 2: Audio Transcription and Analysis\n');
const audioPath = '/path/to/audio.mp3'; // Replace with actual path
const audioData = fs.readFileSync(audioPath);
const base64Audio = audioData.toString('base64');
const response2 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{
parts: [
{ text: 'Transcribe this audio and provide a summary of the main points discussed.' },
{
inlineData: {
data: base64Audio,
mimeType: 'audio/mp3'
}
}
]
}
]
});
console.log(response2.text);
console.log('\n---\n');
// Example 3: Parse PDF document
console.log('Example 3: PDF Document Parsing\n');
const pdfPath = '/path/to/document.pdf'; // Replace with actual path
const pdfData = fs.readFileSync(pdfPath);
const base64Pdf = pdfData.toString('base64');
const response3 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{
parts: [
{ text: 'Summarize the key points in this PDF document. Extract any important data or conclusions.' },
{
inlineData: {
data: base64Pdf,
mimeType: 'application/pdf'
}
}
]
}
]
});
console.log(response3.text);
console.log('\n---\n');
// Example 4: Combine multiple modalities
console.log('Example 4: Multiple Modalities (Video + Text Questions)\n');
const response4 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{
parts: [
{ text: 'Based on this video, answer these questions:\n1. How many people appear?\n2. What is the main activity?\n3. Where does this take place?' },
{ inlineData: { data: base64Video, mimeType: 'video/mp4' } }
]
}
]
});
console.log(response4.text);
} catch (error: any) {
console.error('Error:', error.message);
if (error.message.includes('ENOENT')) {
console.error('\n⚠ File not found. Update the file path variables with valid paths.');
}
}
}
/**
* Supported Video Formats:
* - MP4, MPEG, MOV, AVI, FLV, MPG, WebM, WMV
* Max size: 2GB (use File API for larger - Phase 2)
* Max length (inline): 2 minutes
*
* Supported Audio Formats:
* - MP3, WAV, FLAC, AAC, OGG, OPUS
* Max size: 20MB
*
* PDF:
* - Max size: 30MB
* - Text-based PDFs work best
* - Scanned images may have lower accuracy
*
* Tips:
* - For videos > 2 minutes, use the File API (Phase 2)
* - Specific prompts yield better results
* - You can combine text, images, video, audio, and PDFs in one request
*/
main();

templates/package.json Normal file
@@ -0,0 +1,22 @@
{
"name": "gemini-api-example",
"version": "1.0.0",
"description": "Google Gemini API examples using @google/genai SDK",
"type": "module",
"scripts": {
"dev": "tsx watch src/index.ts",
"start": "node dist/index.js",
"build": "tsc"
},
"dependencies": {
"@google/genai": "^1.27.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"tsx": "^4.7.0",
"typescript": "^5.3.0"
},
"engines": {
"node": ">=18.0.0"
}
}

@@ -0,0 +1,81 @@
/**
* Streaming Text Generation with Gemini API (Node.js SDK)
*
* Demonstrates:
* - Streaming responses with async iteration
* - Real-time token delivery for better UX
* - Handling partial chunks
* - Multi-turn chat with streaming
*
* Prerequisites:
* - npm install @google/genai@1.27.0
* - export GEMINI_API_KEY="..."
*/
import { GoogleGenAI } from '@google/genai';
async function main() {
const ai = new GoogleGenAI({
apiKey: process.env.GEMINI_API_KEY,
});
try {
// Example 1: Basic streaming
console.log('Example 1: Basic Streaming\n');
console.log('Prompt: Write a 200-word story about time travel\n');
const response = await ai.models.generateContentStream({
model: 'gemini-2.5-flash',
contents: 'Write a 200-word story about time travel'
});
// Stream chunks as they arrive
for await (const chunk of response) {
// Each chunk may contain partial text
if (chunk.text) {
process.stdout.write(chunk.text);
}
}
console.log('\n\n---\n');
// Example 2: Streaming with chat
console.log('Example 2: Streaming Chat\n');
    const chat = await ai.chats.create({
      model: 'gemini-2.5-flash',
      config: { systemInstruction: 'You are a helpful coding assistant.' }
    });
// First turn with streaming
console.log('User: What is TypeScript?\n');
    const response1 = await chat.sendMessageStream({ message: 'What is TypeScript?' });
console.log('Assistant: ');
for await (const chunk of response1) {
      if (chunk.text) process.stdout.write(chunk.text);
}
console.log('\n\n');
// Second turn with streaming (context maintained)
console.log('User: How do I install it?\n');
    const response2 = await chat.sendMessageStream({ message: 'How do I install it?' });
console.log('Assistant: ');
for await (const chunk of response2) {
      if (chunk.text) process.stdout.write(chunk.text);
}
console.log('\n\n');
// Get full chat history
const history = chat.getHistory();
console.log(`Total messages in history: ${history.length}`);
} catch (error: any) {
console.error('Error:', error.message);
}
}
main();

@@ -0,0 +1,190 @@
/**
* Streaming Text Generation with Gemini API (Fetch - SSE Parsing)
*
* Demonstrates:
* - Server-Sent Events (SSE) parsing with fetch
* - Manual stream handling for Cloudflare Workers or edge runtimes
* - Buffer management for incomplete chunks
* - Error handling during streaming
*
* Prerequisites:
* - Set GEMINI_API_KEY environment variable
*/
interface Env {
GEMINI_API_KEY: string;
}
/**
* Example for Cloudflare Workers with streaming response
*/
export default {
async fetch(request: Request, env: Env): Promise<Response> {
// Create a TransformStream to stream tokens to the client
const { readable, writable } = new TransformStream();
const writer = writable.getWriter();
const encoder = new TextEncoder();
// Start streaming in the background
(async () => {
try {
const response = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-goog-api-key': env.GEMINI_API_KEY,
},
body: JSON.stringify({
contents: [
{
parts: [
{ text: 'Write a 200-word story about time travel' }
]
}
]
}),
}
);
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
if (!response.body) {
throw new Error('Response body is null');
}
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) {
break;
}
// Append new data to buffer
buffer += decoder.decode(value, { stream: true });
// Split by newlines (SSE format)
const lines = buffer.split('\n');
// Keep the last incomplete line in buffer
buffer = lines.pop() || '';
for (const line of lines) {
// Skip empty lines and metadata
if (line.trim() === '' || line.startsWith('data: [DONE]')) {
continue;
}
// Parse SSE format: "data: {json}"
if (!line.startsWith('data: ')) {
continue;
}
try {
const jsonData = JSON.parse(line.slice(6)); // Remove "data: " prefix
const text = jsonData.candidates?.[0]?.content?.parts?.[0]?.text;
if (text) {
// Write chunk to client
await writer.write(encoder.encode(text));
}
} catch (e) {
// Skip invalid JSON chunks
console.error('Failed to parse chunk:', e);
}
}
}
// Close the stream
await writer.close();
} catch (error: any) {
await writer.write(encoder.encode(`\n\nError: ${error.message}`));
await writer.close();
}
})();
// Return streaming response immediately
return new Response(readable, {
headers: {
'Content-Type': 'text/plain; charset=utf-8',
'Transfer-Encoding': 'chunked',
},
});
}
};
/**
* Example for Node.js/Standalone
*/
async function mainNodeJS() {
const GEMINI_API_KEY = process.env.GEMINI_API_KEY;
if (!GEMINI_API_KEY) {
throw new Error('GEMINI_API_KEY environment variable not set');
}
const response = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-goog-api-key': GEMINI_API_KEY,
},
body: JSON.stringify({
contents: [
{
parts: [
{ text: 'Write a 200-word story about time travel' }
]
}
]
}),
}
);
if (!response.body) {
throw new Error('Response body is null');
}
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() || '';
for (const line of lines) {
if (line.trim() === '' || line.startsWith('data: [DONE]')) continue;
if (!line.startsWith('data: ')) continue;
try {
const data = JSON.parse(line.slice(6));
const text = data.candidates?.[0]?.content?.parts?.[0]?.text;
if (text) {
process.stdout.write(text);
}
} catch (e) {
// Skip invalid JSON
}
}
}
console.log('\n');
}
// Uncomment to run in Node.js
// mainNodeJS();
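/**
 * A small refactoring sketch: both examples above repeat the same SSE line
 * handling, which could be extracted into one helper. The "data: " prefix and
 * the candidate/part shape match the parsing already done above.
 */
function parseSSELine(line: string): string | null {
  if (line.trim() === '' || !line.startsWith('data: ') || line.startsWith('data: [DONE]')) {
    return null;
  }
  try {
    const data = JSON.parse(line.slice(6)); // remove "data: " prefix
    return data.candidates?.[0]?.content?.parts?.[0]?.text ?? null;
  } catch {
    return null; // ignore partial or invalid JSON chunks
  }
}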

View File

@@ -0,0 +1,61 @@
/**
* Basic Text Generation with Gemini API (Node.js SDK)
*
* Demonstrates:
* - Installing @google/genai (CORRECT SDK, NOT @google/generative-ai)
* - Basic text generation with gemini-2.5-flash
* - Accessing response text
* - Error handling
*
* Prerequisites:
* - npm install @google/genai@1.27.0
* - export GEMINI_API_KEY="..."
*/
import { GoogleGenAI } from '@google/genai';
async function main() {
// Initialize the Google GenAI client
// ⚠️ IMPORTANT: Use @google/genai, NOT @google/generative-ai (deprecated)
const ai = new GoogleGenAI({
apiKey: process.env.GEMINI_API_KEY,
});
try {
// Generate content with gemini-2.5-flash
// Models available: gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Explain quantum computing in simple terms for a 10-year-old'
});
// Access the generated text
console.log('Generated text:');
console.log(response.text);
// Access full response metadata
console.log('\nToken usage:');
console.log('- Prompt tokens:', response.usageMetadata?.promptTokenCount);
console.log('- Response tokens:', response.usageMetadata?.candidatesTokenCount);
console.log('- Total tokens:', response.usageMetadata?.totalTokenCount);
// Check finish reason
console.log('\nFinish reason:', response.candidates?.[0]?.finishReason);
// Possible values: "STOP" (normal), "MAX_TOKENS", "SAFETY", "OTHER"
} catch (error: any) {
console.error('Error generating content:');
if (error.status === 401) {
console.error('❌ Invalid API key. Set GEMINI_API_KEY environment variable.');
} else if (error.status === 429) {
console.error('❌ Rate limit exceeded. Try again later or implement exponential backoff.');
} else if (error.status === 404) {
console.error('❌ Model not found. Use: gemini-2.5-pro, gemini-2.5-flash, or gemini-2.5-flash-lite');
} else {
console.error('❌', error.message);
}
}
}
main();
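// A sketch of the exponential backoff suggested in the 429 branch above.
// Retry counts and delays are illustrative choices, not API requirements;
// error.status is the same field checked in the handler above.
async function withRetry<T>(fn: () => Promise<T>, maxRetries = 3): Promise<T> {
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (error: any) {
      if (error.status !== 429 || attempt >= maxRetries) throw error;
      const delayMs = 1000 * 2 ** attempt; // 1s, 2s, 4s, ...
      console.warn(`Rate limited; retrying in ${delayMs}ms`);
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
}
// Usage sketch:
// const response = await withRetry(() => ai.models.generateContent({
//   model: 'gemini-2.5-flash',
//   contents: '...'
// }));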

View File

@@ -0,0 +1,124 @@
/**
* Basic Text Generation with Gemini API (Fetch - Cloudflare Workers)
*
* Demonstrates:
* - Direct REST API calls using fetch (no SDK dependencies)
* - Perfect for Cloudflare Workers, Deno, Bun, or edge runtimes
* - Manual JSON parsing
* - Error handling with fetch
*
* Prerequisites:
* - Set GEMINI_API_KEY environment variable (or use env.GEMINI_API_KEY in Workers)
*/
/**
* Example for Cloudflare Workers
*/
interface Env {
GEMINI_API_KEY: string;
}
export default {
async fetch(request: Request, env: Env): Promise<Response> {
try {
// Make direct API call to Gemini
const response = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-goog-api-key': env.GEMINI_API_KEY,
},
body: JSON.stringify({
contents: [
{
parts: [
{
text: 'Explain quantum computing in simple terms for a 10-year-old'
}
]
}
]
}),
}
);
// Check for HTTP errors
if (!response.ok) {
const errorData: any = await response.json();
return new Response(
JSON.stringify({
error: errorData.error?.message || 'Unknown error',
status: response.status
}),
{ status: response.status, headers: { 'Content-Type': 'application/json' } }
);
}
// Parse response
const data: any = await response.json();
// Extract text from response structure
const generatedText = data.candidates?.[0]?.content?.parts?.[0]?.text;
const usageMetadata = data.usageMetadata;
const finishReason = data.candidates[0]?.finishReason;
return new Response(
JSON.stringify({
text: generatedText,
usage: {
promptTokens: usageMetadata.promptTokenCount,
responseTokens: usageMetadata.candidatesTokenCount,
totalTokens: usageMetadata.totalTokenCount
},
finishReason
}),
{ headers: { 'Content-Type': 'application/json' } }
);
} catch (error: any) {
return new Response(
JSON.stringify({ error: error.message }),
{ status: 500, headers: { 'Content-Type': 'application/json' } }
);
}
}
};
/**
* Example for Node.js/Standalone
*/
async function mainNodeJS() {
const GEMINI_API_KEY = process.env.GEMINI_API_KEY;
if (!GEMINI_API_KEY) {
throw new Error('GEMINI_API_KEY environment variable not set');
}
const response = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-goog-api-key': GEMINI_API_KEY,
},
body: JSON.stringify({
contents: [
{
parts: [
{ text: 'Explain quantum computing in simple terms' }
]
}
]
}),
}
);
const data = await response.json();
console.log(data.candidates[0].content.parts[0].text);
}
// Uncomment to run in Node.js
// mainNodeJS();
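/**
 * The REST body also accepts a generationConfig object for sampling controls.
 * A hedged sketch — field names follow the public REST reference, and the
 * values are illustrative, not recommendations:
 */
async function withGenerationConfig(apiKey: string) {
  const response = await fetch(
    'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent',
    {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-goog-api-key': apiKey,
      },
      body: JSON.stringify({
        contents: [{ parts: [{ text: 'Write a haiku about the ocean' }] }],
        generationConfig: {
          temperature: 0.7,     // sampling randomness
          maxOutputTokens: 256, // cap on response length
          topP: 0.95            // nucleus sampling
        }
      }),
    }
  );
  const data: any = await response.json();
  console.log(data.candidates?.[0]?.content?.parts?.[0]?.text);
}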

137
templates/thinking-mode.ts Normal file
View File

@@ -0,0 +1,137 @@
/**
* Thinking Mode Configuration with Gemini API
*
* Demonstrates:
* - Thinking mode (enabled by default on Gemini 2.5 models)
* - Configuring thinking budget
* - When to use thinking mode
* - Impact on latency and quality
*
* Prerequisites:
* - npm install @google/genai@1.27.0
* - export GEMINI_API_KEY="..."
*
* Thinking mode is ENABLED BY DEFAULT on Gemini 2.5 models (and cannot be disabled on gemini-2.5-pro)
*/
import { GoogleGenAI } from '@google/genai';
async function main() {
const ai = new GoogleGenAI({
apiKey: process.env.GEMINI_API_KEY,
});
try {
// Example 1: Default thinking budget
console.log('Example 1: Default Thinking Budget\n');
console.log('Prompt: Solve this complex math problem:\n');
console.log('If a train travels 120 km in 1.5 hours, then slows down to 60 km/h for 45 minutes, how far has it traveled total?\n');
const response1 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'If a train travels 120 km in 1.5 hours, then slows down to 60 km/h for 45 minutes, how far has it traveled total?'
// No thinkingConfig = uses default budget
});
console.log('Answer:', response1.text);
console.log('\nToken usage:', response1.usageMetadata);
console.log('\n---\n');
// Example 2: Increased thinking budget for complex reasoning
console.log('Example 2: Increased Thinking Budget (8192 tokens)\n');
console.log('Prompt: Complex logic puzzle\n');
const response2 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: `
Three people (Alice, Bob, Carol) have different jobs (doctor, engineer, teacher).
Clues:
1. Alice is not a doctor
2. The engineer is older than Bob
3. Carol is younger than the teacher
4. The doctor is the youngest
Who has which job?
`,
config: {
thinkingConfig: {
thinkingBudget: 8192 // Increase budget for complex reasoning
}
}
});
console.log('Answer:', response2.text);
console.log('\nToken usage:', response2.usageMetadata);
console.log('\n---\n');
// Example 3: Comparison with gemini-2.5-pro (more thinking capability)
console.log('Example 3: Using gemini-2.5-pro for Advanced Reasoning\n');
console.log('Prompt: Multi-step code optimization problem\n');
const response3 = await ai.models.generateContent({
model: 'gemini-2.5-pro', // Pro model has better reasoning
contents: `
Optimize this Python code for better performance:
def find_duplicates(arr):
duplicates = []
for i in range(len(arr)):
for j in range(i + 1, len(arr)):
if arr[i] == arr[j] and arr[i] not in duplicates:
duplicates.append(arr[i])
return duplicates
Explain your optimization strategy step by step.
`,
config: {
thinkingConfig: {
thinkingBudget: 8192
}
}
});
console.log('Optimization:', response3.text);
console.log('\nToken usage:', response3.usageMetadata);
} catch (error: any) {
console.error('Error:', error.message);
}
}
/**
* Thinking Mode Guidelines:
*
* What is Thinking Mode?
* - Gemini 2.5 models "think" before responding, improving accuracy
* - The model internally reasons through the problem
* - This happens transparently (you don't see the thinking process)
*
* Thinking Budget:
* - Controls max tokens allocated for internal reasoning
* - Higher budget = more thorough reasoning (may increase latency)
* - Default budget is usually sufficient for most tasks
*
* When to Increase Budget:
* ✅ Complex math/logic problems
* ✅ Multi-step reasoning tasks
* ✅ Code optimization challenges
* ✅ Detailed analysis requiring careful consideration
*
* When Default is Fine:
* ⏺️ Simple factual questions
* ⏺️ Creative writing
* ⏺️ Translation
* ⏺️ Summarization
*
* Model Comparison:
* - gemini-2.5-pro: Best for complex reasoning, higher default thinking budget
* - gemini-2.5-flash: Good balance, suitable for most thinking tasks
* - gemini-2.5-flash-lite: Basic thinking, optimized for speed
*
* Important Notes:
* - Thinking cannot be disabled on gemini-2.5-pro; Flash models accept thinkingBudget: 0
* - Thinking tokens count toward total usage
* - Higher thinking budget may increase latency slightly
*/
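/**
 * A sketch for inspecting how many tokens went to internal reasoning. The
 * usageMetadata field name (thoughtsTokenCount) is taken from the public API
 * reference; treat it as an assumption if your SDK version reports it differently.
 */
function logThinkingUsage(response: { usageMetadata?: any }) {
  const usage = response.usageMetadata;
  if (!usage) return;
  console.log('Thinking tokens:', usage.thoughtsTokenCount ?? 0);
  console.log('Output tokens:', usage.candidatesTokenCount);
  console.log('Total (includes thinking):', usage.totalTokenCount);
}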
main();