commit 8aebb293cd10dec83dec4422d0d29342f3f297e8
Author: Zhongwei Li
Date: Sun Nov 30 08:24:51 2025 +0800

    Initial commit

diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
new file mode 100644
index 0000000..0dc4fe6
--- /dev/null
+++ b/.claude-plugin/plugin.json
@@ -0,0 +1,12 @@
+{
+  "name": "google-gemini-api",
+  "description": "Integrate Gemini API with correct current SDK (@google/genai v1.27+, NOT deprecated @google/generative-ai). Supports text generation, multimodal (images/video/audio/PDFs), function calling, and thinking mode. 1M input tokens. Use when: integrating Gemini API, implementing multimodal AI, using thinking mode for reasoning, function calling with parallel execution, streaming responses, deploying to Cloudflare Workers, building chat, or troubleshooting SDK deprecation, context window, model not found",
+  "version": "1.0.0",
+  "author": {
+    "name": "Jeremy Dawes",
+    "email": "jeremy@jezweb.net"
+  },
+  "skills": [
+    "./"
+  ]
+}
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5243819
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# google-gemini-api
+
+Integrate Gemini API with correct current SDK (@google/genai v1.27+, NOT deprecated @google/generative-ai). Supports text generation, multimodal (images/video/audio/PDFs), function calling, and thinking mode. 1M input tokens. Use when: integrating Gemini API, implementing multimodal AI, using thinking mode for reasoning, function calling with parallel execution, streaming responses, deploying to Cloudflare Workers, building chat, or troubleshooting SDK deprecation, context window, model not found
diff --git a/SKILL.md b/SKILL.md
new file mode 100644
index 0000000..098420b
--- /dev/null
+++ b/SKILL.md
@@ -0,0 +1,2173 @@
+---
+name: google-gemini-api
+description: |
+  Integrate Gemini API with correct current SDK (@google/genai v1.27+, NOT deprecated @google/generative-ai).
+  Supports text generation, multimodal (images/video/audio/PDFs), function calling, and thinking mode. 1M input tokens.
+
+  Use when: integrating Gemini API, implementing multimodal AI, using thinking mode for reasoning, function calling
+  with parallel execution, streaming responses, deploying to Cloudflare Workers, building chat, or troubleshooting
+  SDK deprecation, context window, model not found, function calling, or multimodal format errors.
+
+  Keywords: gemini api, @google/genai, gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite,
+  gemini-3-pro-preview, multimodal gemini, thinking mode, google ai, genai sdk, function calling gemini,
+  streaming gemini, gemini vision, gemini video, gemini audio, gemini pdf, system instructions,
+  multi-turn chat, DEPRECATED @google/generative-ai, gemini context window, gemini models 2025,
+  gemini 1m tokens, gemini tool use, parallel function calling, compositional function calling, gemini 3
+license: MIT
+---
+
+# Google Gemini API - Complete Guide
+
+**Version**: Phase 2 Complete + Gemini 3 ✅
+**Package**: @google/genai@1.30.0 (⚠️ NOT @google/generative-ai)
+**Last Updated**: 2025-11-26 (Package update + FileSearch preview)
+
+---
+
+## ⚠️ CRITICAL SDK MIGRATION WARNING
+
+**DEPRECATED SDK**: `@google/generative-ai` (sunset November 30, 2025)
+**CURRENT SDK**: `@google/genai` v1.27+
+
+**If you see code using `@google/generative-ai`, it's outdated!**
+
+This skill uses the **correct current SDK** and provides a complete migration guide.
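A quick visual check: the import line alone tells you which SDK a file is using.

```typescript
// ❌ Deprecated (needs migration if you see this):
// import { GoogleGenerativeAI } from '@google/generative-ai';

// ✅ Current SDK:
import { GoogleGenAI } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
```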
+ +--- + +## Status + +**✅ Phase 1 Complete**: +- ✅ Text Generation (basic + streaming) +- ✅ Multimodal Inputs (images, video, audio, PDFs) +- ✅ Function Calling (basic + parallel execution) +- ✅ System Instructions & Multi-turn Chat +- ✅ Thinking Mode Configuration +- ✅ Generation Parameters (temperature, top-p, top-k, stop sequences) +- ✅ Both Node.js SDK (@google/genai) and fetch approaches + +**✅ Phase 2 Complete**: +- ✅ Context Caching (cost optimization with TTL-based caching) +- ✅ Code Execution (built-in Python interpreter and sandbox) +- ✅ Grounding with Google Search (real-time web information + citations) + +**📦 Separate Skills**: +- **Embeddings**: See `google-gemini-embeddings` skill for text-embedding-004 + +--- + +## Table of Contents + +**Phase 1 - Core Features**: +1. [Quick Start](#quick-start) +2. [Current Models (2025)](#current-models-2025) +3. [SDK vs Fetch Approaches](#sdk-vs-fetch-approaches) +4. [Text Generation](#text-generation) +5. [Streaming](#streaming) +6. [Multimodal Inputs](#multimodal-inputs) +7. [Function Calling](#function-calling) +8. [System Instructions](#system-instructions) +9. [Multi-turn Chat](#multi-turn-chat) +10. [Thinking Mode](#thinking-mode) +11. [Generation Configuration](#generation-configuration) + +**Phase 2 - Advanced Features**: +12. [Context Caching](#context-caching) +13. [Code Execution](#code-execution) +14. [Grounding with Google Search](#grounding-with-google-search) + +**Common Reference**: +15. [Error Handling](#error-handling) +16. [Rate Limits](#rate-limits) +17. [SDK Migration Guide](#sdk-migration-guide) +18. [Production Best Practices](#production-best-practices) + +--- + +## Quick Start + +### Installation + +**CORRECT SDK:** +```bash +npm install @google/genai@1.30.0 +``` + +**❌ WRONG (DEPRECATED):** +```bash +npm install @google/generative-ai # DO NOT USE! +``` + +### Environment Setup + +```bash +export GEMINI_API_KEY="..." +``` + +Or create `.env` file: +``` +GEMINI_API_KEY=... +``` + +### First Text Generation (Node.js SDK) + +```typescript +import { GoogleGenAI } from '@google/genai'; + +const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY }); + +const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: 'Explain quantum computing in simple terms' +}); + +console.log(response.text); +``` + +### First Text Generation (Fetch - Cloudflare Workers) + +```typescript +const response = await fetch( + `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`, + { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'x-goog-api-key': env.GEMINI_API_KEY, + }, + body: JSON.stringify({ + contents: [{ parts: [{ text: 'Explain quantum computing in simple terms' }] }] + }), + } +); + +const data = await response.json(); +console.log(data.candidates[0].content.parts[0].text); +``` + +--- + +## Current Models (2025) + +### Gemini 3 Series (Preview - November 2025) + +#### gemini-3-pro-preview +- **Context**: TBD (documentation pending) +- **Status**: 🆕 Preview release (November 18, 2025) +- **Description**: Google's newest and most intelligent AI model with state-of-the-art reasoning +- **Best for**: Most complex reasoning tasks, advanced multimodal understanding, benchmark-critical applications +- **Features**: Enhanced multimodal (text, image, video, audio, PDF), function calling, streaming +- **Benchmark Performance**: Outperforms Gemini 2.5 Pro on every major AI benchmark +- **⚠️ Preview**: Use for evaluation. 
Consider gemini-2.5-pro for production until stable release + +### Gemini 2.5 Series (General Availability - Stable) + +#### gemini-2.5-pro +- **Context**: 1,048,576 input tokens / 65,536 output tokens +- **Description**: State-of-the-art thinking model for complex reasoning +- **Best for**: Code, math, STEM, complex problem-solving +- **Features**: Thinking mode (default on), function calling, multimodal, streaming +- **Knowledge cutoff**: January 2025 + +#### gemini-2.5-flash +- **Context**: 1,048,576 input tokens / 65,536 output tokens +- **Description**: Best price-performance workhorse model +- **Best for**: Large-scale processing, low-latency, high-volume, agentic use cases +- **Features**: Thinking mode (default on), function calling, multimodal, streaming +- **Knowledge cutoff**: January 2025 + +#### gemini-2.5-flash-lite +- **Context**: 1,048,576 input tokens / 65,536 output tokens +- **Description**: Cost-optimized, fastest 2.5 model +- **Best for**: High throughput, cost-sensitive applications +- **Features**: Thinking mode (default on), function calling, multimodal, streaming +- **Knowledge cutoff**: January 2025 + +### Model Feature Matrix + +| Feature | 3-Pro (Preview) | 2.5-Pro | 2.5-Flash | 2.5-Flash-Lite | +|---------|-----------------|---------|-----------|----------------| +| Thinking Mode | TBD | ✅ Default ON | ✅ Default ON | ✅ Default ON | +| Function Calling | ✅ | ✅ | ✅ | ✅ | +| Multimodal | ✅ Enhanced | ✅ | ✅ | ✅ | +| Streaming | ✅ | ✅ | ✅ | ✅ | +| System Instructions | ✅ | ✅ | ✅ | ✅ | +| Context Window | TBD | 1,048,576 in | 1,048,576 in | 1,048,576 in | +| Output Tokens | TBD | 65,536 max | 65,536 max | 65,536 max | +| Status | Preview | Stable | Stable | Stable | + +### ⚠️ Context Window Correction + +**ACCURATE (Gemini 2.5)**: Gemini 2.5 models support **1,048,576 input tokens** (NOT 2M!) +**OUTDATED**: Only Gemini 1.5 Pro (previous generation) had 2M token context window +**GEMINI 3**: Context window specifications pending official documentation + +**Common mistake**: Claiming Gemini 2.5 has 2M tokens. It doesn't. This skill prevents this error. 
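If you need to verify that a large prompt actually fits the 1,048,576-token input window, the SDK exposes a token counter. A minimal sketch, assuming `longDocumentText` holds your candidate input:

```typescript
import { GoogleGenAI } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

const MAX_INPUT_TOKENS = 1_048_576; // Gemini 2.5 input limit

// countTokens reports the prompt size without generating anything
const { totalTokens } = await ai.models.countTokens({
  model: 'gemini-2.5-flash',
  contents: longDocumentText,
});

if ((totalTokens ?? 0) > MAX_INPUT_TOKENS) {
  throw new Error(`Input is ${totalTokens} tokens; the limit is ${MAX_INPUT_TOKENS}`);
}
```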
---

## SDK vs Fetch Approaches

### Node.js SDK (@google/genai)

**Pros:**
- Type-safe with TypeScript
- Easier API (simpler syntax)
- Built-in chat helpers
- Automatic SSE parsing for streaming
- Better error handling

**Cons:**
- Requires Node.js or compatible runtime
- Larger bundle size
- May not work in all edge runtimes

**Use when:** Building Node.js apps, Next.js Server Actions/Components, or any environment with Node.js compatibility

### Fetch-based (Direct REST API)

**Pros:**
- Works in **any** JavaScript environment (Cloudflare Workers, Deno, Bun, browsers)
- Minimal dependencies
- Smaller bundle size
- Full control over requests

**Cons:**
- More verbose syntax
- Manual SSE parsing for streaming
- No built-in chat helpers
- Manual error handling

**Use when:** Deploying to Cloudflare Workers, browser clients, or lightweight edge runtimes

---

## Text Generation

### Basic Text Generation (SDK)

```typescript
import { GoogleGenAI } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Write a haiku about artificial intelligence'
});

console.log(response.text);
```

### Basic Text Generation (Fetch)

```typescript
const response = await fetch(
  `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
  {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': env.GEMINI_API_KEY,
    },
    body: JSON.stringify({
      contents: [
        {
          parts: [
            { text: 'Write a haiku about artificial intelligence' }
          ]
        }
      ]
    }),
  }
);

const data = await response.json();
console.log(data.candidates[0].content.parts[0].text);
```

### Response Structure

```typescript
{
  text: string,              // Convenience accessor for text content
  candidates: [
    {
      content: {
        parts: [
          { text: string }   // Generated text
        ],
        role: string         // "model"
      },
      finishReason: string,  // "STOP" | "MAX_TOKENS" | "SAFETY" | "OTHER"
      index: number
    }
  ],
  usageMetadata: {
    promptTokenCount: number,
    candidatesTokenCount: number,
    totalTokenCount: number
  }
}
```

---

## Streaming

### Streaming with SDK (Async Iteration)

```typescript
const response = await ai.models.generateContentStream({
  model: 'gemini-2.5-flash',
  contents: 'Write a 200-word story about time travel'
});

for await (const chunk of response) {
  process.stdout.write(chunk.text);
}
```

### Streaming with Fetch (SSE Parsing)

```typescript
// Note: ?alt=sse is required. Without it the REST API streams a JSON array
// rather than Server-Sent Events, and this parser will not work.
const response = await fetch(
  `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse`,
  {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': env.GEMINI_API_KEY,
    },
    body: JSON.stringify({
      contents: [{ parts: [{ text: 'Write a 200-word story about time travel' }] }]
    }),
  }
);

const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';

while (true) {
  const { done, value } = await reader.read();
  if (done) break;

  buffer += decoder.decode(value, { stream: true });
  const lines = buffer.split('\n');
  buffer = lines.pop() || '';

  for (const line of lines) {
    if (line.trim() === '' || line.startsWith('data: [DONE]')) continue;
    if (!line.startsWith('data: ')) continue;

    try {
      const data = JSON.parse(line.slice(6));
      const text = data.candidates[0]?.content?.parts[0]?.text;
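      // Final chunks may carry only usageMetadata/finishReason with no text
      // part, which is why the guard below is needed.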
+ if (text) { + process.stdout.write(text); + } + } catch (e) { + // Skip invalid JSON + } + } +} +``` + +**Key Points:** +- Use `streamGenerateContent` endpoint (not `generateContent`) +- Parse Server-Sent Events (SSE) format: `data: {json}\n\n` +- Handle incomplete chunks in buffer +- Skip empty lines and `[DONE]` markers + +--- + +## Multimodal Inputs + +Gemini 2.5 models support text + images + video + audio + PDFs in the same request. + +### Images (Vision) + +#### SDK Approach + +```typescript +import { GoogleGenAI } from '@google/genai'; +import fs from 'fs'; + +const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY }); + +// From file +const imageData = fs.readFileSync('/path/to/image.jpg'); +const base64Image = imageData.toString('base64'); + +const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: [ + { + parts: [ + { text: 'What is in this image?' }, + { + inlineData: { + data: base64Image, + mimeType: 'image/jpeg' + } + } + ] + } + ] +}); + +console.log(response.text); +``` + +#### Fetch Approach + +```typescript +const imageData = fs.readFileSync('/path/to/image.jpg'); +const base64Image = imageData.toString('base64'); + +const response = await fetch( + `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`, + { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'x-goog-api-key': env.GEMINI_API_KEY, + }, + body: JSON.stringify({ + contents: [ + { + parts: [ + { text: 'What is in this image?' }, + { + inlineData: { + data: base64Image, + mimeType: 'image/jpeg' + } + } + ] + } + ] + }), + } +); + +const data = await response.json(); +console.log(data.candidates[0].content.parts[0].text); +``` + +**Supported Image Formats:** +- JPEG (`.jpg`, `.jpeg`) +- PNG (`.png`) +- WebP (`.webp`) +- HEIC (`.heic`) +- HEIF (`.heif`) + +**Max Image Size**: 20MB per image + +### Video + +```typescript +// Video must be < 2 minutes for inline data +const videoData = fs.readFileSync('/path/to/video.mp4'); +const base64Video = videoData.toString('base64'); + +const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: [ + { + parts: [ + { text: 'Describe what happens in this video' }, + { + inlineData: { + data: base64Video, + mimeType: 'video/mp4' + } + } + ] + } + ] +}); + +console.log(response.text); +``` + +**Supported Video Formats:** +- MP4 (`.mp4`) +- MPEG (`.mpeg`) +- MOV (`.mov`) +- AVI (`.avi`) +- FLV (`.flv`) +- MPG (`.mpg`) +- WebM (`.webm`) +- WMV (`.wmv`) + +**Max Video Length (inline)**: 2 minutes +**Max Video Size**: 2GB (use File API for larger files - Phase 2) + +### Audio + +```typescript +const audioData = fs.readFileSync('/path/to/audio.mp3'); +const base64Audio = audioData.toString('base64'); + +const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: [ + { + parts: [ + { text: 'Transcribe and summarize this audio' }, + { + inlineData: { + data: base64Audio, + mimeType: 'audio/mp3' + } + } + ] + } + ] +}); + +console.log(response.text); +``` + +**Supported Audio Formats:** +- MP3 (`.mp3`) +- WAV (`.wav`) +- FLAC (`.flac`) +- AAC (`.aac`) +- OGG (`.ogg`) +- OPUS (`.opus`) + +**Max Audio Size**: 20MB + +### PDFs + +```typescript +const pdfData = fs.readFileSync('/path/to/document.pdf'); +const base64Pdf = pdfData.toString('base64'); + +const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: [ + { + parts: [ + { text: 'Summarize the key points in this PDF' }, + { + inlineData: { 
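          // Raw base64 only (no "data:application/pdf;base64," prefix)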
+ data: base64Pdf, + mimeType: 'application/pdf' + } + } + ] + } + ] +}); + +console.log(response.text); +``` + +**Max PDF Size**: 30MB +**PDF Limitations**: Text-based PDFs work best; scanned images may have lower accuracy + +### Multiple Inputs + +You can combine multiple modalities in one request: + +```typescript +const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: [ + { + parts: [ + { text: 'Compare these two images and describe the differences:' }, + { inlineData: { data: base64Image1, mimeType: 'image/jpeg' } }, + { inlineData: { data: base64Image2, mimeType: 'image/jpeg' } } + ] + } + ] +}); +``` + +--- + +## Function Calling + +Gemini supports function calling (tool use) to connect models with external APIs and systems. + +### Basic Function Calling (SDK) + +```typescript +import { GoogleGenAI, FunctionCallingConfigMode } from '@google/genai'; + +const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY }); + +// Define function declarations +const getCurrentWeather = { + name: 'get_current_weather', + description: 'Get the current weather for a location', + parametersJsonSchema: { + type: 'object', + properties: { + location: { + type: 'string', + description: 'City name, e.g. San Francisco' + }, + unit: { + type: 'string', + enum: ['celsius', 'fahrenheit'] + } + }, + required: ['location'] + } +}; + +// Make request with tools +const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: 'What\'s the weather in Tokyo?', + config: { + tools: [ + { functionDeclarations: [getCurrentWeather] } + ] + } +}); + +// Check if model wants to call a function +const functionCall = response.candidates[0].content.parts[0].functionCall; + +if (functionCall) { + console.log('Function to call:', functionCall.name); + console.log('Arguments:', functionCall.args); + + // Execute the function (your implementation) + const weatherData = await fetchWeather(functionCall.args.location); + + // Send function result back to model + const finalResponse = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: [ + 'What\'s the weather in Tokyo?', + response.candidates[0].content, // Original assistant response with function call + { + parts: [ + { + functionResponse: { + name: functionCall.name, + response: weatherData + } + } + ] + } + ], + config: { + tools: [ + { functionDeclarations: [getCurrentWeather] } + ] + } + }); + + console.log(finalResponse.text); +} +``` + +### Function Calling (Fetch) + +```typescript +const response = await fetch( + `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`, + { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'x-goog-api-key': env.GEMINI_API_KEY, + }, + body: JSON.stringify({ + contents: [ + { parts: [{ text: 'What\'s the weather in Tokyo?' 
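          // "role" is optional on single-turn requests; it defaults to "user"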
}] }
      ],
      tools: [
        {
          functionDeclarations: [
            {
              name: 'get_current_weather',
              description: 'Get the current weather for a location',
              parameters: {
                type: 'object',
                properties: {
                  location: {
                    type: 'string',
                    description: 'City name'
                  }
                },
                required: ['location']
              }
            }
          ]
        }
      ]
    }),
  }
);

const data = await response.json();
const functionCall = data.candidates[0]?.content?.parts[0]?.functionCall;

if (functionCall) {
  // Execute function and send result back (same flow as SDK)
}
```

### Parallel Function Calling

Gemini can call multiple independent functions simultaneously:

```typescript
const tools = [
  {
    functionDeclarations: [
      {
        name: 'get_weather',
        description: 'Get weather for a location',
        parametersJsonSchema: {
          type: 'object',
          properties: {
            location: { type: 'string' }
          },
          required: ['location']
        }
      },
      {
        name: 'get_population',
        description: 'Get population of a city',
        parametersJsonSchema: {
          type: 'object',
          properties: {
            city: { type: 'string' }
          },
          required: ['city']
        }
      }
    ]
  }
];

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'What is the weather and population of Tokyo?',
  config: { tools }
});

// Model may return MULTIPLE function calls in parallel
const functionCalls = response.candidates[0].content.parts.filter(
  part => part.functionCall
);

console.log(`Model wants to call ${functionCalls.length} functions in parallel`);
```

### Function Calling Modes

```typescript
import { FunctionCallingConfigMode } from '@google/genai';

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'What\'s the weather?',
  config: {
    tools: [{ functionDeclarations: [getCurrentWeather] }],
    toolConfig: {
      functionCallingConfig: {
        mode: FunctionCallingConfigMode.ANY,   // Force function call
        // mode: FunctionCallingConfigMode.AUTO, // Model decides (default)
        // mode: FunctionCallingConfigMode.NONE, // Never call functions
        allowedFunctionNames: ['get_current_weather'] // Optional: restrict to specific functions
      }
    }
  }
});
```

**Modes:**
- `AUTO` (default): Model decides whether to call functions
- `ANY`: Force model to call at least one function
- `NONE`: Disable function calling for this request

---

## System Instructions

System instructions guide the model's behavior and set context. They are **separate** from the conversation messages.

### SDK Approach

```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  config: {
    systemInstruction: 'You are a helpful AI assistant that always responds in the style of a pirate. Use nautical terminology and end sentences with "arrr".'
  },
  contents: 'Explain what a database is'
});

console.log(response.text);
// Output: "Ahoy there! A database be like a treasure chest..."
```

### Fetch Approach

```typescript
const response = await fetch(
  `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
  {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': env.GEMINI_API_KEY,
    },
    body: JSON.stringify({
      systemInstruction: {
        parts: [
          { text: 'You are a helpful AI assistant that always responds in the style of a pirate.'
}
        ]
      },
      contents: [
        { parts: [{ text: 'Explain what a database is' }] }
      ]
    }),
  }
);
```

**Key Points:**
- System instructions are **NOT** part of the `contents` array
- They are set once per request: at the top level of the REST body, or under `config.systemInstruction` in the SDK
- They persist for the entire conversation (when using multi-turn chat)
- They don't count as user or model messages

---

## Multi-turn Chat

For conversations with history, use the SDK's chat helpers or manually manage conversation state.

### SDK Chat Helpers (Recommended)

```typescript
const chat = ai.chats.create({
  model: 'gemini-2.5-flash',
  config: {
    systemInstruction: 'You are a helpful coding assistant.'
  },
  history: [] // Start empty or with previous messages
});

// Send first message
const response1 = await chat.sendMessage({ message: 'What is TypeScript?' });
console.log('Assistant:', response1.text);

// Send follow-up (context is automatically maintained)
const response2 = await chat.sendMessage({ message: 'How do I install it?' });
console.log('Assistant:', response2.text);

// Get full chat history
const history = chat.getHistory();
console.log('Full conversation:', history);
```

### Manual Chat Management (Fetch)

```typescript
const conversationHistory = [];

// First turn
const response1 = await fetch(
  `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
  {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': env.GEMINI_API_KEY,
    },
    body: JSON.stringify({
      contents: [
        {
          role: 'user',
          parts: [{ text: 'What is TypeScript?' }]
        }
      ]
    }),
  }
);

const data1 = await response1.json();
const assistantReply1 = data1.candidates[0].content.parts[0].text;

// Add to history
conversationHistory.push(
  { role: 'user', parts: [{ text: 'What is TypeScript?' }] },
  { role: 'model', parts: [{ text: assistantReply1 }] }
);

// Second turn (include full history)
const response2 = await fetch(
  `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
  {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': env.GEMINI_API_KEY,
    },
    body: JSON.stringify({
      contents: [
        ...conversationHistory,
        { role: 'user', parts: [{ text: 'How do I install it?' }] }
      ]
    }),
  }
);
```

**Message Roles:**
- `user`: User messages
- `model`: Assistant responses

**⚠️ Important**: Chat helpers are **SDK-only**. With fetch, you must manually manage conversation history.

---

## Thinking Mode

Gemini 2.5 models have **thinking mode enabled by default** for enhanced quality. You can configure the thinking budget.

### Configure Thinking Budget (SDK)

```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Solve this complex math problem: ...',
  config: {
    thinkingConfig: {
      thinkingBudget: 8192 // Max tokens for thinking (default: model-dependent)
    }
  }
});
```

### Configure Thinking Budget (Fetch)

```typescript
const response = await fetch(
  `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
  {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': env.GEMINI_API_KEY,
    },
    body: JSON.stringify({
      contents: [{ parts: [{ text: 'Solve this complex math problem: ...'
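      // REST nests thinkingConfig under generationConfig; the SDK nests it under config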
}] }],
      generationConfig: {
        thinkingConfig: {
          thinkingBudget: 8192
        }
      }
    }),
  }
);
```

### Configure Thinking Level (SDK) - New in v1.30.0

```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Solve this complex problem: ...',
  config: {
    thinkingConfig: {
      thinkingLevel: 'MEDIUM' // 'LOW' | 'MEDIUM' | 'HIGH'
    }
  }
});
```

**Thinking Levels:**
- `LOW`: Minimal internal reasoning (faster, lower quality)
- `MEDIUM`: Balanced reasoning (default)
- `HIGH`: Maximum reasoning depth (slower, higher quality)

**Key Points:**
- Thinking is enabled by default on all Gemini 2.5 models; on Flash and Flash-Lite it can be turned off with `thinkingBudget: 0`, while 2.5 Pro cannot disable thinking
- Higher thinking budgets allow more internal reasoning (and may increase latency)
- `thinkingLevel` provides simpler control than `thinkingBudget` (new in v1.30.0)
- The default budget varies by model and is usually sufficient for most tasks
- Only increase the budget/level for very complex reasoning tasks

---

## Generation Configuration

Customize model behavior with generation parameters.

### All Configuration Options (SDK)

```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Write a creative story',
  config: {
    temperature: 0.9,               // Randomness (0.0-2.0, default: 1.0)
    topP: 0.95,                     // Nucleus sampling (0.0-1.0)
    topK: 40,                       // Top-k sampling
    maxOutputTokens: 2048,          // Max tokens to generate
    stopSequences: ['END'],         // Stop generation if these appear
    responseMimeType: 'text/plain', // Or 'application/json' for JSON mode
    candidateCount: 1               // Number of response candidates (usually 1)
  }
});
```

### All Configuration Options (Fetch)

```typescript
const response = await fetch(
  `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
  {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': env.GEMINI_API_KEY,
    },
    body: JSON.stringify({
      contents: [{ parts: [{ text: 'Write a creative story' }] }],
      generationConfig: {
        temperature: 0.9,
        topP: 0.95,
        topK: 40,
        maxOutputTokens: 2048,
        stopSequences: ['END'],
        responseMimeType: 'text/plain',
        candidateCount: 1
      }
    }),
  }
);
```

### Parameter Guidelines

| Parameter | Range | Default | Use Case |
|-----------|-------|---------|----------|
| **temperature** | 0.0-2.0 | 1.0 | Lower = more focused, higher = more creative |
| **topP** | 0.0-1.0 | 0.95 | Nucleus sampling threshold |
| **topK** | 1-100+ | 40 | Limit sampling to the top K tokens |
| **maxOutputTokens** | 1-65536 | Model max | Control response length |
| **stopSequences** | Array | None | Stop generation at specific strings |

**Tips:**
- For **factual tasks**: Use low temperature (0.0-0.3)
- For **creative tasks**: Use high temperature (0.7-1.5)
- **topP** and **topK** both control randomness; use one or the other (not both)
- Always set **maxOutputTokens** to prevent excessive generation

---

## Context Caching

Context caching allows you to cache frequently used content (like system instructions, large documents, or video files) to reduce costs by **up to 90%** and improve latency.

### How It Works

1. **Create a cache** with your repeated content
2. **Reference the cache** in subsequent requests
3. **Save tokens** - cached tokens cost significantly less
4.
**TTL management** - caches expire after the specified time

### Benefits

- **Cost savings**: Up to 90% reduction on cached tokens
- **Reduced latency**: Faster responses by reusing processed content
- **Consistent context**: Same large context across multiple requests

### Cache Creation (SDK)

```typescript
import { GoogleGenAI } from '@google/genai';
import fs from 'fs';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

// Create a cache for a large document
const documentText = fs.readFileSync('./large-document.txt', 'utf-8');

const cache = await ai.caches.create({
  model: 'gemini-2.5-flash', // Prefer an explicit version suffix (e.g. gemini-2.5-flash-001) for caching
  config: {
    displayName: 'large-doc-cache', // Identifier for the cache
    systemInstruction: 'You are an expert at analyzing legal documents.',
    contents: documentText,
    ttl: '3600s', // Cache for 1 hour
  }
});

console.log('Cache created:', cache.name);
console.log('Expires at:', cache.expireTime);
```

### Cache Creation (Fetch)

```typescript
const response = await fetch(
  'https://generativelanguage.googleapis.com/v1beta/cachedContents',
  {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': env.GEMINI_API_KEY,
    },
    body: JSON.stringify({
      model: 'models/gemini-2.5-flash',
      displayName: 'large-doc-cache',
      systemInstruction: {
        parts: [{ text: 'You are an expert at analyzing legal documents.' }]
      },
      contents: [
        { parts: [{ text: documentText }] }
      ],
      ttl: '3600s'
    }),
  }
);

const cache = await response.json();
console.log('Cache created:', cache.name);
```

### Using a Cache (SDK)

```typescript
// Keep the original model name and reference the cache via
// config.cachedContent (the cache name is NOT used as the model).
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  config: { cachedContent: cache.name },
  contents: 'Summarize the key points in the document'
});

console.log(response.text);
```

### Using a Cache (Fetch)

```typescript
const response = await fetch(
  `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
  {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': env.GEMINI_API_KEY,
    },
    body: JSON.stringify({
      cachedContent: cache.name, // e.g. "cachedContents/abc123"
      contents: [
        { parts: [{ text: 'Summarize the key points in the document' }] }
      ]
    }),
  }
);

const data = await response.json();
console.log(data.candidates[0].content.parts[0].text);
```

### Update Cache TTL (SDK)

```typescript
await ai.caches.update({
  name: cache.name,
  config: {
    ttl: '7200s' // Extend to 2 hours
  }
});
```

### Update Cache with Expiration Time (SDK)

```typescript
// Set a specific expiration time instead of a TTL
const in10Minutes = new Date(Date.now() + 10 * 60 * 1000);

await ai.caches.update({
  name: cache.name,
  config: {
    expireTime: in10Minutes.toISOString() // RFC 3339 timestamp string
  }
});
```

### List and Delete Caches (SDK)

```typescript
// List all caches (list() returns an async pager)
for await (const cache of await ai.caches.list()) {
  console.log(cache.name, cache.displayName);
}

// Delete a specific cache
await ai.caches.delete({ name: cache.name });
```

### Caching with Video Files

```typescript
import { GoogleGenAI, createPartFromUri, createUserContent } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

// Upload video file (let, not const: the handle is reassigned while polling)
let videoFile = await ai.files.upload({
  file: './video.mp4',
  config: { mimeType: 'video/mp4' }
});

// Wait until the file leaves the PROCESSING state
while (videoFile.state === 'PROCESSING') {
  await new Promise(resolve =>
setTimeout(resolve, 2000));
  videoFile = await ai.files.get({ name: videoFile.name });
}

// Create cache with the processed video
const cache = await ai.caches.create({
  model: 'gemini-2.5-flash',
  config: {
    displayName: 'video-analysis-cache',
    systemInstruction: 'You are an expert video analyzer.',
    contents: createUserContent(createPartFromUri(videoFile.uri, videoFile.mimeType)),
    ttl: '300s' // 5 minutes
  }
});

// Use the cache for multiple queries
const response1 = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  config: { cachedContent: cache.name },
  contents: 'What happens in the first minute?'
});

const response2 = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  config: { cachedContent: cache.name },
  contents: 'Describe the main characters'
});
```

### Key Points

**When to Use Caching:**
- Large system instructions used repeatedly
- Long documents analyzed multiple times
- Video/audio files queried with different prompts
- Consistent context across conversation sessions

**TTL Guidelines:**
- Short sessions: 300s (5 min) to 3600s (1 hour)
- Long sessions: 3600s (1 hour) to 86400s (24 hours)
- Maximum: 7 days

**Cost Savings:**
- Cached input tokens: ~90% cheaper than regular tokens
- Output tokens: Same price (not cached)

**Important:**
- Use explicit model version suffixes for caching (e.g., `gemini-2.5-flash-001`, NOT just `gemini-2.5-flash`)
- Caches are automatically deleted after the TTL expires
- Update the TTL before expiration to extend a cache's lifetime

---

## Code Execution

Gemini models can generate and execute Python code to solve problems requiring computation, data analysis, or visualization.

### How It Works

1. Model generates executable Python code
2. Code runs in a secure sandbox
3. Results are returned to the model
4. Model incorporates results into its response

### Supported Operations

- Mathematical calculations
- Data analysis and statistics
- File processing (CSV, JSON, etc.)
- Chart and graph generation
- Algorithm implementation
- Data transformations

### Available Python Packages

**Standard Library:**
- `math`, `statistics`, `random`, `datetime`, `json`, `csv`, `re`
- `collections`, `itertools`, `functools`

**Data Science:**
- `numpy`, `pandas`, `scipy`

**Visualization:**
- `matplotlib`, `seaborn`

**Note**: Limited package availability compared to a full Python environment

### Basic Code Execution (SDK)

```typescript
import { GoogleGenAI } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'What is the sum of the first 50 prime numbers? Generate and run code for the calculation.',
  config: {
    tools: [{ codeExecution: {} }]
  }
});

// Parse response parts
for (const part of response.candidates[0].content.parts) {
  if (part.text) {
    console.log('Text:', part.text);
  }
  if (part.executableCode) {
    console.log('Generated Code:', part.executableCode.code);
  }
  if (part.codeExecutionResult) {
    console.log('Execution Output:', part.codeExecutionResult.output);
  }
}
```

### Basic Code Execution (Fetch)

```typescript
const response = await fetch(
  `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
  {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': env.GEMINI_API_KEY,
    },
    body: JSON.stringify({
      tools: [{ code_execution: {} }],
      contents: [
        {
          parts: [
            { text: 'What is the sum of the first 50 prime numbers? Generate and run code.'
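            // REST uses snake_case tool keys (code_execution); the SDK uses camelCase (codeExecution)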
} + ] + } + ] + }), + } +); + +const data = await response.json(); + +for (const part of data.candidates[0].content.parts) { + if (part.text) { + console.log('Text:', part.text); + } + if (part.executableCode) { + console.log('Code:', part.executableCode.code); + } + if (part.codeExecutionResult) { + console.log('Result:', part.codeExecutionResult.output); + } +} +``` + +### Chat with Code Execution (SDK) + +```typescript +const chat = await ai.chats.create({ + model: 'gemini-2.5-flash', + config: { + tools: [{ codeExecution: {} }] + } +}); + +let response = await chat.sendMessage('I have a math question for you.'); +console.log(response.text); + +response = await chat.sendMessage( + 'Calculate the Fibonacci sequence up to the 20th number and sum them.' +); + +// Model will generate and execute code, then provide answer +for (const part of response.candidates[0].content.parts) { + if (part.text) console.log(part.text); + if (part.executableCode) console.log('Code:', part.executableCode.code); + if (part.codeExecutionResult) console.log('Output:', part.codeExecutionResult.output); +} +``` + +### Data Analysis Example + +```typescript +const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: ` + Analyze this sales data and calculate: + 1. Total revenue + 2. Average sale price + 3. Best-selling month + + Data (CSV format): + month,sales,revenue + Jan,150,45000 + Feb,200,62000 + Mar,175,53000 + Apr,220,68000 + `, + config: { + tools: [{ codeExecution: {} }] + } +}); + +// Model will generate pandas/numpy code to analyze data +for (const part of response.candidates[0].content.parts) { + if (part.text) console.log(part.text); + if (part.executableCode) console.log('Analysis Code:', part.executableCode.code); + if (part.codeExecutionResult) console.log('Results:', part.codeExecutionResult.output); +} +``` + +### Visualization Example + +```typescript +const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: 'Create a bar chart showing the distribution of prime numbers under 100 by their last digit. Generate the chart and describe the pattern.', + config: { + tools: [{ codeExecution: {} }] + } +}); + +// Model generates matplotlib code, executes it, and describes results +for (const part of response.candidates[0].content.parts) { + if (part.text) console.log(part.text); + if (part.executableCode) console.log('Chart Code:', part.executableCode.code); + if (part.codeExecutionResult) { + // Note: Chart image data would be in output + console.log('Execution completed'); + } +} +``` + +### Response Structure + +```typescript +{ + candidates: [ + { + content: { + parts: [ + { text: "I'll calculate that for you." }, + { + executableCode: { + language: "PYTHON", + code: "def is_prime(n):\n if n <= 1:\n return False\n ..." + } + }, + { + codeExecutionResult: { + outcome: "OUTCOME_OK", // or "OUTCOME_FAILED" + output: "5117\n" + } + }, + { text: "The sum of the first 50 prime numbers is 5117." 
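          // Typical part order: intro text, executableCode, codeExecutionResult, closing text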
} + ] + } + } + ] +} +``` + +### Error Handling + +```typescript +for (const part of response.candidates[0].content.parts) { + if (part.codeExecutionResult) { + if (part.codeExecutionResult.outcome === 'OUTCOME_FAILED') { + console.error('Code execution failed:', part.codeExecutionResult.output); + } else { + console.log('Success:', part.codeExecutionResult.output); + } + } +} +``` + +### Key Points + +**When to Use Code Execution:** +- Complex mathematical calculations +- Data analysis and statistics +- Algorithm implementations +- File parsing and processing +- Chart generation +- Computational problems + +**Limitations:** +- Sandbox environment (limited file system access) +- Limited Python package availability +- Execution timeout limits +- No network access from code +- No persistent state between executions + +**Best Practices:** +- Specify what calculation or analysis you need clearly +- Request code generation explicitly ("Generate and run code...") +- Check `outcome` field for errors +- Use for deterministic computations, not for general programming + +**Important:** +- Available on all Gemini 2.5 models (Pro, Flash, Flash-Lite) +- Code runs in isolated sandbox for security +- Supports Python with standard library and common data science packages + +--- + +## Grounding with Google Search + +Grounding connects the model to real-time web information, reducing hallucinations and providing up-to-date, fact-checked responses with citations. + +### How It Works + +1. Model determines if it needs current information +2. Automatically performs Google Search +3. Processes search results +4. Incorporates findings into response +5. Provides citations and source URLs + +### Benefits + +- **Real-time information**: Access to current events and data +- **Reduced hallucinations**: Answers grounded in web sources +- **Verifiable**: Citations allow fact-checking +- **Up-to-date**: Not limited to model's training cutoff + +### Grounding Options + +#### 1. Google Search (`googleSearch`) - Recommended for Gemini 2.5 + +```typescript +const groundingTool = { + googleSearch: {} +}; +``` + +**Features:** +- Simple configuration +- Automatic search when needed +- Available on all Gemini 2.5 models + +#### 2. FileSearch - New in v1.29.0 (Preview) + +```typescript +const fileSearchTool = { + fileSearch: { + fileSearchStoreId: 'store-id-here' // Created via FileSearchStore APIs + } +}; +``` + +**Features:** +- Search through your own document collections +- Upload and index custom knowledge bases +- Alternative to web search for proprietary data +- Preview feature (requires FileSearchStore setup) + +**Note**: See [FileSearch documentation](https://github.com/googleapis/js-genai) for store creation and management. + +#### 3. 
Google Search Retrieval (`googleSearchRetrieval`) - Legacy (Gemini 1.5)

```typescript
const retrievalTool = {
  googleSearchRetrieval: {
    dynamicRetrievalConfig: {
      mode: 'MODE_DYNAMIC',
      dynamicThreshold: 0.7 // Only search if confidence < 70%
    }
  }
};
```

**Features:**
- Dynamic threshold control
- Used with Gemini 1.5 models
- More configuration options

### Basic Grounding (SDK) - Gemini 2.5

```typescript
import { GoogleGenAI } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Who won Euro 2024?',
  config: {
    tools: [{ googleSearch: {} }]
  }
});

console.log(response.text);

// Check if grounding was used
if (response.candidates[0].groundingMetadata) {
  console.log('Search was performed!');
  console.log('Sources:', response.candidates[0].groundingMetadata);
}
```

### Basic Grounding (Fetch) - Gemini 2.5

```typescript
const response = await fetch(
  `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
  {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': env.GEMINI_API_KEY,
    },
    body: JSON.stringify({
      contents: [
        { parts: [{ text: 'Who won Euro 2024?' }] }
      ],
      tools: [
        { google_search: {} }
      ]
    }),
  }
);

const data = await response.json();
console.log(data.candidates[0].content.parts[0].text);

if (data.candidates[0].groundingMetadata) {
  console.log('Grounding metadata:', data.candidates[0].groundingMetadata);
}
```

### Dynamic Retrieval (SDK) - Gemini 1.5

```typescript
import { GoogleGenAI, DynamicRetrievalConfigMode } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

const response = await ai.models.generateContent({
  model: 'gemini-1.5-flash',
  contents: 'Who won Euro 2024?',
  config: {
    tools: [
      {
        googleSearchRetrieval: {
          dynamicRetrievalConfig: {
            mode: DynamicRetrievalConfigMode.MODE_DYNAMIC,
            dynamicThreshold: 0.7 // Search only if confidence < 70%
          }
        }
      }
    ]
  }
});

console.log(response.text);

if (!response.candidates[0].groundingMetadata) {
  console.log('Model answered from its own knowledge (high confidence)');
}
```

### Grounding Metadata Structure

```typescript
{
  groundingMetadata: {
    webSearchQueries: [
      "euro 2024 winner"
    ],
    groundingChunks: [
      {
        web: {
          uri: "https://example.com/euro-2024-results",
          title: "UEFA Euro 2024 Final Results"
        }
      }
    ],
    groundingSupports: [
      {
        segment: { startIndex: 42, endIndex: 47, text: "Spain" },
        groundingChunkIndices: [0] // Indexes into groundingChunks
      }
    ],
    searchEntryPoint: {
      renderedContent: "..." // HTML/CSS for the Google Search suggestions widget
    }
  }
}
```

### Chat with Grounding (SDK)

```typescript
const chat = ai.chats.create({
  model: 'gemini-2.5-flash',
  config: {
    tools: [{ googleSearch: {} }]
  }
});

let response = await chat.sendMessage({ message: 'What are the latest developments in quantum computing?' });
console.log(response.text);

// Check grounding sources
if (response.candidates[0].groundingMetadata) {
  const chunks = response.candidates[0].groundingMetadata.groundingChunks || [];
  console.log(`Sources used: ${chunks.length}`);
  chunks.forEach(chunk => {
    console.log(`- ${chunk.web?.title}: ${chunk.web?.uri}`);
  });
}

// Follow-up still has grounding enabled
response = await chat.sendMessage({ message: 'Which company made the biggest breakthrough?' });
console.log(response.text);
```

### Combining Grounding with Function Calling

```typescript
const weatherFunction = {
  name: 'get_current_weather',
  description: 'Get current weather for a location',
  parametersJsonSchema: {
    type: 'object',
    properties: {
      location: { type: 'string', description: 'City name' }
    },
    required: ['location']
  }
};

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'What is the weather like in the city that won Euro 2024?',
  config: {
    tools: [
      { googleSearch: {} },
      { functionDeclarations: [weatherFunction] }
    ]
  }
});

// Model will:
// 1. Use Google Search to find the Euro 2024 winner
// 2. Call get_current_weather with that city
// 3. Combine both results in its response
```

### Checking if Grounding was Used

```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'What is 2+2?', // Model knows this without search
  config: {
    tools: [{ googleSearch: {} }]
  }
});

if (!response.candidates[0].groundingMetadata) {
  console.log('Model answered from its own knowledge (no search needed)');
} else {
  console.log('Search was performed');
}
```

### Key Points

**When to Use Grounding:**
- Current events and news
- Real-time data (stock prices, sports scores, weather)
- Fact-checking and verification
- Questions about recent developments
- Information beyond the model's training cutoff

**When NOT to Use:**
- General knowledge questions
- Mathematical calculations
- Code generation
- Creative writing
- Tasks requiring internal reasoning only

**Cost Considerations:**
- Grounding adds latency (search takes time)
- Additional token costs for retrieved content
- Use `dynamicThreshold` to control when searches happen (Gemini 1.5)

**Important Notes:**
- Grounded requests are billed separately on the paid tier (a limited free daily quota applies)
- Search result quality depends on query phrasing
- Citations may not cover every fact in the response
- Search is performed automatically based on model confidence
- When displaying grounded results, show the `searchEntryPoint` search suggestions as Google's terms require

**Gemini 2.5 vs 1.5:**
- **Gemini 2.5**: Use `googleSearch` (simple, recommended)
- **Gemini 1.5**: Use `googleSearchRetrieval` with `dynamicThreshold`

**Best Practices:**
- Always check `groundingMetadata` to see if search was used
- Display citations to users for transparency
- Use specific, well-phrased questions for better search results
- Combine with function calling for hybrid workflows

---

## Error Handling

### Common Errors

#### 1.
Invalid API Key (401) + +```typescript +{ + error: { + code: 401, + message: 'API key not valid. Please pass a valid API key.', + status: 'UNAUTHENTICATED' + } +} +``` + +**Solution**: Verify `GEMINI_API_KEY` environment variable is set correctly. + +#### 2. Rate Limit Exceeded (429) + +```typescript +{ + error: { + code: 429, + message: 'Resource has been exhausted (e.g. check quota).', + status: 'RESOURCE_EXHAUSTED' + } +} +``` + +**Solution**: Implement exponential backoff retry strategy. + +#### 3. Model Not Found (404) + +```typescript +{ + error: { + code: 404, + message: 'models/gemini-3.0-flash is not found', + status: 'NOT_FOUND' + } +} +``` + +**Solution**: Use correct model names: `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-2.5-flash-lite` + +#### 4. Context Length Exceeded (400) + +```typescript +{ + error: { + code: 400, + message: 'Request payload size exceeds the limit', + status: 'INVALID_ARGUMENT' + } +} +``` + +**Solution**: Reduce input size. Gemini 2.5 models support 1,048,576 input tokens max. + +### Exponential Backoff Pattern + +```typescript +async function generateWithRetry(request, maxRetries = 3) { + for (let i = 0; i < maxRetries; i++) { + try { + return await ai.models.generateContent(request); + } catch (error) { + if (error.status === 429 && i < maxRetries - 1) { + const delay = Math.pow(2, i) * 1000; // 1s, 2s, 4s + await new Promise(resolve => setTimeout(resolve, delay)); + continue; + } + throw error; + } + } +} +``` + +--- + +## Rate Limits + +### Free Tier (Gemini API) + +Rate limits vary by model: + +**Gemini 2.5 Pro**: +- Requests per minute: 5 RPM +- Tokens per minute: 125,000 TPM +- Requests per day: 100 RPD + +**Gemini 2.5 Flash**: +- Requests per minute: 10 RPM +- Tokens per minute: 250,000 TPM +- Requests per day: 250 RPD + +**Gemini 2.5 Flash-Lite**: +- Requests per minute: 15 RPM +- Tokens per minute: 250,000 TPM +- Requests per day: 1,000 RPD + +### Paid Tier (Tier 1) + +Requires billing account linked to your Google Cloud project. + +**Gemini 2.5 Pro**: +- Requests per minute: 150 RPM +- Tokens per minute: 2,000,000 TPM +- Requests per day: 10,000 RPD + +**Gemini 2.5 Flash**: +- Requests per minute: 1,000 RPM +- Tokens per minute: 1,000,000 TPM +- Requests per day: 10,000 RPD + +**Gemini 2.5 Flash-Lite**: +- Requests per minute: 4,000 RPM +- Tokens per minute: 4,000,000 TPM +- Requests per day: Not specified + +### Higher Tiers (Tier 2 & 3) + +**Tier 2** (requires $250+ spending and 30-day wait): +- Even higher limits available + +**Tier 3** (requires $1,000+ spending and 30-day wait): +- Maximum limits available + +**Tips:** +- Implement rate limit handling with exponential backoff +- Use batch processing for high-volume tasks +- Monitor usage in Google AI Studio +- Choose the right model based on your rate limit needs +- Official rate limits: https://ai.google.dev/gemini-api/docs/rate-limits + +--- + +## SDK Migration Guide + +### From @google/generative-ai to @google/genai + +#### 1. Update Package + +```bash +# Remove deprecated SDK +npm uninstall @google/generative-ai + +# Install current SDK +npm install @google/genai@1.27.0 +``` + +#### 2. 
Update Imports

**Old (DEPRECATED):**
```typescript
import { GoogleGenerativeAI } from '@google/generative-ai';
const genAI = new GoogleGenerativeAI(apiKey);
const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' });
```

**New (CURRENT):**
```typescript
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({ apiKey });
// Use ai.models.generateContent() directly
```

#### 3. Update API Calls

**Old:**
```typescript
const result = await model.generateContent(prompt);
const response = await result.response;
const text = response.text();
```

**New:**
```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: prompt
});
const text = response.text;
```

#### 4. Update Streaming

**Old:**
```typescript
const result = await model.generateContentStream(prompt);
for await (const chunk of result.stream) {
  console.log(chunk.text());
}
```

**New:**
```typescript
const response = await ai.models.generateContentStream({
  model: 'gemini-2.5-flash',
  contents: prompt
});
for await (const chunk of response) {
  console.log(chunk.text);
}
```

#### 5. Update Chat

**Old:**
```typescript
const chat = model.startChat();
const result = await chat.sendMessage(message);
const response = await result.response;
```

**New:**
```typescript
const chat = ai.chats.create({ model: 'gemini-2.5-flash' });
const response = await chat.sendMessage({ message });
// response.text is directly available
```

---

## Production Best Practices

### 1. Always Do

✅ **Use @google/genai** (NOT @google/generative-ai)
✅ **Set maxOutputTokens** to prevent excessive generation
✅ **Implement rate limit handling** with exponential backoff
✅ **Use environment variables** for API keys (never hardcode)
✅ **Validate inputs** before sending to the API (saves costs)
✅ **Use streaming** for better UX on long responses
✅ **Choose the right model** for the task (Pro for complex reasoning, Flash for balance, Flash-Lite for speed)
✅ **Handle errors gracefully** with try-catch
✅ **Monitor token usage** for cost control
✅ **Use correct model names**: gemini-2.5-pro/flash/flash-lite

### 2. Never Do

❌ **Never use @google/generative-ai** (deprecated!)
❌ **Never hardcode API keys** in code
❌ **Never claim 2M context** for Gemini 2.5 (it's 1,048,576 input tokens)
❌ **Never expose API keys** in client-side code
❌ **Never skip error handling** (always try-catch)
❌ **Never assume generic rate limits** (each model has different limits; check the official docs)
❌ **Never send PII** without user consent
❌ **Never trust user input** without validation
❌ **Never ignore rate limits** (you will get 429 errors)
❌ **Never use old model names** like gemini-1.5-pro (use 2.5 models)

### 3. Security

- **API Key Storage**: Use environment variables or secret managers
- **Server-Side Only**: Never expose API keys in browser JavaScript
- **Input Validation**: Sanitize all user inputs before API calls
- **Rate Limiting**: Implement your own rate limits to prevent abuse
- **Error Messages**: Don't expose API keys or sensitive data in error logs

### 4.
Cost Optimization + +- **Choose Right Model**: Use Flash for most tasks, Pro only when needed +- **Set Token Limits**: Use maxOutputTokens to control costs +- **Batch Requests**: Process multiple items efficiently +- **Cache Results**: Store responses when appropriate +- **Monitor Usage**: Track token consumption in Google Cloud Console + +### 5. Performance + +- **Use Streaming**: Better perceived latency for long responses +- **Parallel Requests**: Use Promise.all() for independent calls +- **Edge Deployment**: Deploy to Cloudflare Workers for low latency +- **Connection Pooling**: Reuse HTTP connections when possible + +--- + +## Quick Reference + +### Installation +```bash +npm install @google/genai@1.30.0 +``` + +### Environment +```bash +export GEMINI_API_KEY="..." +``` + +### Models (2025) +- `gemini-2.5-pro` (1,048,576 in / 65,536 out) - Best for complex reasoning +- `gemini-2.5-flash` (1,048,576 in / 65,536 out) - Best price-performance balance +- `gemini-2.5-flash-lite` (1,048,576 in / 65,536 out) - Fastest, most cost-effective + +### Basic Generation +```typescript +const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: 'Your prompt here' +}); +console.log(response.text); +``` + +### Streaming +```typescript +const response = await ai.models.generateContentStream({...}); +for await (const chunk of response) { + console.log(chunk.text); +} +``` + +### Multimodal +```typescript +contents: [ + { + parts: [ + { text: 'What is this?' }, + { inlineData: { data: base64Image, mimeType: 'image/jpeg' } } + ] + } +] +``` + +### Function Calling +```typescript +config: { + tools: [{ functionDeclarations: [...] }] +} +``` + +--- + +**Last Updated**: 2025-11-26 +**Production Validated**: All features tested with @google/genai@1.30.0 +**Phase**: 2 Complete ✅ (All Core + Advanced Features) diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..518cb8e --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,153 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:jezweb/claude-skills:skills/google-gemini-api", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "bb3e9b7656ff919916c1b773f1e5d845bb3c6633", + "treeHash": "b8bbae8bede51e3e5854007aa5c1ab3cb3a7e1050bbca038893d7f48dd558ba6", + "generatedAt": "2025-11-28T10:19:01.568870Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "google-gemini-api", + "description": "Integrate Gemini API with correct current SDK (@google/genai v1.27+, NOT deprecated @google/generative-ai). Supports text generation, multimodal (images/video/audio/PDFs), function calling, and thinking mode. 1M input tokens. 
Use when: integrating Gemini API, implementing multimodal AI, using thinking mode for reasoning, function calling with parallel execution, streaming responses, deploying to Cloudflare Workers, building chat, or troubleshooting SDK deprecation, context window, model not foun", + "version": "1.0.0" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "1d4a239aa8f7a1d70453fbb0c216b4ebdc01b312dcf5d14325819f73c513439c" + }, + { + "path": "SKILL.md", + "sha256": "c17553ecbdb69ac44a1733121a9e728d9575b3d80d40ff72e7ccdf57044bf379" + }, + { + "path": "references/thinking-mode-guide.md", + "sha256": "c1c85ddc27b582e0fab36385c0cc50863d2a032c5d03730fadfe10d6f4efe5ab" + }, + { + "path": "references/grounding-guide.md", + "sha256": "dd7a2ef2f5e09e090a300caeaebe1bbb333c03496e7e572c6dd7de50549c85c2" + }, + { + "path": "references/multimodal-guide.md", + "sha256": "cd2361169dd4d6be8941ca862157878772bd748aea8655fd7ca2b66561234743" + }, + { + "path": "references/streaming-patterns.md", + "sha256": "3bd053362626b4646e366da339cd0eadcc7085e48121eb36bafd56d110b6e322" + }, + { + "path": "references/context-caching-guide.md", + "sha256": "c967b0d121477ecb89345b13ad68c417d6c18d367e02f081bd84692f9e1d4397" + }, + { + "path": "references/top-errors.md", + "sha256": "f4de6f9304496d740bd48dba2722b08160a007e7df70d6c782f14c99447eba79" + }, + { + "path": "references/code-execution-patterns.md", + "sha256": "374ecca169795f5d2299bb0d8917a39dc79bef7d18c61330dff07f31624486ea" + }, + { + "path": "references/models-guide.md", + "sha256": "8f142fee3071ce4f1a071c2a3966b02e725c01357a0c049d29b236d810ff3858" + }, + { + "path": "references/sdk-migration-guide.md", + "sha256": "a61f0c8adb118cdd615cb09bc4f531c2c96e898d820045a8dac3ec48928eaf63" + }, + { + "path": "references/function-calling-patterns.md", + "sha256": "5900df715f79137126a43fc9011c680f9f01afdcc515a7c3ecd3e02f6f8062fe" + }, + { + "path": "references/generation-config.md", + "sha256": "c7dc7abdb40f16d31d950deacfb45f8bcc1af89671e29d5baa83bddc2e924844" + }, + { + "path": "scripts/check-versions.sh", + "sha256": "944f9ad6dd1c8749bf15555c9855c2bf09659c1766c8dad3490cc29fc7626e05" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "42aee177ecd23e9adf0f9b406bc329405300f2be1fd5d8e721eca1851714a1a1" + }, + { + "path": "templates/function-calling-basic.ts", + "sha256": "f1af0c619c93aa97f782899d2e5a0425a72c26a31141ca9cd1e8bf3b9e51579b" + }, + { + "path": "templates/thinking-mode.ts", + "sha256": "42d2a63670690e74005948543e3579bdb214844f2c3de5896eb23d3f1efdc3ea" + }, + { + "path": "templates/function-calling-parallel.ts", + "sha256": "a15b99eed2c9496a23d118a70893a3c1dc797aced2866ea953adcae34cf1d24f" + }, + { + "path": "templates/multimodal-video-audio.ts", + "sha256": "9a709be0fbe5bcf56cea600fc8f228cec5511a3e02e91ce6f2614502b3bbb59f" + }, + { + "path": "templates/text-generation-fetch.ts", + "sha256": "3a7cadf7990cc39a58b04b3e2766099235bd3c32ebe71f61910436bbff96bf31" + }, + { + "path": "templates/grounding-search.ts", + "sha256": "d0ee315b13d64eeb7896578edddf7efd8f6057ddeac889751a429bcd7014184e" + }, + { + "path": "templates/code-execution.ts", + "sha256": "0a7b261c5665fba8bc661627dc9295eb8ed7bb6ea80087899d4287fafd464eaf" + }, + { + "path": "templates/streaming-chat.ts", + "sha256": "a8883cf071c083e2b50cddfe4754fd916e6260cc15d021e10082e4aa84179b80" + }, + { + "path": "templates/package.json", + "sha256": "cd97bba41f70e8a1b16381fb1c05927d74b3c4afe30f81d3be91167765be1ebb" + }, + { + "path": "templates/context-caching.ts", + "sha256": 
"395418ad8397a7e0a2698cc4308ea1674d3ef00bec3def54a19faefa9bd985e4" + }, + { + "path": "templates/text-generation-basic.ts", + "sha256": "8722c5eea9efc8411513ec53b6df45d827d382b4c50fe0dfce21d7af97f9a7f0" + }, + { + "path": "templates/cloudflare-worker.ts", + "sha256": "8b026ae94b2797e8b15bfac43442d0f95bcfdd6581863e49e1d199945e93594d" + }, + { + "path": "templates/multimodal-image.ts", + "sha256": "88c50979e7f71f670e022accf669e9fa757887b645d7e318d2ba1ac0ffe85d16" + }, + { + "path": "templates/streaming-fetch.ts", + "sha256": "9dd0a6bd48d69290787b7f526600a7acb99f47c0d9823cbd81158e358b5d108d" + }, + { + "path": "templates/combined-advanced.ts", + "sha256": "37972b62e1e139c0357f5d2a03c1b335879ec472a5c4b47a90422630f485fa89" + } + ], + "dirSha256": "b8bbae8bede51e3e5854007aa5c1ab3cb3a7e1050bbca038893d7f48dd558ba6" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/references/code-execution-patterns.md b/references/code-execution-patterns.md new file mode 100644 index 0000000..cf032c7 --- /dev/null +++ b/references/code-execution-patterns.md @@ -0,0 +1,481 @@ +# Code Execution Patterns + +Complete guide to using code execution with Google Gemini API for computational tasks, data analysis, and problem-solving. + +--- + +## What is Code Execution? + +Code Execution allows Gemini models to generate and execute Python code to solve problems requiring computation, enabling the model to: +- Perform precise mathematical calculations +- Analyze data with pandas/numpy +- Generate charts and visualizations +- Implement algorithms +- Process files and data structures + +--- + +## How It Works + +1. **Model receives prompt** requiring computation +2. **Model generates Python code** to solve the problem +3. **Code executes in sandbox** (secure, isolated environment) +4. **Results return to model** for incorporation into response +5. **Model explains results** in natural language + +--- + +## Enabling Code Execution + +### Basic Setup (SDK) + +```typescript +import { GoogleGenAI } from '@google/genai'; + +const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY }); + +const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', // Or gemini-2.5-pro + contents: 'Calculate the sum of first 50 prime numbers', + config: { + tools: [{ codeExecution: {} }] // Enable code execution + } +}); +``` + +### Basic Setup (Fetch) + +```typescript +const response = await fetch( + `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`, + { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'x-goog-api-key': env.GEMINI_API_KEY, + }, + body: JSON.stringify({ + tools: [{ code_execution: {} }], + contents: [{ parts: [{ text: 'Calculate...' }] }] + }), + } +); +``` + +--- + +## Available Python Packages + +### Standard Library +- `math`, `statistics`, `random` +- `datetime`, `time`, `calendar` +- `json`, `csv`, `re` +- `collections`, `itertools`, `functools` + +### Data Science +- `numpy` - numerical computing +- `pandas` - data analysis and manipulation +- `scipy` - scientific computing + +### Visualization +- `matplotlib` - plotting and charts +- `seaborn` - statistical visualization + +**Note**: This is a **limited sandbox environment** - not all PyPI packages are available. 
---

## Response Structure

### Parsing Code Execution Results

```typescript
for (const part of response.candidates[0].content.parts) {
  // Inline text
  if (part.text) {
    console.log('Text:', part.text);
  }

  // Generated code
  if (part.executableCode) {
    console.log('Language:', part.executableCode.language); // "PYTHON"
    console.log('Code:', part.executableCode.code);
  }

  // Execution results
  if (part.codeExecutionResult) {
    console.log('Outcome:', part.codeExecutionResult.outcome); // "OUTCOME_OK" or "OUTCOME_FAILED"
    console.log('Output:', part.codeExecutionResult.output);
  }
}
```

### Example Response

```json
{
  "candidates": [{
    "content": {
      "parts": [
        { "text": "I'll calculate that for you." },
        {
          "executableCode": {
            "language": "PYTHON",
            "code": "primes = []\nnum = 2\nwhile len(primes) < 50:\n  if is_prime(num):\n    primes.append(num)\n  num += 1\nprint(sum(primes))"
          }
        },
        {
          "codeExecutionResult": {
            "outcome": "OUTCOME_OK",
            "output": "5117\n"
          }
        },
        { "text": "The sum is 5117." }
      ]
    }
  }]
}
```

---

## Common Patterns

### 1. Mathematical Calculations

```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Calculate the 100th Fibonacci number',
  config: { tools: [{ codeExecution: {} }] }
});
```

**Prompting Tip**: Use phrases like "generate and run code" or "calculate using code" to explicitly request code execution.

### 2. Data Analysis

```typescript
const prompt = `
  Analyze this sales data:

  month,revenue,customers
  Jan,50000,120
  Feb,62000,145
  Mar,58000,138

  Calculate:
  1. Total revenue
  2. Average revenue per customer
  3. Month-over-month growth rate

  Use pandas or numpy for analysis.
`;

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: prompt,
  config: { tools: [{ codeExecution: {} }] }
});
```

### 3. Chart Generation

```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Create a bar chart showing prime number distribution by last digit (0-9) for primes under 100',
  config: { tools: [{ codeExecution: {} }] }
});
```

**Note**: Chart image data appears in `codeExecutionResult.output` (base64 encoded in some cases).

### 4. Algorithm Implementation

```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Implement quicksort and sort this list: [64, 34, 25, 12, 22, 11, 90]. Show the sorted result.',
  config: { tools: [{ codeExecution: {} }] }
});
```

### 5. File Processing (In-Memory)

```typescript
const csvData = `name,age,city
Alice,30,NYC
Bob,25,LA
Charlie,35,Chicago`;

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: `Parse this CSV data and calculate average age:\n\n${csvData}`,
  config: { tools: [{ codeExecution: {} }] }
});
```

---

## Chat with Code Execution

### Multi-Turn Computational Conversations

```typescript
const chat = await ai.chats.create({
  model: 'gemini-2.5-flash',
  config: { tools: [{ codeExecution: {} }] }
});

// First turn
let response = await chat.sendMessage({ message: 'I have a data analysis question' });
console.log(response.text);

// Second turn (will use code execution)
response = await chat.sendMessage({
  message: `
    Calculate statistics for: [12, 15, 18, 22, 25, 28, 30]
    - Mean
    - Median
    - Standard deviation
  `
});

for (const part of response.candidates[0].content.parts) {
  if (part.text) console.log(part.text);
  if (part.executableCode) console.log('Code:', part.executableCode.code);
  if (part.codeExecutionResult) console.log('Results:', part.codeExecutionResult.output);
}
```

---

## Error Handling

### Checking Execution Outcome

```typescript
for (const part of response.candidates[0].content.parts) {
  if (part.codeExecutionResult) {
    if (part.codeExecutionResult.outcome === 'OUTCOME_OK') {
      console.log('✅ Success:', part.codeExecutionResult.output);
    } else if (part.codeExecutionResult.outcome === 'OUTCOME_FAILED') {
      console.error('❌ Execution failed:', part.codeExecutionResult.output);
    }
  }
}
```

### Common Execution Errors

**Timeout**:
```
Error: Execution timed out after 30 seconds
```
**Solution**: Simplify computation or reduce data size.

**Import Error**:
```
ModuleNotFoundError: No module named 'requests'
```
**Solution**: Use only available packages (numpy, pandas, matplotlib, seaborn, scipy).

**Syntax Error**:
```
SyntaxError: invalid syntax
```
**Solution**: Model generated invalid code - try rephrasing prompt or regenerating.

---

## Best Practices

### ✅ Do

1. **Be Explicit**: Use phrases like "generate and run code" to trigger code execution
2. **Provide Data**: Include data directly in prompt for analysis
3. **Specify Output**: Ask for specific calculations or metrics
4. **Use Available Packages**: Stick to numpy, pandas, matplotlib, scipy
5. **Check Outcome**: Always verify `outcome === 'OUTCOME_OK'`

### ❌ Don't

1. **Network Access**: Code cannot make HTTP requests
2. **File System**: No persistent file storage between executions
3. **Long Computations**: Timeout limits apply (~30 seconds)
4. **External Dependencies**: Can't install new packages
5. **State Persistence**: Each execution is isolated (no global state)

---

## Limitations

### Sandbox Restrictions

- **No Network Access**: Cannot call external APIs
- **No File I/O**: Cannot read/write to disk (in-memory only)
- **Limited Packages**: Only pre-installed packages available
- **Execution Timeout**: ~30 seconds maximum
- **No State**: Each execution is independent

### Supported Models

✅ **Works with**:
- `gemini-2.5-pro`
- `gemini-2.5-flash`

❌ **Does NOT work with**:
- `gemini-2.5-flash-lite` (no code execution support)
- Gemini 1.5 models (use Gemini 2.5)

---

## Advanced Patterns

### Iterative Analysis

```typescript
const chat = await ai.chats.create({
  model: 'gemini-2.5-flash',
  config: { tools: [{ codeExecution: {} }] }
});

// Step 1: Initial analysis
let response = await chat.sendMessage({ message: 'Analyze data: [10, 20, 30, 40, 50]' });

// Step 2: Follow-up based on results
response = await chat.sendMessage({ message: 'Now calculate the variance' });

// Step 3: Visualization
response = await chat.sendMessage({ message: 'Create a histogram of this data' });
```

### Combining with Function Calling

```typescript
const weatherFunction = {
  name: 'get_current_weather',
  description: 'Get weather for a city',
  parametersJsonSchema: {
    type: 'object',
    properties: { city: { type: 'string' } },
    required: ['city']
  }
};

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Get weather for NYC, LA, Chicago. Calculate the average temperature.',
  config: {
    tools: [
      { functionDeclarations: [weatherFunction] },
      { codeExecution: {} }
    ]
  }
});

// Model will:
// 1. Call get_current_weather for each city
// 2. Generate code to calculate average
// 3. Return result
```

### Data Transformation Pipeline

```typescript
const prompt = `
  Transform this data:
  Input: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

  Pipeline:
  1. Filter odd numbers
  2. Square each number
  3. Calculate sum
  4. Return result

  Use code to process.
`;

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: prompt,
  config: { tools: [{ codeExecution: {} }] }
});
```

---

## Optimization Tips

### 1. Clear Instructions

**❌ Vague**:
```typescript
contents: 'Analyze this data'
```

**✅ Specific**:
```typescript
contents: 'Calculate mean, median, and standard deviation for: [12, 15, 18, 22, 25]'
```

### 2. Provide Complete Data

```typescript
const csvData = `...complete dataset...`;
const prompt = `Analyze this CSV data:\n\n${csvData}\n\nCalculate total revenue.`;
```

### 3. Request Code Explicitly

```typescript
contents: 'Generate and run code to calculate the factorial of 20'
```

### 4. Handle Large Datasets

For large data, consider:
- Sampling (analyze subset)
- Aggregation (group by categories)
- Pagination (process in chunks)

---

## Troubleshooting

### Code Not Executing

**Symptom**: Response has text but no `executableCode`

**Causes**:
1. Code execution not enabled (`tools: [{ codeExecution: {} }]`)
2. Model decided code wasn't necessary
3. Using `gemini-2.5-flash-lite` (doesn't support code execution)

**Solution**: Be explicit in prompt: "Use code to calculate..."
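To apply this solution programmatically, one hedged pattern is to check the response for an `executableCode` part and retry once with more explicit wording. A sketch (the `generateWithCode` helper is illustrative, not part of the SDK):

```typescript
import { GoogleGenAI, GenerateContentResponse } from '@google/genai';

// Illustrative helper: retries once with explicit wording if the model
// answered without running any code.
async function generateWithCode(
  ai: GoogleGenAI,
  prompt: string
): Promise<GenerateContentResponse> {
  const run = (contents: string) =>
    ai.models.generateContent({
      model: 'gemini-2.5-flash',
      contents,
      config: { tools: [{ codeExecution: {} }] }
    });

  let response = await run(prompt);
  const ranCode = response.candidates?.[0]?.content?.parts?.some(
    (part) => part.executableCode
  );

  if (!ranCode) {
    response = await run(`Generate and run code to answer: ${prompt}`);
  }
  return response;
}
```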
+ +### Timeout Errors + +**Symptom**: `OUTCOME_FAILED` with timeout message + +**Causes**: Computation too complex or data too large + +**Solution**: +- Simplify algorithm +- Reduce data size +- Use more efficient approach + +### Import Errors + +**Symptom**: `ModuleNotFoundError` + +**Causes**: Trying to import unavailable package + +**Solution**: Use only available packages (numpy, pandas, matplotlib, seaborn, scipy) + +--- + +## References + +- Official Docs: https://ai.google.dev/gemini-api/docs/code-execution +- Templates: See `code-execution.ts` for working examples +- Available Packages: See "Available Python Packages" section above diff --git a/references/context-caching-guide.md b/references/context-caching-guide.md new file mode 100644 index 0000000..de9b17d --- /dev/null +++ b/references/context-caching-guide.md @@ -0,0 +1,373 @@ +# Context Caching Guide + +Complete guide to using context caching with Google Gemini API to reduce costs by up to 90%. + +--- + +## What is Context Caching? + +Context caching allows you to cache frequently used content (system instructions, large documents, videos) and reuse it across multiple requests, significantly reducing token costs and improving latency. + +--- + +## How It Works + +1. **Create a cache** with your repeated content (documents, videos, system instructions) +2. **Set TTL** (time-to-live) for cache expiration +3. **Reference the cache** in subsequent API calls +4. **Pay less** - cached tokens cost ~90% less than regular input tokens + +--- + +## Benefits + +### Cost Savings +- **Cached input tokens**: ~90% cheaper than regular tokens +- **Output tokens**: Same price (not cached) +- **Example**: 100K token document cached → ~10K token cost equivalent + +### Performance +- **Reduced latency**: Cached content is preprocessed +- **Faster responses**: No need to reprocess large context +- **Consistent results**: Same context every time + +### Use Cases +- Large documents analyzed repeatedly +- Long system instructions used across sessions +- Video/audio files queried multiple times +- Consistent conversation context + +--- + +## Cache Creation + +### Basic Cache (SDK) + +```typescript +import { GoogleGenAI } from '@google/genai'; + +const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY }); + +const cache = await ai.caches.create({ + model: 'gemini-2.5-flash-001', // Must use explicit version! 
  config: {
    displayName: 'my-cache',
    systemInstruction: 'You are a helpful assistant.',
    contents: 'Large document content here...',
    ttl: '3600s', // 1 hour
  }
});
```

### Cache with Expiration Time

```typescript
// Set specific expiration time (timezone-aware)
const expirationTime = new Date(Date.now() + 2 * 60 * 60 * 1000); // 2 hours from now

const cache = await ai.caches.create({
  model: 'gemini-2.5-flash-001',
  config: {
    displayName: 'my-cache',
    contents: documentText,
    expireTime: expirationTime, // Use expireTime instead of ttl
  }
});
```

---

## TTL (Time-To-Live) Guidelines

### Recommended TTL Values

| Use Case | TTL | Reason |
|----------|-----|--------|
| Quick analysis session | 300s (5 min) | Short-lived tasks |
| Extended conversation | 3600s (1 hour) | Standard session length |
| Daily batch processing | 86400s (24 hours) | Reuse across day |
| Long-term analysis | 604800s (7 days) | Maximum allowed |

### TTL vs Expiration Time

**TTL (time-to-live)**:
- Relative duration from cache creation
- Format: `"3600s"` (string with 's' suffix)
- Easy for session-based caching

**Expiration Time**:
- Absolute timestamp
- Must be timezone-aware Date object
- Precise control over cache lifetime

---

## Using a Cache

### Generate Content with Cache (SDK)

```typescript
// Reference the cache via config.cachedContent
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash-001', // Same model the cache was created with
  contents: 'Summarize the document',
  config: { cachedContent: cache.name }
});

console.log(response.text);
```

### Multiple Queries with Same Cache

```typescript
const queries = [
  'What are the key points?',
  'Who are the main characters?',
  'What is the conclusion?'
];

for (const query of queries) {
  const response = await ai.models.generateContent({
    model: 'gemini-2.5-flash-001',
    contents: query,
    config: { cachedContent: cache.name }
  });
  console.log(`Q: ${query}`);
  console.log(`A: ${response.text}\n`);
}
```

---

## Cache Management

### Update Cache TTL

```typescript
// Extend cache lifetime before it expires
await ai.caches.update({
  name: cache.name,
  config: {
    ttl: '7200s' // Extend to 2 hours
  }
});
```

### List All Caches

```typescript
const pager = await ai.caches.list();
for await (const cache of pager) {
  console.log(`${cache.displayName}: ${cache.name}`);
  console.log(`Expires: ${cache.expireTime}`);
}
```

### Delete Cache

```typescript
// Delete when no longer needed
await ai.caches.delete({ name: cache.name });
```

---

## Advanced Use Cases

### Caching Video Files

```typescript
import { GoogleGenAI, createUserContent, createPartFromUri } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

// 1. Upload video (file path or Blob)
let videoFile = await ai.files.upload({ file: './video.mp4' });

// 2. Wait for processing
while (videoFile.state === 'PROCESSING') {
  await new Promise(resolve => setTimeout(resolve, 2000));
  videoFile = await ai.files.get({ name: videoFile.name });
}

// 3. Create cache with video
const cache = await ai.caches.create({
  model: 'gemini-2.5-flash-001',
  config: {
    displayName: 'video-cache',
    systemInstruction: 'Analyze this video.',
    contents: [createUserContent(createPartFromUri(videoFile.uri, videoFile.mimeType))],
    ttl: '600s'
  }
});

// 4. Query video multiple times
const response1 = await ai.models.generateContent({
  model: 'gemini-2.5-flash-001',
  contents: 'What happens in the first minute?',
  config: { cachedContent: cache.name }
});

const response2 = await ai.models.generateContent({
  model: 'gemini-2.5-flash-001',
  contents: 'Who are the main people?',
  config: { cachedContent: cache.name }
});
```

### Caching with System Instructions

```typescript
const cache = await ai.caches.create({
  model: 'gemini-2.5-flash-001',
  config: {
    displayName: 'legal-expert-cache',
    systemInstruction: `
      You are a legal expert specializing in contract law.
      Always cite relevant sections when making claims.
      Use clear, professional language.
    `,
    contents: largeContractDocument,
    ttl: '3600s'
  }
});

// System instruction is part of cached context
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash-001',
  contents: 'Is this contract enforceable?',
  config: { cachedContent: cache.name }
});
```

---

## Important Notes

### Model Version Requirement

**⚠️ You MUST use explicit version suffixes when creating caches:**

```typescript
// ✅ CORRECT
model: 'gemini-2.5-flash-001'

// ❌ WRONG (will fail)
model: 'gemini-2.5-flash'
```

### Cache Expiration

- Caches are **automatically deleted** after TTL expires
- **Cannot recover** expired caches - must recreate
- Update TTL **before expiration** to extend lifetime

### Cost Calculation

```
Regular request: 100,000 input tokens = 100K token cost

With caching (after cache creation):
- Cached tokens: 100,000 × 0.1 (90% discount) = 10K equivalent cost
- New tokens: 1,000 × 1.0 = 1K cost
- Total: 11K equivalent (89% savings!)
```

### Limitations

- Maximum TTL: 7 days (604800s)
- Cache creation costs same as regular tokens (first time only)
- Subsequent uses get 90% discount
- Only input tokens are cached (output tokens never cached)

---

## Best Practices

### When to Use Caching

✅ **Good Use Cases:**
- Large documents queried repeatedly (legal docs, research papers)
- Video/audio files analyzed with different questions
- Long system instructions used across many requests
- Consistent context in multi-turn conversations

❌ **Bad Use Cases:**
- Single-use content (no benefit)
- Frequently changing content
- Short content (<1000 tokens) - minimal savings
- Content used only once per day (cache might expire)

### Optimization Tips

1. **Cache Early**: Create cache at session start
2. **Extend TTL**: Update before expiration if still needed
3. **Monitor Usage**: Track how often cache is reused
4. **Clean Up**: Delete unused caches to avoid clutter
5. **Combine Features**: Use caching with code execution, grounding for powerful workflows

### Cache Naming

Use descriptive `displayName` for easy identification:

```typescript
// ✅ Good names
displayName: 'financial-report-2024-q3'
displayName: 'legal-contract-acme-corp'
displayName: 'video-analysis-project-x'

// ❌ Vague names
displayName: 'cache1'
displayName: 'test'
```

---

## Troubleshooting

### "Invalid model name" Error

**Problem**: Using `gemini-2.5-flash` instead of `gemini-2.5-flash-001`

**Solution**: Always use explicit version suffix:

```typescript
model: 'gemini-2.5-flash-001' // Correct
```

### Cache Expired Error

**Problem**: Trying to use cache after TTL expired

**Solution**: Check expiration before use or extend TTL proactively:

```typescript
let cache = await ai.caches.get({ name: cacheName });
if (new Date(cache.expireTime) < new Date()) {
  // Cache expired, recreate it
  cache = await ai.caches.create({ ... });
}
```

### High Costs Despite Caching

**Problem**: Creating new cache for each request

**Solution**: Reuse the same cache across multiple requests:

```typescript
// ❌ Wrong - creates new cache each time
for (const query of queries) {
  const cache = await ai.caches.create({ ... }); // Expensive!
  const response = await ai.models.generateContent({
    model: 'gemini-2.5-flash-001',
    contents: query,
    config: { cachedContent: cache.name }
  });
}

// ✅ Correct - create once, use many times
const cache = await ai.caches.create({ ... }); // Create once
for (const query of queries) {
  const response = await ai.models.generateContent({
    model: 'gemini-2.5-flash-001',
    contents: query,
    config: { cachedContent: cache.name }
  });
}
```

---

## References

- Official Docs: https://ai.google.dev/gemini-api/docs/caching
- Cost Optimization: See "Cost Optimization" in main SKILL.md
- Templates: See `context-caching.ts` for working examples

diff --git a/references/function-calling-patterns.md b/references/function-calling-patterns.md
new file mode 100644
index 0000000..9e99064
--- /dev/null
+++ b/references/function-calling-patterns.md
@@ -0,0 +1,59 @@
# Function Calling Patterns

Complete guide to implementing function calling (tool use) with Gemini API.

---

## Basic Pattern

1. Define function declarations
2. Send request with tools
3. Check if model wants to call functions
4. Execute functions
5. Send results back to model
6. Get final response

---

## Function Declaration Schema

```typescript
{
  name: string,              // Function name (no spaces)
  description: string,       // What the function does
  parametersJsonSchema: {    // Subset of OpenAPI schema
    type: 'object',
    properties: {
      [paramName]: {
        type: string,        // 'string' | 'number' | 'boolean' | 'array' | 'object'
        description: string, // Parameter description
        enum?: string[]      // Optional: allowed values
      }
    },
    required: string[]       // Required parameter names
  }
}
```

---

## Calling Modes

- **AUTO** (default): Model decides when to call
- **ANY**: Force at least one function call
- **NONE**: Disable function calling

---

## Parallel vs Compositional

**Parallel**: Independent functions run simultaneously
**Compositional**: Sequential dependencies (A → B → C)

Gemini automatically detects which pattern to use.

---

## Official Docs

https://ai.google.dev/gemini-api/docs/function-calling

diff --git a/references/generation-config.md b/references/generation-config.md
new file mode 100644
index 0000000..8691cf8
--- /dev/null
+++ b/references/generation-config.md
@@ -0,0 +1,57 @@
# Generation Configuration Reference

Complete reference for all generation parameters.
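For orientation before the parameter-by-parameter reference, here is what a fully configured request looks like; a sketch using the "code generation" recipe from the Use Cases section below (values are illustrative starting points, not requirements):

```typescript
import { GoogleGenAI } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Write a TypeScript function that deduplicates an array',
  config: {
    temperature: 0.3,      // Low randomness suits code generation
    topP: 0.9,
    maxOutputTokens: 2048, // Always bound output length
    stopSequences: ['// END']
  }
});

console.log(response.text);
```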
+ +--- + +## All Parameters + +```typescript +config: { + temperature: number, // 0.0-2.0 (default: 1.0) + topP: number, // 0.0-1.0 (default: 0.95) + topK: number, // 1-100+ (default: 40) + maxOutputTokens: number, // 1-65536 + stopSequences: string[], // Stop at these strings + responseMimeType: string, // 'text/plain' | 'application/json' + candidateCount: number, // Usually 1 + thinkingConfig: { + thinkingBudget: number // Max thinking tokens + } +} +``` + +--- + +## Parameter Guidelines + +### temperature +- **0.0**: Deterministic, focused +- **1.0**: Balanced (default) +- **2.0**: Very creative, random + +### topP (nucleus sampling) +- **0.95**: Default, good balance +- Lower = more focused + +### topK +- **40**: Default +- Higher = more diversity + +### maxOutputTokens +- Always set this to prevent excessive generation +- Max: 65,536 tokens + +--- + +## Use Cases + +**Factual tasks**: temperature=0.0, topP=0.8 +**Creative tasks**: temperature=1.2, topP=0.95 +**Code generation**: temperature=0.3, topP=0.9 + +--- + +## Official Docs + +https://ai.google.dev/gemini-api/docs/models/generative-models#model-parameters diff --git a/references/grounding-guide.md b/references/grounding-guide.md new file mode 100644 index 0000000..3746101 --- /dev/null +++ b/references/grounding-guide.md @@ -0,0 +1,602 @@ +# Grounding with Google Search Guide + +Complete guide to using grounding with Google Search to connect Gemini models to real-time web information, reducing hallucinations and providing verifiable, up-to-date responses. + +--- + +## What is Grounding? + +Grounding connects the Gemini model to Google Search, allowing it to: +- Access real-time information beyond training cutoff +- Reduce hallucinations with fact-checked web sources +- Provide citations and source URLs +- Answer questions about current events +- Verify information against the web + +--- + +## How It Works + +1. **Model receives query** (e.g., "Who won Euro 2024?") +2. **Model determines** if current information is needed +3. **Performs Google Search** automatically +4. **Processes search results** (web pages, snippets) +5. **Incorporates findings** into response +6. **Provides citations** with source URLs + +--- + +## Two Grounding APIs + +### 1. Google Search (`googleSearch`) - Recommended for Gemini 2.5 + +**Simple, automatic grounding**: + +```typescript +const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: 'Who won the euro 2024?', + config: { + tools: [{ googleSearch: {} }] + } +}); +``` + +**Features**: +- Simple configuration (empty object) +- Automatic search when model needs current info +- Available on all Gemini 2.5 models +- Recommended for new projects + +### 2. Google Search Retrieval (`googleSearchRetrieval`) - Legacy for Gemini 1.5 + +**Dynamic threshold control**: + +```typescript +import { DynamicRetrievalConfigMode } from '@google/genai'; + +const response = await ai.models.generateContent({ + model: 'gemini-1.5-flash', + contents: 'Who won the euro 2024?', + config: { + tools: [{ + googleSearchRetrieval: { + dynamicRetrievalConfig: { + mode: DynamicRetrievalConfigMode.MODE_DYNAMIC, + dynamicThreshold: 0.7 // Search only if confidence < 70% + } + } + }] + } +}); +``` + +**Features**: +- Control when searches happen via threshold +- Used with Gemini 1.5 models +- More configuration options + +**Recommendation**: Use `googleSearch` for Gemini 2.5 models (simpler and newer). 
+ +--- + +## Basic Usage + +### SDK Approach (Gemini 2.5) + +```typescript +import { GoogleGenAI } from '@google/genai'; + +const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY }); + +const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: 'What are the latest developments in AI?', + config: { + tools: [{ googleSearch: {} }] + } +}); + +console.log(response.text); + +// Check if grounding was used +if (response.candidates[0].groundingMetadata) { + console.log('✓ Search performed'); + console.log('Sources:', response.candidates[0].groundingMetadata.webPages); +} else { + console.log('✓ Answered from model knowledge'); +} +``` + +### Fetch Approach (Cloudflare Workers) + +```typescript +const response = await fetch( + `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`, + { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'x-goog-api-key': env.GEMINI_API_KEY, + }, + body: JSON.stringify({ + contents: [{ parts: [{ text: 'What are the latest developments in AI?' }] }], + tools: [{ google_search: {} }] + }), + } +); + +const data = await response.json(); +console.log(data.candidates[0].content.parts[0].text); +``` + +--- + +## Grounding Metadata + +### Structure + +```typescript +{ + groundingMetadata: { + // Search queries performed + searchQueries: [ + { text: "euro 2024 winner" } + ], + + // Web pages retrieved + webPages: [ + { + url: "https://example.com/euro-2024", + title: "UEFA Euro 2024 Results", + snippet: "Spain won UEFA Euro 2024..." + } + ], + + // Citations (inline references) + citations: [ + { + startIndex: 42, + endIndex: 47, + uri: "https://example.com/euro-2024" + } + ], + + // Retrieval queries (alternative search terms) + retrievalQueries: [ + { query: "who won euro 2024 final" } + ] + } +} +``` + +### Accessing Metadata + +```typescript +if (response.candidates[0].groundingMetadata) { + const metadata = response.candidates[0].groundingMetadata; + + // Display sources + console.log('Sources:'); + metadata.webPages?.forEach((page, i) => { + console.log(`${i + 1}. ${page.title}`); + console.log(` ${page.url}`); + }); + + // Display citations + console.log('\nCitations:'); + metadata.citations?.forEach((citation) => { + console.log(`Position ${citation.startIndex}-${citation.endIndex}: ${citation.uri}`); + }); +} +``` + +--- + +## When to Use Grounding + +### ✅ Good Use Cases + +**Current Events**: +```typescript +'What happened in the news today?' +'Who won the latest sports championship?' +'What are the current stock prices?' +``` + +**Recent Developments**: +```typescript +'What are the latest AI breakthroughs?' +'What are recent changes in climate policy?' +``` + +**Fact-Checking**: +```typescript +'Is this claim true: [claim]?' +'What does the latest research say about [topic]?' +``` + +**Real-Time Data**: +```typescript +'What is the current weather in Tokyo?' +'What are today's cryptocurrency prices?' +``` + +### ❌ Not Recommended For + +**General Knowledge**: +```typescript +'What is the capital of France?' // Model knows this +'How does photosynthesis work?' // Stable knowledge +``` + +**Mathematical Calculations**: +```typescript +'What is 15 * 27?' 
// Use code execution instead
```

**Creative Tasks**:
```typescript
'Write a poem about autumn' // No search needed
```

**Code Generation**:
```typescript
'Write a sorting algorithm' // Internal reasoning sufficient
```

---

## Chat with Grounding

### Multi-Turn Conversations

```typescript
const chat = await ai.chats.create({
  model: 'gemini-2.5-flash',
  config: {
    tools: [{ googleSearch: {} }]
  }
});

// First question
let response = await chat.sendMessage({ message: 'What are the latest quantum computing developments?' });
console.log(response.text);

// Display sources
if (response.candidates[0].groundingMetadata) {
  const sources = response.candidates[0].groundingMetadata.webPages || [];
  console.log(`\nSources: ${sources.length} web pages`);
  sources.forEach(s => console.log(`- ${s.title}: ${s.url}`));
}

// Follow-up question
response = await chat.sendMessage({ message: 'Which company made the biggest breakthrough?' });
console.log('\n' + response.text);
```

---

## Combining with Other Features

### Grounding + Function Calling

```typescript
const weatherFunction = {
  name: 'get_current_weather',
  description: 'Get weather for a location',
  parametersJsonSchema: {
    type: 'object',
    properties: {
      location: { type: 'string', description: 'City name' }
    },
    required: ['location']
  }
};

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'What is the weather like in the city that won Euro 2024?',
  config: {
    tools: [
      { googleSearch: {} },                        // For finding Euro 2024 winner
      { functionDeclarations: [weatherFunction] }  // For weather lookup
    ]
  }
});

// Model will:
// 1. Use Google Search to find Euro 2024 winner (Madrid/Spain)
// 2. Call get_current_weather function with the city
// 3. Combine both results in response
```

### Grounding + Code Execution

```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Find the current stock prices for AAPL, GOOGL, MSFT and calculate their average',
  config: {
    tools: [
      { googleSearch: {} },  // For current stock prices
      { codeExecution: {} }  // For averaging
    ]
  }
});

// Model will:
// 1. Search for current stock prices
// 2. Generate code to calculate average
// 3. Execute code with the found prices
// 4. Return result with citations
```

---

## Checking Grounding Usage

### Determine if Search Was Performed

```typescript
const queries = [
  'What is 2+2?',                   // Should NOT use search
  'What happened in the news today?' // Should use search
];

for (const query of queries) {
  const response = await ai.models.generateContent({
    model: 'gemini-2.5-flash',
    contents: query,
    config: { tools: [{ googleSearch: {} }] }
  });

  console.log(`Query: ${query}`);
  console.log(`Search used: ${response.candidates[0].groundingMetadata ? 'YES' : 'NO'}`);
  console.log();
}
```

**Output**:
```
Query: What is 2+2?
Search used: NO

Query: What happened in the news today?
Search used: YES
```

---

## Dynamic Retrieval (Gemini 1.5)

### Threshold-Based Grounding

```typescript
const response = await ai.models.generateContent({
  model: 'gemini-1.5-flash',
  contents: 'Who won the euro 2024?',
  config: {
    tools: [{
      googleSearchRetrieval: {
        dynamicRetrievalConfig: {
          mode: DynamicRetrievalConfigMode.MODE_DYNAMIC,
          dynamicThreshold: 0.7 // Search only if confidence < 70%
        }
      }
    }]
  }
});

if (!response.candidates[0].groundingMetadata) {
  console.log('Model answered from knowledge (confidence >= 70%)');
} else {
  console.log('Search performed (confidence < 70%)');
}
```

**How It Works**:
- Model evaluates confidence in its internal knowledge
- If confidence < threshold → performs search
- If confidence >= threshold → uses internal knowledge

**Threshold Values**:
- `0.0`: Never search (always use internal knowledge)
- `0.5`: Search if moderately uncertain
- `0.7`: Search if somewhat uncertain (good default)
- `1.0`: Always search

---

## Best Practices

### ✅ Do

1. **Check Metadata**: Always verify if grounding was used
   ```typescript
   if (response.candidates[0].groundingMetadata) { ... }
   ```

2. **Display Citations**: Show sources to users for transparency
   ```typescript
   metadata.webPages.forEach(page => {
     console.log(`Source: ${page.title} (${page.url})`);
   });
   ```

3. **Use Specific Queries**: Better search results with clear questions
   ```typescript
   // ✅ Good: "What are Microsoft's Q3 2024 earnings?"
   // ❌ Vague: "Tell me about Microsoft"
   ```

4. **Combine Features**: Use with function calling/code execution for powerful workflows

5. **Handle Missing Metadata**: Not all queries trigger search
   ```typescript
   const sources = response.candidates[0].groundingMetadata?.webPages || [];
   ```

### ❌ Don't

1. **Don't Assume Search Always Happens**: Model decides when to search
2. **Don't Ignore Citations**: They're crucial for fact-checking
3. **Don't Use for Stable Knowledge**: Waste of resources for unchanging facts
4. **Don't Expect Perfect Coverage**: Not all information is on the web

---

## Cost and Performance

### Cost Considerations

- **Added Latency**: Search takes 1-3 seconds typically
- **Token Costs**: Retrieved content counts as input tokens
- **Rate Limits**: Subject to API rate limits

### Optimization

**Use Dynamic Threshold** (Gemini 1.5):
```typescript
dynamicThreshold: 0.7 // Higher = more searches, lower = fewer searches
```

**Cache Grounding Results** (if appropriate):
```typescript
const cache = await ai.caches.create({
  model: 'gemini-2.5-flash-001',
  config: {
    displayName: 'grounding-cache',
    tools: [{ googleSearch: {} }],
    contents: 'Initial query that triggers search...',
    ttl: '3600s'
  }
});
// Subsequent queries reuse cached grounding results
```

---

## Troubleshooting

### Grounding Not Working

**Symptom**: No `groundingMetadata` in response

**Causes**:
1. Grounding not enabled: `tools: [{ googleSearch: {} }]`
2. Model decided search wasn't needed (query answerable from knowledge)
3. Google Cloud project not configured (grounding requires GCP)

**Solution**:
- Verify `tools` configuration
- Use queries requiring current information
- Set up Google Cloud project

### Poor Search Quality

**Symptom**: Irrelevant sources or wrong information

**Causes**:
- Vague query
- Search terms ambiguous
- Recent events not yet indexed

**Solution**:
- Make queries more specific
- Include context in prompt
- Verify search queries in metadata

### Citations Missing

**Symptom**: `groundingMetadata` present but no citations

**Explanation**: Citations are **inline references** - they may not always be present if the model doesn't directly quote sources.

**Solution**: Check `webPages` instead for the full source list

---

## Important Requirements

### Google Cloud Project

**⚠️ Grounding requires a Google Cloud project, not just an API key.**

**Setup**:
1. Create Google Cloud project
2. Enable Generative Language API
3. Configure billing
4. Use API key from that project

**Error if Missing**:
```
Error: Grounding requires Google Cloud project configuration
```

### Model Support

**✅ Supported**:
- All Gemini 2.5 models (`googleSearch`)
- All Gemini 1.5 models (`googleSearchRetrieval`)

**❌ Not Supported**:
- Gemini 1.0 models

---

## Examples

### News Summary

```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: "Summarize today's top 3 technology news headlines",
  config: { tools: [{ googleSearch: {} }] }
});

console.log(response.text);

const metadata = response.candidates[0].groundingMetadata;
metadata?.webPages?.forEach((page, i) => {
  console.log(`${i + 1}. ${page.title}: ${page.url}`);
});
```

### Fact Verification

```typescript
const claim = "The Earth is flat";

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: `Is this claim true: "${claim}"? Use reliable sources to verify.`,
  config: { tools: [{ googleSearch: {} }] }
});

console.log(response.text);
```

### Market Research

```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'What are the current trends in electric vehicle adoption in 2024?',
  config: { tools: [{ googleSearch: {} }] }
});

console.log(response.text);
console.log('\nSources:');

const metadata = response.candidates[0].groundingMetadata;
metadata?.webPages?.forEach(page => {
  console.log(`- ${page.title}`);
});
```

---

## References

- Official Docs: https://ai.google.dev/gemini-api/docs/grounding
- Google Search Docs: https://ai.google.dev/gemini-api/docs/google-search
- Templates: See `grounding-search.ts` for working examples
- Combined Features: See `combined-advanced.ts` for integration patterns

diff --git a/references/models-guide.md b/references/models-guide.md
new file mode 100644
index 0000000..4d08ce1
--- /dev/null
+++ b/references/models-guide.md
@@ -0,0 +1,289 @@
# Gemini Models Guide (2025)

**Last Updated**: 2025-11-19 (Gemini 3 preview release)

---

## Gemini 3 Series (Preview - November 2025)

### gemini-3-pro-preview

**Model ID**: `gemini-3-pro-preview`

**Status**: 🆕 Preview release (November 18, 2025)

**Context Windows**:
- Input: TBD (documentation pending)
- Output: TBD (documentation pending)

**Description**: Google's newest and most intelligent AI model with state-of-the-art reasoning and multimodal understanding. Outperforms Gemini 2.5 Pro on every major AI benchmark.

**Best For**:
- Most complex reasoning tasks
- Advanced multimodal analysis (images, videos, PDFs, audio)
- Benchmark-critical applications
- Cutting-edge projects requiring latest capabilities
- Tasks requiring absolute best quality

**Features**:
- ✅ Enhanced multimodal understanding
- ✅ Function calling
- ✅ Streaming
- ✅ System instructions
- ✅ JSON mode
- TBD Thinking mode (documentation pending)

**Knowledge Cutoff**: TBD

**Pricing**: Preview pricing (likely higher than 2.5 Pro)

**⚠️ Preview Status**: Use for evaluation and testing. Consider `gemini-2.5-pro` for production-critical decisions until Gemini 3 reaches stable general availability.

**New Capabilities**:
- Record-breaking benchmark performance
- Enhanced generative UI responses
- Advanced coding capabilities (Google Antigravity integration)
- State-of-the-art multimodal understanding

---

## Current Production Models (Gemini 2.5 - Stable)

### gemini-2.5-pro

**Model ID**: `gemini-2.5-pro`

**Context Windows**:
- Input: 1,048,576 tokens (NOT 2M!)
- Output: 65,536 tokens

**Description**: State-of-the-art thinking model capable of reasoning over complex problems in code, math, and STEM.

**Best For**:
- Complex reasoning tasks
- Advanced code generation and optimization
- Mathematical problem-solving
- Multi-step logical analysis
- STEM applications

**Features**:
- ✅ Thinking mode (enabled by default)
- ✅ Function calling
- ✅ Multimodal (text, images, video, audio, PDFs)
- ✅ Streaming
- ✅ System instructions
- ✅ JSON mode

**Knowledge Cutoff**: January 2025

**Pricing**: Higher cost, use for tasks requiring best quality

---

### gemini-2.5-flash

**Model ID**: `gemini-2.5-flash`

**Context Windows**:
- Input: 1,048,576 tokens
- Output: 65,536 tokens

**Description**: Best price-performance model for large-scale processing, low-latency, and high-volume tasks.

**Best For**:
- General-purpose AI applications
- High-volume API calls
- Agentic workflows
- Cost-sensitive applications
- Production workloads

**Features**:
- ✅ Thinking mode (enabled by default)
- ✅ Function calling
- ✅ Multimodal (text, images, video, audio, PDFs)
- ✅ Streaming
- ✅ System instructions
- ✅ JSON mode

**Knowledge Cutoff**: January 2025

**Pricing**: Best price-performance ratio

**⭐ Recommended**: This is the default choice for most applications

---

### gemini-2.5-flash-lite

**Model ID**: `gemini-2.5-flash-lite`

**Context Windows**:
- Input: 1,048,576 tokens
- Output: 65,536 tokens

**Description**: Most cost-efficient and fastest 2.5 model, optimized for high throughput.

**Best For**:
- High-throughput applications
- Simple text generation
- Cost-critical use cases
- Speed-prioritized workloads

**Features**:
- ✅ Thinking mode (enabled by default)
- ❌ **NO function calling** (critical limitation!)
- ✅ Multimodal (text, images, video, audio, PDFs)
- ✅ Streaming
- ✅ System instructions
- ✅ JSON mode

**Knowledge Cutoff**: January 2025

**Pricing**: Lowest cost

**⚠️ Important**: Flash-Lite does NOT support function calling! Use Flash or Pro if you need tool use.
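Because this failure mode is easy to hit, it can be worth failing fast in your own code. A minimal sketch (the helper and model list are illustrative, not part of the SDK):

```typescript
// Hypothetical guard: throw before calling the API rather than debugging
// a confusing tool-use failure on Flash-Lite.
const TOOL_CAPABLE_MODELS = ['gemini-2.5-pro', 'gemini-2.5-flash'];

function assertSupportsTools(model: string): void {
  if (!TOOL_CAPABLE_MODELS.includes(model)) {
    throw new Error(
      `${model} does not support function calling - use gemini-2.5-flash or gemini-2.5-pro`
    );
  }
}

assertSupportsTools('gemini-2.5-flash');         // ok
// assertSupportsTools('gemini-2.5-flash-lite'); // throws
```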
---

## Model Comparison Matrix

| Feature | Pro | Flash | Flash-Lite |
|---------|-----|-------|------------|
| **Thinking Mode** | ✅ Default ON | ✅ Default ON | ✅ Default ON |
| **Function Calling** | ✅ Yes | ✅ Yes | ❌ **NO** |
| **Multimodal** | ✅ Full | ✅ Full | ✅ Full |
| **Streaming** | ✅ Yes | ✅ Yes | ✅ Yes |
| **Input Tokens** | 1,048,576 | 1,048,576 | 1,048,576 |
| **Output Tokens** | 65,536 | 65,536 | 65,536 |
| **Reasoning Quality** | Best | Good | Basic |
| **Speed** | Moderate | Fast | Fastest |
| **Cost** | Highest | Medium | Lowest |

---

## Previous Generation Models (Still Available)

### Gemini 2.0 Flash

**Model ID**: `gemini-2.0-flash`

**Context**: 1M input / 65K output tokens

**Status**: Previous generation, 2.5 Flash recommended instead

### Gemini 1.5 Pro

**Model ID**: `gemini-1.5-pro`

**Context**: 2M input tokens (this is the ONLY model with 2M!)

**Status**: Older model, 2.5 models recommended

---

## Context Window Clarification

**⚠️ CRITICAL CORRECTION**:

**ACCURATE**: Gemini 2.5 models support **1,048,576 input tokens** (approximately 1 million)

**INACCURATE**: Claiming Gemini 2.5 has a 2M token context window

**WHY THIS MATTERS**:
- Gemini 1.5 Pro (older model) had 2M tokens
- Gemini 2.5 models (current) have ~1M tokens
- This is a common mistake that causes confusion!

**This skill prevents this error by providing accurate information.**

---

## Model Selection Guide

### Use gemini-2.5-pro When:
- ✅ Complex reasoning required (math, logic, STEM)
- ✅ Advanced code generation and optimization
- ✅ Multi-step problem-solving
- ✅ Quality is more important than cost
- ✅ Tasks require maximum capability

### Use gemini-2.5-flash When:
- ✅ General-purpose AI applications
- ✅ High-volume production workloads
- ✅ Function calling required
- ✅ Agentic workflows
- ✅ Good balance of cost and quality needed
- ⭐ **Recommended default choice**

### Use gemini-2.5-flash-lite When:
- ✅ Simple text generation only
- ✅ No function calling needed
- ✅ High throughput required
- ✅ Cost is primary concern
- ⚠️ **Only if you don't need function calling!**

---

## Common Mistakes

### ❌ Mistake 1: Using Wrong Model Name
```typescript
// WRONG - old model name
model: 'gemini-1.5-pro'

// CORRECT - current model
model: 'gemini-2.5-flash'
```

### ❌ Mistake 2: Claiming 2M Context for 2.5 Models
```typescript
// WRONG ASSUMPTION
// "Gemini 2.5 has 2M token context window"

// CORRECT
// Gemini 2.5 has 1,048,576 input tokens
// Only Gemini 1.5 Pro (older) had 2M
```

### ❌ Mistake 3: Using Flash-Lite for Function Calling
```typescript
// WRONG - Flash-Lite doesn't support function calling!
model: 'gemini-2.5-flash-lite',
config: {
  tools: [{ functionDeclarations: [...] }] // This will FAIL
}

// CORRECT
model: 'gemini-2.5-flash', // or gemini-2.5-pro
config: {
  tools: [{ functionDeclarations: [...] }]
}
```

---

## Rate Limits (Free vs Paid)

### Free Tier
- **15 RPM** (requests per minute)
- **1M TPM** (tokens per minute)
- **1,500 RPD** (requests per day)

### Paid Tier
- **360 RPM**
- **4M TPM**
- Unlimited daily requests

**Tip**: Monitor your usage and implement rate limiting to stay within quotas.
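A minimal retry-with-backoff sketch for staying within these quotas (the error-shape check and delay values are assumptions; adapt them to how your HTTP layer surfaces 429s):

```typescript
// Retry a call with exponential backoff plus jitter on rate-limit errors.
async function withBackoff<T>(call: () => Promise<T>, maxRetries = 5): Promise<T> {
  for (let attempt = 0; ; attempt++) {
    try {
      return await call();
    } catch (err) {
      const message = String(err);
      const isRateLimit = message.includes('429') || message.includes('RESOURCE_EXHAUSTED');
      if (!isRateLimit || attempt >= maxRetries) throw err;
      const delayMs = Math.min(1000 * 2 ** attempt, 30_000) + Math.random() * 250;
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
}

// Usage:
// const response = await withBackoff(() =>
//   ai.models.generateContent({ model: 'gemini-2.5-flash', contents: 'Hi' })
// );
```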
+ +--- + +## Official Documentation + +- **Models Overview**: https://ai.google.dev/gemini-api/docs/models +- **Gemini 2.5 Announcement**: https://developers.googleblog.com/en/gemini-2-5-thinking-model-updates/ +- **Pricing**: https://ai.google.dev/pricing + +--- + +**Production Tip**: Always use gemini-2.5-flash as your default unless you specifically need Pro's advanced reasoning or want to minimize cost with Flash-Lite (and don't need function calling). diff --git a/references/multimodal-guide.md b/references/multimodal-guide.md new file mode 100644 index 0000000..2e5ec23 --- /dev/null +++ b/references/multimodal-guide.md @@ -0,0 +1,58 @@ +# Multimodal Guide + +Complete guide to using images, video, audio, and PDFs with Gemini API. + +--- + +## Supported Formats + +### Images +- JPEG, PNG, WebP, HEIC, HEIF +- Max size: 20MB + +### Video +- MP4, MPEG, MOV, AVI, FLV, MPG, WebM, WMV +- Max size: 2GB +- Max length (inline): 2 minutes + +### Audio +- MP3, WAV, FLAC, AAC, OGG, OPUS +- Max size: 20MB + +### PDFs +- Max size: 30MB +- Text-based PDFs work best + +--- + +## Usage Pattern + +```typescript +contents: [ + { + parts: [ + { text: 'Your question' }, + { + inlineData: { + data: base64EncodedData, + mimeType: 'image/jpeg' // or video/mp4, audio/mp3, application/pdf + } + } + ] + } +] +``` + +--- + +## Best Practices + +- Use specific, detailed prompts +- Combine multiple modalities in one request +- For large files (>2GB), use File API (Phase 2) + +--- + +## Official Docs + +https://ai.google.dev/gemini-api/docs/vision diff --git a/references/sdk-migration-guide.md b/references/sdk-migration-guide.md new file mode 100644 index 0000000..1dedbbc --- /dev/null +++ b/references/sdk-migration-guide.md @@ -0,0 +1,235 @@ +# SDK Migration Guide + +**From**: `@google/generative-ai` (DEPRECATED) +**To**: `@google/genai` (CURRENT) + +**Deadline**: November 30, 2025 (deprecated SDK sunset) + +--- + +## Why Migrate? + +The `@google/generative-ai` SDK is deprecated and will stop receiving updates on **November 30, 2025**. + +The new `@google/genai` SDK: +- ✅ Works with both Gemini API and Vertex AI +- ✅ Supports Gemini 2.0+ features +- ✅ Better TypeScript support +- ✅ Unified API across platforms +- ✅ Active development and updates + +--- + +## Migration Steps + +### 1. Update Package + +```bash +# Remove deprecated SDK +npm uninstall @google/generative-ai + +# Install current SDK +npm install @google/genai@1.27.0 +``` + +### 2. Update Imports + +**Old (DEPRECATED)**: +```typescript +import { GoogleGenerativeAI } from '@google/generative-ai'; + +const genAI = new GoogleGenerativeAI(apiKey); +const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' }); +``` + +**New (CURRENT)**: +```typescript +import { GoogleGenAI } from '@google/genai'; + +const ai = new GoogleGenAI({ apiKey }); +// No need to get model separately +``` + +### 3. Update API Calls + +**Old**: +```typescript +const result = await model.generateContent(prompt); +const response = await result.response; +const text = response.text(); +``` + +**New**: +```typescript +const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: prompt +}); +const text = response.text; +``` + +### 4. 
Update Streaming

**Old**:
```typescript
const result = await model.generateContentStream(prompt);
for await (const chunk of result.stream) {
  console.log(chunk.text());
}
```

**New**:
```typescript
const response = await ai.models.generateContentStream({
  model: 'gemini-2.5-flash',
  contents: prompt
});
for await (const chunk of response) {
  console.log(chunk.text);
}
```

### 5. Update Chat

**Old**:
```typescript
const chat = model.startChat({
  history: []
});
const result = await chat.sendMessage(message);
const response = await result.response;
console.log(response.text());
```

**New**:
```typescript
const chat = ai.chats.create({
  model: 'gemini-2.5-flash',
  history: []
});
const response = await chat.sendMessage({ message });
console.log(response.text);
```

---

## Complete Before/After Example

### Before (Deprecated SDK)

```typescript
import { GoogleGenerativeAI } from '@google/generative-ai';

const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY);
const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' });

// Generate
const result = await model.generateContent('Hello');
const response = await result.response;
console.log(response.text());

// Stream
const streamResult = await model.generateContentStream('Write a story');
for await (const chunk of streamResult.stream) {
  console.log(chunk.text());
}

// Chat
const chat = model.startChat();
const chatResult = await chat.sendMessage('Hi');
const chatResponse = await chatResult.response;
console.log(chatResponse.text());
```

### After (Current SDK)

```typescript
import { GoogleGenAI } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

// Generate
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Hello'
});
console.log(response.text);

// Stream
const streamResponse = await ai.models.generateContentStream({
  model: 'gemini-2.5-flash',
  contents: 'Write a story'
});
for await (const chunk of streamResponse) {
  console.log(chunk.text);
}

// Chat
const chat = ai.chats.create({ model: 'gemini-2.5-flash' });
const chatResponse = await chat.sendMessage({ message: 'Hi' });
console.log(chatResponse.text);
```

---

## Key Differences

| Aspect | Old SDK | New SDK |
|--------|---------|---------|
| Package | `@google/generative-ai` | `@google/genai` |
| Class | `GoogleGenerativeAI` | `GoogleGenAI` |
| Model Init | `genAI.getGenerativeModel()` | Specify in each call |
| Text Access | `response.text()` (method) | `response.text` (property) |
| Stream Iteration | `result.stream` | Direct iteration |
| Chat Creation | `model.startChat()` | `ai.chats.create()` |

---

## Troubleshooting

### Error: "Cannot find module '@google/generative-ai'"

**Cause**: Old import statement after migration

**Solution**: Update all imports to `@google/genai`

### Error: "Property 'text' does not exist"

**Cause**: Using `response.text()` (method) instead of `response.text` (property)

**Solution**: Remove parentheses: `response.text` not `response.text()`

### Error: "generateContent is not a function"

**Cause**: Trying to call methods on old model object

**Solution**: Use `ai.models.generateContent()` directly

---

## Automated Migration Script

```bash
# Find all files using old SDK
rg "@google/generative-ai" --type ts

# Replace import statements
find . 
-name "*.ts" -exec sed -i 's/@google\/generative-ai/@google\/genai/g' {} + + +# Replace class name +find . -name "*.ts" -exec sed -i 's/GoogleGenerativeAI/GoogleGenAI/g' {} + +``` + +**⚠️ Note**: This script handles imports but NOT API changes. Manual review required! + +--- + +## Official Resources + +- **Migration Guide**: https://ai.google.dev/gemini-api/docs/migrate-to-genai +- **New SDK Docs**: https://github.com/googleapis/js-genai +- **Deprecated SDK**: https://github.com/google-gemini/deprecated-generative-ai-js + +--- + +**Deadline Reminder**: November 30, 2025 - Deprecated SDK sunset diff --git a/references/streaming-patterns.md b/references/streaming-patterns.md new file mode 100644 index 0000000..e01f56b --- /dev/null +++ b/references/streaming-patterns.md @@ -0,0 +1,81 @@ +# Streaming Patterns + +Complete guide to implementing streaming with Gemini API. + +--- + +## SDK Approach (Async Iteration) + +```typescript +const response = await ai.models.generateContentStream({ + model: 'gemini-2.5-flash', + contents: 'Write a story' +}); + +for await (const chunk of response) { + process.stdout.write(chunk.text); +} +``` + +**Pros**: Simple, automatic parsing +**Cons**: Requires Node.js or compatible runtime + +--- + +## Fetch Approach (SSE Parsing) + +```typescript +const response = await fetch( + 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent', + { /* ... */ } +); + +const reader = response.body.getReader(); +const decoder = new TextDecoder(); +let buffer = ''; + +while (true) { + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split('\n'); + buffer = lines.pop() || ''; + + for (const line of lines) { + if (!line.startsWith('data: ')) continue; + + const data = JSON.parse(line.slice(6)); + const text = data.candidates[0]?.content?.parts[0]?.text; + if (text) process.stdout.write(text); + } +} +``` + +**Pros**: Works in any environment +**Cons**: Manual SSE parsing required + +--- + +## SSE Format + +``` +data: {"candidates":[{"content":{"parts":[{"text":"Hello"}]}}]} +data: {"candidates":[{"content":{"parts":[{"text":" world"}]}}]} +data: [DONE] +``` + +--- + +## Best Practices + +- Always use `streamGenerateContent` endpoint +- Handle incomplete chunks in buffer +- Skip empty lines and `[DONE]` markers +- Use streaming for better UX on long responses + +--- + +## Official Docs + +https://ai.google.dev/gemini-api/docs/streaming diff --git a/references/thinking-mode-guide.md b/references/thinking-mode-guide.md new file mode 100644 index 0000000..6f3f975 --- /dev/null +++ b/references/thinking-mode-guide.md @@ -0,0 +1,59 @@ +# Thinking Mode Guide + +Complete guide to thinking mode in Gemini 2.5 models. + +--- + +## What is Thinking Mode? + +Gemini 2.5 models "think" internally before responding, improving accuracy on complex tasks. 
**Key Points**:
- ✅ Always enabled on 2.5 models (cannot disable)
- ✅ Happens transparently (you don't see the thinking process)
- ✅ Configurable thinking budget
- ✅ Improves reasoning quality

---

## Configuration

```typescript
config: {
  thinkingConfig: {
    thinkingBudget: 8192 // Max tokens for internal reasoning
  }
}
```

---

## When to Increase Budget

✅ Complex math/logic problems
✅ Multi-step reasoning
✅ Code optimization
✅ Detailed analysis

---

## When Default is Fine

⏺️ Simple questions
⏺️ Creative writing
⏺️ Translation
⏺️ Summarization

---

## Model Comparison

- **gemini-2.5-pro**: Best for complex reasoning
- **gemini-2.5-flash**: Good balance
- **gemini-2.5-flash-lite**: Basic thinking

---

## Official Docs

https://ai.google.dev/gemini-api/docs/thinking

diff --git a/references/top-errors.md b/references/top-errors.md new file mode 100644 index 0000000..ff3f642 --- /dev/null +++ b/references/top-errors.md @@ -0,0 +1,304 @@

# Top Errors and Solutions

22 common Gemini API errors with solutions (Phase 1 + Phase 2).

---

## 1. Using Deprecated SDK

**Error**: `Cannot find module '@google/generative-ai'`

**Cause**: Using old SDK after migration

**Solution**: Install `@google/genai` instead

---

## 2. Wrong Context Window Claims

**Error**: Input exceeds model capacity

**Cause**: Assuming 2M tokens for Gemini 2.5

**Solution**: Gemini 2.5 has 1,048,576 input tokens (NOT 2M!)

---

## 3. Model Not Found

**Error**: `models/gemini-3.0-flash is not found`

**Cause**: Wrong model name

**Solution**: Use: `gemini-2.5-pro`, `gemini-2.5-flash`, or `gemini-2.5-flash-lite`

---

## 4. Function Calling on Flash-Lite

**Error**: Function calling not working

**Cause**: Flash-Lite doesn't support function calling

**Solution**: Use `gemini-2.5-flash` or `gemini-2.5-pro`

---

## 5. Invalid API Key (401)

**Error**: `API key not valid`

**Cause**: Missing or wrong `GEMINI_API_KEY`

**Solution**: Set environment variable correctly

---

## 6. Rate Limit Exceeded (429)

**Error**: `Resource has been exhausted`

**Cause**: Too many requests

**Solution**: Implement exponential backoff

---

## 7. Streaming Parse Errors

**Error**: Invalid JSON in SSE stream

**Cause**: Incomplete chunk parsing

**Solution**: Use a buffer to handle partial chunks

---

## 8. Multimodal Format Errors

**Error**: Invalid base64 or MIME type

**Cause**: Wrong image encoding

**Solution**: Use correct base64 encoding and MIME type

---

## 9. Context Length Exceeded

**Error**: `Request payload size exceeds the limit`

**Cause**: Input too large

**Solution**: Reduce input size (max 1,048,576 tokens)

---

## 10. Chat Not Working with Fetch

**Error**: No chat helper available

**Cause**: Chat helpers are SDK-only

**Solution**: Manually manage conversation history (see `templates/cloudflare-worker.ts` for a working pattern) or use the SDK

---

## 11. Thinking Mode Not Supported

**Error**: Trying to disable thinking mode

**Cause**: Thinking mode is always enabled on 2.5 models

**Solution**: You can only configure the budget, not disable it

---

## 12. Parameter Conflicts

**Error**: Unsupported parameters

**Cause**: Using wrong config options

**Solution**: Use only supported parameters (see generation-config.md)

---

## 13. System Instruction Placement

**Error**: System instruction not working

**Cause**: Placed inside contents array

**Solution**: Place at top level, not in contents
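
For example, a minimal sketch of correct vs. incorrect placement, using this skill's standard SDK setup (the `ai` client as elsewhere in this guide; prompt text is illustrative):

```typescript
// ✅ Correct: systemInstruction lives in config, outside the contents array
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Summarize this ticket in one sentence.',
  config: {
    systemInstruction: 'You are a terse support triage assistant.'
  }
});

// ❌ Wrong: a "system" message stuffed into contents is treated as plain user text
// contents: [{ parts: [{ text: 'SYSTEM: You are a terse assistant. ...' }] }]
```

---

## 14. 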
Token Counting Errors

**Error**: Unexpected token usage

**Cause**: Multimodal inputs use more tokens

**Solution**: Images/video/audio count toward the token limit

---

## 15. Parallel Function Call Errors

**Error**: Functions not executing in parallel

**Cause**: Dependencies between functions

**Solution**: Gemini auto-detects; ensure functions are independent

---

## Phase 2 Errors

### 16. Invalid Model Version for Caching

**Error**: `Invalid model name for caching`

**Cause**: Using `gemini-2.5-flash` instead of `gemini-2.5-flash-001`

**Solution**: Must use explicit version suffix when creating caches

```typescript
// ✅ Correct
model: 'gemini-2.5-flash-001'

// ❌ Wrong
model: 'gemini-2.5-flash'
```

**Source**: https://ai.google.dev/gemini-api/docs/caching

---

### 17. Cache Expired or Not Found

**Error**: `Cache not found` or `Cache expired`

**Cause**: Trying to use cache after TTL expiration

**Solution**: Check expiration before use or recreate cache

```typescript
let cache = await ai.caches.get({ name: cacheName });
if (new Date(cache.expireTime) < new Date()) {
  // Recreate cache (declare with `let`, not `const`, so it can be reassigned)
  cache = await ai.caches.create({ ... });
}
```

---

### 18. Cannot Update Expired Cache TTL

**Error**: `Cannot update expired cache`

**Cause**: Trying to extend TTL after the cache has already expired

**Solution**: Update TTL before expiration or create a new cache

```typescript
// Update TTL before expiration
await ai.caches.update({
  name: cache.name,
  config: { ttl: '7200s' }
});
```

---

### 19. Code Execution Timeout

**Error**: `Execution timed out after 30 seconds` with `OUTCOME_FAILED`

**Cause**: Python code taking too long to execute

**Solution**: Simplify computation or reduce data size

```typescript
// Check outcome before using results
if (part.codeExecutionResult?.outcome === 'OUTCOME_FAILED') {
  console.error('Execution failed:', part.codeExecutionResult.output);
}
```

**Source**: https://ai.google.dev/gemini-api/docs/code-execution

---

### 20. Python Package Not Available

**Error**: `ModuleNotFoundError: No module named 'requests'`

**Cause**: Trying to import a package not in the sandbox

**Solution**: Use only available packages (numpy, pandas, matplotlib, seaborn, scipy)

**Available Packages**:
- Standard library: math, statistics, json, csv, datetime
- Data science: numpy, pandas, scipy
- Visualization: matplotlib, seaborn

---

### 21. Code Execution on Flash-Lite

**Error**: Code execution not working

**Cause**: `gemini-2.5-flash-lite` doesn't support code execution

**Solution**: Use `gemini-2.5-flash` or `gemini-2.5-pro`

```typescript
// ✅ Correct
model: 'gemini-2.5-flash' // Supports code execution

// ❌ Wrong
model: 'gemini-2.5-flash-lite' // NO code execution support
```

---

### 22. Grounding Requires Google Cloud Project

**Error**: `Grounding requires Google Cloud project configuration`

**Cause**: Using an API key not associated with a GCP project

**Solution**: Set up a Google Cloud project and enable the Generative Language API

**Steps**:
1. Create Google Cloud project
2. Enable Generative Language API
3. Configure billing
4. 
Use API key from that project

**Source**: https://ai.google.dev/gemini-api/docs/grounding

---

## Quick Debugging Checklist

### Phase 1 (Core)
- [ ] Using @google/genai (NOT @google/generative-ai)
- [ ] Model name is gemini-2.5-pro/flash/flash-lite
- [ ] API key is set correctly
- [ ] Input under 1,048,576 tokens
- [ ] Not using Flash-Lite for function calling
- [ ] System instruction at top level
- [ ] Streaming endpoint is streamGenerateContent (with ?alt=sse for SSE)
- [ ] MIME types are correct for multimodal

### Phase 2 (Advanced)
- [ ] Caching: Using explicit model version (e.g., gemini-2.5-flash-001)
- [ ] Caching: Cache not expired (check expireTime)
- [ ] Code Execution: Not using Flash-Lite
- [ ] Code Execution: Using only available Python packages
- [ ] Grounding: Google Cloud project configured
- [ ] Grounding: Checking groundingMetadata for search results

diff --git a/scripts/check-versions.sh b/scripts/check-versions.sh new file mode 100755 index 0000000..cd1f161 --- /dev/null +++ b/scripts/check-versions.sh @@ -0,0 +1,76 @@

#!/bin/bash

# Check @google/genai package version and warn about deprecated SDK
# Usage: ./scripts/check-versions.sh

echo "🔍 Checking Gemini API SDK versions..."
echo ""

# Check if package.json exists
if [ ! -f "package.json" ]; then
  echo "❌ No package.json found in current directory"
  exit 1
fi

# Check for deprecated SDK
if grep -q "@google/generative-ai" package.json; then
  echo "⚠️ WARNING: DEPRECATED SDK DETECTED!"
  echo ""
  echo "   Package: @google/generative-ai"
  echo "   Status: DEPRECATED (sunset Nov 30, 2025)"
  echo ""
  echo "   Action required:"
  echo "   1. npm uninstall @google/generative-ai"
  echo "   2. npm install @google/genai@1.30.0"
  echo "   3. Update imports (see sdk-migration-guide.md)"
  echo ""
fi

# Check for current SDK
# (pattern is quoted so it doesn't also match @google/generative-ai)
if grep -q '"@google/genai"' package.json; then
  # Get installed version
  INSTALLED_VERSION=$(npm list @google/genai --depth=0 2>/dev/null | grep @google/genai | sed 's/.*@//' | sed 's/ .*//')
  RECOMMENDED_VERSION="1.30.0"

  echo "✅ Current SDK installed: @google/genai"
  echo "   Installed version: $INSTALLED_VERSION"
  echo "   Recommended version: $RECOMMENDED_VERSION"
  echo ""

  # Check if version matches recommendation
  if [ "$INSTALLED_VERSION" != "$RECOMMENDED_VERSION" ]; then
    echo "ℹ️ Consider updating to recommended version:"
    echo "   npm install @google/genai@$RECOMMENDED_VERSION"
    echo ""
  fi
else
  echo "❌ @google/genai not found in package.json"
  echo ""
  echo "   Install with:"
  echo "   npm install @google/genai@1.30.0"
  echo ""
fi

# Check Node.js version
NODE_VERSION=$(node -v | sed 's/v//')
REQUIRED_NODE="18.0.0"

echo "Node.js version: $NODE_VERSION"
echo "Required: >= $REQUIRED_NODE"
echo ""

# Summary
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Summary:"
echo ""
if grep -q "@google/generative-ai" package.json; then
  echo "❌ Migration needed: Remove deprecated SDK"
elif grep -q '"@google/genai"' package.json; then
  echo "✅ Using current SDK (@google/genai)"
else
  echo "❌ Gemini SDK not installed"
fi
echo ""
echo "For migration help, see:"
echo "  references/sdk-migration-guide.md"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

diff --git a/templates/cloudflare-worker.ts b/templates/cloudflare-worker.ts new file mode 100644 index 0000000..b13bd66 --- /dev/null +++ b/templates/cloudflare-worker.ts @@ -0,0 +1,245 @@

/**
 * Complete Cloudflare Worker with Gemini API
 *
 * Demonstrates:
 * - Fetch-based Gemini API 
integration (no SDK dependencies)
 * - Streaming responses to client
 * - Multi-turn chat with session storage
 * - Error handling for production
 * - CORS configuration
 *
 * Deploy:
 * - npx wrangler deploy
 * - Set GEMINI_API_KEY in Cloudflare dashboard or wrangler.toml
 */

interface Env {
  GEMINI_API_KEY: string;
}

interface ChatMessage {
  role: 'user' | 'model';
  parts: Array<{ text: string }>;
}

export default {
  /**
   * Main request handler
   */
  async fetch(request: Request, env: Env): Promise<Response> {
    // CORS headers
    const corsHeaders = {
      'Access-Control-Allow-Origin': '*',
      'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
      'Access-Control-Allow-Headers': 'Content-Type',
    };

    // Handle preflight
    if (request.method === 'OPTIONS') {
      return new Response(null, { headers: corsHeaders });
    }

    const url = new URL(request.url);

    try {
      // Route: POST /api/chat (non-streaming)
      if (url.pathname === '/api/chat' && request.method === 'POST') {
        return await handleChat(request, env, corsHeaders);
      }

      // Route: POST /api/chat/stream (streaming)
      if (url.pathname === '/api/chat/stream' && request.method === 'POST') {
        return await handleChatStream(request, env, corsHeaders);
      }

      // Route: GET / (health check)
      if (url.pathname === '/' && request.method === 'GET') {
        return new Response(
          JSON.stringify({ status: 'ok', service: 'Gemini API Worker' }),
          { headers: { ...corsHeaders, 'Content-Type': 'application/json' } }
        );
      }

      // 404 for unknown routes
      return new Response('Not Found', { status: 404, headers: corsHeaders });

    } catch (error: any) {
      return new Response(
        JSON.stringify({ error: error.message }),
        { status: 500, headers: { ...corsHeaders, 'Content-Type': 'application/json' } }
      );
    }
  }
};

/**
 * Handle non-streaming chat request
 */
async function handleChat(request: Request, env: Env, corsHeaders: any): Promise<Response> {
  const { message, history = [] } = await request.json() as {
    message: string;
    history?: ChatMessage[];
  };

  // Build contents array with history
  const contents: ChatMessage[] = [
    ...history,
    { role: 'user', parts: [{ text: message }] }
  ];

  // Call Gemini API
  const response = await fetch(
    `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
    {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-goog-api-key': env.GEMINI_API_KEY,
      },
      body: JSON.stringify({ contents }),
    }
  );

  if (!response.ok) {
    const errorData = await response.json();
    throw new Error(errorData.error?.message || 'Gemini API error');
  }

  const data = await response.json();
  const assistantReply = data.candidates?.[0]?.content?.parts?.[0]?.text;

  // Return response with updated history
  return new Response(
    JSON.stringify({
      reply: assistantReply,
      history: [
        ...history,
        { role: 'user', parts: [{ text: message }] },
        { role: 'model', parts: [{ text: assistantReply }] }
      ],
      usage: data.usageMetadata
    }),
    { headers: { ...corsHeaders, 'Content-Type': 'application/json' } }
  );
}

/**
 * Handle streaming chat request
 */
async function handleChatStream(request: Request, env: Env, corsHeaders: any): Promise<Response> {
  const { message, history = [] } = await request.json() as {
    message: string;
    history?: ChatMessage[];
  };

  const contents: ChatMessage[] = [
    ...history,
    { role: 'user', parts: [{ text: message }] }
  ];

  // Create a TransformStream to stream to client
  const { readable, writable } = new TransformStream();
  const writer = writable.getWriter();
  const encoder = new TextEncoder();

  // Start streaming in background
  (async () => {
    try {
      // ?alt=sse is required for SSE output; without it the API streams a JSON array
      const response = await fetch(
        `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse`,
        {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            'x-goog-api-key': env.GEMINI_API_KEY,
          },
          body: JSON.stringify({ contents }),
        }
      );

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }

      if (!response.body) {
        throw new Error('Response body is null');
      }

      const reader = response.body.getReader();
      const decoder = new TextDecoder();
      let buffer = '';

      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        buffer = lines.pop() || '';

        for (const line of lines) {
          if (line.trim() === '' || line.startsWith('data: [DONE]')) continue;
          if (!line.startsWith('data: ')) continue;

          try {
            const data = JSON.parse(line.slice(6));
            const text = data.candidates?.[0]?.content?.parts?.[0]?.text;

            if (text) {
              await writer.write(encoder.encode(text));
            }
          } catch (e) {
            // Skip invalid JSON
          }
        }
      }

      await writer.close();

    } catch (error: any) {
      await writer.write(encoder.encode(`\n\nError: ${error.message}`));
      await writer.close();
    }
  })();

  return new Response(readable, {
    headers: {
      ...corsHeaders,
      'Content-Type': 'text/plain; charset=utf-8',
      'Transfer-Encoding': 'chunked',
    },
  });
}

/**
 * Example Client Usage (JavaScript):
 *
 * // Non-streaming
 * const response = await fetch('https://your-worker.workers.dev/api/chat', {
 *   method: 'POST',
 *   headers: { 'Content-Type': 'application/json' },
 *   body: JSON.stringify({
 *     message: 'What is quantum computing?',
 *     history: []
 *   })
 * });
 * const data = await response.json();
 * console.log(data.reply);
 *
 * // Streaming
 * const response = await fetch('https://your-worker.workers.dev/api/chat/stream', {
 *   method: 'POST',
 *   headers: { 'Content-Type': 'application/json' },
 *   body: JSON.stringify({
 *     message: 'Write a story',
 *     history: []
 *   })
 * });
 * const reader = response.body.getReader();
 * const decoder = new TextDecoder();
 * while (true) {
 *   const { done, value } = await reader.read();
 *   if (done) break;
 *   console.log(decoder.decode(value));
 * }
 */

diff --git a/templates/code-execution.ts b/templates/code-execution.ts new file mode 100644 index 0000000..0b9e43d --- /dev/null +++ b/templates/code-execution.ts @@ -0,0 +1,206 @@

/**
 * Google Gemini API - Code Execution Example
 *
 * Demonstrates how to enable code execution so the model can generate
 * and run Python code to solve computational problems.
 *
 * Features:
 * - Basic code execution
 * - Data analysis with code
 * - Chart generation
 * - Error handling for failed execution
 *
 * Requirements:
 * - @google/genai@1.27.0+
 * - GEMINI_API_KEY environment variable
 *
 * Note: Code Execution is NOT available on gemini-2.5-flash-lite
 */

import { GoogleGenAI } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY! });

async function basicCodeExecution() {
  console.log('=== Basic Code Execution Example ===\n');

  const response = await ai.models.generateContent({
    model: 'gemini-2.5-flash',
    contents:
      'What is the sum of the first 50 prime numbers? 
Generate and run code for the calculation, and make sure you get all 50.', + config: { + tools: [{ codeExecution: {} }], + }, + }); + + // Parse response parts + console.log('Response parts:\n'); + for (const part of response.candidates[0].content.parts) { + if (part.text) { + console.log('📝 Text:', part.text); + } + if (part.executableCode) { + console.log('\n💻 Generated Code:'); + console.log(part.executableCode.code); + } + if (part.codeExecutionResult) { + console.log('\n✅ Execution Output:'); + console.log(part.codeExecutionResult.output); + } + } + + console.log('\n=== Basic Code Execution Complete ==='); +} + +async function dataAnalysisExample() { + console.log('\n=== Data Analysis Example ===\n'); + + const prompt = ` + Analyze this sales data and calculate: + 1. Total revenue + 2. Average sale price + 3. Best-selling month + 4. Month with highest revenue + + Use pandas or numpy for analysis. + + Data (CSV format): + month,sales,revenue + Jan,150,45000 + Feb,200,62000 + Mar,175,53000 + Apr,220,68000 + May,190,58000 + `; + + const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: prompt, + config: { + tools: [{ codeExecution: {} }], + }, + }); + + for (const part of response.candidates[0].content.parts) { + if (part.text) { + console.log('📊 Analysis:', part.text); + } + if (part.executableCode) { + console.log('\n💻 Analysis Code:'); + console.log(part.executableCode.code); + } + if (part.codeExecutionResult) { + console.log('\n📈 Results:'); + console.log(part.codeExecutionResult.output); + } + } + + console.log('\n=== Data Analysis Complete ==='); +} + +async function chartGenerationExample() { + console.log('\n=== Chart Generation Example ===\n'); + + const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: + 'Create a bar chart showing the distribution of prime numbers under 100 by their last digit. Generate the chart code and describe any patterns you see.', + config: { + tools: [{ codeExecution: {} }], + }, + }); + + for (const part of response.candidates[0].content.parts) { + if (part.text) { + console.log('📊 Chart Description:', part.text); + } + if (part.executableCode) { + console.log('\n📉 Chart Code:'); + console.log(part.executableCode.code); + } + if (part.codeExecutionResult) { + console.log('\n✓ Chart generated'); + // Note: Image data would be in output + } + } + + console.log('\n=== Chart Generation Complete ==='); +} + +async function chatWithCodeExecution() { + console.log('\n=== Chat with Code Execution Example ===\n'); + + const chat = await ai.chats.create({ + model: 'gemini-2.5-flash', + config: { + tools: [{ codeExecution: {} }], + }, + }); + + // First message + console.log('User: I have a math question for you.'); + let response = await chat.sendMessage('I have a math question for you.'); + console.log(`Assistant: ${response.text}\n`); + + // Second message (will generate and execute code) + console.log('User: Calculate the Fibonacci sequence up to the 20th number and sum them.'); + response = await chat.sendMessage( + 'Calculate the Fibonacci sequence up to the 20th number and sum them.' 
+ ); + + for (const part of response.candidates[0].content.parts) { + if (part.text) { + console.log('Assistant:', part.text); + } + if (part.executableCode) { + console.log('\nCode:'); + console.log(part.executableCode.code); + } + if (part.codeExecutionResult) { + console.log('\nOutput:'); + console.log(part.codeExecutionResult.output); + } + } + + console.log('\n=== Chat with Code Execution Complete ==='); +} + +async function errorHandlingExample() { + console.log('\n=== Error Handling Example ===\n'); + + const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: 'Write code that divides by zero and see what happens', + config: { + tools: [{ codeExecution: {} }], + }, + }); + + for (const part of response.candidates[0].content.parts) { + if (part.codeExecutionResult) { + if (part.codeExecutionResult.outcome === 'OUTCOME_FAILED') { + console.error('❌ Code execution failed:'); + console.error(part.codeExecutionResult.output); + } else { + console.log('✅ Success:'); + console.log(part.codeExecutionResult.output); + } + } + } + + console.log('\n=== Error Handling Example Complete ==='); +} + +// Run all examples +async function main() { + await basicCodeExecution(); + await dataAnalysisExample(); + await chartGenerationExample(); + await chatWithCodeExecution(); + await errorHandlingExample(); +} + +main().catch((error) => { + console.error('Error:', error.message); + process.exit(1); +}); diff --git a/templates/combined-advanced.ts b/templates/combined-advanced.ts new file mode 100644 index 0000000..019f220 --- /dev/null +++ b/templates/combined-advanced.ts @@ -0,0 +1,278 @@ +/** + * Google Gemini API - Combined Advanced Features Example + * + * Demonstrates how to use all Phase 2 advanced features together: + * - Context Caching (cost optimization) + * - Code Execution (computational tasks) + * - Grounding with Google Search (real-time information) + * + * Use Case: Financial analysis chatbot that: + * 1. Caches financial data and analysis instructions + * 2. Uses code execution for calculations and data analysis + * 3. Uses grounding for current market information + * + * Requirements: + * - @google/genai@1.27.0+ + * - GEMINI_API_KEY environment variable + * - Google Cloud project (for grounding) + */ + +import { GoogleGenAI } from '@google/genai'; + +const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY! }); + +async function financialAnalysisChatbot() { + console.log('=== Combined Advanced Features: Financial Analysis Chatbot ===\n'); + + // Step 1: Create cache with financial data and analysis instructions + console.log('Step 1: Creating cache with financial data...'); + + const financialData = ` + Company Financial Data (Q1-Q4 2024): + + Revenue: + Q1: $2.5M + Q2: $3.1M + Q3: $2.9M + Q4: $3.8M + + Expenses: + Q1: $1.8M + Q2: $2.2M + Q3: $2.1M + Q4: $2.6M + + Customer Acquisition: + Q1: 1,200 new customers + Q2: 1,500 new customers + Q3: 1,350 new customers + Q4: 1,800 new customers + `.trim(); + + const cache = await ai.caches.create({ + model: 'gemini-2.5-flash-001', // Explicit version required for caching + config: { + displayName: 'financial-analysis-cache', + systemInstruction: ` + You are a financial analyst chatbot. You have access to: + 1. Company financial data (cached) + 2. Code execution for calculations + 3. Google Search for current market information + + Provide detailed, accurate financial analysis. 
+ `, + contents: financialData, + ttl: '3600s', // Cache for 1 hour + }, + }); + + console.log(`✓ Cache created: ${cache.name}\n`); + + // Step 2: Create chat with all advanced features enabled + console.log('Step 2: Creating chat with code execution and grounding...\n'); + + const chat = await ai.chats.create({ + model: cache.name, // Use cached context + config: { + tools: [ + { codeExecution: {} }, // Enable code execution + { googleSearch: {} }, // Enable grounding + ], + }, + }); + + // Query 1: Analysis requiring code execution (uses cache) + console.log('==========================================='); + console.log('Query 1: Calculate year-over-year growth'); + console.log('===========================================\n'); + + let response = await chat.sendMessage(` + Calculate the following for 2024: + 1. Total annual revenue + 2. Total annual expenses + 3. Net profit + 4. Profit margin percentage + 5. Quarter-over-quarter revenue growth rates + + Use code to perform these calculations. + `); + + console.log('📊 Financial Analysis:\n'); + for (const part of response.candidates[0].content.parts) { + if (part.text) { + console.log(part.text); + } + if (part.executableCode) { + console.log('\n💻 Calculations Code:'); + console.log(part.executableCode.code); + } + if (part.codeExecutionResult) { + console.log('\n📈 Results:'); + console.log(part.codeExecutionResult.output); + } + } + + // Query 2: Current market information (uses grounding) + console.log('\n\n==========================================='); + console.log('Query 2: Compare with industry benchmarks'); + console.log('===========================================\n'); + + response = await chat.sendMessage(` + What are the current industry benchmarks for SaaS companies in terms of: + 1. Profit margins + 2. Customer acquisition cost trends + 3. Growth rate expectations + + Use current market data to provide context. + `); + + console.log('📰 Market Context:\n'); + console.log(response.text); + + if (response.candidates[0].groundingMetadata) { + console.log('\n✓ Used current market data from:'); + const sources = response.candidates[0].groundingMetadata.webPages || []; + sources.slice(0, 3).forEach((source, i) => { + console.log(`${i + 1}. ${source.title}`); + console.log(` ${source.url}`); + }); + } + + // Query 3: Combined analysis (uses cache + code + grounding) + console.log('\n\n==========================================='); + console.log('Query 3: Comprehensive recommendation'); + console.log('===========================================\n'); + + response = await chat.sendMessage(` + Based on: + 1. Our company's financial performance (from cached data) + 2. Calculated growth metrics (using code execution) + 3. Current industry trends (from search) + + Provide a comprehensive recommendation for Q1 2025 strategy. 
+ `); + + console.log('💡 Strategic Recommendation:\n'); + console.log(response.text); + + // Show which features were used + console.log('\n\n📋 Features Used in This Query:'); + let featuresUsed = []; + + // Check for code execution + const hasCode = response.candidates[0].content.parts.some( + (part) => part.executableCode + ); + if (hasCode) featuresUsed.push('✓ Code Execution'); + + // Check for grounding + if (response.candidates[0].groundingMetadata) { + featuresUsed.push('✓ Google Search Grounding'); + } + + // Cache is always used when we use cache.name as model + featuresUsed.push('✓ Context Caching'); + + featuresUsed.forEach((feature) => console.log(feature)); + + // Clean up + console.log('\n\nStep 3: Cleaning up...'); + await ai.caches.delete({ name: cache.name }); + console.log('✓ Cache deleted'); + + console.log('\n=== Financial Analysis Chatbot Complete ==='); +} + +async function researchAssistant() { + console.log('\n\n=== Combined Advanced Features: Research Assistant ===\n'); + + // Create cache with research paper + console.log('Step 1: Caching research paper...'); + + const researchPaper = ` + Title: Climate Change Impact on Arctic Ecosystems (2024) + + Abstract: + This study examines the effects of climate change on Arctic ecosystems + over the past decade... + + [Imagine full research paper content here] + + Key Findings: + - Temperature increase of 2.3°C in Arctic regions + - 15% reduction in sea ice coverage + - Migration pattern changes in polar species + - etc. + `.trim(); + + const cache = await ai.caches.create({ + model: 'gemini-2.5-flash-001', + config: { + displayName: 'research-paper-cache', + systemInstruction: 'You are a research assistant. Analyze papers and provide insights.', + contents: researchPaper, + ttl: '1800s', // 30 minutes + }, + }); + + console.log(`✓ Cache created\n`); + + // Create chat with all tools + const chat = await ai.chats.create({ + model: cache.name, + config: { + tools: [{ codeExecution: {} }, { googleSearch: {} }], + }, + }); + + // Query combining all features + console.log('Query: Comprehensive climate analysis\n'); + + const response = await chat.sendMessage(` + 1. Calculate the average temperature change per year from the data in the paper (use code) + 2. Find the latest climate predictions for the next decade (use search) + 3. 
Synthesize findings from both sources into a summary + `); + + console.log('Response:\n'); + for (const part of response.candidates[0].content.parts) { + if (part.text) console.log(part.text); + if (part.executableCode) { + console.log('\nCode:'); + console.log(part.executableCode.code); + } + if (part.codeExecutionResult) { + console.log('\nResults:'); + console.log(part.codeExecutionResult.output); + } + } + + if (response.candidates[0].groundingMetadata) { + console.log('\nSources:'); + const sources = response.candidates[0].groundingMetadata.webPages || []; + sources.slice(0, 2).forEach((s) => console.log(`- ${s.title}`)); + } + + // Clean up + await ai.caches.delete({ name: cache.name }); + + console.log('\n=== Research Assistant Complete ==='); +} + +// Run examples +async function main() { + await financialAnalysisChatbot(); + await researchAssistant(); + + console.log('\n\n✅ All advanced features demonstrated!'); + console.log('\nKey Takeaways:'); + console.log('- Context Caching: Saves costs by reusing large context'); + console.log('- Code Execution: Enables computational analysis'); + console.log('- Grounding: Provides real-time, fact-checked information'); + console.log('- Combined: Creates powerful, cost-effective AI applications'); +} + +main().catch((error) => { + console.error('Error:', error.message); + process.exit(1); +}); diff --git a/templates/context-caching.ts b/templates/context-caching.ts new file mode 100644 index 0000000..debf5a7 --- /dev/null +++ b/templates/context-caching.ts @@ -0,0 +1,154 @@ +/** + * Google Gemini API - Context Caching Example + * + * Demonstrates how to create and use context caching to reduce costs by up to 90% + * when using the same large context (documents, videos, system instructions) repeatedly. + * + * Features: + * - Create cache with large document + * - Use cache for multiple queries + * - Update cache TTL + * - List and delete caches + * + * Requirements: + * - @google/genai@1.27.0+ + * - GEMINI_API_KEY environment variable + * + * Note: You must use explicit model version suffixes like 'gemini-2.5-flash-001', + * not just 'gemini-2.5-flash' when creating caches. + */ + +import { GoogleGenAI } from '@google/genai'; +import fs from 'fs'; + +const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY! }); + +async function contextCachingExample() { + console.log('=== Context Caching Example ===\n'); + + // Example: Large document content + const largeDocument = ` + This is a large legal document that will be analyzed multiple times. + + Article 1: ... + Article 2: ... + ... (imagine thousands of lines of legal text) + `.trim(); + + // 1. Create a cache with large content + console.log('1. Creating cache...'); + const cache = await ai.caches.create({ + model: 'gemini-2.5-flash-001', // Must use explicit version suffix! + config: { + displayName: 'legal-doc-cache', + systemInstruction: 'You are an expert legal analyst. Analyze documents and provide clear summaries.', + contents: largeDocument, + ttl: '3600s', // Cache for 1 hour + }, + }); + + console.log(`✓ Cache created: ${cache.name}`); + console.log(` Display name: ${cache.displayName}`); + console.log(` Expires at: ${cache.expireTime}\n`); + + // 2. Use cache for multiple queries (saves tokens!) + console.log('2. Using cache for first query...'); + const response1 = await ai.models.generateContent({ + model: cache.name, // Use cache name as model + contents: 'Summarize the key points of Article 1', + }); + console.log(`Response: ${response1.text}\n`); + + console.log('3. 
Using cache for second query...'); + const response2 = await ai.models.generateContent({ + model: cache.name, + contents: 'What are the legal implications of Article 2?', + }); + console.log(`Response: ${response2.text}\n`); + + // 3. Update cache TTL to extend lifetime + console.log('4. Extending cache TTL to 2 hours...'); + await ai.caches.update({ + name: cache.name, + config: { + ttl: '7200s', // Extend to 2 hours + }, + }); + console.log('✓ Cache TTL updated\n'); + + // 4. List all caches + console.log('5. Listing all caches...'); + const caches = await ai.caches.list(); + caches.forEach((c) => { + console.log(` - ${c.displayName}: ${c.name}`); + }); + console.log(); + + // 5. Delete cache when done + console.log('6. Deleting cache...'); + await ai.caches.delete({ name: cache.name }); + console.log('✓ Cache deleted\n'); + + console.log('=== Context Caching Complete ==='); +} + +async function videoCachingExample() { + console.log('\n=== Video Caching Example ===\n'); + + // Upload a video file + console.log('1. Uploading video file...'); + const videoFile = await ai.files.upload({ + file: fs.createReadStream('./example-video.mp4'), + }); + + console.log('2. Waiting for video processing...'); + let processedFile = videoFile; + while (processedFile.state.name === 'PROCESSING') { + await new Promise((resolve) => setTimeout(resolve, 2000)); + processedFile = await ai.files.get({ name: videoFile.name }); + } + console.log(`✓ Video processed: ${processedFile.uri}\n`); + + // Create cache with video + console.log('3. Creating cache with video...'); + const cache = await ai.caches.create({ + model: 'gemini-2.5-flash-001', + config: { + displayName: 'video-analysis-cache', + systemInstruction: 'You are an expert video analyzer.', + contents: [processedFile], + ttl: '300s', // 5 minutes + }, + }); + console.log(`✓ Cache created: ${cache.name}\n`); + + // Use cache for multiple video queries + console.log('4. Query 1: What happens in the first minute?'); + const response1 = await ai.models.generateContent({ + model: cache.name, + contents: 'What happens in the first minute?', + }); + console.log(`Response: ${response1.text}\n`); + + console.log('5. Query 2: Describe the main characters'); + const response2 = await ai.models.generateContent({ + model: cache.name, + contents: 'Describe the main characters', + }); + console.log(`Response: ${response2.text}\n`); + + // Clean up + await ai.caches.delete({ name: cache.name }); + await ai.files.delete({ name: videoFile.name }); + console.log('✓ Cache and video file deleted'); + + console.log('\n=== Video Caching Complete ==='); +} + +// Run examples +contextCachingExample() + .then(() => videoCachingExample()) + .catch((error) => { + console.error('Error:', error.message); + process.exit(1); + }); diff --git a/templates/function-calling-basic.ts b/templates/function-calling-basic.ts new file mode 100644 index 0000000..872e86c --- /dev/null +++ b/templates/function-calling-basic.ts @@ -0,0 +1,156 @@ +/** + * Basic Function Calling with Gemini API + * + * Demonstrates: + * - Defining function declarations (tools) + * - Detecting when model wants to call a function + * - Executing functions and returning results + * - Multi-turn function calling workflow + * + * Prerequisites: + * - npm install @google/genai@1.27.0 + * - export GEMINI_API_KEY="..." + * + * ⚠️ IMPORTANT: Gemini 2.5 Flash-Lite does NOT support function calling! 
+ * Use gemini-2.5-flash or gemini-2.5-pro + */ + +import { GoogleGenAI, FunctionCallingConfigMode } from '@google/genai'; + +async function main() { + const ai = new GoogleGenAI({ + apiKey: process.env.GEMINI_API_KEY, + }); + + try { + // Step 1: Define function declarations + const getCurrentWeather = { + name: 'get_current_weather', + description: 'Get the current weather for a specific location', + parametersJsonSchema: { + type: 'object', + properties: { + location: { + type: 'string', + description: 'The city name, e.g. San Francisco, Tokyo, London' + }, + unit: { + type: 'string', + enum: ['celsius', 'fahrenheit'], + description: 'Temperature unit' + } + }, + required: ['location'] + } + }; + + // Step 2: Make request with tools + console.log('User: What\'s the weather in Tokyo?\n'); + + const response1 = await ai.models.generateContent({ + model: 'gemini-2.5-flash', // ⚠️ NOT flash-lite! + contents: 'What\'s the weather in Tokyo?', + config: { + tools: [ + { functionDeclarations: [getCurrentWeather] } + ] + } + }); + + // Step 3: Check if model wants to call a function + const functionCall = response1.candidates[0]?.content?.parts?.find( + part => part.functionCall + )?.functionCall; + + if (!functionCall) { + console.log('Model response (no function call):', response1.text); + return; + } + + console.log('Model wants to call function:'); + console.log('- Function name:', functionCall.name); + console.log('- Arguments:', JSON.stringify(functionCall.args, null, 2)); + console.log(''); + + // Step 4: Execute the function (your implementation) + console.log('Executing function...\n'); + const weatherData = await getCurrentWeatherImpl( + functionCall.args.location, + functionCall.args.unit || 'celsius' + ); + + console.log('Function result:', JSON.stringify(weatherData, null, 2)); + console.log(''); + + // Step 5: Send function result back to model + const response2 = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: [ + { parts: [{ text: 'What\'s the weather in Tokyo?' }] }, + response1.candidates[0].content, // Original assistant response with function call + { + parts: [ + { + functionResponse: { + name: functionCall.name, + response: weatherData + } + } + ] + } + ], + config: { + tools: [ + { functionDeclarations: [getCurrentWeather] } + ] + } + }); + + console.log('Model final response:'); + console.log(response2.text); + + } catch (error: any) { + console.error('Error:', error.message); + } +} + +/** + * Mock implementation of weather API + * Replace with actual API call in production + */ +async function getCurrentWeatherImpl(location: string, unit: string) { + // Simulate API call + await new Promise(resolve => setTimeout(resolve, 500)); + + // Mock data + return { + location, + temperature: unit === 'celsius' ? 
22 : 72, + unit, + conditions: 'Partly cloudy', + humidity: 65, + windSpeed: 10 + }; +} + +/** + * Function Calling Modes: + * + * AUTO (default): Model decides whether to call functions + * ANY: Force model to call at least one function + * NONE: Disable function calling for this request + * + * Example with mode: + * + * config: { + * tools: [...], + * toolConfig: { + * functionCallingConfig: { + * mode: FunctionCallingConfigMode.ANY, + * allowedFunctionNames: ['get_current_weather'] + * } + * } + * } + */ + +main(); diff --git a/templates/function-calling-parallel.ts b/templates/function-calling-parallel.ts new file mode 100644 index 0000000..1597e96 --- /dev/null +++ b/templates/function-calling-parallel.ts @@ -0,0 +1,177 @@ +/** + * Parallel Function Calling with Gemini API + * + * Demonstrates: + * - Multiple independent function calls in one request + * - Handling multiple function call responses + * - Compositional (sequential) vs parallel execution + * - Complex multi-step workflows + * + * Prerequisites: + * - npm install @google/genai@1.27.0 + * - export GEMINI_API_KEY="..." + * + * ⚠️ IMPORTANT: Only gemini-2.5-flash and gemini-2.5-pro support function calling + */ + +import { GoogleGenAI } from '@google/genai'; + +async function main() { + const ai = new GoogleGenAI({ + apiKey: process.env.GEMINI_API_KEY, + }); + + try { + // Define multiple functions + const getWeather = { + name: 'get_weather', + description: 'Get current weather for a location', + parametersJsonSchema: { + type: 'object', + properties: { + location: { type: 'string', description: 'City name' } + }, + required: ['location'] + } + }; + + const getPopulation = { + name: 'get_population', + description: 'Get population of a city', + parametersJsonSchema: { + type: 'object', + properties: { + city: { type: 'string', description: 'City name' } + }, + required: ['city'] + } + }; + + const getTimezone = { + name: 'get_timezone', + description: 'Get timezone information for a location', + parametersJsonSchema: { + type: 'object', + properties: { + location: { type: 'string', description: 'City name' } + }, + required: ['location'] + } + }; + + // Make request that requires multiple independent functions + console.log('User: What is the weather, population, and timezone of Tokyo?\n'); + + const response1 = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: 'What is the weather, population, and timezone of Tokyo?', + config: { + tools: [ + { functionDeclarations: [getWeather, getPopulation, getTimezone] } + ] + } + }); + + // Extract all function calls + const functionCalls = response1.candidates[0]?.content?.parts?.filter( + part => part.functionCall + ) || []; + + console.log(`Model wants to call ${functionCalls.length} functions in parallel:\n`); + + // Execute all functions in parallel + const functionResponses = await Promise.all( + functionCalls.map(async (part) => { + const functionCall = part.functionCall!; + console.log(`- Calling ${functionCall.name} with args:`, functionCall.args); + + // Execute function + let result; + if (functionCall.name === 'get_weather') { + result = await getWeatherImpl(functionCall.args.location); + } else if (functionCall.name === 'get_population') { + result = await getPopulationImpl(functionCall.args.city); + } else if (functionCall.name === 'get_timezone') { + result = await getTimezoneImpl(functionCall.args.location); + } + + return { + functionResponse: { + name: functionCall.name, + response: result + } + }; + }) + ); + + console.log('\nAll functions 
executed.\n'); + + // Send all function results back to model + const response2 = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: [ + { parts: [{ text: 'What is the weather, population, and timezone of Tokyo?' }] }, + response1.candidates[0].content, + { parts: functionResponses } + ], + config: { + tools: [ + { functionDeclarations: [getWeather, getPopulation, getTimezone] } + ] + } + }); + + console.log('Model final response:'); + console.log(response2.text); + + } catch (error: any) { + console.error('Error:', error.message); + } +} + +/** + * Mock function implementations + */ +async function getWeatherImpl(location: string) { + await new Promise(resolve => setTimeout(resolve, 300)); + return { + location, + temperature: 22, + conditions: 'Sunny', + humidity: 60 + }; +} + +async function getPopulationImpl(city: string) { + await new Promise(resolve => setTimeout(resolve, 300)); + return { + city, + population: 13960000, + metropolitan: 37400000 + }; +} + +async function getTimezoneImpl(location: string) { + await new Promise(resolve => setTimeout(resolve, 300)); + return { + location, + timezone: 'Asia/Tokyo', + offset: '+09:00' + }; +} + +/** + * Parallel vs Compositional Function Calling: + * + * PARALLEL: Functions are independent and can run simultaneously + * - Example: "What is the weather AND population of Tokyo?" + * - Model calls get_weather() and get_population() together + * + * COMPOSITIONAL: Functions depend on each other (sequential) + * - Example: "What is the weather at my current location?" + * - Model first calls get_current_location(), then uses result for get_weather() + * + * Gemini automatically determines which pattern to use based on dependencies. + */ + +main(); diff --git a/templates/grounding-search.ts b/templates/grounding-search.ts new file mode 100644 index 0000000..dd04b43 --- /dev/null +++ b/templates/grounding-search.ts @@ -0,0 +1,271 @@ +/** + * Google Gemini API - Grounding with Google Search Example + * + * Demonstrates how to enable grounding to connect the model to real-time + * web information, reducing hallucinations and providing up-to-date responses + * with citations. + * + * Features: + * - Basic grounding with Google Search (Gemini 2.5) + * - Dynamic retrieval with threshold (Gemini 1.5) + * - Chat with grounding + * - Combining grounding with function calling + * - Checking grounding metadata and citations + * + * Requirements: + * - @google/genai@1.27.0+ + * - GEMINI_API_KEY environment variable + * - Google Cloud project (grounding requires GCP project, not just API key) + * + * Note: Use `googleSearch` for Gemini 2.5 models (recommended) + * Use `googleSearchRetrieval` for Gemini 1.5 models (legacy) + */ + +import { GoogleGenAI, DynamicRetrievalConfigMode } from '@google/genai'; + +const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY! 
}); + +async function basicGrounding() { + console.log('=== Basic Grounding Example (Gemini 2.5) ===\n'); + + const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: 'Who won the euro 2024?', + config: { + tools: [{ googleSearch: {} }], + }, + }); + + console.log('Response:', response.text); + + // Check if grounding was used + if (response.candidates[0].groundingMetadata) { + console.log('\n✓ Search was performed!'); + console.log('\nGrounding Metadata:'); + console.log(JSON.stringify(response.candidates[0].groundingMetadata, null, 2)); + } else { + console.log('\n✓ Model answered from its own knowledge (no search needed)'); + } + + console.log('\n=== Basic Grounding Complete ==='); +} + +async function dynamicRetrievalExample() { + console.log('\n=== Dynamic Retrieval Example (Gemini 1.5) ===\n'); + + const response = await ai.models.generateContent({ + model: 'gemini-1.5-flash', + contents: 'Who won the euro 2024?', + config: { + tools: [ + { + googleSearchRetrieval: { + dynamicRetrievalConfig: { + mode: DynamicRetrievalConfigMode.MODE_DYNAMIC, + dynamicThreshold: 0.7, // Search only if confidence < 70% + }, + }, + }, + ], + }, + }); + + console.log('Response:', response.text); + + if (response.candidates[0].groundingMetadata) { + console.log('\n✓ Search performed (confidence < 70%)'); + } else { + console.log('\n✓ Answered from knowledge (confidence >= 70%)'); + } + + console.log('\n=== Dynamic Retrieval Complete ==='); +} + +async function chatWithGrounding() { + console.log('\n=== Chat with Grounding Example ===\n'); + + const chat = await ai.chats.create({ + model: 'gemini-2.5-flash', + config: { + tools: [{ googleSearch: {} }], + }, + }); + + // First message + console.log('User: What are the latest developments in quantum computing?'); + let response = await chat.sendMessage('What are the latest developments in quantum computing?'); + console.log(`\nAssistant: ${response.text}`); + + // Check and display sources + if (response.candidates[0].groundingMetadata) { + const sources = response.candidates[0].groundingMetadata.webPages || []; + console.log(`\n📚 Sources used: ${sources.length}`); + sources.forEach((source, i) => { + console.log(`${i + 1}. ${source.title}`); + console.log(` ${source.url}`); + }); + } + + // Follow-up question + console.log('\n\nUser: Which company made the biggest breakthrough?'); + response = await chat.sendMessage('Which company made the biggest breakthrough?'); + console.log(`\nAssistant: ${response.text}`); + + if (response.candidates[0].groundingMetadata) { + const sources = response.candidates[0].groundingMetadata.webPages || []; + console.log(`\n📚 Sources used: ${sources.length}`); + sources.forEach((source, i) => { + console.log(`${i + 1}. 
${source.title} - ${source.url}`); + }); + } + + console.log('\n=== Chat with Grounding Complete ==='); +} + +async function groundingWithFunctionCalling() { + console.log('\n=== Grounding + Function Calling Example ===\n'); + + // Define weather function + const weatherFunction = { + name: 'get_current_weather', + description: 'Get current weather for a location', + parametersJsonSchema: { + type: 'object', + properties: { + location: { type: 'string', description: 'City name' }, + unit: { type: 'string', enum: ['celsius', 'fahrenheit'] }, + }, + required: ['location'], + }, + }; + + const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: 'What is the weather like in the city that won Euro 2024?', + config: { + tools: [{ googleSearch: {} }, { functionDeclarations: [weatherFunction] }], + }, + }); + + console.log('User query: What is the weather like in the city that won Euro 2024?'); + console.log('\nModel will:'); + console.log('1. Use Google Search to find Euro 2024 winner'); + console.log('2. Call get_current_weather function with the city'); + console.log('3. Combine both results in response\n'); + + // Check for function calls + const functionCall = response.candidates[0].content.parts.find((part) => part.functionCall); + + if (functionCall) { + console.log('✓ Function call detected:'); + console.log(` Function: ${functionCall.functionCall?.name}`); + console.log(` Arguments:`, functionCall.functionCall?.args); + } + + if (response.candidates[0].groundingMetadata) { + console.log('\n✓ Grounding was used'); + const sources = response.candidates[0].groundingMetadata.webPages || []; + console.log(` Sources: ${sources.length} web pages`); + } + + console.log('\n=== Grounding + Function Calling Complete ==='); +} + +async function checkingGroundingUsage() { + console.log('\n=== Checking Grounding Usage Example ===\n'); + + // Query that doesn't need search + console.log('Query 1: What is 2+2? (Should NOT need search)'); + const response1 = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: 'What is 2+2?', + config: { + tools: [{ googleSearch: {} }], + }, + }); + + console.log(`Answer: ${response1.text}`); + console.log( + `Grounding used: ${response1.candidates[0].groundingMetadata ? 'YES' : 'NO'}\n` + ); + + // Query that needs current information + console.log('Query 2: What happened in the news today? (Should need search)'); + const response2 = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: 'What are the top news headlines today?', + config: { + tools: [{ googleSearch: {} }], + }, + }); + + console.log(`Answer: ${response2.text}`); + console.log(`Grounding used: ${response2.candidates[0].groundingMetadata ? 'YES' : 'NO'}`); + + if (response2.candidates[0].groundingMetadata) { + console.log('\nSearch queries performed:'); + const queries = response2.candidates[0].groundingMetadata.searchQueries || []; + queries.forEach((q, i) => { + console.log(`${i + 1}. 
${q.text || q}`); + }); + } + + console.log('\n=== Checking Grounding Usage Complete ==='); +} + +async function citationsExample() { + console.log('\n=== Citations Example ===\n'); + + const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: 'Tell me about the recent Mars rover discoveries', + config: { + tools: [{ googleSearch: {} }], + }, + }); + + console.log('Response:', response.text); + + if (response.candidates[0].groundingMetadata) { + const metadata = response.candidates[0].groundingMetadata; + + console.log('\n📚 Web Pages:'); + const webPages = metadata.webPages || []; + webPages.forEach((page, i) => { + console.log(`\n${i + 1}. ${page.title}`); + console.log(` URL: ${page.url}`); + if (page.snippet) { + console.log(` Snippet: ${page.snippet.substring(0, 100)}...`); + } + }); + + console.log('\n🔗 Citations:'); + const citations = metadata.citations || []; + citations.forEach((citation, i) => { + console.log(`\n${i + 1}. Position: ${citation.startIndex}-${citation.endIndex}`); + console.log(` Source: ${citation.uri}`); + }); + } + + console.log('\n=== Citations Example Complete ==='); +} + +// Run all examples +async function main() { + await basicGrounding(); + await dynamicRetrievalExample(); + await chatWithGrounding(); + await groundingWithFunctionCalling(); + await checkingGroundingUsage(); + await citationsExample(); +} + +main().catch((error) => { + console.error('Error:', error.message); + if (error.message.includes('Google Cloud project')) { + console.error( + '\nNote: Grounding requires a Google Cloud project, not just an API key.' + ); + } + process.exit(1); +}); diff --git a/templates/multimodal-image.ts b/templates/multimodal-image.ts new file mode 100644 index 0000000..8b8be09 --- /dev/null +++ b/templates/multimodal-image.ts @@ -0,0 +1,117 @@ +/** + * Multimodal Image Understanding with Gemini API + * + * Demonstrates: + * - Image analysis with vision capabilities + * - Base64 encoding of images + * - Combining text and image inputs + * - Multiple images in one request + * + * Prerequisites: + * - npm install @google/genai@1.27.0 + * - export GEMINI_API_KEY="..." + */ + +import { GoogleGenAI } from '@google/genai'; +import fs from 'fs'; + +async function main() { + const ai = new GoogleGenAI({ + apiKey: process.env.GEMINI_API_KEY, + }); + + try { + // Example 1: Analyze a single image + console.log('Example 1: Analyze Single Image\n'); + + // Load image from file + const imagePath = '/path/to/image.jpg'; // Replace with actual path + const imageData = fs.readFileSync(imagePath); + const base64Image = imageData.toString('base64'); + + const response1 = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: [ + { + parts: [ + { text: 'Describe this image in detail. What objects, people, or scenes do you see?' }, + { + inlineData: { + data: base64Image, + mimeType: 'image/jpeg' // or 'image/png', 'image/webp', etc. + } + } + ] + } + ] + }); + + console.log(response1.text); + console.log('\n---\n'); + + // Example 2: Compare two images + console.log('Example 2: Compare Two Images\n'); + + const imagePath2 = '/path/to/image2.jpg'; // Replace with actual path + const imageData2 = fs.readFileSync(imagePath2); + const base64Image2 = imageData2.toString('base64'); + + const response2 = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: [ + { + parts: [ + { text: 'Compare these two images. What are the similarities and differences?' 
}, + { inlineData: { data: base64Image, mimeType: 'image/jpeg' } }, + { inlineData: { data: base64Image2, mimeType: 'image/jpeg' } } + ] + } + ] + }); + + console.log(response2.text); + console.log('\n---\n'); + + // Example 3: Specific questions about image + console.log('Example 3: Specific Questions\n'); + + const response3 = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: [ + { + parts: [ + { text: 'How many people are in this image? What are they wearing?' }, + { inlineData: { data: base64Image, mimeType: 'image/jpeg' } } + ] + } + ] + }); + + console.log(response3.text); + + } catch (error: any) { + console.error('Error:', error.message); + + if (error.message.includes('ENOENT')) { + console.error('\n⚠️ Image file not found. Update the imagePath variable with a valid path.'); + } + } +} + +/** + * Supported image formats: + * - JPEG (.jpg, .jpeg) + * - PNG (.png) + * - WebP (.webp) + * - HEIC (.heic) + * - HEIF (.heif) + * + * Max size: 20MB per image + * + * Tips: + * - Use specific, detailed prompts for better results + * - You can analyze multiple images in one request + * - gemini-2.5-flash and gemini-2.5-pro both support vision + */ + +main(); diff --git a/templates/multimodal-video-audio.ts b/templates/multimodal-video-audio.ts new file mode 100644 index 0000000..47e25bd --- /dev/null +++ b/templates/multimodal-video-audio.ts @@ -0,0 +1,152 @@ +/** + * Multimodal Video and Audio Understanding with Gemini API + * + * Demonstrates: + * - Video analysis (what happens in the video) + * - Audio transcription and understanding + * - PDF document parsing + * - Combining multiple modalities + * + * Prerequisites: + * - npm install @google/genai@1.27.0 + * - export GEMINI_API_KEY="..." + */ + +import { GoogleGenAI } from '@google/genai'; +import fs from 'fs'; + +async function main() { + const ai = new GoogleGenAI({ + apiKey: process.env.GEMINI_API_KEY, + }); + + try { + // Example 1: Analyze video + console.log('Example 1: Video Analysis\n'); + + const videoPath = '/path/to/video.mp4'; // Replace with actual path + const videoData = fs.readFileSync(videoPath); + const base64Video = videoData.toString('base64'); + + const response1 = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: [ + { + parts: [ + { text: 'Describe what happens in this video. Summarize the key events.' }, + { + inlineData: { + data: base64Video, + mimeType: 'video/mp4' + } + } + ] + } + ] + }); + + console.log(response1.text); + console.log('\n---\n'); + + // Example 2: Transcribe and analyze audio + console.log('Example 2: Audio Transcription and Analysis\n'); + + const audioPath = '/path/to/audio.mp3'; // Replace with actual path + const audioData = fs.readFileSync(audioPath); + const base64Audio = audioData.toString('base64'); + + const response2 = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: [ + { + parts: [ + { text: 'Transcribe this audio and provide a summary of the main points discussed.' 
}, + { + inlineData: { + data: base64Audio, + mimeType: 'audio/mp3' + } + } + ] + } + ] + }); + + console.log(response2.text); + console.log('\n---\n'); + + // Example 3: Parse PDF document + console.log('Example 3: PDF Document Parsing\n'); + + const pdfPath = '/path/to/document.pdf'; // Replace with actual path + const pdfData = fs.readFileSync(pdfPath); + const base64Pdf = pdfData.toString('base64'); + + const response3 = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: [ + { + parts: [ + { text: 'Summarize the key points in this PDF document. Extract any important data or conclusions.' }, + { + inlineData: { + data: base64Pdf, + mimeType: 'application/pdf' + } + } + ] + } + ] + }); + + console.log(response3.text); + console.log('\n---\n'); + + // Example 4: Combine multiple modalities + console.log('Example 4: Multiple Modalities (Video + Text Questions)\n'); + + const response4 = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: [ + { + parts: [ + { text: 'Based on this video, answer these questions:\n1. How many people appear?\n2. What is the main activity?\n3. Where does this take place?' }, + { inlineData: { data: base64Video, mimeType: 'video/mp4' } } + ] + } + ] + }); + + console.log(response4.text); + + } catch (error: any) { + console.error('Error:', error.message); + + if (error.message.includes('ENOENT')) { + console.error('\n⚠️ File not found. Update the file path variables with valid paths.'); + } + } +} + +/** + * Supported Video Formats: + * - MP4, MPEG, MOV, AVI, FLV, MPG, WebM, WMV + * Max size: 2GB (use File API for larger - Phase 2) + * Max length (inline): 2 minutes + * + * Supported Audio Formats: + * - MP3, WAV, FLAC, AAC, OGG, OPUS + * Max size: 20MB + * + * PDF: + * - Max size: 30MB + * - Text-based PDFs work best + * - Scanned images may have lower accuracy + * + * Tips: + * - For videos > 2 minutes, use the File API (Phase 2) + * - Specific prompts yield better results + * - You can combine text, images, video, audio, and PDFs in one request + */ + +main(); diff --git a/templates/package.json b/templates/package.json new file mode 100644 index 0000000..926c008 --- /dev/null +++ b/templates/package.json @@ -0,0 +1,22 @@ +{ + "name": "gemini-api-example", + "version": "1.0.0", + "description": "Google Gemini API examples using @google/genai SDK", + "type": "module", + "scripts": { + "dev": "tsx watch src/index.ts", + "start": "node dist/index.js", + "build": "tsc" + }, + "dependencies": { + "@google/genai": "^1.27.0" + }, + "devDependencies": { + "@types/node": "^20.0.0", + "tsx": "^4.7.0", + "typescript": "^5.3.0" + }, + "engines": { + "node": ">=18.0.0" + } +} diff --git a/templates/streaming-chat.ts b/templates/streaming-chat.ts new file mode 100644 index 0000000..f7003e1 --- /dev/null +++ b/templates/streaming-chat.ts @@ -0,0 +1,81 @@ +/** + * Streaming Text Generation with Gemini API (Node.js SDK) + * + * Demonstrates: + * - Streaming responses with async iteration + * - Real-time token delivery for better UX + * - Handling partial chunks + * - Multi-turn chat with streaming + * + * Prerequisites: + * - npm install @google/genai@1.27.0 + * - export GEMINI_API_KEY="..." 
+ */
+
+import { GoogleGenAI } from '@google/genai';
+
+async function main() {
+  const ai = new GoogleGenAI({
+    apiKey: process.env.GEMINI_API_KEY,
+  });
+
+  try {
+    // Example 1: Basic streaming
+    console.log('Example 1: Basic Streaming\n');
+    console.log('Prompt: Write a 200-word story about time travel\n');
+
+    const response = await ai.models.generateContentStream({
+      model: 'gemini-2.5-flash',
+      contents: 'Write a 200-word story about time travel'
+    });
+
+    // Stream chunks as they arrive
+    for await (const chunk of response) {
+      // Each chunk may contain partial text
+      if (chunk.text) {
+        process.stdout.write(chunk.text);
+      }
+    }
+
+    console.log('\n\n---\n');
+
+    // Example 2: Streaming with chat
+    console.log('Example 2: Streaming Chat\n');
+
+    // Chat sessions live under ai.chats (not ai.models) in @google/genai;
+    // systemInstruction is passed inside config
+    const chat = ai.chats.create({
+      model: 'gemini-2.5-flash',
+      config: {
+        systemInstruction: 'You are a helpful coding assistant.'
+      }
+    });
+
+    // First turn with streaming
+    console.log('User: What is TypeScript?\n');
+    const response1 = await chat.sendMessageStream({ message: 'What is TypeScript?' });
+
+    console.log('Assistant: ');
+    for await (const chunk of response1) {
+      if (chunk.text) {
+        process.stdout.write(chunk.text);
+      }
+    }
+
+    console.log('\n\n');
+
+    // Second turn with streaming (context maintained)
+    console.log('User: How do I install it?\n');
+    const response2 = await chat.sendMessageStream({ message: 'How do I install it?' });
+
+    console.log('Assistant: ');
+    for await (const chunk of response2) {
+      if (chunk.text) {
+        process.stdout.write(chunk.text);
+      }
+    }
+
+    console.log('\n\n');
+
+    // Get full chat history
+    const history = chat.getHistory();
+    console.log(`Total messages in history: ${history.length}`);
+
+  } catch (error: any) {
+    console.error('Error:', error.message);
+  }
+}
+
+main();
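+
+/**
+ * Bonus sketch (not part of the original examples): stream a response while
+ * also collecting the full text, which is handy when you need both real-time
+ * output and the complete answer afterwards. The helper name is ours.
+ */
+async function streamAndCollect(ai: GoogleGenAI, prompt: string): Promise<string> {
+  const stream = await ai.models.generateContentStream({
+    model: 'gemini-2.5-flash',
+    contents: prompt
+  });
+
+  let fullText = '';
+  for await (const chunk of stream) {
+    if (chunk.text) {
+      process.stdout.write(chunk.text); // live output
+      fullText += chunk.text;           // accumulate for post-processing
+    }
+  }
+  return fullText;
+}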
diff --git a/templates/streaming-fetch.ts b/templates/streaming-fetch.ts
new file mode 100644
index 0000000..99dbb7a
--- /dev/null
+++ b/templates/streaming-fetch.ts
@@ -0,0 +1,190 @@
+/**
+ * Streaming Text Generation with Gemini API (Fetch - SSE Parsing)
+ *
+ * Demonstrates:
+ * - Server-Sent Events (SSE) parsing with fetch
+ * - Manual stream handling for Cloudflare Workers or edge runtimes
+ * - Buffer management for incomplete chunks
+ * - Error handling during streaming
+ *
+ * Prerequisites:
+ * - Set GEMINI_API_KEY environment variable
+ */
+
+interface Env {
+  GEMINI_API_KEY: string;
+}
+
+/**
+ * Example for Cloudflare Workers with streaming response
+ */
+export default {
+  async fetch(request: Request, env: Env): Promise<Response> {
+    // Create a TransformStream to stream tokens to the client
+    const { readable, writable } = new TransformStream();
+    const writer = writable.getWriter();
+    const encoder = new TextEncoder();
+
+    // Start streaming in the background
+    (async () => {
+      try {
+        // ?alt=sse is required for SSE framing; without it the REST API
+        // returns one JSON array instead of "data: ..." events
+        const response = await fetch(
+          `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse`,
+          {
+            method: 'POST',
+            headers: {
+              'Content-Type': 'application/json',
+              'x-goog-api-key': env.GEMINI_API_KEY,
+            },
+            body: JSON.stringify({
+              contents: [
+                {
+                  parts: [
+                    { text: 'Write a 200-word story about time travel' }
+                  ]
+                }
+              ]
+            }),
+          }
+        );
+
+        if (!response.ok) {
+          throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+        }
+
+        if (!response.body) {
+          throw new Error('Response body is null');
+        }
+
+        const reader = response.body.getReader();
+        const decoder = new TextDecoder();
+        let buffer = '';
+
+        while (true) {
+          const { done, value } = await reader.read();
+
+          if (done) {
+            break;
+          }
+
+          // Append new data to buffer
+          buffer += decoder.decode(value, { stream: true });
+
+          // Split by newlines (SSE format)
+          const lines = buffer.split('\n');
+
+          // Keep the last incomplete line in buffer
+          buffer = lines.pop() || '';
+
+          for (const line of lines) {
+            // Skip empty lines and metadata
+            if (line.trim() === '' || line.startsWith('data: [DONE]')) {
+              continue;
+            }
+
+            // Parse SSE format: "data: {json}"
+            if (!line.startsWith('data: ')) {
+              continue;
+            }
+
+            try {
+              const jsonData = JSON.parse(line.slice(6)); // Remove "data: " prefix
+              const text = jsonData.candidates?.[0]?.content?.parts?.[0]?.text;
+
+              if (text) {
+                // Write chunk to client
+                await writer.write(encoder.encode(text));
+              }
+            } catch (e) {
+              // Skip invalid JSON chunks
+              console.error('Failed to parse chunk:', e);
+            }
+          }
+        }
+
+        // Close the stream
+        await writer.close();
+
+      } catch (error: any) {
+        await writer.write(encoder.encode(`\n\nError: ${error.message}`));
+        await writer.close();
+      }
+    })();
+
+    // Return streaming response immediately
+    return new Response(readable, {
+      headers: {
+        'Content-Type': 'text/plain; charset=utf-8',
+        'Transfer-Encoding': 'chunked',
+      },
+    });
+  }
+};
+
+/**
+ * Example for Node.js/Standalone
+ */
+async function mainNodeJS() {
+  const GEMINI_API_KEY = process.env.GEMINI_API_KEY;
+
+  if (!GEMINI_API_KEY) {
+    throw new Error('GEMINI_API_KEY environment variable not set');
+  }
+
+  const response = await fetch(
+    `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse`,
+    {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'x-goog-api-key': GEMINI_API_KEY,
+      },
+      body: JSON.stringify({
+        contents: [
+          {
+            parts: [
+              { text: 'Write a 200-word story about time travel' }
+            ]
+          }
+        ]
+      }),
+    }
+  );
+
+  if (!response.body) {
+    throw new Error('Response body is null');
+  }
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+  let buffer = '';
+
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;
+
+    buffer += decoder.decode(value, { stream: true });
+    const lines = buffer.split('\n');
+    buffer = lines.pop() || '';
+
+    for (const line of lines) {
+      if (line.trim() === '' || line.startsWith('data: [DONE]')) continue;
+      if (!line.startsWith('data: ')) continue;
+
+      try {
+        const data = JSON.parse(line.slice(6));
+        const text = data.candidates?.[0]?.content?.parts?.[0]?.text;
+        if (text) {
+          process.stdout.write(text);
+        }
+      } catch (e) {
+        // Skip invalid JSON
+      }
+    }
+  }
+
+  console.log('\n');
+}
+
+// Uncomment to run in Node.js
+// mainNodeJS();
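+
+/**
+ * Bonus sketch (not part of the original example): a minimal retry wrapper
+ * with exponential backoff for 429 rate-limit responses. The helper name
+ * and retry policy are ours, not part of the Gemini API.
+ */
+async function fetchWithBackoff(url: string, init: RequestInit, maxRetries = 3): Promise<Response> {
+  for (let attempt = 0; ; attempt++) {
+    const response = await fetch(url, init);
+    if (response.status !== 429 || attempt >= maxRetries) {
+      return response;
+    }
+    // Wait 1s, 2s, 4s, ... before retrying
+    await new Promise((resolve) => setTimeout(resolve, 1000 * 2 ** attempt));
+  }
+}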
diff --git a/templates/text-generation-basic.ts b/templates/text-generation-basic.ts
new file mode 100644
index 0000000..b570d6d
--- /dev/null
+++ b/templates/text-generation-basic.ts
@@ -0,0 +1,61 @@
+/**
+ * Basic Text Generation with Gemini API (Node.js SDK)
+ *
+ * Demonstrates:
+ * - Installing @google/genai (CORRECT SDK, NOT @google/generative-ai)
+ * - Basic text generation with gemini-2.5-flash
+ * - Accessing response text
+ * - Error handling
+ *
+ * Prerequisites:
+ * - npm install @google/genai@1.27.0
+ * - export GEMINI_API_KEY="..."
+ */
+
+import { GoogleGenAI } from '@google/genai';
+
+async function main() {
+  // Initialize the Google GenAI client
+  // ⚠️ IMPORTANT: Use @google/genai, NOT @google/generative-ai (deprecated)
+  const ai = new GoogleGenAI({
+    apiKey: process.env.GEMINI_API_KEY,
+  });
+
+  try {
+    // Generate content with gemini-2.5-flash
+    // Models available: gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite
+    const response = await ai.models.generateContent({
+      model: 'gemini-2.5-flash',
+      contents: 'Explain quantum computing in simple terms for a 10-year-old'
+    });
+
+    // Access the generated text
+    console.log('Generated text:');
+    console.log(response.text);
+
+    // Access full response metadata (optional chaining: fields may be undefined)
+    console.log('\nToken usage:');
+    console.log('- Prompt tokens:', response.usageMetadata?.promptTokenCount);
+    console.log('- Response tokens:', response.usageMetadata?.candidatesTokenCount);
+    console.log('- Total tokens:', response.usageMetadata?.totalTokenCount);
+
+    // Check finish reason
+    console.log('\nFinish reason:', response.candidates?.[0]?.finishReason);
+    // Possible values: "STOP" (normal), "MAX_TOKENS", "SAFETY", "OTHER"
+
+  } catch (error: any) {
+    console.error('Error generating content:');
+
+    if (error.status === 401) {
+      console.error('❌ Invalid API key. Set GEMINI_API_KEY environment variable.');
+    } else if (error.status === 429) {
+      console.error('❌ Rate limit exceeded. Try again later or implement exponential backoff.');
+    } else if (error.status === 404) {
+      console.error('❌ Model not found. Use: gemini-2.5-pro, gemini-2.5-flash, or gemini-2.5-flash-lite');
+    } else {
+      console.error('❌', error.message);
+    }
+  }
+}
+
+main();
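+
+/**
+ * Bonus sketch (not part of the original example): passing generation
+ * parameters via config. The values here are illustrative, not
+ * recommendations.
+ */
+async function generateWithConfig(ai: GoogleGenAI, prompt: string) {
+  const response = await ai.models.generateContent({
+    model: 'gemini-2.5-flash',
+    contents: prompt,
+    config: {
+      temperature: 0.7,      // randomness: 0 = deterministic
+      topP: 0.95,            // nucleus sampling cutoff
+      maxOutputTokens: 256,  // hard cap on response length
+      stopSequences: ['END'] // stop generation at this marker
+    }
+  });
+  return response.text;
+}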
diff --git a/templates/text-generation-fetch.ts b/templates/text-generation-fetch.ts
new file mode 100644
index 0000000..a949c49
--- /dev/null
+++ b/templates/text-generation-fetch.ts
@@ -0,0 +1,124 @@
+/**
+ * Basic Text Generation with Gemini API (Fetch - Cloudflare Workers)
+ *
+ * Demonstrates:
+ * - Direct REST API calls using fetch (no SDK dependencies)
+ * - Perfect for Cloudflare Workers, Deno, Bun, or edge runtimes
+ * - Manual JSON parsing
+ * - Error handling with fetch
+ *
+ * Prerequisites:
+ * - Set GEMINI_API_KEY environment variable (or use env.GEMINI_API_KEY in Workers)
+ */
+
+/**
+ * Example for Cloudflare Workers
+ */
+interface Env {
+  GEMINI_API_KEY: string;
+}
+
+export default {
+  async fetch(request: Request, env: Env): Promise<Response> {
+    try {
+      // Make direct API call to Gemini
+      const response = await fetch(
+        `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
+        {
+          method: 'POST',
+          headers: {
+            'Content-Type': 'application/json',
+            'x-goog-api-key': env.GEMINI_API_KEY,
+          },
+          body: JSON.stringify({
+            contents: [
+              {
+                parts: [
+                  {
+                    text: 'Explain quantum computing in simple terms for a 10-year-old'
+                  }
+                ]
+              }
+            ]
+          }),
+        }
+      );
+
+      // Check for HTTP errors
+      if (!response.ok) {
+        const errorData = await response.json();
+        return new Response(
+          JSON.stringify({
+            error: errorData.error?.message || 'Unknown error',
+            status: response.status
+          }),
+          { status: response.status, headers: { 'Content-Type': 'application/json' } }
+        );
+      }
+
+      // Parse response
+      const data = await response.json();
+
+      // Extract text from response structure
+      const generatedText = data.candidates?.[0]?.content?.parts?.[0]?.text;
+      const usageMetadata = data.usageMetadata;
+      const finishReason = data.candidates?.[0]?.finishReason;
+
+      return new Response(
+        JSON.stringify({
+          text: generatedText,
+          usage: {
+            promptTokens: usageMetadata.promptTokenCount,
+            responseTokens: usageMetadata.candidatesTokenCount,
+            totalTokens: usageMetadata.totalTokenCount
+          },
+          finishReason
+        }),
+        { headers: { 'Content-Type': 'application/json' } }
+      );
+
+    } catch (error: any) {
+      return new Response(
+        JSON.stringify({ error: error.message }),
+        { status: 500, headers: { 'Content-Type': 'application/json' } }
+      );
+    }
+  }
+};
+
+/**
+ * Example for Node.js/Standalone
+ */
+async function mainNodeJS() {
+  const GEMINI_API_KEY = process.env.GEMINI_API_KEY;
+
+  if (!GEMINI_API_KEY) {
+    throw new Error('GEMINI_API_KEY environment variable not set');
+  }
+
+  const response = await fetch(
+    `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
+    {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'x-goog-api-key': GEMINI_API_KEY,
+      },
+      body: JSON.stringify({
+        contents: [
+          {
+            parts: [
+              { text: 'Explain quantum computing in simple terms' }
+            ]
+          }
+        ]
+      }),
+    }
+  );
+
+  const data = await response.json();
+  console.log(data.candidates[0].content.parts[0].text);
+}
+
+// Uncomment to run in Node.js
+// mainNodeJS();
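+
+/**
+ * Bonus sketch (not part of the original example): the REST body also accepts
+ * a generationConfig object alongside contents. Values are illustrative.
+ */
+const exampleBodyWithConfig = JSON.stringify({
+  contents: [
+    { parts: [{ text: 'Explain quantum computing in simple terms' }] }
+  ],
+  generationConfig: {
+    temperature: 0.7,      // randomness of sampling
+    topP: 0.95,            // nucleus sampling cutoff
+    maxOutputTokens: 256   // cap on response length
+  }
+});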
diff --git a/templates/thinking-mode.ts b/templates/thinking-mode.ts
new file mode 100644
index 0000000..4975e2d
--- /dev/null
+++ b/templates/thinking-mode.ts
@@ -0,0 +1,137 @@
+/**
+ * Thinking Mode Configuration with Gemini API
+ *
+ * Demonstrates:
+ * - Thinking mode (enabled by default on Gemini 2.5 models)
+ * - Configuring thinking budget
+ * - When to use thinking mode
+ * - Impact on latency and quality
+ *
+ * Prerequisites:
+ * - npm install @google/genai@1.27.0
+ * - export GEMINI_API_KEY="..."
+ *
+ * ℹ️ Thinking is enabled by default on Gemini 2.5 models. gemini-2.5-pro cannot
+ * disable it; gemini-2.5-flash can opt out via thinkingConfig: { thinkingBudget: 0 }
+ */
+
+import { GoogleGenAI } from '@google/genai';
+
+async function main() {
+  const ai = new GoogleGenAI({
+    apiKey: process.env.GEMINI_API_KEY,
+  });
+
+  try {
+    // Example 1: Default thinking budget
+    console.log('Example 1: Default Thinking Budget\n');
+    console.log('Prompt: Solve this complex math problem:\n');
+    console.log('If a train travels 120 km in 1.5 hours, then slows down to 60 km/h for 45 minutes, how far has it traveled total?\n');
+
+    const response1 = await ai.models.generateContent({
+      model: 'gemini-2.5-flash',
+      contents: 'If a train travels 120 km in 1.5 hours, then slows down to 60 km/h for 45 minutes, how far has it traveled total?'
+      // No thinkingConfig = uses default budget
+    });
+
+    console.log('Answer:', response1.text);
+    console.log('\nToken usage:', response1.usageMetadata);
+    console.log('\n---\n');
+
+    // Example 2: Increased thinking budget for complex reasoning
+    console.log('Example 2: Increased Thinking Budget (8192 tokens)\n');
+    console.log('Prompt: Complex logic puzzle\n');
+
+    const response2 = await ai.models.generateContent({
+      model: 'gemini-2.5-flash',
+      contents: `
+        Three people (Alice, Bob, Carol) have different jobs (doctor, engineer, teacher).
+        Clues:
+        1. Alice is not a doctor
+        2. The engineer is older than Bob
+        3. Carol is younger than the teacher
+        4. The doctor is the youngest
+
+        Who has which job?
+      `,
+      config: {
+        thinkingConfig: {
+          thinkingBudget: 8192 // Increase budget for complex reasoning
+        }
+      }
+    });
+
+    console.log('Answer:', response2.text);
+    console.log('\nToken usage:', response2.usageMetadata);
+    console.log('\n---\n');
+
+    // Example 3: Comparison with gemini-2.5-pro (more thinking capability)
+    console.log('Example 3: Using gemini-2.5-pro for Advanced Reasoning\n');
+    console.log('Prompt: Multi-step code optimization problem\n');
+
+    const response3 = await ai.models.generateContent({
+      model: 'gemini-2.5-pro', // Pro model has better reasoning
+      contents: `
+        Optimize this Python code for better performance:
+
+        def find_duplicates(arr):
+          duplicates = []
+          for i in range(len(arr)):
+            for j in range(i + 1, len(arr)):
+              if arr[i] == arr[j] and arr[i] not in duplicates:
+                duplicates.append(arr[i])
+          return duplicates
+
+        Explain your optimization strategy step by step.
+      `,
+      config: {
+        thinkingConfig: {
+          thinkingBudget: 8192
+        }
+      }
+    });
+
+    console.log('Optimization:', response3.text);
+    console.log('\nToken usage:', response3.usageMetadata);
+
+  } catch (error: any) {
+    console.error('Error:', error.message);
+  }
+}
+
+/**
+ * Thinking Mode Guidelines:
+ *
+ * What is Thinking Mode?
+ * - Gemini 2.5 models "think" before responding, improving accuracy
+ * - The model internally reasons through the problem
+ * - This happens transparently (you don't see the thinking process)
+ *
+ * Thinking Budget:
+ * - Controls max tokens allocated for internal reasoning
+ * - Higher budget = more thorough reasoning (may increase latency)
+ * - Default budget is usually sufficient for most tasks
+ *
+ * When to Increase Budget:
+ * ✅ Complex math/logic problems
+ * ✅ Multi-step reasoning tasks
+ * ✅ Code optimization challenges
+ * ✅ Detailed analysis requiring careful consideration
+ *
+ * When Default is Fine:
+ * ⏺️ Simple factual questions
+ * ⏺️ Creative writing
+ * ⏺️ Translation
+ * ⏺️ Summarization
+ *
+ * Model Comparison:
+ * - gemini-2.5-pro: Best for complex reasoning, higher default thinking budget
+ * - gemini-2.5-flash: Good balance, suitable for most thinking tasks
+ * - gemini-2.5-flash-lite: Basic thinking, optimized for speed
+ *
+ * Important Notes:
+ * - gemini-2.5-pro cannot disable thinking; gemini-2.5-flash can set thinkingBudget: 0
+ * - Thinking tokens count toward total usage (usageMetadata.thoughtsTokenCount)
+ * - Higher thinking budget may increase latency slightly
+ */
+
+main();
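+
+/**
+ * Bonus sketch (not part of the original examples, assumes current 2.5 Flash
+ * behavior): disabling thinking with thinkingBudget: 0 and reading the
+ * thinking-token count from usageMetadata. The helper name is ours.
+ */
+async function compareThinkingBudgets(ai: GoogleGenAI, prompt: string) {
+  // Thinking disabled (supported on gemini-2.5-flash, not on 2.5 Pro)
+  const fast = await ai.models.generateContent({
+    model: 'gemini-2.5-flash',
+    contents: prompt,
+    config: { thinkingConfig: { thinkingBudget: 0 } }
+  });
+
+  // Default thinking budget for comparison
+  const thoughtful = await ai.models.generateContent({
+    model: 'gemini-2.5-flash',
+    contents: prompt
+  });
+
+  console.log('No thinking  :', fast.usageMetadata?.totalTokenCount, 'total tokens');
+  console.log('With thinking:', thoughtful.usageMetadata?.totalTokenCount, 'total tokens,',
+    thoughtful.usageMetadata?.thoughtsTokenCount, 'thinking tokens');
+}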