Initial commit

Zhongwei Li
2025-11-30 08:24:51 +08:00
commit 8aebb293cd
31 changed files with 7386 additions and 0 deletions

12
.claude-plugin/plugin.json Normal file

@@ -0,0 +1,12 @@
{
"name": "google-gemini-api",
"description": "Integrate Gemini API with correct current SDK (@google/genai v1.27+, NOT deprecated @google/generative-ai). Supports text generation, multimodal (images/video/audio/PDFs), function calling, and thinking mode. 1M input tokens. Use when: integrating Gemini API, implementing multimodal AI, using thinking mode for reasoning, function calling with parallel execution, streaming responses, deploying to Cloudflare Workers, building chat, or troubleshooting SDK deprecation, context window, model not foun",
"version": "1.0.0",
"author": {
"name": "Jeremy Dawes",
"email": "jeremy@jezweb.net"
},
"skills": [
"./"
]
}

3
README.md Normal file

@@ -0,0 +1,3 @@
# google-gemini-api
Integrate Gemini API with correct current SDK (@google/genai v1.27+, NOT deprecated @google/generative-ai). Supports text generation, multimodal (images/video/audio/PDFs), function calling, and thinking mode. 1M input tokens. Use when: integrating Gemini API, implementing multimodal AI, using thinking mode for reasoning, function calling with parallel execution, streaming responses, deploying to Cloudflare Workers, building chat, or troubleshooting SDK deprecation, context window, model not found

2173
SKILL.md Normal file

File diff suppressed because it is too large.

153
plugin.lock.json Normal file

@@ -0,0 +1,153 @@
{
"$schema": "internal://schemas/plugin.lock.v1.json",
"pluginId": "gh:jezweb/claude-skills:skills/google-gemini-api",
"normalized": {
"repo": null,
"ref": "refs/tags/v20251128.0",
"commit": "bb3e9b7656ff919916c1b773f1e5d845bb3c6633",
"treeHash": "b8bbae8bede51e3e5854007aa5c1ab3cb3a7e1050bbca038893d7f48dd558ba6",
"generatedAt": "2025-11-28T10:19:01.568870Z",
"toolVersion": "publish_plugins.py@0.2.0"
},
"origin": {
"remote": "git@github.com:zhongweili/42plugin-data.git",
"branch": "master",
"commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
"repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
},
"manifest": {
"name": "google-gemini-api",
"description": "Integrate Gemini API with correct current SDK (@google/genai v1.27+, NOT deprecated @google/generative-ai). Supports text generation, multimodal (images/video/audio/PDFs), function calling, and thinking mode. 1M input tokens. Use when: integrating Gemini API, implementing multimodal AI, using thinking mode for reasoning, function calling with parallel execution, streaming responses, deploying to Cloudflare Workers, building chat, or troubleshooting SDK deprecation, context window, model not foun",
"version": "1.0.0"
},
"content": {
"files": [
{
"path": "README.md",
"sha256": "1d4a239aa8f7a1d70453fbb0c216b4ebdc01b312dcf5d14325819f73c513439c"
},
{
"path": "SKILL.md",
"sha256": "c17553ecbdb69ac44a1733121a9e728d9575b3d80d40ff72e7ccdf57044bf379"
},
{
"path": "references/thinking-mode-guide.md",
"sha256": "c1c85ddc27b582e0fab36385c0cc50863d2a032c5d03730fadfe10d6f4efe5ab"
},
{
"path": "references/grounding-guide.md",
"sha256": "dd7a2ef2f5e09e090a300caeaebe1bbb333c03496e7e572c6dd7de50549c85c2"
},
{
"path": "references/multimodal-guide.md",
"sha256": "cd2361169dd4d6be8941ca862157878772bd748aea8655fd7ca2b66561234743"
},
{
"path": "references/streaming-patterns.md",
"sha256": "3bd053362626b4646e366da339cd0eadcc7085e48121eb36bafd56d110b6e322"
},
{
"path": "references/context-caching-guide.md",
"sha256": "c967b0d121477ecb89345b13ad68c417d6c18d367e02f081bd84692f9e1d4397"
},
{
"path": "references/top-errors.md",
"sha256": "f4de6f9304496d740bd48dba2722b08160a007e7df70d6c782f14c99447eba79"
},
{
"path": "references/code-execution-patterns.md",
"sha256": "374ecca169795f5d2299bb0d8917a39dc79bef7d18c61330dff07f31624486ea"
},
{
"path": "references/models-guide.md",
"sha256": "8f142fee3071ce4f1a071c2a3966b02e725c01357a0c049d29b236d810ff3858"
},
{
"path": "references/sdk-migration-guide.md",
"sha256": "a61f0c8adb118cdd615cb09bc4f531c2c96e898d820045a8dac3ec48928eaf63"
},
{
"path": "references/function-calling-patterns.md",
"sha256": "5900df715f79137126a43fc9011c680f9f01afdcc515a7c3ecd3e02f6f8062fe"
},
{
"path": "references/generation-config.md",
"sha256": "c7dc7abdb40f16d31d950deacfb45f8bcc1af89671e29d5baa83bddc2e924844"
},
{
"path": "scripts/check-versions.sh",
"sha256": "944f9ad6dd1c8749bf15555c9855c2bf09659c1766c8dad3490cc29fc7626e05"
},
{
"path": ".claude-plugin/plugin.json",
"sha256": "42aee177ecd23e9adf0f9b406bc329405300f2be1fd5d8e721eca1851714a1a1"
},
{
"path": "templates/function-calling-basic.ts",
"sha256": "f1af0c619c93aa97f782899d2e5a0425a72c26a31141ca9cd1e8bf3b9e51579b"
},
{
"path": "templates/thinking-mode.ts",
"sha256": "42d2a63670690e74005948543e3579bdb214844f2c3de5896eb23d3f1efdc3ea"
},
{
"path": "templates/function-calling-parallel.ts",
"sha256": "a15b99eed2c9496a23d118a70893a3c1dc797aced2866ea953adcae34cf1d24f"
},
{
"path": "templates/multimodal-video-audio.ts",
"sha256": "9a709be0fbe5bcf56cea600fc8f228cec5511a3e02e91ce6f2614502b3bbb59f"
},
{
"path": "templates/text-generation-fetch.ts",
"sha256": "3a7cadf7990cc39a58b04b3e2766099235bd3c32ebe71f61910436bbff96bf31"
},
{
"path": "templates/grounding-search.ts",
"sha256": "d0ee315b13d64eeb7896578edddf7efd8f6057ddeac889751a429bcd7014184e"
},
{
"path": "templates/code-execution.ts",
"sha256": "0a7b261c5665fba8bc661627dc9295eb8ed7bb6ea80087899d4287fafd464eaf"
},
{
"path": "templates/streaming-chat.ts",
"sha256": "a8883cf071c083e2b50cddfe4754fd916e6260cc15d021e10082e4aa84179b80"
},
{
"path": "templates/package.json",
"sha256": "cd97bba41f70e8a1b16381fb1c05927d74b3c4afe30f81d3be91167765be1ebb"
},
{
"path": "templates/context-caching.ts",
"sha256": "395418ad8397a7e0a2698cc4308ea1674d3ef00bec3def54a19faefa9bd985e4"
},
{
"path": "templates/text-generation-basic.ts",
"sha256": "8722c5eea9efc8411513ec53b6df45d827d382b4c50fe0dfce21d7af97f9a7f0"
},
{
"path": "templates/cloudflare-worker.ts",
"sha256": "8b026ae94b2797e8b15bfac43442d0f95bcfdd6581863e49e1d199945e93594d"
},
{
"path": "templates/multimodal-image.ts",
"sha256": "88c50979e7f71f670e022accf669e9fa757887b645d7e318d2ba1ac0ffe85d16"
},
{
"path": "templates/streaming-fetch.ts",
"sha256": "9dd0a6bd48d69290787b7f526600a7acb99f47c0d9823cbd81158e358b5d108d"
},
{
"path": "templates/combined-advanced.ts",
"sha256": "37972b62e1e139c0357f5d2a03c1b335879ec472a5c4b47a90422630f485fa89"
}
],
"dirSha256": "b8bbae8bede51e3e5854007aa5c1ab3cb3a7e1050bbca038893d7f48dd558ba6"
},
"security": {
"scannedAt": null,
"scannerVersion": null,
"flags": []
}
}

481
references/code-execution-patterns.md Normal file

@@ -0,0 +1,481 @@
# Code Execution Patterns
Complete guide to using code execution with Google Gemini API for computational tasks, data analysis, and problem-solving.
---
## What is Code Execution?
Code Execution allows Gemini models to generate and execute Python code to solve problems requiring computation, enabling the model to:
- Perform precise mathematical calculations
- Analyze data with pandas/numpy
- Generate charts and visualizations
- Implement algorithms
- Process files and data structures
---
## How It Works
1. **Model receives prompt** requiring computation
2. **Model generates Python code** to solve the problem
3. **Code executes in sandbox** (secure, isolated environment)
4. **Results return to model** for incorporation into response
5. **Model explains results** in natural language
---
## Enabling Code Execution
### Basic Setup (SDK)
```typescript
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash', // Or gemini-2.5-pro
contents: 'Calculate the sum of first 50 prime numbers',
config: {
tools: [{ codeExecution: {} }] // Enable code execution
}
});
```
### Basic Setup (Fetch)
```typescript
const response = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-goog-api-key': env.GEMINI_API_KEY,
},
body: JSON.stringify({
tools: [{ code_execution: {} }],
contents: [{ parts: [{ text: 'Calculate...' }] }]
}),
}
);
```
---
## Available Python Packages
### Standard Library
- `math`, `statistics`, `random`
- `datetime`, `time`, `calendar`
- `json`, `csv`, `re`
- `collections`, `itertools`, `functools`
### Data Science
- `numpy` - numerical computing
- `pandas` - data analysis and manipulation
- `scipy` - scientific computing
### Visualization
- `matplotlib` - plotting and charts
- `seaborn` - statistical visualization
**Note**: This is a **limited sandbox environment** - not all PyPI packages are available.
---
## Response Structure
### Parsing Code Execution Results
```typescript
for (const part of response.candidates[0].content.parts) {
// Inline text
if (part.text) {
console.log('Text:', part.text);
}
// Generated code
if (part.executableCode) {
console.log('Language:', part.executableCode.language); // "PYTHON"
console.log('Code:', part.executableCode.code);
}
// Execution results
if (part.codeExecutionResult) {
console.log('Outcome:', part.codeExecutionResult.outcome); // "OUTCOME_OK" or "OUTCOME_FAILED"
console.log('Output:', part.codeExecutionResult.output);
}
}
```
### Example Response
```json
{
"candidates": [{
"content": {
"parts": [
{ "text": "I'll calculate that for you." },
{
"executableCode": {
"language": "PYTHON",
"code": "primes = []\nnum = 2\nwhile len(primes) < 50:\n if is_prime(num):\n primes.append(num)\n num += 1\nprint(sum(primes))"
}
},
{
"codeExecutionResult": {
"outcome": "OUTCOME_OK",
"output": "5117\n"
}
},
{ "text": "The sum is 5117." }
]
}
}]
}
```
---
## Common Patterns
### 1. Mathematical Calculations
```typescript
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Calculate the 100th Fibonacci number',
config: { tools: [{ codeExecution: {} }] }
});
```
**Prompting Tip**: Use phrases like "generate and run code" or "calculate using code" to explicitly request code execution.
### 2. Data Analysis
```typescript
const prompt = `
Analyze this sales data:
month,revenue,customers
Jan,50000,120
Feb,62000,145
Mar,58000,138
Calculate:
1. Total revenue
2. Average revenue per customer
3. Month-over-month growth rate
Use pandas or numpy for analysis.
`;
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: prompt,
config: { tools: [{ codeExecution: {} }] }
});
```
### 3. Chart Generation
```typescript
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Create a bar chart showing prime number distribution by last digit (0-9) for primes under 100',
config: { tools: [{ codeExecution: {} }] }
});
```
**Note**: Chart image data appears in `codeExecutionResult.output` (base64 encoded in some cases).
### 4. Algorithm Implementation
```typescript
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Implement quicksort and sort this list: [64, 34, 25, 12, 22, 11, 90]. Show the sorted result.',
config: { tools: [{ codeExecution: {} }] }
});
```
### 5. File Processing (In-Memory)
```typescript
const csvData = `name,age,city
Alice,30,NYC
Bob,25,LA
Charlie,35,Chicago`;
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: `Parse this CSV data and calculate average age:\n\n${csvData}`,
config: { tools: [{ codeExecution: {} }] }
});
```
---
## Chat with Code Execution
### Multi-Turn Computational Conversations
```typescript
const chat = await ai.chats.create({
model: 'gemini-2.5-flash',
config: { tools: [{ codeExecution: {} }] }
});
// First turn
let response = await chat.sendMessage('I have a data analysis question');
console.log(response.text);
// Second turn (will use code execution)
response = await chat.sendMessage(`
Calculate statistics for: [12, 15, 18, 22, 25, 28, 30]
- Mean
- Median
- Standard deviation
`);
for (const part of response.candidates[0].content.parts) {
if (part.text) console.log(part.text);
if (part.executableCode) console.log('Code:', part.executableCode.code);
if (part.codeExecutionResult) console.log('Results:', part.codeExecutionResult.output);
}
```
---
## Error Handling
### Checking Execution Outcome
```typescript
for (const part of response.candidates[0].content.parts) {
if (part.codeExecutionResult) {
if (part.codeExecutionResult.outcome === 'OUTCOME_OK') {
console.log('✅ Success:', part.codeExecutionResult.output);
} else if (part.codeExecutionResult.outcome === 'OUTCOME_FAILED') {
console.error('❌ Execution failed:', part.codeExecutionResult.output);
}
}
}
```
### Common Execution Errors
**Timeout**:
```
Error: Execution timed out after 30 seconds
```
**Solution**: Simplify computation or reduce data size.
**Import Error**:
```
ModuleNotFoundError: No module named 'requests'
```
**Solution**: Use only available packages (numpy, pandas, matplotlib, seaborn, scipy).
**Syntax Error**:
```
SyntaxError: invalid syntax
```
**Solution**: Model generated invalid code - try rephrasing prompt or regenerating.
---
## Best Practices
### ✅ Do
1. **Be Explicit**: Use phrases like "generate and run code" to trigger code execution
2. **Provide Data**: Include data directly in prompt for analysis
3. **Specify Output**: Ask for specific calculations or metrics
4. **Use Available Packages**: Stick to numpy, pandas, matplotlib, seaborn, scipy
5. **Check Outcome**: Always verify `outcome === 'OUTCOME_OK'`
### ❌ Don't
1. **Don't expect network access**: code cannot make HTTP requests
2. **Don't rely on the file system**: no persistent file storage between executions
3. **Don't run long computations**: timeout limits apply (~30 seconds)
4. **Don't add external dependencies**: new packages cannot be installed
5. **Don't assume state persistence**: each execution is isolated (no global state)
---
## Limitations
### Sandbox Restrictions
- **No Network Access**: Cannot call external APIs
- **No File I/O**: Cannot read/write to disk (in-memory only)
- **Limited Packages**: Only pre-installed packages available
- **Execution Timeout**: ~30 seconds maximum
- **No State**: Each execution is independent
### Supported Models
**Works with**:
- `gemini-2.5-pro`
- `gemini-2.5-flash`
**Does NOT work with**:
- `gemini-2.5-flash-lite` (no code execution support)
- Gemini 1.5 models (use Gemini 2.5)
---
## Advanced Patterns
### Iterative Analysis
```typescript
const chat = await ai.chats.create({
model: 'gemini-2.5-flash',
config: { tools: [{ codeExecution: {} }] }
});
// Step 1: Initial analysis
let response = await chat.sendMessage('Analyze data: [10, 20, 30, 40, 50]');
// Step 2: Follow-up based on results
response = await chat.sendMessage('Now calculate the variance');
// Step 3: Visualization
response = await chat.sendMessage('Create a histogram of this data');
```
### Combining with Function Calling
```typescript
const weatherFunction = {
name: 'get_current_weather',
description: 'Get weather for a city',
parametersJsonSchema: {
type: 'object',
properties: { city: { type: 'string' } },
required: ['city']
}
};
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Get weather for NYC, LA, Chicago. Calculate the average temperature.',
config: {
tools: [
{ functionDeclarations: [weatherFunction] },
{ codeExecution: {} }
]
}
});
// Model will:
// 1. Call get_current_weather for each city
// 2. Generate code to calculate average
// 3. Return result
```
### Data Transformation Pipeline
```typescript
const prompt = `
Transform this data:
Input: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Pipeline:
1. Filter odd numbers
2. Square each number
3. Calculate sum
4. Return result
Use code to process.
`;
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: prompt,
config: { tools: [{ codeExecution: {} }] }
});
```
---
## Optimization Tips
### 1. Clear Instructions
**❌ Vague**:
```typescript
contents: 'Analyze this data'
```
**✅ Specific**:
```typescript
contents: 'Calculate mean, median, and standard deviation for: [12, 15, 18, 22, 25]'
```
### 2. Provide Complete Data
```typescript
const csvData = `...complete dataset...`;
const prompt = `Analyze this CSV data:\n\n${csvData}\n\nCalculate total revenue.`;
```
### 3. Request Code Explicitly
```typescript
contents: 'Generate and run code to calculate the factorial of 20'
```
### 4. Handle Large Datasets
For large data, consider:
- Sampling (analyze subset)
- Aggregation (group by categories)
- Pagination (process in chunks)
---
## Troubleshooting
### Code Not Executing
**Symptom**: Response has text but no `executableCode`
**Causes**:
1. Code execution not enabled (`tools: [{ codeExecution: {} }]`)
2. Model decided code wasn't necessary
3. Using `gemini-2.5-flash-lite` (doesn't support code execution)
**Solution**: Be explicit in prompt: "Use code to calculate..."
### Timeout Errors
**Symptom**: `OUTCOME_FAILED` with timeout message
**Causes**: Computation too complex or data too large
**Solution**:
- Simplify algorithm
- Reduce data size
- Use more efficient approach
### Import Errors
**Symptom**: `ModuleNotFoundError`
**Causes**: Trying to import unavailable package
**Solution**: Use only available packages (numpy, pandas, matplotlib, seaborn, scipy)
---
## References
- Official Docs: https://ai.google.dev/gemini-api/docs/code-execution
- Templates: See `code-execution.ts` for working examples
- Available Packages: See "Available Python Packages" section above

373
references/context-caching-guide.md Normal file

@@ -0,0 +1,373 @@
# Context Caching Guide
Complete guide to using context caching with Google Gemini API to reduce costs by up to 90%.
---
## What is Context Caching?
Context caching allows you to cache frequently used content (system instructions, large documents, videos) and reuse it across multiple requests, significantly reducing token costs and improving latency.
---
## How It Works
1. **Create a cache** with your repeated content (documents, videos, system instructions)
2. **Set TTL** (time-to-live) for cache expiration
3. **Reference the cache** in subsequent API calls
4. **Pay less** - cached tokens cost ~90% less than regular input tokens
---
## Benefits
### Cost Savings
- **Cached input tokens**: ~90% cheaper than regular tokens
- **Output tokens**: Same price (not cached)
- **Example**: 100K token document cached → ~10K token cost equivalent
### Performance
- **Reduced latency**: Cached content is preprocessed
- **Faster responses**: No need to reprocess large context
- **Consistent results**: Same context every time
### Use Cases
- Large documents analyzed repeatedly
- Long system instructions used across sessions
- Video/audio files queried multiple times
- Consistent conversation context
---
## Cache Creation
### Basic Cache (SDK)
```typescript
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001', // Must use explicit version!
config: {
displayName: 'my-cache',
systemInstruction: 'You are a helpful assistant.',
contents: 'Large document content here...',
ttl: '3600s', // 1 hour
}
});
```
### Cache with Expiration Time
```typescript
// Set specific expiration time (timezone-aware)
const expirationTime = new Date(Date.now() + 2 * 60 * 60 * 1000); // 2 hours from now
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001',
config: {
displayName: 'my-cache',
contents: documentText,
expireTime: expirationTime, // Use expireTime instead of ttl
}
});
```
---
## TTL (Time-To-Live) Guidelines
### Recommended TTL Values
| Use Case | TTL | Reason |
|----------|-----|--------|
| Quick analysis session | 300s (5 min) | Short-lived tasks |
| Extended conversation | 3600s (1 hour) | Standard session length |
| Daily batch processing | 86400s (24 hours) | Reuse across day |
| Long-term analysis | 604800s (7 days) | Maximum allowed |
### TTL vs Expiration Time
**TTL (time-to-live)**:
- Relative duration from cache creation
- Format: `"3600s"` (string with 's' suffix)
- Easy for session-based caching
**Expiration Time**:
- Absolute timestamp
- Must be timezone-aware Date object
- Precise control over cache lifetime
---
## Using a Cache
### Generate Content with Cache (SDK)
```typescript
// Use cache name as model parameter
const response = await ai.models.generateContent({
model: cache.name, // Use cache.name, not original model name
contents: 'Summarize the document'
});
console.log(response.text);
```
### Multiple Queries with Same Cache
```typescript
const queries = [
'What are the key points?',
'Who are the main characters?',
'What is the conclusion?'
];
for (const query of queries) {
const response = await ai.models.generateContent({
model: cache.name,
contents: query
});
console.log(`Q: ${query}`);
console.log(`A: ${response.text}\n`);
}
```
---
## Cache Management
### Update Cache TTL
```typescript
// Extend cache lifetime before it expires
await ai.caches.update({
name: cache.name,
config: {
ttl: '7200s' // Extend to 2 hours
}
});
```
### List All Caches
```typescript
const caches = await ai.caches.list();
caches.forEach(cache => {
console.log(`${cache.displayName}: ${cache.name}`);
console.log(`Expires: ${cache.expireTime}`);
});
```
### Delete Cache
```typescript
// Delete when no longer needed
await ai.caches.delete({ name: cache.name });
```
---
## Advanced Use Cases
### Caching Video Files
```typescript
// 1. Upload video (let: reassigned while polling)
let videoFile = await ai.files.upload({
file: './video.mp4'
});
// 2. Wait for processing
while (videoFile.state === 'PROCESSING') {
await new Promise(resolve => setTimeout(resolve, 2000));
videoFile = await ai.files.get({ name: videoFile.name });
}
// 3. Create cache with video
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001',
config: {
displayName: 'video-cache',
systemInstruction: 'Analyze this video.',
contents: [videoFile],
ttl: '600s'
}
});
// 4. Query video multiple times
const response1 = await ai.models.generateContent({
model: cache.name,
contents: 'What happens in the first minute?'
});
const response2 = await ai.models.generateContent({
model: cache.name,
contents: 'Who are the main people?'
});
```
### Caching with System Instructions
```typescript
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001',
config: {
displayName: 'legal-expert-cache',
systemInstruction: `
You are a legal expert specializing in contract law.
Always cite relevant sections when making claims.
Use clear, professional language.
`,
contents: largeContractDocument,
ttl: '3600s'
}
});
// System instruction is part of cached context
const response = await ai.models.generateContent({
model: cache.name,
contents: 'Is this contract enforceable?'
});
```
---
## Important Notes
### Model Version Requirement
**⚠️ You MUST use explicit version suffixes when creating caches:**
```typescript
// ✅ CORRECT
model: 'gemini-2.5-flash-001'
// ❌ WRONG (will fail)
model: 'gemini-2.5-flash'
```
### Cache Expiration
- Caches are **automatically deleted** after TTL expires
- **Cannot recover** expired caches - must recreate
- Update TTL **before expiration** to extend lifetime
### Cost Calculation
```
Regular request: 100,000 input tokens = 100K token cost
With caching (after cache creation):
- Cached tokens: 100,000 × 0.1 (90% discount) = 10K equivalent cost
- New tokens: 1,000 × 1.0 = 1K cost
- Total: 11K equivalent (89% savings!)
```
### Limitations
- Maximum TTL: 7 days (604800s)
- Cache creation costs same as regular tokens (first time only)
- Subsequent uses get 90% discount
- Only input tokens are cached (output tokens never cached)
---
## Best Practices
### When to Use Caching
**Good Use Cases:**
- Large documents queried repeatedly (legal docs, research papers)
- Video/audio files analyzed with different questions
- Long system instructions used across many requests
- Consistent context in multi-turn conversations
**Bad Use Cases:**
- Single-use content (no benefit)
- Frequently changing content
- Short content (<1000 tokens) - minimal savings
- Content used only once per day (cache might expire)
### Optimization Tips
1. **Cache Early**: Create cache at session start
2. **Extend TTL**: Update before expiration if still needed
3. **Monitor Usage**: Track how often cache is reused
4. **Clean Up**: Delete unused caches to avoid clutter
5. **Combine Features**: Use caching with code execution, grounding for powerful workflows
### Cache Naming
Use descriptive `displayName` for easy identification:
```typescript
// ✅ Good names
displayName: 'financial-report-2024-q3'
displayName: 'legal-contract-acme-corp'
displayName: 'video-analysis-project-x'
// ❌ Vague names
displayName: 'cache1'
displayName: 'test'
```
---
## Troubleshooting
### "Invalid model name" Error
**Problem**: Using `gemini-2.5-flash` instead of `gemini-2.5-flash-001`
**Solution**: Always use explicit version suffix:
```typescript
model: 'gemini-2.5-flash-001' // Correct
```
### Cache Expired Error
**Problem**: Trying to use cache after TTL expired
**Solution**: Check expiration before use or extend TTL proactively:
```typescript
let cache = await ai.caches.get({ name: cacheName });
if (new Date(cache.expireTime) < new Date()) {
// Cache expired, recreate it
cache = await ai.caches.create({ ... });
}
```
### High Costs Despite Caching
**Problem**: Creating new cache for each request
**Solution**: Reuse the same cache across multiple requests:
```typescript
// ❌ Wrong - creates new cache each time
for (const query of queries) {
const cache = await ai.caches.create({ ... }); // Expensive!
const response = await ai.models.generateContent({ model: cache.name, ... });
}
// ✅ Correct - create once, use many times
const cache = await ai.caches.create({ ... }); // Create once
for (const query of queries) {
const response = await ai.models.generateContent({ model: cache.name, ... });
}
```
---
## References
- Official Docs: https://ai.google.dev/gemini-api/docs/caching
- Cost Optimization: See "Cost Optimization" in main SKILL.md
- Templates: See `context-caching.ts` for working examples

59
references/function-calling-patterns.md Normal file

@@ -0,0 +1,59 @@
# Function Calling Patterns
Complete guide to implementing function calling (tool use) with Gemini API.
---
## Basic Pattern
1. Define function declarations
2. Send request with tools
3. Check if model wants to call functions
4. Execute functions
5. Send results back to model
6. Get final response
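
A minimal end-to-end sketch of this loop with the `@google/genai` SDK; the weather function, its schema, and the hard-coded result are illustrative stand-ins, not part of any real API:

```typescript
import { GoogleGenAI } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

// 1. Define the function declaration (illustrative)
const getWeather = {
  name: 'get_weather',
  description: 'Get current weather for a city',
  parametersJsonSchema: {
    type: 'object',
    properties: { city: { type: 'string', description: 'City name' } },
    required: ['city']
  }
};

// 2. Send the request with tools
const question = 'What is the weather in Paris?';
let response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: question,
  config: { tools: [{ functionDeclarations: [getWeather] }] }
});

// 3. Check whether the model requested a function call
const call = response.functionCalls?.[0];
if (call) {
  // 4. Execute the function (stubbed result for illustration)
  const result = { temperature: 18, condition: 'cloudy' };

  // 5. Send the result back as a functionResponse part
  response = await ai.models.generateContent({
    model: 'gemini-2.5-flash',
    contents: [
      { role: 'user', parts: [{ text: question }] },
      { role: 'model', parts: [{ functionCall: call }] },
      { role: 'user', parts: [{ functionResponse: { name: call.name, response: result } }] }
    ],
    config: { tools: [{ functionDeclarations: [getWeather] }] }
  });
}

// 6. Final natural-language answer
console.log(response.text);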
---
## Function Declaration Schema
```typescript
interface FunctionDeclaration {
  name: string;                    // Function name (no spaces)
  description: string;             // What the function does
  parametersJsonSchema: {          // Subset of OpenAPI schema
    type: 'object';
    properties: {
      [paramName: string]: {
        type: 'string' | 'number' | 'boolean' | 'array' | 'object';
        description: string;       // Parameter description
        enum?: string[];           // Optional: allowed values
      };
    };
    required: string[];            // Required parameter names
  };
}
```
---
## Calling Modes
- **AUTO** (default): Model decides when to call
- **ANY**: Force at least one function call
- **NONE**: Disable function calling
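
The mode is set via `toolConfig`. A sketch, assuming the SDK exports the `FunctionCallingConfigMode` enum (present in recent `@google/genai` versions); `allowedFunctionNames` optionally restricts `ANY` to specific functions:

```typescript
import { GoogleGenAI, FunctionCallingConfigMode } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Log a user_signup event.',
  config: {
    tools: [{ functionDeclarations: [/* your declarations */] }],
    toolConfig: {
      functionCallingConfig: {
        mode: FunctionCallingConfigMode.ANY,   // force at least one call
        allowedFunctionNames: ['log_event']    // optional allow-list with ANY
      }
    }
  }
});
```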
---
## Parallel vs Compositional
**Parallel**: Independent functions run simultaneously
**Compositional**: Sequential dependencies (A → B → C)
Gemini automatically detects which pattern to use.
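
For the parallel case, the requested calls are independent and can be executed concurrently. A sketch, assuming a `response` from a prior `generateContent` call with tools (as in the round-trip example above); `dispatch` is a hypothetical router, not an SDK function:

```typescript
// Hypothetical router mapping function names to local implementations
async function dispatch(name: string, args: unknown): Promise<Record<string, unknown>> {
  if (name === 'get_weather') return { temperature: 18, condition: 'cloudy' };
  throw new Error(`Unknown function: ${name}`);
}

// `response` comes from a prior generateContent call with tools configured
const calls = response.functionCalls ?? [];

// Parallel calls are independent, so run them concurrently
const resultParts = await Promise.all(
  calls.map(async call => ({
    functionResponse: { name: call.name, response: await dispatch(call.name, call.args) },
  }))
);

// All results go back to the model in a single follow-up user turn:
// contents.push({ role: 'user', parts: resultParts });
```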
---
## Official Docs
https://ai.google.dev/gemini-api/docs/function-calling

57
references/generation-config.md Normal file

@@ -0,0 +1,57 @@
# Generation Configuration Reference
Complete reference for all generation parameters.
---
## All Parameters
```typescript
config: {
temperature: number, // 0.0-2.0 (default: 1.0)
topP: number, // 0.0-1.0 (default: 0.95)
topK: number, // 1-100+ (default: 40)
maxOutputTokens: number, // 1-65536
stopSequences: string[], // Stop at these strings
responseMimeType: string, // 'text/plain' | 'application/json'
candidateCount: number, // Usually 1
thinkingConfig: {
thinkingBudget: number // Max thinking tokens
}
}
```
---
## Parameter Guidelines
### temperature
- **0.0**: Deterministic, focused
- **1.0**: Balanced (default)
- **2.0**: Very creative, random
### topP (nucleus sampling)
- **0.95**: Default, good balance
- Lower = more focused
### topK
- **40**: Default
- Higher = more diversity
### maxOutputTokens
- Always set this to prevent excessive generation
- Max: 65,536 tokens
---
## Use Cases
**Factual tasks**: temperature=0.0, topP=0.8
**Creative tasks**: temperature=1.2, topP=0.95
**Code generation**: temperature=0.3, topP=0.9
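
As a sketch, applying the factual preset above to a request (the values are guideline numbers, not mandated settings):

```typescript
import { GoogleGenAI } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

// Factual preset: deterministic, bounded output
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'List the seven SI base units.',
  config: {
    temperature: 0.0,
    topP: 0.8,
    maxOutputTokens: 1024,   // always bound output length
    stopSequences: ['END']   // optional early stop
  }
});
console.log(response.text);
```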
---
## Official Docs
https://ai.google.dev/gemini-api/docs/models/generative-models#model-parameters

602
references/grounding-guide.md Normal file

@@ -0,0 +1,602 @@
# Grounding with Google Search Guide
Complete guide to using grounding with Google Search to connect Gemini models to real-time web information, reducing hallucinations and providing verifiable, up-to-date responses.
---
## What is Grounding?
Grounding connects the Gemini model to Google Search, allowing it to:
- Access real-time information beyond training cutoff
- Reduce hallucinations with fact-checked web sources
- Provide citations and source URLs
- Answer questions about current events
- Verify information against the web
---
## How It Works
1. **Model receives query** (e.g., "Who won Euro 2024?")
2. **Model determines** if current information is needed
3. **Performs Google Search** automatically
4. **Processes search results** (web pages, snippets)
5. **Incorporates findings** into response
6. **Provides citations** with source URLs
---
## Two Grounding APIs
### 1. Google Search (`googleSearch`) - Recommended for Gemini 2.5
**Simple, automatic grounding**:
```typescript
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Who won the euro 2024?',
config: {
tools: [{ googleSearch: {} }]
}
});
```
**Features**:
- Simple configuration (empty object)
- Automatic search when model needs current info
- Available on all Gemini 2.5 models
- Recommended for new projects
### 2. Google Search Retrieval (`googleSearchRetrieval`) - Legacy for Gemini 1.5
**Dynamic threshold control**:
```typescript
import { DynamicRetrievalConfigMode } from '@google/genai';
const response = await ai.models.generateContent({
model: 'gemini-1.5-flash',
contents: 'Who won the euro 2024?',
config: {
tools: [{
googleSearchRetrieval: {
dynamicRetrievalConfig: {
mode: DynamicRetrievalConfigMode.MODE_DYNAMIC,
dynamicThreshold: 0.7 // Ground only when predicted benefit score >= 0.7 (API default: 0.3)
}
}
}]
}
});
```
**Features**:
- Control when searches happen via threshold
- Used with Gemini 1.5 models
- More configuration options
**Recommendation**: Use `googleSearch` for Gemini 2.5 models (simpler and newer).
---
## Basic Usage
### SDK Approach (Gemini 2.5)
```typescript
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'What are the latest developments in AI?',
config: {
tools: [{ googleSearch: {} }]
}
});
console.log(response.text);
// Check if grounding was used
if (response.candidates[0].groundingMetadata) {
console.log('✓ Search performed');
console.log('Sources:', response.candidates[0].groundingMetadata.webPages);
} else {
console.log('✓ Answered from model knowledge');
}
```
### Fetch Approach (Cloudflare Workers)
```typescript
const response = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-goog-api-key': env.GEMINI_API_KEY,
},
body: JSON.stringify({
contents: [{ parts: [{ text: 'What are the latest developments in AI?' }] }],
tools: [{ google_search: {} }]
}),
}
);
const data = await response.json();
console.log(data.candidates[0].content.parts[0].text);
```
---
## Grounding Metadata
### Structure
```typescript
{
groundingMetadata: {
// Search queries performed
searchQueries: [
{ text: "euro 2024 winner" }
],
// Web pages retrieved
webPages: [
{
url: "https://example.com/euro-2024",
title: "UEFA Euro 2024 Results",
snippet: "Spain won UEFA Euro 2024..."
}
],
// Citations (inline references)
citations: [
{
startIndex: 42,
endIndex: 47,
uri: "https://example.com/euro-2024"
}
],
// Retrieval queries (alternative search terms)
retrievalQueries: [
{ query: "who won euro 2024 final" }
]
}
}
```
### Accessing Metadata
```typescript
if (response.candidates[0].groundingMetadata) {
const metadata = response.candidates[0].groundingMetadata;
// Display sources
console.log('Sources:');
metadata.webPages?.forEach((page, i) => {
console.log(`${i + 1}. ${page.title}`);
console.log(` ${page.url}`);
});
// Display citations
console.log('\nCitations:');
metadata.citations?.forEach((citation) => {
console.log(`Position ${citation.startIndex}-${citation.endIndex}: ${citation.uri}`);
});
}
```
---
## When to Use Grounding
### ✅ Good Use Cases
**Current Events**:
```typescript
'What happened in the news today?'
'Who won the latest sports championship?'
'What are the current stock prices?'
```
**Recent Developments**:
```typescript
'What are the latest AI breakthroughs?'
'What are recent changes in climate policy?'
```
**Fact-Checking**:
```typescript
'Is this claim true: [claim]?'
'What does the latest research say about [topic]?'
```
**Real-Time Data**:
```typescript
'What is the current weather in Tokyo?'
"What are today's cryptocurrency prices?"
```
### ❌ Not Recommended For
**General Knowledge**:
```typescript
'What is the capital of France?' // Model knows this
'How does photosynthesis work?' // Stable knowledge
```
**Mathematical Calculations**:
```typescript
'What is 15 * 27?' // Use code execution instead
```
**Creative Tasks**:
```typescript
'Write a poem about autumn' // No search needed
```
**Code Generation**:
```typescript
'Write a sorting algorithm' // Internal reasoning sufficient
```
---
## Chat with Grounding
### Multi-Turn Conversations
```typescript
const chat = await ai.chats.create({
model: 'gemini-2.5-flash',
config: {
tools: [{ googleSearch: {} }]
}
});
// First question
let response = await chat.sendMessage('What are the latest quantum computing developments?');
console.log(response.text);
// Display sources
if (response.candidates[0].groundingMetadata) {
const sources = response.candidates[0].groundingMetadata.webPages || [];
console.log(`\nSources: ${sources.length} web pages`);
sources.forEach(s => console.log(`- ${s.title}: ${s.url}`));
}
// Follow-up question
response = await chat.sendMessage('Which company made the biggest breakthrough?');
console.log('\n' + response.text);
```
---
## Combining with Other Features
### Grounding + Function Calling
```typescript
const weatherFunction = {
name: 'get_current_weather',
description: 'Get weather for a location',
parametersJsonSchema: {
type: 'object',
properties: {
location: { type: 'string', description: 'City name' }
},
required: ['location']
}
};
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'What is the weather like in the city that won Euro 2024?',
config: {
tools: [
{ googleSearch: {} }, // For finding Euro 2024 winner
{ functionDeclarations: [weatherFunction] } // For weather lookup
]
}
});
// Model will:
// 1. Use Google Search to find the Euro 2024 winner (Spain)
// 2. Call get_current_weather with a matching city (e.g., Madrid)
// 3. Combine both results in response
```
### Grounding + Code Execution
```typescript
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Find the current stock prices for AAPL, GOOGL, MSFT and calculate their average',
config: {
tools: [
{ googleSearch: {} }, // For current stock prices
{ codeExecution: {} } // For averaging
]
}
});
// Model will:
// 1. Search for current stock prices
// 2. Generate code to calculate average
// 3. Execute code with the found prices
// 4. Return result with citations
```
---
## Checking Grounding Usage
### Determine if Search Was Performed
```typescript
const queries = [
'What is 2+2?', // Should NOT use search
'What happened in the news today?' // Should use search
];
for (const query of queries) {
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: query,
config: { tools: [{ googleSearch: {} }] }
});
console.log(`Query: ${query}`);
console.log(`Search used: ${response.candidates[0].groundingMetadata ? 'YES' : 'NO'}`);
console.log();
}
```
**Output**:
```
Query: What is 2+2?
Search used: NO
Query: What happened in the news today?
Search used: YES
```
---
## Dynamic Retrieval (Gemini 1.5)
### Threshold-Based Grounding
```typescript
const response = await ai.models.generateContent({
model: 'gemini-1.5-flash',
contents: 'Who won the euro 2024?',
config: {
tools: [{
googleSearchRetrieval: {
dynamicRetrievalConfig: {
mode: DynamicRetrievalConfigMode.MODE_DYNAMIC,
dynamicThreshold: 0.7 // Ground only when predicted benefit score >= 0.7
}
}
}]
}
});
if (!response.candidates[0].groundingMetadata) {
console.log('Model answered from its own knowledge (prediction score below threshold)');
} else {
console.log('Search performed (prediction score >= threshold)');
}
```
**How It Works**:
- Model assigns the prompt a prediction score (0.0-1.0) estimating how much it would benefit from grounding
- If score >= threshold → performs search
- If score < threshold → answers from internal knowledge
**Threshold Values**:
- `0.0`: Always search (every prompt is grounded)
- `0.3`: API default, balanced
- `0.7`: Search only for prompts that clearly benefit from grounding
- `1.0`: Effectively never search
---
## Best Practices
### ✅ Do
1. **Check Metadata**: Always verify if grounding was used
```typescript
if (response.candidates[0].groundingMetadata) { ... }
```
2. **Display Citations**: Show sources to users for transparency
```typescript
metadata.webPages.forEach(page => {
console.log(`Source: ${page.title} (${page.url})`);
});
```
3. **Use Specific Queries**: Better search results with clear questions
```typescript
// ✅ Good: "What are Microsoft's Q3 2024 earnings?"
// ❌ Vague: "Tell me about Microsoft"
```
4. **Combine Features**: Use with function calling/code execution for powerful workflows
5. **Handle Missing Metadata**: Not all queries trigger search
```typescript
const sources = response.candidates[0].groundingMetadata?.webPages || [];
```
### ❌ Don't
1. **Don't Assume Search Always Happens**: Model decides when to search
2. **Don't Ignore Citations**: They're crucial for fact-checking
3. **Don't Use for Stable Knowledge**: Waste of resources for unchanging facts
4. **Don't Expect Perfect Coverage**: Not all information is on the web
---
## Cost and Performance
### Cost Considerations
- **Added Latency**: Search takes 1-3 seconds typically
- **Token Costs**: Retrieved content counts as input tokens
- **Rate Limits**: Subject to API rate limits
### Optimization
**Use Dynamic Threshold** (Gemini 1.5):
```typescript
dynamicThreshold: 0.7 // Lower = more searches, higher = fewer searches
```
**Cache Grounding Results** (if appropriate):
```typescript
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001',
config: {
displayName: 'grounding-cache',
tools: [{ googleSearch: {} }],
contents: 'Initial query that triggers search...',
ttl: '3600s'
}
});
// Subsequent queries reuse cached grounding results
```
---
## Troubleshooting
### Grounding Not Working
**Symptom**: No `groundingMetadata` in response
**Causes**:
1. Grounding not enabled: `tools: [{ googleSearch: {} }]`
2. Model decided search wasn't needed (query answerable from knowledge)
3. Google Cloud project not configured (grounding requires GCP)
**Solution**:
- Verify `tools` configuration
- Use queries requiring current information
- Set up Google Cloud project
### Poor Search Quality
**Symptom**: Irrelevant sources or wrong information
**Causes**:
- Vague query
- Search terms ambiguous
- Recent events not yet indexed
**Solution**:
- Make queries more specific
- Include context in prompt
- Verify search queries in metadata
### Citations Missing
**Symptom**: `groundingMetadata` present but no citations
**Explanation**: Citations are **inline references** - they may not always be present if model doesn't directly quote sources.
**Solution**: Check `webPages` instead for full source list
---
## Important Requirements
### Google Cloud Project
**⚠️ Grounding requires a Google Cloud project, not just an API key.**
**Setup**:
1. Create Google Cloud project
2. Enable Generative Language API
3. Configure billing
4. Use API key from that project
**Error if Missing**:
```
Error: Grounding requires Google Cloud project configuration
```
### Model Support
**✅ Supported**:
- All Gemini 2.5 models (`googleSearch`)
- All Gemini 1.5 models (`googleSearchRetrieval`)
**❌ Not Supported**:
- Gemini 1.0 models
---
## Examples
### News Summary
```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: "Summarize today's top 3 technology news headlines",
  config: { tools: [{ googleSearch: {} }] }
});
console.log(response.text);

const metadata = response.candidates[0].groundingMetadata;
metadata?.webPages?.forEach((page, i) => {
  console.log(`${i + 1}. ${page.title}: ${page.url}`);
});
```
### Fact Verification
```typescript
const claim = "The Earth is flat";
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: `Is this claim true: "${claim}"? Use reliable sources to verify.`,
config: { tools: [{ googleSearch: {} }] }
});
console.log(response.text);
```
### Market Research
```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'What are the current trends in electric vehicle adoption in 2024?',
  config: { tools: [{ googleSearch: {} }] }
});
console.log(response.text);

const metadata = response.candidates[0].groundingMetadata;
console.log('\nSources:');
metadata?.webPages?.forEach(page => {
  console.log(`- ${page.title}`);
});
```
---
## References
- Official Docs: https://ai.google.dev/gemini-api/docs/grounding
- Google Search Docs: https://ai.google.dev/gemini-api/docs/google-search
- Templates: See `grounding-search.ts` for working examples
- Combined Features: See `combined-advanced.ts` for integration patterns

289
references/models-guide.md Normal file

@@ -0,0 +1,289 @@
# Gemini Models Guide (2025)
**Last Updated**: 2025-11-19 (Gemini 3 preview release)
---
## Gemini 3 Series (Preview - November 2025)
### gemini-3-pro-preview
**Model ID**: `gemini-3-pro-preview`
**Status**: 🆕 Preview release (November 18, 2025)
**Context Windows**:
- Input: TBD (documentation pending)
- Output: TBD (documentation pending)
**Description**: Google's newest and most intelligent AI model with state-of-the-art reasoning and multimodal understanding. Outperforms Gemini 2.5 Pro on every major AI benchmark.
**Best For**:
- Most complex reasoning tasks
- Advanced multimodal analysis (images, videos, PDFs, audio)
- Benchmark-critical applications
- Cutting-edge projects requiring latest capabilities
- Tasks requiring absolute best quality
**Features**:
- ✅ Enhanced multimodal understanding
- ✅ Function calling
- ✅ Streaming
- ✅ System instructions
- ✅ JSON mode
- ❓ Thinking mode: TBD (documentation pending)
**Knowledge Cutoff**: TBD
**Pricing**: Preview pricing (likely higher than 2.5 Pro)
**⚠️ Preview Status**: Use for evaluation and testing. Consider `gemini-2.5-pro` for production-critical decisions until Gemini 3 reaches stable general availability.
**New Capabilities**:
- Record-breaking benchmark performance
- Enhanced generative UI responses
- Advanced coding capabilities (Google Antigravity integration)
- State-of-the-art multimodal understanding
---
## Current Production Models (Gemini 2.5 - Stable)
### gemini-2.5-pro
**Model ID**: `gemini-2.5-pro`
**Context Windows**:
- Input: 1,048,576 tokens (NOT 2M!)
- Output: 65,536 tokens
**Description**: State-of-the-art thinking model capable of reasoning over complex problems in code, math, and STEM.
**Best For**:
- Complex reasoning tasks
- Advanced code generation and optimization
- Mathematical problem-solving
- Multi-step logical analysis
- STEM applications
**Features**:
- ✅ Thinking mode (enabled by default)
- ✅ Function calling
- ✅ Multimodal (text, images, video, audio, PDFs)
- ✅ Streaming
- ✅ System instructions
- ✅ JSON mode
**Knowledge Cutoff**: January 2025
**Pricing**: Higher cost, use for tasks requiring best quality
---
### gemini-2.5-flash
**Model ID**: `gemini-2.5-flash`
**Context Windows**:
- Input: 1,048,576 tokens
- Output: 65,536 tokens
**Description**: Best price-performance model for large-scale processing, low-latency, and high-volume tasks.
**Best For**:
- General-purpose AI applications
- High-volume API calls
- Agentic workflows
- Cost-sensitive applications
- Production workloads
**Features**:
- ✅ Thinking mode (enabled by default)
- ✅ Function calling
- ✅ Multimodal (text, images, video, audio, PDFs)
- ✅ Streaming
- ✅ System instructions
- ✅ JSON mode
**Knowledge Cutoff**: January 2025
**Pricing**: Best price-performance ratio
**⭐ Recommended**: This is the default choice for most applications
---
### gemini-2.5-flash-lite
**Model ID**: `gemini-2.5-flash-lite`
**Context Windows**:
- Input: 1,048,576 tokens
- Output: 65,536 tokens
**Description**: Most cost-efficient and fastest 2.5 model, optimized for high throughput.
**Best For**:
- High-throughput applications
- Simple text generation
- Cost-critical use cases
- Speed-prioritized workloads
**Features**:
- ✅ Thinking mode (enabled by default)
- ❌ **NO function calling** (critical limitation!)
- ✅ Multimodal (text, images, video, audio, PDFs)
- ✅ Streaming
- ✅ System instructions
- ✅ JSON mode
**Knowledge Cutoff**: January 2025
**Pricing**: Lowest cost
**⚠️ Important**: Flash-Lite does NOT support function calling! Use Flash or Pro if you need tool use.
---
## Model Comparison Matrix
| Feature | Pro | Flash | Flash-Lite |
|---------|-----|-------|------------|
| **Thinking Mode** | ✅ Default ON | ✅ Default ON | ✅ Default ON |
| **Function Calling** | ✅ Yes | ✅ Yes | ❌ **NO** |
| **Multimodal** | ✅ Full | ✅ Full | ✅ Full |
| **Streaming** | ✅ Yes | ✅ Yes | ✅ Yes |
| **Input Tokens** | 1,048,576 | 1,048,576 | 1,048,576 |
| **Output Tokens** | 65,536 | 65,536 | 65,536 |
| **Reasoning Quality** | Best | Good | Basic |
| **Speed** | Moderate | Fast | Fastest |
| **Cost** | Highest | Medium | Lowest |
---
## Previous Generation Models (Still Available)
### Gemini 2.0 Flash
**Model ID**: `gemini-2.0-flash`
**Context**: 1M input / 65K output tokens
**Status**: Previous generation, 2.5 Flash recommended instead
### Gemini 1.5 Pro
**Model ID**: `gemini-1.5-pro`
**Context**: 2M input tokens (this is the ONLY model with 2M!)
**Status**: Older model, 2.5 models recommended
---
## Context Window Clarification
**⚠️ CRITICAL CORRECTION**:
**ACCURATE**: Gemini 2.5 models support **1,048,576 input tokens** (approximately 1 million)
**INACCURATE**: Claiming Gemini 2.5 has 2M token context window
**WHY THIS MATTERS**:
- Gemini 1.5 Pro (older model) had 2M tokens
- Gemini 2.5 models (current) have ~1M tokens
- This is a common mistake that causes confusion!
**This skill prevents this error by providing accurate information.**
---
## Model Selection Guide
### Use gemini-2.5-pro When:
- ✅ Complex reasoning required (math, logic, STEM)
- ✅ Advanced code generation and optimization
- ✅ Multi-step problem-solving
- ✅ Quality is more important than cost
- ✅ Tasks require maximum capability
### Use gemini-2.5-flash When:
- ✅ General-purpose AI applications
- ✅ High-volume production workloads
- ✅ Function calling required
- ✅ Agentic workflows
- ✅ Good balance of cost and quality needed
- ⭐ **Recommended default choice**
### Use gemini-2.5-flash-lite When:
- ✅ Simple text generation only
- ✅ No function calling needed
- ✅ High throughput required
- ✅ Cost is primary concern
- ⚠️ **Only if you don't need function calling!**
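
These rules can be condensed into a small helper; this is a hypothetical convenience encoding the guide above, not part of the SDK:

```typescript
type Needs = {
  complexReasoning?: boolean;  // math, multi-step logic, STEM
  functionCalling?: boolean;   // tool use required
  costCritical?: boolean;      // throughput/cost over quality
};

// Hypothetical helper encoding the selection rules above
function chooseModel(needs: Needs): string {
  if (needs.complexReasoning) return 'gemini-2.5-pro';
  if (needs.costCritical && !needs.functionCalling) return 'gemini-2.5-flash-lite';
  return 'gemini-2.5-flash'; // recommended default
}

chooseModel({ functionCalling: true }); // -> 'gemini-2.5-flash'
```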
---
## Common Mistakes
### ❌ Mistake 1: Using Wrong Model Name
```typescript
// WRONG - old model name
model: 'gemini-1.5-pro'
// CORRECT - current model
model: 'gemini-2.5-flash'
```
### ❌ Mistake 2: Claiming 2M Context for 2.5 Models
```typescript
// WRONG ASSUMPTION
// "Gemini 2.5 has 2M token context window"
// CORRECT
// Gemini 2.5 has 1,048,576 input tokens
// Only Gemini 1.5 Pro (older) had 2M
```
### ❌ Mistake 3: Using Flash-Lite for Function Calling
```typescript
// WRONG - Flash-Lite doesn't support function calling!
model: 'gemini-2.5-flash-lite',
config: {
tools: [{ functionDeclarations: [...] }] // This will FAIL
}
// CORRECT
model: 'gemini-2.5-flash', // or gemini-2.5-pro
config: {
tools: [{ functionDeclarations: [...] }]
}
```
---
## Rate Limits (Free vs Paid)
### Free Tier
- **15 RPM** (requests per minute)
- **1M TPM** (tokens per minute)
- **1,500 RPD** (requests per day)
### Paid Tier
- **360 RPM**
- **4M TPM**
- Unlimited daily requests
**Tip**: Monitor your usage and implement rate limiting to stay within quotas.
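
A minimal backoff sketch for staying under these quotas; the 429/`exhausted` detection is an assumption about how the error surfaces, so adapt it to what your client actually throws:

```typescript
// Minimal exponential-backoff wrapper (sketch; error shape is an assumption)
async function withBackoff<T>(fn: () => Promise<T>, maxRetries = 5): Promise<T> {
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (err: any) {
      const rateLimited = err?.status === 429 || /exhausted/i.test(String(err?.message));
      if (!rateLimited || attempt >= maxRetries) throw err;
      // 1s, 2s, 4s, ... capped at 30s, plus jitter
      const delayMs = Math.min(1000 * 2 ** attempt, 30_000) + Math.random() * 250;
      await new Promise(resolve => setTimeout(resolve, delayMs));
    }
  }
}

// Usage:
// const response = await withBackoff(() =>
//   ai.models.generateContent({ model: 'gemini-2.5-flash', contents: 'Hi' })
// );
```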
---
## Official Documentation
- **Models Overview**: https://ai.google.dev/gemini-api/docs/models
- **Gemini 2.5 Announcement**: https://developers.googleblog.com/en/gemini-2-5-thinking-model-updates/
- **Pricing**: https://ai.google.dev/pricing
---
**Production Tip**: Always use gemini-2.5-flash as your default unless you specifically need Pro's advanced reasoning or want to minimize cost with Flash-Lite (and don't need function calling).

58
references/multimodal-guide.md Normal file

@@ -0,0 +1,58 @@
# Multimodal Guide
Complete guide to using images, video, audio, and PDFs with Gemini API.
---
## Supported Formats
### Images
- JPEG, PNG, WebP, HEIC, HEIF
- Max size: 20MB
### Video
- MP4, MPEG, MOV, AVI, FLV, MPG, WebM, WMV
- Max size: 2GB
- Max length (inline): 2 minutes
### Audio
- MP3, WAV, FLAC, AAC, OGG, OPUS
- Max size: 20MB
### PDFs
- Max size: 30MB
- Text-based PDFs work best
---
## Usage Pattern
```typescript
contents: [
{
parts: [
{ text: 'Your question' },
{
inlineData: {
data: base64EncodedData,
mimeType: 'image/jpeg' // or video/mp4, audio/mp3, application/pdf
}
}
]
}
]
```
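
A complete Node.js sketch using this pattern with a local JPEG (the file path is illustrative):

```typescript
import fs from 'node:fs';
import { GoogleGenAI } from '@google/genai';

const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

// Read and base64-encode a local image (path is illustrative)
const imageBase64 = fs.readFileSync('./photo.jpg').toString('base64');

const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: [
    {
      parts: [
        { text: 'Describe this image in one sentence.' },
        { inlineData: { data: imageBase64, mimeType: 'image/jpeg' } }
      ]
    }
  ]
});
console.log(response.text);
```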
---
## Best Practices
- Use specific, detailed prompts
- Combine multiple modalities in one request
- For large files (>2GB), use File API (Phase 2)
---
## Official Docs
https://ai.google.dev/gemini-api/docs/vision

235
references/sdk-migration-guide.md Normal file

@@ -0,0 +1,235 @@
# SDK Migration Guide
**From**: `@google/generative-ai` (DEPRECATED)
**To**: `@google/genai` (CURRENT)
**Deadline**: November 30, 2025 (deprecated SDK sunset)
---
## Why Migrate?
The `@google/generative-ai` SDK is deprecated and will stop receiving updates on **November 30, 2025**.
The new `@google/genai` SDK:
- ✅ Works with both Gemini API and Vertex AI
- ✅ Supports Gemini 2.0+ features
- ✅ Better TypeScript support
- ✅ Unified API across platforms
- ✅ Active development and updates
---
## Migration Steps
### 1. Update Package
```bash
# Remove deprecated SDK
npm uninstall @google/generative-ai
# Install current SDK
npm install @google/genai@1.27.0
```
### 2. Update Imports
**Old (DEPRECATED)**:
```typescript
import { GoogleGenerativeAI } from '@google/generative-ai';
const genAI = new GoogleGenerativeAI(apiKey);
const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' });
```
**New (CURRENT)**:
```typescript
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({ apiKey });
// No need to get model separately
```
### 3. Update API Calls
**Old**:
```typescript
const result = await model.generateContent(prompt);
const response = await result.response;
const text = response.text();
```
**New**:
```typescript
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: prompt
});
const text = response.text;
```
### 4. Update Streaming
**Old**:
```typescript
const result = await model.generateContentStream(prompt);
for await (const chunk of result.stream) {
console.log(chunk.text());
}
```
**New**:
```typescript
const response = await ai.models.generateContentStream({
model: 'gemini-2.5-flash',
contents: prompt
});
for await (const chunk of response) {
console.log(chunk.text);
}
```
### 5. Update Chat
**Old**:
```typescript
const chat = model.startChat({
history: []
});
const result = await chat.sendMessage(message);
const response = await result.response;
console.log(response.text());
```
**New**:
```typescript
const chat = await ai.chats.create({
model: 'gemini-2.5-flash',
history: []
});
const response = await chat.sendMessage(message);
console.log(response.text);
```
---
## Complete Before/After Example
### Before (Deprecated SDK)
```typescript
import { GoogleGenerativeAI } from '@google/generative-ai';
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY);
const model = genAI.getGenerativeModel({ model: 'gemini-2.5-flash' });
// Generate
const result = await model.generateContent('Hello');
const response = await result.response;
console.log(response.text());
// Stream
const streamResult = await model.generateContentStream('Write a story');
for await (const chunk of streamResult.stream) {
console.log(chunk.text());
}
// Chat
const chat = model.startChat();
const chatResult = await chat.sendMessage('Hi');
const chatResponse = await chatResult.response;
console.log(chatResponse.text());
```
### After (Current SDK)
```typescript
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
// Generate
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Hello'
});
console.log(response.text);
// Stream
const streamResponse = await ai.models.generateContentStream({
model: 'gemini-2.5-flash',
contents: 'Write a story'
});
for await (const chunk of streamResponse) {
console.log(chunk.text);
}
// Chat
const chat = await ai.chats.create({ model: 'gemini-2.5-flash' });
const chatResponse = await chat.sendMessage('Hi');
console.log(chatResponse.text);
```
---
## Key Differences
| Aspect | Old SDK | New SDK |
|--------|---------|---------|
| Package | `@google/generative-ai` | `@google/genai` |
| Class | `GoogleGenerativeAI` | `GoogleGenAI` |
| Model Init | `genAI.getGenerativeModel()` | Specify in each call |
| Text Access | `response.text()` (method) | `response.text` (property) |
| Stream Iteration | `result.stream` | Direct iteration |
| Chat Creation | `model.startChat()` | `ai.chats.create()` |
---
## Troubleshooting
### Error: "Cannot find module '@google/generative-ai'"
**Cause**: Old import statement after migration
**Solution**: Update all imports to `@google/genai`
### Error: "Property 'text' does not exist"
**Cause**: Using `response.text()` (method) instead of `response.text` (property)
**Solution**: Remove parentheses: `response.text` not `response.text()`
### Error: "generateContent is not a function"
**Cause**: Trying to call methods on old model object
**Solution**: Use `ai.models.generateContent()` directly
---
## Automated Migration Script
```bash
# Find all files using old SDK
rg "@google/generative-ai" --type ts
# Replace import statements (GNU sed shown; on macOS use `sed -i ''`)
find . -name "*.ts" -exec sed -i 's/@google\/generative-ai/@google\/genai/g' {} +
# Replace class name
find . -name "*.ts" -exec sed -i 's/GoogleGenerativeAI/GoogleGenAI/g' {} +
```
**⚠️ Note**: This script handles imports but NOT API changes. Manual review required!
---
## Official Resources
- **Migration Guide**: https://ai.google.dev/gemini-api/docs/migrate-to-genai
- **New SDK Docs**: https://github.com/googleapis/js-genai
- **Deprecated SDK**: https://github.com/google-gemini/deprecated-generative-ai-js
---
**Deadline Reminder**: November 30, 2025 - Deprecated SDK sunset

81
references/streaming-patterns.md Normal file

@@ -0,0 +1,81 @@
# Streaming Patterns
Complete guide to implementing streaming with Gemini API.
---
## SDK Approach (Async Iteration)
```typescript
const response = await ai.models.generateContentStream({
model: 'gemini-2.5-flash',
contents: 'Write a story'
});
for await (const chunk of response) {
process.stdout.write(chunk.text);
}
```
**Pros**: Simple, automatic parsing
**Cons**: Requires Node.js or compatible runtime
---
## Fetch Approach (SSE Parsing)
```typescript
const response = await fetch(
  // ?alt=sse is required for SSE "data: " lines; the default is a JSON array stream
  'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse',
  { /* ... */ }
);
const reader = response.body!.getReader();
const decoder = new TextDecoder();
let buffer = '';
while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  buffer += decoder.decode(value, { stream: true });
  const lines = buffer.split('\n');
  buffer = lines.pop() || '';
  for (const line of lines) {
    if (!line.startsWith('data: ') || line === 'data: [DONE]') continue;
    const data = JSON.parse(line.slice(6));
    const text = data.candidates?.[0]?.content?.parts?.[0]?.text;
    if (text) process.stdout.write(text);
  }
}
```
**Pros**: Works in any environment
**Cons**: Manual SSE parsing required
---
## SSE Format
```
data: {"candidates":[{"content":{"parts":[{"text":"Hello"}]}}]}
data: {"candidates":[{"content":{"parts":[{"text":" world"}]}}]}
data: [DONE]
```
---
## Best Practices
- Always use the `streamGenerateContent` endpoint with `?alt=sse` for SSE output
- Handle incomplete chunks in buffer
- Skip empty lines and `[DONE]` markers
- Use streaming for better UX on long responses
---
## Official Docs
https://ai.google.dev/gemini-api/docs/streaming

59
references/thinking-mode-guide.md Normal file

@@ -0,0 +1,59 @@
# Thinking Mode Guide
Complete guide to thinking mode in Gemini 2.5 models.
---
## What is Thinking Mode?
Gemini 2.5 models "think" internally before responding, improving accuracy on complex tasks.
**Key Points**:
- ✅ Enabled by default on all 2.5 models (Pro cannot disable it; Flash and Flash-Lite accept `thinkingBudget: 0` to turn it off)
- ✅ Internal (raw thoughts are not returned in the response)
- ✅ Configurable thinking budget
- ✅ Improves reasoning quality
---
## Configuration
```typescript
const response = await ai.models.generateContent({
  model: 'gemini-2.5-pro',
  contents: 'Solve this puzzle step by step: ...',
  config: {
    thinkingConfig: {
      thinkingBudget: 8192 // Max tokens for internal reasoning
    }
  }
});
```
---
## When to Increase Budget
✅ Complex math/logic problems
✅ Multi-step reasoning
✅ Code optimization
✅ Detailed analysis
---
## When Default is Fine
⏺️ Simple questions
⏺️ Creative writing
⏺️ Translation
⏺️ Summarization
---
## Model Comparison
- **gemini-2.5-pro**: Best for complex reasoning
- **gemini-2.5-flash**: Good balance
- **gemini-2.5-flash-lite**: Basic thinking
---
## Official Docs
https://ai.google.dev/gemini-api/docs/thinking

references/top-errors.md Normal file
@@ -0,0 +1,304 @@
# Top Errors and Solutions
22 common Gemini API errors with solutions (Phase 1 + Phase 2).
---
## 1. Using Deprecated SDK
**Error**: `Cannot find module '@google/generative-ai'`
**Cause**: Using old SDK after migration
**Solution**: Install `@google/genai` instead
---
## 2. Wrong Context Window Claims
**Error**: Input exceeds model capacity
**Cause**: Assuming 2M tokens for Gemini 2.5
**Solution**: Gemini 2.5 has 1,048,576 input tokens (NOT 2M!)
---
## 3. Model Not Found
**Error**: `models/gemini-3.0-flash is not found`
**Cause**: Wrong model name
**Solution**: Use: `gemini-2.5-pro`, `gemini-2.5-flash`, or `gemini-2.5-flash-lite`
---
## 4. Function Calling on Flash-Lite
**Error**: Function calling not working
**Cause**: Flash-Lite doesn't support function calling
**Solution**: Use `gemini-2.5-flash` or `gemini-2.5-pro`
---
## 5. Invalid API Key (401)
**Error**: `API key not valid`
**Cause**: Missing or wrong `GEMINI_API_KEY`
**Solution**: Set environment variable correctly
---
## 6. Rate Limit Exceeded (429)
**Error**: `Resource has been exhausted`
**Cause**: Too many requests
**Solution**: Implement exponential backoff
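A minimal retry sketch with exponential backoff and jitter (the `status` field on the thrown error is an assumption; adjust the check to however your client surfaces HTTP 429):
```typescript
// Hedged sketch: retry on 429 with exponential backoff plus jitter.
async function withBackoff<T>(fn: () => Promise<T>, maxRetries = 5): Promise<T> {
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (error: any) {
      const rateLimited = error?.status === 429 || /exhausted/i.test(error?.message ?? '');
      if (!rateLimited || attempt >= maxRetries) throw error;
      const delayMs = Math.min(1000 * 2 ** attempt, 30_000) + Math.random() * 250;
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
}

// Usage: wrap any Gemini call
// const response = await withBackoff(() =>
//   ai.models.generateContent({ model: 'gemini-2.5-flash', contents: 'Hi' })
// );
```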
---
## 7. Streaming Parse Errors
**Error**: Invalid JSON in SSE stream
**Cause**: Incomplete chunk parsing
**Solution**: Use buffer to handle partial chunks
---
## 8. Multimodal Format Errors
**Error**: Invalid base64 or MIME type
**Cause**: Wrong image encoding
**Solution**: Use correct base64 encoding and MIME type
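A short sketch of correct inline-image encoding in Node (the file path is a placeholder):
```typescript
import fs from 'fs';

// Read the file and base64-encode it; the MIME type must match the actual format.
const base64Image = fs.readFileSync('./photo.jpg').toString('base64');
const imagePart = { inlineData: { data: base64Image, mimeType: 'image/jpeg' } };
```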
---
## 9. Context Length Exceeded
**Error**: `Request payload size exceeds the limit`
**Cause**: Input too large
**Solution**: Reduce input size (max 1,048,576 tokens)
---
## 10. Chat Not Working with Fetch
**Error**: No chat helper available
**Cause**: Chat helpers are SDK-only
**Solution**: Manually manage conversation history or use SDK
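A minimal sketch of manual history management with fetch (Node 18+ global fetch; error handling omitted):
```typescript
type Turn = { role: 'user' | 'model'; parts: Array<{ text: string }> };
const history: Turn[] = [];

async function send(message: string): Promise<string> {
  history.push({ role: 'user', parts: [{ text: message }] });
  const res = await fetch(
    'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent',
    {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-goog-api-key': process.env.GEMINI_API_KEY!,
      },
      body: JSON.stringify({ contents: history }),
    }
  );
  const data: any = await res.json();
  const reply = data.candidates?.[0]?.content?.parts?.[0]?.text ?? '';
  history.push({ role: 'model', parts: [{ text: reply }] });
  return reply;
}
```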
---
## 11. Thinking Mode Not Supported
**Error**: Trying to disable thinking mode
**Cause**: Thinking is enabled by default on Gemini 2.5
**Solution**: Configure the budget instead; on Flash/Flash-Lite you can set `thinkingBudget: 0`, but Pro cannot be disabled
---
## 12. Parameter Conflicts
**Error**: Unsupported parameters
**Cause**: Using wrong config options
**Solution**: Use only supported parameters (see generation-config.md)
---
## 13. System Instruction Placement
**Error**: System instruction not working
**Cause**: Placed inside contents array
**Solution**: Place at top level, not in contents
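A sketch of the corrected placement, assuming an initialized `ai` client (the SDK templates in this skill pass `systemInstruction` via `config`):
```typescript
// ✅ Correct: systemInstruction lives in config, not in the contents array
const response = await ai.models.generateContent({
  model: 'gemini-2.5-flash',
  contents: 'Summarize this article in two sentences: ...',
  config: {
    systemInstruction: 'You are a concise technical editor.',
  },
});
```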
---
## 14. Token Counting Errors
**Error**: Unexpected token usage
**Cause**: Multimodal inputs use more tokens
**Solution**: Images/video/audio count toward token limit
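To see actual usage up front, `countTokens` can be called before sending (a sketch assuming an initialized `ai` client; multimodal parts are counted the same way):
```typescript
const { totalTokens } = await ai.models.countTokens({
  model: 'gemini-2.5-flash',
  contents: 'Your long prompt here...',
});
if ((totalTokens ?? 0) > 1_048_576) {
  throw new Error(`Input too large: ${totalTokens} tokens`);
}
```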
---
## 15. Parallel Function Call Errors
**Error**: Functions not executing in parallel
**Cause**: Dependencies between functions
**Solution**: Gemini auto-detects; ensure functions are independent
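A sketch of dispatching all returned calls concurrently (`runFunction` is a hypothetical dispatcher you implement; `response` is a prior `generateContent` result):
```typescript
// Collect every functionCall part, run them in parallel, and gather all results.
const callParts = response.candidates[0].content.parts.filter((p: any) => p.functionCall);
const functionResponses = await Promise.all(
  callParts.map(async (p: any) => ({
    functionResponse: {
      name: p.functionCall.name,
      response: await runFunction(p.functionCall.name, p.functionCall.args), // hypothetical
    },
  }))
);
```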
---
## Phase 2 Errors
### 16. Invalid Model Version for Caching
**Error**: `Invalid model name for caching`
**Cause**: Using `gemini-2.5-flash` instead of `gemini-2.5-flash-001`
**Solution**: Must use explicit version suffix when creating caches
```typescript
// ✅ Correct
model: 'gemini-2.5-flash-001'
// ❌ Wrong
model: 'gemini-2.5-flash'
```
**Source**: https://ai.google.dev/gemini-api/docs/caching
---
### 17. Cache Expired or Not Found
**Error**: `Cache not found` or `Cache expired`
**Cause**: Trying to use cache after TTL expiration
**Solution**: Check expiration before use or recreate cache
```typescript
let cache = await ai.caches.get({ name: cacheName });
if (new Date(cache.expireTime) < new Date()) {
// Recreate cache
cache = await ai.caches.create({ ... });
}
```
---
### 18. Cannot Update Expired Cache TTL
**Error**: `Cannot update expired cache`
**Cause**: Trying to extend TTL after cache already expired
**Solution**: Update TTL before expiration or create new cache
```typescript
// Update TTL before expiration
await ai.caches.update({
name: cache.name,
config: { ttl: '7200s' }
});
```
---
### 19. Code Execution Timeout
**Error**: `Execution timed out after 30 seconds` with `OUTCOME_FAILED`
**Cause**: Python code taking too long to execute
**Solution**: Simplify computation or reduce data size
```typescript
// Check outcome before using results
if (part.codeExecutionResult?.outcome === 'OUTCOME_FAILED') {
console.error('Execution failed:', part.codeExecutionResult.output);
}
```
**Source**: https://ai.google.dev/gemini-api/docs/code-execution
---
### 20. Python Package Not Available
**Error**: `ModuleNotFoundError: No module named 'requests'`
**Cause**: Trying to import package not in sandbox
**Solution**: Use only available packages (numpy, pandas, matplotlib, seaborn, scipy)
**Available Packages**:
- Standard library: math, statistics, json, csv, datetime
- Data science: numpy, pandas, scipy
- Visualization: matplotlib, seaborn
---
### 21. Code Execution on Flash-Lite
**Error**: Code execution not working
**Cause**: `gemini-2.5-flash-lite` doesn't support code execution
**Solution**: Use `gemini-2.5-flash` or `gemini-2.5-pro`
```typescript
// ✅ Correct
model: 'gemini-2.5-flash' // Supports code execution
// ❌ Wrong
model: 'gemini-2.5-flash-lite' // NO code execution support
```
---
### 22. Grounding Requires Google Cloud Project
**Error**: `Grounding requires Google Cloud project configuration`
**Cause**: Using API key not associated with GCP project
**Solution**: Set up Google Cloud project and enable Generative Language API
**Steps**:
1. Create Google Cloud project
2. Enable Generative Language API
3. Configure billing
4. Use API key from that project
**Source**: https://ai.google.dev/gemini-api/docs/grounding
---
## Quick Debugging Checklist
### Phase 1 (Core)
- [ ] Using @google/genai (NOT @google/generative-ai)
- [ ] Model name is gemini-2.5-pro/flash/flash-lite
- [ ] API key is set correctly
- [ ] Input under 1,048,576 tokens
- [ ] Not using Flash-Lite for function calling
- [ ] System instruction at top level
- [ ] Streaming endpoint is streamGenerateContent
- [ ] MIME types are correct for multimodal
### Phase 2 (Advanced)
- [ ] Caching: Using explicit model version (e.g., gemini-2.5-flash-001)
- [ ] Caching: Cache not expired (check expireTime)
- [ ] Code Execution: Not using Flash-Lite
- [ ] Code Execution: Using only available Python packages
- [ ] Grounding: Google Cloud project configured
- [ ] Grounding: Checking groundingMetadata for search results

scripts/check-versions.sh Executable file
@@ -0,0 +1,76 @@
#!/bin/bash
# Check @google/genai package version and warn about deprecated SDK
# Usage: ./scripts/check-versions.sh
echo "🔍 Checking Gemini API SDK versions..."
echo ""
# Check if package.json exists
if [ ! -f "package.json" ]; then
echo "❌ No package.json found in current directory"
exit 1
fi
# Check for deprecated SDK
if grep -q "@google/generative-ai" package.json; then
echo "⚠️ WARNING: DEPRECATED SDK DETECTED!"
echo ""
echo " Package: @google/generative-ai"
echo " Status: DEPRECATED (sunset Nov 30, 2025)"
echo ""
echo " Action required:"
echo " 1. npm uninstall @google/generative-ai"
echo " 2. npm install @google/genai@1.27.0"
echo " 3. Update imports (see sdk-migration-guide.md)"
echo ""
fi
# Check for current SDK
if grep -q "@google/genai" package.json; then
# Get installed version
INSTALLED_VERSION=$(npm list @google/genai --depth=0 2>/dev/null | grep @google/genai | sed 's/.*@//' | sed 's/ .*//')
RECOMMENDED_VERSION="1.27.0"
echo "✅ Current SDK installed: @google/genai"
echo " Installed version: $INSTALLED_VERSION"
echo " Recommended version: $RECOMMENDED_VERSION"
echo ""
# Check if version matches recommendation
if [ "$INSTALLED_VERSION" != "$RECOMMENDED_VERSION" ]; then
echo " Consider updating to recommended version:"
echo " npm install @google/genai@$RECOMMENDED_VERSION"
echo ""
fi
else
echo "❌ @google/genai not found in package.json"
echo ""
echo " Install with:"
echo " npm install @google/genai@1.27.0"
echo ""
fi
# Check Node.js version (warn if below the required major version)
NODE_VERSION=$(node -v | sed 's/v//')
REQUIRED_NODE="18.0.0"
echo "Node.js version: $NODE_VERSION"
echo "Required: >= $REQUIRED_NODE"
NODE_MAJOR=${NODE_VERSION%%.*}
if [ "$NODE_MAJOR" -lt "${REQUIRED_NODE%%.*}" ]; then
  echo "⚠️  Node.js $NODE_VERSION is below the required $REQUIRED_NODE"
fi
echo ""
# Summary
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Summary:"
echo ""
if grep -q "@google/generative-ai" package.json; then
echo "❌ Migration needed: Remove deprecated SDK"
elif grep -q "@google/genai" package.json; then
echo "✅ Using current SDK (@google/genai)"
else
echo "❌ Gemini SDK not installed"
fi
echo ""
echo "For migration help, see:"
echo " references/sdk-migration-guide.md"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

@@ -0,0 +1,245 @@
/**
* Complete Cloudflare Worker with Gemini API
*
* Demonstrates:
* - Fetch-based Gemini API integration (no SDK dependencies)
* - Streaming responses to client
* - Multi-turn chat with session storage
* - Error handling for production
* - CORS configuration
*
* Deploy:
* - npx wrangler deploy
* - Set GEMINI_API_KEY in Cloudflare dashboard or wrangler.toml
*/
interface Env {
GEMINI_API_KEY: string;
}
interface ChatMessage {
role: 'user' | 'model';
parts: Array<{ text: string }>;
}
export default {
/**
* Main request handler
*/
async fetch(request: Request, env: Env): Promise<Response> {
// CORS headers
const corsHeaders = {
'Access-Control-Allow-Origin': '*',
'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
'Access-Control-Allow-Headers': 'Content-Type',
};
// Handle preflight
if (request.method === 'OPTIONS') {
return new Response(null, { headers: corsHeaders });
}
const url = new URL(request.url);
try {
// Route: POST /api/chat (non-streaming)
if (url.pathname === '/api/chat' && request.method === 'POST') {
return await handleChat(request, env, corsHeaders);
}
// Route: POST /api/chat/stream (streaming)
if (url.pathname === '/api/chat/stream' && request.method === 'POST') {
return await handleChatStream(request, env, corsHeaders);
}
// Route: GET / (health check)
if (url.pathname === '/' && request.method === 'GET') {
return new Response(
JSON.stringify({ status: 'ok', service: 'Gemini API Worker' }),
{ headers: { ...corsHeaders, 'Content-Type': 'application/json' } }
);
}
// 404 for unknown routes
return new Response('Not Found', { status: 404, headers: corsHeaders });
} catch (error: any) {
return new Response(
JSON.stringify({ error: error.message }),
{ status: 500, headers: { ...corsHeaders, 'Content-Type': 'application/json' } }
);
}
}
};
/**
* Handle non-streaming chat request
*/
async function handleChat(request: Request, env: Env, corsHeaders: any): Promise<Response> {
const { message, history = [] } = await request.json() as {
message: string;
history?: ChatMessage[];
};
// Build contents array with history
const contents: ChatMessage[] = [
...history,
{ role: 'user', parts: [{ text: message }] }
];
// Call Gemini API
const response = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-goog-api-key': env.GEMINI_API_KEY,
},
body: JSON.stringify({ contents }),
}
);
if (!response.ok) {
const errorData = await response.json();
throw new Error(errorData.error?.message || 'Gemini API error');
}
const data = await response.json();
const assistantReply = data.candidates[0]?.content?.parts[0]?.text;
// Return response with updated history
return new Response(
JSON.stringify({
reply: assistantReply,
history: [
...history,
{ role: 'user', parts: [{ text: message }] },
{ role: 'model', parts: [{ text: assistantReply }] }
],
usage: data.usageMetadata
}),
{ headers: { ...corsHeaders, 'Content-Type': 'application/json' } }
);
}
/**
* Handle streaming chat request
*/
async function handleChatStream(request: Request, env: Env, corsHeaders: any): Promise<Response> {
const { message, history = [] } = await request.json() as {
message: string;
history?: ChatMessage[];
};
const contents: ChatMessage[] = [
...history,
{ role: 'user', parts: [{ text: message }] }
];
// Create a TransformStream to stream to client
const { readable, writable } = new TransformStream();
const writer = writable.getWriter();
const encoder = new TextEncoder();
// Start streaming in background
(async () => {
try {
const response = await fetch(
          `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-goog-api-key': env.GEMINI_API_KEY,
},
body: JSON.stringify({ contents }),
}
);
if (!response.ok) {
throw new Error(`HTTP ${response.status}`);
}
if (!response.body) {
throw new Error('Response body is null');
}
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() || '';
for (const line of lines) {
if (line.trim() === '' || line.startsWith('data: [DONE]')) continue;
if (!line.startsWith('data: ')) continue;
try {
const data = JSON.parse(line.slice(6));
const text = data.candidates[0]?.content?.parts[0]?.text;
if (text) {
await writer.write(encoder.encode(text));
}
} catch (e) {
// Skip invalid JSON
}
}
}
await writer.close();
} catch (error: any) {
await writer.write(encoder.encode(`\n\nError: ${error.message}`));
await writer.close();
}
})();
return new Response(readable, {
headers: {
...corsHeaders,
'Content-Type': 'text/plain; charset=utf-8',
'Transfer-Encoding': 'chunked',
},
});
}
/**
* Example Client Usage (JavaScript):
*
* // Non-streaming
* const response = await fetch('https://your-worker.workers.dev/api/chat', {
* method: 'POST',
* headers: { 'Content-Type': 'application/json' },
* body: JSON.stringify({
* message: 'What is quantum computing?',
* history: []
* })
* });
* const data = await response.json();
* console.log(data.reply);
*
* // Streaming
* const response = await fetch('https://your-worker.workers.dev/api/chat/stream', {
* method: 'POST',
* headers: { 'Content-Type': 'application/json' },
* body: JSON.stringify({
* message: 'Write a story',
* history: []
* })
* });
* const reader = response.body.getReader();
* const decoder = new TextDecoder();
* while (true) {
* const { done, value } = await reader.read();
* if (done) break;
* console.log(decoder.decode(value));
* }
*/

templates/code-execution.ts Normal file
@@ -0,0 +1,206 @@
/**
* Google Gemini API - Code Execution Example
*
* Demonstrates how to enable code execution so the model can generate
* and run Python code to solve computational problems.
*
* Features:
* - Basic code execution
* - Data analysis with code
* - Chart generation
* - Error handling for failed execution
*
* Requirements:
* - @google/genai@1.27.0+
* - GEMINI_API_KEY environment variable
*
* Note: Code Execution is NOT available on gemini-2.5-flash-lite
*/
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY! });
async function basicCodeExecution() {
console.log('=== Basic Code Execution Example ===\n');
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents:
'What is the sum of the first 50 prime numbers? Generate and run code for the calculation, and make sure you get all 50.',
config: {
tools: [{ codeExecution: {} }],
},
});
// Parse response parts
console.log('Response parts:\n');
for (const part of response.candidates[0].content.parts) {
if (part.text) {
console.log('📝 Text:', part.text);
}
if (part.executableCode) {
console.log('\n💻 Generated Code:');
console.log(part.executableCode.code);
}
if (part.codeExecutionResult) {
console.log('\n✅ Execution Output:');
console.log(part.codeExecutionResult.output);
}
}
console.log('\n=== Basic Code Execution Complete ===');
}
async function dataAnalysisExample() {
console.log('\n=== Data Analysis Example ===\n');
const prompt = `
Analyze this sales data and calculate:
1. Total revenue
2. Average sale price
3. Best-selling month
4. Month with highest revenue
Use pandas or numpy for analysis.
Data (CSV format):
month,sales,revenue
Jan,150,45000
Feb,200,62000
Mar,175,53000
Apr,220,68000
May,190,58000
`;
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: prompt,
config: {
tools: [{ codeExecution: {} }],
},
});
for (const part of response.candidates[0].content.parts) {
if (part.text) {
console.log('📊 Analysis:', part.text);
}
if (part.executableCode) {
console.log('\n💻 Analysis Code:');
console.log(part.executableCode.code);
}
if (part.codeExecutionResult) {
console.log('\n📈 Results:');
console.log(part.codeExecutionResult.output);
}
}
console.log('\n=== Data Analysis Complete ===');
}
async function chartGenerationExample() {
console.log('\n=== Chart Generation Example ===\n');
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents:
'Create a bar chart showing the distribution of prime numbers under 100 by their last digit. Generate the chart code and describe any patterns you see.',
config: {
tools: [{ codeExecution: {} }],
},
});
for (const part of response.candidates[0].content.parts) {
if (part.text) {
console.log('📊 Chart Description:', part.text);
}
if (part.executableCode) {
console.log('\n📉 Chart Code:');
console.log(part.executableCode.code);
}
if (part.codeExecutionResult) {
console.log('\n✓ Chart generated');
// Note: Image data would be in output
}
}
console.log('\n=== Chart Generation Complete ===');
}
async function chatWithCodeExecution() {
console.log('\n=== Chat with Code Execution Example ===\n');
const chat = await ai.chats.create({
model: 'gemini-2.5-flash',
config: {
tools: [{ codeExecution: {} }],
},
});
// First message
console.log('User: I have a math question for you.');
  let response = await chat.sendMessage({ message: 'I have a math question for you.' });
console.log(`Assistant: ${response.text}\n`);
// Second message (will generate and execute code)
console.log('User: Calculate the Fibonacci sequence up to the 20th number and sum them.');
  response = await chat.sendMessage({
    message: 'Calculate the Fibonacci sequence up to the 20th number and sum them.'
  });
for (const part of response.candidates[0].content.parts) {
if (part.text) {
console.log('Assistant:', part.text);
}
if (part.executableCode) {
console.log('\nCode:');
console.log(part.executableCode.code);
}
if (part.codeExecutionResult) {
console.log('\nOutput:');
console.log(part.codeExecutionResult.output);
}
}
console.log('\n=== Chat with Code Execution Complete ===');
}
async function errorHandlingExample() {
console.log('\n=== Error Handling Example ===\n');
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Write code that divides by zero and see what happens',
config: {
tools: [{ codeExecution: {} }],
},
});
for (const part of response.candidates[0].content.parts) {
if (part.codeExecutionResult) {
if (part.codeExecutionResult.outcome === 'OUTCOME_FAILED') {
console.error('❌ Code execution failed:');
console.error(part.codeExecutionResult.output);
} else {
console.log('✅ Success:');
console.log(part.codeExecutionResult.output);
}
}
}
console.log('\n=== Error Handling Example Complete ===');
}
// Run all examples
async function main() {
await basicCodeExecution();
await dataAnalysisExample();
await chartGenerationExample();
await chatWithCodeExecution();
await errorHandlingExample();
}
main().catch((error) => {
console.error('Error:', error.message);
process.exit(1);
});

@@ -0,0 +1,278 @@
/**
* Google Gemini API - Combined Advanced Features Example
*
* Demonstrates how to use all Phase 2 advanced features together:
* - Context Caching (cost optimization)
* - Code Execution (computational tasks)
* - Grounding with Google Search (real-time information)
*
* Use Case: Financial analysis chatbot that:
* 1. Caches financial data and analysis instructions
* 2. Uses code execution for calculations and data analysis
* 3. Uses grounding for current market information
*
* Requirements:
* - @google/genai@1.27.0+
* - GEMINI_API_KEY environment variable
* - Google Cloud project (for grounding)
*/
import { GoogleGenAI } from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY! });
async function financialAnalysisChatbot() {
console.log('=== Combined Advanced Features: Financial Analysis Chatbot ===\n');
// Step 1: Create cache with financial data and analysis instructions
console.log('Step 1: Creating cache with financial data...');
const financialData = `
Company Financial Data (Q1-Q4 2024):
Revenue:
Q1: $2.5M
Q2: $3.1M
Q3: $2.9M
Q4: $3.8M
Expenses:
Q1: $1.8M
Q2: $2.2M
Q3: $2.1M
Q4: $2.6M
Customer Acquisition:
Q1: 1,200 new customers
Q2: 1,500 new customers
Q3: 1,350 new customers
Q4: 1,800 new customers
`.trim();
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001', // Explicit version required for caching
config: {
displayName: 'financial-analysis-cache',
systemInstruction: `
You are a financial analyst chatbot. You have access to:
1. Company financial data (cached)
2. Code execution for calculations
3. Google Search for current market information
Provide detailed, accurate financial analysis.
`,
contents: financialData,
ttl: '3600s', // Cache for 1 hour
},
});
console.log(`✓ Cache created: ${cache.name}\n`);
// Step 2: Create chat with all advanced features enabled
console.log('Step 2: Creating chat with code execution and grounding...\n');
  const chat = await ai.chats.create({
    model: 'gemini-2.5-flash-001',
    config: {
      cachedContent: cache.name, // Use cached context
      tools: [
        { codeExecution: {} }, // Enable code execution
        { googleSearch: {} }, // Enable grounding
      ],
    },
  });
// Query 1: Analysis requiring code execution (uses cache)
console.log('===========================================');
console.log('Query 1: Calculate year-over-year growth');
console.log('===========================================\n');
  let response = await chat.sendMessage({ message: `
Calculate the following for 2024:
1. Total annual revenue
2. Total annual expenses
3. Net profit
4. Profit margin percentage
5. Quarter-over-quarter revenue growth rates
Use code to perform these calculations.
  ` });
console.log('📊 Financial Analysis:\n');
for (const part of response.candidates[0].content.parts) {
if (part.text) {
console.log(part.text);
}
if (part.executableCode) {
console.log('\n💻 Calculations Code:');
console.log(part.executableCode.code);
}
if (part.codeExecutionResult) {
console.log('\n📈 Results:');
console.log(part.codeExecutionResult.output);
}
}
// Query 2: Current market information (uses grounding)
console.log('\n\n===========================================');
console.log('Query 2: Compare with industry benchmarks');
console.log('===========================================\n');
  response = await chat.sendMessage({ message: `
What are the current industry benchmarks for SaaS companies in terms of:
1. Profit margins
2. Customer acquisition cost trends
3. Growth rate expectations
Use current market data to provide context.
  ` });
console.log('📰 Market Context:\n');
console.log(response.text);
if (response.candidates[0].groundingMetadata) {
console.log('\n✓ Used current market data from:');
const sources = response.candidates[0].groundingMetadata.webPages || [];
sources.slice(0, 3).forEach((source, i) => {
console.log(`${i + 1}. ${source.title}`);
console.log(` ${source.url}`);
});
}
// Query 3: Combined analysis (uses cache + code + grounding)
console.log('\n\n===========================================');
console.log('Query 3: Comprehensive recommendation');
console.log('===========================================\n');
  response = await chat.sendMessage({ message: `
Based on:
1. Our company's financial performance (from cached data)
2. Calculated growth metrics (using code execution)
3. Current industry trends (from search)
Provide a comprehensive recommendation for Q1 2025 strategy.
  ` });
console.log('💡 Strategic Recommendation:\n');
console.log(response.text);
// Show which features were used
console.log('\n\n📋 Features Used in This Query:');
let featuresUsed = [];
// Check for code execution
const hasCode = response.candidates[0].content.parts.some(
(part) => part.executableCode
);
if (hasCode) featuresUsed.push('✓ Code Execution');
// Check for grounding
if (response.candidates[0].groundingMetadata) {
featuresUsed.push('✓ Google Search Grounding');
}
  // Cache is always used because we pass cachedContent in the chat config
featuresUsed.push('✓ Context Caching');
featuresUsed.forEach((feature) => console.log(feature));
// Clean up
console.log('\n\nStep 3: Cleaning up...');
await ai.caches.delete({ name: cache.name });
console.log('✓ Cache deleted');
console.log('\n=== Financial Analysis Chatbot Complete ===');
}
async function researchAssistant() {
console.log('\n\n=== Combined Advanced Features: Research Assistant ===\n');
// Create cache with research paper
console.log('Step 1: Caching research paper...');
const researchPaper = `
Title: Climate Change Impact on Arctic Ecosystems (2024)
Abstract:
This study examines the effects of climate change on Arctic ecosystems
over the past decade...
[Imagine full research paper content here]
Key Findings:
- Temperature increase of 2.3°C in Arctic regions
- 15% reduction in sea ice coverage
- Migration pattern changes in polar species
- etc.
`.trim();
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001',
config: {
displayName: 'research-paper-cache',
systemInstruction: 'You are a research assistant. Analyze papers and provide insights.',
contents: researchPaper,
ttl: '1800s', // 30 minutes
},
});
console.log(`✓ Cache created\n`);
// Create chat with all tools
  const chat = await ai.chats.create({
    model: 'gemini-2.5-flash-001',
    config: {
      cachedContent: cache.name,
      tools: [{ codeExecution: {} }, { googleSearch: {} }],
    },
  });
// Query combining all features
console.log('Query: Comprehensive climate analysis\n');
  const response = await chat.sendMessage({ message: `
1. Calculate the average temperature change per year from the data in the paper (use code)
2. Find the latest climate predictions for the next decade (use search)
3. Synthesize findings from both sources into a summary
  ` });
console.log('Response:\n');
for (const part of response.candidates[0].content.parts) {
if (part.text) console.log(part.text);
if (part.executableCode) {
console.log('\nCode:');
console.log(part.executableCode.code);
}
if (part.codeExecutionResult) {
console.log('\nResults:');
console.log(part.codeExecutionResult.output);
}
}
if (response.candidates[0].groundingMetadata) {
console.log('\nSources:');
const sources = response.candidates[0].groundingMetadata.webPages || [];
sources.slice(0, 2).forEach((s) => console.log(`- ${s.title}`));
}
// Clean up
await ai.caches.delete({ name: cache.name });
console.log('\n=== Research Assistant Complete ===');
}
// Run examples
async function main() {
await financialAnalysisChatbot();
await researchAssistant();
console.log('\n\n✅ All advanced features demonstrated!');
console.log('\nKey Takeaways:');
console.log('- Context Caching: Saves costs by reusing large context');
console.log('- Code Execution: Enables computational analysis');
console.log('- Grounding: Provides real-time, fact-checked information');
console.log('- Combined: Creates powerful, cost-effective AI applications');
}
main().catch((error) => {
console.error('Error:', error.message);
process.exit(1);
});

@@ -0,0 +1,154 @@
/**
* Google Gemini API - Context Caching Example
*
* Demonstrates how to create and use context caching to reduce costs by up to 90%
* when using the same large context (documents, videos, system instructions) repeatedly.
*
* Features:
* - Create cache with large document
* - Use cache for multiple queries
* - Update cache TTL
* - List and delete caches
*
* Requirements:
* - @google/genai@1.27.0+
* - GEMINI_API_KEY environment variable
*
* Note: You must use explicit model version suffixes like 'gemini-2.5-flash-001',
* not just 'gemini-2.5-flash' when creating caches.
*/
import { GoogleGenAI } from '@google/genai';
import fs from 'fs';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY! });
async function contextCachingExample() {
console.log('=== Context Caching Example ===\n');
// Example: Large document content
const largeDocument = `
This is a large legal document that will be analyzed multiple times.
Article 1: ...
Article 2: ...
... (imagine thousands of lines of legal text)
`.trim();
// 1. Create a cache with large content
console.log('1. Creating cache...');
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001', // Must use explicit version suffix!
config: {
displayName: 'legal-doc-cache',
systemInstruction: 'You are an expert legal analyst. Analyze documents and provide clear summaries.',
contents: largeDocument,
ttl: '3600s', // Cache for 1 hour
},
});
console.log(`✓ Cache created: ${cache.name}`);
console.log(` Display name: ${cache.displayName}`);
console.log(` Expires at: ${cache.expireTime}\n`);
// 2. Use cache for multiple queries (saves tokens!)
console.log('2. Using cache for first query...');
  const response1 = await ai.models.generateContent({
    model: 'gemini-2.5-flash-001',
    contents: 'Summarize the key points of Article 1',
    config: { cachedContent: cache.name }, // Reference the cache here
  });
console.log(`Response: ${response1.text}\n`);
console.log('3. Using cache for second query...');
  const response2 = await ai.models.generateContent({
    model: 'gemini-2.5-flash-001',
    contents: 'What are the legal implications of Article 2?',
    config: { cachedContent: cache.name },
  });
console.log(`Response: ${response2.text}\n`);
// 3. Update cache TTL to extend lifetime
console.log('4. Extending cache TTL to 2 hours...');
await ai.caches.update({
name: cache.name,
config: {
ttl: '7200s', // Extend to 2 hours
},
});
console.log('✓ Cache TTL updated\n');
// 4. List all caches
console.log('5. Listing all caches...');
  const pager = await ai.caches.list();
  for await (const c of pager) {
    console.log(`  - ${c.displayName}: ${c.name}`);
  }
console.log();
// 5. Delete cache when done
console.log('6. Deleting cache...');
await ai.caches.delete({ name: cache.name });
console.log('✓ Cache deleted\n');
console.log('=== Context Caching Complete ===');
}
async function videoCachingExample() {
console.log('\n=== Video Caching Example ===\n');
// Upload a video file
console.log('1. Uploading video file...');
  const videoFile = await ai.files.upload({
    file: './example-video.mp4',
    config: { mimeType: 'video/mp4' },
  });
console.log('2. Waiting for video processing...');
let processedFile = videoFile;
  while (processedFile.state === 'PROCESSING') {
await new Promise((resolve) => setTimeout(resolve, 2000));
processedFile = await ai.files.get({ name: videoFile.name });
}
console.log(`✓ Video processed: ${processedFile.uri}\n`);
// Create cache with video
console.log('3. Creating cache with video...');
const cache = await ai.caches.create({
model: 'gemini-2.5-flash-001',
config: {
displayName: 'video-analysis-cache',
systemInstruction: 'You are an expert video analyzer.',
contents: [processedFile],
ttl: '300s', // 5 minutes
},
});
console.log(`✓ Cache created: ${cache.name}\n`);
// Use cache for multiple video queries
console.log('4. Query 1: What happens in the first minute?');
  const response1 = await ai.models.generateContent({
    model: 'gemini-2.5-flash-001',
    contents: 'What happens in the first minute?',
    config: { cachedContent: cache.name },
  });
console.log(`Response: ${response1.text}\n`);
console.log('5. Query 2: Describe the main characters');
  const response2 = await ai.models.generateContent({
    model: 'gemini-2.5-flash-001',
    contents: 'Describe the main characters',
    config: { cachedContent: cache.name },
  });
console.log(`Response: ${response2.text}\n`);
// Clean up
await ai.caches.delete({ name: cache.name });
await ai.files.delete({ name: videoFile.name });
console.log('✓ Cache and video file deleted');
console.log('\n=== Video Caching Complete ===');
}
// Run examples
contextCachingExample()
.then(() => videoCachingExample())
.catch((error) => {
console.error('Error:', error.message);
process.exit(1);
});

@@ -0,0 +1,156 @@
/**
* Basic Function Calling with Gemini API
*
* Demonstrates:
* - Defining function declarations (tools)
* - Detecting when model wants to call a function
* - Executing functions and returning results
* - Multi-turn function calling workflow
*
* Prerequisites:
* - npm install @google/genai@1.27.0
* - export GEMINI_API_KEY="..."
*
* ⚠️ IMPORTANT: Gemini 2.5 Flash-Lite does NOT support function calling!
* Use gemini-2.5-flash or gemini-2.5-pro
*/
import { GoogleGenAI, FunctionCallingConfigMode } from '@google/genai';
async function main() {
const ai = new GoogleGenAI({
apiKey: process.env.GEMINI_API_KEY,
});
try {
// Step 1: Define function declarations
const getCurrentWeather = {
name: 'get_current_weather',
description: 'Get the current weather for a specific location',
parametersJsonSchema: {
type: 'object',
properties: {
location: {
type: 'string',
description: 'The city name, e.g. San Francisco, Tokyo, London'
},
unit: {
type: 'string',
enum: ['celsius', 'fahrenheit'],
description: 'Temperature unit'
}
},
required: ['location']
}
};
// Step 2: Make request with tools
console.log('User: What\'s the weather in Tokyo?\n');
const response1 = await ai.models.generateContent({
model: 'gemini-2.5-flash', // ⚠️ NOT flash-lite!
contents: 'What\'s the weather in Tokyo?',
config: {
tools: [
{ functionDeclarations: [getCurrentWeather] }
]
}
});
// Step 3: Check if model wants to call a function
const functionCall = response1.candidates[0]?.content?.parts?.find(
part => part.functionCall
)?.functionCall;
if (!functionCall) {
console.log('Model response (no function call):', response1.text);
return;
}
console.log('Model wants to call function:');
console.log('- Function name:', functionCall.name);
console.log('- Arguments:', JSON.stringify(functionCall.args, null, 2));
console.log('');
// Step 4: Execute the function (your implementation)
console.log('Executing function...\n');
const weatherData = await getCurrentWeatherImpl(
functionCall.args.location,
functionCall.args.unit || 'celsius'
);
console.log('Function result:', JSON.stringify(weatherData, null, 2));
console.log('');
// Step 5: Send function result back to model
const response2 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{ parts: [{ text: 'What\'s the weather in Tokyo?' }] },
response1.candidates[0].content, // Original assistant response with function call
{
parts: [
{
functionResponse: {
name: functionCall.name,
response: weatherData
}
}
]
}
],
config: {
tools: [
{ functionDeclarations: [getCurrentWeather] }
]
}
});
console.log('Model final response:');
console.log(response2.text);
} catch (error: any) {
console.error('Error:', error.message);
}
}
/**
* Mock implementation of weather API
* Replace with actual API call in production
*/
async function getCurrentWeatherImpl(location: string, unit: string) {
// Simulate API call
await new Promise(resolve => setTimeout(resolve, 500));
// Mock data
return {
location,
temperature: unit === 'celsius' ? 22 : 72,
unit,
conditions: 'Partly cloudy',
humidity: 65,
windSpeed: 10
};
}
/**
* Function Calling Modes:
*
* AUTO (default): Model decides whether to call functions
* ANY: Force model to call at least one function
* NONE: Disable function calling for this request
*
* Example with mode:
*
* config: {
* tools: [...],
* toolConfig: {
* functionCallingConfig: {
* mode: FunctionCallingConfigMode.ANY,
* allowedFunctionNames: ['get_current_weather']
* }
* }
* }
*/
main();

@@ -0,0 +1,177 @@
/**
* Parallel Function Calling with Gemini API
*
* Demonstrates:
* - Multiple independent function calls in one request
* - Handling multiple function call responses
* - Compositional (sequential) vs parallel execution
* - Complex multi-step workflows
*
* Prerequisites:
* - npm install @google/genai@1.27.0
* - export GEMINI_API_KEY="..."
*
* ⚠️ IMPORTANT: Only gemini-2.5-flash and gemini-2.5-pro support function calling
*/
import { GoogleGenAI } from '@google/genai';
async function main() {
const ai = new GoogleGenAI({
apiKey: process.env.GEMINI_API_KEY,
});
try {
// Define multiple functions
const getWeather = {
name: 'get_weather',
description: 'Get current weather for a location',
parametersJsonSchema: {
type: 'object',
properties: {
location: { type: 'string', description: 'City name' }
},
required: ['location']
}
};
const getPopulation = {
name: 'get_population',
description: 'Get population of a city',
parametersJsonSchema: {
type: 'object',
properties: {
city: { type: 'string', description: 'City name' }
},
required: ['city']
}
};
const getTimezone = {
name: 'get_timezone',
description: 'Get timezone information for a location',
parametersJsonSchema: {
type: 'object',
properties: {
location: { type: 'string', description: 'City name' }
},
required: ['location']
}
};
// Make request that requires multiple independent functions
console.log('User: What is the weather, population, and timezone of Tokyo?\n');
const response1 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'What is the weather, population, and timezone of Tokyo?',
config: {
tools: [
{ functionDeclarations: [getWeather, getPopulation, getTimezone] }
]
}
});
// Extract all function calls
const functionCalls = response1.candidates[0]?.content?.parts?.filter(
part => part.functionCall
) || [];
console.log(`Model wants to call ${functionCalls.length} functions in parallel:\n`);
// Execute all functions in parallel
const functionResponses = await Promise.all(
functionCalls.map(async (part) => {
const functionCall = part.functionCall!;
console.log(`- Calling ${functionCall.name} with args:`, functionCall.args);
// Execute function
let result;
if (functionCall.name === 'get_weather') {
result = await getWeatherImpl(functionCall.args.location);
} else if (functionCall.name === 'get_population') {
result = await getPopulationImpl(functionCall.args.city);
} else if (functionCall.name === 'get_timezone') {
result = await getTimezoneImpl(functionCall.args.location);
}
return {
functionResponse: {
name: functionCall.name,
response: result
}
};
})
);
console.log('\nAll functions executed.\n');
// Send all function results back to model
const response2 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{ parts: [{ text: 'What is the weather, population, and timezone of Tokyo?' }] },
response1.candidates[0].content,
{ parts: functionResponses }
],
config: {
tools: [
{ functionDeclarations: [getWeather, getPopulation, getTimezone] }
]
}
});
console.log('Model final response:');
console.log(response2.text);
} catch (error: any) {
console.error('Error:', error.message);
}
}
/**
* Mock function implementations
*/
async function getWeatherImpl(location: string) {
await new Promise(resolve => setTimeout(resolve, 300));
return {
location,
temperature: 22,
conditions: 'Sunny',
humidity: 60
};
}
async function getPopulationImpl(city: string) {
await new Promise(resolve => setTimeout(resolve, 300));
return {
city,
population: 13960000,
metropolitan: 37400000
};
}
async function getTimezoneImpl(location: string) {
await new Promise(resolve => setTimeout(resolve, 300));
return {
location,
timezone: 'Asia/Tokyo',
offset: '+09:00'
};
}
/**
* Parallel vs Compositional Function Calling:
*
* PARALLEL: Functions are independent and can run simultaneously
* - Example: "What is the weather AND population of Tokyo?"
* - Model calls get_weather() and get_population() together
*
* COMPOSITIONAL: Functions depend on each other (sequential)
* - Example: "What is the weather at my current location?"
* - Model first calls get_current_location(), then uses result for get_weather()
*
* Gemini automatically determines which pattern to use based on dependencies.
*/
main();

@@ -0,0 +1,271 @@
/**
* Google Gemini API - Grounding with Google Search Example
*
* Demonstrates how to enable grounding to connect the model to real-time
* web information, reducing hallucinations and providing up-to-date responses
* with citations.
*
* Features:
* - Basic grounding with Google Search (Gemini 2.5)
* - Dynamic retrieval with threshold (Gemini 1.5)
* - Chat with grounding
* - Combining grounding with function calling
* - Checking grounding metadata and citations
*
* Requirements:
* - @google/genai@1.27.0+
* - GEMINI_API_KEY environment variable
* - Google Cloud project (grounding requires GCP project, not just API key)
*
* Note: Use `googleSearch` for Gemini 2.5 models (recommended)
* Use `googleSearchRetrieval` for Gemini 1.5 models (legacy)
*/
import { GoogleGenAI, DynamicRetrievalConfigMode } from '@google/genai';
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY! });
async function basicGrounding() {
console.log('=== Basic Grounding Example (Gemini 2.5) ===\n');
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Who won the euro 2024?',
config: {
tools: [{ googleSearch: {} }],
},
});
console.log('Response:', response.text);
// Check if grounding was used
if (response.candidates[0].groundingMetadata) {
console.log('\n✓ Search was performed!');
console.log('\nGrounding Metadata:');
console.log(JSON.stringify(response.candidates[0].groundingMetadata, null, 2));
} else {
console.log('\n✓ Model answered from its own knowledge (no search needed)');
}
console.log('\n=== Basic Grounding Complete ===');
}
async function dynamicRetrievalExample() {
console.log('\n=== Dynamic Retrieval Example (Gemini 1.5) ===\n');
const response = await ai.models.generateContent({
model: 'gemini-1.5-flash',
contents: 'Who won the euro 2024?',
config: {
tools: [
{
googleSearchRetrieval: {
dynamicRetrievalConfig: {
mode: DynamicRetrievalConfigMode.MODE_DYNAMIC,
dynamicThreshold: 0.7, // Search only if confidence < 70%
},
},
},
],
},
});
console.log('Response:', response.text);
if (response.candidates[0].groundingMetadata) {
console.log('\n✓ Search performed (confidence < 70%)');
} else {
console.log('\n✓ Answered from knowledge (confidence >= 70%)');
}
console.log('\n=== Dynamic Retrieval Complete ===');
}
async function chatWithGrounding() {
console.log('\n=== Chat with Grounding Example ===\n');
const chat = await ai.chats.create({
model: 'gemini-2.5-flash',
config: {
tools: [{ googleSearch: {} }],
},
});
// First message
console.log('User: What are the latest developments in quantum computing?');
  let response = await chat.sendMessage({ message: 'What are the latest developments in quantum computing?' });
console.log(`\nAssistant: ${response.text}`);
// Check and display sources
if (response.candidates[0].groundingMetadata) {
const sources = response.candidates[0].groundingMetadata.webPages || [];
console.log(`\n📚 Sources used: ${sources.length}`);
sources.forEach((source, i) => {
console.log(`${i + 1}. ${source.title}`);
console.log(` ${source.url}`);
});
}
// Follow-up question
console.log('\n\nUser: Which company made the biggest breakthrough?');
  response = await chat.sendMessage({ message: 'Which company made the biggest breakthrough?' });
console.log(`\nAssistant: ${response.text}`);
if (response.candidates[0].groundingMetadata) {
const sources = response.candidates[0].groundingMetadata.webPages || [];
console.log(`\n📚 Sources used: ${sources.length}`);
sources.forEach((source, i) => {
console.log(`${i + 1}. ${source.title} - ${source.url}`);
});
}
console.log('\n=== Chat with Grounding Complete ===');
}
async function groundingWithFunctionCalling() {
console.log('\n=== Grounding + Function Calling Example ===\n');
// Define weather function
const weatherFunction = {
name: 'get_current_weather',
description: 'Get current weather for a location',
parametersJsonSchema: {
type: 'object',
properties: {
location: { type: 'string', description: 'City name' },
unit: { type: 'string', enum: ['celsius', 'fahrenheit'] },
},
required: ['location'],
},
};
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'What is the weather like in the city that won Euro 2024?',
config: {
tools: [{ googleSearch: {} }, { functionDeclarations: [weatherFunction] }],
},
});
console.log('User query: What is the weather like in the city that won Euro 2024?');
console.log('\nModel will:');
console.log('1. Use Google Search to find Euro 2024 winner');
console.log('2. Call get_current_weather function with the city');
console.log('3. Combine both results in response\n');
// Check for function calls
  const functionCallPart = response.candidates[0].content.parts.find((part) => part.functionCall);
  if (functionCallPart) {
    console.log('✓ Function call detected:');
    console.log(`  Function: ${functionCallPart.functionCall?.name}`);
    console.log(`  Arguments:`, functionCallPart.functionCall?.args);
  }
if (response.candidates[0].groundingMetadata) {
console.log('\n✓ Grounding was used');
const sources = response.candidates[0].groundingMetadata.webPages || [];
console.log(` Sources: ${sources.length} web pages`);
}
console.log('\n=== Grounding + Function Calling Complete ===');
}
async function checkingGroundingUsage() {
console.log('\n=== Checking Grounding Usage Example ===\n');
// Query that doesn't need search
console.log('Query 1: What is 2+2? (Should NOT need search)');
const response1 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'What is 2+2?',
config: {
tools: [{ googleSearch: {} }],
},
});
console.log(`Answer: ${response1.text}`);
console.log(
`Grounding used: ${response1.candidates[0].groundingMetadata ? 'YES' : 'NO'}\n`
);
// Query that needs current information
console.log('Query 2: What happened in the news today? (Should need search)');
const response2 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'What are the top news headlines today?',
config: {
tools: [{ googleSearch: {} }],
},
});
console.log(`Answer: ${response2.text}`);
console.log(`Grounding used: ${response2.candidates[0].groundingMetadata ? 'YES' : 'NO'}`);
if (response2.candidates[0].groundingMetadata) {
console.log('\nSearch queries performed:');
const queries = response2.candidates[0].groundingMetadata.searchQueries || [];
queries.forEach((q, i) => {
console.log(`${i + 1}. ${q.text || q}`);
});
}
console.log('\n=== Checking Grounding Usage Complete ===');
}
async function citationsExample() {
console.log('\n=== Citations Example ===\n');
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Tell me about the recent Mars rover discoveries',
config: {
tools: [{ googleSearch: {} }],
},
});
console.log('Response:', response.text);
if (response.candidates[0].groundingMetadata) {
const metadata = response.candidates[0].groundingMetadata;
console.log('\n📚 Web Pages:');
const webPages = metadata.webPages || [];
webPages.forEach((page, i) => {
console.log(`\n${i + 1}. ${page.title}`);
console.log(` URL: ${page.url}`);
if (page.snippet) {
console.log(` Snippet: ${page.snippet.substring(0, 100)}...`);
}
});
console.log('\n🔗 Citations:');
const citations = metadata.citations || [];
citations.forEach((citation, i) => {
console.log(`\n${i + 1}. Position: ${citation.startIndex}-${citation.endIndex}`);
console.log(` Source: ${citation.uri}`);
});
}
console.log('\n=== Citations Example Complete ===');
}
// Run all examples
async function main() {
await basicGrounding();
await dynamicRetrievalExample();
await chatWithGrounding();
await groundingWithFunctionCalling();
await checkingGroundingUsage();
await citationsExample();
}
main().catch((error) => {
console.error('Error:', error.message);
if (error.message.includes('Google Cloud project')) {
console.error(
'\nNote: Grounding requires a Google Cloud project, not just an API key.'
);
}
process.exit(1);
});

@@ -0,0 +1,117 @@
/**
* Multimodal Image Understanding with Gemini API
*
* Demonstrates:
* - Image analysis with vision capabilities
* - Base64 encoding of images
* - Combining text and image inputs
* - Multiple images in one request
*
* Prerequisites:
* - npm install @google/genai@1.27.0
* - export GEMINI_API_KEY="..."
*/
import { GoogleGenAI } from '@google/genai';
import fs from 'fs';
async function main() {
const ai = new GoogleGenAI({
apiKey: process.env.GEMINI_API_KEY,
});
try {
// Example 1: Analyze a single image
console.log('Example 1: Analyze Single Image\n');
// Load image from file
const imagePath = '/path/to/image.jpg'; // Replace with actual path
const imageData = fs.readFileSync(imagePath);
const base64Image = imageData.toString('base64');
const response1 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{
parts: [
{ text: 'Describe this image in detail. What objects, people, or scenes do you see?' },
{
inlineData: {
data: base64Image,
mimeType: 'image/jpeg' // or 'image/png', 'image/webp', etc.
}
}
]
}
]
});
console.log(response1.text);
console.log('\n---\n');
// Example 2: Compare two images
console.log('Example 2: Compare Two Images\n');
const imagePath2 = '/path/to/image2.jpg'; // Replace with actual path
const imageData2 = fs.readFileSync(imagePath2);
const base64Image2 = imageData2.toString('base64');
const response2 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{
parts: [
{ text: 'Compare these two images. What are the similarities and differences?' },
{ inlineData: { data: base64Image, mimeType: 'image/jpeg' } },
{ inlineData: { data: base64Image2, mimeType: 'image/jpeg' } }
]
}
]
});
console.log(response2.text);
console.log('\n---\n');
// Example 3: Specific questions about image
console.log('Example 3: Specific Questions\n');
const response3 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{
parts: [
{ text: 'How many people are in this image? What are they wearing?' },
{ inlineData: { data: base64Image, mimeType: 'image/jpeg' } }
]
}
]
});
console.log(response3.text);
} catch (error: any) {
console.error('Error:', error.message);
if (error.message.includes('ENOENT')) {
console.error('\n⚠ Image file not found. Update the imagePath variable with a valid path.');
}
}
}
/**
* Supported image formats:
* - JPEG (.jpg, .jpeg)
* - PNG (.png)
* - WebP (.webp)
* - HEIC (.heic)
* - HEIF (.heif)
*
* Max size: 20MB per image
*
* Tips:
* - Use specific, detailed prompts for better results
* - You can analyze multiple images in one request
* - gemini-2.5-flash and gemini-2.5-pro both support vision
*/
main();

@@ -0,0 +1,152 @@
/**
* Multimodal Video and Audio Understanding with Gemini API
*
* Demonstrates:
* - Video analysis (what happens in the video)
* - Audio transcription and understanding
* - PDF document parsing
* - Combining multiple modalities
*
* Prerequisites:
* - npm install @google/genai@1.27.0
* - export GEMINI_API_KEY="..."
*/
import { GoogleGenAI } from '@google/genai';
import fs from 'fs';
async function main() {
const ai = new GoogleGenAI({
apiKey: process.env.GEMINI_API_KEY,
});
try {
// Example 1: Analyze video
console.log('Example 1: Video Analysis\n');
const videoPath = '/path/to/video.mp4'; // Replace with actual path
const videoData = fs.readFileSync(videoPath);
const base64Video = videoData.toString('base64');
const response1 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{
parts: [
{ text: 'Describe what happens in this video. Summarize the key events.' },
{
inlineData: {
data: base64Video,
mimeType: 'video/mp4'
}
}
]
}
]
});
console.log(response1.text);
console.log('\n---\n');
// Example 2: Transcribe and analyze audio
console.log('Example 2: Audio Transcription and Analysis\n');
const audioPath = '/path/to/audio.mp3'; // Replace with actual path
const audioData = fs.readFileSync(audioPath);
const base64Audio = audioData.toString('base64');
const response2 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{
parts: [
{ text: 'Transcribe this audio and provide a summary of the main points discussed.' },
{
inlineData: {
data: base64Audio,
mimeType: 'audio/mp3'
}
}
]
}
]
});
console.log(response2.text);
console.log('\n---\n');
// Example 3: Parse PDF document
console.log('Example 3: PDF Document Parsing\n');
const pdfPath = '/path/to/document.pdf'; // Replace with actual path
const pdfData = fs.readFileSync(pdfPath);
const base64Pdf = pdfData.toString('base64');
const response3 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{
parts: [
{ text: 'Summarize the key points in this PDF document. Extract any important data or conclusions.' },
{
inlineData: {
data: base64Pdf,
mimeType: 'application/pdf'
}
}
]
}
]
});
console.log(response3.text);
console.log('\n---\n');
// Example 4: Combine multiple modalities
console.log('Example 4: Multiple Modalities (Video + Text Questions)\n');
const response4 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: [
{
parts: [
{ text: 'Based on this video, answer these questions:\n1. How many people appear?\n2. What is the main activity?\n3. Where does this take place?' },
{ inlineData: { data: base64Video, mimeType: 'video/mp4' } }
]
}
]
});
console.log(response4.text);
} catch (error: any) {
console.error('Error:', error.message);
if (error.message.includes('ENOENT')) {
console.error('\n⚠ File not found. Update the file path variables with valid paths.');
}
}
}
/**
* Supported Video Formats:
* - MP4, MPEG, MOV, AVI, FLV, MPG, WebM, WMV
* Max size: 2GB (use File API for larger - Phase 2)
* Max length (inline): 2 minutes
*
* Supported Audio Formats:
* - MP3, WAV, FLAC, AAC, OGG, OPUS
* Max size: 20MB
*
* PDF:
* - Max size: 30MB
* - Text-based PDFs work best
* - Scanned images may have lower accuracy
*
* Tips:
* - For videos > 2 minutes, use the File API (Phase 2)
* - Specific prompts yield better results
* - You can combine text, images, video, audio, and PDFs in one request
*/
main();

templates/package.json Normal file
@@ -0,0 +1,22 @@
{
"name": "gemini-api-example",
"version": "1.0.0",
"description": "Google Gemini API examples using @google/genai SDK",
"type": "module",
"scripts": {
"dev": "tsx watch src/index.ts",
"start": "node dist/index.js",
"build": "tsc"
},
"dependencies": {
"@google/genai": "^1.27.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"tsx": "^4.7.0",
"typescript": "^5.3.0"
},
"engines": {
"node": ">=18.0.0"
}
}

@@ -0,0 +1,81 @@
/**
* Streaming Text Generation with Gemini API (Node.js SDK)
*
* Demonstrates:
* - Streaming responses with async iteration
* - Real-time token delivery for better UX
* - Handling partial chunks
* - Multi-turn chat with streaming
*
* Prerequisites:
* - npm install @google/genai@1.27.0
* - export GEMINI_API_KEY="..."
*/
import { GoogleGenAI } from '@google/genai';
async function main() {
const ai = new GoogleGenAI({
apiKey: process.env.GEMINI_API_KEY,
});
try {
// Example 1: Basic streaming
console.log('Example 1: Basic Streaming\n');
console.log('Prompt: Write a 200-word story about time travel\n');
const response = await ai.models.generateContentStream({
model: 'gemini-2.5-flash',
contents: 'Write a 200-word story about time travel'
});
// Stream chunks as they arrive
for await (const chunk of response) {
// Each chunk may contain partial text
if (chunk.text) {
process.stdout.write(chunk.text);
}
}
console.log('\n\n---\n');
// Example 2: Streaming with chat
console.log('Example 2: Streaming Chat\n');
    const chat = await ai.chats.create({
      model: 'gemini-2.5-flash',
      config: { systemInstruction: 'You are a helpful coding assistant.' }
    });
// First turn with streaming
console.log('User: What is TypeScript?\n');
    const response1 = await chat.sendMessageStream({ message: 'What is TypeScript?' });
console.log('Assistant: ');
for await (const chunk of response1) {
      if (chunk.text) process.stdout.write(chunk.text);
}
console.log('\n\n');
// Second turn with streaming (context maintained)
console.log('User: How do I install it?\n');
    const response2 = await chat.sendMessageStream({ message: 'How do I install it?' });
console.log('Assistant: ');
for await (const chunk of response2) {
      if (chunk.text) process.stdout.write(chunk.text);
}
console.log('\n\n');
// Get full chat history
const history = chat.getHistory();
console.log(`Total messages in history: ${history.length}`);
} catch (error: any) {
console.error('Error:', error.message);
}
}
main();

@@ -0,0 +1,190 @@
/**
* Streaming Text Generation with Gemini API (Fetch - SSE Parsing)
*
* Demonstrates:
* - Server-Sent Events (SSE) parsing with fetch
* - Manual stream handling for Cloudflare Workers or edge runtimes
* - Buffer management for incomplete chunks
* - Error handling during streaming
*
* Prerequisites:
* - Set GEMINI_API_KEY environment variable
*/
interface Env {
GEMINI_API_KEY: string;
}
/**
* Example for Cloudflare Workers with streaming response
*/
export default {
async fetch(request: Request, env: Env): Promise<Response> {
// Create a TransformStream to stream tokens to the client
const { readable, writable } = new TransformStream();
const writer = writable.getWriter();
const encoder = new TextEncoder();
// Start streaming in the background
(async () => {
try {
const response = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-goog-api-key': env.GEMINI_API_KEY,
},
body: JSON.stringify({
contents: [
{
parts: [
{ text: 'Write a 200-word story about time travel' }
]
}
]
}),
}
);
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
if (!response.body) {
throw new Error('Response body is null');
}
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) {
break;
}
// Append new data to buffer
buffer += decoder.decode(value, { stream: true });
// Split by newlines (SSE format)
const lines = buffer.split('\n');
// Keep the last incomplete line in buffer
buffer = lines.pop() || '';
for (const line of lines) {
// Skip empty lines and metadata
if (line.trim() === '' || line.startsWith('data: [DONE]')) {
continue;
}
// Parse SSE format: "data: {json}"
if (!line.startsWith('data: ')) {
continue;
}
try {
const jsonData = JSON.parse(line.slice(6)); // Remove "data: " prefix
const text = jsonData.candidates?.[0]?.content?.parts?.[0]?.text;
if (text) {
// Write chunk to client
await writer.write(encoder.encode(text));
}
} catch (e) {
// Skip invalid JSON chunks
console.error('Failed to parse chunk:', e);
}
}
}
// Close the stream
await writer.close();
} catch (error: any) {
await writer.write(encoder.encode(`\n\nError: ${error.message}`));
await writer.close();
}
})();
// Return streaming response immediately
return new Response(readable, {
headers: {
'Content-Type': 'text/plain; charset=utf-8',
'Transfer-Encoding': 'chunked',
},
});
}
};
/**
* Example for Node.js/Standalone
*/
async function mainNodeJS() {
const GEMINI_API_KEY = process.env.GEMINI_API_KEY;
if (!GEMINI_API_KEY) {
throw new Error('GEMINI_API_KEY environment variable not set');
}
const response = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-goog-api-key': GEMINI_API_KEY,
},
body: JSON.stringify({
contents: [
{
parts: [
{ text: 'Write a 200-word story about time travel' }
]
}
]
}),
}
);
if (!response.body) {
throw new Error('Response body is null');
}
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() || '';
for (const line of lines) {
if (line.trim() === '' || line.startsWith('data: [DONE]')) continue;
if (!line.startsWith('data: ')) continue;
try {
const data = JSON.parse(line.slice(6));
const text = data.candidates?.[0]?.content?.parts?.[0]?.text;
if (text) {
process.stdout.write(text);
}
} catch (e) {
// Skip invalid JSON
}
}
}
console.log('\n');
}
// Uncomment to run in Node.js
// mainNodeJS();
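/**
 * A small refactoring sketch: both examples above repeat the same SSE line
 * handling, which could be extracted into one helper. The "data: " prefix and
 * the candidate/part shape match the parsing already done above.
 */
function parseSSELine(line: string): string | null {
  if (line.trim() === '' || !line.startsWith('data: ') || line.startsWith('data: [DONE]')) {
    return null;
  }
  try {
    const data = JSON.parse(line.slice(6)); // remove "data: " prefix
    return data.candidates?.[0]?.content?.parts?.[0]?.text ?? null;
  } catch {
    return null; // ignore partial or invalid JSON chunks
  }
}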

View File

@@ -0,0 +1,61 @@
/**
* Basic Text Generation with Gemini API (Node.js SDK)
*
* Demonstrates:
* - Installing @google/genai (CORRECT SDK, NOT @google/generative-ai)
* - Basic text generation with gemini-2.5-flash
* - Accessing response text
* - Error handling
*
* Prerequisites:
* - npm install @google/genai@1.27.0
* - export GEMINI_API_KEY="..."
*/
import { GoogleGenAI } from '@google/genai';
async function main() {
// Initialize the Google GenAI client
// ⚠️ IMPORTANT: Use @google/genai, NOT @google/generative-ai (deprecated)
const ai = new GoogleGenAI({
apiKey: process.env.GEMINI_API_KEY,
});
try {
// Generate content with gemini-2.5-flash
// Models available: gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite
const response = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'Explain quantum computing in simple terms for a 10-year-old'
});
// Access the generated text
console.log('Generated text:');
console.log(response.text);
// Access full response metadata
console.log('\nToken usage:');
console.log('- Prompt tokens:', response.usageMetadata?.promptTokenCount);
console.log('- Response tokens:', response.usageMetadata?.candidatesTokenCount);
console.log('- Total tokens:', response.usageMetadata?.totalTokenCount);
// Check finish reason
console.log('\nFinish reason:', response.candidates?.[0]?.finishReason);
// Possible values: "STOP" (normal), "MAX_TOKENS", "SAFETY", "OTHER"
} catch (error: any) {
console.error('Error generating content:');
if (error.status === 401) {
console.error('❌ Invalid API key. Set GEMINI_API_KEY environment variable.');
} else if (error.status === 429) {
console.error('❌ Rate limit exceeded. Try again later or implement exponential backoff.');
} else if (error.status === 404) {
console.error('❌ Model not found. Use: gemini-2.5-pro, gemini-2.5-flash, or gemini-2.5-flash-lite');
} else {
console.error('❌', error.message);
}
}
}
main();
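// A sketch of the exponential backoff suggested in the 429 branch above.
// Retry counts and delays are illustrative choices, not API requirements;
// error.status is the same field checked in the handler above.
async function withRetry<T>(fn: () => Promise<T>, maxRetries = 3): Promise<T> {
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (error: any) {
      if (error.status !== 429 || attempt >= maxRetries) throw error;
      const delayMs = 1000 * 2 ** attempt; // 1s, 2s, 4s, ...
      console.warn(`Rate limited; retrying in ${delayMs}ms`);
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
}
// Usage sketch:
// const response = await withRetry(() => ai.models.generateContent({
//   model: 'gemini-2.5-flash',
//   contents: '...'
// }));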

View File

@@ -0,0 +1,124 @@
/**
* Basic Text Generation with Gemini API (Fetch - Cloudflare Workers)
*
* Demonstrates:
* - Direct REST API calls using fetch (no SDK dependencies)
* - Perfect for Cloudflare Workers, Deno, Bun, or edge runtimes
* - Manual JSON parsing
* - Error handling with fetch
*
* Prerequisites:
* - Set GEMINI_API_KEY environment variable (or use env.GEMINI_API_KEY in Workers)
*/
/**
* Example for Cloudflare Workers
*/
interface Env {
GEMINI_API_KEY: string;
}
export default {
async fetch(request: Request, env: Env): Promise<Response> {
try {
// Make direct API call to Gemini
const response = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-goog-api-key': env.GEMINI_API_KEY,
},
body: JSON.stringify({
contents: [
{
parts: [
{
text: 'Explain quantum computing in simple terms for a 10-year-old'
}
]
}
]
}),
}
);
// Check for HTTP errors
if (!response.ok) {
const errorData: any = await response.json();
return new Response(
JSON.stringify({
error: errorData.error?.message || 'Unknown error',
status: response.status
}),
{ status: response.status, headers: { 'Content-Type': 'application/json' } }
);
}
// Parse response
const data: any = await response.json();
// Extract text from response structure
const generatedText = data.candidates?.[0]?.content?.parts?.[0]?.text;
const usageMetadata = data.usageMetadata;
const finishReason = data.candidates[0]?.finishReason;
return new Response(
JSON.stringify({
text: generatedText,
usage: {
promptTokens: usageMetadata.promptTokenCount,
responseTokens: usageMetadata.candidatesTokenCount,
totalTokens: usageMetadata.totalTokenCount
},
finishReason
}),
{ headers: { 'Content-Type': 'application/json' } }
);
} catch (error: any) {
return new Response(
JSON.stringify({ error: error.message }),
{ status: 500, headers: { 'Content-Type': 'application/json' } }
);
}
}
};
/**
* Example for Node.js/Standalone
*/
async function mainNodeJS() {
const GEMINI_API_KEY = process.env.GEMINI_API_KEY;
if (!GEMINI_API_KEY) {
throw new Error('GEMINI_API_KEY environment variable not set');
}
const response = await fetch(
`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-goog-api-key': GEMINI_API_KEY,
},
body: JSON.stringify({
contents: [
{
parts: [
{ text: 'Explain quantum computing in simple terms' }
]
}
]
}),
}
);
const data = await response.json();
console.log(data.candidates[0].content.parts[0].text);
}
// Uncomment to run in Node.js
// mainNodeJS();
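/**
 * The REST body also accepts a generationConfig object for sampling controls.
 * A hedged sketch — field names follow the public REST reference, and the
 * values are illustrative, not recommendations:
 */
async function withGenerationConfig(apiKey: string) {
  const response = await fetch(
    'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent',
    {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-goog-api-key': apiKey,
      },
      body: JSON.stringify({
        contents: [{ parts: [{ text: 'Write a haiku about the ocean' }] }],
        generationConfig: {
          temperature: 0.7,     // sampling randomness
          maxOutputTokens: 256, // cap on response length
          topP: 0.95            // nucleus sampling
        }
      }),
    }
  );
  const data: any = await response.json();
  console.log(data.candidates?.[0]?.content?.parts?.[0]?.text);
}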

137
templates/thinking-mode.ts Normal file
View File

@@ -0,0 +1,137 @@
/**
* Thinking Mode Configuration with Gemini API
*
* Demonstrates:
* - Thinking mode (enabled by default on Gemini 2.5 models)
* - Configuring thinking budget
* - When to use thinking mode
* - Impact on latency and quality
*
* Prerequisites:
* - npm install @google/genai@1.27.0
* - export GEMINI_API_KEY="..."
*
* Thinking mode is ENABLED BY DEFAULT on Gemini 2.5 models (and cannot be disabled on gemini-2.5-pro)
*/
import { GoogleGenAI } from '@google/genai';
async function main() {
const ai = new GoogleGenAI({
apiKey: process.env.GEMINI_API_KEY,
});
try {
// Example 1: Default thinking budget
console.log('Example 1: Default Thinking Budget\n');
console.log('Prompt: Solve this complex math problem:\n');
console.log('If a train travels 120 km in 1.5 hours, then slows down to 60 km/h for 45 minutes, how far has it traveled total?\n');
const response1 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: 'If a train travels 120 km in 1.5 hours, then slows down to 60 km/h for 45 minutes, how far has it traveled total?'
// No thinkingConfig = uses default budget
});
console.log('Answer:', response1.text);
console.log('\nToken usage:', response1.usageMetadata);
console.log('\n---\n');
// Example 2: Increased thinking budget for complex reasoning
console.log('Example 2: Increased Thinking Budget (8192 tokens)\n');
console.log('Prompt: Complex logic puzzle\n');
const response2 = await ai.models.generateContent({
model: 'gemini-2.5-flash',
contents: `
Three people (Alice, Bob, Carol) have different jobs (doctor, engineer, teacher).
Clues:
1. Alice is not a doctor
2. The engineer is older than Bob
3. Carol is younger than the teacher
4. The doctor is the youngest
Who has which job?
`,
config: {
thinkingConfig: {
thinkingBudget: 8192 // Increase budget for complex reasoning
}
}
});
console.log('Answer:', response2.text);
console.log('\nToken usage:', response2.usageMetadata);
console.log('\n---\n');
// Example 3: Comparison with gemini-2.5-pro (more thinking capability)
console.log('Example 3: Using gemini-2.5-pro for Advanced Reasoning\n');
console.log('Prompt: Multi-step code optimization problem\n');
const response3 = await ai.models.generateContent({
model: 'gemini-2.5-pro', // Pro model has better reasoning
contents: `
Optimize this Python code for better performance:
def find_duplicates(arr):
duplicates = []
for i in range(len(arr)):
for j in range(i + 1, len(arr)):
if arr[i] == arr[j] and arr[i] not in duplicates:
duplicates.append(arr[i])
return duplicates
Explain your optimization strategy step by step.
`,
config: {
thinkingConfig: {
thinkingBudget: 8192
}
}
});
console.log('Optimization:', response3.text);
console.log('\nToken usage:', response3.usageMetadata);
} catch (error: any) {
console.error('Error:', error.message);
}
}
/**
* Thinking Mode Guidelines:
*
* What is Thinking Mode?
* - Gemini 2.5 models "think" before responding, improving accuracy
* - The model internally reasons through the problem
* - This happens transparently (you don't see the thinking process)
*
* Thinking Budget:
* - Controls max tokens allocated for internal reasoning
* - Higher budget = more thorough reasoning (may increase latency)
* - Default budget is usually sufficient for most tasks
*
* When to Increase Budget:
* ✅ Complex math/logic problems
* ✅ Multi-step reasoning tasks
* ✅ Code optimization challenges
* ✅ Detailed analysis requiring careful consideration
*
* When Default is Fine:
* ⏺️ Simple factual questions
* ⏺️ Creative writing
* ⏺️ Translation
* ⏺️ Summarization
*
* Model Comparison:
* - gemini-2.5-pro: Best for complex reasoning, higher default thinking budget
* - gemini-2.5-flash: Good balance, suitable for most thinking tasks
* - gemini-2.5-flash-lite: Basic thinking, optimized for speed
*
* Important Notes:
* - Thinking cannot be disabled on gemini-2.5-pro; Flash models accept thinkingBudget: 0
* - Thinking tokens count toward total usage
* - Higher thinking budget may increase latency slightly
*/
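/**
 * A sketch for inspecting how many tokens went to internal reasoning. The
 * usageMetadata field name (thoughtsTokenCount) is taken from the public API
 * reference; treat it as an assumption if your SDK version reports it differently.
 */
function logThinkingUsage(response: { usageMetadata?: any }) {
  const usage = response.usageMetadata;
  if (!usage) return;
  console.log('Thinking tokens:', usage.thoughtsTokenCount ?? 0);
  console.log('Output tokens:', usage.candidatesTokenCount);
  console.log('Total (includes thinking):', usage.totalTokenCount);
}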
main();