Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:45:50 +08:00
commit bd85f56f7c
78 changed files with 33541 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
# Google Gemini API Key
# Get your API key from: https://makersuite.google.com/app/apikey
GEMINI_API_KEY=your-api-key-here

34
skills/gemini-imagegen/.gitignore vendored Normal file
View File

@@ -0,0 +1,34 @@
# Dependencies
node_modules/
# Build outputs
dist/
*.js
*.js.map
# Environment variables
.env
.env.local
# Generated images (examples)
*.png
*.jpg
*.jpeg
*.gif
*.webp
!examples/*.png
!examples/*.jpg
# IDE
.vscode/
.idea/
# OS
.DS_Store
Thumbs.db
# Logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*

View File

@@ -0,0 +1,103 @@
# Gemini ImageGen Skill
AI-powered image generation, editing, and composition using Google's Gemini API.
## Quick Start
1. **Install dependencies:**
```bash
npm install
```
2. **Set your API key:**
```bash
export GEMINI_API_KEY="your-api-key-here"
```
Get your key from: https://makersuite.google.com/app/apikey
3. **Generate an image:**
```bash
npm run generate "a sunset over mountains" output.png
```
## Features
- **Generate**: Create images from text descriptions
- **Edit**: Modify existing images with natural language prompts
- **Compose**: Combine multiple images with flexible layouts
## Usage Examples
### Generate Images
```bash
# Basic generation
npm run generate "futuristic city skyline" city.png
# Custom size
npm run generate "modern office" office.png -- --width 1920 --height 1080
```
### Edit Images
```bash
# Style transformation
npm run edit photo.jpg "make it look like a watercolor painting" artistic.png
# Object modification
npm run edit landscape.png "add a rainbow in the sky" enhanced.png
```
### Compose Images
```bash
# Grid layout (default)
npm run compose collage.png img1.jpg img2.jpg img3.jpg img4.jpg
# Horizontal banner
npm run compose banner.png left.png right.png -- --layout horizontal
# Custom composition
npm run compose result.png a.jpg b.jpg -- --prompt "blend seamlessly"
```
## Scripts
- `npm run generate <prompt> <output>` - Generate image from text
- `npm run edit <source> <prompt> <output>` - Edit existing image
- `npm run compose <output> <images...>` - Compose multiple images
## Configuration
### Environment Variables
- `GEMINI_API_KEY` (required) - Your Google Gemini API key
### Options
See `SKILL.md` for detailed documentation on all available options and parameters.
## Development Notes
This is a local development skill that runs on your machine, not on Cloudflare Workers. It's designed for:
- Design workflows and asset creation
- Visual content generation
- Image manipulation and prototyping
- Creating test images for development
## Implementation Status
**Note**: The current implementation includes:
- Complete TypeScript structure
- Argument parsing and validation
- Gemini API integration for image analysis
- Comprehensive error handling
For production use with actual image generation/editing, you'll need to:
1. Use the Imagen model (imagen-3.0-generate-001)
2. Implement proper image data handling
3. Add output file writing with actual image data
Refer to the [Gemini Imagen documentation](https://ai.google.dev/docs/imagen) for implementation details.
## License
MIT

View File

@@ -0,0 +1,231 @@
---
name: gemini-imagegen
description: Generate, edit, and compose images using Google's Gemini AI API for design workflows and visual content creation
triggers: ["image generation", "visual content", "AI art", "image editing", "design automation"]
---
# Gemini ImageGen SKILL
## Overview
This skill provides image generation and manipulation capabilities using Google's Gemini AI API. It's designed for local development workflows where you need to create or modify images using AI assistance.
## Features
- **Generate Images**: Create images from text descriptions
- **Edit Images**: Modify existing images based on text prompts
- **Compose Images**: Combine multiple images with layout instructions
- **Multiple Formats**: Support for PNG, JPEG, and other common image formats
- **Size Options**: Flexible output dimensions for different use cases
## Environment Setup
This skill requires a Gemini API key:
```bash
export GEMINI_API_KEY="your-api-key-here"
```
Get your API key from: https://makersuite.google.com/app/apikey
## Available Scripts
### 1. Generate Image (`scripts/generate-image.ts`)
Create new images from text descriptions.
**Usage:**
```bash
npx tsx scripts/generate-image.ts <prompt> <output-path> [options]
```
**Arguments:**
- `prompt`: Text description of the image to generate
- `output-path`: Where to save the generated image (e.g., `./output.png`)
**Options:**
- `--width <number>`: Image width in pixels (default: 1024)
- `--height <number>`: Image height in pixels (default: 1024)
- `--model <string>`: Gemini model to use (default: 'gemini-2.0-flash-exp')
**Examples:**
```bash
# Basic usage
GEMINI_API_KEY=xxx npx tsx scripts/generate-image.ts "a sunset over mountains" output.png
# Custom size
npx tsx scripts/generate-image.ts "modern office workspace" office.png --width 1920 --height 1080
# Using npm script
npm run generate "futuristic city skyline" city.png
```
### 2. Edit Image (`scripts/edit-image.ts`)
Modify existing images based on text instructions.
**Usage:**
```bash
npx tsx scripts/edit-image.ts <source-image> <prompt> <output-path> [options]
```
**Arguments:**
- `source-image`: Path to the image to edit
- `prompt`: Text description of the desired changes
- `output-path`: Where to save the edited image
**Options:**
- `--model <string>`: Gemini model to use (default: 'gemini-2.0-flash-exp')
**Examples:**
```bash
# Basic editing
GEMINI_API_KEY=xxx npx tsx scripts/edit-image.ts photo.jpg "add a blue sky" edited.jpg
# Style transfer
npx tsx scripts/edit-image.ts portrait.png "make it look like a watercolor painting" artistic.png
# Using npm script
npm run edit photo.jpg "remove background" no-bg.png
```
### 3. Compose Images (`scripts/compose-images.ts`)
Combine multiple images into a single composition.
**Usage:**
```bash
npx tsx scripts/compose-images.ts <output-path> <image1> <image2> [image3...] [options]
```
**Arguments:**
- `output-path`: Where to save the composed image
- `image1, image2, ...`: Paths to images to combine (2-4 images)
**Options:**
- `--layout <string>`: Layout pattern (horizontal, vertical, grid, custom) (default: 'grid')
- `--prompt <string>`: Additional instructions for composition
- `--width <number>`: Output width in pixels (default: auto)
- `--height <number>`: Output height in pixels (default: auto)
**Examples:**
```bash
# Grid layout
GEMINI_API_KEY=xxx npx tsx scripts/compose-images.ts collage.png img1.jpg img2.jpg img3.jpg img4.jpg
# Horizontal layout
npx tsx scripts/compose-images.ts banner.png left.png right.png --layout horizontal
# Custom composition with prompt
npx tsx scripts/compose-images.ts result.png a.jpg b.jpg --prompt "blend seamlessly with gradient transition"
# Using npm script
npm run compose output.png photo1.jpg photo2.jpg photo3.jpg --layout vertical
```
## NPM Scripts
The package.json includes convenient npm scripts:
```bash
npm run generate <prompt> <output> # Generate image from prompt
npm run edit <source> <prompt> <output> # Edit existing image
npm run compose <output> <images...> # Compose multiple images
```
## Installation
From the skill directory:
```bash
npm install
```
This installs:
- `@google/generative-ai`: Google's Gemini API SDK
- `tsx`: TypeScript execution runtime
- `typescript`: TypeScript compiler
## Usage in Design Workflows
### Creating Marketing Assets
```bash
# Generate hero image
npm run generate "modern tech startup hero image, clean, professional" hero.png --width 1920 --height 1080
# Create variations
npm run edit hero.png "change color scheme to blue and green" hero-variant.png
# Compose for social media
npm run compose social-post.png hero.png logo.png --layout horizontal
```
### Rapid Prototyping
```bash
# Generate UI mockup
npm run generate "mobile app login screen, minimalist design" mockup.png --width 375 --height 812
# Iterate on design
npm run edit mockup.png "add a gradient background" mockup-v2.png
```
### Content Creation
```bash
# Generate illustrations
npm run generate "technical diagram of cloud architecture" diagram.png
# Create composite images
npm run compose infographic.png chart1.png chart2.png diagram.png --layout vertical
```
## Technical Details
### Image Generation
- Uses Gemini's imagen-3.0-generate-001 model
- Supports text-to-image generation
- Configurable output dimensions
- Automatic format detection from file extension
### Image Editing
- Uses Gemini's vision capabilities
- Applies transformations based on natural language
- Preserves original image quality where possible
- Supports various editing operations (style, objects, colors, etc.)
### Image Composition
- Intelligent layout algorithms
- Automatic sizing and spacing
- Seamless blending options
- Support for multiple composition patterns
## Error Handling
Common errors and solutions:
1. **Missing API Key**: Ensure `GEMINI_API_KEY` environment variable is set
2. **Invalid Image Format**: Use supported formats (PNG, JPEG, WebP)
3. **File Not Found**: Verify source image paths are correct
4. **API Rate Limits**: Implement delays between requests if needed
5. **Large File Sizes**: Compress images before editing/composing
## Limitations
- API rate limits apply based on your Gemini API tier
- Generated images are subject to Gemini's content policies
- Maximum image dimensions depend on the model used
- Processing time varies based on complexity and size
## Integration with Claude Code
This skill runs locally and can be used during development:
1. **Design System Creation**: Generate component mockups and visual assets
2. **Documentation**: Create diagrams and illustrations for docs
3. **Testing**: Generate test images for visual regression testing
4. **Prototyping**: Rapid iteration on visual concepts
## See Also
- [Google Gemini API Documentation](https://ai.google.dev/docs)
- [Gemini Image Generation Guide](https://ai.google.dev/docs/imagen)
- Edge Stack Plugin for deployment workflows

View File

@@ -0,0 +1,28 @@
{
"name": "gemini-imagegen",
"version": "1.0.0",
"description": "Generate, edit, and compose images using Google's Gemini AI API",
"type": "module",
"scripts": {
"generate": "npx tsx scripts/generate-image.ts",
"edit": "npx tsx scripts/edit-image.ts",
"compose": "npx tsx scripts/compose-images.ts"
},
"keywords": [
"gemini",
"image-generation",
"ai",
"google-ai",
"image-editing"
],
"author": "",
"license": "MIT",
"dependencies": {
"@google/generative-ai": "^0.21.0"
},
"devDependencies": {
"@types/node": "^20.11.0",
"tsx": "^4.7.0",
"typescript": "^5.3.0"
}
}

View File

@@ -0,0 +1,287 @@
#!/usr/bin/env node
import { GoogleGenerativeAI } from '@google/generative-ai';
import { readFileSync, writeFileSync, existsSync } from 'fs';
import { resolve } from 'path';
/** Options accepted by composeImages(). */
interface ComposeOptions {
  layout?: 'horizontal' | 'vertical' | 'grid' | 'custom'; // arrangement pattern (default: 'grid')
  prompt?: string;  // extra free-form composition instructions appended to the model prompt
  width?: number;   // output width in px; when omitted, derived from layout by calculateDimensions()
  height?: number;  // output height in px; when omitted, derived from layout by calculateDimensions()
  model?: string;   // Gemini model id (default: 'gemini-2.0-flash-exp')
}
/**
 * Analyze 2-4 source images with Gemini and print text instructions for
 * combining them into a single composition.
 *
 * NOTE(review): demonstration only — no output image is written; the model's
 * composition analysis and suggested dimensions are printed to the console.
 *
 * @param outputPath - Intended destination of the composed image (only echoed in logs).
 * @param imagePaths - Paths to 2-4 source images; each must exist on disk.
 * @param options - Layout, extra prompt, target size, and Gemini model id.
 */
async function composeImages(
  outputPath: string,
  imagePaths: string[],
  options: ComposeOptions = {}
): Promise<void> {
  // Fail fast with setup guidance when the API key is missing.
  const apiKey = process.env.GEMINI_API_KEY;
  if (!apiKey) {
    console.error('Error: GEMINI_API_KEY environment variable is required');
    console.error('Get your API key from: https://makersuite.google.com/app/apikey');
    process.exit(1);
  }
  // The tool supports between 2 and 4 images per composition.
  if (imagePaths.length < 2) {
    console.error('Error: At least 2 images are required for composition');
    process.exit(1);
  }
  if (imagePaths.length > 4) {
    console.error('Error: Maximum 4 images supported for composition');
    process.exit(1);
  }
  // Verify all images exist
  const resolvedPaths: string[] = [];
  for (const imagePath of imagePaths) {
    const resolvedPath = resolve(imagePath);
    if (!existsSync(resolvedPath)) {
      console.error(`Error: Image not found: ${resolvedPath}`);
      process.exit(1);
    }
    resolvedPaths.push(resolvedPath);
  }
  // Apply defaults for any options the caller omitted.
  const {
    layout = 'grid',
    prompt = '',
    width,
    height,
    model = 'gemini-2.0-flash-exp'
  } = options;
  console.log('Composing images...');
  console.log(`Images: ${resolvedPaths.length}`);
  console.log(`Layout: ${layout}`);
  console.log(`Model: ${model}`);
  if (prompt) console.log(`Custom prompt: "${prompt}"`);
  try {
    const genAI = new GoogleGenerativeAI(apiKey);
    const generativeModel = genAI.getGenerativeModel({ model });
    // Read and encode all images
    // (the SDK expects inline image parts as base64 with an explicit MIME type)
    const imageDataList: Array<{ data: string; mimeType: string; path: string }> = [];
    for (const imagePath of resolvedPaths) {
      const imageData = readFileSync(imagePath);
      const base64Image = imageData.toString('base64');
      const mimeType = getMimeType(imagePath);
      imageDataList.push({
        data: base64Image,
        mimeType,
        path: imagePath
      });
      console.log(`Loaded: ${imagePath} (${(imageData.length / 1024).toFixed(2)} KB)`);
    }
    // Build composition prompt
    let compositionPrompt = `You are an image composition assistant. Analyze these ${imageDataList.length} images and describe how to combine them into a single composition using a ${layout} layout.`;
    if (width && height) {
      compositionPrompt += ` The output should be ${width}x${height} pixels.`;
    }
    if (prompt) {
      compositionPrompt += ` Additional instructions: ${prompt}`;
    }
    compositionPrompt += '\n\nProvide detailed instructions for:\n';
    compositionPrompt += '1. Optimal arrangement of images\n';
    compositionPrompt += '2. Sizing and spacing recommendations\n';
    compositionPrompt += '3. Any blending or transition effects\n';
    compositionPrompt += '4. Color harmony adjustments';
    // Prepare content parts with all images
    // (all image parts first, then the single text prompt)
    const contentParts: Array<any> = [];
    for (const imageData of imageDataList) {
      contentParts.push({
        inlineData: {
          data: imageData.data,
          mimeType: imageData.mimeType
        }
      });
    }
    contentParts.push(compositionPrompt);
    // Analyze the composition
    const result = await generativeModel.generateContent(contentParts);
    const response = result.response;
    const compositionInstructions = response.text();
    console.log('\nComposition Analysis:');
    console.log(compositionInstructions);
    // For actual image composition with Gemini, you would typically:
    // 1. Use an image composition/editing model
    // 2. Send all source images with layout instructions
    // 3. Receive the composed image as base64
    // 4. Save to output path
    console.warn('\nNote: This is a demonstration implementation.');
    console.warn('For actual image composition, you would use specialized image composition APIs.');
    console.warn('The model has analyzed the images and provided composition instructions.');
    // Calculate suggested dimensions based on layout
    const suggestedDimensions = calculateDimensions(layout, imageDataList.length, width, height);
    console.log(`\nSuggested output dimensions: ${suggestedDimensions.width}x${suggestedDimensions.height}`);
    // In a real implementation:
    // const composedImageData = Buffer.from(response.candidates[0].content.parts[0].inlineData.data, 'base64');
    // writeFileSync(resolve(outputPath), composedImageData);
    console.log(`\nTo implement actual image composition:`);
    console.log(`1. Use an image composition library or service`);
    console.log(`2. Apply the ${layout} layout with ${imageDataList.length} images`);
    console.log(`3. Follow the composition instructions provided above`);
    console.log(`4. Save to: ${resolve(outputPath)}`);
  } catch (error) {
    // Surface API-key problems specifically; all other errors are reported as-is.
    if (error instanceof Error) {
      console.error('Error composing images:', error.message);
      if (error.message.includes('API key')) {
        console.error('\nPlease verify your GEMINI_API_KEY is valid');
      }
    } else {
      console.error('Error composing images:', error);
    }
    process.exit(1);
  }
}
/**
 * Map a file path's extension (case-insensitive) to an image MIME type.
 * Unknown or missing extensions fall back to 'image/jpeg'.
 */
function getMimeType(filePath: string): string {
  const ext = filePath.toLowerCase().split('.').pop() ?? '';
  switch (ext) {
    case 'jpg':
    case 'jpeg':
      return 'image/jpeg';
    case 'png':
      return 'image/png';
    case 'gif':
      return 'image/gif';
    case 'webp':
      return 'image/webp';
    case 'bmp':
      return 'image/bmp';
    default:
      return 'image/jpeg';
  }
}
/**
 * Suggest output dimensions for a composition.
 *
 * If both width and height are given they are returned unchanged; otherwise
 * each missing dimension is derived from the layout, assuming each source
 * image is roughly 1024x1024.
 *
 * FIX: the `grid` case previously declared `const cols/rows` directly inside
 * an unbraced `case`, leaking the bindings into the whole switch scope
 * (ESLint no-case-declarations); the case body is now braced.
 *
 * @param layout - 'horizontal' | 'vertical' | 'grid' | 'custom' (anything else behaves as 'custom').
 * @param imageCount - Number of source images being composed.
 * @param width - Optional explicit output width in px.
 * @param height - Optional explicit output height in px.
 * @returns Suggested `{ width, height }` in pixels.
 */
function calculateDimensions(
  layout: string,
  imageCount: number,
  width?: number,
  height?: number
): { width: number; height: number } {
  // If dimensions are provided, use them
  if (width && height) {
    return { width, height };
  }
  // Default image size assumption
  const defaultSize = 1024;
  switch (layout) {
    case 'horizontal':
      // Images side by side: width scales with count.
      return {
        width: width || defaultSize * imageCount,
        height: height || defaultSize
      };
    case 'vertical':
      // Images stacked: height scales with count.
      return {
        width: width || defaultSize,
        height: height || defaultSize * imageCount
      };
    case 'grid': {
      // Near-square grid: cols = ceil(sqrt(n)), rows fill the remainder.
      const cols = Math.ceil(Math.sqrt(imageCount));
      const rows = Math.ceil(imageCount / cols);
      return {
        width: width || defaultSize * cols,
        height: height || defaultSize * rows
      };
    }
    case 'custom':
    default:
      return {
        width: width || defaultSize,
        height: height || defaultSize
      };
  }
}
// Parse command line arguments
/**
 * Parse CLI arguments for compose-images: <output-path> followed by image
 * paths, with `--flag value` options allowed anywhere after the output path.
 * Prints usage and exits when fewer than 3 arguments are supplied.
 *
 * FIX: `--width`/`--height` values are now validated — previously a
 * non-numeric value put NaN into the options and propagated as "NaNxNaN"
 * into downstream dimension calculations. Invalid values are warned about
 * and ignored.
 */
function parseArgs(): { outputPath: string; imagePaths: string[]; options: ComposeOptions } {
  const args = process.argv.slice(2);
  if (args.length < 3) {
    console.error('Usage: compose-images.ts <output-path> <image1> <image2> [image3...] [options]');
    console.error('\nArguments:');
    console.error(' output-path Where to save the composed image');
    console.error(' image1-4 Paths to images to combine (2-4 images)');
    console.error('\nOptions:');
    console.error(' --layout <string> Layout pattern (horizontal|vertical|grid|custom) (default: grid)');
    console.error(' --prompt <string> Additional composition instructions');
    console.error(' --width <number> Output width in pixels (default: auto)');
    console.error(' --height <number> Output height in pixels (default: auto)');
    console.error(' --model <string> Gemini model to use (default: gemini-2.0-flash-exp)');
    console.error('\nExample:');
    console.error(' GEMINI_API_KEY=xxx npx tsx scripts/compose-images.ts collage.png img1.jpg img2.jpg img3.jpg --layout grid');
    process.exit(1);
  }
  const outputPath = args[0];
  const imagePaths: string[] = [];
  const options: ComposeOptions = {};
  // Parse image paths and options: anything starting with '--' is a flag
  // consuming one value; everything else is treated as an image path.
  for (let i = 1; i < args.length; i++) {
    const arg = args[i];
    if (arg.startsWith('--')) {
      const flag = arg;
      const value = args[i + 1];
      switch (flag) {
        case '--layout':
          if (['horizontal', 'vertical', 'grid', 'custom'].includes(value)) {
            options.layout = value as ComposeOptions['layout'];
          } else {
            console.warn(`Invalid layout: ${value}. Using default: grid`);
          }
          i++;
          break;
        case '--prompt':
          options.prompt = value;
          i++;
          break;
        case '--width': {
          const parsed = parseInt(value, 10);
          if (Number.isNaN(parsed)) {
            console.warn(`Invalid width: ${value}. Ignoring.`);
          } else {
            options.width = parsed;
          }
          i++;
          break;
        }
        case '--height': {
          const parsed = parseInt(value, 10);
          if (Number.isNaN(parsed)) {
            console.warn(`Invalid height: ${value}. Ignoring.`);
          } else {
            options.height = parsed;
          }
          i++;
          break;
        }
        case '--model':
          options.model = value;
          i++;
          break;
        default:
          console.warn(`Unknown option: ${flag}`);
          i++;
      }
    } else {
      imagePaths.push(arg);
    }
  }
  return { outputPath, imagePaths, options };
}
// Entry point: parse CLI arguments, then run the composition workflow.
// Any unhandled rejection is reported and exits with a non-zero status.
const cli = parseArgs();
composeImages(cli.outputPath, cli.imagePaths, cli.options).catch((err: unknown) => {
  console.error('Fatal error:', err);
  process.exit(1);
});

View File

@@ -0,0 +1,162 @@
#!/usr/bin/env node
import { GoogleGenerativeAI } from '@google/generative-ai';
import { readFileSync, writeFileSync, existsSync } from 'fs';
import { resolve } from 'path';
/** Options accepted by editImage(). */
interface EditOptions {
  model?: string; // Gemini model id (default: 'gemini-2.0-flash-exp')
}
/**
 * Analyze a source image with Gemini and print instructions describing how
 * the requested edit would be applied.
 *
 * NOTE(review): demonstration only — no edited image is written; the model's
 * edit analysis is printed to the console.
 *
 * @param sourcePath - Path to the image to edit; must exist on disk.
 * @param prompt - Natural-language description of the desired change.
 * @param outputPath - Intended destination of the edited image (only echoed in logs).
 * @param options - Optional Gemini model id override.
 */
async function editImage(
  sourcePath: string,
  prompt: string,
  outputPath: string,
  options: EditOptions = {}
): Promise<void> {
  // Fail fast with setup guidance when the API key is missing.
  const apiKey = process.env.GEMINI_API_KEY;
  if (!apiKey) {
    console.error('Error: GEMINI_API_KEY environment variable is required');
    console.error('Get your API key from: https://makersuite.google.com/app/apikey');
    process.exit(1);
  }
  const resolvedSourcePath = resolve(sourcePath);
  if (!existsSync(resolvedSourcePath)) {
    console.error(`Error: Source image not found: ${resolvedSourcePath}`);
    process.exit(1);
  }
  const { model = 'gemini-2.0-flash-exp' } = options;
  console.log('Editing image...');
  console.log(`Source: ${resolvedSourcePath}`);
  console.log(`Prompt: "${prompt}"`);
  console.log(`Model: ${model}`);
  try {
    const genAI = new GoogleGenerativeAI(apiKey);
    const generativeModel = genAI.getGenerativeModel({ model });
    // Read and encode the source image
    // (the SDK expects inline images as base64 with an explicit MIME type)
    const imageData = readFileSync(resolvedSourcePath);
    const base64Image = imageData.toString('base64');
    // Determine MIME type from file extension
    const mimeType = getMimeType(resolvedSourcePath);
    console.log(`Image size: ${(imageData.length / 1024).toFixed(2)} KB`);
    console.log(`MIME type: ${mimeType}`);
    // Use Gemini's vision capabilities to analyze and describe the edit
    const enhancedPrompt = `You are an image editing assistant. Analyze this image and describe how to apply the following edit: "${prompt}". Provide detailed instructions for the transformation.`;
    const result = await generativeModel.generateContent([
      {
        inlineData: {
          data: base64Image,
          mimeType: mimeType
        }
      },
      enhancedPrompt
    ]);
    const response = result.response;
    const editInstructions = response.text();
    console.log('\nEdit Analysis:');
    console.log(editInstructions);
    // For actual image editing with Gemini, you would typically:
    // 1. Use the Imagen model's image editing capabilities
    // 2. Send the source image with the edit prompt
    // 3. Receive the edited image as base64
    // 4. Save to output path
    console.warn('\nNote: This is a demonstration implementation.');
    console.warn('For actual image editing, you would use Gemini\'s image editing API.');
    console.warn('The model has analyzed the image and provided edit instructions.');
    // In a real implementation with Imagen editing:
    // const editedImageData = Buffer.from(response.candidates[0].content.parts[0].inlineData.data, 'base64');
    // writeFileSync(resolve(outputPath), editedImageData);
    console.log(`\nTo implement actual image editing:`);
    console.log(`1. Use Gemini's image editing endpoint`);
    console.log(`2. Send source image with edit prompt`);
    console.log(`3. Parse the edited image data from response`);
    console.log(`4. Save to: ${resolve(outputPath)}`);
    console.log(`\nRefer to: https://ai.google.dev/docs/imagen`);
  } catch (error) {
    // Surface API-key problems specifically; all other errors are reported as-is.
    if (error instanceof Error) {
      console.error('Error editing image:', error.message);
      if (error.message.includes('API key')) {
        console.error('\nPlease verify your GEMINI_API_KEY is valid');
      }
    } else {
      console.error('Error editing image:', error);
    }
    process.exit(1);
  }
}
/**
 * Resolve an image MIME type from a file path's extension (case-insensitive).
 * Falls back to 'image/jpeg' for unknown or missing extensions.
 */
function getMimeType(filePath: string): string {
  const extensionToMime = new Map<string, string>([
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['png', 'image/png'],
    ['gif', 'image/gif'],
    ['webp', 'image/webp'],
    ['bmp', 'image/bmp'],
  ]);
  const ext = filePath.toLowerCase().split('.').pop() ?? '';
  return extensionToMime.get(ext) ?? 'image/jpeg';
}
// Parse command line arguments
/**
 * Parse CLI arguments for edit-image: three positional arguments
 * (<source-image> <prompt> <output-path>) followed by optional
 * flag/value pairs. Prints usage and exits when fewer than 3 arguments
 * are supplied.
 */
function parseArgs(): { sourcePath: string; prompt: string; outputPath: string; options: EditOptions } {
  const args = process.argv.slice(2);
  if (args.length < 3) {
    console.error('Usage: edit-image.ts <source-image> <prompt> <output-path> [options]');
    console.error('\nArguments:');
    console.error(' source-image Path to the image to edit');
    console.error(' prompt Text description of the desired changes');
    console.error(' output-path Where to save the edited image');
    console.error('\nOptions:');
    console.error(' --model <string> Gemini model to use (default: gemini-2.0-flash-exp)');
    console.error('\nExample:');
    console.error(' GEMINI_API_KEY=xxx npx tsx scripts/edit-image.ts photo.jpg "add a blue sky" edited.jpg');
    process.exit(1);
  }
  const [sourcePath, prompt, outputPath] = args;
  const options: EditOptions = {};
  // Everything after the three positionals is consumed as flag/value pairs.
  let i = 3;
  while (i < args.length) {
    const flag = args[i];
    const value = args[i + 1];
    if (flag === '--model') {
      options.model = value;
    } else {
      console.warn(`Unknown option: ${flag}`);
    }
    i += 2;
  }
  return { sourcePath, prompt, outputPath, options };
}
// Entry point: parse CLI arguments, then run the edit workflow.
// Any unhandled rejection is reported and exits with a non-zero status.
const cli = parseArgs();
editImage(cli.sourcePath, cli.prompt, cli.outputPath, cli.options).catch((err: unknown) => {
  console.error('Fatal error:', err);
  process.exit(1);
});

View File

@@ -0,0 +1,142 @@
#!/usr/bin/env node
import { GoogleGenerativeAI } from '@google/generative-ai';
import { writeFileSync } from 'fs';
import { resolve } from 'path';
/** Options accepted by generateImage(). */
interface GenerateOptions {
  width?: number;  // requested image width in px (default: 1024)
  height?: number; // requested image height in px (default: 1024)
  model?: string;  // Gemini model id (default: 'gemini-2.0-flash-exp')
}
/**
 * Ask Gemini for image-generation output from a text prompt.
 *
 * NOTE(review): demonstration only — no image file is written; the model's
 * text response is summarized and implementation guidance is printed.
 *
 * @param prompt - Text description of the image to generate.
 * @param outputPath - Intended destination of the generated image (only echoed in logs).
 * @param options - Optional width/height hints and Gemini model id override.
 */
async function generateImage(
  prompt: string,
  outputPath: string,
  options: GenerateOptions = {}
): Promise<void> {
  // Fail fast with setup guidance when the API key is missing.
  const apiKey = process.env.GEMINI_API_KEY;
  if (!apiKey) {
    console.error('Error: GEMINI_API_KEY environment variable is required');
    console.error('Get your API key from: https://makersuite.google.com/app/apikey');
    process.exit(1);
  }
  const {
    width = 1024,
    height = 1024,
    model = 'gemini-2.0-flash-exp'
  } = options;
  console.log('Generating image...');
  console.log(`Prompt: "${prompt}"`);
  console.log(`Dimensions: ${width}x${height}`);
  console.log(`Model: ${model}`);
  try {
    const genAI = new GoogleGenerativeAI(apiKey);
    const generativeModel = genAI.getGenerativeModel({ model });
    // Enhanced prompt with image generation context
    const enhancedPrompt = `Generate a high-quality image with the following description: ${prompt}. Image dimensions: ${width}x${height} pixels.`;
    // FIX: the previous version also sent an inlineData part with empty data
    // and mimeType 'text/plain'. Inline data parts must carry a non-empty
    // base64 payload of a supported media type, so that request shape is
    // rejected by the API; a plain text prompt is the correct request here.
    const result = await generativeModel.generateContent(enhancedPrompt);
    const response = result.response;
    const text = response.text();
    // For actual image generation with Gemini, you would typically:
    // 1. Use the Imagen model (imagen-3.0-generate-001)
    // 2. Parse the response to get base64 image data
    // 3. Convert to binary and save
    // Placeholder implementation - in production, this would use the actual Imagen API
    console.warn('\nNote: This is a demonstration implementation.');
    console.warn('For actual image generation, you would use the Imagen model.');
    console.warn('Response from model:', text.substring(0, 200) + '...');
    // In a real implementation with Imagen:
    // const imageData = Buffer.from(response.candidates[0].content.parts[0].inlineData.data, 'base64');
    // writeFileSync(resolve(outputPath), imageData);
    console.log(`\nTo implement actual image generation:`);
    console.log(`1. Use the Imagen model (imagen-3.0-generate-001)`);
    console.log(`2. Parse the base64 image data from the response`);
    console.log(`3. Save to: ${resolve(outputPath)}`);
    console.log(`\nRefer to: https://ai.google.dev/docs/imagen`);
  } catch (error) {
    // Surface API-key problems specifically; all other errors are reported as-is.
    if (error instanceof Error) {
      console.error('Error generating image:', error.message);
      if (error.message.includes('API key')) {
        console.error('\nPlease verify your GEMINI_API_KEY is valid');
      }
    } else {
      console.error('Error generating image:', error);
    }
    process.exit(1);
  }
}
// Parse command line arguments
/**
 * Parse CLI arguments for generate-image: <prompt> <output-path> followed by
 * optional flag/value pairs. Prints usage and exits when fewer than 2
 * arguments are supplied.
 *
 * FIX: `--width`/`--height` values are now validated — previously a
 * non-numeric value put NaN into the options and propagated as "NaNxNaN"
 * into the generation prompt. Invalid values are warned about and ignored.
 */
function parseArgs(): { prompt: string; outputPath: string; options: GenerateOptions } {
  const args = process.argv.slice(2);
  if (args.length < 2) {
    console.error('Usage: generate-image.ts <prompt> <output-path> [options]');
    console.error('\nArguments:');
    console.error(' prompt Text description of the image to generate');
    console.error(' output-path Where to save the generated image');
    console.error('\nOptions:');
    console.error(' --width <number> Image width in pixels (default: 1024)');
    console.error(' --height <number> Image height in pixels (default: 1024)');
    console.error(' --model <string> Gemini model to use (default: gemini-2.0-flash-exp)');
    console.error('\nExample:');
    console.error(' GEMINI_API_KEY=xxx npx tsx scripts/generate-image.ts "a sunset over mountains" output.png --width 1920 --height 1080');
    process.exit(1);
  }
  const prompt = args[0];
  const outputPath = args[1];
  const options: GenerateOptions = {};
  // Parse options as flag/value pairs following the two positionals.
  for (let i = 2; i < args.length; i += 2) {
    const flag = args[i];
    const value = args[i + 1];
    switch (flag) {
      case '--width': {
        const parsed = parseInt(value, 10);
        if (Number.isNaN(parsed)) {
          console.warn(`Invalid width: ${value}. Ignoring.`);
        } else {
          options.width = parsed;
        }
        break;
      }
      case '--height': {
        const parsed = parseInt(value, 10);
        if (Number.isNaN(parsed)) {
          console.warn(`Invalid height: ${value}. Ignoring.`);
        } else {
          options.height = parsed;
        }
        break;
      }
      case '--model':
        options.model = value;
        break;
      default:
        console.warn(`Unknown option: ${flag}`);
    }
  }
  return { prompt, outputPath, options };
}
// Entry point: parse CLI arguments, then run the generation workflow.
// Any unhandled rejection is reported and exits with a non-zero status.
const cli = parseArgs();
generateImage(cli.prompt, cli.outputPath, cli.options).catch((err: unknown) => {
  console.error('Fatal error:', err);
  process.exit(1);
});

View File

@@ -0,0 +1,18 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "ESNext",
"moduleResolution": "node",
"lib": ["ES2022"],
"esModuleInterop": true,
"skipLibCheck": true,
"strict": true,
"resolveJsonModule": true,
"allowSyntheticDefaultImports": true,
"forceConsistentCasingInFileNames": true,
"outDir": "./dist",
"rootDir": "./scripts"
},
"include": ["scripts/**/*"],
"exclude": ["node_modules", "dist"]
}