Initial commit
skills/gemini-imagegen/.env.example (new file, 3 lines added)
@@ -0,0 +1,3 @@
# Google Gemini API Key
# Get your API key from: https://makersuite.google.com/app/apikey
GEMINI_API_KEY=your-api-key-here
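The scripts in this commit read `GEMINI_API_KEY` directly from the environment, so `.env.example` is only a template. If you would rather load the key from a `.env` file than export it in the shell, a minimal sketch using the `dotenv` package could look like the following (dotenv is not a dependency of this skill; adding it is an assumption):

```ts
// Hypothetical setup: load GEMINI_API_KEY from .env before running a script.
// Assumes `npm install dotenv` has been run; dotenv is not in package.json.
import 'dotenv/config'; // reads .env into process.env

const apiKey = process.env.GEMINI_API_KEY;
if (!apiKey) {
  console.error('GEMINI_API_KEY is not set (see .env.example)');
  process.exit(1);
}
console.log('API key loaded, length:', apiKey.length);
```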
skills/gemini-imagegen/.gitignore (new file, vendored, 34 lines added)
@@ -0,0 +1,34 @@
# Dependencies
node_modules/

# Build outputs
dist/
*.js
*.js.map

# Environment variables
.env
.env.local

# Generated images (examples)
*.png
*.jpg
*.jpeg
*.gif
*.webp
!examples/*.png
!examples/*.jpg

# IDE
.vscode/
.idea/

# OS
.DS_Store
Thumbs.db

# Logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
skills/gemini-imagegen/README.md (new file, 103 lines added)
@@ -0,0 +1,103 @@
# Gemini ImageGen Skill

AI-powered image generation, editing, and composition using Google's Gemini API.

## Quick Start

1. **Install dependencies:**
   ```bash
   npm install
   ```

2. **Set your API key:**
   ```bash
   export GEMINI_API_KEY="your-api-key-here"
   ```
   Get your key from: https://makersuite.google.com/app/apikey

3. **Generate an image:**
   ```bash
   npm run generate "a sunset over mountains" output.png
   ```

## Features

- **Generate**: Create images from text descriptions
- **Edit**: Modify existing images with natural language prompts
- **Compose**: Combine multiple images with flexible layouts

## Usage Examples

### Generate Images
```bash
# Basic generation
npm run generate "futuristic city skyline" city.png

# Custom size
npm run generate "modern office" office.png -- --width 1920 --height 1080
```

### Edit Images
```bash
# Style transformation
npm run edit photo.jpg "make it look like a watercolor painting" artistic.png

# Object modification
npm run edit landscape.png "add a rainbow in the sky" enhanced.png
```

### Compose Images
```bash
# Grid layout (default)
npm run compose collage.png img1.jpg img2.jpg img3.jpg img4.jpg

# Horizontal banner
npm run compose banner.png left.png right.png -- --layout horizontal

# Custom composition
npm run compose result.png a.jpg b.jpg -- --prompt "blend seamlessly"
```

## Scripts

- `npm run generate <prompt> <output>` - Generate image from text
- `npm run edit <source> <prompt> <output>` - Edit existing image
- `npm run compose <output> <images...>` - Compose multiple images

## Configuration

### Environment Variables

- `GEMINI_API_KEY` (required) - Your Google Gemini API key

### Options

See `SKILL.md` for detailed documentation on all available options and parameters.

## Development Notes

This is a local development skill that runs on your machine, not on Cloudflare Workers. It's designed for:

- Design workflows and asset creation
- Visual content generation
- Image manipulation and prototyping
- Creating test images for development

## Implementation Status

**Note**: The current implementation includes:
- Complete TypeScript structure
- Argument parsing and validation
- Gemini API integration for image analysis
- Comprehensive error handling

For production use with actual image generation/editing, you'll need to:
1. Use the Imagen model (imagen-3.0-generate-001)
2. Implement proper image data handling
3. Add output file writing with actual image data

Refer to the [Gemini Imagen documentation](https://ai.google.dev/docs/imagen) for implementation details.

## License

MIT
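The Implementation Status section above lists three steps for production use. A minimal sketch of steps 2 and 3 (handling the returned image data and writing the output file) is shown below; it follows the response shape already referenced in the scripts' comments, and it assumes step 1 (an image-capable model) has actually produced an `inlineData` part:

```ts
// Hedged sketch of steps 2 and 3: pull base64 image data out of a Gemini
// response and write it to disk. Whether a given model returns an image part
// is an assumption; the field shape mirrors the commented-out lines in the scripts.
import { writeFileSync } from 'fs';
import type { GenerateContentResult } from '@google/generative-ai';

function saveInlineImage(result: GenerateContentResult, outputPath: string): boolean {
  const parts = result.response.candidates?.[0]?.content?.parts ?? [];
  for (const part of parts) {
    if (part.inlineData?.data) {
      // Decode the base64 payload and write it as a binary image file.
      writeFileSync(outputPath, Buffer.from(part.inlineData.data, 'base64'));
      return true; // wrote the first image part found
    }
  }
  return false; // response contained no inline image data
}
```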
skills/gemini-imagegen/SKILL.md (new file, 231 lines added)
@@ -0,0 +1,231 @@
---
name: gemini-imagegen
description: Generate, edit, and compose images using Google's Gemini AI API for design workflows and visual content creation
triggers: ["image generation", "visual content", "AI art", "image editing", "design automation"]
---

# Gemini ImageGen SKILL

## Overview

This skill provides image generation and manipulation capabilities using Google's Gemini AI API. It's designed for local development workflows where you need to create or modify images using AI assistance.

## Features

- **Generate Images**: Create images from text descriptions
- **Edit Images**: Modify existing images based on text prompts
- **Compose Images**: Combine multiple images with layout instructions
- **Multiple Formats**: Support for PNG, JPEG, and other common image formats
- **Size Options**: Flexible output dimensions for different use cases

## Environment Setup

This skill requires a Gemini API key:

```bash
export GEMINI_API_KEY="your-api-key-here"
```

Get your API key from: https://makersuite.google.com/app/apikey

## Available Scripts

### 1. Generate Image (`scripts/generate-image.ts`)

Create new images from text descriptions.

**Usage:**
```bash
npx tsx scripts/generate-image.ts <prompt> <output-path> [options]
```

**Arguments:**
- `prompt`: Text description of the image to generate
- `output-path`: Where to save the generated image (e.g., `./output.png`)

**Options:**
- `--width <number>`: Image width in pixels (default: 1024)
- `--height <number>`: Image height in pixels (default: 1024)
- `--model <string>`: Gemini model to use (default: 'gemini-2.0-flash-exp')

**Examples:**
```bash
# Basic usage
GEMINI_API_KEY=xxx npx tsx scripts/generate-image.ts "a sunset over mountains" output.png

# Custom size
npx tsx scripts/generate-image.ts "modern office workspace" office.png --width 1920 --height 1080

# Using npm script
npm run generate "futuristic city skyline" city.png
```

### 2. Edit Image (`scripts/edit-image.ts`)

Modify existing images based on text instructions.

**Usage:**
```bash
npx tsx scripts/edit-image.ts <source-image> <prompt> <output-path> [options]
```

**Arguments:**
- `source-image`: Path to the image to edit
- `prompt`: Text description of the desired changes
- `output-path`: Where to save the edited image

**Options:**
- `--model <string>`: Gemini model to use (default: 'gemini-2.0-flash-exp')

**Examples:**
```bash
# Basic editing
GEMINI_API_KEY=xxx npx tsx scripts/edit-image.ts photo.jpg "add a blue sky" edited.jpg

# Style transfer
npx tsx scripts/edit-image.ts portrait.png "make it look like a watercolor painting" artistic.png

# Using npm script
npm run edit photo.jpg "remove background" no-bg.png
```

### 3. Compose Images (`scripts/compose-images.ts`)

Combine multiple images into a single composition.

**Usage:**
```bash
npx tsx scripts/compose-images.ts <output-path> <image1> <image2> [image3...] [options]
```

**Arguments:**
- `output-path`: Where to save the composed image
- `image1, image2, ...`: Paths to images to combine (2-4 images)

**Options:**
- `--layout <string>`: Layout pattern (horizontal, vertical, grid, custom) (default: 'grid')
- `--prompt <string>`: Additional instructions for composition
- `--width <number>`: Output width in pixels (default: auto)
- `--height <number>`: Output height in pixels (default: auto)

**Examples:**
```bash
# Grid layout
GEMINI_API_KEY=xxx npx tsx scripts/compose-images.ts collage.png img1.jpg img2.jpg img3.jpg img4.jpg

# Horizontal layout
npx tsx scripts/compose-images.ts banner.png left.png right.png --layout horizontal

# Custom composition with prompt
npx tsx scripts/compose-images.ts result.png a.jpg b.jpg --prompt "blend seamlessly with gradient transition"

# Using npm script
npm run compose output.png photo1.jpg photo2.jpg photo3.jpg --layout vertical
```

## NPM Scripts

The package.json includes convenient npm scripts:

```bash
npm run generate <prompt> <output>        # Generate image from prompt
npm run edit <source> <prompt> <output>   # Edit existing image
npm run compose <output> <images...>      # Compose multiple images
```

## Installation

From the skill directory:

```bash
npm install
```

This installs:
- `@google/generative-ai`: Google's Gemini API SDK
- `tsx`: TypeScript execution runtime
- `typescript`: TypeScript compiler

## Usage in Design Workflows

### Creating Marketing Assets
```bash
# Generate hero image
npm run generate "modern tech startup hero image, clean, professional" hero.png --width 1920 --height 1080

# Create variations
npm run edit hero.png "change color scheme to blue and green" hero-variant.png

# Compose for social media
npm run compose social-post.png hero.png logo.png --layout horizontal
```

### Rapid Prototyping
```bash
# Generate UI mockup
npm run generate "mobile app login screen, minimalist design" mockup.png --width 375 --height 812

# Iterate on design
npm run edit mockup.png "add a gradient background" mockup-v2.png
```

### Content Creation
```bash
# Generate illustrations
npm run generate "technical diagram of cloud architecture" diagram.png

# Create composite images
npm run compose infographic.png chart1.png chart2.png diagram.png --layout vertical
```

## Technical Details

### Image Generation
- Uses Gemini's imagen-3.0-generate-001 model
- Supports text-to-image generation
- Configurable output dimensions
- Automatic format detection from file extension

### Image Editing
- Uses Gemini's vision capabilities
- Applies transformations based on natural language
- Preserves original image quality where possible
- Supports various editing operations (style, objects, colors, etc.)

### Image Composition
- Intelligent layout algorithms
- Automatic sizing and spacing
- Seamless blending options
- Support for multiple composition patterns

## Error Handling

Common errors and solutions:

1. **Missing API Key**: Ensure `GEMINI_API_KEY` environment variable is set
2. **Invalid Image Format**: Use supported formats (PNG, JPEG, WebP)
3. **File Not Found**: Verify source image paths are correct
4. **API Rate Limits**: Implement delays between requests if needed
5. **Large File Sizes**: Compress images before editing/composing

## Limitations

- API rate limits apply based on your Gemini API tier
- Generated images are subject to Gemini's content policies
- Maximum image dimensions depend on the model used
- Processing time varies based on complexity and size

## Integration with Claude Code

This skill runs locally and can be used during development:

1. **Design System Creation**: Generate component mockups and visual assets
2. **Documentation**: Create diagrams and illustrations for docs
3. **Testing**: Generate test images for visual regression testing
4. **Prototyping**: Rapid iteration on visual concepts

## See Also

- [Google Gemini API Documentation](https://ai.google.dev/docs)
- [Gemini Image Generation Guide](https://ai.google.dev/docs/imagen)
- Edge Stack Plugin for deployment workflows
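Item 4 under Error Handling above suggests adding delays between requests when rate limits are hit. A minimal sketch of that idea, with arbitrary retry counts and delays and no extra dependencies:

```ts
// Hedged sketch: retry a Gemini call with a pause between attempts.
// The attempt count and delay values are illustrative, not tuned.
const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

async function withRetry<T>(fn: () => Promise<T>, attempts = 3, delayMs = 2000): Promise<T> {
  let lastError: unknown;
  for (let i = 0; i < attempts; i++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;
      if (i < attempts - 1) {
        console.warn(`Attempt ${i + 1} failed, waiting ${delayMs} ms before retrying...`);
        await sleep(delayMs);
      }
    }
  }
  throw lastError;
}

// Example: wrap the analysis call made in the scripts below.
// const result = await withRetry(() => generativeModel.generateContent(contentParts));
```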
skills/gemini-imagegen/package.json (new file, 28 lines added)
@@ -0,0 +1,28 @@
{
  "name": "gemini-imagegen",
  "version": "1.0.0",
  "description": "Generate, edit, and compose images using Google's Gemini AI API",
  "type": "module",
  "scripts": {
    "generate": "npx tsx scripts/generate-image.ts",
    "edit": "npx tsx scripts/edit-image.ts",
    "compose": "npx tsx scripts/compose-images.ts"
  },
  "keywords": [
    "gemini",
    "image-generation",
    "ai",
    "google-ai",
    "image-editing"
  ],
  "author": "",
  "license": "MIT",
  "dependencies": {
    "@google/generative-ai": "^0.21.0"
  },
  "devDependencies": {
    "@types/node": "^20.11.0",
    "tsx": "^4.7.0",
    "typescript": "^5.3.0"
  }
}
skills/gemini-imagegen/scripts/compose-images.ts (new file, 287 lines added)
@@ -0,0 +1,287 @@
#!/usr/bin/env node
import { GoogleGenerativeAI } from '@google/generative-ai';
import { readFileSync, writeFileSync, existsSync } from 'fs';
import { resolve } from 'path';

interface ComposeOptions {
  layout?: 'horizontal' | 'vertical' | 'grid' | 'custom';
  prompt?: string;
  width?: number;
  height?: number;
  model?: string;
}

async function composeImages(
  outputPath: string,
  imagePaths: string[],
  options: ComposeOptions = {}
): Promise<void> {
  const apiKey = process.env.GEMINI_API_KEY;

  if (!apiKey) {
    console.error('Error: GEMINI_API_KEY environment variable is required');
    console.error('Get your API key from: https://makersuite.google.com/app/apikey');
    process.exit(1);
  }

  if (imagePaths.length < 2) {
    console.error('Error: At least 2 images are required for composition');
    process.exit(1);
  }

  if (imagePaths.length > 4) {
    console.error('Error: Maximum 4 images supported for composition');
    process.exit(1);
  }

  // Verify all images exist
  const resolvedPaths: string[] = [];
  for (const imagePath of imagePaths) {
    const resolvedPath = resolve(imagePath);
    if (!existsSync(resolvedPath)) {
      console.error(`Error: Image not found: ${resolvedPath}`);
      process.exit(1);
    }
    resolvedPaths.push(resolvedPath);
  }

  const {
    layout = 'grid',
    prompt = '',
    width,
    height,
    model = 'gemini-2.0-flash-exp'
  } = options;

  console.log('Composing images...');
  console.log(`Images: ${resolvedPaths.length}`);
  console.log(`Layout: ${layout}`);
  console.log(`Model: ${model}`);
  if (prompt) console.log(`Custom prompt: "${prompt}"`);

  try {
    const genAI = new GoogleGenerativeAI(apiKey);
    const generativeModel = genAI.getGenerativeModel({ model });

    // Read and encode all images
    const imageDataList: Array<{ data: string; mimeType: string; path: string }> = [];

    for (const imagePath of resolvedPaths) {
      const imageData = readFileSync(imagePath);
      const base64Image = imageData.toString('base64');
      const mimeType = getMimeType(imagePath);

      imageDataList.push({
        data: base64Image,
        mimeType,
        path: imagePath
      });

      console.log(`Loaded: ${imagePath} (${(imageData.length / 1024).toFixed(2)} KB)`);
    }

    // Build composition prompt
    let compositionPrompt = `You are an image composition assistant. Analyze these ${imageDataList.length} images and describe how to combine them into a single composition using a ${layout} layout.`;

    if (width && height) {
      compositionPrompt += ` The output should be ${width}x${height} pixels.`;
    }

    if (prompt) {
      compositionPrompt += ` Additional instructions: ${prompt}`;
    }

    compositionPrompt += '\n\nProvide detailed instructions for:\n';
    compositionPrompt += '1. Optimal arrangement of images\n';
    compositionPrompt += '2. Sizing and spacing recommendations\n';
    compositionPrompt += '3. Any blending or transition effects\n';
    compositionPrompt += '4. Color harmony adjustments';

    // Prepare content parts with all images
    const contentParts: Array<any> = [];

    for (const imageData of imageDataList) {
      contentParts.push({
        inlineData: {
          data: imageData.data,
          mimeType: imageData.mimeType
        }
      });
    }

    contentParts.push(compositionPrompt);

    // Analyze the composition
    const result = await generativeModel.generateContent(contentParts);
    const response = result.response;
    const compositionInstructions = response.text();

    console.log('\nComposition Analysis:');
    console.log(compositionInstructions);

    // For actual image composition with Gemini, you would typically:
    // 1. Use an image composition/editing model
    // 2. Send all source images with layout instructions
    // 3. Receive the composed image as base64
    // 4. Save to output path

    console.warn('\nNote: This is a demonstration implementation.');
    console.warn('For actual image composition, you would use specialized image composition APIs.');
    console.warn('The model has analyzed the images and provided composition instructions.');

    // Calculate suggested dimensions based on layout
    const suggestedDimensions = calculateDimensions(layout, imageDataList.length, width, height);
    console.log(`\nSuggested output dimensions: ${suggestedDimensions.width}x${suggestedDimensions.height}`);

    // In a real implementation:
    // const composedImageData = Buffer.from(response.candidates[0].content.parts[0].inlineData.data, 'base64');
    // writeFileSync(resolve(outputPath), composedImageData);

    console.log(`\nTo implement actual image composition:`);
    console.log(`1. Use an image composition library or service`);
    console.log(`2. Apply the ${layout} layout with ${imageDataList.length} images`);
    console.log(`3. Follow the composition instructions provided above`);
    console.log(`4. Save to: ${resolve(outputPath)}`);

  } catch (error) {
    if (error instanceof Error) {
      console.error('Error composing images:', error.message);
      if (error.message.includes('API key')) {
        console.error('\nPlease verify your GEMINI_API_KEY is valid');
      }
    } else {
      console.error('Error composing images:', error);
    }
    process.exit(1);
  }
}

function getMimeType(filePath: string): string {
  const extension = filePath.toLowerCase().split('.').pop();
  const mimeTypes: Record<string, string> = {
    'jpg': 'image/jpeg',
    'jpeg': 'image/jpeg',
    'png': 'image/png',
    'gif': 'image/gif',
    'webp': 'image/webp',
    'bmp': 'image/bmp'
  };
  return mimeTypes[extension || ''] || 'image/jpeg';
}

function calculateDimensions(
  layout: string,
  imageCount: number,
  width?: number,
  height?: number
): { width: number; height: number } {
  // If dimensions are provided, use them
  if (width && height) {
    return { width, height };
  }

  // Default image size assumption
  const defaultSize = 1024;

  switch (layout) {
    case 'horizontal':
      return {
        width: width || defaultSize * imageCount,
        height: height || defaultSize
      };
    case 'vertical':
      return {
        width: width || defaultSize,
        height: height || defaultSize * imageCount
      };
    case 'grid':
      const cols = Math.ceil(Math.sqrt(imageCount));
      const rows = Math.ceil(imageCount / cols);
      return {
        width: width || defaultSize * cols,
        height: height || defaultSize * rows
      };
    case 'custom':
    default:
      return {
        width: width || defaultSize,
        height: height || defaultSize
      };
  }
}

// Parse command line arguments
function parseArgs(): { outputPath: string; imagePaths: string[]; options: ComposeOptions } {
  const args = process.argv.slice(2);

  if (args.length < 3) {
    console.error('Usage: compose-images.ts <output-path> <image1> <image2> [image3...] [options]');
    console.error('\nArguments:');
    console.error('  output-path   Where to save the composed image');
    console.error('  image1-4      Paths to images to combine (2-4 images)');
    console.error('\nOptions:');
    console.error('  --layout <string>   Layout pattern (horizontal|vertical|grid|custom) (default: grid)');
    console.error('  --prompt <string>   Additional composition instructions');
    console.error('  --width <number>    Output width in pixels (default: auto)');
    console.error('  --height <number>   Output height in pixels (default: auto)');
    console.error('  --model <string>    Gemini model to use (default: gemini-2.0-flash-exp)');
    console.error('\nExample:');
    console.error('  GEMINI_API_KEY=xxx npx tsx scripts/compose-images.ts collage.png img1.jpg img2.jpg img3.jpg --layout grid');
    process.exit(1);
  }

  const outputPath = args[0];
  const imagePaths: string[] = [];
  const options: ComposeOptions = {};

  // Parse image paths and options
  for (let i = 1; i < args.length; i++) {
    const arg = args[i];

    if (arg.startsWith('--')) {
      const flag = arg;
      const value = args[i + 1];

      switch (flag) {
        case '--layout':
          if (['horizontal', 'vertical', 'grid', 'custom'].includes(value)) {
            options.layout = value as ComposeOptions['layout'];
          } else {
            console.warn(`Invalid layout: ${value}. Using default: grid`);
          }
          i++;
          break;
        case '--prompt':
          options.prompt = value;
          i++;
          break;
        case '--width':
          options.width = parseInt(value, 10);
          i++;
          break;
        case '--height':
          options.height = parseInt(value, 10);
          i++;
          break;
        case '--model':
          options.model = value;
          i++;
          break;
        default:
          console.warn(`Unknown option: ${flag}`);
          i++;
      }
    } else {
      imagePaths.push(arg);
    }
  }

  return { outputPath, imagePaths, options };
}

// Main execution
const { outputPath, imagePaths, options } = parseArgs();
composeImages(outputPath, imagePaths, options).catch((error) => {
  console.error('Fatal error:', error);
  process.exit(1);
});
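The script above stops at printing layout instructions and notes that real composition needs "an image composition library or service". One possible local approach uses the `sharp` library (not a dependency of this package; adding it is an assumption). A rough sketch of a horizontal strip, reusing the 1024-pixel default from `calculateDimensions`:

```ts
// Hedged sketch: actual pixel composition with sharp (`npm install sharp` assumed).
// Lays the input images side by side on a white canvas, mirroring the
// 'horizontal' case in calculateDimensions above.
import sharp from 'sharp';

async function composeHorizontal(imagePaths: string[], outputPath: string, tile = 1024): Promise<void> {
  // Resize every input to a square tile so the strip has uniform height.
  const tiles = await Promise.all(
    imagePaths.map((p) => sharp(p).resize(tile, tile, { fit: 'cover' }).png().toBuffer())
  );

  await sharp({
    create: {
      width: tile * tiles.length,
      height: tile,
      channels: 4,
      background: { r: 255, g: 255, b: 255, alpha: 1 }
    }
  })
    .composite(tiles.map((input, i) => ({ input, left: i * tile, top: 0 })))
    .png()
    .toFile(outputPath);
}

// Usage: await composeHorizontal(['left.png', 'right.png'], 'banner.png');
```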
skills/gemini-imagegen/scripts/edit-image.ts (new file, 162 lines added)
@@ -0,0 +1,162 @@
#!/usr/bin/env node
import { GoogleGenerativeAI } from '@google/generative-ai';
import { readFileSync, writeFileSync, existsSync } from 'fs';
import { resolve } from 'path';

interface EditOptions {
  model?: string;
}

async function editImage(
  sourcePath: string,
  prompt: string,
  outputPath: string,
  options: EditOptions = {}
): Promise<void> {
  const apiKey = process.env.GEMINI_API_KEY;

  if (!apiKey) {
    console.error('Error: GEMINI_API_KEY environment variable is required');
    console.error('Get your API key from: https://makersuite.google.com/app/apikey');
    process.exit(1);
  }

  const resolvedSourcePath = resolve(sourcePath);

  if (!existsSync(resolvedSourcePath)) {
    console.error(`Error: Source image not found: ${resolvedSourcePath}`);
    process.exit(1);
  }

  const { model = 'gemini-2.0-flash-exp' } = options;

  console.log('Editing image...');
  console.log(`Source: ${resolvedSourcePath}`);
  console.log(`Prompt: "${prompt}"`);
  console.log(`Model: ${model}`);

  try {
    const genAI = new GoogleGenerativeAI(apiKey);
    const generativeModel = genAI.getGenerativeModel({ model });

    // Read and encode the source image
    const imageData = readFileSync(resolvedSourcePath);
    const base64Image = imageData.toString('base64');

    // Determine MIME type from file extension
    const mimeType = getMimeType(resolvedSourcePath);

    console.log(`Image size: ${(imageData.length / 1024).toFixed(2)} KB`);
    console.log(`MIME type: ${mimeType}`);

    // Use Gemini's vision capabilities to analyze and describe the edit
    const enhancedPrompt = `You are an image editing assistant. Analyze this image and describe how to apply the following edit: "${prompt}". Provide detailed instructions for the transformation.`;

    const result = await generativeModel.generateContent([
      {
        inlineData: {
          data: base64Image,
          mimeType: mimeType
        }
      },
      enhancedPrompt
    ]);

    const response = result.response;
    const editInstructions = response.text();

    console.log('\nEdit Analysis:');
    console.log(editInstructions);

    // For actual image editing with Gemini, you would typically:
    // 1. Use the Imagen model's image editing capabilities
    // 2. Send the source image with the edit prompt
    // 3. Receive the edited image as base64
    // 4. Save to output path

    console.warn('\nNote: This is a demonstration implementation.');
    console.warn('For actual image editing, you would use Gemini\'s image editing API.');
    console.warn('The model has analyzed the image and provided edit instructions.');

    // In a real implementation with Imagen editing:
    // const editedImageData = Buffer.from(response.candidates[0].content.parts[0].inlineData.data, 'base64');
    // writeFileSync(resolve(outputPath), editedImageData);

    console.log(`\nTo implement actual image editing:`);
    console.log(`1. Use Gemini's image editing endpoint`);
    console.log(`2. Send source image with edit prompt`);
    console.log(`3. Parse the edited image data from response`);
    console.log(`4. Save to: ${resolve(outputPath)}`);
    console.log(`\nRefer to: https://ai.google.dev/docs/imagen`);

  } catch (error) {
    if (error instanceof Error) {
      console.error('Error editing image:', error.message);
      if (error.message.includes('API key')) {
        console.error('\nPlease verify your GEMINI_API_KEY is valid');
      }
    } else {
      console.error('Error editing image:', error);
    }
    process.exit(1);
  }
}

function getMimeType(filePath: string): string {
  const extension = filePath.toLowerCase().split('.').pop();
  const mimeTypes: Record<string, string> = {
    'jpg': 'image/jpeg',
    'jpeg': 'image/jpeg',
    'png': 'image/png',
    'gif': 'image/gif',
    'webp': 'image/webp',
    'bmp': 'image/bmp'
  };
  return mimeTypes[extension || ''] || 'image/jpeg';
}

// Parse command line arguments
function parseArgs(): { sourcePath: string; prompt: string; outputPath: string; options: EditOptions } {
  const args = process.argv.slice(2);

  if (args.length < 3) {
    console.error('Usage: edit-image.ts <source-image> <prompt> <output-path> [options]');
    console.error('\nArguments:');
    console.error('  source-image   Path to the image to edit');
    console.error('  prompt         Text description of the desired changes');
    console.error('  output-path    Where to save the edited image');
    console.error('\nOptions:');
    console.error('  --model <string>   Gemini model to use (default: gemini-2.0-flash-exp)');
    console.error('\nExample:');
    console.error('  GEMINI_API_KEY=xxx npx tsx scripts/edit-image.ts photo.jpg "add a blue sky" edited.jpg');
    process.exit(1);
  }

  const sourcePath = args[0];
  const prompt = args[1];
  const outputPath = args[2];
  const options: EditOptions = {};

  // Parse options
  for (let i = 3; i < args.length; i += 2) {
    const flag = args[i];
    const value = args[i + 1];

    switch (flag) {
      case '--model':
        options.model = value;
        break;
      default:
        console.warn(`Unknown option: ${flag}`);
    }
  }

  return { sourcePath, prompt, outputPath, options };
}

// Main execution
const { sourcePath, prompt, outputPath, options } = parseArgs();
editImage(sourcePath, prompt, outputPath, options).catch((error) => {
  console.error('Fatal error:', error);
  process.exit(1);
});
skills/gemini-imagegen/scripts/generate-image.ts (new file, 142 lines added)
@@ -0,0 +1,142 @@
#!/usr/bin/env node
import { GoogleGenerativeAI } from '@google/generative-ai';
import { writeFileSync } from 'fs';
import { resolve } from 'path';

interface GenerateOptions {
  width?: number;
  height?: number;
  model?: string;
}

async function generateImage(
  prompt: string,
  outputPath: string,
  options: GenerateOptions = {}
): Promise<void> {
  const apiKey = process.env.GEMINI_API_KEY;

  if (!apiKey) {
    console.error('Error: GEMINI_API_KEY environment variable is required');
    console.error('Get your API key from: https://makersuite.google.com/app/apikey');
    process.exit(1);
  }

  const {
    width = 1024,
    height = 1024,
    model = 'gemini-2.0-flash-exp'
  } = options;

  console.log('Generating image...');
  console.log(`Prompt: "${prompt}"`);
  console.log(`Dimensions: ${width}x${height}`);
  console.log(`Model: ${model}`);

  try {
    const genAI = new GoogleGenerativeAI(apiKey);
    const generativeModel = genAI.getGenerativeModel({ model });

    // Enhanced prompt with image generation context
    const enhancedPrompt = `Generate a high-quality image with the following description: ${prompt}. Image dimensions: ${width}x${height} pixels.`;

    // Note: As of the current Gemini API, direct image generation might require
    // using the Imagen model or multimodal capabilities. The prompt is sent as
    // plain text; generateContent accepts a bare string.
    const result = await generativeModel.generateContent(enhancedPrompt);

    const response = result.response;
    const text = response.text();

    // For actual image generation with Gemini, you would typically:
    // 1. Use the Imagen model (imagen-3.0-generate-001)
    // 2. Parse the response to get base64 image data
    // 3. Convert to binary and save

    // Placeholder implementation - in production, this would use the actual Imagen API
    console.warn('\nNote: This is a demonstration implementation.');
    console.warn('For actual image generation, you would use the Imagen model.');
    console.warn('Response from model:', text.substring(0, 200) + '...');

    // In a real implementation with Imagen:
    // const imageData = Buffer.from(response.candidates[0].content.parts[0].inlineData.data, 'base64');
    // writeFileSync(resolve(outputPath), imageData);

    console.log(`\nTo implement actual image generation:`);
    console.log(`1. Use the Imagen model (imagen-3.0-generate-001)`);
    console.log(`2. Parse the base64 image data from the response`);
    console.log(`3. Save to: ${resolve(outputPath)}`);
    console.log(`\nRefer to: https://ai.google.dev/docs/imagen`);

  } catch (error) {
    if (error instanceof Error) {
      console.error('Error generating image:', error.message);
      if (error.message.includes('API key')) {
        console.error('\nPlease verify your GEMINI_API_KEY is valid');
      }
    } else {
      console.error('Error generating image:', error);
    }
    process.exit(1);
  }
}

// Parse command line arguments
function parseArgs(): { prompt: string; outputPath: string; options: GenerateOptions } {
  const args = process.argv.slice(2);

  if (args.length < 2) {
    console.error('Usage: generate-image.ts <prompt> <output-path> [options]');
    console.error('\nArguments:');
    console.error('  prompt        Text description of the image to generate');
    console.error('  output-path   Where to save the generated image');
    console.error('\nOptions:');
    console.error('  --width <number>    Image width in pixels (default: 1024)');
    console.error('  --height <number>   Image height in pixels (default: 1024)');
    console.error('  --model <string>    Gemini model to use (default: gemini-2.0-flash-exp)');
    console.error('\nExample:');
    console.error('  GEMINI_API_KEY=xxx npx tsx scripts/generate-image.ts "a sunset over mountains" output.png --width 1920 --height 1080');
    process.exit(1);
  }

  const prompt = args[0];
  const outputPath = args[1];
  const options: GenerateOptions = {};

  // Parse options
  for (let i = 2; i < args.length; i += 2) {
    const flag = args[i];
    const value = args[i + 1];

    switch (flag) {
      case '--width':
        options.width = parseInt(value, 10);
        break;
      case '--height':
        options.height = parseInt(value, 10);
        break;
      case '--model':
        options.model = value;
        break;
      default:
        console.warn(`Unknown option: ${flag}`);
    }
  }

  return { prompt, outputPath, options };
}

// Main execution
const { prompt, outputPath, options } = parseArgs();
generateImage(prompt, outputPath, options).catch((error) => {
  console.error('Fatal error:', error);
  process.exit(1);
});
skills/gemini-imagegen/tsconfig.json (new file, 18 lines added)
@@ -0,0 +1,18 @@
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "ESNext",
    "moduleResolution": "node",
    "lib": ["ES2022"],
    "esModuleInterop": true,
    "skipLibCheck": true,
    "strict": true,
    "resolveJsonModule": true,
    "allowSyntheticDefaultImports": true,
    "forceConsistentCasingInFileNames": true,
    "outDir": "./dist",
    "rootDir": "./scripts"
  },
  "include": ["scripts/**/*"],
  "exclude": ["node_modules", "dist"]
}