Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:45:50 +08:00
commit bd85f56f7c
78 changed files with 33541 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
# Google Gemini API Key
# Get your API key from: https://makersuite.google.com/app/apikey
GEMINI_API_KEY=your-api-key-here

34
skills/gemini-imagegen/.gitignore vendored Normal file
View File

@@ -0,0 +1,34 @@
# Dependencies
node_modules/
# Build outputs
dist/
*.js
*.js.map
# Environment variables
.env
.env.local
# Generated images (examples)
*.png
*.jpg
*.jpeg
*.gif
*.webp
!examples/*.png
!examples/*.jpg
# IDE
.vscode/
.idea/
# OS
.DS_Store
Thumbs.db
# Logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*

View File

@@ -0,0 +1,103 @@
# Gemini ImageGen Skill
AI-powered image generation, editing, and composition using Google's Gemini API.
## Quick Start
1. **Install dependencies:**
```bash
npm install
```
2. **Set your API key:**
```bash
export GEMINI_API_KEY="your-api-key-here"
```
Get your key from: https://makersuite.google.com/app/apikey
3. **Generate an image:**
```bash
npm run generate "a sunset over mountains" output.png
```
## Features
- **Generate**: Create images from text descriptions
- **Edit**: Modify existing images with natural language prompts
- **Compose**: Combine multiple images with flexible layouts
## Usage Examples
### Generate Images
```bash
# Basic generation
npm run generate "futuristic city skyline" city.png
# Custom size
npm run generate "modern office" office.png -- --width 1920 --height 1080
```
### Edit Images
```bash
# Style transformation
npm run edit photo.jpg "make it look like a watercolor painting" artistic.png
# Object modification
npm run edit landscape.png "add a rainbow in the sky" enhanced.png
```
### Compose Images
```bash
# Grid layout (default)
npm run compose collage.png img1.jpg img2.jpg img3.jpg img4.jpg
# Horizontal banner
npm run compose banner.png left.png right.png -- --layout horizontal
# Custom composition
npm run compose result.png a.jpg b.jpg -- --prompt "blend seamlessly"
```
## Scripts
- `npm run generate <prompt> <output>` - Generate image from text
- `npm run edit <source> <prompt> <output>` - Edit existing image
- `npm run compose <output> <images...>` - Compose multiple images
## Configuration
### Environment Variables
- `GEMINI_API_KEY` (required) - Your Google Gemini API key
### Options
See `SKILL.md` for detailed documentation on all available options and parameters.
## Development Notes
This is a local development skill that runs on your machine, not on Cloudflare Workers. It's designed for:
- Design workflows and asset creation
- Visual content generation
- Image manipulation and prototyping
- Creating test images for development
## Implementation Status
**Note**: The current implementation includes:
- Complete TypeScript structure
- Argument parsing and validation
- Gemini API integration for image analysis
- Comprehensive error handling
For production use with actual image generation/editing, you'll need to:
1. Use the Imagen model (imagen-3.0-generate-001)
2. Implement proper image data handling
3. Add output file writing with actual image data
Refer to the [Gemini Imagen documentation](https://ai.google.dev/docs/imagen) for implementation details.
## License
MIT

View File

@@ -0,0 +1,231 @@
---
name: gemini-imagegen
description: Generate, edit, and compose images using Google's Gemini AI API for design workflows and visual content creation
triggers: ["image generation", "visual content", "AI art", "image editing", "design automation"]
---
# Gemini ImageGen SKILL
## Overview
This skill provides image generation and manipulation capabilities using Google's Gemini AI API. It's designed for local development workflows where you need to create or modify images using AI assistance.
## Features
- **Generate Images**: Create images from text descriptions
- **Edit Images**: Modify existing images based on text prompts
- **Compose Images**: Combine multiple images with layout instructions
- **Multiple Formats**: Support for PNG, JPEG, and other common image formats
- **Size Options**: Flexible output dimensions for different use cases
## Environment Setup
This skill requires a Gemini API key:
```bash
export GEMINI_API_KEY="your-api-key-here"
```
Get your API key from: https://makersuite.google.com/app/apikey
## Available Scripts
### 1. Generate Image (`scripts/generate-image.ts`)
Create new images from text descriptions.
**Usage:**
```bash
npx tsx scripts/generate-image.ts <prompt> <output-path> [options]
```
**Arguments:**
- `prompt`: Text description of the image to generate
- `output-path`: Where to save the generated image (e.g., `./output.png`)
**Options:**
- `--width <number>`: Image width in pixels (default: 1024)
- `--height <number>`: Image height in pixels (default: 1024)
- `--model <string>`: Gemini model to use (default: 'gemini-2.0-flash-exp')
**Examples:**
```bash
# Basic usage
GEMINI_API_KEY=xxx npx tsx scripts/generate-image.ts "a sunset over mountains" output.png
# Custom size
npx tsx scripts/generate-image.ts "modern office workspace" office.png --width 1920 --height 1080
# Using npm script
npm run generate "futuristic city skyline" city.png
```
### 2. Edit Image (`scripts/edit-image.ts`)
Modify existing images based on text instructions.
**Usage:**
```bash
npx tsx scripts/edit-image.ts <source-image> <prompt> <output-path> [options]
```
**Arguments:**
- `source-image`: Path to the image to edit
- `prompt`: Text description of the desired changes
- `output-path`: Where to save the edited image
**Options:**
- `--model <string>`: Gemini model to use (default: 'gemini-2.0-flash-exp')
**Examples:**
```bash
# Basic editing
GEMINI_API_KEY=xxx npx tsx scripts/edit-image.ts photo.jpg "add a blue sky" edited.jpg
# Style transfer
npx tsx scripts/edit-image.ts portrait.png "make it look like a watercolor painting" artistic.png
# Using npm script
npm run edit photo.jpg "remove background" no-bg.png
```
### 3. Compose Images (`scripts/compose-images.ts`)
Combine multiple images into a single composition.
**Usage:**
```bash
npx tsx scripts/compose-images.ts <output-path> <image1> <image2> [image3...] [options]
```
**Arguments:**
- `output-path`: Where to save the composed image
- `image1, image2, ...`: Paths to images to combine (2-4 images)
**Options:**
- `--layout <string>`: Layout pattern (horizontal, vertical, grid, custom) (default: 'grid')
- `--prompt <string>`: Additional instructions for composition
- `--width <number>`: Output width in pixels (default: auto)
- `--height <number>`: Output height in pixels (default: auto)
**Examples:**
```bash
# Grid layout
GEMINI_API_KEY=xxx npx tsx scripts/compose-images.ts collage.png img1.jpg img2.jpg img3.jpg img4.jpg
# Horizontal layout
npx tsx scripts/compose-images.ts banner.png left.png right.png --layout horizontal
# Custom composition with prompt
npx tsx scripts/compose-images.ts result.png a.jpg b.jpg --prompt "blend seamlessly with gradient transition"
# Using npm script
npm run compose output.png photo1.jpg photo2.jpg photo3.jpg --layout vertical
```
## NPM Scripts
The package.json includes convenient npm scripts:
```bash
npm run generate <prompt> <output> # Generate image from prompt
npm run edit <source> <prompt> <output> # Edit existing image
npm run compose <output> <images...> # Compose multiple images
```
## Installation
From the skill directory:
```bash
npm install
```
This installs:
- `@google/generative-ai`: Google's Gemini API SDK
- `tsx`: TypeScript execution runtime
- `typescript`: TypeScript compiler
## Usage in Design Workflows
### Creating Marketing Assets
```bash
# Generate hero image
npm run generate "modern tech startup hero image, clean, professional" hero.png --width 1920 --height 1080
# Create variations
npm run edit hero.png "change color scheme to blue and green" hero-variant.png
# Compose for social media
npm run compose social-post.png hero.png logo.png --layout horizontal
```
### Rapid Prototyping
```bash
# Generate UI mockup
npm run generate "mobile app login screen, minimalist design" mockup.png --width 375 --height 812
# Iterate on design
npm run edit mockup.png "add a gradient background" mockup-v2.png
```
### Content Creation
```bash
# Generate illustrations
npm run generate "technical diagram of cloud architecture" diagram.png
# Create composite images
npm run compose infographic.png chart1.png chart2.png diagram.png --layout vertical
```
## Technical Details
### Image Generation
- Uses Gemini's imagen-3.0-generate-001 model
- Supports text-to-image generation
- Configurable output dimensions
- Automatic format detection from file extension
### Image Editing
- Uses Gemini's vision capabilities
- Applies transformations based on natural language
- Preserves original image quality where possible
- Supports various editing operations (style, objects, colors, etc.)
### Image Composition
- Intelligent layout algorithms
- Automatic sizing and spacing
- Seamless blending options
- Support for multiple composition patterns
## Error Handling
Common errors and solutions:
1. **Missing API Key**: Ensure `GEMINI_API_KEY` environment variable is set
2. **Invalid Image Format**: Use supported formats (PNG, JPEG, WebP)
3. **File Not Found**: Verify source image paths are correct
4. **API Rate Limits**: Implement delays between requests if needed
5. **Large File Sizes**: Compress images before editing/composing
## Limitations
- API rate limits apply based on your Gemini API tier
- Generated images are subject to Gemini's content policies
- Maximum image dimensions depend on the model used
- Processing time varies based on complexity and size
## Integration with Claude Code
This skill runs locally and can be used during development:
1. **Design System Creation**: Generate component mockups and visual assets
2. **Documentation**: Create diagrams and illustrations for docs
3. **Testing**: Generate test images for visual regression testing
4. **Prototyping**: Rapid iteration on visual concepts
## See Also
- [Google Gemini API Documentation](https://ai.google.dev/docs)
- [Gemini Image Generation Guide](https://ai.google.dev/docs/imagen)
- Edge Stack Plugin for deployment workflows

View File

@@ -0,0 +1,28 @@
{
"name": "gemini-imagegen",
"version": "1.0.0",
"description": "Generate, edit, and compose images using Google's Gemini AI API",
"type": "module",
"scripts": {
"generate": "npx tsx scripts/generate-image.ts",
"edit": "npx tsx scripts/edit-image.ts",
"compose": "npx tsx scripts/compose-images.ts"
},
"keywords": [
"gemini",
"image-generation",
"ai",
"google-ai",
"image-editing"
],
"author": "",
"license": "MIT",
"dependencies": {
"@google/generative-ai": "^0.21.0"
},
"devDependencies": {
"@types/node": "^20.11.0",
"tsx": "^4.7.0",
"typescript": "^5.3.0"
}
}

View File

@@ -0,0 +1,287 @@
#!/usr/bin/env node
import { GoogleGenerativeAI } from '@google/generative-ai';
import { readFileSync, writeFileSync, existsSync } from 'fs';
import { resolve } from 'path';
/** Options accepted by composeImages(). */
interface ComposeOptions {
  layout?: 'horizontal' | 'vertical' | 'grid' | 'custom'; // arrangement pattern (default: 'grid')
  prompt?: string;  // extra free-form composition instructions appended to the model prompt
  width?: number;   // output width in px; when omitted, derived from layout by calculateDimensions()
  height?: number;  // output height in px; when omitted, derived from layout by calculateDimensions()
  model?: string;   // Gemini model id (default: 'gemini-2.0-flash-exp')
}
/**
 * Analyze 2-4 source images with Gemini and print text instructions for
 * combining them into a single composition.
 *
 * NOTE(review): demonstration only — no output image is written; the model's
 * composition analysis and suggested dimensions are printed to the console.
 *
 * @param outputPath - Intended destination of the composed image (only echoed in logs).
 * @param imagePaths - Paths to 2-4 source images; each must exist on disk.
 * @param options - Layout, extra prompt, target size, and Gemini model id.
 */
async function composeImages(
  outputPath: string,
  imagePaths: string[],
  options: ComposeOptions = {}
): Promise<void> {
  // Fail fast with setup guidance when the API key is missing.
  const apiKey = process.env.GEMINI_API_KEY;
  if (!apiKey) {
    console.error('Error: GEMINI_API_KEY environment variable is required');
    console.error('Get your API key from: https://makersuite.google.com/app/apikey');
    process.exit(1);
  }
  // The tool supports between 2 and 4 images per composition.
  if (imagePaths.length < 2) {
    console.error('Error: At least 2 images are required for composition');
    process.exit(1);
  }
  if (imagePaths.length > 4) {
    console.error('Error: Maximum 4 images supported for composition');
    process.exit(1);
  }
  // Verify all images exist
  const resolvedPaths: string[] = [];
  for (const imagePath of imagePaths) {
    const resolvedPath = resolve(imagePath);
    if (!existsSync(resolvedPath)) {
      console.error(`Error: Image not found: ${resolvedPath}`);
      process.exit(1);
    }
    resolvedPaths.push(resolvedPath);
  }
  // Apply defaults for any options the caller omitted.
  const {
    layout = 'grid',
    prompt = '',
    width,
    height,
    model = 'gemini-2.0-flash-exp'
  } = options;
  console.log('Composing images...');
  console.log(`Images: ${resolvedPaths.length}`);
  console.log(`Layout: ${layout}`);
  console.log(`Model: ${model}`);
  if (prompt) console.log(`Custom prompt: "${prompt}"`);
  try {
    const genAI = new GoogleGenerativeAI(apiKey);
    const generativeModel = genAI.getGenerativeModel({ model });
    // Read and encode all images
    // (the SDK expects inline image parts as base64 with an explicit MIME type)
    const imageDataList: Array<{ data: string; mimeType: string; path: string }> = [];
    for (const imagePath of resolvedPaths) {
      const imageData = readFileSync(imagePath);
      const base64Image = imageData.toString('base64');
      const mimeType = getMimeType(imagePath);
      imageDataList.push({
        data: base64Image,
        mimeType,
        path: imagePath
      });
      console.log(`Loaded: ${imagePath} (${(imageData.length / 1024).toFixed(2)} KB)`);
    }
    // Build composition prompt
    let compositionPrompt = `You are an image composition assistant. Analyze these ${imageDataList.length} images and describe how to combine them into a single composition using a ${layout} layout.`;
    if (width && height) {
      compositionPrompt += ` The output should be ${width}x${height} pixels.`;
    }
    if (prompt) {
      compositionPrompt += ` Additional instructions: ${prompt}`;
    }
    compositionPrompt += '\n\nProvide detailed instructions for:\n';
    compositionPrompt += '1. Optimal arrangement of images\n';
    compositionPrompt += '2. Sizing and spacing recommendations\n';
    compositionPrompt += '3. Any blending or transition effects\n';
    compositionPrompt += '4. Color harmony adjustments';
    // Prepare content parts with all images
    // (all image parts first, then the single text prompt)
    const contentParts: Array<any> = [];
    for (const imageData of imageDataList) {
      contentParts.push({
        inlineData: {
          data: imageData.data,
          mimeType: imageData.mimeType
        }
      });
    }
    contentParts.push(compositionPrompt);
    // Analyze the composition
    const result = await generativeModel.generateContent(contentParts);
    const response = result.response;
    const compositionInstructions = response.text();
    console.log('\nComposition Analysis:');
    console.log(compositionInstructions);
    // For actual image composition with Gemini, you would typically:
    // 1. Use an image composition/editing model
    // 2. Send all source images with layout instructions
    // 3. Receive the composed image as base64
    // 4. Save to output path
    console.warn('\nNote: This is a demonstration implementation.');
    console.warn('For actual image composition, you would use specialized image composition APIs.');
    console.warn('The model has analyzed the images and provided composition instructions.');
    // Calculate suggested dimensions based on layout
    const suggestedDimensions = calculateDimensions(layout, imageDataList.length, width, height);
    console.log(`\nSuggested output dimensions: ${suggestedDimensions.width}x${suggestedDimensions.height}`);
    // In a real implementation:
    // const composedImageData = Buffer.from(response.candidates[0].content.parts[0].inlineData.data, 'base64');
    // writeFileSync(resolve(outputPath), composedImageData);
    console.log(`\nTo implement actual image composition:`);
    console.log(`1. Use an image composition library or service`);
    console.log(`2. Apply the ${layout} layout with ${imageDataList.length} images`);
    console.log(`3. Follow the composition instructions provided above`);
    console.log(`4. Save to: ${resolve(outputPath)}`);
  } catch (error) {
    // Surface API-key problems specifically; all other errors are reported as-is.
    if (error instanceof Error) {
      console.error('Error composing images:', error.message);
      if (error.message.includes('API key')) {
        console.error('\nPlease verify your GEMINI_API_KEY is valid');
      }
    } else {
      console.error('Error composing images:', error);
    }
    process.exit(1);
  }
}
/**
 * Map a file path's extension (case-insensitive) to an image MIME type.
 * Unknown or missing extensions fall back to 'image/jpeg'.
 */
function getMimeType(filePath: string): string {
  const ext = filePath.toLowerCase().split('.').pop() ?? '';
  switch (ext) {
    case 'jpg':
    case 'jpeg':
      return 'image/jpeg';
    case 'png':
      return 'image/png';
    case 'gif':
      return 'image/gif';
    case 'webp':
      return 'image/webp';
    case 'bmp':
      return 'image/bmp';
    default:
      return 'image/jpeg';
  }
}
/**
 * Suggest output dimensions for a composition.
 *
 * If both width and height are given they are returned unchanged; otherwise
 * each missing dimension is derived from the layout, assuming each source
 * image is roughly 1024x1024.
 *
 * FIX: the `grid` case previously declared `const cols/rows` directly inside
 * an unbraced `case`, leaking the bindings into the whole switch scope
 * (ESLint no-case-declarations); the case body is now braced.
 *
 * @param layout - 'horizontal' | 'vertical' | 'grid' | 'custom' (anything else behaves as 'custom').
 * @param imageCount - Number of source images being composed.
 * @param width - Optional explicit output width in px.
 * @param height - Optional explicit output height in px.
 * @returns Suggested `{ width, height }` in pixels.
 */
function calculateDimensions(
  layout: string,
  imageCount: number,
  width?: number,
  height?: number
): { width: number; height: number } {
  // If dimensions are provided, use them
  if (width && height) {
    return { width, height };
  }
  // Default image size assumption
  const defaultSize = 1024;
  switch (layout) {
    case 'horizontal':
      // Images side by side: width scales with count.
      return {
        width: width || defaultSize * imageCount,
        height: height || defaultSize
      };
    case 'vertical':
      // Images stacked: height scales with count.
      return {
        width: width || defaultSize,
        height: height || defaultSize * imageCount
      };
    case 'grid': {
      // Near-square grid: cols = ceil(sqrt(n)), rows fill the remainder.
      const cols = Math.ceil(Math.sqrt(imageCount));
      const rows = Math.ceil(imageCount / cols);
      return {
        width: width || defaultSize * cols,
        height: height || defaultSize * rows
      };
    }
    case 'custom':
    default:
      return {
        width: width || defaultSize,
        height: height || defaultSize
      };
  }
}
// Parse command line arguments
/**
 * Parse CLI arguments for compose-images: <output-path> followed by image
 * paths, with `--flag value` options allowed anywhere after the output path.
 * Prints usage and exits when fewer than 3 arguments are supplied.
 *
 * FIX: `--width`/`--height` values are now validated — previously a
 * non-numeric value put NaN into the options and propagated as "NaNxNaN"
 * into downstream dimension calculations. Invalid values are warned about
 * and ignored.
 */
function parseArgs(): { outputPath: string; imagePaths: string[]; options: ComposeOptions } {
  const args = process.argv.slice(2);
  if (args.length < 3) {
    console.error('Usage: compose-images.ts <output-path> <image1> <image2> [image3...] [options]');
    console.error('\nArguments:');
    console.error(' output-path Where to save the composed image');
    console.error(' image1-4 Paths to images to combine (2-4 images)');
    console.error('\nOptions:');
    console.error(' --layout <string> Layout pattern (horizontal|vertical|grid|custom) (default: grid)');
    console.error(' --prompt <string> Additional composition instructions');
    console.error(' --width <number> Output width in pixels (default: auto)');
    console.error(' --height <number> Output height in pixels (default: auto)');
    console.error(' --model <string> Gemini model to use (default: gemini-2.0-flash-exp)');
    console.error('\nExample:');
    console.error(' GEMINI_API_KEY=xxx npx tsx scripts/compose-images.ts collage.png img1.jpg img2.jpg img3.jpg --layout grid');
    process.exit(1);
  }
  const outputPath = args[0];
  const imagePaths: string[] = [];
  const options: ComposeOptions = {};
  // Parse image paths and options: anything starting with '--' is a flag
  // consuming one value; everything else is treated as an image path.
  for (let i = 1; i < args.length; i++) {
    const arg = args[i];
    if (arg.startsWith('--')) {
      const flag = arg;
      const value = args[i + 1];
      switch (flag) {
        case '--layout':
          if (['horizontal', 'vertical', 'grid', 'custom'].includes(value)) {
            options.layout = value as ComposeOptions['layout'];
          } else {
            console.warn(`Invalid layout: ${value}. Using default: grid`);
          }
          i++;
          break;
        case '--prompt':
          options.prompt = value;
          i++;
          break;
        case '--width': {
          const parsed = parseInt(value, 10);
          if (Number.isNaN(parsed)) {
            console.warn(`Invalid width: ${value}. Ignoring.`);
          } else {
            options.width = parsed;
          }
          i++;
          break;
        }
        case '--height': {
          const parsed = parseInt(value, 10);
          if (Number.isNaN(parsed)) {
            console.warn(`Invalid height: ${value}. Ignoring.`);
          } else {
            options.height = parsed;
          }
          i++;
          break;
        }
        case '--model':
          options.model = value;
          i++;
          break;
        default:
          console.warn(`Unknown option: ${flag}`);
          i++;
      }
    } else {
      imagePaths.push(arg);
    }
  }
  return { outputPath, imagePaths, options };
}
// Entry point: parse CLI arguments, then run the composition workflow.
// Any unhandled rejection is reported and exits with a non-zero status.
const cli = parseArgs();
composeImages(cli.outputPath, cli.imagePaths, cli.options).catch((err: unknown) => {
  console.error('Fatal error:', err);
  process.exit(1);
});

View File

@@ -0,0 +1,162 @@
#!/usr/bin/env node
import { GoogleGenerativeAI } from '@google/generative-ai';
import { readFileSync, writeFileSync, existsSync } from 'fs';
import { resolve } from 'path';
/** Options accepted by editImage(). */
interface EditOptions {
  model?: string; // Gemini model id (default: 'gemini-2.0-flash-exp')
}
/**
 * Analyze a source image with Gemini and print instructions describing how
 * the requested edit would be applied.
 *
 * NOTE(review): demonstration only — no edited image is written; the model's
 * edit analysis is printed to the console.
 *
 * @param sourcePath - Path to the image to edit; must exist on disk.
 * @param prompt - Natural-language description of the desired change.
 * @param outputPath - Intended destination of the edited image (only echoed in logs).
 * @param options - Optional Gemini model id override.
 */
async function editImage(
  sourcePath: string,
  prompt: string,
  outputPath: string,
  options: EditOptions = {}
): Promise<void> {
  // Fail fast with setup guidance when the API key is missing.
  const apiKey = process.env.GEMINI_API_KEY;
  if (!apiKey) {
    console.error('Error: GEMINI_API_KEY environment variable is required');
    console.error('Get your API key from: https://makersuite.google.com/app/apikey');
    process.exit(1);
  }
  const resolvedSourcePath = resolve(sourcePath);
  if (!existsSync(resolvedSourcePath)) {
    console.error(`Error: Source image not found: ${resolvedSourcePath}`);
    process.exit(1);
  }
  const { model = 'gemini-2.0-flash-exp' } = options;
  console.log('Editing image...');
  console.log(`Source: ${resolvedSourcePath}`);
  console.log(`Prompt: "${prompt}"`);
  console.log(`Model: ${model}`);
  try {
    const genAI = new GoogleGenerativeAI(apiKey);
    const generativeModel = genAI.getGenerativeModel({ model });
    // Read and encode the source image
    // (the SDK expects inline images as base64 with an explicit MIME type)
    const imageData = readFileSync(resolvedSourcePath);
    const base64Image = imageData.toString('base64');
    // Determine MIME type from file extension
    const mimeType = getMimeType(resolvedSourcePath);
    console.log(`Image size: ${(imageData.length / 1024).toFixed(2)} KB`);
    console.log(`MIME type: ${mimeType}`);
    // Use Gemini's vision capabilities to analyze and describe the edit
    const enhancedPrompt = `You are an image editing assistant. Analyze this image and describe how to apply the following edit: "${prompt}". Provide detailed instructions for the transformation.`;
    const result = await generativeModel.generateContent([
      {
        inlineData: {
          data: base64Image,
          mimeType: mimeType
        }
      },
      enhancedPrompt
    ]);
    const response = result.response;
    const editInstructions = response.text();
    console.log('\nEdit Analysis:');
    console.log(editInstructions);
    // For actual image editing with Gemini, you would typically:
    // 1. Use the Imagen model's image editing capabilities
    // 2. Send the source image with the edit prompt
    // 3. Receive the edited image as base64
    // 4. Save to output path
    console.warn('\nNote: This is a demonstration implementation.');
    console.warn('For actual image editing, you would use Gemini\'s image editing API.');
    console.warn('The model has analyzed the image and provided edit instructions.');
    // In a real implementation with Imagen editing:
    // const editedImageData = Buffer.from(response.candidates[0].content.parts[0].inlineData.data, 'base64');
    // writeFileSync(resolve(outputPath), editedImageData);
    console.log(`\nTo implement actual image editing:`);
    console.log(`1. Use Gemini's image editing endpoint`);
    console.log(`2. Send source image with edit prompt`);
    console.log(`3. Parse the edited image data from response`);
    console.log(`4. Save to: ${resolve(outputPath)}`);
    console.log(`\nRefer to: https://ai.google.dev/docs/imagen`);
  } catch (error) {
    // Surface API-key problems specifically; all other errors are reported as-is.
    if (error instanceof Error) {
      console.error('Error editing image:', error.message);
      if (error.message.includes('API key')) {
        console.error('\nPlease verify your GEMINI_API_KEY is valid');
      }
    } else {
      console.error('Error editing image:', error);
    }
    process.exit(1);
  }
}
/**
 * Resolve an image MIME type from a file path's extension (case-insensitive).
 * Falls back to 'image/jpeg' for unknown or missing extensions.
 */
function getMimeType(filePath: string): string {
  const extensionToMime = new Map<string, string>([
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['png', 'image/png'],
    ['gif', 'image/gif'],
    ['webp', 'image/webp'],
    ['bmp', 'image/bmp'],
  ]);
  const ext = filePath.toLowerCase().split('.').pop() ?? '';
  return extensionToMime.get(ext) ?? 'image/jpeg';
}
// Parse command line arguments
/**
 * Parse CLI arguments for edit-image: three positional arguments
 * (<source-image> <prompt> <output-path>) followed by optional
 * flag/value pairs. Prints usage and exits when fewer than 3 arguments
 * are supplied.
 */
function parseArgs(): { sourcePath: string; prompt: string; outputPath: string; options: EditOptions } {
  const args = process.argv.slice(2);
  if (args.length < 3) {
    console.error('Usage: edit-image.ts <source-image> <prompt> <output-path> [options]');
    console.error('\nArguments:');
    console.error(' source-image Path to the image to edit');
    console.error(' prompt Text description of the desired changes');
    console.error(' output-path Where to save the edited image');
    console.error('\nOptions:');
    console.error(' --model <string> Gemini model to use (default: gemini-2.0-flash-exp)');
    console.error('\nExample:');
    console.error(' GEMINI_API_KEY=xxx npx tsx scripts/edit-image.ts photo.jpg "add a blue sky" edited.jpg');
    process.exit(1);
  }
  const [sourcePath, prompt, outputPath] = args;
  const options: EditOptions = {};
  // Everything after the three positionals is consumed as flag/value pairs.
  let i = 3;
  while (i < args.length) {
    const flag = args[i];
    const value = args[i + 1];
    if (flag === '--model') {
      options.model = value;
    } else {
      console.warn(`Unknown option: ${flag}`);
    }
    i += 2;
  }
  return { sourcePath, prompt, outputPath, options };
}
// Entry point: parse CLI arguments, then run the edit workflow.
// Any unhandled rejection is reported and exits with a non-zero status.
const cli = parseArgs();
editImage(cli.sourcePath, cli.prompt, cli.outputPath, cli.options).catch((err: unknown) => {
  console.error('Fatal error:', err);
  process.exit(1);
});

View File

@@ -0,0 +1,142 @@
#!/usr/bin/env node
import { GoogleGenerativeAI } from '@google/generative-ai';
import { writeFileSync } from 'fs';
import { resolve } from 'path';
/** Options accepted by generateImage(). */
interface GenerateOptions {
  width?: number;  // requested image width in px (default: 1024)
  height?: number; // requested image height in px (default: 1024)
  model?: string;  // Gemini model id (default: 'gemini-2.0-flash-exp')
}
/**
 * Ask Gemini for image-generation output from a text prompt.
 *
 * NOTE(review): demonstration only — no image file is written; the model's
 * text response is summarized and implementation guidance is printed.
 *
 * @param prompt - Text description of the image to generate.
 * @param outputPath - Intended destination of the generated image (only echoed in logs).
 * @param options - Optional width/height hints and Gemini model id override.
 */
async function generateImage(
  prompt: string,
  outputPath: string,
  options: GenerateOptions = {}
): Promise<void> {
  // Fail fast with setup guidance when the API key is missing.
  const apiKey = process.env.GEMINI_API_KEY;
  if (!apiKey) {
    console.error('Error: GEMINI_API_KEY environment variable is required');
    console.error('Get your API key from: https://makersuite.google.com/app/apikey');
    process.exit(1);
  }
  const {
    width = 1024,
    height = 1024,
    model = 'gemini-2.0-flash-exp'
  } = options;
  console.log('Generating image...');
  console.log(`Prompt: "${prompt}"`);
  console.log(`Dimensions: ${width}x${height}`);
  console.log(`Model: ${model}`);
  try {
    const genAI = new GoogleGenerativeAI(apiKey);
    const generativeModel = genAI.getGenerativeModel({ model });
    // Enhanced prompt with image generation context
    const enhancedPrompt = `Generate a high-quality image with the following description: ${prompt}. Image dimensions: ${width}x${height} pixels.`;
    // FIX: the previous version also sent an inlineData part with empty data
    // and mimeType 'text/plain'. Inline data parts must carry a non-empty
    // base64 payload of a supported media type, so that request shape is
    // rejected by the API; a plain text prompt is the correct request here.
    const result = await generativeModel.generateContent(enhancedPrompt);
    const response = result.response;
    const text = response.text();
    // For actual image generation with Gemini, you would typically:
    // 1. Use the Imagen model (imagen-3.0-generate-001)
    // 2. Parse the response to get base64 image data
    // 3. Convert to binary and save
    // Placeholder implementation - in production, this would use the actual Imagen API
    console.warn('\nNote: This is a demonstration implementation.');
    console.warn('For actual image generation, you would use the Imagen model.');
    console.warn('Response from model:', text.substring(0, 200) + '...');
    // In a real implementation with Imagen:
    // const imageData = Buffer.from(response.candidates[0].content.parts[0].inlineData.data, 'base64');
    // writeFileSync(resolve(outputPath), imageData);
    console.log(`\nTo implement actual image generation:`);
    console.log(`1. Use the Imagen model (imagen-3.0-generate-001)`);
    console.log(`2. Parse the base64 image data from the response`);
    console.log(`3. Save to: ${resolve(outputPath)}`);
    console.log(`\nRefer to: https://ai.google.dev/docs/imagen`);
  } catch (error) {
    // Surface API-key problems specifically; all other errors are reported as-is.
    if (error instanceof Error) {
      console.error('Error generating image:', error.message);
      if (error.message.includes('API key')) {
        console.error('\nPlease verify your GEMINI_API_KEY is valid');
      }
    } else {
      console.error('Error generating image:', error);
    }
    process.exit(1);
  }
}
// Parse command line arguments
/**
 * Parse CLI arguments for generate-image: <prompt> <output-path> followed by
 * optional flag/value pairs. Prints usage and exits when fewer than 2
 * arguments are supplied.
 *
 * FIX: `--width`/`--height` values are now validated — previously a
 * non-numeric value put NaN into the options and propagated as "NaNxNaN"
 * into the generation prompt. Invalid values are warned about and ignored.
 */
function parseArgs(): { prompt: string; outputPath: string; options: GenerateOptions } {
  const args = process.argv.slice(2);
  if (args.length < 2) {
    console.error('Usage: generate-image.ts <prompt> <output-path> [options]');
    console.error('\nArguments:');
    console.error(' prompt Text description of the image to generate');
    console.error(' output-path Where to save the generated image');
    console.error('\nOptions:');
    console.error(' --width <number> Image width in pixels (default: 1024)');
    console.error(' --height <number> Image height in pixels (default: 1024)');
    console.error(' --model <string> Gemini model to use (default: gemini-2.0-flash-exp)');
    console.error('\nExample:');
    console.error(' GEMINI_API_KEY=xxx npx tsx scripts/generate-image.ts "a sunset over mountains" output.png --width 1920 --height 1080');
    process.exit(1);
  }
  const prompt = args[0];
  const outputPath = args[1];
  const options: GenerateOptions = {};
  // Parse options as flag/value pairs following the two positionals.
  for (let i = 2; i < args.length; i += 2) {
    const flag = args[i];
    const value = args[i + 1];
    switch (flag) {
      case '--width': {
        const parsed = parseInt(value, 10);
        if (Number.isNaN(parsed)) {
          console.warn(`Invalid width: ${value}. Ignoring.`);
        } else {
          options.width = parsed;
        }
        break;
      }
      case '--height': {
        const parsed = parseInt(value, 10);
        if (Number.isNaN(parsed)) {
          console.warn(`Invalid height: ${value}. Ignoring.`);
        } else {
          options.height = parsed;
        }
        break;
      }
      case '--model':
        options.model = value;
        break;
      default:
        console.warn(`Unknown option: ${flag}`);
    }
  }
  return { prompt, outputPath, options };
}
// Entry point: parse CLI arguments, then run the generation workflow.
// Any unhandled rejection is reported and exits with a non-zero status.
const cli = parseArgs();
generateImage(cli.prompt, cli.outputPath, cli.options).catch((err: unknown) => {
  console.error('Fatal error:', err);
  process.exit(1);
});

View File

@@ -0,0 +1,18 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "ESNext",
"moduleResolution": "node",
"lib": ["ES2022"],
"esModuleInterop": true,
"skipLibCheck": true,
"strict": true,
"resolveJsonModule": true,
"allowSyntheticDefaultImports": true,
"forceConsistentCasingInFileNames": true,
"outDir": "./dist",
"rootDir": "./scripts"
},
"include": ["scripts/**/*"],
"exclude": ["node_modules", "dist"]
}