From bf74b6d1a5502a394a1cba730ca14c7c56be55fb Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sat, 29 Nov 2025 18:27:03 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 12 ++ README.md | 3 + plugin.lock.json | 53 ++++++++ skills/nanobanana-skill/SKILL.md | 136 +++++++++++++++++++++ skills/nanobanana-skill/nanobanana.py | 147 +++++++++++++++++++++++ skills/nanobanana-skill/requirements.txt | 4 + 6 files changed, 355 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 plugin.lock.json create mode 100644 skills/nanobanana-skill/SKILL.md create mode 100755 skills/nanobanana-skill/nanobanana.py create mode 100644 skills/nanobanana-skill/requirements.txt diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..53116ab --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,12 @@ +{ + "name": "nanobanana-skill", + "description": "Generate or edit images using Google Gemini API via nanobanana", + "version": "1.0.0", + "author": { + "name": "Pengfei Ni", + "url": "https://github.com/feiskyer/claude-code-settings" + }, + "skills": [ + "./skills" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..749b434 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# nanobanana-skill + +Generate or edit images using Google Gemini API via nanobanana diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..90e8b9e --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,53 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:feiskyer/claude-code-settings:plugins/nanobanana-skill", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "efac37727fdb37c593085c02e843b2409ef472e9", + "treeHash": "8c8704e91b8640a9f4c103135f7769e30aceac0f85327b79fb05b1e0c1c073a8", + "generatedAt": "2025-11-28T10:16:53.475069Z", + "toolVersion": 
"publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "nanobanana-skill", + "description": "Generate or edit images using Google Gemini API via nanobanana", + "version": "1.0.0" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "3c0a699d6a410f7051c72304addbd9c9dd7fe370675f7254306c139a5fd3aa30" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "4b6ee6b6a20757f7dc955e79d9da2beac7d6d61b75c6afcbf3eaca613b268282" + }, + { + "path": "skills/nanobanana-skill/requirements.txt", + "sha256": "c6e83160c50d16021c72ab93212431ff9327d84d6283a3a1daaaaf728dce70d9" + }, + { + "path": "skills/nanobanana-skill/SKILL.md", + "sha256": "73188326ef21536082d7fe65bbed8b0d928f3a4ad7991ec78bbdcf84d5ef00c2" + }, + { + "path": "skills/nanobanana-skill/nanobanana.py", + "sha256": "7e8921684cf0e8f35ab73d880fae3e5da97e67e4b21432ad5db5a8beefec0f26" + } + ], + "dirSha256": "8c8704e91b8640a9f4c103135f7769e30aceac0f85327b79fb05b1e0c1c073a8" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/nanobanana-skill/SKILL.md b/skills/nanobanana-skill/SKILL.md new file mode 100644 index 0000000..2985826 --- /dev/null +++ b/skills/nanobanana-skill/SKILL.md @@ -0,0 +1,136 @@ +--- +name: nanobanana-skill +description: Generate or edit images using Google Gemini API via nanobanana. Use when the user asks to create, generate, edit images with nanobanana, or mentions image generation/editing tasks. +allowed-tools: Bash +--- + +# Nanobanana Image Generation Skill + +Generate or edit images using Google Gemini API through the nanobanana tool. + +## Requirements + +1. **GEMINI_API_KEY**: Must be configured in `~/.nanobanana.env` or `export GEMINI_API_KEY=` +2. 
**Python3 with dependent packages installed**: google-genai, Pillow, python-dotenv. They can be installed via `python3 -m pip install -r ${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/requirements.txt` if not installed yet. +3. **Executable**: `${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/nanobanana.py` + +## Instructions + +### For image generation + +1. Ask the user for: + - What they want to create (the prompt) + - Desired aspect ratio/size (optional, defaults to 9:16 portrait) + - Output filename (optional, auto-generates UUID if not specified) + - Model preference (optional, defaults to gemini-3-pro-image-preview) + - Resolution (optional, defaults to 1K) + +2. Run the nanobanana script with appropriate parameters: + + ```bash + python3 ${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/nanobanana.py --prompt "description of image" --output "filename.png" + ``` + +3. Show the user the saved image path when complete + +### For image editing + +1. Ask the user for: + - Input image file(s) to edit + - What changes they want (the prompt) + - Output filename (optional) + +2. 
Run with input images: + + ```bash + python3 ${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/nanobanana.py --prompt "editing instructions" --input image1.png image2.png --output "edited.png" + ``` + +## Available Options + +### Aspect Ratios (--size) + +- `1024x1024` (1:1) - Square +- `832x1248` (2:3) - Portrait +- `1248x832` (3:2) - Landscape +- `864x1184` (3:4) - Portrait +- `1184x864` (4:3) - Landscape +- `896x1152` (4:5) - Portrait +- `1152x896` (5:4) - Landscape +- `768x1344` (9:16) - Portrait (default) +- `1344x768` (16:9) - Landscape +- `1536x672` (21:9) - Ultra-wide + +### Models (--model) + +- `gemini-3-pro-image-preview` (default) - Higher quality +- `gemini-2.5-flash-image` - Faster generation + +### Resolution (--resolution) + +- `1K` (default) +- `2K` +- `4K` + +## Examples + +### Generate a simple image + +```bash +python3 ${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/nanobanana.py --prompt "A serene mountain landscape at sunset with a lake" +``` + +### Generate with specific size and output + +```bash +python3 ${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/nanobanana.py \ + --prompt "Modern minimalist logo for a tech startup" \ + --size 1024x1024 \ + --output "logo.png" +``` + +### Generate landscape image with high resolution + +```bash +python3 ${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/nanobanana.py \ + --prompt "Futuristic cityscape with flying cars" \ + --size 1344x768 \ + --resolution 2K \ + --output "cityscape.png" +``` + +### Edit existing images + +```bash +python3 ${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/nanobanana.py \ + --prompt "Add a rainbow in the sky" \ + --input photo.png \ + --output "photo-with-rainbow.png" +``` + +### Use faster model + +```bash +python3 ${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/nanobanana.py \ + --prompt "Quick sketch of a cat" \ + --model gemini-2.5-flash-image \ + --output "cat-sketch.png" +``` + +## Error Handling + +If the script fails: + +- Check that `GEMINI_API_KEY` is exported or set in 
#!/usr/bin/env python3
"""Generate or edit images using the Google Gemini API.

The script reads ``GEMINI_API_KEY`` from the process environment or from
``~/.nanobanana.env``, sends a prompt (optionally together with input images)
to a Gemini image model, prints any text the model returns and writes the
returned image(s) to disk.
"""
import argparse
import os
import sys
import uuid
from contextlib import ExitStack
from io import BytesIO
from pathlib import Path

from dotenv import load_dotenv
from google import genai
from google.genai import types
from PIL import Image

# Load environment variables from the per-user configuration file.
load_dotenv(os.path.join(os.path.expanduser("~"), ".nanobanana.env"))

# Google API configuration from environment variables
api_key = os.getenv("GEMINI_API_KEY") or ""

if not api_key:
    raise ValueError(
        "Missing GEMINI_API_KEY environment variable. "
        "Export it or set it in ~/.nanobanana.env."
    )

# Initialize Gemini client
client = genai.Client(api_key=api_key)

# Maps each accepted --size value (WIDTHxHEIGHT) to the aspect ratio string
# that is passed to the API.
ASPECT_RATIO_MAP = {
    "1024x1024": "1:1",
    "832x1248": "2:3",
    "1248x832": "3:2",
    "864x1184": "3:4",
    "1184x864": "4:3",
    "896x1152": "4:5",
    "1152x896": "5:4",
    "768x1344": "9:16",
    "1344x768": "16:9",
    "1536x672": "21:9",
}


def _build_parser():
    """Return the argument parser describing the command-line interface."""
    parser = argparse.ArgumentParser(
        description="Generate or edit images using Google Gemini API"
    )
    parser.add_argument(
        "--prompt",
        type=str,
        required=True,
        help="Prompt for image generation or editing",
    )
    parser.add_argument(
        "--output",
        type=str,
        default=f"nanobanana-{uuid.uuid4()}.png",
        help="Output image filename (default: nanobanana-<uuid>.png)",
    )
    parser.add_argument(
        "--input", type=str, nargs="*", help="Input image files for editing (optional)"
    )
    parser.add_argument(
        "--size",
        type=str,
        default="768x1344",
        choices=list(ASPECT_RATIO_MAP.keys()),
        help="Size/aspect ratio of the generated image (default: 768x1344 / 9:16)",
    )
    parser.add_argument(
        "--model",
        type=str,
        default="gemini-3-pro-image-preview",
        choices=["gemini-3-pro-image-preview", "gemini-2.5-flash-image"],
        help="Model to use for image generation (default: gemini-3-pro-image-preview)",
    )
    parser.add_argument(
        "--resolution",
        type=str,
        default="1K",
        choices=["1K", "2K", "4K"],
        help="Resolution of the generated image (default: 1K)",
    )
    return parser


def _numbered_output(output, index):
    """Return the path for the ``index``-th (1-based) image of a response.

    The first image goes to ``output`` unchanged; later images get ``-<index>``
    inserted before the file extension so they do not overwrite each other.
    """
    if index == 1:
        return output
    path = Path(output)
    return str(path.with_name(f"{path.stem}-{index}{path.suffix}"))


def main():
    """Run the command-line tool.

    Parses the arguments, calls ``client.models.generate_content`` with the
    prompt (plus the input images when ``--input`` is given), prints text
    parts of the response and saves image parts to ``--output``.

    Returns:
        0 when at least one image was saved, 1 when the response contained no
        image data.

    Raises:
        ValueError: If the response has no candidates or no content parts.
    """
    args = _build_parser().parse_args()

    # argparse ``choices`` guarantees that args.size is a key of the map.
    aspect_ratio = ASPECT_RATIO_MAP[args.size]

    # The prompt always comes first; input images (if any) follow it.
    contents = [args.prompt]

    # Pillow opens files lazily, so the handles are kept open until the
    # request has been made and are then closed together by the ExitStack.
    with ExitStack() as opened_images:
        if args.input:
            print(f"Editing images with prompt: {args.prompt}")
            print(f"Input images: {args.input}")
            print(f"Aspect ratio: {aspect_ratio} ({args.size})")
            for img_path in args.input:
                contents.append(opened_images.enter_context(Image.open(img_path)))
        else:
            print(f"Generating image (size: {args.size}) with prompt: {args.prompt}")

        # Generate or edit image with config
        response = client.models.generate_content(
            model=args.model,
            contents=contents,
            config=types.GenerateContentConfig(
                response_modalities=["TEXT", "IMAGE"],
                tools=[types.Tool(google_search=types.GoogleSearch())],
                image_config=types.ImageConfig(
                    aspect_ratio=aspect_ratio,
                    image_size=args.resolution,
                ),
            ),
        )

    if (
        not response.candidates
        or response.candidates[0].content is None
        or response.candidates[0].content.parts is None
    ):
        raise ValueError("No data received from the API.")

    # Print text parts as they come and save every image part.
    saved_count = 0
    for part in response.candidates[0].content.parts:
        if part.text is not None:
            print(f"{part.text}", end="")
        elif part.inline_data is not None and part.inline_data.data is not None:
            saved_count += 1
            target = _numbered_output(args.output, saved_count)
            # Create missing parent directories so a finished generation is
            # not lost to a "no such directory" error at save time.
            Path(target).parent.mkdir(parents=True, exist_ok=True)
            with Image.open(BytesIO(part.inline_data.data)) as image:
                image.save(target)
            print(f"\n\nImage saved to: {target}")

    if saved_count == 0:
        print(
            "\n\nWarning: No image data found in the API response. This usually means the model returned only text. Please try again with a different prompt to make image generation more clear.",
            file=sys.stderr,
        )
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())