Initial commit

2025-11-29 18:27:03 +08:00
commit bf74b6d1a5
6 changed files with 355 additions and 0 deletions
--- a/.claude-plugin/plugin.json
+++ b/.claude-plugin/plugin.json
@@ -0,0 +1,12 @@
 {
  "name": "nanobanana-skill",
  "description": "Generate or edit images using Google Gemini API via nanobanana",
  "version": "1.0.0",
  "author": {
    "name": "Pengfei Ni",
    "url": "https://github.com/feiskyer/claude-code-settings"
  },
  "skills": [
    "./skills"
  ]
 }
--- a/README.md
+++ b/README.md
@@ -0,0 +1,3 @@
 # nanobanana-skill
 Generate or edit images using Google Gemini API via nanobanana
--- a/plugin.lock.json
+++ b/plugin.lock.json
@@ -0,0 +1,53 @@
 {
  "$schema": "internal://schemas/plugin.lock.v1.json",
  "pluginId": "gh:feiskyer/claude-code-settings:plugins/nanobanana-skill",
  "normalized": {
    "repo": null,
    "ref": "refs/tags/v20251128.0",
    "commit": "efac37727fdb37c593085c02e843b2409ef472e9",
    "treeHash": "8c8704e91b8640a9f4c103135f7769e30aceac0f85327b79fb05b1e0c1c073a8",
    "generatedAt": "2025-11-28T10:16:53.475069Z",
    "toolVersion": "publish_plugins.py@0.2.0"
  },
  "origin": {
    "remote": "git@github.com:zhongweili/42plugin-data.git",
    "branch": "master",
    "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
    "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
  },
  "manifest": {
    "name": "nanobanana-skill",
    "description": "Generate or edit images using Google Gemini API via nanobanana",
    "version": "1.0.0"
  },
  "content": {
    "files": [
      {
        "path": "README.md",
        "sha256": "3c0a699d6a410f7051c72304addbd9c9dd7fe370675f7254306c139a5fd3aa30"
      },
      {
        "path": ".claude-plugin/plugin.json",
        "sha256": "4b6ee6b6a20757f7dc955e79d9da2beac7d6d61b75c6afcbf3eaca613b268282"
      },
      {
        "path": "skills/nanobanana-skill/requirements.txt",
        "sha256": "c6e83160c50d16021c72ab93212431ff9327d84d6283a3a1daaaaf728dce70d9"
      },
      {
        "path": "skills/nanobanana-skill/SKILL.md",
        "sha256": "73188326ef21536082d7fe65bbed8b0d928f3a4ad7991ec78bbdcf84d5ef00c2"
      },
      {
        "path": "skills/nanobanana-skill/nanobanana.py",
        "sha256": "7e8921684cf0e8f35ab73d880fae3e5da97e67e4b21432ad5db5a8beefec0f26"
      }
    ],
    "dirSha256": "8c8704e91b8640a9f4c103135f7769e30aceac0f85327b79fb05b1e0c1c073a8"
  },
  "security": {
    "scannedAt": null,
    "scannerVersion": null,
    "flags": []
  }
 }
--- a/skills/nanobanana-skill/SKILL.md
+++ b/skills/nanobanana-skill/SKILL.md
@@ -0,0 +1,136 @@
 ---
 name: nanobanana-skill
 description: Generate or edit images using Google Gemini API via nanobanana. Use when the user asks to create, generate, edit images with nanobanana, or mentions image generation/editing tasks.
 allowed-tools: Bash
 ---
 # Nanobanana Image Generation Skill
 Generate or edit images using Google Gemini API through the nanobanana tool.
 ## Requirements
 1. **GEMINI_API_KEY**: Must be configured in `~/.nanobanana.env` or `export GEMINI_API_KEY=<your-api-key>`
 2. **Python3 with dependent packages installed**: google-genai, Pillow, python-dotenv. If they are not installed yet, install them with `python3 -m pip install -r ${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/requirements.txt`.
 3. **Executable**: `${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/nanobanana.py`
 ## Instructions
 ### For image generation
 1. Ask the user for:
   - What they want to create (the prompt)
   - Desired aspect ratio/size (optional, defaults to 9:16 portrait)
   - Output filename (optional, auto-generates UUID if not specified)
   - Model preference (optional, defaults to gemini-3-pro-image-preview)
   - Resolution (optional, defaults to 1K)
 2. Run the nanobanana script with appropriate parameters:
   ```bash
   python3 ${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/nanobanana.py --prompt "description of image" --output "filename.png"
   ```
 3. Show the user the saved image path when complete
 ### For image editing
 1. Ask the user for:
   - Input image file(s) to edit
   - What changes they want (the prompt)
   - Output filename (optional)
 2. Run with input images:
   ```bash
   python3 ${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/nanobanana.py --prompt "editing instructions" --input image1.png image2.png --output "edited.png"
   ```
 ## Available Options
 ### Aspect Ratios (--size)
 - `1024x1024` (1:1) - Square
 - `832x1248` (2:3) - Portrait
 - `1248x832` (3:2) - Landscape
 - `864x1184` (3:4) - Portrait
 - `1184x864` (4:3) - Landscape
 - `896x1152` (4:5) - Portrait
 - `1152x896` (5:4) - Landscape
 - `768x1344` (9:16) - Portrait (default)
 - `1344x768` (16:9) - Landscape
 - `1536x672` (21:9) - Ultra-wide
 ### Models (--model)
 - `gemini-3-pro-image-preview` (default) - Higher quality
 - `gemini-2.5-flash-image` - Faster generation
 ### Resolution (--resolution)
 - `1K` (default)
 - `2K`
 - `4K`
 ## Examples
 ### Generate a simple image
 ```bash
 python3 ${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/nanobanana.py --prompt "A serene mountain landscape at sunset with a lake"
 ```
 ### Generate with specific size and output
 ```bash
 python3 ${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/nanobanana.py \
  --prompt "Modern minimalist logo for a tech startup" \
  --size 1024x1024 \
  --output "logo.png"
 ```
 ### Generate landscape image with high resolution
 ```bash
 python3 ${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/nanobanana.py \
  --prompt "Futuristic cityscape with flying cars" \
  --size 1344x768 \
  --resolution 2K \
  --output "cityscape.png"
 ```
 ### Edit existing images
 ```bash
 python3 ${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/nanobanana.py \
  --prompt "Add a rainbow in the sky" \
  --input photo.png \
  --output "photo-with-rainbow.png"
 ```
 ### Use faster model
 ```bash
 python3 ${CLAUDE_PLUGIN_ROOT}/skills/nanobanana-skill/nanobanana.py \
  --prompt "Quick sketch of a cat" \
  --model gemini-2.5-flash-image \
  --output "cat-sketch.png"
 ```
 ## Error Handling
 If the script fails:
 - Check that `GEMINI_API_KEY` is exported or set in ~/.nanobanana.env
 - Verify input image files exist and are readable
 - Ensure the output directory is writable
 - If no image is generated, try making the prompt more specific about wanting an image
 ## Best Practices
 1. Be descriptive in prompts - include style, mood, colors, composition
 2. For logos/graphics, use square aspect ratio (1024x1024)
 3. For social media posts, use 9:16 (`--size 768x1344`) for stories or 1:1 (`--size 1024x1024`) for posts
 4. For wallpapers, use 16:9 (`--size 1344x768`) or 21:9 (`--size 1536x672`)
 5. Start with 1K resolution for testing, upgrade to 2K/4K for final output
 6. Use gemini-3-pro-image-preview for best quality, gemini-2.5-flash-image for speed
--- a/skills/nanobanana-skill/nanobanana.py
+++ b/skills/nanobanana-skill/nanobanana.py
@@ -0,0 +1,147 @@
 #!/usr/bin/env python3
 # Generate or edit images using Google Gemini API
 import os
 import argparse
 import uuid
 from pathlib import Path
 from dotenv import load_dotenv
 from google import genai
 from google.genai import types
 from PIL import Image
 from io import BytesIO
 # Read settings from the per-user env file; variables that are already
 # exported in the shell are picked up through os.getenv below as well.
 load_dotenv(f"{os.path.expanduser('~')}/.nanobanana.env")

 # The API key is mandatory: fail fast at import time when it is absent or empty.
 api_key = os.getenv("GEMINI_API_KEY", "")
 if not api_key:
    raise ValueError(
        "Missing GEMINI_API_KEY environment variable. Please check your .env file."
    )

 # Shared Gemini client used for every request made by this script.
 client = genai.Client(api_key=api_key)

 # Maps each accepted --size value ("WIDTHxHEIGHT") to the aspect-ratio string
 # sent to the API. Insertion order is the order shown in the --size choices.
 ASPECT_RATIO_MAP = {
    "1024x1024": "1:1",
    "832x1248": "2:3",
    "1248x832": "3:2",
    "864x1184": "3:4",
    "1184x864": "4:3",
    "896x1152": "4:5",
    "1152x896": "5:4",
    "768x1344": "9:16",
    "1344x768": "16:9",
    "1536x672": "21:9",
 }
 def main():
    """Generate a new image, or edit existing ones, from command-line arguments.

    With ``--input`` the prompt is sent together with the listed images
    (editing); without it only the prompt is sent (generation). Text parts of
    the first response candidate are echoed to stdout and image parts are
    written to ``--output``. If the response holds several image parts, each is
    written to the same path, so the last one is what remains on disk.

    Raises:
        ValueError: If the response has no candidates, or the first candidate
            has no content parts.
    """
    # Local import: only this function needs it.
    import contextlib

    # Parse command-line arguments
    parser = argparse.ArgumentParser(
        description="Generate or edit images using Google Gemini API"
    )
    parser.add_argument(
        "--prompt",
        type=str,
        required=True,
        help="Prompt for image generation or editing",
    )
    parser.add_argument(
        "--output",
        type=str,
        default=f"nanobanana-{uuid.uuid4()}.png",
        help="Output image filename (default: nanobanana-<UUID>.png)",
    )
    parser.add_argument(
        "--input", type=str, nargs="*", help="Input image files for editing (optional)"
    )
    parser.add_argument(
        "--size",
        type=str,
        default="768x1344",
        choices=list(ASPECT_RATIO_MAP.keys()),
        help="Size/aspect ratio of the generated image (default: 768x1344 / 9:16)",
    )
    parser.add_argument(
        "--model",
        type=str,
        default="gemini-3-pro-image-preview",
        choices=["gemini-3-pro-image-preview", "gemini-2.5-flash-image"],
        help="Model to use for image generation (default: gemini-3-pro-image-preview)",
    )
    parser.add_argument(
        "--resolution",
        type=str,
        default="1K",
        choices=["1K", "2K", "4K"],
        help="Resolution of the generated image (default: 1K)",
    )
    args = parser.parse_args()

    # argparse already restricts --size to the map's keys; the fallback simply
    # mirrors the CLI default (768x1344 -> 9:16) so the two cannot disagree.
    aspect_ratio = ASPECT_RATIO_MAP.get(args.size, "9:16")

    # The prompt always comes first; input images (if any) follow it.
    contents = [args.prompt]

    # Keep the input image files open only for the duration of the request and
    # close them afterwards, even when opening a later file or the call fails.
    with contextlib.ExitStack() as opened_inputs:
        if args.input:
            print(f"Editing images with prompt: {args.prompt}")
            print(f"Input images: {args.input}")
            print(f"Aspect ratio: {aspect_ratio} ({args.size})")
            for img_path in args.input:
                contents.append(opened_inputs.enter_context(Image.open(img_path)))
        else:
            print(f"Generating image (size: {args.size}) with prompt: {args.prompt}")

        # Generate or edit image with config
        response = client.models.generate_content(
            model=args.model,
            contents=contents,
            config=types.GenerateContentConfig(
                response_modalities=['TEXT', 'IMAGE'],
                tools=[types.Tool(google_search=types.GoogleSearch())],
                image_config=types.ImageConfig(
                    aspect_ratio=aspect_ratio,
                    image_size=args.resolution,
                ),
            ),
        )

    candidates = response.candidates
    if (not candidates
        or candidates[0].content is None
        or candidates[0].content.parts is None):
        raise ValueError("No data received from the API.")

    # Extract image from response
    image_saved = False
    for part in candidates[0].content.parts:
        if part.text is not None:
            print(f"{part.text}", end="")
        elif part.inline_data is not None and part.inline_data.data is not None:
            # Create the target directory on demand so that an already-paid-for
            # result is not lost just because the folder does not exist yet.
            Path(args.output).parent.mkdir(parents=True, exist_ok=True)
            image = Image.open(BytesIO(part.inline_data.data))
            image.save(args.output)
            image_saved = True
            print(f"\n\nImage saved to: {args.output}")

    if not image_saved:
        print(f"\n\nWarning: No image data found in the API response. This usually means the model returned only text. Please try again with a different prompt to make image generation more clear.")
 # Run the command-line interface only when this file is executed as a script.
 if __name__ == "__main__":
    main()
--- a/skills/nanobanana-skill/requirements.txt
+++ b/skills/nanobanana-skill/requirements.txt
@@ -0,0 +1,4 @@
 python-dotenv
 httpx[socks]
 google-genai
 Pillow
		`@@ -0,0 +1,3 @@`
							`# nanobanana-skill`

							`Generate or edit images using Google Gemini API via nanobanana`