gh-unclecode-claude-code-to…/skills/gemini-imagegen/scripts/compose_images.py

#!/usr/bin/env python3
"""
Compose multiple images into a new image using Gemini API.

Usage:
    python compose_images.py "instruction" output.png image1.png [image2.png ...]

Examples:
    python compose_images.py "Create a group photo of these people" group.png person1.png person2.png
    python compose_images.py "Put the cat from the first image on the couch from the second" result.png cat.png couch.png
    python compose_images.py "Apply the art style from the first image to the scene in the second" styled.png style.png photo.png

Note: Supports up to 14 reference images (Gemini 3 Pro only).

Environment:
    GEMINI_API_KEY - Required API key
"""

import argparse
import os
import sys

from PIL import Image
from google import genai
from google.genai import types


def compose_images(
    instruction: str,
    output_path: str,
    image_paths: list[str],
    model: str = "gemini-3-pro-image-preview",
    aspect_ratio: str | None = None,
    image_size: str | None = None,
) -> str | None:
    """Compose multiple images based on instructions.

    Args:
        instruction: Text description of how to combine images
        output_path: Path to save the result
        image_paths: List of input image paths (up to 14)
        model: Gemini model to use (pro recommended)
        aspect_ratio: Output aspect ratio
        image_size: Output resolution

    Returns:
        Any text response from the model, or None
    """
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise EnvironmentError("GEMINI_API_KEY environment variable not set")

    if len(image_paths) > 14:
        raise ValueError("Maximum 14 reference images supported")

    if len(image_paths) < 1:
        raise ValueError("At least one image is required")

    # Verify all images exist
    for path in image_paths:
        if not os.path.exists(path):
            raise FileNotFoundError(f"Image not found: {path}")

    client = genai.Client(api_key=api_key)

    # Load images
    images = [Image.open(path) for path in image_paths]

    # Build contents: instruction first, then images
    contents = [instruction] + images

    # Build config
    config_kwargs = {"response_modalities": ["TEXT", "IMAGE"]}

    image_config_kwargs = {}
    if aspect_ratio:
        image_config_kwargs["aspect_ratio"] = aspect_ratio
    if image_size:
        image_config_kwargs["image_size"] = image_size

    if image_config_kwargs:
        config_kwargs["image_config"] = types.ImageConfig(**image_config_kwargs)

    config = types.GenerateContentConfig(**config_kwargs)

    response = client.models.generate_content(
        model=model,
        contents=contents,
        config=config,
    )

    text_response = None
    image_saved = False

    for part in response.parts:
        if part.text is not None:
            text_response = part.text
        elif part.inline_data is not None:
            image = part.as_image()
            image.save(output_path)
            image_saved = True

    if not image_saved:
        raise RuntimeError("No image was generated.")

    return text_response


def main():
    """Command-line entry point.

    Parses the instruction, output path, input images and the optional
    --model / --aspect / --size flags, then runs compose_images. On success
    the output path (and any model text) is printed to stdout; on any
    exception the message is printed to stderr and the process exits with
    status 1.
    """
    cli = argparse.ArgumentParser(
        epilog=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Compose multiple images using Gemini API",
    )

    # (flags, keyword options) for every argument, in help-display order.
    argument_table = (
        (("instruction",), {"help": "Composition instruction"}),
        (("output",), {"help": "Output file path"}),
        (("images",), {"nargs": "+", "help": "Input images (up to 14)"}),
        (
            ("--model", "-m"),
            {
                "default": "gemini-3-pro-image-preview",
                "choices": ["gemini-2.5-flash-image", "gemini-3-pro-image-preview"],
                "help": "Model to use (pro recommended for composition)",
            },
        ),
        (
            ("--aspect", "-a"),
            {
                "choices": [
                    "1:1", "2:3", "3:2", "3:4", "4:3",
                    "4:5", "5:4", "9:16", "16:9", "21:9",
                ],
                "help": "Output aspect ratio",
            },
        ),
        (
            ("--size", "-s"),
            {"choices": ["1K", "2K", "4K"], "help": "Output resolution"},
        ),
    )
    for flags, options in argument_table:
        cli.add_argument(*flags, **options)

    opts = cli.parse_args()

    try:
        reply = compose_images(
            instruction=opts.instruction,
            output_path=opts.output,
            image_paths=opts.images,
            model=opts.model,
            aspect_ratio=opts.aspect,
            image_size=opts.size,
        )

        print(f"Composed image saved to: {opts.output}")
        if reply:
            print(f"Model response: {reply}")

    except Exception as err:
        # Top-level boundary: report any failure and signal it via exit code.
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(1)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()