Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:48:52 +08:00
commit 6ec3196ecc
434 changed files with 125248 additions and 0 deletions

View File

@@ -0,0 +1,395 @@
#!/usr/bin/env python3
"""
Convert documents to Markdown using Gemini API.
Supports all document types:
- PDF documents (native vision processing)
- Images (JPEG, PNG, WEBP, HEIC)
- Office documents (DOCX, XLSX, PPTX)
- HTML, TXT, and other text formats
Features:
- Converts to clean markdown format
- Preserves structure, tables, and formatting
- Extracts text from images and scanned documents
- Batch conversion support
- Saves to docs/assets/document-extraction.md by default
"""
import argparse
import os
import sys
import time
from pathlib import Path
from typing import Optional, List, Dict, Any
try:
from google import genai
from google.genai import types
except ImportError:
print("Error: google-genai package not installed")
print("Install with: pip install google-genai")
sys.exit(1)
try:
from dotenv import load_dotenv
except ImportError:
load_dotenv = None
def find_api_key() -> Optional[str]:
    """Find the Gemini API key, checking the environment first, then .env files.

    Priority order (highest to lowest):
    1. process.env (runtime environment variables)
    2. .claude/skills/ai-multimodal/.env (skill-specific config)
    3. .claude/skills/.env (shared skills config)
    4. .claude/.env (Claude global config)

    Returns:
        The API key string, or None if it was not found anywhere.
    """
    # Priority 1: a key already in the process environment wins outright.
    api_key = os.getenv('GEMINI_API_KEY')
    if api_key:
        return api_key
    # .env fallbacks only work when python-dotenv is installed.
    if load_dotenv:
        skill_dir = Path(__file__).parent.parent   # .claude/skills/ai-multimodal
        skills_dir = skill_dir.parent              # .claude/skills
        claude_dir = skills_dir.parent             # .claude
        # Priorities 2-4: walk the configs nearest-first and stop at the
        # first file that yields a key (replaces three copy-pasted branches).
        for env_file in (skill_dir / '.env', skills_dir / '.env', claude_dir / '.env'):
            if env_file.exists():
                load_dotenv(env_file)
                api_key = os.getenv('GEMINI_API_KEY')
                if api_key:
                    return api_key
    return None
def find_project_root() -> Path:
    """Locate the project root by walking upward from this script's directory.

    A directory containing `.git` or `.claude` is treated as the root; if
    neither marker is found, the script's own directory is returned.
    """
    start = Path(__file__).parent
    for candidate in (start, *start.parents):
        if (candidate / '.git').exists() or (candidate / '.claude').exists():
            return candidate
    return start
def get_mime_type(file_path: str) -> str:
    """Map a file's extension to its MIME type; unknown extensions fall back
    to application/octet-stream."""
    known_types = {
        # Documents
        '.pdf': 'application/pdf',
        '.txt': 'text/plain',
        '.html': 'text/html',
        '.htm': 'text/html',
        '.md': 'text/markdown',
        '.csv': 'text/csv',
        # Images
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.webp': 'image/webp',
        '.heic': 'image/heic',
        '.heif': 'image/heif',
        # Office (need to be uploaded as binary)
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    }
    suffix = Path(file_path).suffix.lower()
    return known_types.get(suffix, 'application/octet-stream')
def upload_file(client: genai.Client, file_path: str, verbose: bool = False) -> Any:
    """Upload a file via the Gemini File API and wait for processing to finish.

    Polls every 2 seconds for up to 5 minutes; raises ValueError if the
    service reports failure, TimeoutError if processing never completes.
    """
    if verbose:
        print(f"Uploading {file_path}...")
    uploaded = client.files.upload(file=file_path)
    max_wait = 300  # 5 minutes
    elapsed = 0
    while uploaded.state.name == 'PROCESSING' and elapsed < max_wait:
        time.sleep(2)
        uploaded = client.files.get(name=uploaded.name)
        elapsed += 2
        if verbose and elapsed % 10 == 0:
            print(f" Processing... {elapsed}s")
    if uploaded.state.name == 'FAILED':
        raise ValueError(f"File processing failed: {file_path}")
    if uploaded.state.name == 'PROCESSING':
        raise TimeoutError(f"Processing timeout after {max_wait}s: {file_path}")
    if verbose:
        print(f" Uploaded: {uploaded.name}")
    return uploaded
def convert_to_markdown(
    client: genai.Client,
    file_path: str,
    model: str = 'gemini-2.5-flash',
    custom_prompt: Optional[str] = None,
    verbose: bool = False,
    max_retries: int = 3
) -> Dict[str, Any]:
    """Convert a single document to markdown using Gemini.

    Files over 20MB go through the File API upload path; smaller files are
    sent inline. Retries with exponential backoff (1s, 2s, 4s, ...) on any
    exception.

    Args:
        client: Initialized Gemini client.
        file_path: Path to the document to convert.
        model: Gemini model identifier.
        custom_prompt: Optional replacement for the default conversion prompt.
        verbose: Print upload/retry progress.
        max_retries: Attempts before giving up with an error result.

    Returns:
        Dict with 'file', 'status' ('success' or 'error'), 'markdown'
        (None on error) and, on failure only, 'error'.
    """
    for attempt in range(max_retries):
        try:
            file_path_obj = Path(file_path)
            file_size = file_path_obj.stat().st_size
            use_file_api = file_size > 20 * 1024 * 1024 # >20MB
            # Default prompt for markdown conversion
            if custom_prompt:
                prompt = custom_prompt
            else:
                prompt = """Convert this document to clean, well-formatted Markdown.
Requirements:
- Preserve all content, structure, and formatting
- Convert tables to markdown table format
- Maintain heading hierarchy (# ## ### etc)
- Preserve lists, code blocks, and quotes
- Extract text from images if present
- Keep formatting consistent and readable
Output only the markdown content without any preamble or explanation."""
            # Upload or inline the file
            if use_file_api:
                myfile = upload_file(client, str(file_path), verbose)
                content = [prompt, myfile]
            else:
                with open(file_path, 'rb') as f:
                    file_bytes = f.read()
                mime_type = get_mime_type(str(file_path))
                content = [
                    prompt,
                    types.Part.from_bytes(data=file_bytes, mime_type=mime_type)
                ]
            # Generate markdown
            response = client.models.generate_content(
                model=model,
                contents=content
            )
            # Some response objects may lack .text; fall back to empty string.
            markdown_content = response.text if hasattr(response, 'text') else ''
            return {
                'file': str(file_path),
                'status': 'success',
                'markdown': markdown_content
            }
        except Exception as e:
            # Last attempt: surface the error in the result instead of raising.
            if attempt == max_retries - 1:
                return {
                    'file': str(file_path),
                    'status': 'error',
                    'error': str(e),
                    'markdown': None
                }
            wait_time = 2 ** attempt
            if verbose:
                print(f" Retry {attempt + 1} after {wait_time}s: {e}")
            time.sleep(wait_time)
def batch_convert(
    files: List[str],
    output_file: Optional[str] = None,
    auto_name: bool = False,
    model: str = 'gemini-2.5-flash',
    custom_prompt: Optional[str] = None,
    verbose: bool = False
) -> List[Dict[str, Any]]:
    """Convert multiple documents to markdown and write one combined report.

    Args:
        files: Paths of the input documents.
        output_file: Destination markdown file; when omitted, defaults under
            <project-root>/docs/assets.
        auto_name: With a single input, derive the output name from its stem.
        model: Gemini model identifier.
        custom_prompt: Override for the default conversion prompt.
        verbose: Print per-file progress.

    Returns:
        The per-file result dicts from convert_to_markdown.
    """
    api_key = find_api_key()
    if not api_key:
        print("Error: GEMINI_API_KEY not found")
        print("Set via: export GEMINI_API_KEY='your-key'")
        print("Or create .env file with: GEMINI_API_KEY=your-key")
        sys.exit(1)
    client = genai.Client(api_key=api_key)
    results = []
    # Determine output path
    if not output_file:
        project_root = find_project_root()
        output_dir = project_root / 'docs' / 'assets'
        if auto_name and len(files) == 1:
            # Auto-generate meaningful filename from input
            input_path = Path(files[0])
            base_name = input_path.stem
            output_file = str(output_dir / f"{base_name}-extraction.md")
        else:
            output_file = str(output_dir / 'document-extraction.md')
    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Process each file
    for i, file_path in enumerate(files, 1):
        if verbose:
            print(f"\n[{i}/{len(files)}] Converting: {file_path}")
        result = convert_to_markdown(
            client=client,
            file_path=file_path,
            model=model,
            custom_prompt=custom_prompt,
            verbose=verbose
        )
        results.append(result)
        if verbose:
            print(f" Status: {result.get('status', 'unknown')}")
    # Save combined markdown
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("# Document Extraction Results\n\n")
        f.write(f"Converted {len(files)} document(s) to markdown.\n\n")
        f.write("---\n\n")
        for result in results:
            f.write(f"## {Path(result['file']).name}\n\n")
            if result['status'] == 'success' and result.get('markdown'):
                f.write(result['markdown'])
                f.write("\n\n")
            elif result['status'] == 'success':
                f.write("**Note**: Conversion succeeded but no content was returned.\n\n")
            else:
                f.write(f"**Error**: {result.get('error', 'Unknown error')}\n\n")
            f.write("---\n\n")
    # Always print the summary so the caller learns where the output landed.
    # (The original guarded this with `if verbose or True:`, a no-op condition.)
    print(f"\n{'='*50}")
    print(f"Converted: {len(results)} file(s)")
    print(f"Success: {sum(1 for r in results if r['status'] == 'success')}")
    print(f"Failed: {sum(1 for r in results if r['status'] == 'error')}")
    print(f"Output saved to: {output_path}")
    return results
def main():
    """CLI entry point: parse arguments, expand inputs, run the batch conversion."""
    # Hoisted out of the validation loop below: the original re-executed
    # `import glob` on every pattern that was not a literal file path.
    import glob
    parser = argparse.ArgumentParser(
        description='Convert documents to Markdown using Gemini API',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Convert single PDF to markdown (default name)
%(prog)s --input document.pdf
# Auto-generate meaningful filename
%(prog)s --input testpdf.pdf --auto-name
# Output: docs/assets/testpdf-extraction.md
# Convert multiple files
%(prog)s --input doc1.pdf doc2.docx image.png
# Specify custom output location
%(prog)s --input document.pdf --output ./output.md
# Use custom prompt
%(prog)s --input document.pdf --prompt "Extract only the tables as markdown"
# Batch convert directory
%(prog)s --input ./documents/*.pdf --verbose
Supported formats:
- PDF documents (up to 1,000 pages)
- Images (JPEG, PNG, WEBP, HEIC)
- Office documents (DOCX, XLSX, PPTX)
- Text formats (TXT, HTML, Markdown, CSV)
Default output: <project-root>/docs/assets/document-extraction.md
"""
    )
    parser.add_argument('--input', '-i', nargs='+', required=True,
                        help='Input file(s) to convert')
    parser.add_argument('--output', '-o',
                        help='Output markdown file (default: docs/assets/document-extraction.md)')
    parser.add_argument('--auto-name', '-a', action='store_true',
                        help='Auto-generate meaningful output filename from input (e.g., document.pdf -> document-extraction.md)')
    parser.add_argument('--model', default='gemini-2.5-flash',
                        help='Gemini model to use (default: gemini-2.5-flash)')
    parser.add_argument('--prompt', '-p',
                        help='Custom prompt for conversion')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Verbose output')
    args = parser.parse_args()
    # Validate input files: accept literal paths first, then fall back to
    # glob expansion (covers quoted wildcards the shell did not expand).
    files = []
    for file_pattern in args.input:
        file_path = Path(file_pattern)
        if file_path.exists() and file_path.is_file():
            files.append(str(file_path))
        else:
            matched = glob.glob(file_pattern)
            files.extend([f for f in matched if Path(f).is_file()])
    if not files:
        print("Error: No valid input files found")
        sys.exit(1)
    # Convert files
    batch_convert(
        files=files,
        output_file=args.output,
        auto_name=args.auto_name,
        model=args.model,
        custom_prompt=args.prompt,
        verbose=args.verbose
    )

View File

@@ -0,0 +1,480 @@
#!/usr/bin/env python3
"""
Batch process multiple media files using Gemini API.
Supports all Gemini modalities:
- Audio: Transcription, analysis, summarization
- Image: Captioning, detection, OCR, analysis
- Video: Summarization, Q&A, scene detection
- Document: PDF extraction, structured output
- Generation: Image creation from text prompts
"""
import argparse
import json
import os
import sys
import time
from pathlib import Path
from typing import List, Dict, Any, Optional
import csv
import shutil
try:
from google import genai
from google.genai import types
except ImportError:
print("Error: google-genai package not installed")
print("Install with: pip install google-genai")
sys.exit(1)
try:
from dotenv import load_dotenv
except ImportError:
load_dotenv = None
def find_api_key() -> Optional[str]:
    """Find the Gemini API key, checking the environment first, then .env files.

    Priority order (highest to lowest):
    1. process.env (runtime environment variables)
    2. .claude/skills/ai-multimodal/.env (skill-specific config)
    3. .claude/skills/.env (shared skills config)
    4. .claude/.env (Claude global config)

    Returns:
        The API key string, or None if it was not found anywhere.
    """
    # Priority 1: a key already in the process environment wins outright.
    api_key = os.getenv('GEMINI_API_KEY')
    if api_key:
        return api_key
    # .env fallbacks only work when python-dotenv is installed.
    if load_dotenv:
        skill_dir = Path(__file__).parent.parent   # .claude/skills/ai-multimodal
        skills_dir = skill_dir.parent              # .claude/skills
        claude_dir = skills_dir.parent             # .claude
        # Priorities 2-4: walk the configs nearest-first and stop at the
        # first file that yields a key (replaces three copy-pasted branches).
        for env_file in (skill_dir / '.env', skills_dir / '.env', claude_dir / '.env'):
            if env_file.exists():
                load_dotenv(env_file)
                api_key = os.getenv('GEMINI_API_KEY')
                if api_key:
                    return api_key
    return None
def get_mime_type(file_path: str) -> str:
    """Map a file's extension to its MIME type; unknown extensions fall back
    to application/octet-stream."""
    known_types = {
        # Audio
        '.mp3': 'audio/mp3',
        '.wav': 'audio/wav',
        '.aac': 'audio/aac',
        '.flac': 'audio/flac',
        '.ogg': 'audio/ogg',
        '.aiff': 'audio/aiff',
        # Image
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.webp': 'image/webp',
        '.heic': 'image/heic',
        '.heif': 'image/heif',
        # Video
        '.mp4': 'video/mp4',
        '.mpeg': 'video/mpeg',
        '.mov': 'video/quicktime',
        '.avi': 'video/x-msvideo',
        '.flv': 'video/x-flv',
        '.mpg': 'video/mpeg',
        '.webm': 'video/webm',
        '.wmv': 'video/x-ms-wmv',
        '.3gpp': 'video/3gpp',
        # Document
        '.pdf': 'application/pdf',
        '.txt': 'text/plain',
        '.html': 'text/html',
        '.md': 'text/markdown',
    }
    extension = Path(file_path).suffix.lower()
    return known_types.get(extension, 'application/octet-stream')
def upload_file(client: genai.Client, file_path: str, verbose: bool = False) -> Any:
    """Upload a file to the Gemini File API, waiting on audio/video processing.

    Only audio and video uploads go through an asynchronous processing stage;
    other types return as soon as the upload completes. Raises ValueError on
    a failed upload, TimeoutError after 5 minutes of processing.
    """
    if verbose:
        print(f"Uploading {file_path}...")
    uploaded = client.files.upload(file=file_path)
    if get_mime_type(file_path).split('/')[0] in ('video', 'audio'):
        max_wait = 300  # 5 minutes
        elapsed = 0
        while uploaded.state.name == 'PROCESSING' and elapsed < max_wait:
            time.sleep(2)
            uploaded = client.files.get(name=uploaded.name)
            elapsed += 2
            if verbose and elapsed % 10 == 0:
                print(f" Processing... {elapsed}s")
        if uploaded.state.name == 'FAILED':
            raise ValueError(f"File processing failed: {file_path}")
        if uploaded.state.name == 'PROCESSING':
            raise TimeoutError(f"Processing timeout after {max_wait}s: {file_path}")
    if verbose:
        print(f" Uploaded: {uploaded.name}")
    return uploaded
def process_file(
    client: genai.Client,
    file_path: Optional[str],
    prompt: str,
    model: str,
    task: str,
    format_output: str,
    aspect_ratio: Optional[str] = None,
    verbose: bool = False,
    max_retries: int = 3
) -> Dict[str, Any]:
    """Process a single file with retry logic.

    Sends the prompt (plus the file, uploaded via the File API above 20MB or
    inlined below it) to Gemini, retrying with exponential backoff on any
    exception. For task == 'generate', file_path may be None and any inline
    image data in the response is written to disk as PNG.

    Args:
        client: Initialized Gemini client.
        file_path: Input media path, or None for text-to-image generation.
        prompt: Instruction text sent with the content.
        model: Gemini model identifier.
        task: One of 'transcribe', 'analyze', 'extract', 'generate'.
        format_output: 'json' requests a JSON response MIME type.
        aspect_ratio: Optional aspect ratio for image generation.
        verbose: Print upload/retry/save progress.
        max_retries: Attempts before returning an error result.

    Returns:
        Dict with 'file', 'status' and 'response'; plus 'generated_image'
        when an image was saved, or 'error' on failure.
    """
    for attempt in range(max_retries):
        try:
            # For generation tasks without input files
            if task == 'generate' and not file_path:
                content = [prompt]
            else:
                # Process input file
                file_path = Path(file_path)
                # Determine if we need File API
                file_size = file_path.stat().st_size
                use_file_api = file_size > 20 * 1024 * 1024 # >20MB
                if use_file_api:
                    # Upload to File API
                    myfile = upload_file(client, str(file_path), verbose)
                    content = [prompt, myfile]
                else:
                    # Inline data
                    with open(file_path, 'rb') as f:
                        file_bytes = f.read()
                    mime_type = get_mime_type(str(file_path))
                    content = [
                        prompt,
                        types.Part.from_bytes(data=file_bytes, mime_type=mime_type)
                    ]
            # Configure request
            config_args = {}
            if task == 'generate':
                config_args['response_modalities'] = ['Image'] # Capital I per API spec
                if aspect_ratio:
                    # Nest aspect_ratio in image_config per API spec
                    config_args['image_config'] = types.ImageConfig(
                        aspect_ratio=aspect_ratio
                    )
            if format_output == 'json':
                config_args['response_mime_type'] = 'application/json'
            config = types.GenerateContentConfig(**config_args) if config_args else None
            # Generate content
            response = client.models.generate_content(
                model=model,
                contents=content,
                config=config
            )
            # Extract response (some response objects may lack .text)
            result = {
                'file': str(file_path) if file_path else 'generated',
                'status': 'success',
                'response': response.text if hasattr(response, 'text') else None
            }
            # Handle image output
            if task == 'generate' and hasattr(response, 'candidates'):
                for i, part in enumerate(response.candidates[0].content.parts):
                    if part.inline_data:
                        # Determine output directory - use project root docs/assets
                        if file_path:
                            output_dir = Path(file_path).parent
                            base_name = Path(file_path).stem
                        else:
                            # Find project root (look for .git or .claude directory)
                            script_dir = Path(__file__).parent
                            project_root = script_dir
                            for parent in [script_dir] + list(script_dir.parents):
                                if (parent / '.git').exists() or (parent / '.claude').exists():
                                    project_root = parent
                                    break
                            output_dir = project_root / 'docs' / 'assets'
                            output_dir.mkdir(parents=True, exist_ok=True)
                            base_name = "generated"
                        output_file = output_dir / f"{base_name}_generated_{i}.png"
                        with open(output_file, 'wb') as f:
                            f.write(part.inline_data.data)
                        result['generated_image'] = str(output_file)
                        if verbose:
                            print(f" Saved image to: {output_file}")
            return result
        except Exception as e:
            # Last attempt: surface the error in the result instead of raising.
            if attempt == max_retries - 1:
                return {
                    'file': str(file_path) if file_path else 'generated',
                    'status': 'error',
                    'error': str(e)
                }
            wait_time = 2 ** attempt
            if verbose:
                print(f" Retry {attempt + 1} after {wait_time}s: {e}")
            time.sleep(wait_time)
def batch_process(
    files: List[str],
    prompt: str,
    model: str,
    task: str,
    format_output: str,
    aspect_ratio: Optional[str] = None,
    output_file: Optional[str] = None,
    verbose: bool = False,
    dry_run: bool = False
) -> List[Dict[str, Any]]:
    """Run the requested task over every input file.

    A 'generate' task with no input files runs exactly once with no file.
    Exits the process when no API key is available; in dry-run mode prints
    the plan and returns an empty list without touching the API.
    """
    api_key = find_api_key()
    if not api_key:
        print("Error: GEMINI_API_KEY not found")
        print("Set via: export GEMINI_API_KEY='your-key'")
        print("Or create .env file with: GEMINI_API_KEY=your-key")
        sys.exit(1)
    if dry_run:
        print("DRY RUN MODE - No API calls will be made")
        print(f"Files to process: {len(files)}")
        print(f"Model: {model}")
        print(f"Task: {task}")
        print(f"Prompt: {prompt}")
        return []
    client = genai.Client(api_key=api_key)

    def _run(path: Optional[str]) -> Dict[str, Any]:
        # One process_file call with the shared settings; reports status.
        outcome = process_file(
            client=client,
            file_path=path,
            prompt=prompt,
            model=model,
            task=task,
            format_output=format_output,
            aspect_ratio=aspect_ratio,
            verbose=verbose
        )
        if verbose:
            print(f" Status: {outcome.get('status', 'unknown')}")
        return outcome

    results = []
    if task == 'generate' and not files:
        # Pure text-to-image generation: a single run with no input file.
        if verbose:
            print(f"\nGenerating image from prompt...")
        results.append(_run(None))
    else:
        for index, path in enumerate(files, 1):
            if verbose:
                print(f"\n[{index}/{len(files)}] Processing: {path}")
            results.append(_run(path))
    # Save results
    if output_file:
        save_results(results, output_file, format_output)
    return results
def save_results(results: List[Dict[str, Any]], output_file: str, format_output: str):
    """Save batch results to a file.

    When output_file has an image extension and exactly one result carries a
    generated image, the image itself is copied there; if generation failed,
    the error report is redirected to a .error.txt file so image files never
    receive text. Otherwise the report is written as JSON, CSV, or markdown.

    Args:
        results: Result dicts from process_file.
        output_file: Destination path.
        format_output: 'json', 'csv', or anything else for markdown.
    """
    output_path = Path(output_file)
    # Special handling for image generation - if output has image extension, copy the generated image
    image_extensions = {'.png', '.jpg', '.jpeg', '.webp', '.gif', '.bmp'}
    if output_path.suffix.lower() in image_extensions and len(results) == 1:
        generated_image = results[0].get('generated_image')
        if generated_image:
            # Copy the generated image to the specified output location
            shutil.copy2(generated_image, output_path)
            return
        else:
            # Don't write text reports to image files - save error as .txt instead
            output_path = output_path.with_suffix('.error.txt')
            print(f"Warning: Generation failed, saving error report to: {output_path}")
    # Text reports: always write UTF-8 explicitly (the original relied on the
    # locale default encoding, which breaks on non-ASCII responses).
    if format_output == 'json':
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2)
    elif format_output == 'csv':
        with open(output_path, 'w', newline='', encoding='utf-8') as f:
            fieldnames = ['file', 'status', 'response', 'error']
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            for result in results:
                writer.writerow({
                    'file': result.get('file', ''),
                    'status': result.get('status', ''),
                    'response': result.get('response', ''),
                    'error': result.get('error', '')
                })
    else:  # markdown
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write("# Batch Processing Results\n\n")
            for i, result in enumerate(results, 1):
                f.write(f"## {i}. {result.get('file', 'Unknown')}\n\n")
                f.write(f"**Status**: {result.get('status', 'unknown')}\n\n")
                if result.get('response'):
                    f.write(f"**Response**:\n\n{result['response']}\n\n")
                # Previously generated image paths were silently dropped from
                # the markdown report; record them alongside the response.
                if result.get('generated_image'):
                    f.write(f"**Generated image**: {result['generated_image']}\n\n")
                if result.get('error'):
                    f.write(f"**Error**: {result['error']}\n\n")
def main():
    """CLI entry point: parse arguments, apply per-task default prompts, and
    run batch_process, printing a success/failure summary afterwards."""
    parser = argparse.ArgumentParser(
        description='Batch process media files with Gemini API',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Transcribe multiple audio files
%(prog)s --files *.mp3 --task transcribe --model gemini-2.5-flash
# Analyze images
%(prog)s --files *.jpg --task analyze --prompt "Describe this image" \\
--model gemini-2.5-flash
# Process PDFs to JSON
%(prog)s --files *.pdf --task extract --prompt "Extract data as JSON" \\
--format json --output results.json
# Generate images
%(prog)s --task generate --prompt "A mountain landscape" \\
--model gemini-2.5-flash-image --aspect-ratio 16:9
"""
    )
    parser.add_argument('--files', nargs='*', help='Input files to process')
    parser.add_argument('--task', required=True,
                        choices=['transcribe', 'analyze', 'extract', 'generate'],
                        help='Task to perform')
    parser.add_argument('--prompt', help='Prompt for analysis/generation')
    parser.add_argument('--model', default='gemini-2.5-flash',
                        help='Gemini model to use (default: gemini-2.5-flash)')
    parser.add_argument('--format', dest='format_output', default='text',
                        choices=['text', 'json', 'csv', 'markdown'],
                        help='Output format (default: text)')
    parser.add_argument('--aspect-ratio', choices=['1:1', '16:9', '9:16', '4:3', '3:4'],
                        help='Aspect ratio for image generation')
    parser.add_argument('--output', help='Output file for results')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Verbose output')
    parser.add_argument('--dry-run', action='store_true',
                        help='Show what would be done without making API calls')
    args = parser.parse_args()
    # Validate arguments: files are mandatory except for pure generation,
    # and generation always needs an explicit prompt.
    if args.task != 'generate' and not args.files:
        parser.error("--files required for non-generation tasks")
    if args.task == 'generate' and not args.prompt:
        parser.error("--prompt required for generation task")
    if args.task != 'generate' and not args.prompt:
        # Set default prompts per task when the user supplied none
        if args.task == 'transcribe':
            args.prompt = 'Generate a transcript with timestamps'
        elif args.task == 'analyze':
            args.prompt = 'Analyze this content'
        elif args.task == 'extract':
            args.prompt = 'Extract key information'
    # Process files ('generate' may run with an empty list)
    files = args.files or []
    results = batch_process(
        files=files,
        prompt=args.prompt,
        model=args.model,
        task=args.task,
        format_output=args.format_output,
        aspect_ratio=args.aspect_ratio,
        output_file=args.output,
        verbose=args.verbose,
        dry_run=args.dry_run
    )
    # Print summary (skipped for dry runs, which return no results)
    if not args.dry_run and results:
        success = sum(1 for r in results if r.get('status') == 'success')
        failed = len(results) - success
        print(f"\n{'='*50}")
        print(f"Processed: {len(results)} files")
        print(f"Success: {success}")
        print(f"Failed: {failed}")
        if args.output:
            print(f"Results saved to: {args.output}")

View File

@@ -0,0 +1,506 @@
#!/usr/bin/env python3
"""
Optimize media files for Gemini API processing.
Features:
- Compress videos/audio for size limits
- Resize images appropriately
- Split long videos into chunks
- Format conversion
- Quality vs size optimization
- Validation before upload
"""
import argparse
import json
import os
import subprocess
import sys
from pathlib import Path
from typing import Optional, Dict, Any, List
try:
from dotenv import load_dotenv
except ImportError:
load_dotenv = None
def load_env_files():
    """Load .env files in priority order when python-dotenv is available.

    Priority (highest to lowest) after the process environment itself:
    the skill-specific .env, the shared skills .env, then the Claude
    global .env. Earlier files win because load_dotenv does not overwrite
    variables that are already set.
    """
    if not load_dotenv:
        return
    skill_dir = Path(__file__).parent.parent   # .claude/skills/ai-multimodal
    skills_dir = skill_dir.parent              # .claude/skills
    claude_dir = skills_dir.parent             # .claude
    for env_file in (skill_dir / '.env', skills_dir / '.env', claude_dir / '.env'):
        if env_file.exists():
            load_dotenv(env_file)
# Load environment variables at module level so every function in this
# script sees the .env values via os.getenv without loading them itself.
load_env_files()
def check_ffmpeg() -> bool:
    """Return True if an `ffmpeg` executable is available on PATH."""
    try:
        subprocess.run(['ffmpeg', '-version'],
                       stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL,
                       check=True)
        return True
    except (subprocess.CalledProcessError, OSError):
        # FileNotFoundError (missing binary) is a subclass of OSError.
        # The original also caught bare Exception, which would have hidden
        # unrelated bugs (e.g. typos) as "ffmpeg not installed".
        return False
def get_media_info(file_path: str) -> Dict[str, Any]:
    """Probe a media file with ffprobe and return a summary dict.

    Args:
        file_path: Path of the media file to inspect.

    Returns:
        Dict with 'size', 'duration' and 'bit_rate', plus 'width'/'height'/
        'fps' for video streams and 'sample_rate'/'channels' for audio
        streams. Empty dict when ffmpeg/ffprobe is unavailable or probing
        fails for any reason.
    """
    if not check_ffmpeg():
        return {}
    try:
        cmd = [
            'ffprobe',
            '-v', 'quiet',
            '-print_format', 'json',
            '-show_format',
            '-show_streams',
            file_path
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        data = json.loads(result.stdout)
        info = {
            'size': int(data['format'].get('size', 0)),
            'duration': float(data['format'].get('duration', 0)),
            'bit_rate': int(data['format'].get('bit_rate', 0)),
        }
        # Get video/audio specific info
        for stream in data.get('streams', []):
            if stream['codec_type'] == 'video':
                info['width'] = stream.get('width', 0)
                info['height'] = stream.get('height', 0)
                # Parse ffprobe's rational frame rate (e.g. "30000/1001")
                # arithmetically; the original used eval(), which executes
                # arbitrary text from the probed file's metadata.
                num, _, den = stream.get('r_frame_rate', '0/1').partition('/')
                try:
                    info['fps'] = float(num) / float(den) if den else float(num)
                except (ValueError, ZeroDivisionError):
                    info['fps'] = 0.0
            elif stream['codec_type'] == 'audio':
                info['sample_rate'] = int(stream.get('sample_rate', 0))
                info['channels'] = stream.get('channels', 0)
        return info
    except Exception:
        # ffprobe failure, malformed JSON, or missing keys: treat the file
        # as unreadable rather than crashing the caller.
        return {}
def optimize_video(
    input_path: str,
    output_path: str,
    target_size_mb: Optional[int] = None,
    max_duration: Optional[int] = None,
    quality: int = 23,
    resolution: Optional[str] = None,
    verbose: bool = False
) -> bool:
    """Re-encode a video so it fits Gemini API limits.

    Args:
        input_path: Source video file.
        output_path: Destination file (overwritten if present).
        target_size_mb: Approximate output size cap; enables bitrate mode.
        max_duration: Truncate the output to this many seconds.
        quality: x264 CRF value used when no target size is given.
        resolution: Explicit scale filter value, e.g. '1280:720'.
        verbose: Print stats and the ffmpeg command line.

    Returns:
        True on success, False if ffmpeg is missing or encoding fails.
    """
    if not check_ffmpeg():
        print("Error: ffmpeg not installed")
        print("Install: apt-get install ffmpeg (Linux) or brew install ffmpeg (Mac)")
        return False
    info = get_media_info(input_path)
    if not info:
        print(f"Error: Could not read media info from {input_path}")
        return False
    if verbose:
        print(f"Input: {Path(input_path).name}")
        print(f" Size: {info['size'] / (1024*1024):.2f} MB")
        print(f" Duration: {info['duration']:.2f}s")
        if 'width' in info:
            print(f" Resolution: {info['width']}x{info['height']}")
        print(f" Bit rate: {info['bit_rate'] / 1000:.0f} kbps")
    # Build ffmpeg command
    cmd = ['ffmpeg', '-i', input_path, '-y']
    cmd.extend(['-c:v', 'libx264'])
    # Rate control: CRF and -b:v are mutually exclusive in libx264 (CRF takes
    # precedence), so the original, which passed both when target_size_mb was
    # set, silently ignored the requested size. Pick exactly one mode.
    effective_duration = info['duration']
    if max_duration and effective_duration > max_duration:
        effective_duration = max_duration
    if target_size_mb and effective_duration > 0:
        target_bits = target_size_mb * 8 * 1024 * 1024
        target_bitrate = int(target_bits / effective_duration)
        # Reserve 128kbps for audio and never drop below 500kbps of video.
        video_bitrate = max(target_bitrate - 128000, 500000)
        cmd.extend(['-b:v', str(video_bitrate)])
    else:
        cmd.extend(['-crf', str(quality)])
    # Resolution
    if resolution:
        cmd.extend(['-vf', f'scale={resolution}'])
    elif 'width' in info and info['width'] > 1920:
        cmd.extend(['-vf', 'scale=1920:-2'])  # Cap width at 1920, keep aspect
    # Audio codec
    cmd.extend(['-c:a', 'aac', '-b:a', '128k', '-ac', '2'])
    # Duration limit
    if max_duration and info['duration'] > max_duration:
        cmd.extend(['-t', str(max_duration)])
    cmd.append(output_path)
    if verbose:
        print(f"\nOptimizing...")
        print(f" Command: {' '.join(cmd)}")
    try:
        subprocess.run(cmd, check=True, capture_output=not verbose)
        # Check output
        output_info = get_media_info(output_path)
        if output_info and verbose:
            print(f"\nOutput: {Path(output_path).name}")
            print(f" Size: {output_info['size'] / (1024*1024):.2f} MB")
            print(f" Duration: {output_info['duration']:.2f}s")
            if 'width' in output_info:
                print(f" Resolution: {output_info['width']}x{output_info['height']}")
            compression = (1 - output_info['size'] / info['size']) * 100
            print(f" Compression: {compression:.1f}%")
        return True
    except subprocess.CalledProcessError as e:
        print(f"Error optimizing video: {e}")
        return False
def optimize_audio(
    input_path: str,
    output_path: str,
    target_size_mb: Optional[int] = None,
    bitrate: str = '64k',
    sample_rate: int = 16000,
    verbose: bool = False
) -> bool:
    """Transcode an audio file to mono AAC for Gemini API upload.

    Returns True on success, False if ffmpeg is missing or transcoding fails.
    """
    if not check_ffmpeg():
        print("Error: ffmpeg not installed")
        return False
    source_info = get_media_info(input_path)
    if not source_info:
        print(f"Error: Could not read media info from {input_path}")
        return False
    if verbose:
        print(f"Input: {Path(input_path).name}")
        print(f" Size: {source_info['size'] / (1024*1024):.2f} MB")
        print(f" Duration: {source_info['duration']:.2f}s")
    # Assemble the ffmpeg invocation piece by piece.
    command = ['ffmpeg', '-i', input_path, '-y']
    command += ['-c:a', 'aac']
    command += ['-b:a', bitrate]
    command += ['-ar', str(sample_rate)]
    command += ['-ac', '1']  # Mono (Gemini uses mono anyway)
    command.append(output_path)
    if verbose:
        print(f"\nOptimizing...")
    try:
        subprocess.run(command, check=True, capture_output=not verbose)
        result_info = get_media_info(output_path)
        if result_info and verbose:
            print(f"\nOutput: {Path(output_path).name}")
            print(f" Size: {result_info['size'] / (1024*1024):.2f} MB")
            saved = (1 - result_info['size'] / source_info['size']) * 100
            print(f" Compression: {saved:.1f}%")
        return True
    except subprocess.CalledProcessError as exc:
        print(f"Error optimizing audio: {exc}")
        return False
def optimize_image(
    input_path: str,
    output_path: str,
    max_width: int = 1920,
    quality: int = 85,
    verbose: bool = False
) -> bool:
    """Resize and re-encode an image for Gemini API upload.

    Args:
        input_path: Source image path.
        output_path: Destination path; its extension selects the format.
        max_width: Images wider than this are downscaled (aspect preserved).
        quality: Encoder quality 1-100 (used by JPEG/WebP).
        verbose: Print size/resolution stats.

    Returns:
        True on success, False if Pillow is missing or processing fails.
    """
    try:
        from PIL import Image
    except ImportError:
        print("Error: Pillow not installed")
        print("Install with: pip install pillow")
        return False
    try:
        img = Image.open(input_path)
        if verbose:
            print(f"Input: {Path(input_path).name}")
            print(f" Size: {Path(input_path).stat().st_size / 1024:.2f} KB")
            print(f" Resolution: {img.width}x{img.height}")
        # Resize if needed
        if img.width > max_width:
            ratio = max_width / img.width
            new_height = int(img.height * ratio)
            img = img.resize((max_width, new_height), Image.Resampling.LANCZOS)
            if verbose:
                print(f" Resized to: {img.width}x{img.height}")
        # JPEG cannot store alpha or palette data: flatten transparency onto
        # white. The original only handled RGBA, so 'P' (palette) and 'LA'
        # images crashed at save time.
        if output_path.lower().endswith(('.jpg', '.jpeg')):
            if img.mode == 'P':
                img = img.convert('RGBA')
            if img.mode in ('RGBA', 'LA'):
                rgb_img = Image.new('RGB', img.size, (255, 255, 255))
                rgb_img.paste(img, mask=img.split()[-1])
                img = rgb_img
            elif img.mode != 'RGB':
                img = img.convert('RGB')
        # Save
        img.save(output_path, quality=quality, optimize=True)
        if verbose:
            print(f"\nOutput: {Path(output_path).name}")
            print(f" Size: {Path(output_path).stat().st_size / 1024:.2f} KB")
            compression = (1 - Path(output_path).stat().st_size / Path(input_path).stat().st_size) * 100
            print(f" Compression: {compression:.1f}%")
        return True
    except Exception as e:
        print(f"Error optimizing image: {e}")
        return False
def split_video(
    input_path: str,
    output_dir: str,
    chunk_duration: int = 3600,
    verbose: bool = False
) -> List[str]:
    """Split a long video into fixed-length chunks using stream copy.

    Args:
        input_path: Source video.
        output_dir: Directory for chunk files (created if missing).
        chunk_duration: Maximum chunk length in seconds.
        verbose: Print per-chunk progress.

    Returns:
        Paths of the chunk files; [input_path] unchanged when no split is
        needed; [] if ffmpeg is unavailable or the file cannot be probed.
    """
    if not check_ffmpeg():
        print("Error: ffmpeg not installed")
        return []
    info = get_media_info(input_path)
    if not info:
        return []
    total_duration = info['duration']
    # Ceiling division. The original `int(total/chunk) + 1` produced a
    # spurious empty final chunk whenever the duration was an exact multiple
    # of chunk_duration (e.g. 7200s / 3600s yielded 3 chunks, not 2).
    num_chunks = int(total_duration // chunk_duration)
    if total_duration % chunk_duration:
        num_chunks += 1
    num_chunks = max(num_chunks, 1)
    if num_chunks == 1:
        if verbose:
            print("Video is short enough, no splitting needed")
        return [input_path]
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    output_files = []
    for i in range(num_chunks):
        start_time = i * chunk_duration
        output_file = Path(output_dir) / f"{Path(input_path).stem}_chunk_{i+1}.mp4"
        cmd = [
            'ffmpeg', '-i', input_path, '-y',
            '-ss', str(start_time),
            '-t', str(chunk_duration),
            '-c', 'copy',  # stream copy: fast, no re-encode
            str(output_file)
        ]
        if verbose:
            print(f"Creating chunk {i+1}/{num_chunks}...")
        try:
            subprocess.run(cmd, check=True, capture_output=not verbose)
            output_files.append(str(output_file))
        except subprocess.CalledProcessError as e:
            print(f"Error creating chunk {i+1}: {e}")
    return output_files
def main():
    """CLI entry point: optimize one media file or a whole directory.

    Dispatches on file extension to the video/audio/image optimizers, or
    splits a long video into chunks when --split is given. Exits with
    status 0 on success and 1 on failure or unsupported input.
    """
    parser = argparse.ArgumentParser(
        description='Optimize media files for Gemini API',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Optimize video to 100MB
  %(prog)s --input video.mp4 --output optimized.mp4 --target-size 100
  # Optimize audio
  %(prog)s --input audio.mp3 --output optimized.m4a --bitrate 64k
  # Resize image
  %(prog)s --input image.jpg --output resized.jpg --max-width 1920
  # Split long video
  %(prog)s --input long-video.mp4 --split --chunk-duration 3600 --output-dir ./chunks
  # Batch optimize directory
  %(prog)s --input-dir ./videos --output-dir ./optimized --quality 85
"""
    )
    parser.add_argument('--input', help='Input file')
    parser.add_argument('--output', help='Output file')
    parser.add_argument('--input-dir', help='Input directory for batch processing')
    parser.add_argument('--output-dir', help='Output directory for batch processing')
    parser.add_argument('--target-size', type=int, help='Target size in MB')
    parser.add_argument('--quality', type=int, default=85,
                        help='Quality (video: 0-51 CRF, image: 1-100) (default: 85)')
    parser.add_argument('--max-width', type=int, default=1920,
                        help='Max image width (default: 1920)')
    parser.add_argument('--bitrate', default='64k',
                        help='Audio bitrate (default: 64k)')
    parser.add_argument('--resolution', help='Video resolution (e.g., 1920x1080)')
    parser.add_argument('--split', action='store_true', help='Split long video into chunks')
    parser.add_argument('--chunk-duration', type=int, default=3600,
                        help='Chunk duration in seconds (default: 3600 = 1 hour)')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    args = parser.parse_args()

    # Validate arguments
    if not args.input and not args.input_dir:
        parser.error("Either --input or --input-dir required")

    # Single file processing
    if args.input:
        input_path = Path(args.input)
        if not input_path.exists():
            print(f"Error: Input file not found: {input_path}")
            sys.exit(1)
        if args.split:
            output_dir = args.output_dir or './chunks'
            chunks = split_video(str(input_path), output_dir, args.chunk_duration, args.verbose)
            print(f"\nCreated {len(chunks)} chunks in {output_dir}")
            sys.exit(0)
        if not args.output:
            parser.error("--output required for single file processing")
        output_path = Path(args.output)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Determine file type and dispatch to the matching optimizer.
        ext = input_path.suffix.lower()
        if ext in ['.mp4', '.mov', '.avi', '.mkv', '.webm', '.flv']:
            success = optimize_video(
                str(input_path),
                str(output_path),
                target_size_mb=args.target_size,
                quality=args.quality,
                resolution=args.resolution,
                verbose=args.verbose
            )
        elif ext in ['.mp3', '.wav', '.m4a', '.flac', '.aac']:
            success = optimize_audio(
                str(input_path),
                str(output_path),
                target_size_mb=args.target_size,
                bitrate=args.bitrate,
                verbose=args.verbose
            )
        elif ext in ['.jpg', '.jpeg', '.png', '.webp']:
            success = optimize_image(
                str(input_path),
                str(output_path),
                max_width=args.max_width,
                quality=args.quality,
                verbose=args.verbose
            )
        else:
            print(f"Error: Unsupported file type: {ext}")
            sys.exit(1)
        sys.exit(0 if success else 1)

    # Batch processing
    if args.input_dir:
        if not args.output_dir:
            parser.error("--output-dir required for batch processing")
        input_dir = Path(args.input_dir)
        output_dir = Path(args.output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        # Find all media files. Kept in sync with the per-extension dispatch
        # below: '*.flv' (video) and '*.aac' (audio) were previously missing
        # from this list even though the handlers accept them.
        patterns = ['*.mp4', '*.mov', '*.avi', '*.mkv', '*.webm', '*.flv',
                    '*.mp3', '*.wav', '*.m4a', '*.flac', '*.aac',
                    '*.jpg', '*.jpeg', '*.png', '*.webp']
        files = []
        for pattern in patterns:
            files.extend(input_dir.glob(pattern))
        # Deterministic processing order; also guards against duplicates.
        files = sorted(set(files))
        if not files:
            print(f"No media files found in {input_dir}")
            sys.exit(1)
        print(f"Found {len(files)} files to process")
        success_count = 0
        for input_file in files:
            output_file = output_dir / input_file.name
            ext = input_file.suffix.lower()
            success = False
            if ext in ['.mp4', '.mov', '.avi', '.mkv', '.webm', '.flv']:
                success = optimize_video(str(input_file), str(output_file),
                                         quality=args.quality, verbose=args.verbose)
            elif ext in ['.mp3', '.wav', '.m4a', '.flac', '.aac']:
                success = optimize_audio(str(input_file), str(output_file),
                                         bitrate=args.bitrate, verbose=args.verbose)
            elif ext in ['.jpg', '.jpeg', '.png', '.webp']:
                success = optimize_image(str(input_file), str(output_file),
                                         max_width=args.max_width, quality=args.quality,
                                         verbose=args.verbose)
            if success:
                success_count += 1
        print(f"\nProcessed: {success_count}/{len(files)} files")
# Script entry point: run the CLI only when executed directly.
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,26 @@
# AI Multimodal Skill Dependencies
# Python 3.10+ required
# Google Gemini API
google-genai>=0.1.0
# PDF processing
pypdf>=4.0.0
# Document conversion
python-docx>=1.0.0
docx2pdf>=0.1.8  # Drives Microsoft Word, so Windows/macOS only; optional on Linux
# Markdown processing
markdown>=3.5.0
# Image processing
Pillow>=10.0.0
# Environment variable management
python-dotenv>=1.0.0
# Testing dependencies (dev)
pytest>=8.0.0
pytest-cov>=4.1.0
pytest-mock>=3.12.0

View File

@@ -0,0 +1,20 @@
# Core dependencies
google-genai>=0.2.0
python-dotenv>=1.0.0
# Image processing
pillow>=10.0.0
# PDF processing
pypdf>=3.0.0
# Document conversion
markdown>=3.5
# Testing
pytest>=7.4.0
pytest-cov>=4.1.0
pytest-mock>=3.12.0
# Optional dependencies for full functionality
# ffmpeg-python>=0.2.0 # For media optimization (requires ffmpeg installed)

View File

@@ -0,0 +1,299 @@
"""
Tests for document_converter.py
"""
import pytest
import sys
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
sys.path.insert(0, str(Path(__file__).parent.parent))
import document_converter as dc
class TestEnvLoading:
    """Exercise .env discovery in document_converter.load_env_files."""

    def test_load_env_files_success(self):
        """When every candidate .env path exists, dotenv is invoked."""
        with patch('document_converter.load_dotenv') as fake_load, \
             patch('pathlib.Path.exists', return_value=True):
            dc.load_env_files()
        # Should be called for skill, skills, and claude dirs.
        assert fake_load.call_count >= 1

    def test_load_env_files_no_dotenv(self):
        """With python-dotenv unavailable (load_dotenv is None), no error."""
        with patch('document_converter.load_dotenv', None):
            dc.load_env_files()
class TestDependencyCheck:
    """Test dependency checking.

    check_dependencies() imports its optional packages, so these tests
    patch the builtin ``__import__`` hook to simulate availability.
    """
    @patch('builtins.__import__')
    def test_check_all_dependencies_available(self, mock_import):
        """Test when all dependencies are available."""
        # Every import succeeds, so each known dependency must be reported.
        mock_import.return_value = Mock()
        deps = dc.check_dependencies()
        assert 'pypdf' in deps
        assert 'markdown' in deps
        assert 'pillow' in deps
    @patch('builtins.__import__')
    def test_check_dependencies_missing(self, mock_import):
        """Test when dependencies are missing."""
        def import_side_effect(name, *args, **kwargs):
            # Only pypdf fails to import; everything else resolves.
            if name == 'pypdf':
                raise ImportError()
            return Mock()
        mock_import.side_effect = import_side_effect
        # The function uses try/except, so we test the actual function
        with patch('document_converter.sys.modules', {}):
            # This is tricky to test due to import handling
            # NOTE(review): placeholder only -- nothing is asserted, so the
            # missing-dependency path is effectively untested here.
            pass
class TestPDFPageExtraction:
    """Test PDF page extraction.

    NOTE: stacked @patch decorators apply bottom-up, so the mock
    arguments arrive in reverse decorator order (open, writer, reader).
    """
    @patch('pypdf.PdfReader')
    @patch('pypdf.PdfWriter')
    @patch('builtins.open', create=True)
    def test_extract_single_page(self, mock_open, mock_writer_class, mock_reader_class):
        """Test extracting a single page."""
        # Mock reader: a three-page document whose middle page is the
        # one requested by the 1-based page_range '2'.
        mock_reader = Mock()
        mock_page = Mock()
        mock_reader.pages = [Mock(), mock_page, Mock()]
        mock_reader_class.return_value = mock_reader
        # Mock writer
        mock_writer = Mock()
        mock_writer.pages = [mock_page]
        mock_writer_class.return_value = mock_writer
        result = dc.extract_pdf_pages(
            'input.pdf',
            'output.pdf',
            page_range='2',
            verbose=False
        )
        assert result is True
        # Exactly the requested page must be copied into the writer.
        mock_writer.add_page.assert_called_once_with(mock_page)
    @patch('pypdf.PdfReader')
    @patch('pypdf.PdfWriter')
    @patch('builtins.open', create=True)
    def test_extract_page_range(self, mock_open, mock_writer_class, mock_reader_class):
        """Test extracting a range of pages."""
        mock_reader = Mock()
        mock_reader.pages = [Mock() for _ in range(10)]
        mock_reader_class.return_value = mock_reader
        mock_writer = Mock()
        mock_writer.pages = []
        mock_writer_class.return_value = mock_writer
        result = dc.extract_pdf_pages(
            'input.pdf',
            'output.pdf',
            page_range='2-5',
            verbose=False
        )
        assert result is True
        assert mock_writer.add_page.call_count == 4 # Pages 2-5 (4 pages)
    def test_extract_pages_no_pypdf(self):
        """Test page extraction without pypdf."""
        # Mapping pypdf to None in sys.modules makes 'import pypdf' raise
        # ImportError, driving the graceful-failure path.
        with patch.dict('sys.modules', {'pypdf': None}):
            result = dc.extract_pdf_pages('input.pdf', 'output.pdf', '1-10')
            assert result is False
class TestPDFOptimization:
    """Test PDF optimization.

    NOTE: stacked @patch decorators apply bottom-up, so the mock
    arguments arrive in reverse decorator order (stat, open, writer,
    reader).
    """
    @patch('pypdf.PdfReader')
    @patch('pypdf.PdfWriter')
    @patch('builtins.open', create=True)
    @patch('pathlib.Path.stat')
    def test_optimize_pdf_success(self, mock_stat, mock_open, mock_writer_class, mock_reader_class):
        """Test successful PDF optimization."""
        # Mock reader
        mock_reader = Mock()
        mock_page = Mock()
        mock_reader.pages = [mock_page, mock_page]
        mock_reader_class.return_value = mock_reader
        # Mock writer
        mock_writer = Mock()
        mock_writer.pages = [mock_page, mock_page]
        mock_writer_class.return_value = mock_writer
        # Mock file sizes (1 MB for both input and output stat calls)
        mock_stat.return_value.st_size = 1024 * 1024
        result = dc.optimize_pdf('input.pdf', 'output.pdf', verbose=False)
        assert result is True
        # Optimization must compress page content streams.
        mock_page.compress_content_streams.assert_called()
    def test_optimize_pdf_no_pypdf(self):
        """Test PDF optimization without pypdf."""
        # None in sys.modules forces the pypdf import to fail -> False.
        with patch.dict('sys.modules', {'pypdf': None}):
            result = dc.optimize_pdf('input.pdf', 'output.pdf')
            assert result is False
class TestImageExtraction:
    """Test image extraction from PDFs.

    Builds a fake pypdf object tree mirroring the /Resources -> /XObject
    -> image-dict layout that extract_images_from_pdf traverses.
    NOTE: stacked @patch decorators apply bottom-up (open, mkdir, Image,
    reader).
    """
    @patch('pypdf.PdfReader')
    @patch('PIL.Image')
    @patch('pathlib.Path.mkdir')
    @patch('builtins.open', create=True)
    def test_extract_images_success(self, mock_open, mock_mkdir, mock_image, mock_reader_class):
        """Test successful image extraction."""
        # Mock PDF reader
        mock_reader = Mock()
        mock_page = MagicMock()
        # Mock XObject with image: dict-style lookups return the PDF keys
        # an image XObject carries.
        mock_obj = MagicMock()
        mock_obj.__getitem__.side_effect = lambda k: {
            '/Subtype': '/Image',
            '/Width': 100,
            '/Height': 100,
            '/Filter': '/DCTDecode'
        }[k]
        mock_obj.get_data.return_value = b'image_data'
        mock_xobjects = MagicMock()
        # One XObject named 'img1' resolving to the fake image above.
        mock_xobjects.__iter__.return_value = ['img1']
        mock_xobjects.__getitem__.return_value = mock_obj
        mock_resources = MagicMock()
        mock_resources.get_object.return_value = mock_xobjects
        mock_page.__getitem__.side_effect = lambda k: {
            '/Resources': {'/XObject': mock_resources}
        }[k]
        mock_reader.pages = [mock_page]
        mock_reader_class.return_value = mock_reader
        result = dc.extract_images_from_pdf('input.pdf', './output', verbose=False)
        # At least the one planted image must be reported.
        assert len(result) > 0
    def test_extract_images_no_dependencies(self):
        """Test image extraction without required dependencies."""
        # Missing pypdf -> empty result rather than an exception.
        with patch.dict('sys.modules', {'pypdf': None}):
            result = dc.extract_images_from_pdf('input.pdf', './output')
            assert result == []
class TestMarkdownConversion:
    """Test Markdown to PDF conversion.

    The conversion pipeline is markdown -> HTML -> wkhtmltopdf
    (subprocess); the temporary HTML file is removed afterwards (unlink).
    NOTE: stacked @patch decorators apply bottom-up.
    """
    @patch('markdown.markdown')
    @patch('builtins.open', create=True)
    @patch('subprocess.run')
    @patch('pathlib.Path.unlink')
    def test_convert_markdown_success(self, mock_unlink, mock_run, mock_open, mock_markdown):
        """Test successful Markdown to PDF conversion."""
        mock_markdown.return_value = '<h1>Test</h1>'
        # Mock file reading and writing
        mock_file = MagicMock()
        mock_file.__enter__.return_value.read.return_value = '# Test'
        mock_open.return_value = mock_file
        result = dc.convert_markdown_to_pdf('input.md', 'output.pdf', verbose=False)
        assert result is True
        # Exactly one wkhtmltopdf invocation.
        mock_run.assert_called_once()
    @patch('markdown.markdown')
    @patch('builtins.open', create=True)
    @patch('subprocess.run')
    def test_convert_markdown_no_wkhtmltopdf(self, mock_run, mock_open, mock_markdown):
        """Test Markdown conversion without wkhtmltopdf."""
        mock_markdown.return_value = '<h1>Test</h1>'
        mock_file = MagicMock()
        mock_file.__enter__.return_value.read.return_value = '# Test'
        mock_open.return_value = mock_file
        # Missing binary surfaces as FileNotFoundError from subprocess.
        mock_run.side_effect = FileNotFoundError()
        result = dc.convert_markdown_to_pdf('input.md', 'output.pdf', verbose=False)
        assert result is False
    def test_convert_markdown_no_markdown_lib(self):
        """Test Markdown conversion without markdown library."""
        # None in sys.modules forces the markdown import to fail -> False.
        with patch.dict('sys.modules', {'markdown': None}):
            result = dc.convert_markdown_to_pdf('input.md', 'output.pdf')
            assert result is False
class TestHTMLConversion:
    """Cover dc.convert_html_to_pdf success and missing-binary paths."""

    def test_convert_html_success(self):
        """A clean subprocess call yields True with exactly one invocation."""
        with patch('subprocess.run') as fake_run:
            outcome = dc.convert_html_to_pdf('input.html', 'output.pdf', verbose=False)
        assert outcome is True
        fake_run.assert_called_once()

    def test_convert_html_no_wkhtmltopdf(self):
        """FileNotFoundError (no wkhtmltopdf installed) yields False."""
        with patch('subprocess.run', side_effect=FileNotFoundError()):
            outcome = dc.convert_html_to_pdf('input.html', 'output.pdf', verbose=False)
        assert outcome is False
class TestIntegration:
    """Integration tests."""
    @patch('pathlib.Path.exists')
    def test_file_not_found(self, mock_exists):
        """Test handling of non-existent input file."""
        mock_exists.return_value = False
        # This would normally be tested via main() but we test the concept
        assert not Path('nonexistent.pdf').exists()
    @patch('document_converter.check_dependencies')
    def test_check_dependencies_integration(self, mock_check):
        """Test dependency checking integration."""
        # NOTE(review): this patches check_dependencies and then calls it,
        # so the asserts only verify the mock's canned return value -- no
        # real integration is exercised here.
        mock_check.return_value = {
            'pypdf': True,
            'markdown': True,
            'pillow': True
        }
        deps = dc.check_dependencies()
        assert deps['pypdf'] is True
        assert deps['markdown'] is True
        assert deps['pillow'] is True
# Run this test module directly with verbose output and coverage reporting.
if __name__ == '__main__':
    pytest.main([__file__, '-v', '--cov=document_converter', '--cov-report=term-missing'])

View File

@@ -0,0 +1,362 @@
"""
Tests for gemini_batch_process.py
"""
import pytest
import sys
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))
import gemini_batch_process as gbp
class TestAPIKeyFinder:
    """Test API key detection."""
    def test_find_api_key_from_env(self, monkeypatch):
        """Test finding API key from environment variable."""
        # The process environment is the highest-priority key source.
        monkeypatch.setenv('GEMINI_API_KEY', 'test_key_123')
        assert gbp.find_api_key() == 'test_key_123'
    @patch('gemini_batch_process.load_dotenv')
    def test_find_api_key_not_found(self, mock_load_dotenv, monkeypatch):
        """Test when API key is not found."""
        monkeypatch.delenv('GEMINI_API_KEY', raising=False)
        # Mock load_dotenv to not actually load any files
        mock_load_dotenv.return_value = None
        # With env unset and .env loading neutered, nothing is found.
        assert gbp.find_api_key() is None
class TestMimeTypeDetection:
    """Verify gbp.get_mime_type across every supported extension family."""

    def test_audio_mime_types(self):
        """Audio extensions map to their audio/* MIME types."""
        expectations = {
            'test.mp3': 'audio/mp3',
            'test.wav': 'audio/wav',
            'test.aac': 'audio/aac',
            'test.flac': 'audio/flac',
        }
        for filename, mime in expectations.items():
            assert gbp.get_mime_type(filename) == mime

    def test_image_mime_types(self):
        """Image extensions map to their image/* MIME types."""
        expectations = {
            'test.jpg': 'image/jpeg',
            'test.jpeg': 'image/jpeg',
            'test.png': 'image/png',
            'test.webp': 'image/webp',
        }
        for filename, mime in expectations.items():
            assert gbp.get_mime_type(filename) == mime

    def test_video_mime_types(self):
        """Video extensions map to their video/* MIME types."""
        expectations = {
            'test.mp4': 'video/mp4',
            'test.mov': 'video/quicktime',
            'test.avi': 'video/x-msvideo',
        }
        for filename, mime in expectations.items():
            assert gbp.get_mime_type(filename) == mime

    def test_document_mime_types(self):
        """Document extensions map to PDF and plain-text types."""
        assert gbp.get_mime_type('test.pdf') == 'application/pdf'
        assert gbp.get_mime_type('test.txt') == 'text/plain'

    def test_unknown_mime_type(self):
        """Unrecognized extensions fall back to octet-stream."""
        assert gbp.get_mime_type('test.xyz') == 'application/octet-stream'

    def test_case_insensitive(self):
        """Extension matching ignores letter case."""
        assert gbp.get_mime_type('TEST.MP3') == 'audio/mp3'
        assert gbp.get_mime_type('Test.JPG') == 'image/jpeg'
class TestFileUpload:
    """Test file upload functionality."""
    @patch('gemini_batch_process.genai.Client')
    def test_upload_file_success(self, mock_client_class):
        """Test successful file upload."""
        # Mock client and file: the upload immediately reports ACTIVE.
        mock_client = Mock()
        mock_file = Mock()
        mock_file.state.name = 'ACTIVE'
        mock_file.name = 'test_file'
        mock_client.files.upload.return_value = mock_file
        result = gbp.upload_file(mock_client, 'test.jpg', verbose=False)
        assert result == mock_file
        mock_client.files.upload.assert_called_once_with(file='test.jpg')
    @patch('gemini_batch_process.genai.Client')
    @patch('gemini_batch_process.time.sleep')
    def test_upload_video_with_processing(self, mock_sleep, mock_client_class):
        """Test video upload with processing wait.

        time.sleep is patched so the poll loop does not actually wait.
        """
        mock_client = Mock()
        # First call: PROCESSING, second call: ACTIVE
        mock_file_processing = Mock()
        mock_file_processing.state.name = 'PROCESSING'
        mock_file_processing.name = 'test_video'
        mock_file_active = Mock()
        mock_file_active.state.name = 'ACTIVE'
        mock_file_active.name = 'test_video'
        mock_client.files.upload.return_value = mock_file_processing
        mock_client.files.get.return_value = mock_file_active
        result = gbp.upload_file(mock_client, 'test.mp4', verbose=False)
        # The poll must return once the file reaches ACTIVE.
        assert result.state.name == 'ACTIVE'
    @patch('gemini_batch_process.genai.Client')
    def test_upload_file_failed(self, mock_client_class):
        """Test failed file upload."""
        mock_client = Mock()
        mock_file = Mock()
        mock_file.state.name = 'FAILED'
        mock_client.files.upload.return_value = mock_file
        mock_client.files.get.return_value = mock_file
        # A FAILED state must surface as a ValueError, not a silent return.
        with pytest.raises(ValueError, match="File processing failed"):
            gbp.upload_file(mock_client, 'test.mp4', verbose=False)
class TestProcessFile:
    """Test file processing functionality.

    NOTE: stacked @patch decorators apply bottom-up, so the mock
    arguments arrive in reverse decorator order.
    """
    @patch('gemini_batch_process.genai.Client')
    @patch('builtins.open', create=True)
    @patch('pathlib.Path.stat')
    def test_process_small_file_inline(self, mock_stat, mock_open, mock_client_class):
        """Test processing small file with inline data."""
        # Mock small file -- small enough that, per this test's
        # expectation, the content is sent inline rather than uploaded.
        mock_stat.return_value.st_size = 10 * 1024 * 1024  # 10MB
        # Mock file content
        mock_open.return_value.__enter__.return_value.read.return_value = b'test_data'
        # Mock client and response
        mock_client = Mock()
        mock_response = Mock()
        mock_response.text = 'Test response'
        mock_client.models.generate_content.return_value = mock_response
        result = gbp.process_file(
            client=mock_client,
            file_path='test.jpg',
            prompt='Describe this image',
            model='gemini-2.5-flash',
            task='analyze',
            format_output='text',
            verbose=False
        )
        assert result['status'] == 'success'
        assert result['response'] == 'Test response'
    @patch('gemini_batch_process.upload_file')
    @patch('gemini_batch_process.genai.Client')
    @patch('pathlib.Path.stat')
    def test_process_large_file_api(self, mock_stat, mock_client_class, mock_upload):
        """Test processing large file with File API."""
        # Mock large file -- large enough that the File API upload path
        # is taken (verified by the upload_file assertion below).
        mock_stat.return_value.st_size = 50 * 1024 * 1024  # 50MB
        # Mock upload and response
        mock_file = Mock()
        mock_upload.return_value = mock_file
        mock_client = Mock()
        mock_response = Mock()
        mock_response.text = 'Test response'
        mock_client.models.generate_content.return_value = mock_response
        result = gbp.process_file(
            client=mock_client,
            file_path='test.mp4',
            prompt='Summarize this video',
            model='gemini-2.5-flash',
            task='analyze',
            format_output='text',
            verbose=False
        )
        assert result['status'] == 'success'
        mock_upload.assert_called_once()
    @patch('gemini_batch_process.genai.Client')
    @patch('builtins.open', create=True)
    @patch('pathlib.Path.stat')
    def test_process_file_error_handling(self, mock_stat, mock_open, mock_client_class):
        """Test error handling in file processing."""
        mock_stat.return_value.st_size = 1024
        # Mock file read
        mock_file = MagicMock()
        mock_file.__enter__.return_value.read.return_value = b'test_data'
        mock_open.return_value = mock_file
        mock_client = Mock()
        # Every generate_content call fails; max_retries=1 keeps this fast.
        mock_client.models.generate_content.side_effect = Exception("API Error")
        result = gbp.process_file(
            client=mock_client,
            file_path='test.jpg',
            prompt='Test',
            model='gemini-2.5-flash',
            task='analyze',
            format_output='text',
            verbose=False,
            max_retries=1
        )
        # Failures are reported in the result dict, not raised.
        assert result['status'] == 'error'
        assert 'API Error' in result['error']
    @patch('gemini_batch_process.genai.Client')
    @patch('builtins.open', create=True)
    @patch('pathlib.Path.stat')
    def test_image_generation_with_aspect_ratio(self, mock_stat, mock_open, mock_client_class):
        """Test image generation with aspect ratio config."""
        mock_stat.return_value.st_size = 1024
        # Mock file read
        mock_file = MagicMock()
        mock_file.__enter__.return_value.read.return_value = b'test'
        mock_open.return_value = mock_file
        mock_client = Mock()
        # Generation responses carry image bytes in candidates[...].parts,
        # not in response.text.
        mock_response = Mock()
        mock_response.candidates = [Mock()]
        mock_response.candidates[0].content.parts = [
            Mock(inline_data=Mock(data=b'fake_image_data'))
        ]
        mock_client.models.generate_content.return_value = mock_response
        result = gbp.process_file(
            client=mock_client,
            file_path='test.txt',
            prompt='Generate mountain landscape',
            model='gemini-2.5-flash-image',
            task='generate',
            format_output='text',
            aspect_ratio='16:9',
            verbose=False
        )
        # Verify config was called with correct structure
        call_args = mock_client.models.generate_content.call_args
        config = call_args.kwargs.get('config')
        assert config is not None
        assert result['status'] == 'success'
        assert 'generated_image' in result
class TestBatchProcessing:
    """Test batch processing functionality."""
    @patch('gemini_batch_process.find_api_key')
    @patch('gemini_batch_process.process_file')
    @patch('gemini_batch_process.genai.Client')
    def test_batch_process_success(self, mock_client_class, mock_process, mock_find_key):
        """Test successful batch processing."""
        mock_find_key.return_value = 'test_key'
        mock_process.return_value = {'status': 'success', 'response': 'Test'}
        results = gbp.batch_process(
            files=['test1.jpg', 'test2.jpg'],
            prompt='Analyze',
            model='gemini-2.5-flash',
            task='analyze',
            format_output='text',
            verbose=False,
            dry_run=False
        )
        # One result per input file, all successful.
        assert len(results) == 2
        assert all(r['status'] == 'success' for r in results)
    @patch('gemini_batch_process.find_api_key')
    def test_batch_process_no_api_key(self, mock_find_key):
        """Test batch processing without API key."""
        mock_find_key.return_value = None
        # A missing key aborts the whole batch via sys.exit.
        with pytest.raises(SystemExit):
            gbp.batch_process(
                files=['test.jpg'],
                prompt='Test',
                model='gemini-2.5-flash',
                task='analyze',
                format_output='text',
                verbose=False,
                dry_run=False
            )
    @patch('gemini_batch_process.find_api_key')
    def test_batch_process_dry_run(self, mock_find_key):
        """Test dry run mode."""
        # API key not needed for dry run, but we mock it to avoid sys.exit
        mock_find_key.return_value = 'test_key'
        results = gbp.batch_process(
            files=['test1.jpg', 'test2.jpg'],
            prompt='Test',
            model='gemini-2.5-flash',
            task='analyze',
            format_output='text',
            verbose=False,
            dry_run=True
        )
        # Dry run performs no processing and returns no results.
        assert results == []
class TestResultsSaving:
    """Test results saving functionality for each output format."""
    @patch('builtins.open', create=True)
    @patch('json.dump')
    def test_save_results_json(self, mock_json_dump, mock_open):
        """Test saving results as JSON."""
        results = [
            {'file': 'test1.jpg', 'status': 'success', 'response': 'Test1'},
            {'file': 'test2.jpg', 'status': 'success', 'response': 'Test2'}
        ]
        gbp.save_results(results, 'output.json', 'json')
        # JSON output is serialized via a single json.dump call.
        mock_json_dump.assert_called_once()
    @patch('builtins.open', create=True)
    @patch('csv.DictWriter')
    def test_save_results_csv(self, mock_csv_writer, mock_open):
        """Test saving results as CSV."""
        results = [
            {'file': 'test1.jpg', 'status': 'success', 'response': 'Test1'},
            {'file': 'test2.jpg', 'status': 'success', 'response': 'Test2'}
        ]
        gbp.save_results(results, 'output.csv', 'csv')
        # Verify CSV writer was used
        mock_csv_writer.assert_called_once()
    @patch('builtins.open', create=True)
    def test_save_results_markdown(self, mock_open):
        """Test saving results as Markdown."""
        mock_file = MagicMock()
        mock_open.return_value.__enter__.return_value = mock_file
        # Mixed success/error rows to cover both rendering branches.
        results = [
            {'file': 'test1.jpg', 'status': 'success', 'response': 'Test1'},
            {'file': 'test2.jpg', 'status': 'error', 'error': 'Failed'}
        ]
        gbp.save_results(results, 'output.md', 'markdown')
        # Verify write was called
        assert mock_file.write.call_count > 0
# Run this test module directly with verbose output and coverage reporting.
if __name__ == '__main__':
    pytest.main([__file__, '-v', '--cov=gemini_batch_process', '--cov-report=term-missing'])

View File

@@ -0,0 +1,373 @@
"""
Tests for media_optimizer.py
"""
import pytest
import sys
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
import json
sys.path.insert(0, str(Path(__file__).parent.parent))
import media_optimizer as mo
class TestEnvLoading:
    """Test environment variable loading for media_optimizer."""
    @patch('media_optimizer.load_dotenv')
    @patch('pathlib.Path.exists')
    def test_load_env_files_success(self, mock_exists, mock_load_dotenv):
        """Test successful .env file loading."""
        # Every candidate .env path reports as existing.
        mock_exists.return_value = True
        mo.load_env_files()
        # Should be called for skill, skills, and claude dirs
        assert mock_load_dotenv.call_count >= 1
    @patch('media_optimizer.load_dotenv', None)
    def test_load_env_files_no_dotenv(self):
        """Test when dotenv is not available."""
        # Should not raise an error
        mo.load_env_files()
class TestFFmpegCheck:
    """Probe mo.check_ffmpeg with ffmpeg present, absent, and erroring."""

    def test_ffmpeg_installed(self):
        """A successful probe run means ffmpeg is available."""
        with patch('subprocess.run', return_value=Mock()):
            assert mo.check_ffmpeg() is True

    def test_ffmpeg_not_installed(self):
        """FileNotFoundError (binary missing) reports unavailable."""
        with patch('subprocess.run', side_effect=FileNotFoundError()):
            assert mo.check_ffmpeg() is False

    def test_ffmpeg_error(self):
        """Any other probe failure also reports unavailable."""
        with patch('subprocess.run', side_effect=Exception("Error")):
            assert mo.check_ffmpeg() is False
class TestMediaInfo:
    """Test media information extraction (ffprobe JSON parsing)."""
    @patch('media_optimizer.check_ffmpeg')
    @patch('subprocess.run')
    def test_get_video_info(self, mock_run, mock_check):
        """Test extracting video information."""
        mock_check.return_value = True
        # Fake ffprobe stdout: one video stream and one audio stream, with
        # numeric fields as the strings ffprobe emits.
        mock_result = Mock()
        mock_result.stdout = json.dumps({
            'format': {
                'size': '10485760',
                'duration': '120.5',
                'bit_rate': '691200'
            },
            'streams': [
                {
                    'codec_type': 'video',
                    'width': 1920,
                    'height': 1080,
                    'r_frame_rate': '30/1'
                },
                {
                    'codec_type': 'audio',
                    'sample_rate': '48000',
                    'channels': 2
                }
            ]
        })
        mock_run.return_value = mock_result
        info = mo.get_media_info('test.mp4')
        # String fields must come back converted to numbers.
        assert info['size'] == 10485760
        assert info['duration'] == 120.5
        assert info['width'] == 1920
        assert info['height'] == 1080
        assert info['sample_rate'] == 48000
    @patch('media_optimizer.check_ffmpeg')
    def test_get_media_info_no_ffmpeg(self, mock_check):
        """Test when ffmpeg is not available."""
        mock_check.return_value = False
        info = mo.get_media_info('test.mp4')
        assert info == {}
    @patch('media_optimizer.check_ffmpeg')
    @patch('subprocess.run')
    def test_get_media_info_error(self, mock_run, mock_check):
        """Test error handling in media info extraction."""
        mock_check.return_value = True
        # Probe failures degrade to an empty dict, never an exception.
        mock_run.side_effect = Exception("Error")
        info = mo.get_media_info('test.mp4')
        assert info == {}
class TestVideoOptimization:
    """Test video optimization functionality.

    get_media_info is mocked with a two-element side_effect list: the
    first dict is the probe of the input file, the second the probe of
    the written output.
    """
    @patch('media_optimizer.check_ffmpeg')
    @patch('media_optimizer.get_media_info')
    @patch('subprocess.run')
    def test_optimize_video_success(self, mock_run, mock_info, mock_check):
        """Test successful video optimization."""
        mock_check.return_value = True
        mock_info.side_effect = [
            # Input info
            {
                'size': 50 * 1024 * 1024,
                'duration': 120.0,
                'bit_rate': 3500000,
                'width': 1920,
                'height': 1080
            },
            # Output info
            {
                'size': 25 * 1024 * 1024,
                'duration': 120.0,
                'width': 1920,
                'height': 1080
            }
        ]
        result = mo.optimize_video(
            'input.mp4',
            'output.mp4',
            quality=23,
            verbose=False
        )
        assert result is True
        # Exactly one ffmpeg invocation for the transcode.
        mock_run.assert_called_once()
    @patch('media_optimizer.check_ffmpeg')
    def test_optimize_video_no_ffmpeg(self, mock_check):
        """Test video optimization without ffmpeg."""
        mock_check.return_value = False
        result = mo.optimize_video('input.mp4', 'output.mp4')
        assert result is False
    @patch('media_optimizer.check_ffmpeg')
    @patch('media_optimizer.get_media_info')
    def test_optimize_video_no_info(self, mock_info, mock_check):
        """Test video optimization when info cannot be read."""
        mock_check.return_value = True
        # Empty probe result means the input is unusable.
        mock_info.return_value = {}
        result = mo.optimize_video('input.mp4', 'output.mp4')
        assert result is False
    @patch('media_optimizer.check_ffmpeg')
    @patch('media_optimizer.get_media_info')
    @patch('subprocess.run')
    def test_optimize_video_with_target_size(self, mock_run, mock_info, mock_check):
        """Test video optimization with target size."""
        mock_check.return_value = True
        mock_info.side_effect = [
            {'size': 100 * 1024 * 1024, 'duration': 60.0, 'bit_rate': 3500000},
            {'size': 50 * 1024 * 1024, 'duration': 60.0}
        ]
        result = mo.optimize_video(
            'input.mp4',
            'output.mp4',
            target_size_mb=50,
            verbose=False
        )
        assert result is True
    @patch('media_optimizer.check_ffmpeg')
    @patch('media_optimizer.get_media_info')
    @patch('subprocess.run')
    def test_optimize_video_with_resolution(self, mock_run, mock_info, mock_check):
        """Test video optimization with custom resolution."""
        mock_check.return_value = True
        mock_info.side_effect = [
            {'size': 50 * 1024 * 1024, 'duration': 120.0, 'bit_rate': 3500000},
            {'size': 25 * 1024 * 1024, 'duration': 120.0}
        ]
        result = mo.optimize_video(
            'input.mp4',
            'output.mp4',
            resolution='1280x720',
            verbose=False
        )
        assert result is True
class TestAudioOptimization:
    """Test audio optimization functionality."""
    @patch('media_optimizer.check_ffmpeg')
    @patch('media_optimizer.get_media_info')
    @patch('subprocess.run')
    def test_optimize_audio_success(self, mock_run, mock_info, mock_check):
        """Test successful audio optimization."""
        mock_check.return_value = True
        # First probe: input file; second probe: written output.
        mock_info.side_effect = [
            {'size': 10 * 1024 * 1024, 'duration': 300.0},
            {'size': 5 * 1024 * 1024, 'duration': 300.0}
        ]
        result = mo.optimize_audio(
            'input.mp3',
            'output.m4a',
            bitrate='64k',
            verbose=False
        )
        assert result is True
        # Exactly one ffmpeg invocation for the re-encode.
        mock_run.assert_called_once()
    @patch('media_optimizer.check_ffmpeg')
    def test_optimize_audio_no_ffmpeg(self, mock_check):
        """Test audio optimization without ffmpeg."""
        mock_check.return_value = False
        result = mo.optimize_audio('input.mp3', 'output.m4a')
        assert result is False
class TestImageOptimization:
    """Test image optimization functionality (Pillow-based)."""
    @patch('PIL.Image.open')
    @patch('pathlib.Path.stat')
    def test_optimize_image_success(self, mock_stat, mock_image_open):
        """Test successful image optimization."""
        # Mock image: 4K source, wider than the 1920 max, so it is resized
        # and the resized copy is what gets saved.
        mock_resized = Mock()
        mock_resized.mode = 'RGB'
        mock_img = Mock()
        mock_img.width = 3840
        mock_img.height = 2160
        mock_img.mode = 'RGB'
        mock_img.resize.return_value = mock_resized
        mock_image_open.return_value = mock_img
        # Mock file sizes
        mock_stat.return_value.st_size = 5 * 1024 * 1024
        result = mo.optimize_image(
            'input.jpg',
            'output.jpg',
            max_width=1920,
            quality=85,
            verbose=False
        )
        assert result is True
        # Since image is resized, save is called on the resized image
        mock_resized.save.assert_called_once()
    @patch('PIL.Image.open')
    @patch('pathlib.Path.stat')
    def test_optimize_image_resize(self, mock_stat, mock_image_open):
        """Test image resizing during optimization."""
        mock_img = Mock()
        mock_img.width = 3840
        mock_img.height = 2160
        mock_img.mode = 'RGB'
        mock_resized = Mock()
        mock_img.resize.return_value = mock_resized
        mock_image_open.return_value = mock_img
        mock_stat.return_value.st_size = 5 * 1024 * 1024
        mo.optimize_image('input.jpg', 'output.jpg', max_width=1920, verbose=False)
        # Width over the max must trigger exactly one resize.
        mock_img.resize.assert_called_once()
    @patch('PIL.Image.open')
    @patch('pathlib.Path.stat')
    def test_optimize_image_rgba_to_jpg(self, mock_stat, mock_image_open):
        """Test converting RGBA to RGB for JPEG."""
        mock_img = Mock()
        mock_img.width = 1920
        mock_img.height = 1080
        mock_img.mode = 'RGBA'
        # split() returns the four RGBA bands; the alpha band is used as
        # the paste mask when flattening onto a white background.
        mock_img.split.return_value = [Mock(), Mock(), Mock(), Mock()]
        mock_image_open.return_value = mock_img
        mock_stat.return_value.st_size = 1024 * 1024
        with patch('PIL.Image.new') as mock_new:
            mock_rgb = Mock()
            mock_new.return_value = mock_rgb
            mo.optimize_image('input.png', 'output.jpg', verbose=False)
            # A fresh RGB canvas must be created for the JPEG output.
            mock_new.assert_called_once()
    def test_optimize_image_no_pillow(self):
        """Test image optimization without Pillow."""
        # None in sys.modules makes the PIL import raise ImportError.
        with patch.dict('sys.modules', {'PIL': None}):
            result = mo.optimize_image('input.jpg', 'output.jpg')
            # Will fail to import but function handles it
            assert result is False
class TestVideoSplitting:
    """Test video splitting functionality."""
    @patch('media_optimizer.check_ffmpeg')
    @patch('media_optimizer.get_media_info')
    @patch('subprocess.run')
    @patch('pathlib.Path.mkdir')
    def test_split_video_success(self, mock_mkdir, mock_run, mock_info, mock_check):
        """Test successful video splitting."""
        mock_check.return_value = True
        mock_info.return_value = {'duration': 7200.0}  # 2 hours
        result = mo.split_video(
            'input.mp4',
            './chunks',
            chunk_duration=3600,  # 1 hour chunks
            verbose=False
        )
        # Duration 7200s / 3600s = 2, +1 for safety = 3 chunks
        # (pins split_video's int(d/c)+1 rounding, including the extra
        # chunk on exact multiples)
        assert len(result) == 3
        assert mock_run.call_count == 3
    @patch('media_optimizer.check_ffmpeg')
    @patch('media_optimizer.get_media_info')
    def test_split_video_short_duration(self, mock_info, mock_check):
        """Test splitting video shorter than chunk duration."""
        mock_check.return_value = True
        mock_info.return_value = {'duration': 1800.0}  # 30 minutes
        result = mo.split_video(
            'input.mp4',
            './chunks',
            chunk_duration=3600,  # 1 hour
            verbose=False
        )
        # A short video is returned unchanged, not copied.
        assert result == ['input.mp4']
    @patch('media_optimizer.check_ffmpeg')
    def test_split_video_no_ffmpeg(self, mock_check):
        """Test video splitting without ffmpeg."""
        mock_check.return_value = False
        result = mo.split_video('input.mp4', './chunks')
        assert result == []
# Run this test module directly with verbose output and coverage reporting.
if __name__ == '__main__':
    pytest.main([__file__, '-v', '--cov=media_optimizer', '--cov-report=term-missing'])