From 87cad5646f760b49079398b9284d7ba23c536d80 Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sat, 29 Nov 2025 18:23:08 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 14 + README.md | 3 + commands/billing.md | 39 ++ commands/completion.md | 36 ++ commands/info.md | 30 ++ commands/list-engines.md | 29 ++ commands/list-voices.md | 43 ++ commands/pricing.md | 31 ++ commands/synthesize.md | 48 ++ plugin.lock.json | 73 +++ skills/aws-polly-tts-tool/SKILL.md | 697 +++++++++++++++++++++++++++++ 11 files changed, 1043 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 commands/billing.md create mode 100644 commands/completion.md create mode 100644 commands/info.md create mode 100644 commands/list-engines.md create mode 100644 commands/list-voices.md create mode 100644 commands/pricing.md create mode 100644 commands/synthesize.md create mode 100644 plugin.lock.json create mode 100644 skills/aws-polly-tts-tool/SKILL.md diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..0813058 --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,14 @@ +{ + "name": "aws-polly-tts-tool", + "description": "AWS Polly TTS CLI and library", + "version": "0.2.0", + "author": { + "name": "Dennis Vriend" + }, + "skills": [ + "./skills" + ], + "commands": [ + "./commands" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..10dfe95 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# aws-polly-tts-tool + +AWS Polly TTS CLI and library diff --git a/commands/billing.md b/commands/billing.md new file mode 100644 index 0000000..c3dea04 --- /dev/null +++ b/commands/billing.md @@ -0,0 +1,39 @@ +--- +description: Query AWS billing for Polly usage costs +argument-hint: +--- + +Query AWS Cost Explorer for actual Amazon Polly usage costs with engine breakdown. 
+ +## Usage + +```bash +aws-polly-tts-tool billing [OPTIONS] +``` + +## Options + +- `--days INT` / `-d INT`: Number of days to query (default: 30) +- `--start-date TEXT`: Custom start date (YYYY-MM-DD) +- `--end-date TEXT`: Custom end date (YYYY-MM-DD) +- `--region TEXT` / `-r TEXT`: AWS region for Cost Explorer +- `-V/-VV/-VVV`: Verbosity (INFO/DEBUG/TRACE) + +## Examples + +```bash +# Last 30 days of Polly costs +aws-polly-tts-tool billing + +# Last 7 days +aws-polly-tts-tool billing --days 7 + +# Custom date range +aws-polly-tts-tool billing --start-date 2025-01-01 --end-date 2025-01-31 +``` + +## Output + +Returns total cost and breakdown by engine (Standard, Neural, Generative, Long-form). + +Requires IAM permission: `ce:GetCostAndUsage` diff --git a/commands/completion.md b/commands/completion.md new file mode 100644 index 0000000..371d3af --- /dev/null +++ b/commands/completion.md @@ -0,0 +1,36 @@ +--- +description: Generate shell completion scripts +argument-hint: shell +--- + +Generate shell completion script for bash, zsh, or fish to enable tab completion. + +## Usage + +```bash +aws-polly-tts-tool completion [bash|zsh|fish] +``` + +## Arguments + +- `SHELL`: Shell type (bash, zsh, or fish) - required + +## Examples + +```bash +# Generate bash completion +aws-polly-tts-tool completion bash + +# Install for bash (add to ~/.bashrc) +eval "$(aws-polly-tts-tool completion bash)" + +# Install for zsh (add to ~/.zshrc) +eval "$(aws-polly-tts-tool completion zsh)" + +# Install for fish +aws-polly-tts-tool completion fish > ~/.config/fish/completions/aws-polly-tts-tool.fish +``` + +## Output + +Returns shell-specific completion script. After installation, restart shell or source config file. 
diff --git a/commands/info.md b/commands/info.md new file mode 100644 index 0000000..b460acc --- /dev/null +++ b/commands/info.md @@ -0,0 +1,30 @@ +--- +description: Display AWS credentials and tool config +argument-hint: +--- + +Display AWS Polly tool configuration, credentials status, and helpful command references. + +## Usage + +```bash +aws-polly-tts-tool info +``` + +## Examples + +```bash +# Show configuration and verify credentials +aws-polly-tts-tool info +``` + +## Output + +Returns: +- AWS credential status (Valid/Invalid) +- AWS Account ID, User ID, ARN +- Available engines (standard, neural, generative, long-form) +- Output formats (mp3, ogg_vorbis, pcm) +- Useful command examples + +Use this to verify AWS authentication and discover tool capabilities. diff --git a/commands/list-engines.md b/commands/list-engines.md new file mode 100644 index 0000000..6aa93e4 --- /dev/null +++ b/commands/list-engines.md @@ -0,0 +1,29 @@ +--- +description: Display voice engines with pricing and features +argument-hint: +--- + +Display all available AWS Polly voice engines with technology, pricing, and use cases. + +## Usage + +```bash +aws-polly-tts-tool list-engines +``` + +## Examples + +```bash +# Show all engines with details +aws-polly-tts-tool list-engines +``` + +## Output + +Returns table with Engine, Technology, Price/1M chars, Char Limit, and Best For columns. + +Includes: +- Standard ($4/1M) - Traditional TTS +- Neural ($16/1M) - Natural voices +- Generative ($30/1M) - Highest quality +- Long-form ($100/1M) - Audiobooks diff --git a/commands/list-voices.md b/commands/list-voices.md new file mode 100644 index 0000000..75b6788 --- /dev/null +++ b/commands/list-voices.md @@ -0,0 +1,43 @@ +--- +description: List available Polly voices with filters +argument-hint: +--- + +List all available AWS Polly voices with optional filtering by engine, language, and gender. 
+ +## Usage + +```bash +aws-polly-tts-tool list-voices [OPTIONS] +``` + +## Options + +- `--engine TEXT` / `-e TEXT`: Filter by engine (standard, neural, generative, long-form) +- `--language TEXT` / `-l TEXT`: Filter by language code (e.g., en-US, es-ES) +- `--gender TEXT` / `-g TEXT`: Filter by gender (Female, Male) +- `--region TEXT` / `-r TEXT`: AWS region override +- `-V/-VV/-VVV`: Verbosity (INFO/DEBUG/TRACE) + +## Examples + +```bash +# List all voices +aws-polly-tts-tool list-voices + +# Filter by engine +aws-polly-tts-tool list-voices --engine neural + +# Filter by language +aws-polly-tts-tool list-voices --language en-US + +# Combine filters +aws-polly-tts-tool list-voices --engine neural --language en --gender Female + +# Search with grep +aws-polly-tts-tool list-voices | grep British +``` + +## Output + +Returns table with Voice, Gender, Language, Engines, Description. diff --git a/commands/pricing.md b/commands/pricing.md new file mode 100644 index 0000000..fb847fe --- /dev/null +++ b/commands/pricing.md @@ -0,0 +1,31 @@ +--- +description: Show Polly pricing and cost examples +argument-hint: +--- + +Display AWS Polly pricing information for all engines with cost examples. + +## Usage + +```bash +aws-polly-tts-tool pricing +``` + +## Examples + +```bash +# Show pricing table +aws-polly-tts-tool pricing +``` + +## Output + +Returns pricing table with: +- Engine name and cost per 1M characters +- Technology type (Concatenative/Neural/Generative) +- Quality level +- Character limit per request +- Concurrent request limits +- Free tier information +- Best use cases +- Cost examples (1,000 words, audiobooks) diff --git a/commands/synthesize.md b/commands/synthesize.md new file mode 100644 index 0000000..2ade5f8 --- /dev/null +++ b/commands/synthesize.md @@ -0,0 +1,48 @@ +--- +description: Convert text to speech using AWS Polly +argument-hint: text +--- + +Convert text to speech using Amazon Polly with support for multiple engines and voices. 
+ +## Usage + +```bash +aws-polly-tts-tool synthesize "TEXT" [OPTIONS] +``` + +## Arguments + +- `TEXT`: Text to synthesize (required, or use `--stdin`) +- `--stdin` / `-s`: Read text from stdin (for piping) +- `--voice TEXT`: Voice ID (default: Joanna) +- `--output PATH` / `-o PATH`: Save to file instead of playing +- `--format TEXT` / `-f TEXT`: Audio format (mp3, ogg_vorbis, pcm) +- `--engine TEXT` / `-e TEXT`: Engine (standard, neural, generative, long-form) +- `--ssml`: Treat input as SSML markup +- `--show-cost`: Display character count and cost estimate +- `--region TEXT` / `-r TEXT`: AWS region override +- `-V/-VV/-VVV`: Verbosity (INFO/DEBUG/TRACE) + +## Examples + +```bash +# Play with default voice (Joanna, neural) +aws-polly-tts-tool synthesize "Hello world" + +# Use different voice and engine +aws-polly-tts-tool synthesize "Hello" --voice Matthew --engine generative + +# Save to file +aws-polly-tts-tool synthesize "Hello world" --output speech.mp3 + +# Read from stdin +echo "Hello world" | aws-polly-tts-tool synthesize --stdin + +# SSML with pause +aws-polly-tts-tool synthesize '<speak>Hello <break time="500ms"/> world</speak>' --ssml +``` + +## Output + +Audio played through speakers or saved to file with character count. 
diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..7256756 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,73 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:dnvriend/aws-polly-tts-tool:plugins/aws-polly-tts-tool", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "8dd18345d4e998b3c7e3c4522b7f1df6c8c9b109", + "treeHash": "37f9975fb4e69d429c2c690b96c8bebe16bbb53d730b73c373cfb832a6e2bf5b", + "generatedAt": "2025-11-28T10:16:34.546361Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "aws-polly-tts-tool", + "description": "AWS Polly TTS CLI and library", + "version": "0.2.0" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "572a38600592362a4a636e8aa71d9e5ab77df9b377305c5ef5e7ff70c34d7e08" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "296e849d6dbf5275c40d1e597fc9dc2a5b78c16eca3827a7e65d0eac5d6b81a4" + }, + { + "path": "commands/pricing.md", + "sha256": "269671446fc8f0765b5f222f7cc2178b0266456e58916600244591738205dfed" + }, + { + "path": "commands/synthesize.md", + "sha256": "8fcb8987270dc9112c39e1f6f78508d141d748388473a3a83b5f4ba292a19a4b" + }, + { + "path": "commands/info.md", + "sha256": "c65b1737ffdf5c60dd7fd4d64f76a6934a0c340ab3348a5f8c141810266f23b4" + }, + { + "path": "commands/billing.md", + "sha256": "fae214c5e53d42bcb0106ba79872e05934b2b0b976afbcf719e74b06479001bc" + }, + { + "path": "commands/completion.md", + "sha256": "6d6067ad6cf1f06f980c19160064ff2eb2e162de0594d7a4dc55be12a4474121" + }, + { + "path": "commands/list-voices.md", + "sha256": "4b7144d629a95141d63c7fc0539359007816b3f3e743a884e0a49691041657d9" + }, + { + "path": "commands/list-engines.md", + "sha256": 
"f39bfce80ebedb0d86c963b4a7359daaf9bf5a4cb64f14d021fbcc9c221c9c8f" + }, + { + "path": "skills/aws-polly-tts-tool/SKILL.md", + "sha256": "0df0345395a24dc3df126ef0756b81c34f37bc3560707e5a2f12da87c438740f" + } + ], + "dirSha256": "37f9975fb4e69d429c2c690b96c8bebe16bbb53d730b73c373cfb832a6e2bf5b" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/aws-polly-tts-tool/SKILL.md b/skills/aws-polly-tts-tool/SKILL.md new file mode 100644 index 0000000..271290d --- /dev/null +++ b/skills/aws-polly-tts-tool/SKILL.md @@ -0,0 +1,697 @@ +--- +name: skill-aws-polly-tts-tool +description: AWS Polly TTS CLI for text-to-speech synthesis +--- + +# When to use +- Converting text to lifelike speech using AWS Polly +- Working with multiple voice engines and output formats +- Tracking TTS costs and AWS billing +- Implementing TTS in automation pipelines + +# AWS Polly TTS Tool Skill + +## Purpose + +Professional AWS Polly text-to-speech CLI and library with agent-friendly design, enabling conversion of text to lifelike speech using Amazon Polly's deep learning technology. Supports 60+ voices in 30+ languages across four quality tiers with comprehensive cost tracking. + +## When to Use This Skill + +**Use this skill when:** +- You need to convert text to speech using AWS Polly +- You want to explore available voices and engines +- You need to track TTS costs or query billing data +- You're building automation with TTS capabilities +- You need SSML support for advanced speech control +- You want to work with different audio formats + +**Do NOT use this skill for:** +- Non-AWS TTS services (Google, Azure, etc.) +- Real-time streaming TTS (use AWS SDK directly) +- Voice cloning or training (Polly doesn't support this) + +## CLI Tool: aws-polly-tts-tool + +Professional AWS Polly TTS CLI and Python library designed with CLI-first philosophy for both command-line and programmatic use. 
+ +### Installation + +```bash +# Clone repository +git clone https://github.com/dnvriend/aws-polly-tts-tool.git +cd aws-polly-tts-tool + +# Install with uv (Python 3.12) +uv tool install . --python 3.12 + +# Verify installation +aws-polly-tts-tool --version +``` + +### Prerequisites + +- **Python 3.12** (recommended; on Python 3.13+ audio playback fails because pydub needs the removed `audioop` module - saving to a file still works) +- AWS credentials configured +- **ffmpeg** for audio playback (not required for file output) +- IAM permissions: `polly:DescribeVoices`, `polly:SynthesizeSpeech`, `ce:GetCostAndUsage` (the last one only for the `billing` command) + +### Quick Start + +```bash +# Play text with default voice +aws-polly-tts-tool synthesize "Hello world" + +# Save to file +aws-polly-tts-tool synthesize "Hello world" --output speech.mp3 + +# List available voices +aws-polly-tts-tool list-voices + +# Show pricing +aws-polly-tts-tool pricing +``` + +## Progressive Disclosure + +
+📖 Core Commands (Click to expand) + +### synthesize - Convert Text to Speech + +Main TTS command with full feature support including multiple engines, voices, and output formats. + +**Usage:** +```bash +aws-polly-tts-tool synthesize "TEXT" [OPTIONS] +``` + +**Arguments:** +- `TEXT`: Text to synthesize (required, or use `--stdin`) +- `--stdin` / `-s`: Read text from stdin (enables piping) +- `--voice TEXT`: Voice ID (default: Joanna) +- `--output PATH` / `-o PATH`: Save audio to file instead of playing +- `--format TEXT` / `-f TEXT`: Output format (mp3, ogg_vorbis, pcm) - default: mp3 +- `--engine TEXT` / `-e TEXT`: Voice engine (standard, neural, generative, long-form) - default: neural +- `--ssml`: Treat input as SSML markup +- `--show-cost`: Display character count and cost estimate +- `--region TEXT` / `-r TEXT`: AWS region override +- `-V/-VV/-VVV`: Verbosity (INFO/DEBUG/TRACE with AWS SDK details) + +**Examples:** +```bash +# Basic synthesis with default voice (Joanna, neural) +aws-polly-tts-tool synthesize "Hello world" + +# Use different voice and engine +aws-polly-tts-tool synthesize "Hello" --voice Matthew --engine generative + +# Save to file with specific format +aws-polly-tts-tool synthesize "Hello world" --output speech.mp3 --format mp3 + +# Read from stdin +echo "Hello world" | aws-polly-tts-tool synthesize --stdin + +# Read from file +cat article.txt | aws-polly-tts-tool synthesize --stdin --output article.mp3 + +# Use SSML for advanced control +aws-polly-tts-tool synthesize '<speak>Hello <break time="500ms"/> world</speak>' --ssml + +# Show cost estimate +aws-polly-tts-tool synthesize "Hello world" --show-cost + +# Multiple options combined with debugging +cat article.txt | aws-polly-tts-tool synthesize --stdin \ + --voice Joanna \ + --engine neural \ + --output article.mp3 \ + --show-cost \ + -VV +``` + +**Output:** +- Audio played through speakers (default) or saved to file +- Character count and cost estimate (with `--show-cost`) +- Logs to stderr, keeping stdout clean for piping + 
+--- + +### list-voices - Discover Available Voices + +List and filter AWS Polly voices by engine, language, and gender. + +**Usage:** +```bash +aws-polly-tts-tool list-voices [OPTIONS] +``` + +**Options:** +- `--engine TEXT` / `-e TEXT`: Filter by engine (standard, neural, generative, long-form) +- `--language TEXT` / `-l TEXT`: Filter by language code (e.g., en-US, es-ES, fr-FR) +- `--gender TEXT` / `-g TEXT`: Filter by gender (Female, Male) +- `--region TEXT` / `-r TEXT`: AWS region override +- `-V/-VV/-VVV`: Verbosity levels + +**Examples:** +```bash +# List all voices +aws-polly-tts-tool list-voices + +# Filter by engine +aws-polly-tts-tool list-voices --engine neural + +# Filter by language +aws-polly-tts-tool list-voices --language en-US + +# Combine filters +aws-polly-tts-tool list-voices --engine neural --language en --gender Female + +# Use with grep for searching +aws-polly-tts-tool list-voices | grep British +aws-polly-tts-tool list-voices --engine generative | grep Spanish +``` + +**Output:** +Table with Voice, Gender, Language, Engines (supported), and Description columns. Dynamically fetched from Polly API (always up-to-date). + +--- + +### list-engines - Display Voice Engines + +Show all available voice engines with technology, pricing, and best use cases. + +**Usage:** +```bash +aws-polly-tts-tool list-engines +``` + +**Examples:** +```bash +# Show all engines with details +aws-polly-tts-tool list-engines +``` + +**Output:** +Table showing: +- **Standard** ($4/1M chars) - Traditional concatenative TTS, 3000 char limit +- **Neural** ($16/1M chars) - Natural human-like voices, 3000 char limit +- **Generative** ($30/1M chars) - Most advanced emotionally engaged, 3000 char limit +- **Long-form** ($100/1M chars) - Optimized for audiobooks, 100,000 char limit + +--- + +### billing - Query AWS Costs + +Query AWS Cost Explorer for actual Polly usage costs with engine breakdown. 
+ +**Usage:** +```bash +aws-polly-tts-tool billing [OPTIONS] +``` + +**Options:** +- `--days INT` / `-d INT`: Number of days to query (default: 30) +- `--start-date TEXT`: Custom start date (YYYY-MM-DD) +- `--end-date TEXT`: Custom end date (YYYY-MM-DD) +- `--region TEXT` / `-r TEXT`: AWS region for Cost Explorer +- `-V/-VV/-VVV`: Verbosity levels + +**Examples:** +```bash +# Last 30 days of Polly costs +aws-polly-tts-tool billing + +# Last 7 days +aws-polly-tts-tool billing --days 7 + +# Custom date range +aws-polly-tts-tool billing --start-date 2025-01-01 --end-date 2025-01-31 + +# With verbose output +aws-polly-tts-tool billing --days 7 -V +``` + +**Output:** +Total cost and breakdown by engine (Standard, Neural, Generative, Long-form) in USD. + +**Note:** Requires IAM permission `ce:GetCostAndUsage` + +--- + +### pricing - Show Pricing Information + +Display static pricing information for all Polly engines with cost examples. + +**Usage:** +```bash +aws-polly-tts-tool pricing +``` + +**Examples:** +```bash +# Show pricing table and examples +aws-polly-tts-tool pricing +``` + +**Output:** +Comprehensive pricing with: +- Cost per 1M characters for each engine +- Technology type and quality level +- Character limits per request +- Concurrent request limits +- Free tier information +- Best use cases +- Cost examples (1,000 words, audiobooks) + +--- + +### info - Tool Configuration + +Display AWS credentials status and tool configuration. + +**Usage:** +```bash +aws-polly-tts-tool info +``` + +**Examples:** +```bash +# Verify AWS authentication and show config +aws-polly-tts-tool info +``` + +**Output:** +- AWS credential status (Valid/Invalid) +- Account ID, User ID, ARN +- Available engines +- Output formats +- Useful command examples + +--- + +### completion - Shell Completion + +Generate shell completion scripts for bash, zsh, or fish. 
+ +**Usage:** +```bash +aws-polly-tts-tool completion [bash|zsh|fish] +``` + +**Arguments:** +- `SHELL`: Shell type (bash, zsh, or fish) - required + +**Examples:** +```bash +# Generate bash completion +aws-polly-tts-tool completion bash + +# Install for bash (add to ~/.bashrc) +eval "$(aws-polly-tts-tool completion bash)" + +# Install for zsh (add to ~/.zshrc) +eval "$(aws-polly-tts-tool completion zsh)" + +# Install for fish +aws-polly-tts-tool completion fish > ~/.config/fish/completions/aws-polly-tts-tool.fish + +# File-based installation (recommended) +aws-polly-tts-tool completion bash > ~/.aws-polly-tts-tool-complete.bash +echo 'source ~/.aws-polly-tts-tool-complete.bash' >> ~/.bashrc +``` + +**Output:** +Shell-specific completion script. After installation, restart shell or source config file. + +
+ +
+⚙️ Advanced Features (Click to expand) + +### SSML Support + +Full SSML (Speech Synthesis Markup Language) support for advanced speech control. + +**Features:** +- **Prosody**: Control rate, pitch, volume +- **Breaks**: Add pauses of specific duration +- **Emphasis**: Add emphasis to words +- **Speaking styles**: Newscaster, conversational (select voices) +- **Phonemes**: Control pronunciation + +**Examples:** +```bash +# Basic pause +aws-polly-tts-tool synthesize '<speak>Hello <break time="500ms"/> world</speak>' --ssml + +# Prosody control (speed, pitch, volume) - pitch is not supported by the neural engine, so use standard +aws-polly-tts-tool synthesize '<speak><prosody rate="slow" pitch="low">Deep voice</prosody></speak>' --ssml --engine standard + +# Emphasis - not supported by the neural engine, so use standard +aws-polly-tts-tool synthesize '<speak>I <emphasis level="strong">really</emphasis> like this</speak>' --ssml --engine standard + +# Newscaster style (Matthew, Joanna only) +aws-polly-tts-tool synthesize '<speak><amazon:domain name="news">Breaking news today</amazon:domain></speak>' --ssml --voice Matthew + +# Multiple prosody attributes +aws-polly-tts-tool synthesize '<speak><prosody rate="fast" volume="loud">Excited announcement!</prosody></speak>' --ssml +``` + +**SSML Resources:** +- [AWS Polly SSML Reference](https://docs.aws.amazon.com/polly/latest/dg/supportedtags.html) + +--- + +### Multi-Level Verbosity + +Progressive logging detail for debugging without code changes. 
+ +**Levels:** +- **Default**: Errors and warnings only (clean output) +- **`-V`** (INFO): High-level operations (voice selection, file operations) +- **`-VV`** (DEBUG): Detailed steps (validation, API calls, character counts) +- **`-VVV`** (TRACE): Full AWS SDK internals (credentials, HTTP requests, boto3 events) + +**Examples:** +```bash +# Default: No verbose output +aws-polly-tts-tool synthesize "Hello world" --output test.mp3 + +# INFO level (-V) +aws-polly-tts-tool synthesize "Hello world" -V --output test.mp3 +# [INFO] Using voice: Joanna (neural engine) +# [INFO] Synthesizing audio to file: test.mp3 + +# DEBUG level (-VV) +aws-polly-tts-tool synthesize "Hello world" -VV --output test.mp3 +# [DEBUG] Validating engine: neural +# [DEBUG] Validating output format: mp3 +# [DEBUG] Initializing AWS Polly client +# [INFO] Using voice: Joanna (neural engine) +# [DEBUG] Synthesized 11 characters + +# TRACE level (-VVV) - Full AWS SDK details +aws-polly-tts-tool synthesize "Hello world" -VVV --output test.mp3 +# [DEBUG] Looking for credentials via: env +# [INFO] Found credentials in shared credentials file: ~/.aws/credentials +# [DEBUG] Starting new HTTPS connection (1): polly.eu-central-1.amazonaws.com:443 +# [DEBUG] https://polly.eu-central-1.amazonaws.com:443 "POST /v1/speech HTTP/1.1" 200 +``` + +**Note:** All logs go to stderr, keeping stdout clean for data/piping. + +--- + +### Library Usage + +Import and use as a Python library for programmatic access. 
+ +**Basic Usage:** +```python +from pathlib import Path + +from aws_polly_tts_tool import ( + get_polly_client, + synthesize_audio, + save_speech, + VoiceManager, + calculate_cost, +) + +# Initialize client +client = get_polly_client(region="us-east-1") + +# Synthesize audio +audio_bytes, char_count = synthesize_audio( + client=client, + text="Hello world", + voice_id="Joanna", + output_format="mp3", + engine="neural" +) + +# Save to file +save_speech( + client=client, + text="Hello world", + voice_id="Joanna", + output_path=Path("output.mp3"), + engine="neural" +) + +# List voices +voice_manager = VoiceManager(client) +voices = voice_manager.list_voices(engine="neural", language="en") + +# Calculate cost (character count, engine) +cost = calculate_cost(5000, "neural") +print(f"Estimated cost: ${cost:.4f}") +``` + +**Public API:** +- `get_polly_client(region=None)` - Initialize boto3 Polly client +- `synthesize_audio(client, text, voice_id, output_format, engine, text_type)` - Synthesize audio +- `save_speech(client, text, voice_id, output_path, ...)` - Save to file +- `play_speech(client, text, voice_id, ...)` - Play through speakers +- `VoiceManager(client)` - Voice discovery and management +- `calculate_cost(char_count, engine)` - Cost estimation + +--- + +### Voice Engine Selection Guide + +**Standard Engine** ($4/1M chars) +- **Technology**: Traditional concatenative TTS +- **Quality**: Basic synthetic sound +- **Limit**: 3,000 chars/request +- **Best for**: Cost-sensitive applications, basic announcements +- **Free tier**: 5M chars/month (12 months) + +**Neural Engine** ($16/1M chars) +- **Technology**: Deep learning neural networks +- **Quality**: Natural, human-like voices +- **Limit**: 3,000 chars/request +- **Best for**: General-purpose TTS, recommended for most use cases +- **Free tier**: 1M chars/month (12 months) + +**Generative Engine** ($30/1M chars) +- **Technology**: Advanced generative AI +- **Quality**: Most lifelike, emotionally engaged +- **Limit**: 3,000 chars/request +- 
**Best for**: High-quality content, brand voices, engaging experiences +- **Free tier**: None + +**Long-form Engine** ($100/1M chars) +- **Technology**: Neural with long-context optimization +- **Quality**: Consistent over long passages +- **Limit**: 100,000 chars/request +- **Best for**: Audiobooks, long articles, consistent narration +- **Free tier**: None + +**Decision Matrix:** +- Budget-conscious → Standard +- General use → Neural (recommended) +- Premium quality → Generative +- Audiobooks/articles → Long-form + +--- + +### Cost Tracking Strategies + +**Immediate Estimates:** +```bash +# Use --show-cost for instant character count and cost +aws-polly-tts-tool synthesize "Text" --show-cost +``` + +**Actual Billing:** +```bash +# Query real AWS costs with Cost Explorer +aws-polly-tts-tool billing --days 30 +``` + +**Cost Optimization Tips:** +1. Use Standard engine for non-critical audio +2. Cache synthesized audio files to avoid re-synthesis +3. Batch process text for efficiency +4. Use Long-form engine only for actual long content +5. Monitor with `billing` command regularly + +**Cost Examples:** +- 1,000 words (~5,000 chars): + - Standard: $0.02 + - Neural: $0.08 + - Generative: $0.15 + - Long-form: $0.50 +- 50,000 word audiobook: + - Standard: $1.00 + - Neural: $4.00 + - Generative: $7.50 + - Long-form: $25.00 + +
+ +
+🔧 Troubleshooting (Click to expand) + +### Common Issues + +**Issue: No AWS credentials found** +```bash +# Symptom +Error: Unable to locate credentials +``` + +**Solution:** +```bash +# Configure AWS credentials +aws configure + +# Or set environment variables +export AWS_ACCESS_KEY_ID="your-access-key" +export AWS_SECRET_ACCESS_KEY="your-secret-key" +export AWS_DEFAULT_REGION="us-east-1" + +# Verify with +aws-polly-tts-tool info +``` + +--- + +**Issue: Audio playback fails on Python 3.13+** +```bash +# Symptom +Error: No module named 'audioop' +``` + +**Solution:** +Option 1: Use Python 3.12 (recommended) +```bash +mise use python@3.12 +uv tool install . --python 3.12 +``` + +Option 2: Save to file instead (works on all Python versions) +```bash +aws-polly-tts-tool synthesize "Hello" --output speech.mp3 +``` + +--- + +**Issue: Voice not found** +```bash +# Symptom +Error: Voice 'invalid' not found +``` + +**Solution:** +```bash +# List available voices +aws-polly-tts-tool list-voices + +# Filter by engine +aws-polly-tts-tool list-voices --engine neural + +# Case-sensitive voice names +aws-polly-tts-tool synthesize "Hello" --voice Joanna # Correct +``` + +--- + +**Issue: Engine not supported by voice** +```bash +# Symptom +Error: Voice doesn't support this engine +``` + +**Solution:** +```bash +# Check which engines a voice supports +aws-polly-tts-tool list-voices | grep "VoiceName" + +# Not all voices support all engines +# Example: Standard voices don't support neural engine +``` + +--- + +**Issue: Cost Explorer access denied** +```bash +# Symptom +Error: AccessDeniedException when calling GetCostAndUsage +``` + +**Solution:** +Add IAM permission `ce:GetCostAndUsage`: +```json +{ + "Effect": "Allow", + "Action": ["ce:GetCostAndUsage"], + "Resource": "*" +} +``` + +--- + +**Issue: Text too long for engine** +```bash +# Symptom +Error: Text exceeds character limit +``` + +**Solution:** +- Standard/Neural/Generative: Max 3,000 chars per request +- Long-form: Max 
100,000 chars per request +- Split long text into chunks or use Long-form engine + +--- + +### Getting Help + +```bash +# General help +aws-polly-tts-tool --help + +# Command-specific help +aws-polly-tts-tool synthesize --help +aws-polly-tts-tool list-voices --help + +# Show version +aws-polly-tts-tool --version + +# Verify configuration +aws-polly-tts-tool info +``` + +### Debug Mode + +Use progressive verbosity to diagnose issues: +```bash +# Basic debug info +aws-polly-tts-tool synthesize "Hello" -V + +# Detailed debug info +aws-polly-tts-tool synthesize "Hello" -VV + +# Full AWS SDK trace +aws-polly-tts-tool synthesize "Hello" -VVV +``` + +
+ +## Best Practices + +1. **Default to Neural Engine**: Best balance of quality and cost for most use cases +2. **Use SSML for Control**: Add pauses, emphasis, and prosody for natural speech +3. **Cache Audio Files**: Save synthesized audio to avoid repeated API calls and costs +4. **Monitor Costs**: Use `billing` command to track actual spending +5. **Validate Voice Support**: Use `list-voices` to check engine compatibility before synthesis +6. **Save Critical Audio**: Use `--output` to save important audio for offline use +7. **Use Verbosity**: Add `-V/-VV/-VVV` when debugging issues +8. **Leverage stdin**: Pipe text from files or commands for automation + +## Resources + +- **GitHub**: https://github.com/dnvriend/aws-polly-tts-tool +- **Amazon Polly Docs**: https://docs.aws.amazon.com/polly/ +- **Polly Pricing**: https://aws.amazon.com/polly/pricing/ +- **SSML Reference**: https://docs.aws.amazon.com/polly/latest/dg/supportedtags.html +- **Boto3 Polly API**: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/polly.html