commit 7d5b628e7d3053aec65ebfaa2cfbb883d444c4fd Author: Zhongwei Li Date: Sun Nov 30 08:38:41 2025 +0800 Initial commit diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..57403f1 --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,14 @@ +{ + "name": "youtube", + "description": "Youtube toolkit for Claude Code.", + "version": "1.0.0", + "author": { + "name": "Sanhe Hu" + }, + "skills": [ + "./skills" + ], + "commands": [ + "./commands" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..cafc874 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# youtube + +YouTube toolkit for Claude Code. diff --git a/commands/enrich-citations.md b/commands/enrich-citations.md new file mode 100644 index 0000000..3fc5c3d --- /dev/null +++ b/commands/enrich-citations.md @@ -0,0 +1,121 @@ +--- +name: enrich-citations +description: Add authoritative source links for all facts, citations, and references in markdown content +--- + +Process markdown content and enrich it with authoritative citation links: + + +$ARGUMENTS + + +## Input Processing + +The command accepts ARGUMENTS which can be either: + +1. **Absolute file path**: If ARGUMENTS starts with `/`, treat it as a file path + - Read the markdown content from the specified file + - Example: `/enrich-citations /Users/sanhehu/Documents/article.md` + +2. **Inline content**: If ARGUMENTS is not an absolute path, treat it as inline markdown text + - Process the text directly + - Example: `/enrich-citations This article mentions React and TypeScript frameworks.` + +## Workflow + +### 1. Load Content + +- If absolute path provided: Read content from file +- If inline content provided: Use the provided text directly + +### 2. 
Enrich Citations + +Use the `enrich-citations` agent skill to process the markdown content: + +**Identify all references:** +- External sources and research papers +- Tools, software, frameworks, libraries +- Products and services +- Organizations and institutions +- Technical concepts and standards +- People and experts + +**For each identified reference:** +1. Perform web search to find the authoritative source +2. Verify the URL is current and accessible +3. Replace plain text with markdown hyperlinks: `[Reference](URL)` +4. **Add spaces around hyperlinks**: `text [link](url) text` + +**Citation priority:** +- Official sources (project homepages, official documentation) +- Primary sources (original research papers, publications) +- Authoritative organizations (standards bodies, academic institutions) +- Reputable publications (tech blogs, news sites) +- Community resources (GitHub, Stack Overflow) +- General references (Wikipedia as last resort) + +**Quality requirements:** +- Verify all links are valid and working +- Ensure proper markdown formatting with spaces +- Preserve all original content (only add hyperlinks) +- Maintain document structure and meaning + +### 3. Save Output + +Save the enriched document to: `~/tmp/citation_enriched.md` + +**Example enrichment:** + +Input: +```markdown +# Introduction to Modern JavaScript + +JavaScript has evolved significantly with ES6 introducing features like arrow functions. +TypeScript adds static typing to JavaScript and is developed by Microsoft. +``` + +Output: +```markdown +# Introduction to Modern JavaScript + +[JavaScript](https://developer.mozilla.org/en-US/docs/Web/JavaScript) has evolved significantly with [ES6](https://www.ecma-international.org/ecma-262/6.0/) introducing features like arrow functions. +[TypeScript](https://www.typescriptlang.org/) adds static typing to JavaScript and is developed by [Microsoft](https://www.microsoft.com/) . +``` + +### 4. 
Display Output Location + +After completing the enrichment and saving the document, print the absolute path in a clickable format. + +**Format:** `file:///absolute/path/to/citation_enriched.md` + +**Example output:** +``` +✓ Citation enrichment completed successfully! +✓ Enriched document saved to: file:///Users/sanhehu/tmp/citation_enriched.md + +Click the link above to open the document. +``` + +**Requirements:** +- Use absolute path (not relative path like `~/tmp/...`) +- Include `file://` protocol prefix for clickability +- Provide clear success message +- Make it easy for user to access the enriched document + +--- + +## Usage Examples + +**Enrich an existing file:** +```bash +/enrich-citations /Users/sanhehu/Documents/article.md +``` + +**Enrich inline content:** +```bash +/enrich-citations This article discusses React, Next.js, and Vercel deployment. +``` + +--- + +**Note:** This command makes citation enrichment quick and accessible. It automatically performs web searches to find authoritative sources and adds verified hyperlinks to the document. diff --git a/commands/yt-to-md.md b/commands/yt-to-md.md new file mode 100644 index 0000000..f79a063 --- /dev/null +++ b/commands/yt-to-md.md @@ -0,0 +1,72 @@ +--- +name: yt-to-md +description: Download, transcribe, and organize YouTube video into structured markdown document +--- + +Process YouTube video $ARGUMENTS end-to-end and create a well-organized document: + +## Workflow + +Execute these skills in sequence: + +### 1. Download Audio +Use the `youtube-video-to-audio` agent skill to download audio from the provided YouTube URL. + +### 2. Transcribe Audio +Use the `transcribe-audio-to-text` agent skill to transcribe the downloaded audio file. + +### 3. Clean Up Transcription +Use the `audio-transcription-cleanup` agent skill to transform the raw transcription into a well-structured markdown document. 
+ +This step will: +- Detect the primary language and use it for output +- Add semantic paragraph breaks based on topic changes +- Create descriptive section headings +- Fix obvious spelling, terminology, and grammatical errors +- Remove filler words and verbal artifacts +- Maintain all original information from the video + +**Output Format:** +```markdown +# [Video Title] + +## [Section 1 Heading] + +[Cleaned paragraph content...] + +[More cleaned content...] + +## [Section 2 Heading] + +[Content continues...] + +## Key Takeaways + +- Point 1 +- Point 2 +- Point 3 +``` + +Save the cleaned document to: `~/tmp/organized_transcript.md` + +### 4. Display Output Location + +After completing the cleanup and saving the document, print the absolute path of the final organized document in a clickable format. + +**Format:** `file:///absolute/path/to/organized_transcript.md` + +**Example output:** +``` +✓ YouTube video processed successfully! +✓ Organized document saved to: file:///Users/sanhehu/tmp/organized_transcript.md + +Click the link above to open the document. +``` + +**Requirements:** +- Use absolute path (not relative path like `~/tmp/...`) +- Include `file://` protocol prefix for clickability +- Provide clear success message +- Make it easy for user to access the final document + +**Note:** This command automates the core workflow from video URL to organized markdown document. Each skill can also be run independently for partial workflows. 
diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..8bcfb24 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,81 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:MacHu-GWU/sanhe-claude-code-plugins:plugins/social-media-network/youtube", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "3cdcff17c37ed05dd39655464e9a6341dc088180", + "treeHash": "0df4fe3ca127d4d78db9fa6573360edd6691b523c1c57940ac42eea957b6c88d", + "generatedAt": "2025-11-28T10:12:04.264380Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "youtube", + "description": "Youtube toolkit for Claude Code.", + "version": "1.0.0" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "87417563457ce722c372f13f4331678321c1b8193ed81b800984c97a1b1b4f8f" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "be542de53f0d607ef00f714a666130e8ef99efe83eb435d568de4b68227b79f7" + }, + { + "path": "commands/yt-to-md.md", + "sha256": "1b64f6bc6cc2a9b0241f8ffe27790eb599813396ac4acd5be95ce5bc490e002f" + }, + { + "path": "commands/enrich-citations.md", + "sha256": "17038f9db9bab9d27d3dbdb2d03b9388081b0f0f6ba4c86f50fbde4ffe93a18c" + }, + { + "path": "skills/youtube-video-to-audio/SKILL.md", + "sha256": "90345dafd0294c0fff62e075e2e9bcb86936234c61fb047ec712745daf6b138a" + }, + { + "path": "skills/youtube-video-to-audio/scripts/download_audio.py", + "sha256": "5b634eccb702a33ec5b87803de6d20bb03b93086e3926d1875678c029fecf1e0" + }, + { + "path": "skills/enrich-citations/SKILL.md", + "sha256": "441142d7f9cf2d239e9f83c17ebca388244908e03d725d8feda0f42b5b10c528" + }, + { + "path": "skills/enrich-citations/scripts/enrich_citations.py", + "sha256": 
"abe901c9480f7f0e7427c870841c89e911719b376bd1b7ac9b92f244befa88b0" + }, + { + "path": "skills/audio-transcript-cleanup/SKILL.md", + "sha256": "2d7cbebe9bfd85ff17b72c766a6b93cafca5f38f2c6e51317d761085a54e0dad" + }, + { + "path": "skills/audio-transcript-cleanup/scripts/audio_transcript_cleanup.py", + "sha256": "f7f14e690f8cc8c8a279508147a66434cbe1ccd2849c3ec977fec1f14e33f493" + }, + { + "path": "skills/transcribe-audio-to-text/SKILL.md", + "sha256": "d7756a805f2ae7ba8544fd608b9c1d8b8761700106654730fa24c9297f60a6c0" + }, + { + "path": "skills/transcribe-audio-to-text/scripts/transcribe_audio.py", + "sha256": "e97635f3ae7fb89f4477da6d792cbe190fdce472470eba9ea2b49813f0d502aa" + } + ], + "dirSha256": "0df4fe3ca127d4d78db9fa6573360edd6691b523c1c57940ac42eea957b6c88d" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/audio-transcript-cleanup/SKILL.md b/skills/audio-transcript-cleanup/SKILL.md new file mode 100644 index 0000000..89eaef4 --- /dev/null +++ b/skills/audio-transcript-cleanup/SKILL.md @@ -0,0 +1,54 @@ +--- +name: audio-transcription-cleanup +description: Transform messy voice transcription text into well-formatted, human-readable documents while preserving original meaning +--- + +# Audio Transcription Cleanup + +Clean up raw audio transcriptions by removing filler words, fixing errors, and adding proper structure. + +## Usage + +Use the `audio_transcript_cleanup.py` script to process transcript files: + +```bash +# Use default output location (~/tmp/cleaned_transcript.md - allows overwrite) +python scripts/audio_transcript_cleanup.py --transcript-file /path/to/transcript.txt + +# Specify custom output location (cannot overwrite existing files) +python scripts/audio_transcript_cleanup.py --transcript-file /path/to/transcript.txt --output /path/to/output.md +``` + +## What It Does + +The script automatically: +- Removes verbal artifacts (um, uh, like, you know, 呃, 啊, 那个, etc.) 
+- Fixes spelling and grammar errors +- Adds semantic paragraph breaks and section headings +- Converts spoken fragments into complete sentences +- Preserves all original information (no summarization) +- Auto-detects language and maintains natural expression + +## Options + +- `--transcript-file` (required) - Path to the transcript file to clean up +- `--output` (optional) - Custom output path (default: `~/tmp/cleaned_transcript.md`) + +## Output Behavior + +- **Default location**: `~/tmp/cleaned_transcript.md` - Allows overwrite +- **Custom location**: Cannot overwrite existing files (raises error if file exists) + +## Language Support + +Auto-detects and works with: +- English +- Chinese (Mandarin, Cantonese) +- Mixed language content +- Multi-speaker transcriptions + +## Requirements + +- Python 3.11+ +- Claude CLI must be installed and accessible +- Transcript file must exist at specified path diff --git a/skills/audio-transcript-cleanup/scripts/audio_transcript_cleanup.py b/skills/audio-transcript-cleanup/scripts/audio_transcript_cleanup.py new file mode 100644 index 0000000..797c6ee --- /dev/null +++ b/skills/audio-transcript-cleanup/scripts/audio_transcript_cleanup.py @@ -0,0 +1,329 @@ +# -*- coding: utf-8 -*- + +""" +Audio Transcription Cleanup Script + +This module transforms messy voice transcription text into well-formatted, human-readable +markdown documents using Claude CLI. It preserves all original meaning and content while +improving readability through intelligent text cleanup and restructuring. 
+ +Key Features: + - Remove verbal artifacts (um, uh, like, filler words in any language) + - Fix spelling and grammar errors + - Add semantic paragraph breaks and section headings + - Support for single-speaker and multi-speaker content + - Multi-language support (English, Chinese, and more) + - Preserve all original information (no summarization) + - Default output location with overwrite capability + - Custom output locations with overwrite protection + +Default Behavior: + - Output: ~/tmp/cleaned_transcript.md (allows overwrite) + - Language: Auto-detected from source + - Format: Well-structured markdown with headings + +Custom Output: + - Cannot overwrite existing files (raises FileExistsError) + - Creates parent directories automatically + +Text Cleanup Operations: + - Remove filler words: um, uh, like, you know, 呃, 啊, 那个, 就是说 + - Fix obvious noun errors and typos + - Correct grammar mistakes + - Convert spoken fragments to complete sentences + - Add descriptive section headings + - Organize content into semantic paragraphs + +Example Usage: + # Use default output location (allows overwrite) + $ python audio_transcript_cleanup.py --transcript-file /path/to/transcript.txt + + # Specify custom output location (cannot overwrite existing file) + $ python audio_transcript_cleanup.py --transcript-file /path/to/transcript.txt --output ~/Documents/cleaned.md + +Requirements: + - Claude CLI must be installed and accessible + - Transcript file must exist at specified path + +Author: sanhe +Plugin: youtube@sanhe-claude-code-plugins +""" + +import subprocess +import argparse +from pathlib import Path + +dir_home = Path.home() +path_cleaned_transcript = dir_home / "tmp" / "cleaned_transcript.md" + +prompt = """ +## Task +Transform the messy voice transcription text provided below into a well-formatted, human-readable document while preserving ALL original meaning and content. 
+ +## Key Principles +- **Preserve original meaning**: Do not summarize or omit information +- **Fix transcription artifacts**: Remove filler words, false starts, and repetitions +- **Improve readability**: Organize into proper paragraphs with clear structure +- **Handle multi-speaker content**: Clearly attribute dialogue when multiple speakers are present +- **Enhance document structure**: Add semantic paragraphing and descriptive headings + +## Processing Instructions + +### Step 1: Analyze the Input +Examine the transcription to identify: +- **Primary language**: Determine the dominant language of the transcription +- **Number of speakers**: Single vs. multi-speaker content +- **Main topics or themes**: Identify distinct topics for sectioning +- **Transcription quality issues**: Filler words, repetitions, false starts, obvious errors + +### Step 2: Text Cleanup Operations + +1. **Remove verbal artifacts**: + - Filler words: "um", "uh", "like", "you know", "呃", "啊", "那个", "就是说", "嗯", "然后" + - False starts and self-corrections + - Unnecessary repetitions + +2. **Fix errors and improve accuracy**: + - **Correct obvious noun errors**: Fix misrecognized names, places, technical terms + - **Fix spelling mistakes**: Correct typos and transcription errors + - **Fix grammar errors**: Subject-verb agreement, tense consistency, word order + - **Clarify ambiguous terms**: Use context to determine correct words when transcription is unclear + +3. **Organize content structure**: + - **Semantic paragraphing**: Group related ideas into logical paragraphs based on meaning + - **Add section headings**: Create descriptive headings that summarize each section's content + - Convert spoken fragments into complete sentences + - Create logical flow between sections + +4. 
**For multi-speaker content**: + - Use clear speaker labels (Speaker A, B, or actual names if identified) + - Format as dialogue or meeting notes + - Preserve conversational context + +### Step 3: Language and Formatting + +**Language Selection Rules** (in priority order): +1. If user explicitly specifies output language → use that language +2. Otherwise → use the primary language of the original transcription +3. Preserve technical terms and proper nouns in their original language/form + +**For single-speaker content**: +```markdown +# [Main Topic Title] + +## [Section 1 Heading] + +[Cleaned paragraph 1 content...] + +[Cleaned paragraph 2 content...] + +## [Section 2 Heading] + +[Cleaned content...] +``` + +**For multi-speaker content** - Option 1 (Dialogue format): +```markdown +# [Meeting/Discussion Title] + +## [Topic 1] + +**Speaker A:** [cleaned content] + +**Speaker B:** [cleaned content] + +## [Topic 2] + +**Speaker A:** [cleaned content] +``` + +**For multi-speaker content** - Option 2 (Meeting notes format): +```markdown +# [Meeting Title] + +## [Discussion Topic 1] + +Speaker A mentioned that [cleaned content]... + +Speaker B responded by explaining [cleaned content]... + +## [Discussion Topic 2] + +The group discussed [cleaned content]... 
+``` + +## Important Constraints +- **NEVER summarize**: Include all information from original transcription +- **NEVER add new substantive information**: Only reorganize and clarify existing content +- **NEVER change the speaker's meaning or intent** +- **DO add section headings**: These are structural additions that help readability +- **Preserve technical terms and specific names** exactly as intended (fix transcription errors only) + +## Heading Guidelines +- Section headings should be **concise and descriptive** (3-8 words typically) +- Headings should reflect the **actual content** of each section +- Use appropriate heading levels (H1 for main title, H2 for major sections, H3 for subsections) +- Headings are added to **improve navigation**, not to interpret or editorialize + +## Language Support +This skill works with transcriptions in any language: +- **Chinese** (Mandarin, Cantonese, etc.) +- **English** +- **Mixed language** content (preserve code-switching naturally) +- **Other languages** (apply language-appropriate grammar rules) + +## OUTPUT INSTRUCTIONS + +**CRITICAL**: Your response must contain ONLY the cleaned markdown document. + +**DO NOT include**: +- Any explanations before the document +- Any explanations after the document +- Any meta-commentary about the cleaning process +- Any acknowledgments like "Here is the cleaned version" +- Any markdown code fences (no ```markdown blocks) +- Any introductory or concluding remarks + +**START your response immediately with the H1 title** (# [Title]) and **END immediately after the last content paragraph**. + +Your entire response = the cleaned document itself, nothing more. + +## INPUT TRANSCRIPTION: +""".strip() + + +def cleanup_transcript(path_transcript: Path): + """ + Clean up audio transcription and save to file. 
+ + Args: + path_transcript: Path to the raw transcript file + """ + # Read transcript content + content = path_transcript.read_text(encoding="utf-8") + + # Call Claude CLI to process the transcript + args = [ + "claude", + "--append-system-prompt", + prompt, + "--print", + content, + ] + + result = subprocess.run( + args, + capture_output=True, + text=True, + ) + + if result.returncode != 0: + raise RuntimeError(f"Claude CLI failed: {result.stderr}") + + # Return the cleaned output + cleaned_transcript = result.stdout + return cleaned_transcript + + +def main(): + """ + Main CLI entry point for cleaning up audio transcriptions. + + This script transforms messy voice transcription text into well-formatted, + human-readable markdown documents while preserving the original meaning. + + Example usage: + python audio_transcript_cleanup.py --transcript-file "/path/to/transcript.txt" + + What it does: + - Removes filler words and verbal artifacts (um, uh, like, etc.) + - Fixes obvious spelling and grammar errors + - Adds semantic paragraph breaks and section headings + - Preserves all original information (no summarization) + - Maintains the speaker's meaning and intent + + Requirements: + - Transcript file must exist + - Claude CLI must be installed and accessible + """ + parser = argparse.ArgumentParser( + description="Clean up audio transcriptions into well-formatted markdown documents", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Use default output location (allows overwrite) + %(prog)s --transcript-file "~/tmp/transcript.txt" + + # Specify custom output location (cannot overwrite existing file) + %(prog)s --transcript-file "~/tmp/transcript.txt" --output "~/Documents/cleaned.md" + +Cleanup Operations: + - Remove verbal artifacts: um, uh, like, you know, 呃, 啊, 那个 + - Fix spelling and grammar errors + - Add semantic paragraphs and section headings + - Convert spoken fragments into complete sentences + - Preserve all original 
information + +Output Behavior: + - Default location (~/tmp/cleaned_transcript.md): Allows overwrite + - Custom location: Cannot overwrite existing files (will raise error) + +Note: + This script uses Claude CLI to perform intelligent transcript cleanup. + All original information is preserved - no summarization occurs. + """, + ) + + parser.add_argument( + "--transcript-file", + type=str, + required=True, + help=f"Path to the transcript file to clean up (required)", + ) + + parser.add_argument( + "--output", + type=str, + default=None, + help=f"Output file path for cleaned transcript (default: {path_cleaned_transcript}). Note: Default location allows overwrite, custom locations cannot overwrite existing files.", + ) + + args = parser.parse_args() + + # Convert to Path objects and expand user home directory + path_transcript = Path(args.transcript_file).expanduser() + + # Determine output path + if args.output: + path_output = Path(args.output).expanduser() + # For custom locations, check if file already exists + if (path_output != path_cleaned_transcript) and path_output.exists(): + raise FileExistsError( + f"Output file already exists at {path_output}. " + f"Please choose a different location or remove the existing file. 
" + f"(Default location {path_cleaned_transcript} allows overwrite)" + ) + else: + # Use default location (allows overwrite) + path_output = path_cleaned_transcript + + # Ensure output directory exists + path_output.parent.mkdir(parents=True, exist_ok=True) + + # Clean up the transcript + cleaned_transcript = cleanup_transcript( + path_transcript=path_transcript, + ) + + # Save to output file + path_output.write_text(cleaned_transcript, encoding="utf-8") + + # Print success message with clickable file path + absolute_path = path_output.resolve() + print(f"✓ Transcript cleanup completed successfully!") + print(f"✓ Cleaned transcript saved to: file://{absolute_path}") + print(f"\nClick the link above to open the document.") + + +if __name__ == "__main__": + main() diff --git a/skills/enrich-citations/SKILL.md b/skills/enrich-citations/SKILL.md new file mode 100644 index 0000000..a6093d2 --- /dev/null +++ b/skills/enrich-citations/SKILL.md @@ -0,0 +1,56 @@ +--- +name: enrich-citations +description: Find and add authoritative source links for all facts, citations, and references in markdown documents +--- + +# Enrich Citations + +Enhance markdown documents by finding and adding authoritative source links for mentioned facts, tools, products, research, and references. 
+ +## Usage + +Use the `enrich_citations.py` script to process markdown documents: + +```bash +# Use default output location (~/tmp/citation_enriched.md - allows overwrite) +python scripts/enrich_citations.py --document-file /path/to/document.md + +# Specify custom output location (cannot overwrite existing files) +python scripts/enrich_citations.py --document-file /path/to/document.md --output /path/to/output.md +``` + +## What It Does + +The script automatically: +- Identifies all references (tools, research, products, organizations, people, standards) +- Performs web search to find authoritative sources +- Adds markdown hyperlinks with proper spacing: `[Reference](URL)` +- Verifies all URLs are valid and accessible +- Preserves all original content (only adds hyperlinks, no text changes) +- Prioritizes official sources and documentation + +## Options + +- `--document-file` (required) - Path to the markdown document to enrich +- `--output` (optional) - Custom output path (default: `~/tmp/citation_enriched.md`) + +## Output Behavior + +- **Default location**: `~/tmp/citation_enriched.md` - Allows overwrite +- **Custom location**: Cannot overwrite existing files (raises error if file exists) + +## Reference Types Identified + +- External sources and research papers +- Tools, software, frameworks, libraries +- Products and services +- Organizations and institutions +- Technical concepts and standards (RFC, W3C, APIs) +- People and experts + +## Requirements + +- Python 3.11+ +- Claude CLI must be installed and accessible +- Internet connection (for web searches) +- Document file must exist at specified path diff --git a/skills/enrich-citations/scripts/enrich_citations.py b/skills/enrich-citations/scripts/enrich_citations.py new file mode 100644 index 0000000..c2faf63 --- /dev/null +++ b/skills/enrich-citations/scripts/enrich_citations.py @@ -0,0 +1,323 @@ +# -*- coding: utf-8 -*- + +""" +Example usage:: + + # Use default output location (allows overwrite) + python 
/Users/sanhehu/Documents/GitHub/sanhe-claude-code-plugins/plugins/social-media-network/youtube/skills/enrich-citations/scripts/enrich_citations.py --document-file /Users/sanhehu/tmp/cleaned_transcript.md + + # Specify custom output location (cannot overwrite existing file) + python /Users/sanhehu/Documents/GitHub/sanhe-claude-code-plugins/plugins/social-media-network/youtube/skills/enrich-citations/scripts/enrich_citations.py --document-file /Users/sanhehu/tmp/cleaned_transcript.md --output ~/Documents/enriched_document.md +""" + +import subprocess +import argparse +from pathlib import Path + +dir_home = Path.home() +path_enriched_document = dir_home / "tmp" / "citation_enriched.md" + +prompt = """ +## Task +Enhance the markdown document provided below by finding and adding authoritative source links for all mentioned facts, citations, tools, products, research, and references. + +## Key Principles +- **Find authoritative sources**: Search for the original, most credible source for each reference +- **Verify accuracy**: Ensure links point to the correct, current information +- **Preserve content**: Only add hyperlinks, don't change the document's text or meaning +- **Proper formatting**: Use markdown hyperlink syntax with spaces for proper rendering + +## Processing Instructions + +### Step 1: Identify References + +Read through the entire markdown document and identify all mentions of: + +1. **External sources and research**: + - Academic papers, studies, reports + - Books, articles, blog posts + - Research findings or statistics + +2. **Tools and software**: + - Applications, frameworks, libraries + - Programming languages, platforms + - Development tools, services + +3. **Products and services**: + - Commercial products + - SaaS platforms + - Hardware or software products + +4. **Organizations and institutions**: + - Companies, universities + - Standards bodies, foundations + - Government agencies + +5. 
**Technical concepts and standards**: + - Specifications (RFC, W3C, etc.) + - APIs, protocols + - Industry standards + +6. **People and experts**: + - Authors, researchers + - Industry leaders + - Subject matter experts + +### Step 2: Web Search for Sources + +For **each identified reference**: + +1. **Search for the authoritative source**: + - Use web search to find the official or most credible source + - Prioritize: official websites > documentation > reputable publications + - For academic references: search for DOI, arXiv, official publication + +2. **Verify the source**: + - Ensure the URL is current and accessible + - Check that content matches the reference + - Confirm it's the most authoritative source available + +3. **Select the best link**: + - Official documentation or homepage for tools/products + - Original publication for research/articles + - Wikipedia for general concepts (if no better source exists) + - GitHub repository for open source projects + +### Step 3: Add Hyperlinks + +Replace plain text references with markdown hyperlinks: + +**Format:** `[Reference Text](URL)` + +**Critical formatting requirements:** +- **Add spaces around hyperlinks**: `text [link](url) text` (not `text[link](url)text`) +- Use descriptive link text (the actual reference name, not "click here") +- Maintain the original sentence structure +- Preserve all other content unchanged + +**Examples:** + +Before: +``` +According to the 2023 Stack Overflow Survey, JavaScript is the most popular language. +``` + +After: +``` +According to the [2023 Stack Overflow Survey](https://survey.stackoverflow.co/2023/) , JavaScript is the most popular language. +``` + +Before (Chinese): +``` +文章提到了GPT-4的能力提升 +``` + +After (Chinese): +``` +文章提到了 [GPT-4](https://openai.com/gpt-4) 的能力提升 +``` + +### Step 4: Quality Assurance + +Before finalizing: + +1. **Verify all links**: + - Test that URLs are valid and accessible + - Check for broken links or redirects + - Ensure HTTPS when available + +2. 
**Check formatting**: + - Confirm spaces around hyperlinks + - Verify markdown syntax is correct + - Ensure links render properly + +3. **Review accuracy**: + - Links point to correct resources + - No duplicate or conflicting sources + - All identified references are addressed + +## Important Constraints + +- **NEVER change the document content**: Only add hyperlinks to existing text +- **NEVER add new information**: Don't insert citations not mentioned in the original +- **NEVER remove existing content**: Preserve all original text +- **NEVER summarize or paraphrase**: Keep exact wording +- **DO add spaces around hyperlinks**: Essential for proper rendering +- **DO verify all sources**: Ensure accuracy and accessibility + +## Source Priority Guidelines + +When multiple sources exist, prioritize in this order: + +1. **Official sources**: Project homepages, official documentation +2. **Primary sources**: Original research papers, first publications +3. **Authoritative organizations**: Standards bodies, academic institutions +4. **Reputable publications**: Well-known tech blogs, news sites +5. **Community resources**: GitHub, Stack Overflow (for code/tools) +6. **General references**: Wikipedia, encyclopedias (as last resort) + +## Language Support + +This skill works with documents in any language: +- Respect the document's primary language +- Use appropriate sources for the language/region +- Maintain original text while adding hyperlinks +- For multilingual documents, find sources matching each language section when appropriate + +## OUTPUT INSTRUCTIONS + +**CRITICAL**: Your response must contain ONLY the enhanced markdown document with citations added. 
+ +**DO NOT include**: +- Any explanations before the document +- Any explanations after the document +- Any meta-commentary about the enrichment process +- Any acknowledgments like "Here is the enriched version" +- Any markdown code fences (no ```markdown blocks) +- Any introductory or concluding remarks + +**START your response immediately with the first line of the document** and **END immediately after the last line**. + +Your entire response = the enriched document itself, nothing more. + +## INPUT DOCUMENT: +""".strip() + + +def enrich_citations(path_document: Path): + """ + Enrich markdown document with authoritative citation links. + + Args: + path_document: Path to the markdown document file + """ + # Read document content + content = path_document.read_text(encoding="utf-8") + + # Call Claude CLI to process the document + args = [ + "claude", + "--append-system-prompt", + prompt, + "--print", + content, + ] + + result = subprocess.run( + args, + capture_output=True, + text=True, + ) + + if result.returncode != 0: + raise RuntimeError(f"Claude CLI failed: {result.stderr}") + + # Return the enriched document + enriched_document = result.stdout + return enriched_document + + +def main(): + """ + Main CLI entry point for enriching citations in markdown documents. + + This script enhances markdown documents by finding and adding authoritative + source links for all mentioned facts, citations, and references. + + Example usage: + python enrich_citations.py --document-file "/path/to/document.md" + + What it does: + - Identifies all references (tools, research, products, people, etc.) 
+ - Performs web search to find authoritative sources + - Adds markdown hyperlinks with proper spacing + - Verifies all URLs are valid and current + - Preserves all original content (only adds links) + + Requirements: + - Document file must exist + - Claude CLI must be installed and accessible + - Internet connection required for web searches + """ + parser = argparse.ArgumentParser( + description="Enrich markdown documents with authoritative citation links", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Use default output location (allows overwrite) + %(prog)s --document-file "~/tmp/cleaned_transcript.md" + + # Specify custom output location (cannot overwrite existing file) + %(prog)s --document-file "~/tmp/cleaned_transcript.md" --output "~/Documents/enriched.md" + +Citation Enrichment: + - Identifies facts, tools, products, research, and references + - Web search to find authoritative sources + - Adds markdown hyperlinks: [Title](URL) + - Verifies all links are valid and current + - Maintains proper spacing around hyperlinks + - Preserves original document content + +Output Behavior: + - Default location (~/tmp/citation_enriched.md): Allows overwrite + - Custom location: Cannot overwrite existing files (will raise error) + +Note: + This script uses Claude CLI with web search to perform intelligent citation enrichment. + All original content is preserved - only hyperlinks are added. + """, + ) + + parser.add_argument( + "--document-file", + type=str, + required=True, + help=f"Path to the markdown document to enrich (required)", + ) + + parser.add_argument( + "--output", + type=str, + default=None, + help=f"Output file path for enriched document (default: {path_enriched_document}). 
Note: Default location allows overwrite, custom locations cannot overwrite existing files.", + ) + + args = parser.parse_args() + + # Convert to Path objects and expand user home directory + path_document = Path(args.document_file).expanduser() + + # Determine output path + if args.output: + path_output = Path(args.output).expanduser() + # For custom locations, check if file already exists + if (path_output != path_enriched_document) and path_output.exists(): + raise FileExistsError( + f"Output file already exists at {path_output}. " + f"Please choose a different location or remove the existing file. " + f"(Default location {path_enriched_document} allows overwrite)" + ) + else: + # Use default location (allows overwrite) + path_output = path_enriched_document + + # Ensure output directory exists + path_output.parent.mkdir(parents=True, exist_ok=True) + + # Enrich citations in the document + enriched_document = enrich_citations( + path_document=path_document, + ) + + # Save to output file + path_output.write_text(enriched_document, encoding="utf-8") + + # Print success message with clickable file path + absolute_path = path_output.resolve() + print(f"✓ Citation enrichment completed successfully!") + print(f"✓ Enriched document saved to: file://{absolute_path}") + print(f"\nClick the link above to open the document.") + + +if __name__ == "__main__": + main() diff --git a/skills/transcribe-audio-to-text/SKILL.md b/skills/transcribe-audio-to-text/SKILL.md new file mode 100644 index 0000000..393c03b --- /dev/null +++ b/skills/transcribe-audio-to-text/SKILL.md @@ -0,0 +1,45 @@ +--- +name: transcribe-audio-to-text +description: Transcribe audio files to text using audinota cli +--- + +# Transcribe Audio to Text + +Convert audio files to text using audinota transcription service. 
+ +## Usage + +```bash +# Use default audio file (output from youtube-video-to-audio skill) +python scripts/transcribe_audio.py + +# Specify custom audio file +python scripts/transcribe_audio.py --audio-file-path "/path/to/audio.mp3" +``` + +## Options + +- `--audio-file-path` - Path to audio file to transcribe (default: ~/tmp/download_audio_result.mp3) + +## Examples + +```bash +# Transcribe default audio file +python scripts/transcribe_audio.py + +# Transcribe custom audio file +python scripts/transcribe_audio.py --audio-file-path "~/Music/podcast.mp3" + +# Get help +python scripts/transcribe_audio.py -h +``` + +## Requirements + +- Python 3.11+ +- audinota installed at `~/Documents/GitHub/audinota-project/.venv/bin/audinota` +- Audio file must exist at specified path + +## Integration + +This skill works seamlessly with the `youtube-video-to-audio` skill. By default, it transcribes the audio file downloaded by that skill at `~/tmp/download_audio_result.mp3`. diff --git a/skills/transcribe-audio-to-text/scripts/transcribe_audio.py b/skills/transcribe-audio-to-text/scripts/transcribe_audio.py new file mode 100644 index 0000000..fab995a --- /dev/null +++ b/skills/transcribe-audio-to-text/scripts/transcribe_audio.py @@ -0,0 +1,181 @@ +# -*- coding: utf-8 -*- + +""" +Audio Transcription Script + +This module provides functionality to transcribe audio files to text using the audinota CLI. +It supports both default and custom output locations, with built-in protection against +accidentally overwriting existing files at custom locations. 
+ +Key Features: + - Transcribe audio files using audinota + - Default output location with overwrite capability + - Custom output locations with overwrite protection + - Automatic directory creation for custom paths + - Integration with youtube-video-to-audio skill workflow + +Default Behavior: + - Input: ~/tmp/download_audio_result.mp3 + - Output: ~/tmp/download_audio_result.txt (allows overwrite) + +Custom Output: + - Cannot overwrite existing files (raises FileExistsError) + - Creates parent directories automatically + +Example Usage: + # Use defaults (allows overwrite) + $ python transcribe_audio.py + + # Custom audio file + $ python transcribe_audio.py --audio-file-path "/path/to/audio.mp3" + + # Custom output (cannot overwrite existing) + $ python transcribe_audio.py --audio-file-path "/path/to/audio.mp3" --output "~/Documents/transcript.txt" + +Requirements: + - audinota installed at ~/Documents/GitHub/audinota-project/.venv/bin/audinota + - Audio file must exist at specified path + +Author: sanhe +Plugin: youtube@sanhe-claude-code-plugins +""" + +import subprocess +import argparse +from pathlib import Path + +dir_home = Path.home() +default_audio_path = dir_home / "tmp" / "download_audio_result.mp3" +default_transcript_path = dir_home / "tmp" / "download_audio_result.txt" + + +def transcribe_audio(path_audio: Path, path_output: Path = None): + """ + Transcribe an audio file to text using audinota. 
+ + Args: + path_audio: Path to the audio file to transcribe + path_output: Path to save the transcript (optional, audinota will use default if not specified) + + Raises: + subprocess.CalledProcessError: If audinota transcription fails + """ + path_audinota = ( + dir_home + / "Documents" + / "GitHub" + / "audinota-project" + / ".venv" + / "bin" + / "audinota" + ) + + if not path_audinota.exists(): + raise FileNotFoundError(f"audinota not found at {path_audinota}") + + if not path_audio.exists(): + raise FileNotFoundError(f"Audio file not found at {path_audio}") + + args = [ + str(path_audinota), + "transcribe", + "--input", + str(path_audio), + ] + + # Add output argument if specified + if path_output: + args.extend(["--output", str(path_output)]) + + print(f"Transcribing audio file: {path_audio}") + subprocess.run(args, check=True) + + output_location = path_output if path_output else "default location" + print(f"✓ Transcription completed successfully") + print(f"✓ Saved to: {output_location}") + + +def main(): + """ + Main CLI entry point for transcribing audio files to text. + + This script uses audinota to transcribe audio files to text. + By default, it transcribes the audio file downloaded by the youtube-video-to-audio skill. 
+ + Example usage: + # Use default paths (allows overwrite) + python transcribe_audio.py + + # Specify custom audio file with default output + python transcribe_audio.py --audio-file-path "/path/to/audio.mp3" + + # Specify both custom audio and output (cannot overwrite existing file) + python transcribe_audio.py --audio-file-path "/path/to/audio.mp3" --output "~/Documents/transcript.txt" + + Requirements: + - audinota must be installed at ~/Documents/GitHub/audinota-project/.venv/bin/audinota + - Audio file must exist at the specified path + + Note: + - Default location (~/tmp/download_audio_result.txt) allows overwrite + - Custom output locations cannot overwrite existing files + """ + parser = argparse.ArgumentParser( + description="Transcribe audio files to text using audinota", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + %(prog)s + %(prog)s --audio-file-path "/path/to/audio.mp3" + %(prog)s --audio-file-path "~/Music/podcast.mp3" --output "~/Documents/transcript.txt" + +Notes: + - The default audio file path is the output of youtube-video-to-audio skill + - Default output location (~/tmp/download_audio_result.txt) allows overwrite + - Custom output locations cannot overwrite existing files + - Requires audinota to be installed at ~/Documents/GitHub/audinota-project/.venv/bin/audinota + """, + ) + + parser.add_argument( + "--audio-file-path", + type=str, + default=str(default_audio_path), + help=f"Path to the audio file to transcribe (default: {default_audio_path})", + ) + + parser.add_argument( + "--output", + type=str, + default=None, + help=f"Output file path for transcript (default: {default_transcript_path}). 
Note: Default location allows overwrite, custom locations cannot overwrite existing files.", + ) + + args = parser.parse_args() + + # Convert to Path object and expand user home directory + audio_path = Path(args.audio_file_path).expanduser() + + # Determine output path and check for existing files + if args.output: + path_output = Path(args.output).expanduser() + # For custom locations, check if file already exists + if (path_output != default_transcript_path) and (path_output.exists()): + raise FileExistsError( + f"Output file already exists at {path_output}. " + f"Please choose a different location or remove the existing file. " + f"(Default location {default_transcript_path} allows overwrite)" + ) + # Ensure output directory exists + path_output.parent.mkdir(parents=True, exist_ok=True) + else: + # Use default location (allows overwrite) + path_output = default_transcript_path + path_output.unlink(missing_ok=True) + + # Transcribe the audio + transcribe_audio(path_audio=audio_path, path_output=path_output) + + +if __name__ == "__main__": + main() diff --git a/skills/youtube-video-to-audio/SKILL.md b/skills/youtube-video-to-audio/SKILL.md new file mode 100644 index 0000000..69fd87e --- /dev/null +++ b/skills/youtube-video-to-audio/SKILL.md @@ -0,0 +1,41 @@ +--- +name: youtube-video-to-audio +description: Download YouTube videos as audio files using yt-dlp +--- + +# YouTube Video to Audio + +Extract and download audio from YouTube videos. 
+ +## Usage + +```bash +python scripts/download_audio.py --video-url "https://www.youtube.com/watch?v=VIDEO_ID" +``` + +## Options + +- `--video-url` (required) - YouTube video URL to download +- `--quality` - Quality selector (default: "bestaudio[abr<=64]/worstaudio") +- `--audio-format` - Output format: mp3, aac, wav (default: mp3) +- `--bitrate` - Audio bitrate: 64K, 128K, 192K, 320K (default: 64K) +- `--output` - Custom output path (default: ~/tmp/download_audio_result.mp3) + +## Examples + +```bash +# Basic usage (default: 64K bitrate MP3 to ~/tmp/download_audio_result.mp3) +python scripts/download_audio.py --video-url "https://www.youtube.com/watch?v=d6rZtgHcbWA" + +# High quality with custom bitrate +python scripts/download_audio.py --video-url "https://youtu.be/xyz" --quality "bestaudio" --bitrate "128K" + +# Custom output location +python scripts/download_audio.py --video-url "https://youtu.be/xyz" --output "/path/to/output.mp3" +``` + +## Requirements + +- Python 3.11+ +- ffmpeg installed at `~/ffmpeg` +- yt-dlp (auto-downloaded on first run) diff --git a/skills/youtube-video-to-audio/scripts/download_audio.py b/skills/youtube-video-to-audio/scripts/download_audio.py new file mode 100644 index 0000000..e3285a6 --- /dev/null +++ b/skills/youtube-video-to-audio/scripts/download_audio.py @@ -0,0 +1,290 @@ +# -*- coding: utf-8 -*- + +""" +YouTube Audio Downloader + +This module provides functionality to download audio from YouTube videos using yt-dlp. +It supports cross-platform operation (Windows, macOS, Linux) and includes automatic +yt-dlp binary management with built-in protection against overwriting existing files. 
+ +Key Features: + - Download audio from YouTube videos as MP3 files + - Automatic yt-dlp binary download and updates + - Cross-platform support (Windows, macOS, Linux) + - Customizable audio quality and bitrate + - Default output location with overwrite capability + - Custom output locations with overwrite protection + - Automatic directory creation for custom paths + +Default Behavior: + - Output: ~/tmp/download_audio_result.mp3 (allows overwrite) + - Quality: bestaudio[abr<=64]/worstaudio + - Format: MP3 + - Bitrate: 64K + +Custom Output: + - Cannot overwrite existing files (raises FileExistsError) + - Creates parent directories automatically + +Example Usage: + # Download with defaults (allows overwrite) + $ python download_audio.py --video-url "https://www.youtube.com/watch?v=d6rZtgHcbWA" + + # Custom quality and bitrate + $ python download_audio.py --video-url "https://youtu.be/xyz" --quality "bestaudio" --bitrate "128K" + + # Custom output location (cannot overwrite existing) + $ python download_audio.py --video-url "https://youtu.be/xyz" --output "/path/to/audio.mp3" + +Requirements: + - ffmpeg installed at ~/ffmpeg + - Internet connection for downloading yt-dlp and videos + - yt-dlp will be automatically downloaded if not present + +Platform Support: + - Windows: Downloads yt-dlp.exe + - macOS: Downloads yt-dlp_macos + - Linux: Downloads yt-dlp_linux + +Author: sanhe +Plugin: youtube@sanhe-claude-code-plugins +""" + +import sys +import os +import json +import subprocess +import urllib.request +import argparse +from pathlib import Path + +# Detect OS +IS_WINDOWS = False +IS_MACOS = False +IS_LINUX = False + +_platform = sys.platform +if _platform in ["win32", "cygwin"]: + IS_WINDOWS = True +elif _platform == "darwin": + IS_MACOS = True +elif _platform == "linux": + IS_LINUX = True +else: + raise RuntimeError(f"Unsupported platform: {_platform}") + +dir_home = Path.home() +dir_tmp = dir_home / "tmp" +try: + dir_tmp.mkdir(exist_ok=True) +except Exception: # 
pragma: no cover + pass +path_audio = dir_tmp / "download_audio_result.mp3" + +if IS_WINDOWS: + filename = "yt-dlp.exe" + path_yt_dlp = dir_home / "yt-dlp.exe" +elif IS_MACOS: + filename = "yt-dlp_macos" + path_yt_dlp = dir_home / "yt-dlp_macos" +elif IS_LINUX: + filename = "yt-dlp_linux" + path_yt_dlp = dir_home / "yt-dlp_linux" +else: + raise RuntimeError("Unsupported operating system") + +path_ffmpeg = dir_home / "ffmpeg" + + +def get_latest_yt_dlp_release() -> str: + """Get the latest yt-dlp release version from GitHub API.""" + api_url = "https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest" + + with urllib.request.urlopen(api_url) as response: + data = json.loads(response.read().decode()) + tag_name = data["tag_name"] + return tag_name + + +def download_yt_dlp(): + """Download the appropriate yt-dlp binary based on the OS.""" + # Get latest release version + latest_version = get_latest_yt_dlp_release() + print(f"Latest yt-dlp version: {latest_version}") + + # Determine download URL and local path based on OS + download_url = f"https://github.com/yt-dlp/yt-dlp/releases/download/{latest_version}/{filename}" + + print(f"Downloading {filename} from {download_url}...") + print(f"Saving to {path_yt_dlp}...") + + # Download the file + urllib.request.urlretrieve(download_url, path_yt_dlp) + + # Set executable permissions for macOS and Linux + if IS_MACOS or IS_LINUX: + os.chmod(path_yt_dlp, 0o755) + + print(f"Successfully downloaded yt-dlp to {path_yt_dlp}") + + +def download_audio( + video_url: str, + quality: str = "bestaudio[abr<=64]/worstaudio", + audio_format: str = "mp3", + bitrate: str = "64K", + output_path: str = None, +): + """ + Download audio from a YouTube video using yt-dlp. 
+ + Args: + video_url: YouTube video URL to download + quality: yt-dlp quality selector (default: "bestaudio[abr<=64]/worstaudio") + audio_format: Output audio format (default: "mp3") + bitrate: Audio bitrate (default: "64K") + output_path: Custom output path (default: ~/tmp/download_audio_result.mp3) + + Returns: + Path to the downloaded audio file + """ + output = output_path if output_path else str(path_audio) + + args = [ + f"{path_yt_dlp}", + "-f", + quality, + "--extract-audio", + "--audio-format", + audio_format, + "--audio-quality", + bitrate, + "-o", + output, + "--restrict-filenames", + "--ffmpeg-location", + f"{str(path_ffmpeg)}", + video_url, + ] + result = subprocess.run(args, check=True, capture_output=True) + return output + + +def main(): + """ + Main CLI entry point for downloading YouTube audio. + + This script downloads audio from YouTube videos using yt-dlp and ffmpeg. + It automatically downloads yt-dlp if not present in the home directory. + + Example usage: + # Use default output location (allows overwrite) + python download_audio.py --video-url "https://www.youtube.com/watch?v=d6rZtgHcbWA" + + # Specify custom quality and bitrate + python download_audio.py --video-url "https://youtu.be/xyz" --quality "bestaudio" --bitrate "128K" + + # Specify custom output location (cannot overwrite existing file) + python download_audio.py --video-url "https://youtu.be/xyz" --output "/path/to/output.mp3" + + Requirements: + - ffmpeg must be installed at ~/ffmpeg + - yt-dlp will be automatically downloaded to home directory if not present + + Note: + - Default location (~/tmp/download_audio_result.mp3) allows overwrite + - Custom output locations cannot overwrite existing files + """ + parser = argparse.ArgumentParser( + description="Download audio from YouTube videos as MP3 files using yt-dlp", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + %(prog)s --video-url "https://www.youtube.com/watch?v=d6rZtgHcbWA" + %(prog)s 
--video-url "https://youtu.be/xyz" --quality "bestaudio" --bitrate "128K" + %(prog)s --video-url "https://youtu.be/xyz" --output "/custom/path/audio.mp3" + +Quality options: + bestaudio - Best quality audio available + bestaudio[abr<=64] - Best audio with max 64kbps bitrate + worstaudio - Lowest quality audio (smallest file) + """, + ) + + # Required arguments + parser.add_argument( + "--video-url", + type=str, + required=True, + help="YouTube video URL to download (required)", + ) + + # Optional arguments for yt-dlp customization + parser.add_argument( + "--quality", + type=str, + default="bestaudio[abr<=64]/worstaudio", + help='Quality selector for yt-dlp (default: "bestaudio[abr<=64]/worstaudio")', + ) + + parser.add_argument( + "--audio-format", + type=str, + default="mp3", + help="Output audio format: mp3, aac, wav, etc. (default: mp3)", + ) + + parser.add_argument( + "--bitrate", + type=str, + default="64K", + help="Audio bitrate: 64K, 128K, 192K, 320K, etc. (default: 64K)", + ) + + parser.add_argument( + "--output", + type=str, + default=None, + help=f"Custom output file path (default: {path_audio}). Note: Default location allows overwrite, custom locations cannot overwrite existing files.", + ) + + args = parser.parse_args() + + # Ensure yt-dlp is downloaded + if not path_yt_dlp.exists(): + print("yt-dlp not found, downloading...") + download_yt_dlp() + + # Determine output path and check for existing files + if args.output: + path_output = Path(args.output).expanduser() + # For custom locations, check if file already exists + if (path_output != path_audio) and path_output.exists(): + raise FileExistsError( + f"Output file already exists at {path_output}. " + f"Please choose a different location or remove the existing file. 
" + f"(Default location {path_audio} allows overwrite)" + ) + # Ensure output directory exists + path_output.parent.mkdir(parents=True, exist_ok=True) + output_path_str = str(path_output) + else: + # Use default location (allows overwrite) + path_audio.unlink(missing_ok=True) + output_path_str = None + + # Download the audio + output_file = download_audio( + video_url=args.video_url, + quality=args.quality, + audio_format=args.audio_format, + bitrate=args.bitrate, + output_path=output_path_str, + ) + + print(f"✓ Audio successfully downloaded from {args.video_url}") + print(f"✓ Saved to: {output_file}") + + +if __name__ == "__main__": + main()