commit 47031044b5f7766df29cddea090dcafc62350444 Author: Zhongwei Li Date: Sun Nov 30 08:38:08 2025 +0800 Initial commit diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..1089bbb --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,14 @@ +{ + "name": "video-to-article", + "description": "Convert lecture videos to transcripts, outlines, and article drafts using automated workflow with ffmpeg and ElevenLabs API", + "version": "1.6.4", + "author": { + "name": "Lukas Trumm" + }, + "skills": [ + "./skills" + ], + "commands": [ + "./commands" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..b609761 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# video-to-article + +Convert lecture videos to transcripts, outlines, and article drafts using automated workflow with ffmpeg and ElevenLabs API diff --git a/commands/process-video.md b/commands/process-video.md new file mode 100644 index 0000000..dc18943 --- /dev/null +++ b/commands/process-video.md @@ -0,0 +1,35 @@ +--- +name: process-video +description: Convert lecture videos to transcripts and articles +argument-hint: [youtube-url-or-folder-path] +--- + +# Video to Article Conversion + +Convert a lecture or presentation video into multiple text formats. + +## Usage + +- `/process-video https://youtube.com/watch?v=...` - Process YouTube video +- `/process-video /path/to/folder` - Process video in existing folder +- `/process-video` - Interactive mode (will ask for input) + +--- + +Load video-to-article skill first. 
+ +Execute the video-to-article workflow for: $ARGUMENTS + +**Handling file references:** + +- When user provides path via @ syntax (e.g., @"folder name"), extract the actual path +- The @ syntax provides file/folder context automatically - use the referenced path +- If path contains spaces and quotes, clean them: `@"My Folder"` → `My Folder` +- Support both absolute and relative paths +- Navigate to the folder before executing workflow + +**Input types:** + +1. **YouTube URL**: Detect youtube.com or youtu.be pattern and download first +2. **Folder path**: Navigate to folder and process existing video +3. **Empty/Interactive**: Ask user for input or use current directory diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..ac5eba5 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,61 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:lttr/claude-marketplace:plugins/video-to-article", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "cd0ee9574d5e6a07f90152963cf6627d6bd89a14", + "treeHash": "c1811e9167b6cc4e9cb35fca4fa214d88ca655596ab3df79862b84d8d096dbd4", + "generatedAt": "2025-11-28T10:20:21.832575Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "video-to-article", + "description": "Convert lecture videos to transcripts, outlines, and article drafts using automated workflow with ffmpeg and ElevenLabs API", + "version": "1.6.4" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "05b335a22bfa20ad51dac6ed61cd4ded0d1408a4c7948ad43dcadb83540f17a2" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "23f3b9ea47f1e7cf1d3f6cd18588f5bb066fcbfe0966a4e4cd4a43e51d4622a6" + }, + { + "path": 
"commands/process-video.md", + "sha256": "26c2b9388fbcfeaa503dc7bbb01146170bd7e59f22f6e3b1e53a893c7ff05e21" + }, + { + "path": "skills/video-to-article/SKILL.md", + "sha256": "04630e9051ff610d6171a22e4c9af56e2a47da78841cab182258373af7c22dd5" + }, + { + "path": "skills/video-to-article/scripts/extract-audio.sh", + "sha256": "54d4eeba1ed04336c037d88596e94e4a39867863aa182af8169cf3fcc5e0b67f" + }, + { + "path": "skills/video-to-article/scripts/download-youtube.sh", + "sha256": "9a305bce27d38c8be43413533a87b745242c74947bbe24e5422e8a6806fdadf6" + }, + { + "path": "skills/video-to-article/scripts/transcribe-audio.sh", + "sha256": "c182764d86037efb36bd4c1f3bd2f9d9ea0c36e1874e4e45ff349b0ead94cc5f" + } + ], + "dirSha256": "c1811e9167b6cc4e9cb35fca4fa214d88ca655596ab3df79862b84d8d096dbd4" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/video-to-article/SKILL.md b/skills/video-to-article/SKILL.md new file mode 100644 index 0000000..8187364 --- /dev/null +++ b/skills/video-to-article/SKILL.md @@ -0,0 +1,339 @@ +--- +name: video-to-article +description: Use this skill when the user wants to convert a lecture, presentation, or talk video into text formats (transcript, outline, or article). Trigger when user mentions processing video recordings, creating transcripts from lectures, or generating articles from recorded presentations. +allowed-tools: Bash, Read, Write, AskUserQuestion, Glob +--- + +# Video to Article Conversion Workflow + +This skill automates the conversion of lecture/presentation videos into various text formats including transcripts, outlines, and article drafts. + +## Workflow Overview + +1. **Setup Folder** - Create properly named folder if needed +2. **Validate Input Metadata** - Check for README.md with required frontmatter +3. **Extract Audio** - Convert video to MP3 using ffmpeg +4. **Transcribe** - Generate text transcript using ElevenLabs API +5. 
**Process Text** - Create progressive text refinements + +## Step 0: Folder Setup (if starting from scratch) + +If user is starting a new talk/lecture conversion and no folder exists yet: + +Create folder using format: `YYYY Title of Talk` + +After creating folder, navigate into it and proceed with workflow. + +## Step 1: Validate Metadata + +Look for `README.md` in the current directory with these frontmatter fields: + +- `title` - Lecture/presentation title +- `speaker` - Speaker name +- `date` - Presentation date (YYYY-MM-DD) +- `lang` - Presentation language (e.g., "english", "czech", "spanish") +- `abstract` - Brief description +- `slides` - Link to slides (or file path) +- `video` - Optional: link to video + +### If README.md is Missing or Incomplete + +Use the AskUserQuestion tool to collect missing information: + +``` +- What is the lecture title? +- Who is the speaker? +- What date was this presented? +- What language is the presentation in? +- Brief description or abstract? +- Link to slides or presentation file? +``` + +Create or update README.md with collected information. + +## Step 2: Find or Download Video File + +### Check for Local Video + +Look for video file in current directory: + +- Common patterns: `*.mp4`, `*.mov`, `*.avi`, `*.mkv` +- If multiple found, ask user which to process + +### If No Local Video Found + +Ask user if they have a YouTube URL using AskUserQuestion tool: + +- "Do you have a YouTube URL for this talk?" +- If yes, collect the YouTube URL +- If no, ask for local video file path + +### Download from YouTube (if URL provided) + +```bash +bash ${CLAUDE_PLUGIN_ROOT}/skills/video-to-article/scripts/download-youtube.sh "<youtube-url>" +``` + +This downloads the video as `video.mp4` in medium quality (720p max). + +**Requirements**: User needs yt-dlp installed. 
If missing, show: + +``` +Install yt-dlp: +- pip install yt-dlp +- brew install yt-dlp (macOS) +- sudo apt install yt-dlp (Ubuntu/Debian) +``` + +## Step 3: Extract Audio + +Check if `audio.mp3` already exists. If not: + +Run the extract-audio.sh script using the plugin root variable: + +```bash +bash ${CLAUDE_PLUGIN_ROOT}/skills/video-to-article/scripts/extract-audio.sh "<video-file>" +``` + +This creates `audio.mp3` in the current directory. + +## Step 4: Transcribe Audio + +Check if `generated-transcript.txt` already exists. If not: + +### Map Language to ISO Code + +Convert the `lang` field from README.md to the appropriate ISO 639-1 language code for ElevenLabs API. Use your knowledge of language codes to map language names to their two-letter codes (e.g., "english" → "en", "czech" → "cs", "spanish" → "es"). + +### Run Transcription + +```bash +bash ${CLAUDE_PLUGIN_ROOT}/skills/video-to-article/scripts/transcribe-audio.sh <language-code> +``` + +## Step 5: Generate Text Outputs + +### Execution Protocol + +Before generating ANY content, you MUST complete these steps in order: + +1. **List all required outputs** - Read and acknowledge the complete list below +2. **Check which files already exist** - Use file existence checks for each output +3. **Read all existing source files** - Load transcript, outline, key-ideas as context +4. **Confirm which outputs need generation** - Explicitly state which files are missing +5. **Only then begin generation** - Generate missing files one by one + +### Required Outputs + +You MUST generate ALL of these files: + +1. `generated-transcript-cleaned.md` - Cleaned transcript +2. `generated-transcript-readable.md` - Readable transcript +3. `generated-transcript-outline.md` - Content outline +4. `generated-key-ideas.md` - Key takeaways +5. `generated-article-formal.md` - Explanatory blog post (long-form, narrative) +6. 
`generated-article-direct.md` - Scannable technical brief (concise, bullets) + +**IMPORTANT:** Outputs 5 and 6 are TWO DIFFERENT ARTICLE VARIANTS with distinct styles and audiences. You must generate BOTH. + +**MANDATORY WORKFLOW:** + +- Check file existence for ALL files above +- **Automatically generate ANY missing files without asking user** +- Skip files that already exist (inform user which were skipped) +- NEVER declare completion unless all files exist +- NEVER ask user if they want to generate missing files - just generate them +- **Generate BOTH article variants** (formal and direct) - they serve different purposes + +### Idempotent Generation Strategy + +This process is idempotent - running the skill multiple times will only generate missing files, never overwrite existing ones: + +1. **Check each output file before generating** - Use file existence checks +2. **Skip files that already exist** - Inform user which files are being skipped +3. **Generate only missing files** - Never overwrite existing content +4. **Use existing files as context** - When generating later outputs (like key-ideas), read all available previous outputs for context + +Example: If only `generated-key-ideas.md` is missing: + +- Read existing: `generated-transcript-cleaned.md`, `generated-transcript-readable.md`, `generated-transcript-outline.md` +- Generate: `generated-key-ideas.md` using the existing files as context +- Skip: All other outputs + +This allows users to: + +- Resume interrupted workflows +- Generate only specific missing outputs +- Re-run safely without data loss + +### Output 1: Cleaned Transcript + +**File:** `generated-transcript-cleaned.md` +**Input:** `generated-transcript.txt` +**Check:** Skip if file exists + +Convert the transcript into a more readable form by dividing it into paragraphs and adding headings. 
+ +**Instructions:** + +- Do not change the content, except for obvious typos +- Do not change the tone of voice - if the speaker uses informal language, keep it +- Remove filler words like "Eh", "Ehm", "Um", "Uh" +- Remove comments related to the transcription itself, or rewrite them elegantly if possible (e.g., "(laughter)" or "(2 second pause)") +- Respond in {lang} language - maintain the same language as the original transcript + +The cleaned transcript should preserve the speaker's authentic voice while being easier to read and follow. + +### Output 2: Readable Transcript + +**File:** `generated-transcript-readable.md` +**Inputs:** `generated-transcript.txt` and `generated-transcript-cleaned.md` +**Check:** Skip if file exists + +You have two inputs: + +1. The first text contains the direct transcript of a lecture +2. The second text contains a better structured version of this transcript + +The slides are available here: {slides} + +Create a more readable text from this transcript: + +**Instructions:** + +- Preserve the style, but optimize for reading comprehension +- Fix typos, repetitions, and improve stylistic clarity +- Shorten where it doesn't affect the message +- Make it flow naturally for readers while keeping the speaker's voice +- Respond in {lang} language - the output must be in the same language as the input + +The goal is to make the content accessible to readers while maintaining the essence of the spoken presentation. + +### Output 3: Outline + +**File:** `generated-transcript-outline.md` +**Inputs:** Cleaned and readable transcripts +**Check:** Skip if file exists + +From the provided transcript inputs, create an outline of what was discussed. 
+ +**Instructions:** + +- List the main ideas and messages with very brief content +- Use bullet points for clarity +- Focus on key takeaways and core concepts +- Keep descriptions concise - just enough to understand the point +- Respond in {lang} language - the outline must be in the same language as the source material + +This outline should serve as a quick reference guide to the presentation's structure and main points. + +### Output 4: Key Ideas + +**File:** `generated-key-ideas.md` +**Inputs:** Cleaned and readable transcripts (or all available generated files if running incrementally) +**Check:** Skip if file exists + +From the provided transcript inputs, extract the key ideas, tips, and main concepts. + +**Instructions:** + +- Focus on interesting insights, actionable tips, and core concepts +- Unlike the outline, don't follow chronological order - prioritize importance +- Exclude generic or procedural content +- Each idea should have a brief description explaining why it matters +- Use bullet points or numbered list +- Respond in {lang} language - the output must be in the same language as the source material + +This should capture the most valuable takeaways someone would want to remember from the talk. + +### Output 5: Explanatory Blog Post + +**File:** `generated-article-formal.md` +**Inputs:** All previous outputs (or all available generated files if running incrementally) +**Check:** Skip if file exists + +Create a traditional blog post with complete explanations and smooth narrative flow. + +**Target audience:** General readers discovering the topic, browsing casually +**Reading context:** Website visitors who have time and want full understanding + +**Instructions:** + +- Write in complete, flowing sentences with careful transitions +- Provide context and background before diving into details +- Use storytelling elements to maintain engagement +- Example opening: "V průběhu posledního roku jsme zaznamenali značné změny..." 
(Czech) or equivalent +- Explain why concepts matter, not just what they are +- Guide readers through ideas with clear signposting +- Respond in {lang} language - the article must be in the same language as the source material + +**Style characteristics:** + +- Narrative structure with introduction, development, conclusion +- Welcoming tone that assumes less prior knowledge +- Longer paragraphs with descriptive language +- Educational approach that builds understanding step-by-step + +### Output 6: Scannable Technical Brief + +**File:** `generated-article-direct.md` +**Inputs:** All previous outputs (or all available generated files if running incrementally) +**Check:** Skip if file exists + +Create a condensed, high-signal article optimized for rapid scanning and information extraction. + +**Target audience:** Technical readers or busy professionals who skim content +**Reading context:** Newsletter, Slack/Discord shares, or quick reference + +**Instructions:** + +- Lead with short, declarative statements - skip warm-up sentences +- Frontload key information in each paragraph +- Heavy use of bullets, numbered lists, and visual breaks +- Example opening: "AI modely: radikální skok za rok." (Czech) or equivalent +- Cut all filler words, hedging, and obvious statements +- Dense information - every sentence earns its place +- Respond in {lang} language - the article must be in the same language as the source material + +**Style characteristics:** + +- Telegraphic style - fragments are encouraged when clearer +- Bullets and lists dominate over prose paragraphs +- Assumes reader familiarity with domain +- Optimized for 2-minute skim, not 10-minute read + +## Summary + +After completion, inform the user which files were: + +- **Skipped** (already existed) +- **Generated** (newly created) + +Example output: + +``` +Workflow complete! 
+
+Skipped (already exists):
+- audio.mp3
+- generated-transcript.txt
+- generated-transcript-cleaned.md
+- generated-transcript-readable.md
+- generated-transcript-outline.md
+
+Generated:
+- generated-key-ideas.md
+- generated-article-formal.md
+- generated-article-direct.md
+
+All outputs in {lang} language.
+```
+
+If everything was generated fresh, simply list all outputs under "Generated".
+
+## Error Handling
+
+- **Missing ffmpeg**: Prompt user to install ffmpeg
+- **Missing API key**: Prompt user to set ELEVENLABS_API_KEY
+- **Transcription errors**: Show error message and suggest checking API key/quota
+- **Script not found**: Use absolute paths to skill scripts directory
diff --git a/skills/video-to-article/scripts/download-youtube.sh b/skills/video-to-article/scripts/download-youtube.sh
new file mode 100755
index 0000000..06085a7
--- /dev/null
+++ b/skills/video-to-article/scripts/download-youtube.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+# Download YouTube video in medium quality (720p or lower)
+# Usage: ./download-youtube.sh <youtube-url>
+#
+# Writes video.mp4 to the current directory. Exits 1 on a missing argument,
+# a missing yt-dlp binary, or a failed download; diagnostics go to stderr.
+
+set -euo pipefail
+
+if [ -z "${1:-}" ]; then
+  echo "Error: YouTube URL required" >&2
+  echo "Usage: $0 <youtube-url>" >&2
+  exit 1
+fi
+
+YOUTUBE_URL="$1"
+
+# Check if yt-dlp is installed
+if ! command -v yt-dlp >/dev/null 2>&1; then
+  echo "Error: yt-dlp is not installed" >&2
+  echo "Install with: pip install yt-dlp" >&2
+  echo "Or: brew install yt-dlp (macOS)" >&2
+  echo "Or: sudo apt install yt-dlp (Ubuntu/Debian)" >&2
+  exit 1
+fi
+
+# Download video in medium quality (720p max), output as video.mp4
+echo "Downloading YouTube video..."
+# Test the command directly instead of inspecting $? afterwards; "--" stops
+# option parsing so a URL beginning with "-" cannot be read as a yt-dlp flag.
+if yt-dlp \
+  -f "bestvideo[height<=720]+bestaudio/best[height<=720]" \
+  --merge-output-format mp4 \
+  -o "video.mp4" \
+  -- "$YOUTUBE_URL"; then
+  echo "✓ Video downloaded successfully as video.mp4"
+else
+  echo "Error: Failed to download video" >&2
+  exit 1
+fi
diff --git a/skills/video-to-article/scripts/extract-audio.sh b/skills/video-to-article/scripts/extract-audio.sh
new file mode 100755
index 0000000..bb67dfc
--- /dev/null
+++ b/skills/video-to-article/scripts/extract-audio.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+# extract-audio.sh - Extract audio from video file for lecture transcription
+# Usage: extract-audio.sh <video-file>
+#
+# Writes audio.mp3 to the current directory, overwriting any existing file.
+
+set -euo pipefail
+
+# Under `set -u` a bare "$1" aborts with "unbound variable"; default it to
+# empty so a missing argument gets a usage message instead.
+VIDEO_FILE="${1:-}"
+
+if [ -z "$VIDEO_FILE" ]; then
+  echo "Error: Video file required" >&2
+  echo "Usage: $0 <video-file>" >&2
+  exit 1
+fi
+
+if [ ! -f "$VIDEO_FILE" ]; then
+  echo "Error: Video file '$VIDEO_FILE' not found" >&2
+  exit 1
+fi
+
+if ! command -v ffmpeg >/dev/null 2>&1; then
+  echo "Error: ffmpeg not found" >&2
+  exit 1
+fi
+
+# Extract audio to audio.mp3
+# -nostdin keeps ffmpeg from consuming the caller's stdin, and -y is given
+# up front because it is a global option. grep exits 1 when it prints nothing,
+# so that status is neutralised; with pipefail the pipeline then fails only
+# when ffmpeg itself fails.
+ffmpeg -nostdin -y -i "$VIDEO_FILE" -vn -acodec libmp3lame -q:a 2 audio.mp3 2>&1 \
+  | { grep -v "^frame=" || true; }
+
+echo "Audio extracted: audio.mp3"
diff --git a/skills/video-to-article/scripts/transcribe-audio.sh b/skills/video-to-article/scripts/transcribe-audio.sh
new file mode 100755
index 0000000..7188ba4
--- /dev/null
+++ b/skills/video-to-article/scripts/transcribe-audio.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+
+# transcribe-audio.sh - Transcribe audio using ElevenLabs API
+# Usage: transcribe-audio.sh <language-code>
+#
+# Reads audio.mp3 from the current directory and writes the transcript to
+# generated-transcript.txt. Requires ELEVENLABS_API_KEY, curl and jq.
+
+set -euo pipefail
+
+# Under `set -u` a bare "$1" aborts with "unbound variable"; default it to
+# empty so a missing argument gets a usage message instead.
+LANGUAGE_CODE="${1:-}"
+
+if [ -z "$LANGUAGE_CODE" ]; then
+  echo "Error: Language code required" >&2
+  echo "Usage: $0 <language-code>" >&2
+  exit 1
+fi
+
+if [ -z "${ELEVENLABS_API_KEY:-}" ]; then
+  echo "Error: ELEVENLABS_API_KEY environment variable not set" >&2
+  exit 1
+fi
+
+for tool in curl jq; do
+  if ! command -v "$tool" >/dev/null 2>&1; then
+    echo "Error: $tool not found" >&2
+    exit 1
+  fi
+done
+
+if [ ! -f "audio.mp3" ]; then
+  echo "Error: audio.mp3 not found" >&2
+  exit 1
+fi
+
+TEMP_JSON=$(mktemp)
+# Single quotes defer expansion to exit time, and the inner double quotes keep
+# a temp path containing spaces intact.
+trap 'rm -f "$TEMP_JSON"' EXIT
+
+# Call ElevenLabs API
+# -S makes curl report transport failures even though -s silences progress.
+# HTTP error statuses are deliberately not turned into failures here: the
+# response body is needed for the .detail check below.
+if ! curl -X POST "https://api.elevenlabs.io/v1/speech-to-text" \
+  -H "xi-api-key: $ELEVENLABS_API_KEY" \
+  -F "model_id=scribe_v1" \
+  -F "file=@audio.mp3" \
+  -F "language_code=$LANGUAGE_CODE" \
+  -o "$TEMP_JSON" \
+  -sS; then
+  echo "Error: Request to ElevenLabs API failed" >&2
+  exit 1
+fi
+
+# Check for errors
+if ! jq empty "$TEMP_JSON" 2>/dev/null; then
+  echo "Error: Invalid API response" >&2
+  cat "$TEMP_JSON" >&2
+  exit 1
+fi
+
+if jq -e '.detail' "$TEMP_JSON" >/dev/null 2>&1; then
+  echo "API Error:" >&2
+  jq -r '.detail' "$TEMP_JSON" >&2
+  exit 1
+fi
+
+# Extract and save transcript
+# Only create generated-transcript.txt once a string .text is confirmed;
+# otherwise `jq -r '.text'` would write the literal "null" and later runs
+# would treat the transcript as already done.
+if ! jq -e '.text | type == "string"' "$TEMP_JSON" >/dev/null 2>&1; then
+  echo "Error: API response contains no transcript text" >&2
+  exit 1
+fi
+
+jq -r '.text' "$TEMP_JSON" > generated-transcript.txt
+
+echo "Transcript saved: generated-transcript.txt"