commit 3c644a155267ebc0787ae4cb708066874772d87f Author: Zhongwei Li Date: Sun Nov 30 09:04:57 2025 +0800 Initial commit diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..8525cff --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,12 @@ +{ + "name": "youtube-comment-analysis", + "description": "Extract and analyze YouTube comments. Run standalone for comment analysis or sequential with youtube-to-markdown for cross-analysis with video summary. Avoid diving in the cesspool and wasting your time on irrelevant noise.", + "version": "1.0.0", + "author": { + "name": "Ville Reijonen", + "email": "marketplace@vre.iki.fi" + }, + "skills": [ + "./" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..7c0a4ab --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# youtube-comment-analysis + +Extract and analyze YouTube comments. Run standalone for comment analysis or sequential with youtube-to-markdown for cross-analysis with video summary. Avoid diving in the cesspool and wasting your time on irrelevant noise. diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..511ace6 --- /dev/null +++ b/SKILL.md @@ -0,0 +1,106 @@ +--- +name: youtube-comment-analysis +description: Use when user requests YouTube comments. Run standalone for comment analysis or sequential with youtube-to-markdown for cross-analysis with video summary. +allowed-tools: + - Bash + - Read + - Write + - Task + - AskUserQuestion + - Skill +--- + +# YouTube Comment Analysis + +Execute all steps sequentially without asking for user approval. Use TodoWrite to track progress. + +## Step 0: Check for video summary + +If you don't know from conversation context whether a video summary exists for this URL, extract video ID from URL and check if file matching `/youtube - * ({video_id}).md` exists. 
+ +If no summary file exists OR you don't know from context, ask user: + +``` +AskUserQuestion: +- question: "No video summary found. How would you like to proceed with comment analysis?" +- header: "Mode" +- options: + 1. label: "Create transcript first" + description: "Run youtube-to-markdown skill to create video summary, then cross-analyze comments against it" + 2. label: "Standalone analysis" + description: "Analyze comments without video context (faster, less informed)" +``` + +If user chooses "Create transcript first": Run youtube-to-markdown skill with the URL first, then proceed with youtube-comment-analysis skill. + +If summary exists OR user chooses "Standalone analysis": Proceed directly to Step 1. + +## Step 1: Extract comments + +```bash +python3 ./extract_comments.py "<YOUTUBE_URL>" "<output_directory>" +``` + +Creates: youtube_{VIDEO_ID}_name.txt, youtube_{VIDEO_ID}_comments.md + +## Step 2: Clean comments + +task_tool: +- subagent_type: "general-purpose" +- model: "haiku" +- prompt: + +**Standalone mode (no summary):** +``` +Read <output_directory>/${BASE_NAME}_comments.md and clean/curate comments. Write to <output_directory>/${BASE_NAME}_comments_cleaned.md. Do not translate. + +Tasks: +- Remove low-value: "+1", "thanks", "great video", spam, duplicates +- Remove comments that repeat content from other comments +``` + +**Sequential mode (with summary from youtube-to-markdown):** +``` +Read Summary section from <output_directory>/youtube - * ({video_id}).md file to understand main points. + +Read <output_directory>/${BASE_NAME}_comments.md and clean/curate comments. Write to <output_directory>/${BASE_NAME}_comments_cleaned.md. Do not translate. + +Tasks: +- Remove low-value: "+1", "thanks", "great video", spam, duplicates +- Remove comments that are off-topic (use summary to identify) +- Remove comments that repeat content from the summary or other comments +``` + +## Step 3: Extract Golden Comments + +task_tool: +- subagent_type: "general-purpose" +- prompt: + +**Standalone mode (no summary):** +``` +Read <output_directory>/${BASE_NAME}_comments_cleaned.md. 
def extract_video_id(url):
    """Return the video ID embedded in a YouTube URL, or None if none is found.

    Recognised forms, tried in this order:
      - ``youtu.be/<id>``
      - ``...?v=<id>`` / ``...&v=<id>``
      - ``/shorts/<id>``, ``/embed/<id>``, ``/live/<id>``, ``/v/<id>``

    Only the characters ``A-Z a-z 0-9 _ -`` are accepted as part of the ID.
    The caller splices the ID into output file names, so path separators,
    dots, query strings and ``#fragment`` suffixes must never leak through.
    """
    if not url:
        return None

    id_chars = r'([A-Za-z0-9_-]+)'
    patterns = (
        r'youtu\.be/' + id_chars,
        r'[?&]v=' + id_chars,
        r'/(?:shorts|embed|live|v)/' + id_chars,
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None


def check_yt_dlp():
    """Exit with status 1 and install hints unless ``yt-dlp --version`` succeeds."""
    try:
        subprocess.run(['yt-dlp', '--version'], capture_output=True, check=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError (binary missing) as well as other
        # launch failures such as a non-executable file on PATH.
        hints = (
            "ERROR: yt-dlp is not installed",
            "Install options:",
            "  - macOS: brew install yt-dlp",
            "  - Ubuntu/Debian: sudo apt update && sudo apt install -y yt-dlp",
            "  - All systems: pip3 install yt-dlp",
        )
        for line in hints:
            print(line, file=sys.stderr)
        sys.exit(1)


def fetch_video_data(video_url, output_dir):
    """Run yt-dlp for *video_url* and return its metadata (with comments) as parsed JSON.

    The JSON is captured from yt-dlp's stdout in memory rather than through a
    fixed-name ``video_data.json`` scratch file, which would overwrite and then
    delete any file of that name already present in the output directory.

    Args:
        video_url: URL handed to yt-dlp.
        output_dir: Unused; kept so existing callers keep working.

    Returns:
        The object decoded from yt-dlp's ``--dump-single-json`` output.

    Exits:
        Calls ``sys.exit(1)`` after printing an ``ERROR:`` line to stderr when
        yt-dlp cannot be started, returns a non-zero status, or emits output
        that is not valid JSON.
    """
    command = [
        'yt-dlp', '--dump-single-json', '--write-comments', '--skip-download',
        '--',  # end of options: a URL starting with '-' must not be parsed as a flag
        video_url,
    ]
    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            encoding='utf-8',
            errors='replace',  # never fail on undecodable bytes in diagnostics
        )
    except OSError as e:
        print(f"ERROR: Failed to extract video data: {e}", file=sys.stderr)
        sys.exit(1)

    if result.returncode != 0:
        print("ERROR: Failed to extract video data", file=sys.stderr)
        details = (result.stderr or '').strip()
        if details:
            # Surface yt-dlp's own explanation instead of discarding it.
            print(details, file=sys.stderr)
        sys.exit(1)

    try:
        return json.loads(result.stdout)
    except ValueError as e:  # json.JSONDecodeError is a ValueError
        print(f"ERROR: Failed to read JSON: {e}", file=sys.stderr)
        sys.exit(1)
def create_comments_file(data, base_name, output_dir, max_top_level=50):
    """Write the video's comments as a Markdown outline and return the file path.

    ``data['comments']`` is a flat list of dicts; each entry's ``parent`` field
    is either ``'root'`` (top-level comment) or the ``id`` of the comment it
    replies to. The thread is rebuilt and rendered as:

      - depth 0: ``### N. author (likes likes)`` (numbered)
      - depth 1: ``####``
      - depth 2: ``#####``
      - depth 3: a ``- **author (likes likes)**: text`` bullet, with every
        deeper descendant flattened beneath it as an indented bullet

    Args:
        data: Mapping with an optional ``'comments'`` list.
        base_name: Prefix of the output file ``{base_name}_comments.md``.
        output_dir: Existing directory to write into.
        max_top_level: Maximum number of top-level comments written
            (``None`` for no limit). Replies are never truncated.

    Returns:
        Path of the written Markdown file.
    """
    comments = data.get('comments') or []
    comment_file = os.path.join(output_dir, f"{base_name}_comments.md")

    if not comments:
        with open(comment_file, 'w', encoding='utf-8') as cf:
            cf.write("No comments available\n")
        print(f"COMMENTS: {comment_file} (no comments)")
        return comment_file

    # Group by parent id; a missing/empty parent is treated as top-level.
    replies_by_parent = {}
    for comment in comments:
        parent = comment.get('parent') or 'root'
        replies_by_parent.setdefault(parent, []).append(comment)

    def label(comment):
        # `or` (not a .get default) so keys present with a None value
        # do not render as "None".
        author = comment.get('author') or 'Unknown'
        likes = comment.get('like_count') or 0
        return f"{author} ({likes} likes)"

    def body(comment):
        return comment.get('text') or ''

    def children(comment):
        cid = comment.get('id')
        # An id of 'root' would alias the top-level bucket and recurse forever.
        if not cid or cid == 'root':
            return []
        return replies_by_parent.get(cid, [])

    def descendants(comment):
        """Return every reply below *comment*, depth-first, in thread order."""
        found = []
        pending = list(reversed(children(comment)))
        while pending:
            node = pending.pop()
            found.append(node)
            pending.extend(reversed(children(node)))
        return found

    def write_comment(cf, comment, depth=0, index=None):
        """Write *comment* and its replies; headings for depth 0-2, bullets beyond."""
        if depth > 2:
            cf.write(f"- **{label(comment)}**: {body(comment)}\n\n")
            nested = descendants(comment)
            for reply in nested:
                cf.write(f"  - **{label(reply)}**: {body(reply)}\n")
            if nested:
                cf.write("\n")
            return

        hashes = '#' * (depth + 3)
        number = f"{index}. " if index is not None else ""
        cf.write(f"{hashes} {number}{label(comment)}\n\n")
        cf.write(f"{body(comment)}\n\n")
        for reply in children(comment):
            write_comment(cf, reply, depth + 1)

    root_comments = replies_by_parent.get('root', [])
    if max_top_level is None:
        top_level = root_comments
    else:
        top_level = root_comments[:max_top_level]

    with open(comment_file, 'w', encoding='utf-8') as cf:
        for idx, comment in enumerate(top_level, 1):
            write_comment(cf, comment, depth=0, index=idx)

    total_replies = len(comments) - len(root_comments)
    print(f"COMMENTS: {comment_file} ({len(root_comments)} comments, {total_replies} replies)")
    return comment_file


def main():
    """Command-line entry point: fetch a video's title and comments into OUTPUT_DIR.

    Expects exactly two arguments (YouTube URL, output directory). Writes
    ``youtube_{id}_name.txt`` and ``youtube_{id}_comments.md`` and exits with
    status 1 on any usage or extraction error.
    """
    if len(sys.argv) != 3:
        print("Usage: extract_comments.py <YOUTUBE_URL> <OUTPUT_DIR>", file=sys.stderr)
        sys.exit(1)

    video_url = sys.argv[1]
    # An empty directory argument means "current directory", matching the
    # default used by finalize_comments.py; os.makedirs('') would raise.
    output_dir = sys.argv[2] or "."

    if not video_url:
        print("ERROR: No YouTube URL provided", file=sys.stderr)
        sys.exit(1)

    check_yt_dlp()

    video_id = extract_video_id(video_url)
    if not video_id:
        print("ERROR: Could not extract video ID from URL", file=sys.stderr)
        sys.exit(1)

    base_name = f"youtube_{video_id}"
    os.makedirs(output_dir, exist_ok=True)

    data = fetch_video_data(video_url, output_dir)

    title = data.get('title') or 'Untitled'
    name_file = os.path.join(output_dir, f"{base_name}_name.txt")
    with open(name_file, 'w', encoding='utf-8') as f:
        f.write(title)
    print(f"SUCCESS: {name_file}")

    create_comments_file(data, base_name, output_dir)
def clean_title_for_filename(title, max_length=60):
    """Return *title* reduced to text that is safe to embed in a file name.

    Steps:
      1. Drop characters that are invalid in file names: ``< > : " / \\ | ? *``.
      2. Collapse every whitespace run to one space and trim the ends.
      3. Drop remaining control characters (e.g. NUL, which ``open`` rejects).
      4. If longer than *max_length*, truncate; when the cut falls inside a
         word, back up to the previous space so no partial word is kept.
         A word that ends exactly at the limit is kept whole.

    The result may be an empty string if nothing usable remains.
    """
    cleaned = re.sub(r'[<>:"/\\|?*]', '', title)
    cleaned = re.sub(r'\s+', ' ', cleaned)
    cleaned = re.sub(r'[\x00-\x1f\x7f]', '', cleaned).strip()

    if len(cleaned) > max_length:
        truncated = cleaned[:max_length]
        cut_inside_word = not cleaned[max_length].isspace()
        if cut_inside_word and ' ' in truncated:
            truncated = truncated.rsplit(' ', 1)[0]
        cleaned = truncated.rstrip()

    return cleaned


def read_file_or_empty(file_path):
    """Return the UTF-8 text of *file_path*, or ``""`` when the file does not exist.

    The file is opened directly instead of being checked with
    ``os.path.exists`` first, so a file removed between check and open cannot
    raise. Other failures (e.g. permission denied on an existing file) still
    propagate.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    except (FileNotFoundError, NotADirectoryError):
        return ""
def main():
    """Command-line entry point: assemble the final comment-analysis Markdown file.

    Usage: ``finalize_comments.py <BASE_NAME> [OUTPUT_DIR] [--debug]``

    Reads ``template.md`` from the script's own directory, fills the
    ``{video_name}``, ``{comment_gold}`` and ``{comments}`` placeholders from
    ``{BASE_NAME}_name.txt``, ``{BASE_NAME}_comment_gold.md`` and
    ``{BASE_NAME}_comments_cleaned.md`` in OUTPUT_DIR (missing files count as
    empty), and writes ``youtube - {title} - comments ({video_id}).md``.
    When no usable title is available the file is named
    ``{BASE_NAME}_comment_analysis.md`` instead. The intermediate work files
    are removed afterwards unless ``--debug`` is given.

    Exits with status 1 when BASE_NAME is missing or the template is absent.
    """
    cli_args = sys.argv[1:]
    debug = '--debug' in cli_args
    args = [arg for arg in cli_args if arg != '--debug']

    if not args:
        print("ERROR: No BASE_NAME provided", file=sys.stderr)
        print("Usage: finalize_comments.py <BASE_NAME> [OUTPUT_DIR] [--debug]", file=sys.stderr)
        sys.exit(1)

    base_name = args[0]
    output_dir = args[1] if len(args) > 1 and args[1] else "."

    # The template ships next to this script, not in the output directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    template_file = os.path.join(script_dir, "template.md")
    if not os.path.exists(template_file):
        print(f"ERROR: {template_file} not found", file=sys.stderr)
        sys.exit(1)

    with open(template_file, 'r', encoding='utf-8') as f:
        template = f.read()

    def component(suffix):
        return read_file_or_empty(os.path.join(output_dir, f"{base_name}{suffix}")).strip()

    values = {
        "video_name": component("_name.txt"),
        "comment_gold": component("_comment_gold.md"),
        "comments": component("_comments_cleaned.md"),
    }

    # Substitute in a single pass over the template. Chained str.replace calls
    # would also expand placeholder-looking text (e.g. a comment that literally
    # contains "{comments}") inside content inserted by an earlier replacement.
    final_content = re.sub(
        r'\{(video_name|comment_gold|comments)\}',
        lambda match: values[match.group(1)],
        template,
    )

    # Use the sanitised title for the decision: a title made only of
    # characters that get stripped must fall back as well.
    cleaned_title = clean_title_for_filename(values["video_name"]) if values["video_name"] else ""
    if cleaned_title:
        prefix = 'youtube_'
        # Strip the prefix only; str.replace would also eat a 'youtube_'
        # that happens to occur inside the video ID itself.
        video_id = base_name[len(prefix):] if base_name.startswith(prefix) else base_name
        final_filename = f"youtube - {cleaned_title} - comments ({video_id}).md"
    else:
        final_filename = f"{base_name}_comment_analysis.md"

    final_file = os.path.join(output_dir, final_filename)
    with open(final_file, 'w', encoding='utf-8') as f:
        f.write(final_content)

    print(f"Created final file: {final_filename}")

    if debug:
        print("Debug mode: keeping intermediate work files")
    else:
        work_suffixes = ("_name.txt", "_comments.md", "_comments_cleaned.md", "_comment_gold.md")
        for suffix in work_suffixes:
            try:
                os.remove(os.path.join(output_dir, f"{base_name}{suffix}"))
            except FileNotFoundError:
                # Not every work file exists in every mode.
                pass
        print("Cleaned up intermediate work files")

    print(f"Final file: {final_filename}")


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print(f"ERROR: {str(e)}", file=sys.stderr)
        sys.exit(1)
Avoid diving in the cesspool and wasting your time on irrelevant noise.", + "version": "1.0.0" + }, + "content": { + "files": [ + { + "path": "template.md", + "sha256": "233d62ae387024f34f659246475ea8b00c884de74a77d689a36c0c2d2dd4d7b2" + }, + { + "path": "README.md", + "sha256": "14250ffcef26b1a689da0cd3476c9968dc9f0bc8c8be06a00e524df9c3de30f1" + }, + { + "path": "finalize_comments.py", + "sha256": "9be3b329255f8c3a3d9db7466b90397ad593e5c3aa2fb3d19601dea70e787d07" + }, + { + "path": "SKILL.md", + "sha256": "1a9ad846abcbb99a16c6b60f4a0012dcccf4c4d9ced1f98ea52055d83833cdbe" + }, + { + "path": "extract_comments.py", + "sha256": "b1aea61cd4fb3cd776667dbb45744d4975c7c83e6ddbba69b6bd1354ec4eaaf2" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "ea3488a9d6656cada8b81f9b66aa9ddaba1e3b449875e355faab1ed75cb5b307" + } + ], + "dirSha256": "510496b23bfd06aebed87eb4a57c45c60b7c8c53fdb92db543febfe17f499c70" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/template.md b/template.md new file mode 100644 index 0000000..f230dba --- /dev/null +++ b/template.md @@ -0,0 +1,7 @@ +## Golden Insights + +{comment_gold} + +## Curated Comments + +{comments}