Initial commit

Zhongwei Li
2025-11-30 08:40:16 +08:00
commit 9c6a81fd7f
8 changed files with 1030 additions and 0 deletions

@@ -0,0 +1,375 @@
#!/usr/bin/env python3
"""
Analyze Claude Code conversation history to identify patterns and common mistakes.
"""
import json
import re
from collections import Counter, defaultdict
from datetime import datetime
from pathlib import Path
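# Each history.jsonl record is assumed to carry at least these fields
# (inferred from the lookups below), e.g.:
#   {"display": "fix the login bug", "project": "/path/to/repo", "timestamp": 1732924816000}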
def load_history(history_path):
"""Load and parse history.jsonl file."""
conversations = []
with open(history_path, 'r') as f:
for line in f:
try:
conversations.append(json.loads(line))
except json.JSONDecodeError:
continue
return conversations
def extract_patterns(conversations):
"""Extract patterns from conversations."""
patterns = {
'common_tasks': Counter(),
'projects': Counter(),
'error_keywords': Counter(),
'request_types': Counter(),
'time_distribution': defaultdict(int),
'complexity_indicators': Counter(),
}
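# Plain substring keywords, matched case-insensitively against the lowercased display text.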
error_keywords = ['error', 'fix', 'bug', 'issue', 'problem', 'fail', 'broken', 'wrong', 'incorrect', 'merge conflict']
complexity_indicators = ['refactor', 'implement', 'add feature', 'create', 'build', 'migrate', 'optimize', 'analyze']
for conv in conversations:
display = conv.get('display', '').lower()
project = conv.get('project', '')
# Count projects
if project:
patterns['projects'][project] += 1
# Count error-related requests
for keyword in error_keywords:
if keyword in display:
patterns['error_keywords'][keyword] += 1
# Count complexity indicators
for indicator in complexity_indicators:
if indicator in display:
patterns['complexity_indicators'][indicator] += 1
# Categorize request types
if any(kw in display for kw in ['fix', 'error', 'bug', 'issue', 'problem']):
patterns['request_types']['bug_fix'] += 1
elif any(kw in display for kw in ['add', 'create', 'implement', 'build', 'new']):
patterns['request_types']['feature_addition'] += 1
elif any(kw in display for kw in ['refactor', 'improve', 'optimize', 'clean']):
patterns['request_types']['refactoring'] += 1
elif any(kw in display for kw in ['explain', 'what', 'how', 'why', 'analyze', 'understand']):
patterns['request_types']['information_query'] += 1
elif any(kw in display for kw in ['test', 'run', 'build', 'deploy']):
patterns['request_types']['testing_deployment'] += 1
else:
patterns['request_types']['other'] += 1
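# Note: 'build' appears in both the feature and testing keyword lists; because this is
# an elif chain, such requests always classify as feature_addition first.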
# Time distribution (hour of day)
if 'timestamp' in conv:
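# Timestamps look like epoch milliseconds, hence the /1000 (assumption from the data).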
dt = datetime.fromtimestamp(conv['timestamp'] / 1000)
hour = dt.hour
patterns['time_distribution'][hour] += 1
# Track all tasks
patterns['common_tasks'][display] += 1
return patterns
def identify_common_mistakes(conversations):
"""Identify patterns that might indicate common mistakes."""
mistakes = {
'repeated_fixes': [],
'merge_conflicts': [],
'repeated_requests': [],
'vague_requests': [],
'multi_step_without_planning': [],
}
# Track repeated similar requests
task_groups = defaultdict(list)
for i, conv in enumerate(conversations):
display = conv.get('display', '')
# Group similar tasks
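# Mask digits and strip punctuation so e.g. 'fix bug #12' and 'fix bug #47' collapse into one group.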
normalized = re.sub(r'\d+', '#', display.lower())
normalized = re.sub(r'[^\w\s]', '', normalized)
task_groups[normalized].append((i, display, conv))
# Identify repeated fixes
for task, occurrences in task_groups.items():
if len(occurrences) > 2 and any(kw in task for kw in ['fix', 'error', 'bug']):
mistakes['repeated_fixes'].append({
'pattern': task,
'count': len(occurrences),
'examples': [occ[1] for occ in occurrences[:3]]
})
# Identify merge conflicts
for conv in conversations:
display = conv.get('display', '')
if 'merge conflict' in display.lower():
mistakes['merge_conflicts'].append(display)
# Identify repeated requests (exact matches)
for task, occurrences in task_groups.items():
if len(occurrences) > 3:
mistakes['repeated_requests'].append({
'pattern': occurrences[0][1],
'count': len(occurrences)
})
# Identify vague requests (very short or unclear)
vague_keywords = ['this', 'that', 'it', 'the thing', 'stuff']
for conv in conversations:
display = conv.get('display', '')
# Match whole words/phrases so that e.g. 'it' does not also hit 'github' or 'commit'.
if len(display.split()) < 4 or any(re.search(rf'\b{re.escape(vk)}\b', display.lower()) for vk in vague_keywords):
if len(display) < 30:
mistakes['vague_requests'].append(display)
# Identify complex multi-step requests
multi_step_indicators = [' and ', ', ', 'then ', 'also ', 'plus ']
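# sum() over the booleans counts how many distinct indicators appear, not total occurrences.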
for conv in conversations:
display = conv.get('display', '')
if sum(indicator in display.lower() for indicator in multi_step_indicators) >= 2:
mistakes['multi_step_without_planning'].append(display)
return mistakes
def generate_report(patterns, mistakes):
"""Generate a comprehensive analysis report."""
report = []
report.append("=" * 80)
report.append("CLAUDE CODE CONVERSATION ANALYSIS REPORT")
report.append("=" * 80)
report.append("")
# Overall statistics
report.append("## OVERALL STATISTICS")
report.append(f"Total conversations: {sum(patterns['request_types'].values())}")
report.append("")
# Request type distribution
report.append("## REQUEST TYPE DISTRIBUTION")
for req_type, count in patterns['request_types'].most_common():
percentage = (count / sum(patterns['request_types'].values())) * 100
report.append(f" {req_type.replace('_', ' ').title()}: {count} ({percentage:.1f}%)")
report.append("")
# Most active projects
report.append("## MOST ACTIVE PROJECTS")
for project, count in patterns['projects'].most_common(10):
report.append(f" {count:3d}x {project}")
report.append("")
# Common error keywords
report.append("## COMMON ERROR KEYWORDS")
for keyword, count in patterns['error_keywords'].most_common(10):
report.append(f" {keyword}: {count}")
report.append("")
# Complexity indicators
report.append("## COMPLEXITY INDICATORS")
for indicator, count in patterns['complexity_indicators'].most_common(10):
report.append(f" {indicator}: {count}")
report.append("")
# Time distribution
report.append("## TIME DISTRIBUTION (BY HOUR)")
for hour in sorted(patterns['time_distribution'].keys()):
count = patterns['time_distribution'][hour]
bar = '█' * (count // 5)  # one block per five conversations
report.append(f" {hour:02d}:00 {bar} ({count})")
report.append("")
# Common mistakes
report.append("=" * 80)
report.append("## IDENTIFIED COMMON MISTAKES AND PATTERNS")
report.append("=" * 80)
report.append("")
if mistakes['merge_conflicts']:
report.append(f"### 1. MERGE CONFLICTS ({len(mistakes['merge_conflicts'])} occurrences)")
report.append("These indicate potential git workflow issues:")
for mc in mistakes['merge_conflicts'][:5]:
report.append(f" - {mc}")
report.append("")
if mistakes['repeated_fixes']:
report.append(f"### 2. REPEATED FIX PATTERNS ({len(mistakes['repeated_fixes'])} patterns)")
report.append("Similar fixes requested multiple times - may indicate recurring issues:")
for fix in sorted(mistakes['repeated_fixes'], key=lambda x: x['count'], reverse=True)[:5]:
report.append(f" - Pattern: '{fix['pattern']}' (occurred {fix['count']} times)")
for example in fix['examples'][:2]:
report.append(f" Example: {example}")
report.append("")
if mistakes['repeated_requests']:
report.append(f"### 3. REPEATED EXACT REQUESTS ({len(mistakes['repeated_requests'])} patterns)")
report.append("Exact same requests multiple times - could be automated:")
for req in sorted(mistakes['repeated_requests'], key=lambda x: x['count'], reverse=True)[:5]:
report.append(f" - '{req['pattern']}' (x{req['count']})")
report.append("")
if mistakes['vague_requests']:
report.append(f"### 4. VAGUE REQUESTS ({len(mistakes['vague_requests'])} found)")
report.append("Short or unclear requests that might benefit from more context:")
for vague in mistakes['vague_requests'][:10]:
report.append(f" - '{vague}'")
report.append("")
if mistakes['multi_step_without_planning']:
report.append(f"### 5. COMPLEX MULTI-STEP REQUESTS ({len(mistakes['multi_step_without_planning'])} found)")
report.append("Requests with multiple steps that could benefit from planning:")
for multi in mistakes['multi_step_without_planning'][:10]:
report.append(f" - {multi}")
report.append("")
# Most common tasks
report.append("## TOP 20 MOST COMMON TASKS")
for task, count in patterns['common_tasks'].most_common(20):
if count > 1:
report.append(f" {count:3d}x {task}")
report.append("")
return "\n".join(report)
def generate_recommendations(patterns, mistakes):
"""Generate specific recommendations based on analysis."""
recommendations = []
recommendations.append("=" * 80)
recommendations.append("RECOMMENDATIONS FOR IMPROVEMENT")
recommendations.append("=" * 80)
recommendations.append("")
rec_num = 1
# Merge conflict recommendations
if mistakes['merge_conflicts']:
recommendations.append(f"{rec_num}. GIT WORKFLOW IMPROVEMENTS")
recommendations.append(" Problem: Multiple merge conflicts detected")
recommendations.append(" Solutions:")
recommendations.append(" - Create a pre-commit hook to check for conflicts")
recommendations.append(" - Add a git alias for safe rebasing")
recommendations.append(" - Document merge conflict resolution workflow")
recommendations.append(" - Consider using git hooks to prevent pushing conflicted files")
recommendations.append("")
rec_num += 1
# Repeated fixes recommendations
if mistakes['repeated_fixes']:
recommendations.append(f"{rec_num}. PREVENT RECURRING BUGS")
recommendations.append(" Problem: Same types of fixes requested repeatedly")
recommendations.append(" Solutions:")
recommendations.append(" - Add linting rules to catch common errors")
recommendations.append(" - Create test cases for frequently fixed bugs")
recommendations.append(" - Document common pitfalls in CLAUDE.md")
recommendations.append(" - Consider pre-commit hooks for validation")
recommendations.append("")
rec_num += 1
# Repeated requests
if mistakes['repeated_requests']:
recommendations.append(f"{rec_num}. AUTOMATE REPETITIVE TASKS")
recommendations.append(" Problem: Same requests made multiple times")
recommendations.append(" Solutions:")
recommendations.append(" - Create slash commands for common tasks")
recommendations.append(" - Add shell aliases or scripts")
recommendations.append(" - Consider creating Claude Code skills for workflows")
recommendations.append(" - Document common patterns in CLAUDE.md")
recommendations.append("")
rec_num += 1
# Vague requests
if mistakes['vague_requests']:
recommendations.append(f"{rec_num}. IMPROVE REQUEST CLARITY")
recommendations.append(" Problem: Many vague or unclear requests")
recommendations.append(" Solutions:")
recommendations.append(" - Create request templates in CLAUDE.md")
recommendations.append(" - Add examples of good vs. bad requests")
recommendations.append(" - Use more specific language and context")
recommendations.append(" - Break down complex requests into steps")
recommendations.append("")
rec_num += 1
# Multi-step planning
if mistakes['multi_step_without_planning']:
recommendations.append(f"{rec_num}. BETTER TASK PLANNING")
recommendations.append(" Problem: Complex multi-step requests without planning")
recommendations.append(" Solutions:")
recommendations.append(" - Use 'plan mode' for complex tasks")
recommendations.append(" - Break down requests into discrete steps")
recommendations.append(" - Create checklists for common workflows")
recommendations.append(" - Consider using the feature-planning skill")
recommendations.append("")
rec_num += 1
# Error-heavy workflow
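# max(..., 1) in the denominator guards against division by zero on an empty history.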
error_ratio = sum(patterns['error_keywords'].values()) / max(sum(patterns['request_types'].values()), 1)
if error_ratio > 0.3:
recommendations.append(f"{rec_num}. REDUCE ERROR RATE")
recommendations.append(f" Problem: High error rate detected ({error_ratio*100:.1f}% of requests)")
recommendations.append(" Solutions:")
recommendations.append(" - Implement comprehensive testing before changes")
recommendations.append(" - Add validation hooks (pre-commit, pre-push)")
recommendations.append(" - Create a testing checklist in CLAUDE.md")
recommendations.append(" - Consider TDD approach for new features")
recommendations.append("")
rec_num += 1
return "\n".join(recommendations)
def main():
history_path = Path.home() / '.claude' / 'history.jsonl'
if not history_path.exists():
raise SystemExit(f"No history file found at {history_path}")
print("Loading conversation history...")
all_conversations = load_history(history_path)
# Focus on recent conversations (last 200)
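# Slicing with [-200:] is safe even when fewer than 200 conversations exist.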
recent_conversations = all_conversations[-200:]
print(f"Analyzing {len(all_conversations)} total conversations...")
print(f"Deep analysis on {len(recent_conversations)} recent conversations...")
patterns = extract_patterns(recent_conversations)
mistakes = identify_common_mistakes(recent_conversations)
print("\nGenerating reports...")
report = generate_report(patterns, mistakes)
recommendations = generate_recommendations(patterns, mistakes)
# Save reports
output_dir = Path.cwd()
# Add header with analysis scope
header = f"""
ANALYSIS SCOPE
==============
Total conversations in history: {len(all_conversations)}
Recent conversations analyzed: {len(recent_conversations)}
Time period: most recent {len(recent_conversations)} interactions
"""
with open(output_dir / 'conversation_analysis.txt', 'w') as f:
f.write(header)
f.write(report)
with open(output_dir / 'recommendations.txt', 'w') as f:
f.write(recommendations)
print("\n" + "=" * 80)
print("Analysis complete! Reports saved to:")
print(f" - {output_dir / 'conversation_analysis.txt'}")
print(f" - {output_dir / 'recommendations.txt'}")
print("=" * 80)
# Print summary to console
print("\n" + report)
print("\n" + recommendations)
if __name__ == '__main__':
main()
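# Usage sketch (script filename is hypothetical; assumes ~/.claude/history.jsonl exists):
#   python3 analyze_history.py
# Both reports are written to the current working directory and also echoed to stdout.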