Initial commit

2025-11-30 08:46:50 +08:00
commit a3a73d67d7
67 changed files with 19703 additions and 0 deletions
--- a/skills/oracle/scripts/analyze_patterns.py
+++ b/skills/oracle/scripts/analyze_patterns.py
@@ -0,0 +1,413 @@
+#!/usr/bin/env python3
+"""
+Oracle Pattern Analysis Script
+
+Analyze Oracle knowledge base and session logs to identify:
+- Repeated tasks (candidates for automation)
+- Common corrections (update defaults/documentation)
+- Frequent queries (add to auto-inject context)
+- Token-heavy operations (automate)
+
+Usage:
+    python analyze_patterns.py
+    python analyze_patterns.py --generate-scripts
+    python analyze_patterns.py --threshold 3
+
+Examples:
+    python analyze_patterns.py
+    python analyze_patterns.py --generate-scripts --threshold 5
+"""
+
+import os
+import sys
+import json
+import argparse
+from datetime import datetime
+from pathlib import Path
+from collections import Counter, defaultdict
+import re
+
+
+def find_oracle_root():
+    """Find the .oracle directory."""
+    current = Path.cwd()
+
+    while current != current.parent:
+        oracle_path = current / '.oracle'
+        if oracle_path.exists():
+            return oracle_path
+        current = current.parent
+
+    return None
+
+
+def load_all_sessions(oracle_path):
+    """Load all session logs."""
+    sessions_dir = oracle_path / 'sessions'
+    sessions = []
+
+    for session_file in sessions_dir.glob('*.md'):
+        try:
+            with open(session_file, 'r') as f:
+                content = f.read()
+                sessions.append({
+                    'id': session_file.stem,
+                    'file': session_file,
+                    'content': content
+                })
+        except Exception as e:
+            print(f"Warning: Could not read {session_file}: {e}")
+
+    return sessions
+
+
+def analyze_repeated_activities(sessions):
+    """Find repeated activities across sessions."""
+    all_activities = []
+
+    for session in sessions:
+        # Extract activities from session log
+        content = session['content']
+        if '## Activities' in content:
+            activities_section = content.split('## Activities')[1].split('\n\n')[0]
+            activities = re.findall(r'^- (.+)$', activities_section, re.MULTILINE)
+            all_activities.extend(activities)
+
+    # Count occurrences
+    activity_counts = Counter(all_activities)
+
+    return activity_counts
+
+
+def analyze_corrections(oracle_path):
+    """Analyze correction patterns."""
+    knowledge_dir = oracle_path / 'knowledge'
+    corrections_file = knowledge_dir / 'corrections.json'
+
+    if not corrections_file.exists():
+        return {}
+
+    with open(corrections_file, 'r') as f:
+        corrections = json.load(f)
+
+    # Group by common themes
+    themes = defaultdict(list)
+
+    for correction in corrections:
+        content = correction.get('content', '')
+
+        # Try to identify theme
+        if 'async' in content.lower() or 'await' in content.lower():
+            themes['async-programming'].append(correction)
+        elif 'security' in content.lower() or 'xss' in content.lower() or 'injection' in content.lower():
+            themes['security'].append(correction)
+        elif 'performance' in content.lower() or 'optimization' in content.lower():
+            themes['performance'].append(correction)
+        elif 'test' in content.lower():
+            themes['testing'].append(correction)
+        else:
+            themes['general'].append(correction)
+
+    return themes
+
+
+def analyze_file_patterns(sessions):
+    """Analyze which files are changed most often."""
+    file_changes = Counter()
+
+    for session in sessions:
+        content = session['content']
+        if '## Changes Made' in content:
+            # Extract file paths
+            files = re.findall(r'\*\*File\*\*: `([^`]+)`', content)
+            file_changes.update(files)
+
+    return file_changes
+
+
+def identify_automation_candidates(activity_counts, threshold=3):
+    """Identify tasks that are repeated enough to warrant automation."""
+    candidates = []
+
+    for activity, count in activity_counts.items():
+        if count >= threshold:
+            # Analyze if it's automatable
+            automation_score = 0
+
+            # Keyword-based scoring
+            deterministic_keywords = ['run tests', 'build', 'lint', 'format', 'deploy', 'update dependencies']
+            for keyword in deterministic_keywords:
+                if keyword in activity.lower():
+                    automation_score += 2
+
+            if automation_score > 0 or count >= threshold * 2:
+                candidates.append({
+                    'activity': activity,
+                    'count': count,
+                    'automation_score': automation_score,
+                    'confidence': 'high' if automation_score >= 2 else 'medium'
+                })
+
+    return sorted(candidates, key=lambda x: (x['automation_score'], x['count']), reverse=True)
+
+
+def generate_automation_script(activity):
+    """Generate a basic automation script for an activity."""
+    activity_lower = activity.lower()
+
+    script_name = re.sub(r'[^a-z0-9]+', '_', activity_lower).strip('_')
+    script_name = f"auto_{script_name}.sh"
+
+    # Basic script template
+    script = f"""#!/bin/bash
+# Auto-generated by Oracle Pattern Analysis
+# Purpose: {activity}
+# Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+
+set -e  # Exit on error
+
+echo " Automated task: {activity}"
+echo "---"
+
+# TODO: Implement automation logic
+# Based on the activity pattern, add appropriate commands here
+
+"""
+
+    # Add common commands based on keywords
+    if 'test' in activity_lower:
+        script += """# Run tests
+# npm test
+# pytest
+# cargo test
+"""
+    elif 'build' in activity_lower:
+        script += """# Build project
+# npm run build
+# cargo build
+# make
+"""
+    elif 'lint' in activity_lower:
+        script += """# Run linter
+# npm run lint
+# cargo clippy
+# pylint
+"""
+    elif 'format' in activity_lower:
+        script += """# Format code
+# npm run format
+# cargo fmt
+# black .
+"""
+
+    script += """
+echo "---"
+echo "[OK] Completed: {activity}"
+""".format(activity=activity)
+
+    return script_name, script
+
+
+def generate_report(oracle_path, sessions, threshold):
+    """Generate analysis report."""
+    print("="*70)
+    print("[SEARCH] Oracle Pattern Analysis Report")
+    print("="*70)
+    print(f"\nAnalyzing {len(sessions)} sessions\n")
+
+    # Repeated activities
+    print("## Repeated Activities\n")
+    activity_counts = analyze_repeated_activities(sessions)
+
+    if activity_counts:
+        print("Top repeated tasks:\n")
+        for activity, count in activity_counts.most_common(10):
+            emoji = "" if count >= threshold else ""
+            print(f"  {emoji} [{count}x] {activity}")
+    else:
+        print("  No repeated activities found\n")
+
+    print()
+
+    # Automation candidates
+    print("## Automation Opportunities\n")
+    candidates = identify_automation_candidates(activity_counts, threshold)
+
+    if candidates:
+        print(f"Found {len(candidates)} automation candidates:\n")
+        for candidate in candidates:
+            confidence_emoji = "" if candidate['confidence'] == 'high' else ""
+            print(f"  {confidence_emoji} [{candidate['count']}x] {candidate['activity']}")
+            print(f"      Confidence: {candidate['confidence']}, Score: {candidate['automation_score']}\n")
+    else:
+        print(f"  No automation candidates (threshold: {threshold} occurrences)\n")
+
+    print()
+
+    # Correction patterns
+    print("## Correction Patterns\n")
+    correction_themes = analyze_corrections(oracle_path)
+
+    if correction_themes:
+        print("Corrections by theme:\n")
+        for theme, corrections in sorted(correction_themes.items(), key=lambda x: len(x[1]), reverse=True):
+            print(f"   {theme.capitalize()}: {len(corrections)} corrections")
+
+        print("\n[WARNING]  Consider updating documentation or creating safeguards for common themes\n")
+    else:
+        print("  No corrections recorded yet\n")
+
+    print()
+
+    # File change patterns
+    print("## Frequently Modified Files\n")
+    file_changes = analyze_file_patterns(sessions)
+
+    if file_changes:
+        print("Most frequently changed files:\n")
+        for file_path, count in file_changes.most_common(10):
+            print(f"  [{count}x] {file_path}")
+
+        print("\n[TIP] Consider if these files need refactoring or better structure\n")
+    else:
+        print("  No file change patterns found\n")
+
+    print()
+
+    # Recommendations
+    print("="*70)
+    print("[INFO] Recommendations")
+    print("="*70)
+    print()
+
+    if candidates:
+        print(f"1. **Automate {len(candidates)} repeated tasks**")
+        print(f"   Run with --generate-scripts to create automation scripts\n")
+
+    if correction_themes:
+        most_common_theme = max(correction_themes.items(), key=lambda x: len(x[1]))[0]
+        print(f"2. **Address {most_common_theme} corrections**")
+        print(f"   Review and create guidelines or linting rules\n")
+
+    if file_changes:
+        top_file = file_changes.most_common(1)[0]
+        print(f"3. **Review frequently changed file: {top_file[0]}**")
+        print(f"   Changed {top_file[1]} times - may need refactoring\n")
+
+    print("="*70)
+
+
+def save_automation_scripts(oracle_path, candidates):
+    """Generate and save automation scripts."""
+    scripts_dir = oracle_path / 'scripts'
+    scripts_generated = []
+
+    for candidate in candidates:
+        script_name, script_content = generate_automation_script(candidate['activity'])
+        script_path = scripts_dir / script_name
+
+        with open(script_path, 'w') as f:
+            f.write(script_content)
+
+        # Make executable
+        os.chmod(script_path, 0o755)
+
+        scripts_generated.append(script_path)
+
+        print(f"[OK] Generated: {script_path}")
+
+    # Create README in scripts dir
+    readme_path = scripts_dir / 'README.md'
+    readme_content = f"""# Auto-Generated Automation Scripts
+
+These scripts were generated by Oracle pattern analysis on {datetime.now().strftime('%Y-%m-%d')}.
+
+## Scripts
+
+"""
+
+    for candidate in candidates:
+        script_name = re.sub(r'[^a-z0-9]+', '_', candidate['activity'].lower()).strip('_')
+        readme_content += f"- `auto_{script_name}.sh` - {candidate['activity']} (used {candidate['count']}x)\n"
+
+    readme_content += """
+## Usage
+
+Each script is executable:
+
+```bash
+./auto_script_name.sh
+```
+
+## Customization
+
+These scripts are templates. Review and customize them for your specific needs.
+"""
+
+    with open(readme_path, 'w') as f:
+        f.write(readme_content)
+
+    print(f"\n Created README: {readme_path}")
+
+    return scripts_generated
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Analyze Oracle patterns and identify automation opportunities',
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+
+    parser.add_argument(
+        '--threshold',
+        type=int,
+        default=3,
+        help='Minimum occurrences to consider for automation (default: 3)'
+    )
+
+    parser.add_argument(
+        '--generate-scripts',
+        action='store_true',
+        help='Generate automation scripts for candidates'
+    )
+
+    args = parser.parse_args()
+
+    # Find Oracle
+    oracle_path = find_oracle_root()
+
+    if not oracle_path:
+        print("[ERROR] Error: .oracle directory not found.")
+        sys.exit(1)
+
+    # Load sessions
+    sessions = load_all_sessions(oracle_path)
+
+    if not sessions:
+        print("[WARNING]  No sessions found. Start recording sessions to enable pattern analysis.")
+        sys.exit(0)
+
+    # Generate report
+    generate_report(oracle_path, sessions, args.threshold)
+
+    # Generate scripts if requested
+    if args.generate_scripts:
+        activity_counts = analyze_repeated_activities(sessions)
+        candidates = identify_automation_candidates(activity_counts, args.threshold)
+
+        if candidates:
+            print("\n" + "="*70)
+            print(" Generating Automation Scripts")
+            print("="*70 + "\n")
+
+            scripts = save_automation_scripts(oracle_path, candidates)
+
+            print(f"\n Generated {len(scripts)} automation scripts!")
+            print(f"   Location: {oracle_path / 'scripts'}")
+            print("\n[WARNING]  Review and customize these scripts before use.\n")
+        else:
+            print("\n[WARNING]  No automation candidates found (threshold: {args.threshold})\n")
+
+
+if __name__ == '__main__':
+    main()