Initial commit
This commit is contained in:
413
skills/oracle/scripts/analyze_patterns.py
Executable file
413
skills/oracle/scripts/analyze_patterns.py
Executable file
@@ -0,0 +1,413 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Oracle Pattern Analysis Script
|
||||
|
||||
Analyze Oracle knowledge base and session logs to identify:
|
||||
- Repeated tasks (candidates for automation)
|
||||
- Common corrections (update defaults/documentation)
|
||||
- Frequent queries (add to auto-inject context)
|
||||
- Token-heavy operations (automate)
|
||||
|
||||
Usage:
|
||||
python analyze_patterns.py
|
||||
python analyze_patterns.py --generate-scripts
|
||||
python analyze_patterns.py --threshold 3
|
||||
|
||||
Examples:
|
||||
python analyze_patterns.py
|
||||
python analyze_patterns.py --generate-scripts --threshold 5
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from collections import Counter, defaultdict
|
||||
import re
|
||||
|
||||
|
||||
def find_oracle_root():
    """Locate the nearest ``.oracle`` directory at or above the working directory.

    The search starts at ``Path.cwd()`` and walks towards the filesystem
    root, checking every directory on the way, the root itself included.

    Returns:
        The ``Path`` of the first ``.oracle`` directory found, or ``None``
        when no directory on the way up contains one.
    """
    start = Path.cwd()

    # ``parents`` ends with the filesystem root, so the root is checked too
    # (a ``while current != current.parent`` loop stops one level short).
    for directory in (start, *start.parents):
        oracle_path = directory / '.oracle'
        # Require a directory: callers go on to open sub-directories of the
        # result, so a plain file named ``.oracle`` is not a usable match.
        if oracle_path.is_dir():
            return oracle_path

    return None
|
||||
|
||||
|
||||
def load_all_sessions(oracle_path):
    """Load every Markdown session log found in ``<oracle_path>/sessions``.

    Args:
        oracle_path: ``Path`` of the ``.oracle`` directory.

    Returns:
        A list with one dict per readable ``*.md`` file, holding ``'id'``
        (the file stem), ``'file'`` (the ``Path``) and ``'content'`` (the
        file text). Entries are ordered by path so repeated runs produce the
        same report. Files that cannot be read or decoded are reported with
        a warning on stdout and skipped.
    """
    sessions_dir = oracle_path / 'sessions'
    sessions = []

    # glob() order is filesystem-dependent; sort for reproducible output.
    for session_file in sorted(sessions_dir.glob('*.md')):
        try:
            # Decode explicitly as UTF-8 rather than the platform default,
            # which differs between systems.
            content = session_file.read_text(encoding='utf-8')
        except (OSError, UnicodeDecodeError) as e:
            print(f"Warning: Could not read {session_file}: {e}")
            continue

        sessions.append({
            'id': session_file.stem,
            'file': session_file,
            'content': content,
        })

    return sessions
|
||||
|
||||
|
||||
def analyze_repeated_activities(sessions):
    """Count how often each activity bullet appears across all sessions.

    For every session whose text contains ``## Activities``, the section
    runs from the line after that heading up to the next Markdown heading
    (or the end of the text). Every ``- item`` bullet line in the section is
    counted, with surrounding whitespace removed so that the same activity
    is not split into several counts.

    Args:
        sessions: Iterable of dicts with a ``'content'`` string.

    Returns:
        A ``Counter`` mapping activity text to its number of occurrences.
    """
    activity_counts = Counter()

    for session in sessions:
        _, heading, remainder = session['content'].partition('## Activities')
        if not heading:
            continue  # this session has no activities section

        # Drop whatever is left of the heading line itself.
        body = remainder.partition('\n')[2]

        # The section ends at the next heading. Cutting at the first blank
        # line instead would yield nothing when a blank line follows the
        # heading, which is the usual Markdown layout.
        section = re.split(r'^#{1,6}\s', body, maxsplit=1, flags=re.MULTILINE)[0]

        bullets = re.findall(r'^- (.+)$', section, re.MULTILINE)
        # strip() also removes a trailing '\r' left by CRLF line endings.
        activity_counts.update(text for text in map(str.strip, bullets) if text)

    return activity_counts
|
||||
|
||||
|
||||
def analyze_corrections(oracle_path):
    """Group recorded corrections by theme.

    Reads ``<oracle_path>/knowledge/corrections.json`` and files each entry
    under the first theme whose keyword occurs (case-insensitively) in the
    entry's ``'content'`` value. Entries matching no keyword go to
    ``'general'``.

    Args:
        oracle_path: ``Path`` of the ``.oracle`` directory.

    Returns:
        A mapping of theme name to the list of correction dicts in that
        theme. The mapping is empty when the file is missing, cannot be read
        or parsed (a warning is printed), or does not hold a JSON list.
    """
    # Ordered: the first theme with a matching keyword wins.
    theme_keywords = (
        ('async-programming', ('async', 'await')),
        ('security', ('security', 'xss', 'injection')),
        ('performance', ('performance', 'optimization')),
        ('testing', ('test',)),
    )

    corrections_file = oracle_path / 'knowledge' / 'corrections.json'

    if not corrections_file.exists():
        return {}

    try:
        with open(corrections_file, 'r', encoding='utf-8') as f:
            corrections = json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        # A damaged file should not abort the rest of the report.
        print(f"Warning: Could not read {corrections_file}: {e}")
        return {}

    if not isinstance(corrections, list):
        print(f"Warning: Expected a list of corrections in {corrections_file}")
        return {}

    themes = defaultdict(list)

    for correction in corrections:
        if not isinstance(correction, dict):
            continue  # malformed entry; nothing to classify

        # 'content' may be absent or null; treat both as empty text.
        content = str(correction.get('content') or '').lower()

        theme = next(
            (name for name, keywords in theme_keywords
             if any(keyword in content for keyword in keywords)),
            'general',
        )
        themes[theme].append(correction)

    return themes
|
||||
|
||||
|
||||
def analyze_file_patterns(sessions):
    """Tally how often each file path is mentioned in sessions with changes.

    Only sessions whose text contains ``## Changes Made`` contribute. Within
    such a session, every ``**File**: `path``` marker anywhere in the text
    adds one to that path's tally.

    Args:
        sessions: Iterable of dicts with a ``'content'`` string.

    Returns:
        A ``Counter`` mapping file path to number of mentions.
    """
    file_marker = re.compile(r'\*\*File\*\*: `([^`]+)`')
    tally = Counter()

    for text in (entry['content'] for entry in sessions):
        if '## Changes Made' not in text:
            continue
        for path in file_marker.findall(text):
            tally[path] += 1

    return tally
|
||||
|
||||
|
||||
def identify_automation_candidates(activity_counts, threshold=3):
    """Pick the repeated activities that look worth automating.

    An activity is considered once it occurs at least ``threshold`` times.
    It scores 2 points for each deterministic keyword found in its
    lower-cased text. It becomes a candidate when it scores above zero or
    occurs at least ``threshold * 2`` times.

    Args:
        activity_counts: Mapping of activity text to occurrence count.
        threshold: Minimum occurrences for an activity to be considered.

    Returns:
        A list of dicts with keys ``'activity'``, ``'count'``,
        ``'automation_score'`` and ``'confidence'`` (``'high'`` when the
        score is at least 2, otherwise ``'medium'``), ordered by score and
        then count, both descending.
    """
    keywords = ('run tests', 'build', 'lint', 'format', 'deploy', 'update dependencies')

    def score_of(text):
        lowered = text.lower()
        return 2 * sum(1 for keyword in keywords if keyword in lowered)

    picked = []
    for activity, count in activity_counts.items():
        if count < threshold:
            continue

        automation_score = score_of(activity)
        if automation_score <= 0 and count < threshold * 2:
            continue

        if automation_score >= 2:
            confidence = 'high'
        else:
            confidence = 'medium'

        picked.append({
            'activity': activity,
            'count': count,
            'automation_score': automation_score,
            'confidence': confidence,
        })

    picked.sort(key=lambda entry: (entry['automation_score'], entry['count']), reverse=True)
    return picked
|
||||
|
||||
|
||||
def generate_automation_script(activity):
    """Build a bash script template for a repeated activity.

    The file name is ``auto_<slug>.sh``, where the slug is the lower-cased
    activity with every run of characters outside ``a-z0-9`` replaced by one
    underscore. The script announces the task, carries a TODO placeholder,
    and includes commented-out example commands for the first of ``test``,
    ``build``, ``lint`` or ``format`` found in the activity text.

    Activity text comes from session logs, so it is shell-quoted before
    being placed in the ``echo`` commands and flattened to one line for the
    header comment. It can therefore never run as part of the script.

    Args:
        activity: Human-readable description of the activity.

    Returns:
        A ``(script_name, script_text)`` tuple.
    """
    import shlex  # local import: only needed for quoting here

    activity_lower = activity.lower()

    script_name = re.sub(r'[^a-z0-9]+', '_', activity_lower).strip('_')
    script_name = f"auto_{script_name}.sh"

    # Safe as a shell word: $(...), backticks and quotes stay literal.
    shell_activity = shlex.quote(activity)
    # A line break would otherwise end the '#' comment and start a command.
    comment_activity = ' '.join(activity.splitlines())
    generated = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # Ordered: only the first matching keyword contributes its examples.
    snippets = (
        ('test', "# Run tests\n# npm test\n# pytest\n# cargo test\n"),
        ('build', "# Build project\n# npm run build\n# cargo build\n# make\n"),
        ('lint', "# Run linter\n# npm run lint\n# cargo clippy\n# pylint\n"),
        ('format', "# Format code\n# npm run format\n# cargo fmt\n# black .\n"),
    )
    examples = next(
        (text for keyword, text in snippets if keyword in activity_lower),
        '',
    )

    header = (
        "#!/bin/bash\n"
        "# Auto-generated by Oracle Pattern Analysis\n"
        f"# Purpose: {comment_activity}\n"
        f"# Generated: {generated}\n"
        "\n"
        "set -e # Exit on error\n"
        "\n"
        # Adjacent shell words concatenate, so the printed text is unchanged.
        f'echo " Automated task: "{shell_activity}\n'
        'echo "---"\n'
        "\n"
        "# TODO: Implement automation logic\n"
        "# Based on the activity pattern, add appropriate commands here\n"
        "\n"
    )
    footer = (
        "\n"
        'echo "---"\n'
        f'echo "[OK] Completed: "{shell_activity}\n'
    )

    return script_name, header + examples + footer
|
||||
|
||||
|
||||
def generate_report(oracle_path, sessions, threshold):
    """Print the pattern-analysis report to stdout.

    The report has four sections (repeated activities, automation
    opportunities, correction themes, frequently modified files) followed
    by a numbered list of recommendations. Recommendations are numbered
    consecutively from 1, so the list has no gaps when some do not apply.

    Args:
        oracle_path: ``Path`` of the ``.oracle`` directory, used to load
            the corrections.
        sessions: Session dicts as returned by ``load_all_sessions``.
        threshold: Minimum occurrences for an activity to be considered an
            automation candidate.
    """
    rule = "=" * 70

    print(rule)
    print("[SEARCH] Oracle Pattern Analysis Report")
    print(rule)
    print(f"\nAnalyzing {len(sessions)} sessions\n")

    # Repeated activities
    print("## Repeated Activities\n")
    activity_counts = analyze_repeated_activities(sessions)

    if activity_counts:
        print("Top repeated tasks:\n")
        for activity, count in activity_counts.most_common(10):
            # The per-line marker was an empty string whether or not the
            # count reached the threshold, so the line is printed directly.
            print(f"  [{count}x] {activity}")
    else:
        print(" No repeated activities found\n")

    print()

    # Automation candidates
    print("## Automation Opportunities\n")
    candidates = identify_automation_candidates(activity_counts, threshold)

    if candidates:
        print(f"Found {len(candidates)} automation candidates:\n")
        for candidate in candidates:
            # Same as above: the confidence marker was empty in both cases.
            print(f"  [{candidate['count']}x] {candidate['activity']}")
            print(f" Confidence: {candidate['confidence']}, Score: {candidate['automation_score']}\n")
    else:
        print(f" No automation candidates (threshold: {threshold} occurrences)\n")

    print()

    # Correction patterns
    print("## Correction Patterns\n")
    correction_themes = analyze_corrections(oracle_path)

    if correction_themes:
        print("Corrections by theme:\n")
        by_size = sorted(correction_themes.items(), key=lambda item: len(item[1]), reverse=True)
        for theme, corrections in by_size:
            print(f" {theme.capitalize()}: {len(corrections)} corrections")

        print("\n[WARNING] Consider updating documentation or creating safeguards for common themes\n")
    else:
        print(" No corrections recorded yet\n")

    print()

    # File change patterns
    print("## Frequently Modified Files\n")
    file_changes = analyze_file_patterns(sessions)

    if file_changes:
        print("Most frequently changed files:\n")
        for file_path, count in file_changes.most_common(10):
            print(f" [{count}x] {file_path}")

        print("\n[TIP] Consider if these files need refactoring or better structure\n")
    else:
        print(" No file change patterns found\n")

    print()

    # Recommendations
    print(rule)
    print("[INFO] Recommendations")
    print(rule)
    print()

    # Collect first, number afterwards: hard-coded "1."/"2."/"3." prefixes
    # would leave gaps whenever an earlier recommendation does not apply.
    recommendations = []

    if candidates:
        recommendations.append((
            f"**Automate {len(candidates)} repeated tasks**",
            " Run with --generate-scripts to create automation scripts\n",
        ))

    if correction_themes:
        most_common_theme = max(correction_themes.items(), key=lambda item: len(item[1]))[0]
        recommendations.append((
            f"**Address {most_common_theme} corrections**",
            " Review and create guidelines or linting rules\n",
        ))

    if file_changes:
        top_path, top_count = file_changes.most_common(1)[0]
        recommendations.append((
            f"**Review frequently changed file: {top_path}**",
            f" Changed {top_count} times - may need refactoring\n",
        ))

    for number, (headline, detail) in enumerate(recommendations, start=1):
        print(f"{number}. {headline}")
        print(detail)

    print(rule)
|
||||
|
||||
|
||||
def save_automation_scripts(oracle_path, candidates):
    """Write one automation script per candidate, plus a README index.

    Scripts are written to ``<oracle_path>/scripts`` (created if missing)
    and marked executable. ``README.md`` in the same directory is rewritten
    to list every script generated by this call.

    Args:
        oracle_path: ``Path`` of the ``.oracle`` directory.
        candidates: Candidate dicts with at least ``'activity'`` and
            ``'count'`` keys.

    Returns:
        The list of ``Path`` objects of the scripts written, in candidate
        order.
    """
    scripts_dir = oracle_path / 'scripts'
    # Without this the first open() fails on a fresh .oracle directory.
    scripts_dir.mkdir(parents=True, exist_ok=True)

    scripts_generated = []
    readme_entries = []

    for candidate in candidates:
        script_name, script_content = generate_automation_script(candidate['activity'])
        script_path = scripts_dir / script_name

        # newline='\n' keeps LF endings on every platform; bash treats a
        # carriage return as part of the command.
        with open(script_path, 'w', encoding='utf-8', newline='\n') as f:
            f.write(script_content)

        # Make executable
        os.chmod(script_path, 0o755)

        scripts_generated.append(script_path)
        # Use the name the generator returned rather than re-deriving it,
        # so the README always matches the file on disk.
        readme_entries.append(
            f"- `{script_name}` - {candidate['activity']} (used {candidate['count']}x)\n"
        )

        print(f"[OK] Generated: {script_path}")

    # Create README in scripts dir
    readme_path = scripts_dir / 'README.md'
    readme_content = f"""# Auto-Generated Automation Scripts

These scripts were generated by Oracle pattern analysis on {datetime.now().strftime('%Y-%m-%d')}.

## Scripts

"""

    readme_content += ''.join(readme_entries)

    readme_content += """
## Usage

Each script is executable:

```bash
./auto_script_name.sh
```

## Customization

These scripts are templates. Review and customize them for your specific needs.
"""

    with open(readme_path, 'w', encoding='utf-8') as f:
        f.write(readme_content)

    print(f"\n Created README: {readme_path}")

    return scripts_generated
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses ``--threshold`` and ``--generate-scripts``, locates the
    ``.oracle`` directory, loads the session logs and prints the analysis
    report. With ``--generate-scripts`` it also writes automation script
    templates for every candidate.

    Exits with status 1 when no ``.oracle`` directory is found and with
    status 0 when there are no sessions to analyze.
    """
    parser = argparse.ArgumentParser(
        description='Analyze Oracle patterns and identify automation opportunities',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    parser.add_argument(
        '--threshold',
        type=int,
        default=3,
        help='Minimum occurrences to consider for automation (default: 3)'
    )

    parser.add_argument(
        '--generate-scripts',
        action='store_true',
        help='Generate automation scripts for candidates'
    )

    args = parser.parse_args()

    # Find Oracle
    oracle_path = find_oracle_root()

    if not oracle_path:
        print("[ERROR] Error: .oracle directory not found.")
        sys.exit(1)

    # Load sessions
    sessions = load_all_sessions(oracle_path)

    if not sessions:
        print("[WARNING] No sessions found. Start recording sessions to enable pattern analysis.")
        sys.exit(0)

    # Generate report
    generate_report(oracle_path, sessions, args.threshold)

    # Generate scripts if requested
    if args.generate_scripts:
        # generate_report() only prints, so the candidates are recomputed here.
        activity_counts = analyze_repeated_activities(sessions)
        candidates = identify_automation_candidates(activity_counts, args.threshold)

        if candidates:
            print("\n" + "="*70)
            print(" Generating Automation Scripts")
            print("="*70 + "\n")

            scripts = save_automation_scripts(oracle_path, candidates)

            print(f"\n Generated {len(scripts)} automation scripts!")
            print(f" Location: {oracle_path / 'scripts'}")
            print("\n[WARNING] Review and customize these scripts before use.\n")
        else:
            # f-prefix added: the message used to print the literal text
            # "{args.threshold}" instead of the value.
            print(f"\n[WARNING] No automation candidates found (threshold: {args.threshold})\n")
|
||||
|
||||
|
||||
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user