Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:46:50 +08:00
commit a3a73d67d7
67 changed files with 19703 additions and 0 deletions

View File

@@ -0,0 +1,413 @@
#!/usr/bin/env python3
"""
Oracle Pattern Analysis Script
Analyze Oracle knowledge base and session logs to identify:
- Repeated tasks (candidates for automation)
- Common corrections (update defaults/documentation)
- Frequent queries (add to auto-inject context)
- Token-heavy operations (automate)
Usage:
python analyze_patterns.py
python analyze_patterns.py --generate-scripts
python analyze_patterns.py --threshold 3
Examples:
python analyze_patterns.py
python analyze_patterns.py --generate-scripts --threshold 5
"""
import os
import sys
import json
import argparse
from datetime import datetime
from pathlib import Path
from collections import Counter, defaultdict
import re
def find_oracle_root():
    """Locate the nearest ``.oracle`` directory.

    The search starts at the current working directory and walks up through
    every ancestor, including the filesystem root.

    Returns:
        Path of the first ``.oracle`` directory found, or ``None`` when
        neither the working directory nor any ancestor contains one.
    """
    start = Path.cwd()
    # ``Path.parents`` ends with the filesystem root; a
    # ``while current != current.parent`` loop stops before checking it.
    for directory in (start, *start.parents):
        oracle_path = directory / '.oracle'
        # Require a directory: a stray file named ``.oracle`` cannot hold
        # the sessions/knowledge sub-directories, so keep searching upward.
        if oracle_path.is_dir():
            return oracle_path
    return None
def load_all_sessions(oracle_path):
    """Load every session log under ``<oracle_path>/sessions``.

    Args:
        oracle_path: ``Path`` of the ``.oracle`` directory.

    Returns:
        A list of dicts, one per readable ``*.md`` file, with the keys
        ``'id'`` (file name without suffix), ``'file'`` (the ``Path``) and
        ``'content'`` (the file text). Files are visited in sorted order so
        the result is deterministic. Files that cannot be read or decoded
        are reported with a warning and skipped.
    """
    sessions_dir = oracle_path / 'sessions'
    sessions = []
    # glob() order depends on the filesystem; sort for reproducible reports.
    for session_file in sorted(sessions_dir.glob('*.md')):
        try:
            # Explicit UTF-8 so the result does not depend on the platform's
            # default locale encoding.
            with open(session_file, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            print(f"Warning: Could not read {session_file}: {e}")
            continue
        sessions.append({
            'id': session_file.stem,
            'file': session_file,
            'content': content,
        })
    return sessions
def analyze_repeated_activities(sessions):
    """Count how often each activity appears across all sessions.

    For every session whose content contains ``## Activities``, the bullet
    items (``- ...`` lines) that follow that heading are collected. The
    list is taken to end at the first blank line after it or at the next
    Markdown heading, whichever comes first.

    Args:
        sessions: Iterable of dicts that each have a ``'content'`` string.

    Returns:
        A ``Counter`` mapping activity text (surrounding whitespace
        stripped) to its number of occurrences.
    """
    activity_counts = Counter()
    for session in sessions:
        content = session['content']
        if '## Activities' not in content:
            continue
        # Everything after the heading line itself.
        body = content.split('## Activities', 1)[1].partition('\n')[2]
        # Markdown usually has a blank line between a heading and its list;
        # skip it so the section is not mistaken for an empty one.
        body = body.lstrip('\n')
        # The list ends at the first blank line ...
        body = body.split('\n\n', 1)[0]
        # ... or at the next heading, so bullets that belong to a following
        # section are not counted as activities.
        body = re.split(r'^#', body, maxsplit=1, flags=re.MULTILINE)[0]
        for item in re.findall(r'^- (.+)$', body, re.MULTILINE):
            item = item.strip()
            if item:
                activity_counts[item] += 1
    return activity_counts
def analyze_corrections(oracle_path):
    """Group recorded corrections by theme.

    Reads ``<oracle_path>/knowledge/corrections.json`` and assigns each
    entry to the first theme whose keywords occur (case-insensitively) in
    the entry's ``'content'`` text; entries matching no keyword go to
    ``'general'``.

    Args:
        oracle_path: ``Path`` of the ``.oracle`` directory.

    Returns:
        A mapping of theme name to the list of correction dicts in that
        theme. An empty dict is returned when the file is missing, cannot
        be read or parsed, or does not contain a JSON list.
    """
    corrections_file = oracle_path / 'knowledge' / 'corrections.json'
    if not corrections_file.exists():
        return {}

    try:
        with open(corrections_file, 'r', encoding='utf-8') as f:
            corrections = json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        # A damaged knowledge file should not abort the whole report.
        print(f"Warning: Could not read {corrections_file}: {e}")
        return {}

    if not isinstance(corrections, list):
        print(f"Warning: Could not read {corrections_file}: expected a JSON list")
        return {}

    # Checked in order; the first theme with a matching keyword wins.
    theme_keywords = (
        ('async-programming', ('async', 'await')),
        ('security', ('security', 'xss', 'injection')),
        ('performance', ('performance', 'optimization')),
        ('testing', ('test',)),
    )

    themes = defaultdict(list)
    for correction in corrections:
        if not isinstance(correction, dict):
            # Entries without a 'content' field cannot be classified.
            continue
        text = str(correction.get('content') or '').lower()
        for theme, keywords in theme_keywords:
            if any(keyword in text for keyword in keywords):
                themes[theme].append(correction)
                break
        else:
            themes['general'].append(correction)
    return themes
def analyze_file_patterns(sessions):
    """Tally how often each file path is mentioned in session change logs.

    Only sessions whose content contains ``## Changes Made`` are considered;
    within those, every ``**File**: `path` `` occurrence anywhere in the
    content is counted.

    Args:
        sessions: Iterable of dicts that each have a ``'content'`` string.

    Returns:
        A ``Counter`` mapping file path to number of mentions.
    """
    file_marker = re.compile(r'\*\*File\*\*: `([^`]+)`')
    texts = (entry['content'] for entry in sessions)
    return Counter(
        path
        for text in texts
        if '## Changes Made' in text
        for path in file_marker.findall(text)
    )
def identify_automation_candidates(activity_counts, threshold=3):
    """Pick out activities repeated often enough to be worth automating.

    An activity seen at least ``threshold`` times earns 2 points for each
    deterministic keyword found in its lower-cased text. It becomes a
    candidate when it scored any points, or when it was seen at least
    ``threshold * 2`` times.

    Args:
        activity_counts: Mapping of activity text to occurrence count.
        threshold: Minimum occurrence count for an activity to be considered.

    Returns:
        A list of dicts with keys ``'activity'``, ``'count'``,
        ``'automation_score'`` and ``'confidence'`` (``'high'`` or
        ``'medium'``), ordered by score and then count, highest first.
    """
    deterministic_keywords = ['run tests', 'build', 'lint', 'format', 'deploy', 'update dependencies']
    shortlisted = []

    for activity, count in activity_counts.items():
        if count < threshold:
            continue

        lowered = activity.lower()
        keyword_hits = sum(1 for keyword in deterministic_keywords if keyword in lowered)
        automation_score = 2 * keyword_hits

        # No keyword matched and not frequent enough to qualify on count alone.
        if automation_score <= 0 and count < threshold * 2:
            continue

        if automation_score >= 2:
            confidence = 'high'
        else:
            confidence = 'medium'

        shortlisted.append({
            'activity': activity,
            'count': count,
            'automation_score': automation_score,
            'confidence': confidence,
        })

    shortlisted.sort(
        key=lambda entry: (entry['automation_score'], entry['count']),
        reverse=True,
    )
    return shortlisted
def generate_automation_script(activity):
    """Build a starter bash script for a repeated activity.

    The script is a template: it echoes a banner, contains a TODO and, when
    the activity text mentions ``test``, ``build``, ``lint`` or ``format``
    (first match in that order), a few commented-out example commands.

    Args:
        activity: Activity description, as extracted from the session logs.

    Returns:
        A ``(script_name, script)`` tuple: the file name
        (``auto_<slug>.sh``, where the slug is the lower-cased activity with
        runs of non-alphanumerics replaced by ``_``) and the script text.
    """
    import shlex  # local import: only this function needs it

    activity_lower = activity.lower()
    script_name = re.sub(r'[^a-z0-9]+', '_', activity_lower).strip('_')
    script_name = f"auto_{script_name}.sh"

    # The activity text comes from log files and ends up in an executable
    # script. Inside double quotes the shell would still expand $(...),
    # backticks and variables, so every echoed message is shell-quoted.
    banner = shlex.quote(f" Automated task: {activity}")
    completed = shlex.quote(f"[OK] Completed: {activity}")
    # Keep the comment on one line so no part of the text can escape the '#'.
    purpose = ' '.join(activity.split())
    generated = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # Basic script template
    script = f"""#!/bin/bash
# Auto-generated by Oracle Pattern Analysis
# Purpose: {purpose}
# Generated: {generated}
set -e # Exit on error
echo {banner}
echo "---"
# TODO: Implement automation logic
# Based on the activity pattern, add appropriate commands here
"""

    # Commented-out example commands; only the first matching keyword applies.
    keyword_snippets = (
        ('test', """# Run tests
# npm test
# pytest
# cargo test
"""),
        ('build', """# Build project
# npm run build
# cargo build
# make
"""),
        ('lint', """# Run linter
# npm run lint
# cargo clippy
# pylint
"""),
        ('format', """# Format code
# npm run format
# cargo fmt
# black .
"""),
    )
    for keyword, snippet in keyword_snippets:
        if keyword in activity_lower:
            script += snippet
            break

    script += f"""
echo "---"
echo {completed}
"""
    return script_name, script
def generate_report(oracle_path, sessions, threshold):
    """Print the pattern-analysis report to stdout.

    Runs the repeated-activity, automation-candidate, correction and
    file-change analyses, prints one section for each, and finishes with a
    list of recommendations for the analyses that produced results. The
    recommendations are numbered consecutively starting at 1.

    Args:
        oracle_path: ``Path`` of the ``.oracle`` directory (passed to
            ``analyze_corrections``).
        sessions: Session dicts (each with a ``'content'`` string).
        threshold: Minimum occurrence count passed to
            ``identify_automation_candidates``.

    Returns:
        None. All output is printed.
    """
    print("="*70)
    print("[SEARCH] Oracle Pattern Analysis Report")
    print("="*70)
    print(f"\nAnalyzing {len(sessions)} sessions\n")

    # Repeated activities
    print("## Repeated Activities\n")
    activity_counts = analyze_repeated_activities(sessions)
    if activity_counts:
        print("Top repeated tasks:\n")
        for activity, count in activity_counts.most_common(10):
            # NOTE(review): both branches are currently the empty string;
            # the marker glyphs appear to be missing - confirm intent.
            emoji = "" if count >= threshold else ""
            print(f" {emoji} [{count}x] {activity}")
    else:
        print(" No repeated activities found\n")
    print()

    # Automation candidates
    print("## Automation Opportunities\n")
    candidates = identify_automation_candidates(activity_counts, threshold)
    if candidates:
        print(f"Found {len(candidates)} automation candidates:\n")
        for candidate in candidates:
            # NOTE(review): same as above - both markers are empty strings.
            confidence_emoji = "" if candidate['confidence'] == 'high' else ""
            print(f" {confidence_emoji} [{candidate['count']}x] {candidate['activity']}")
            print(f" Confidence: {candidate['confidence']}, Score: {candidate['automation_score']}\n")
    else:
        print(f" No automation candidates (threshold: {threshold} occurrences)\n")
    print()

    # Correction patterns
    print("## Correction Patterns\n")
    correction_themes = analyze_corrections(oracle_path)
    if correction_themes:
        print("Corrections by theme:\n")
        for theme, corrections in sorted(correction_themes.items(), key=lambda x: len(x[1]), reverse=True):
            print(f" {theme.capitalize()}: {len(corrections)} corrections")
        print("\n[WARNING] Consider updating documentation or creating safeguards for common themes\n")
    else:
        print(" No corrections recorded yet\n")
    print()

    # File change patterns
    print("## Frequently Modified Files\n")
    file_changes = analyze_file_patterns(sessions)
    if file_changes:
        print("Most frequently changed files:\n")
        for file_path, count in file_changes.most_common(10):
            print(f" [{count}x] {file_path}")
        print("\n[TIP] Consider if these files need refactoring or better structure\n")
    else:
        print(" No file change patterns found\n")
    print()

    # Recommendations
    print("="*70)
    print("[INFO] Recommendations")
    print("="*70)
    print()

    # Collect (headline, detail) pairs first so the printed numbers are
    # consecutive even when some recommendations do not apply.
    recommendations = []
    if candidates:
        recommendations.append((
            f"**Automate {len(candidates)} repeated tasks**",
            " Run with --generate-scripts to create automation scripts\n",
        ))
    if correction_themes:
        most_common_theme = max(correction_themes.items(), key=lambda x: len(x[1]))[0]
        recommendations.append((
            f"**Address {most_common_theme} corrections**",
            " Review and create guidelines or linting rules\n",
        ))
    if file_changes:
        top_file = file_changes.most_common(1)[0]
        recommendations.append((
            f"**Review frequently changed file: {top_file[0]}**",
            f" Changed {top_file[1]} times - may need refactoring\n",
        ))

    for number, (headline, detail) in enumerate(recommendations, start=1):
        print(f"{number}. {headline}")
        print(detail)
    print("="*70)
def save_automation_scripts(oracle_path, candidates):
    """Write one automation script per candidate plus a README index.

    Files are written to ``<oracle_path>/scripts``, which is created when it
    does not exist yet. Each script is made executable (mode ``0o755``).

    Args:
        oracle_path: ``Path`` of the ``.oracle`` directory.
        candidates: Candidate dicts with at least the keys ``'activity'``
            and ``'count'``.

    Returns:
        A list with the ``Path`` of every script written, in candidate
        order.
    """
    scripts_dir = oracle_path / 'scripts'
    # The directory is not guaranteed to exist; without this the first
    # open(..., 'w') below raises FileNotFoundError.
    scripts_dir.mkdir(parents=True, exist_ok=True)

    scripts_generated = []
    readme_entries = []
    for candidate in candidates:
        script_name, script_content = generate_automation_script(candidate['activity'])
        script_path = scripts_dir / script_name
        # newline='\n': keep LF line endings for the shell script whatever
        # the host platform's default is.
        with open(script_path, 'w', encoding='utf-8', newline='\n') as f:
            f.write(script_content)
        # Make executable
        os.chmod(script_path, 0o755)
        scripts_generated.append(script_path)
        # Reuse the generated name so the README always matches the file
        # actually written.
        readme_entries.append(
            f"- `{script_name}` - {candidate['activity']} (used {candidate['count']}x)\n"
        )
        print(f"[OK] Generated: {script_path}")

    # Create README in scripts dir
    readme_path = scripts_dir / 'README.md'
    readme_content = f"""# Auto-Generated Automation Scripts
These scripts were generated by Oracle pattern analysis on {datetime.now().strftime('%Y-%m-%d')}.
## Scripts
"""
    readme_content += ''.join(readme_entries)
    readme_content += """
## Usage
Each script is executable:
```bash
./auto_script_name.sh
```
## Customization
These scripts are templates. Review and customize them for your specific needs.
"""
    with open(readme_path, 'w', encoding='utf-8') as f:
        f.write(readme_content)
    print(f"\n Created README: {readme_path}")
    return scripts_generated
def main():
    """Command-line entry point.

    Parses ``--threshold`` and ``--generate-scripts``, locates the
    ``.oracle`` directory, loads the session logs, prints the analysis
    report and, when requested, writes automation scripts.

    Exits with status 1 when no ``.oracle`` directory is found and with
    status 0 when there are no sessions to analyze.
    """
    parser = argparse.ArgumentParser(
        description='Analyze Oracle patterns and identify automation opportunities',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        '--threshold',
        type=int,
        default=3,
        help='Minimum occurrences to consider for automation (default: 3)'
    )
    parser.add_argument(
        '--generate-scripts',
        action='store_true',
        help='Generate automation scripts for candidates'
    )
    args = parser.parse_args()

    # Find Oracle
    oracle_path = find_oracle_root()
    if not oracle_path:
        print("[ERROR] Error: .oracle directory not found.")
        sys.exit(1)

    # Load sessions
    sessions = load_all_sessions(oracle_path)
    if not sessions:
        print("[WARNING] No sessions found. Start recording sessions to enable pattern analysis.")
        sys.exit(0)

    # Generate report
    generate_report(oracle_path, sessions, args.threshold)

    # Generate scripts if requested
    if args.generate_scripts:
        activity_counts = analyze_repeated_activities(sessions)
        candidates = identify_automation_candidates(activity_counts, args.threshold)
        if candidates:
            print("\n" + "="*70)
            print(" Generating Automation Scripts")
            print("="*70 + "\n")
            scripts = save_automation_scripts(oracle_path, candidates)
            print(f"\n Generated {len(scripts)} automation scripts!")
            print(f" Location: {oracle_path / 'scripts'}")
            print("\n[WARNING] Review and customize these scripts before use.\n")
        else:
            # f-string: without the prefix the placeholder was printed
            # literally instead of the threshold value.
            print(f"\n[WARNING] No automation candidates found (threshold: {args.threshold})\n")
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()