Initial commit

Zhongwei Li
2025-11-30 08:46:58 +08:00
commit 6eb041c48f
16 changed files with 4262 additions and 0 deletions


@@ -0,0 +1,512 @@
#!/usr/bin/env python3
"""
Guardian Learning System
Adjusts Guardian sensitivity and thresholds based on user feedback.
Key Principle: "Learn from feedback to adjust sensitivity"
This script:
1. Tracks acceptance/rejection rates per category
2. Adjusts thresholds dynamically to maintain target acceptance rate
3. Learns which file types need more/less review
4. Stores anti-patterns based on rejection reasons
5. Adapts to user's working style over time
Usage:
# Adjust thresholds based on recent feedback
python learning.py --adjust
# Get current threshold recommendations
python learning.py --recommend
# Manually set acceptance rate target
python learning.py --set-target 0.75
# View learning statistics
python learning.py --stats
Environment Variables:
GUARDIAN_CONFIG_PATH: Path to Guardian config file [default: .guardian/config.json]
"""
import os
import sys
import json
import argparse
from pathlib import Path
from typing import Dict, List, Optional, Any
from datetime import datetime, timedelta
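# Illustrative (not authoritative) sketch of the config.json this script reads
# and writes; only the keys accessed below matter, and the values shown are the
# defaults used elsewhere in this file:
#
#   {
#     "sensitivity": {
#       "lines_threshold": 50,
#       "error_repeat_threshold": 3,
#       "file_churn_threshold": 5,
#       "correction_threshold": 3
#     },
#     "learning": {
#       "acceptance_rate_target": 0.7,
#       "adjustment_speed": 0.1
#     },
#     "auto_review": {
#       "enabled": true,
#       "always_review": ["auth", "security", "crypto", "payment"],
#       "never_review": ["test", "mock", "fixture"]
#     }
#   }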
def find_guardian_root() -> Optional[Path]:
"""Find the .guardian directory."""
current = Path.cwd()
while current != current.parent:
guardian_path = current / '.guardian'
if guardian_path.exists():
return guardian_path
current = current.parent
return None
def load_config(guardian_path: Path) -> Dict[str, Any]:
"""Load Guardian configuration."""
    # Honor GUARDIAN_CONFIG_PATH (documented in the module docstring) when set
    env_override = os.environ.get('GUARDIAN_CONFIG_PATH')
    config_path = Path(env_override) if env_override else guardian_path / 'config.json'
if not config_path.exists():
return {}
try:
with open(config_path, 'r', encoding='utf-8') as f:
return json.load(f)
    except (json.JSONDecodeError, OSError):
return {}
def save_config(guardian_path: Path, config: Dict[str, Any]) -> None:
"""Save Guardian configuration."""
    # Write to the same location load_config() reads from
    env_override = os.environ.get('GUARDIAN_CONFIG_PATH')
    config_path = Path(env_override) if env_override else guardian_path / 'config.json'
try:
with open(config_path, 'w', encoding='utf-8') as f:
json.dump(config, f, indent=2)
    except OSError as e:
print(f"Error: Failed to save config: {e}", file=sys.stderr)
sys.exit(1)
def load_acceptance_stats(guardian_path: Path) -> Dict[str, Any]:
"""Load acceptance rate statistics."""
stats_file = guardian_path / 'acceptance_stats.json'
if not stats_file.exists():
return {
'by_category': {},
'by_type': {},
'overall': {
'accepted': 0,
'rejected': 0,
'rate': 0.0
}
}
try:
with open(stats_file, 'r', encoding='utf-8') as f:
return json.load(f)
    except (json.JSONDecodeError, OSError):
return {'by_category': {}, 'by_type': {}, 'overall': {'accepted': 0, 'rejected': 0, 'rate': 0.0}}
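# Illustrative acceptance_stats.json entry (placeholder numbers): each
# 'by_category' value mirrors the 'overall' block, e.g.
#   "by_category": {"error_analysis": {"accepted": 12, "rejected": 4, "rate": 0.75}}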
def load_rejection_history(guardian_path: Path, days: int = 30) -> List[Dict[str, Any]]:
"""Load recent rejection history.
Args:
guardian_path: Path to Guardian directory
days: Number of days to look back
Returns:
List of recent rejections
"""
history_file = guardian_path / 'rejection_history.json'
if not history_file.exists():
return []
try:
with open(history_file, 'r', encoding='utf-8') as f:
all_history = json.load(f)
    except (json.JSONDecodeError, OSError):
return []
# Filter to recent rejections
cutoff = datetime.now() - timedelta(days=days)
recent = []
for rejection in all_history:
try:
ts = datetime.fromisoformat(rejection.get('timestamp', ''))
# Handle timezone-aware timestamps
if ts.tzinfo and not cutoff.tzinfo:
cutoff = cutoff.replace(tzinfo=ts.tzinfo)
if ts >= cutoff:
recent.append(rejection)
except (ValueError, TypeError):
continue
return recent
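# Illustrative rejection_history.json record (placeholder values); only
# 'timestamp', 'reason', and 'category' are read by this script:
#   {"timestamp": "2025-11-20T10:15:00+08:00",
#    "reason": "suggestion not relevant to this change",
#    "category": "file_churn"}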
def calculate_threshold_adjustments(
config: Dict[str, Any],
acceptance_stats: Dict[str, Any],
target_rate: float = 0.7,
adjustment_speed: float = 0.1
) -> Dict[str, int]:
"""Calculate new threshold values based on acceptance rates.
Args:
config: Current configuration
acceptance_stats: Acceptance statistics
target_rate: Target acceptance rate (0.0 to 1.0)
adjustment_speed: How fast to adjust (0.0 to 1.0)
Returns:
Dictionary of adjusted threshold values
"""
current_sensitivity = config.get('sensitivity', {})
overall_rate = acceptance_stats.get('overall', {}).get('rate', 0.5)
adjustments = {}
# Lines threshold adjustment
current_lines = current_sensitivity.get('lines_threshold', 50)
if overall_rate < target_rate - 0.1:
# Too many false positives - increase threshold (trigger less often)
adjustment = int(current_lines * adjustment_speed)
adjustments['lines_threshold'] = current_lines + max(5, adjustment)
elif overall_rate > target_rate + 0.1:
        # Acceptance is well above target - decrease threshold (trigger more often)
adjustment = int(current_lines * adjustment_speed)
adjustments['lines_threshold'] = max(20, current_lines - max(5, adjustment))
else:
# Well-calibrated
adjustments['lines_threshold'] = current_lines
# Error repeat threshold adjustment
error_stats = acceptance_stats.get('by_category', {}).get('error_analysis', {})
error_rate = error_stats.get('rate', 0.5)
current_error_threshold = current_sensitivity.get('error_repeat_threshold', 3)
if error_rate < target_rate - 0.1:
adjustments['error_repeat_threshold'] = min(10, current_error_threshold + 1)
elif error_rate > target_rate + 0.1:
adjustments['error_repeat_threshold'] = max(2, current_error_threshold - 1)
else:
adjustments['error_repeat_threshold'] = current_error_threshold
# File churn threshold adjustment
churn_stats = acceptance_stats.get('by_category', {}).get('file_churn', {})
churn_rate = churn_stats.get('rate', 0.5)
current_churn_threshold = current_sensitivity.get('file_churn_threshold', 5)
if churn_rate < target_rate - 0.1:
adjustments['file_churn_threshold'] = min(15, current_churn_threshold + 1)
elif churn_rate > target_rate + 0.1:
adjustments['file_churn_threshold'] = max(3, current_churn_threshold - 1)
else:
adjustments['file_churn_threshold'] = current_churn_threshold
# Correction threshold adjustment
correction_stats = acceptance_stats.get('by_category', {}).get('corrections', {})
correction_rate = correction_stats.get('rate', 0.5)
current_correction_threshold = current_sensitivity.get('correction_threshold', 3)
if correction_rate < target_rate - 0.1:
adjustments['correction_threshold'] = min(10, current_correction_threshold + 1)
elif correction_rate > target_rate + 0.1:
adjustments['correction_threshold'] = max(2, current_correction_threshold - 1)
else:
adjustments['correction_threshold'] = current_correction_threshold
return adjustments
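# Worked example (hypothetical numbers): with target_rate=0.7,
# adjustment_speed=0.1, lines_threshold=50 and an overall acceptance rate of
# 0.55 (below 0.7 - 0.1), the adjustment is int(50 * 0.1) = 5, so the new
# lines_threshold becomes 50 + max(5, 5) = 55 and Guardian triggers less often.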
def learn_from_rejections(
rejection_history: List[Dict[str, Any]]
) -> Dict[str, Any]:
"""Analyze rejection patterns to learn anti-patterns.
Args:
rejection_history: List of rejections
Returns:
Dictionary of learned anti-patterns and insights
"""
insights = {
'common_rejection_reasons': {},
'frequently_rejected_categories': {},
'anti_patterns': []
}
# Count rejection reasons
for rejection in rejection_history:
reason = rejection.get('reason', 'Unknown')
category = rejection.get('category', 'general')
# Count by reason
if reason not in insights['common_rejection_reasons']:
insights['common_rejection_reasons'][reason] = 0
insights['common_rejection_reasons'][reason] += 1
# Count by category
if category not in insights['frequently_rejected_categories']:
insights['frequently_rejected_categories'][category] = 0
insights['frequently_rejected_categories'][category] += 1
# Identify anti-patterns (highly rejected categories)
for category, count in insights['frequently_rejected_categories'].items():
if count >= 5: # Rejected 5+ times
rejection_rate = count / len(rejection_history) if len(rejection_history) > 0 else 0
if rejection_rate > 0.8: # 80%+ rejection rate
insights['anti_patterns'].append({
'category': category,
'rejection_count': count,
'rejection_rate': rejection_rate,
'recommendation': f"Stop suggesting {category} - rejection rate {rejection_rate:.0%}"
})
return insights
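# Worked example (hypothetical numbers): if 6 of 7 recent rejections fall in the
# 'file_churn' category, it qualifies as an anti-pattern (count 6 >= 5 and
# rejection rate 6/7 ~ 0.86 > 0.8), producing the recommendation
# "Stop suggesting file_churn - rejection rate 86%".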
def update_auto_review_rules(
config: Dict[str, Any],
acceptance_stats: Dict[str, Any],
insights: Dict[str, Any]
) -> Dict[str, Any]:
"""Update auto-review rules based on learning.
Args:
config: Current configuration
acceptance_stats: Acceptance statistics
insights: Learned insights from rejections
Returns:
Updated auto_review configuration
"""
    # Ensure the expected keys exist even when auto_review is only partially configured
    auto_review = config.get('auto_review', {})
    auto_review.setdefault('enabled', True)
    auto_review.setdefault('always_review', ['auth', 'security', 'crypto', 'payment'])
    auto_review.setdefault('never_review', ['test', 'mock', 'fixture'])
# Add anti-patterns to never_review
for anti_pattern in insights.get('anti_patterns', []):
category = anti_pattern['category']
if category not in auto_review['never_review']:
auto_review['never_review'].append(category)
# Check which categories have high acceptance rates
by_category = acceptance_stats.get('by_category', {})
for category, stats in by_category.items():
rate = stats.get('rate', 0.0)
total = stats.get('accepted', 0) + stats.get('rejected', 0)
# If category has >90% acceptance and >10 samples, add to always_review
if rate > 0.9 and total > 10:
if category not in auto_review['always_review']:
auto_review['always_review'].append(category)
# If category has <20% acceptance and >10 samples, add to never_review
if rate < 0.2 and total > 10:
if category not in auto_review['never_review']:
auto_review['never_review'].append(category)
return auto_review
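# Worked example (hypothetical category name): a 'refactoring' category with 19
# accepted and 1 rejected suggestions (rate 0.95 over 20 samples) would be added
# to always_review; one with rate 0.15 over 20 samples would be added to
# never_review.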
def apply_adjustments(
guardian_path: Path,
dry_run: bool = False
) -> Dict[str, Any]:
"""Apply learned adjustments to configuration.
Args:
guardian_path: Path to Guardian directory
dry_run: If True, don't save changes (just return recommendations)
Returns:
Dictionary with adjustment details
"""
config = load_config(guardian_path)
acceptance_stats = load_acceptance_stats(guardian_path)
rejection_history = load_rejection_history(guardian_path)
# Get learning parameters
learning_config = config.get('learning', {})
target_rate = learning_config.get('acceptance_rate_target', 0.7)
adjustment_speed = learning_config.get('adjustment_speed', 0.1)
# Calculate threshold adjustments
threshold_adjustments = calculate_threshold_adjustments(
config,
acceptance_stats,
target_rate,
adjustment_speed
)
# Learn from rejections
insights = learn_from_rejections(rejection_history)
# Update auto-review rules
updated_auto_review = update_auto_review_rules(config, acceptance_stats, insights)
# Prepare result
result = {
'current_acceptance_rate': acceptance_stats.get('overall', {}).get('rate', 0.0),
'target_acceptance_rate': target_rate,
'threshold_adjustments': threshold_adjustments,
'auto_review_updates': updated_auto_review,
'insights': insights,
'applied': not dry_run
}
# Apply changes if not dry run
if not dry_run:
# Update sensitivity thresholds
if 'sensitivity' not in config:
config['sensitivity'] = {}
config['sensitivity'].update(threshold_adjustments)
# Update auto_review
config['auto_review'] = updated_auto_review
# Save updated config
save_config(guardian_path, config)
return result
def get_statistics(guardian_path: Path) -> Dict[str, Any]:
"""Get learning statistics.
Args:
guardian_path: Path to Guardian directory
Returns:
Statistics dictionary
"""
config = load_config(guardian_path)
acceptance_stats = load_acceptance_stats(guardian_path)
rejection_history = load_rejection_history(guardian_path, days=30)
overall = acceptance_stats.get('overall', {})
by_category = acceptance_stats.get('by_category', {})
stats = {
'overall': {
'accepted': overall.get('accepted', 0),
'rejected': overall.get('rejected', 0),
'acceptance_rate': overall.get('rate', 0.0)
},
'by_category': {},
'current_thresholds': config.get('sensitivity', {}),
'target_acceptance_rate': config.get('learning', {}).get('acceptance_rate_target', 0.7),
'recent_rejections_30d': len(rejection_history),
'auto_review_rules': config.get('auto_review', {})
}
# Add category breakdown
for category, cat_stats in by_category.items():
stats['by_category'][category] = {
'accepted': cat_stats.get('accepted', 0),
'rejected': cat_stats.get('rejected', 0),
'rate': cat_stats.get('rate', 0.0)
}
return stats
def main():
parser = argparse.ArgumentParser(
description='Guardian learning system - adjust thresholds based on feedback',
formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument(
'--adjust',
action='store_true',
help='Apply threshold adjustments based on recent feedback'
)
parser.add_argument(
'--recommend',
action='store_true',
help='Show recommended adjustments without applying (dry run)'
)
parser.add_argument(
'--stats',
action='store_true',
help='Show learning statistics'
)
parser.add_argument(
'--set-target',
type=float,
help='Set target acceptance rate (0.0 to 1.0)'
)
parser.add_argument(
'--set-speed',
type=float,
help='Set adjustment speed (0.0 to 1.0)'
)
args = parser.parse_args()
# Find Guardian
guardian_path = find_guardian_root()
if not guardian_path:
print("Error: Guardian not initialized (.guardian directory not found)", file=sys.stderr)
sys.exit(1)
# Handle set target
if args.set_target is not None:
if not 0.0 <= args.set_target <= 1.0:
print("Error: Target acceptance rate must be between 0.0 and 1.0", file=sys.stderr)
sys.exit(1)
config = load_config(guardian_path)
if 'learning' not in config:
config['learning'] = {}
config['learning']['acceptance_rate_target'] = args.set_target
save_config(guardian_path, config)
print(json.dumps({'target_set': args.set_target}))
sys.exit(0)
# Handle set speed
if args.set_speed is not None:
if not 0.0 <= args.set_speed <= 1.0:
print("Error: Adjustment speed must be between 0.0 and 1.0", file=sys.stderr)
sys.exit(1)
config = load_config(guardian_path)
if 'learning' not in config:
config['learning'] = {}
config['learning']['adjustment_speed'] = args.set_speed
save_config(guardian_path, config)
print(json.dumps({'speed_set': args.set_speed}))
sys.exit(0)
# Handle stats
if args.stats:
stats = get_statistics(guardian_path)
print(json.dumps(stats, indent=2))
sys.exit(0)
# Handle recommend (dry run)
if args.recommend:
result = apply_adjustments(guardian_path, dry_run=True)
print(json.dumps(result, indent=2))
sys.exit(0)
# Handle adjust
if args.adjust:
result = apply_adjustments(guardian_path, dry_run=False)
print(json.dumps(result, indent=2))
sys.exit(0)
parser.print_help()
sys.exit(1)
if __name__ == '__main__':
main()