Files
2025-11-29 18:20:25 +08:00

422 lines
13 KiB
Python
Executable File

#!/usr/bin/env python3
"""
============================================================================
SCRIPT: pattern-detector.py
PURPOSE: Detect commit message patterns and conventions from git history
VERSION: 1.0.0
USAGE: ./pattern-detector.py --count N --branch BRANCH [--detailed]
RETURNS: JSON format with pattern detection results
EXIT CODES:
    0 - Success
    1 - Not a git repository
    2 - No commit history
    3 - Git command failed or returned no commits
DEPENDENCIES: git, python3
============================================================================
"""
import subprocess
import sys
import json
import re
import argparse
from collections import defaultdict
from typing import Dict, List, Tuple
def run_git_command(cmd: List[str]) -> Tuple[int, str]:
    """Run *cmd* as a subprocess and report how it went.

    Returns an ``(exit_code, output)`` pair, where ``output`` is the
    command's stdout with surrounding whitespace removed. Any exception
    raised while running the command is turned into ``(1, <error text>)``
    rather than propagated.
    """
    try:
        completed = subprocess.run(cmd, capture_output=True, text=True, check=False)
        exit_code, stdout_text = completed.returncode, completed.stdout.strip()
    except Exception as exc:
        return 1, str(exc)
    return exit_code, stdout_text
def is_git_repository() -> bool:
    """Report whether the current directory is inside a git repository.

    The answer is the success (exit status 0) of ``git rev-parse --git-dir``.
    """
    return run_git_command(['git', 'rev-parse', '--git-dir'])[0] == 0
def has_commits() -> bool:
    """Report whether the repository contains at least one commit.

    The answer is the success (exit status 0) of ``git log -1``.
    """
    return run_git_command(['git', 'log', '-1'])[0] == 0
def get_commits(count: int, branch: str) -> List[Dict[str, str]]:
    """Fetch up to *count* commit messages reachable from *branch*.

    Runs ``git log`` with a format that prints, for every commit, the hash,
    the subject, the body and then a separator line, and parses that output.

    Args:
        count: Maximum number of commits to request from ``git log``.
        branch: Revision passed to ``git log`` (branch name, ``HEAD``, ...).

    Returns:
        One dict per commit with the keys ``hash``, ``subject``, ``body``
        and ``full`` (subject and body joined by a blank line, or just the
        subject when the body is empty). An empty list is returned when the
        git command exits with a non-zero status.
    """
    separator = '---COMMIT_SEPARATOR---'
    code, output = run_git_command([
        'git', 'log',
        f'-{count}',
        branch,
        f'--format=%H%n%s%n%b%n{separator}',
    ])
    if code != 0:
        return []

    commits = []
    lines = output.split('\n')
    i = 0
    # A record needs at least a hash line and a subject line.
    while i + 1 < len(lines):
        commit_hash = lines[i]
        subject = lines[i + 1]
        i += 2

        # Everything up to the separator belongs to the body. Interior blank
        # lines are kept so paragraph breaks survive; strip() below removes
        # only the leading/trailing padding around the body.
        body_start = i
        while i < len(lines) and lines[i] != separator:
            i += 1
        body = '\n'.join(lines[body_start:i]).strip()

        commits.append({
            'hash': commit_hash,
            'subject': subject,
            'body': body,
            'full': subject + '\n\n' + body if body else subject,
        })
        i += 1  # step over the separator line
    return commits
def is_conventional_commit(subject: str) -> bool:
    """Check if commit follows conventional commits format.

    Accepts ``type: description`` and ``type(scope): description``, each
    optionally carrying the ``!`` breaking-change marker right before the
    colon (``feat!: ...``, ``feat(api)!: ...``). The type must be lowercase
    letters and the description must be non-empty.
    """
    pattern = r'^[a-z]+(\([^)]+\))?!?: .+'
    return bool(re.match(pattern, subject))
def has_prefix(subject: str) -> bool:
    """Tell whether *subject* opens with a bracketed prefix such as ``[PREFIX]``.

    The brackets must contain at least one character.
    """
    bracketed_prefix = re.compile(r'^\[[^\]]+\]')
    return bracketed_prefix.match(subject) is not None
def has_tag(subject: str) -> bool:
    """Tell whether *subject* begins with a ``#`` character (e.g. ``#tag``)."""
    # Slicing keeps the empty subject safe: '' != '#'.
    return subject[:1] == '#'
def is_imperative_mood(subject: str) -> bool:
    """
    Check if subject uses imperative mood.

    Simple heuristic: the first word of the description must be one of a
    fixed list of common imperative verbs. A leading single-token prefix
    that ends in a colon (``feat:``, ``fix(api)!:``) is skipped first.
    A colon that appears later in the subject is left alone, so
    ``Fix crash: handle empty input`` is judged by ``fix``, not ``handle``.
    """
    imperative_verbs = frozenset((
        'add', 'fix', 'update', 'remove', 'delete', 'create', 'implement',
        'change', 'improve', 'optimize', 'refactor', 'enhance', 'correct',
        'resolve', 'merge', 'bump', 'revert', 'document', 'upgrade',
        'downgrade', 'rename', 'move', 'replace', 'extract', 'simplify',
    ))
    # Strip only a prefix made of one whitespace-free token, an optional
    # parenthesised scope and an optional "!" directly before the colon.
    description = re.sub(r'^[^\s:(]+(\([^)]*\))?!?:\s*', '', subject.lower())
    words = description.split()
    # Past-tense / gerund forms ("added", "fixing") are rejected simply by
    # not being in the verb list; no separate deny-list is needed.
    return bool(words) and words[0] in imperative_verbs
def is_capitalized(subject: str) -> bool:
    """Check if subject is properly capitalized.

    A leading single-token prefix that ends in a colon (``feat:``,
    ``fix(api)!:``) is skipped, then the first remaining character must be
    uppercase. A colon later in the subject does not count as a prefix, so
    ``Fix crash: handle empty input`` is judged by its leading ``F``.
    Returns False when nothing remains to inspect.
    """
    # Only a whitespace-free token (plus optional scope and "!") directly
    # before the colon is treated as a type prefix.
    description = re.sub(r'^[^\s:(]+(\([^)]*\))?!?:\s*', '', subject)
    # Slicing keeps the empty string safe: ''.isupper() is False.
    return description[:1].isupper()
def has_no_period_end(subject: str) -> bool:
    """Tell whether *subject* is free of a trailing period."""
    # Slicing keeps the empty subject safe: '' != '.'.
    return subject[-1:] != '.'
def has_blank_line_before_body(full_message: str) -> bool:
    """Check if there's a blank line between subject and body.

    A message consisting of the subject line only has no body and
    therefore passes. For anything longer, the second line must be empty
    or whitespace-only; this also covers a two-line message such as
    ``"subject\\nbody"``, which has a body but no separating blank line.
    """
    lines = full_message.split('\n')
    if len(lines) < 2:
        return True  # subject only, nothing to separate
    return lines[1].strip() == ''
def is_body_wrapped(body: str, max_width: int = 72) -> bool:
    """Check if body lines are wrapped at max_width.

    Args:
        body: Commit body text; an empty body counts as wrapped.
        max_width: Maximum allowed line length in characters.

    Returns:
        True when no line is longer than ``max_width``. Lines that start
        (after leading whitespace) with a bullet marker or a URL are exempt
        from the limit.
    """
    if not body:
        return True
    # NOTE: an empty string must never appear in this tuple, because
    # str.startswith('') is true for every line and would exempt them all.
    exempt_prefixes = ('-', '*', '\u2022', 'http://', 'https://')
    return all(
        len(line) <= max_width or line.strip().startswith(exempt_prefixes)
        for line in body.split('\n')
    )
def has_footer(full_message: str) -> bool:
    """Tell whether the message contains a recognised footer.

    Recognised footers are a ``BREAKING CHANGE:`` note, a
    ``Closes``/``Fixes``/``Refs`` keyword followed by ``#<number>``, and
    the ``Co-authored-by:`` / ``Signed-off-by:`` trailers. The markers are
    searched for anywhere in the message.
    """
    footer_markers = (
        r'BREAKING CHANGE:',
        r'Closes #\d+',
        r'Fixes #\d+',
        r'Refs #\d+',
        r'Co-authored-by:',
        r'Signed-off-by:',
    )
    return any(re.search(marker, full_message) for marker in footer_markers)
def references_issues(full_message: str) -> bool:
    """Check if commit references issues.

    A reference is either ``#<number>`` anywhere in the message, or one of
    the keywords ``Closes`` / ``Fixes`` / ``Refs`` (first letter in either
    case, optional trailing colon) followed by a tracker key such as
    ``PROJ-42``. A keyword on its own is not a reference, so ordinary
    prose like "this fixes the bug" or "prefixes" does not count.
    """
    pattern = r'#\d+|\b(?:[Cc]loses|[Ff]ixes|[Rr]efs):?\s+[A-Z][A-Z0-9]*-\d+'
    return bool(re.search(pattern, full_message))
def mentions_breaking(full_message: str) -> bool:
    """Tell whether the message carries a breaking-change marker.

    Both spellings, ``BREAKING CHANGE:`` and ``BREAKING-CHANGE:``, are
    accepted; the match is case-sensitive.
    """
    markers = ('BREAKING CHANGE:', 'BREAKING-CHANGE:')
    return any(marker in full_message for marker in markers)
def has_co_authors(full_message: str) -> bool:
    """Tell whether the message contains a ``Co-authored-by:`` trailer."""
    trailer = 'Co-authored-by:'
    return trailer in full_message
def is_signed_off(full_message: str) -> bool:
    """Tell whether the message contains a ``Signed-off-by:`` trailer."""
    trailer = 'Signed-off-by:'
    return trailer in full_message
def includes_rationale(body: str) -> bool:
    """Check if body includes rationale (why/because/to/for).

    Looks, case-insensitively, for one of the words ``because``, ``to``,
    ``for``, ``why``, ``since`` or ``as`` as a whole word. Whole-word
    matching keeps ``has`` from counting as ``as`` and ``into`` from
    counting as ``to``, and lets a keyword at the end of a line count.
    An empty body never includes rationale.
    """
    if not body:
        return False
    # "in order to" needs no entry of its own: it already contains "to".
    pattern = r'\b(?:because|to|for|why|since|as)\b'
    return bool(re.search(pattern, body.lower()))
def mentions_impact(body: str) -> bool:
    """Check if body mentions impact.

    Looks, case-insensitively, for a word that starts with one of the stems
    ``affect``, ``impact``, ``change``, ``improve``, ``break`` or ``fix``
    (so ``fixes``, ``breaking`` and ``improved`` count). The stem must
    begin the word, which keeps ``prefix`` or ``linebreak`` from counting.
    An empty body never mentions impact.
    """
    if not body:
        return False
    pattern = r'\b(?:affect|impact|change|improve|break|fix)'
    return bool(re.search(pattern, body.lower()))
def analyze_patterns(commits: List[Dict[str, str]]) -> Dict:
    """Analyze commit patterns and return results.

    Args:
        commits: Commit dicts providing the keys ``subject``, ``body`` and
            ``full``.

    Returns:
        A dict with the keys:

        * ``commits_analyzed``: number of commits inspected.
        * ``patterns``: for each category (``format``, ``conventions``,
          ``content``) a mapping of pattern name to ``count``,
          ``percentage`` and ``strength``. Every known pattern is present;
          one that was never observed is reported with count 0.
        * ``consistency_score``: integer score weighting format (40%),
          conventions (40%) and content (20%).
        * ``dominant_pattern``: the format pattern with the highest count,
          or ``"unknown"`` when no commits were given.
    """
    total = len(commits)

    # Every pattern this analysis can report, so that patterns which never
    # occur still show up in the results instead of being silently omitted.
    known_patterns = {
        'format': ['conventional_commits', 'prefixed', 'tagged', 'simple_subject'],
        'conventions': ['imperative_mood', 'capitalized_subject', 'no_period_end',
                        'blank_line_before_body', 'wrapped_body', 'has_footer'],
        'content': ['references_issues', 'mentions_breaking', 'has_co_authors',
                    'signed_off', 'includes_rationale', 'mentions_impact'],
    }
    # Patterns measured against commits that have a body, not all commits.
    body_only_patterns = ('blank_line_before_body', 'wrapped_body')

    patterns = {category: defaultdict(int) for category in known_patterns}
    commits_with_body = 0

    for commit in commits:
        subject = commit['subject']
        body = commit['body']
        full = commit['full']

        # Format patterns are mutually exclusive: first match wins.
        if is_conventional_commit(subject):
            patterns['format']['conventional_commits'] += 1
        elif has_prefix(subject):
            patterns['format']['prefixed'] += 1
        elif has_tag(subject):
            patterns['format']['tagged'] += 1
        else:
            patterns['format']['simple_subject'] += 1

        # Convention patterns
        if is_imperative_mood(subject):
            patterns['conventions']['imperative_mood'] += 1
        if is_capitalized(subject):
            patterns['conventions']['capitalized_subject'] += 1
        if has_no_period_end(subject):
            patterns['conventions']['no_period_end'] += 1
        if body:
            commits_with_body += 1
            if has_blank_line_before_body(full):
                patterns['conventions']['blank_line_before_body'] += 1
            if is_body_wrapped(body):
                patterns['conventions']['wrapped_body'] += 1
            if has_footer(full):
                patterns['conventions']['has_footer'] += 1

        # Content patterns
        if references_issues(full):
            patterns['content']['references_issues'] += 1
        if mentions_breaking(full):
            patterns['content']['mentions_breaking'] += 1
        if has_co_authors(full):
            patterns['content']['has_co_authors'] += 1
        if is_signed_off(full):
            patterns['content']['signed_off'] += 1
        if includes_rationale(body):
            patterns['content']['includes_rationale'] += 1
        if mentions_impact(body):
            patterns['content']['mentions_impact'] += 1

    def calc_percentage(count, denominator):
        return round((count / denominator * 100), 1) if denominator > 0 else 0

    # Lower bounds (in percent) for each strength label, highest first.
    strength_levels = (
        (95, "perfect"),
        (80, "strong"),
        (65, "dominant"),
        (45, "common"),
        (25, "moderate"),
        (10, "occasional"),
        (1, "rare"),
    )

    def get_strength(percentage):
        for lower_bound, label in strength_levels:
            if percentage >= lower_bound:
                return label
        return "absent"

    # Build results: observed patterns first (in the order they were first
    # seen), followed by the known patterns that never occurred.
    results = {category: {} for category in known_patterns}
    for category, counters in patterns.items():
        unseen = [name for name in known_patterns[category] if name not in counters]
        for pattern_name in list(counters) + unseen:
            count = counters.get(pattern_name, 0)
            if pattern_name in body_only_patterns:
                denominator = commits_with_body
            else:
                denominator = total
            percentage = calc_percentage(count, denominator)
            results[category][pattern_name] = {
                'count': count,
                'percentage': percentage,
                'strength': get_strength(percentage)
            }

    # Calculate consistency score
    # Weight: format(40), conventions(40), content(20)
    format_score = results['format'].get('conventional_commits', {}).get('percentage', 0)
    convention_scores = [
        results['conventions'].get('imperative_mood', {}).get('percentage', 0),
        results['conventions'].get('capitalized_subject', {}).get('percentage', 0),
        results['conventions'].get('no_period_end', {}).get('percentage', 0)
    ]
    avg_convention = sum(convention_scores) / len(convention_scores) if convention_scores else 0
    content_scores = [
        results['content'].get('references_issues', {}).get('percentage', 0),
        results['content'].get('includes_rationale', {}).get('percentage', 0)
    ]
    avg_content = sum(content_scores) / len(content_scores) if content_scores else 0
    consistency_score = int(format_score * 0.4 + avg_convention * 0.4 + avg_content * 0.2)

    # Determine dominant pattern. With no commits every count is zero, so
    # there is nothing meaningful to pick.
    if total > 0:
        dominant_pattern = max(results['format'].items(), key=lambda x: x[1]['count'])[0]
    else:
        dominant_pattern = "unknown"

    return {
        'commits_analyzed': total,
        'patterns': results,
        'consistency_score': consistency_score,
        'dominant_pattern': dominant_pattern
    }
def main():
    """Command-line entry point.

    Parses ``--count``, ``--branch`` and ``--detailed``, checks that the
    current directory is a git repository with history, analyzes the
    requested commits and prints the result as indented JSON on stdout.

    Failures are printed as a JSON ``{"error": ...}`` object on stderr and
    the process exits with 1 (not a git repository), 2 (no commit history)
    or 3 (no commits could be fetched). On success the exit status is 0.
    """
    parser = argparse.ArgumentParser(description='Detect commit message patterns')
    parser.add_argument('--count', type=int, default=50, help='Number of commits to analyze')
    parser.add_argument('--branch', default='HEAD', help='Branch to analyze')
    parser.add_argument('--detailed', action='store_true', help='Include detailed breakdown')
    args = parser.parse_args()

    def abort(message, exit_code):
        # Errors go to stderr as JSON so stdout only ever carries results.
        print(json.dumps({'error': message}), file=sys.stderr)
        sys.exit(exit_code)

    # Validate the environment before doing any work.
    if not is_git_repository():
        abort('Not in a git repository', 1)
    if not has_commits():
        abort('No commit history found', 2)

    commits = get_commits(args.count, args.branch)
    if not commits:
        abort('Failed to fetch commits', 3)

    # Analyze and echo back the options the report was produced with.
    report = analyze_patterns(commits)
    report.update(branch=args.branch, detailed=args.detailed)

    print(json.dumps(report, indent=2))
    sys.exit(0)
# Run the command-line interface only when this file is executed as a
# script; importing it as a module must not trigger the analysis.
if __name__ == '__main__':
    main()