422 lines
13 KiB
Python
Executable File
422 lines
13 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
"""
|
|
============================================================================
SCRIPT: pattern-detector.py
PURPOSE: Detect commit message patterns and conventions from git history
VERSION: 1.0.0
USAGE: ./pattern-detector.py --count N --branch BRANCH [--detailed]
RETURNS: JSON format with pattern detection results
EXIT CODES:
  0 - Success
  1 - Not a git repository
  2 - No commit history
  3 - Git command failed
DEPENDENCIES: git, python3
============================================================================
|
|
"""
|
|
|
|
import subprocess
|
|
import sys
|
|
import json
|
|
import re
|
|
import argparse
|
|
from collections import defaultdict
|
|
from typing import Dict, List, Tuple
|
|
|
|
|
|
def run_git_command(cmd: List[str]) -> Tuple[int, str]:
    """Run *cmd* as a subprocess and report its outcome.

    Args:
        cmd: Complete argument vector, e.g. ``['git', 'log', '-1']``.

    Returns:
        ``(returncode, stdout)`` with surrounding whitespace stripped from
        stdout. If running the command raises (for example because the
        executable is missing), ``(1, <exception text>)`` is returned
        instead of propagating the exception.
    """
    try:
        completed = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=False,  # non-zero exits are reported, not raised
        )
    except Exception as exc:
        # Best-effort contract: failures are folded into a non-zero code.
        return 1, str(exc)
    else:
        return completed.returncode, completed.stdout.strip()
|
|
|
|
|
|
def is_git_repository() -> bool:
    """Report whether the current directory is inside a git repository.

    Returns:
        True when ``git rev-parse --git-dir`` exits with status 0.
    """
    status, _output = run_git_command(['git', 'rev-parse', '--git-dir'])
    return status == 0
|
|
|
|
|
|
def has_commits() -> bool:
    """Report whether the repository contains at least one commit.

    Returns:
        True when ``git log -1`` exits with status 0.
    """
    status, _output = run_git_command(['git', 'log', '-1'])
    return status == 0
|
|
|
|
|
|
def get_commits(count: int, branch: str) -> List[Dict[str, str]]:
    """Fetch and parse commit messages from ``git log``.

    Each commit is printed as: hash line, subject line, body lines, then a
    separator line. The output is split back into records on that
    separator.

    Args:
        count: Maximum number of commits to request (passed as ``-<count>``).
        branch: Revision to read history from.

    Returns:
        One dict per commit with the keys ``hash``, ``subject``, ``body``
        and ``full`` (subject, blank line, body -- or just the subject when
        there is no body). Returns an empty list when the git command
        fails or prints nothing.
    """
    separator = '---COMMIT_SEPARATOR---'
    code, output = run_git_command([
        'git', 'log',
        f'-{count}',
        branch,
        f'--format=%H%n%s%n%b%n{separator}',
    ])

    if code != 0:
        return []

    commits = []
    lines = output.split('\n')
    i = 0

    # A record needs at least a hash line and a subject line.
    while i + 1 < len(lines):
        commit_hash = lines[i]
        subject = lines[i + 1]
        i += 2

        # Everything up to the separator is the body. Interior blank lines
        # are kept so paragraph breaks survive; the strip() below removes
        # only the leading and trailing blank lines.
        body_lines = []
        while i < len(lines) and lines[i] != separator:
            body_lines.append(lines[i])
            i += 1

        body = '\n'.join(body_lines).strip()

        commits.append({
            'hash': commit_hash,
            'subject': subject,
            'body': body,
            'full': subject + '\n\n' + body if body else subject,
        })

        i += 1  # Skip separator

    return commits
|
|
|
|
|
|
def is_conventional_commit(subject: str) -> bool:
    """Check whether *subject* follows the Conventional Commits header form.

    Accepted shape: ``type(scope)!: description`` where the type is
    lowercase letters, the parenthesised scope is optional, and the ``!``
    breaking-change marker before the colon is optional.

    Args:
        subject: First line of the commit message.

    Returns:
        True if the subject matches the header form.
    """
    # `!?` admits headers such as "feat!: ..." and "feat(api)!: ...".
    pattern = r'^[a-z]+(\([^)]+\))?!?: .+'
    return bool(re.match(pattern, subject))
|
|
|
|
|
|
def has_prefix(subject: str) -> bool:
    """Tell whether *subject* opens with a bracketed prefix such as ``[CORE]``.

    The brackets must enclose at least one character.
    """
    return re.match(r'^\[[^\]]+\]', subject) is not None
|
|
|
|
|
|
def has_tag(subject: str) -> bool:
    """Tell whether *subject* begins with a ``#`` tag marker."""
    # Slicing keeps the empty string safe: ''[:1] is '' rather than an error.
    return subject[:1] == '#'
|
|
|
|
|
|
def is_imperative_mood(subject: str) -> bool:
    """Heuristically decide whether *subject* is written in imperative mood.

    The subject is lowercased; if it contains a colon, only the text after
    the first colon is considered (this skips a ``type(scope):`` header).
    The first remaining word must be one of a fixed set of imperative verbs
    and must not be one of the listed past-tense / gerund forms.
    """
    imperative_verbs = {
        'add', 'fix', 'update', 'remove', 'delete', 'create', 'implement',
        'change', 'improve', 'optimize', 'refactor', 'enhance', 'correct',
        'resolve', 'merge', 'bump', 'revert', 'document', 'upgrade',
        'downgrade', 'rename', 'move', 'replace', 'extract', 'simplify',
    }
    non_imperative = {
        'added', 'fixed', 'updated', 'removed', 'deleted',
        'created', 'implemented', 'changed', 'improved',
        'adding', 'fixing', 'updating',
    }

    text = subject.lower()
    _before, colon, after = text.partition(':')
    if colon:
        text = after.strip()

    tokens = text.split()
    if not tokens:
        return False

    leading = tokens[0]
    return leading not in non_imperative and leading in imperative_verbs
|
|
|
|
|
|
def is_capitalized(subject: str) -> bool:
    """Tell whether the subject's description starts with an uppercase letter.

    When the subject contains a colon, the check applies to the stripped
    text after the first colon; otherwise to the subject as given. An empty
    description counts as not capitalized.
    """
    _before, colon, after = subject.partition(':')
    description = after.strip() if colon else subject
    return bool(description) and description[0].isupper()
|
|
|
|
|
|
def has_no_period_end(subject: str) -> bool:
    """Tell whether *subject* is free of a trailing period."""
    # The slice yields '' for an empty subject, which is not a period.
    return subject[-1:] != '.'
|
|
|
|
|
|
def has_blank_line_before_body(full_message: str) -> bool:
    """Tell whether the subject is separated from the body by a blank line.

    Messages with fewer than three lines are treated as compliant; otherwise
    the second line must be empty or whitespace-only.
    """
    message_lines = full_message.split('\n')
    return len(message_lines) < 3 or not message_lines[1].strip()
|
|
|
|
|
|
def is_body_wrapped(body: str, max_width: int = 72) -> bool:
    """Tell whether every body line fits within *max_width* characters.

    Lines whose stripped text starts with a bullet marker or a URL scheme
    are exempt from the limit. An empty body counts as wrapped.

    Args:
        body: Commit message body.
        max_width: Maximum allowed line length.
    """
    if not body:
        return True

    exempt_starts = ('-', '*', '•', 'http://', 'https://')
    return all(
        len(line) <= max_width
        for line in body.split('\n')
        if not line.strip().startswith(exempt_starts)
    )
|
|
|
|
|
|
def has_footer(full_message: str) -> bool:
    """Tell whether the message contains a recognised footer marker.

    Recognised markers: ``BREAKING CHANGE:``, ``Closes #N``, ``Fixes #N``,
    ``Refs #N``, ``Co-authored-by:`` and ``Signed-off-by:``. Matching is
    case-sensitive and not restricted to the end of the message.
    """
    footer_patterns = (
        r'BREAKING CHANGE:',
        r'Closes #\d+',
        r'Fixes #\d+',
        r'Refs #\d+',
        r'Co-authored-by:',
        r'Signed-off-by:',
    )
    return any(re.search(marker, full_message) for marker in footer_patterns)
|
|
|
|
|
|
def references_issues(full_message: str) -> bool:
    """Check whether the commit message references an issue.

    A message counts as referencing an issue when it contains ``#<digits>``
    or one of the keywords Closes / Fixes / Refs (first letter in either
    case) as a standalone word.

    Args:
        full_message: Subject plus body of the commit.

    Returns:
        True if an issue reference or keyword is found.
    """
    # Word boundaries stop the keywords from matching inside other words,
    # e.g. "prefixes" (fixes) or "hrefs" (refs).
    pattern = r'#\d+|\b(?:[Cc]loses|[Ff]ixes|[Rr]efs)\b'
    return bool(re.search(pattern, full_message))
|
|
|
|
|
|
def mentions_breaking(full_message: str) -> bool:
    """Tell whether the message carries a breaking-change marker.

    Both ``BREAKING CHANGE:`` and ``BREAKING-CHANGE:`` are accepted;
    matching is case-sensitive.
    """
    markers = ('BREAKING CHANGE:', 'BREAKING-CHANGE:')
    return any(marker in full_message for marker in markers)
|
|
|
|
|
|
def has_co_authors(full_message: str) -> bool:
    """Tell whether the message contains a ``Co-authored-by:`` trailer."""
    return full_message.find('Co-authored-by:') != -1
|
|
|
|
|
|
def is_signed_off(full_message: str) -> bool:
    """Tell whether the message contains a ``Signed-off-by:`` trailer."""
    return full_message.find('Signed-off-by:') != -1
|
|
|
|
|
|
def includes_rationale(body: str) -> bool:
    """Check whether the body contains a rationale keyword.

    The keywords are: because, to, for, why, since, as, "in order to".
    Matching is case-insensitive and on whole words.

    Args:
        body: Commit message body (may be empty).

    Returns:
        True if any keyword occurs as a standalone word; False for an
        empty body.
    """
    if not body:
        return False
    # Whole-word matching: plain substring tests such as 'as ' or 'to '
    # also fire inside "has " / "auto " and miss a keyword that ends a line.
    pattern = r'\b(?:because|to|for|why|since|as|in order to)\b'
    return re.search(pattern, body.lower()) is not None
|
|
|
|
|
|
def mentions_impact(body: str) -> bool:
    """Tell whether the body contains an impact-related word stem.

    The lowercased body is searched for the substrings affect, impact,
    change, improve, break and fix. An empty body yields False.
    """
    if not body:
        return False
    lowered = body.lower()
    stems = ('affect', 'impact', 'change', 'improve', 'break', 'fix')
    return any(stem in lowered for stem in stems)
|
|
|
|
|
|
def analyze_patterns(commits: List[Dict[str, str]]) -> Dict:
    """Tally message patterns across *commits* and summarise them.

    Args:
        commits: Dicts carrying the keys ``subject``, ``body`` and ``full``.

    Returns:
        A dict with:
          * ``commits_analyzed`` -- number of commits received;
          * ``patterns`` -- per category (``format``, ``conventions``,
            ``content``) a mapping of pattern name to ``count``,
            ``percentage`` and ``strength``; only patterns seen at least
            once are listed;
          * ``consistency_score`` -- integer weighted 40/40/20 over format,
            conventions and content percentages;
          * ``dominant_pattern`` -- the most frequent format pattern, or
            ``"unknown"`` when none was counted.
    """
    total = len(commits)

    # These patterns are only evaluated for commits that have a body, so
    # their percentage is taken over that subset.
    body_only_patterns = ('blank_line_before_body', 'wrapped_body')

    # Lower bound of each strength band, highest first.
    strength_bands = (
        (95, "perfect"),
        (80, "strong"),
        (65, "dominant"),
        (45, "common"),
        (25, "moderate"),
        (10, "occasional"),
        (1, "rare"),
    )

    tallies = {
        'format': defaultdict(int),
        'conventions': defaultdict(int),
        'content': defaultdict(int),
    }
    commits_with_body = 0

    for commit in commits:
        subject = commit['subject']
        body = commit['body']
        full = commit['full']

        # Format: exactly one bucket per commit, first match wins.
        if is_conventional_commit(subject):
            format_name = 'conventional_commits'
        elif has_prefix(subject):
            format_name = 'prefixed'
        elif has_tag(subject):
            format_name = 'tagged'
        else:
            format_name = 'simple_subject'
        tallies['format'][format_name] += 1

        # Conventions: independent checks, kept in a fixed order so the
        # result keys appear in a stable order.
        convention_checks = [
            ('imperative_mood', is_imperative_mood(subject)),
            ('capitalized_subject', is_capitalized(subject)),
            ('no_period_end', has_no_period_end(subject)),
        ]
        if body:
            commits_with_body += 1
            convention_checks.append(
                ('blank_line_before_body', has_blank_line_before_body(full)))
            convention_checks.append(('wrapped_body', is_body_wrapped(body)))
        convention_checks.append(('has_footer', has_footer(full)))

        content_checks = [
            ('references_issues', references_issues(full)),
            ('mentions_breaking', mentions_breaking(full)),
            ('has_co_authors', has_co_authors(full)),
            ('signed_off', is_signed_off(full)),
            ('includes_rationale', includes_rationale(body)),
            ('mentions_impact', mentions_impact(body)),
        ]

        for name, matched in convention_checks:
            if matched:
                tallies['conventions'][name] += 1
        for name, matched in content_checks:
            if matched:
                tallies['content'][name] += 1

    def calc_percentage(count, denominator=total):
        if denominator > 0:
            return round((count / denominator * 100), 1)
        return 0

    def get_strength(percentage):
        for lower_bound, label in strength_bands:
            if percentage >= lower_bound:
                return label
        return "absent"

    results = {
        'format': {},
        'conventions': {},
        'content': {},
    }
    for category, counters in tallies.items():
        for pattern_name, count in counters.items():
            if pattern_name in body_only_patterns:
                denominator = commits_with_body
            else:
                denominator = total
            percentage = calc_percentage(count, denominator)
            results[category][pattern_name] = {
                'count': count,
                'percentage': percentage,
                'strength': get_strength(percentage),
            }

    def percentage_of(category, pattern_name):
        # Patterns never observed are missing from results; treat as 0.
        return results[category].get(pattern_name, {}).get('percentage', 0)

    # Consistency score weights: format 40, conventions 40, content 20.
    format_score = percentage_of('format', 'conventional_commits')
    convention_scores = [
        percentage_of('conventions', 'imperative_mood'),
        percentage_of('conventions', 'capitalized_subject'),
        percentage_of('conventions', 'no_period_end'),
    ]
    avg_convention = sum(convention_scores) / len(convention_scores)
    content_scores = [
        percentage_of('content', 'references_issues'),
        percentage_of('content', 'includes_rationale'),
    ]
    avg_content = sum(content_scores) / len(content_scores)

    consistency_score = int(format_score * 0.4 + avg_convention * 0.4 + avg_content * 0.2)

    # Most frequent format; ties go to the pattern encountered first.
    format_results = results['format']
    if format_results:
        dominant_pattern = max(
            format_results, key=lambda name: format_results[name]['count'])
    else:
        dominant_pattern = "unknown"

    return {
        'commits_analyzed': total,
        'patterns': results,
        'consistency_score': consistency_score,
        'dominant_pattern': dominant_pattern,
    }
|
|
|
|
|
|
def _positive_int(value: str) -> int:
    """argparse ``type`` callback: parse *value* as an integer >= 1."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f'invalid int value: {value!r}') from None
    if number < 1:
        raise argparse.ArgumentTypeError(
            f'must be a positive integer, got {number}')
    return number


def main():
    """Command-line entry point: analyse commit messages and print JSON.

    Parses ``--count``, ``--branch`` and ``--detailed``, checks the
    repository, fetches the commits, and prints the analysis to stdout.
    Errors are printed to stderr as a JSON object with an ``error`` key.

    Exit codes:
        0 - success
        1 - not inside a git repository
        2 - no commit history (argparse also exits with 2 on invalid
            arguments)
        3 - commits could not be fetched
    """
    parser = argparse.ArgumentParser(description='Detect commit message patterns')
    # A non-positive count would be turned into a malformed git option
    # ("-0", "--5") and surface as a misleading fetch failure, so it is
    # rejected during argument parsing instead.
    parser.add_argument('--count', type=_positive_int, default=50,
                        help='Number of commits to analyze')
    parser.add_argument('--branch', default='HEAD', help='Branch to analyze')
    parser.add_argument('--detailed', action='store_true', help='Include detailed breakdown')

    args = parser.parse_args()

    # Validate git repository
    if not is_git_repository():
        print(json.dumps({'error': 'Not in a git repository'}), file=sys.stderr)
        sys.exit(1)

    if not has_commits():
        print(json.dumps({'error': 'No commit history found'}), file=sys.stderr)
        sys.exit(2)

    # Fetch commits
    commits = get_commits(args.count, args.branch)
    if not commits:
        print(json.dumps({'error': 'Failed to fetch commits'}), file=sys.stderr)
        sys.exit(3)

    # Analyze patterns and echo the request parameters alongside the results
    results = analyze_patterns(commits)
    results['branch'] = args.branch
    results['detailed'] = args.detailed

    # Output JSON
    print(json.dumps(results, indent=2))
    sys.exit(0)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|