Files
gh-dhofheinz-open-plugins-p…/commands/commit-best-practices/.scripts/commit-reviewer.py
2025-11-29 18:20:25 +08:00

346 lines
12 KiB
Python
Executable File

#!/usr/bin/env python3
################################################################################
# Commit Reviewer Script
#
# Purpose: Analyze commit quality including message, changes, and atomicity
# Version: 1.0.0
# Usage: ./commit-reviewer.py <commit-sha>
# Returns: JSON with comprehensive quality analysis
# Exit Codes:
# 0 = Success
# 1 = Commit not found
# 2 = Script execution error
################################################################################
import sys
import json
import subprocess
import re
from typing import Dict, List, Tuple, Any
################################################################################
# Git operations
################################################################################
def git_command(args: List[str]) -> str:
"""Execute git command and return output"""
try:
result = subprocess.run(
['git'] + args,
capture_output=True,
text=True,
check=True
)
return result.stdout.strip()
except subprocess.CalledProcessError as e:
return ""
def commit_exists(sha: str) -> bool:
"""Check if commit exists"""
result = git_command(['rev-parse', '--verify', sha])
return bool(result)
def get_commit_info(sha: str) -> Dict[str, str]:
"""Get commit metadata"""
return {
'sha': git_command(['rev-parse', sha]),
'author': git_command(['log', '-1', '--format=%an <%ae>', sha]),
'date': git_command(['log', '-1', '--format=%ad', '--date=short', sha]),
'subject': git_command(['log', '-1', '--format=%s', sha]),
'body': git_command(['log', '-1', '--format=%b', sha]),
}
def get_commit_stats(sha: str) -> Dict[str, int]:
"""Get commit statistics"""
stats_raw = git_command(['show', '--stat', '--format=', sha])
files_changed = 0
insertions = 0
deletions = 0
test_files = 0
doc_files = 0
for line in stats_raw.split('\n'):
if '|' in line:
files_changed += 1
filename = line.split('|')[0].strip()
# Count test files
if 'test' in filename.lower() or 'spec' in filename.lower():
test_files += 1
# Count doc files
if filename.endswith('.md') or 'doc' in filename.lower():
doc_files += 1
# Parse summary line: "5 files changed, 234 insertions(+), 12 deletions(-)"
if 'insertion' in line:
match = re.search(r'(\d+) insertion', line)
if match:
insertions = int(match.group(1))
if 'deletion' in line:
match = re.search(r'(\d+) deletion', line)
if match:
deletions = int(match.group(1))
return {
'files_changed': files_changed,
'insertions': insertions,
'deletions': deletions,
'test_files': test_files,
'doc_files': doc_files,
}
################################################################################
# Message analysis
################################################################################
def analyze_message(subject: str, body: str) -> Dict[str, Any]:
"""Analyze commit message quality"""
# Check conventional commits format
conventional_pattern = r'^(feat|fix|docs|style|refactor|perf|test|build|ci|chore|revert)(\([a-z0-9\-]+\))?: .+'
is_conventional = bool(re.match(conventional_pattern, subject, re.IGNORECASE))
# Extract type and scope if conventional
commit_type = None
commit_scope = None
if is_conventional:
match = re.match(r'^([a-z]+)(?:\(([a-z0-9\-]+)\))?: ', subject, re.IGNORECASE)
if match:
commit_type = match.group(1).lower()
commit_scope = match.group(2) if match.group(2) else None
# Check subject length
subject_length = len(subject)
subject_ok = subject_length <= 50
# Check imperative mood (basic heuristics)
imperative_verbs = ['add', 'fix', 'update', 'remove', 'refactor', 'improve', 'implement']
past_tense_patterns = ['added', 'fixed', 'updated', 'removed', 'refactored', 'improved', 'implemented']
subject_lower = subject.lower()
uses_imperative = any(subject_lower.startswith(verb) for verb in imperative_verbs)
uses_past_tense = any(pattern in subject_lower for pattern in past_tense_patterns)
# Check if body exists and is useful
has_body = bool(body.strip())
body_lines = body.strip().split('\n') if has_body else []
body_line_count = len([line for line in body_lines if line.strip()])
# Body quality assessment
has_explanation = body_line_count > 2
uses_bullets = any(line.strip().startswith(('-', '*', '')) for line in body_lines)
return {
'subject': subject,
'body': body if has_body else None,
'subject_length': subject_length,
'subject_ok': subject_ok,
'has_body': has_body,
'body_line_count': body_line_count,
'conventional': is_conventional,
'type': commit_type,
'scope': commit_scope,
'imperative': uses_imperative and not uses_past_tense,
'explanation': has_explanation,
'uses_bullets': uses_bullets,
}
################################################################################
# Atomicity analysis
################################################################################
def analyze_atomicity(sha: str, stats: Dict[str, int]) -> Dict[str, Any]:
"""Analyze if commit is atomic"""
# Get changed files
changed_files = git_command(['show', '--name-only', '--format=', sha]).split('\n')
changed_files = [f for f in changed_files if f.strip()]
# Analyze file types
file_types = set()
scopes = set()
for filepath in changed_files:
# Determine file type
if 'test' in filepath.lower() or 'spec' in filepath.lower():
file_types.add('test')
elif filepath.endswith('.md') or 'doc' in filepath.lower():
file_types.add('docs')
elif any(filepath.endswith(ext) for ext in ['.js', '.ts', '.py', '.go', '.rs', '.java']):
file_types.add('code')
elif any(filepath.endswith(ext) for ext in ['.json', '.yaml', '.yml', '.toml']):
file_types.add('config')
# Determine scope from path
parts = filepath.split('/')
if len(parts) > 1:
scopes.add(parts[0])
# Check for multiple types (excluding test + code as acceptable)
suspicious_type_mix = False
if 'docs' in file_types and 'code' in file_types:
suspicious_type_mix = True
if 'config' in file_types and len(file_types) > 2:
suspicious_type_mix = True
# Check for multiple scopes
multiple_scopes = len(scopes) > 2
# Size check (too large likely non-atomic)
too_large = stats['files_changed'] > 15 or stats['insertions'] > 500
# Determine atomicity
is_atomic = not (suspicious_type_mix or multiple_scopes or too_large)
issues = []
if suspicious_type_mix:
issues.append(f"Mixes {' and '.join(file_types)}")
if multiple_scopes:
issues.append(f"Affects multiple scopes: {', '.join(sorted(scopes))}")
if too_large:
issues.append(f"Large commit: {stats['files_changed']} files")
return {
'atomic': is_atomic,
'file_types': sorted(file_types),
'scopes': sorted(scopes),
'issues': issues,
}
################################################################################
# Quality scoring
################################################################################
def calculate_score(message: Dict[str, Any], stats: Dict[str, int], atomicity: Dict[str, Any]) -> Tuple[int, str, List[str]]:
"""Calculate overall quality score (0-100)"""
score = 100
issues = []
# Message quality (40 points)
if not message['conventional']:
score -= 10
issues.append("Not using conventional commits format")
if not message['subject_ok']:
score -= 5
issues.append(f"Subject too long ({message['subject_length']} chars, should be ≤50)")
if not message['imperative']:
score -= 5
issues.append("Subject not in imperative mood")
if not message['has_body'] and stats['files_changed'] > 3:
score -= 10
issues.append("No commit body explaining changes")
elif message['has_body'] and not message['explanation']:
score -= 5
issues.append("Commit body too brief")
# Atomicity (30 points)
if not atomicity['atomic']:
score -= 20
issues.extend(atomicity['issues'])
# Test coverage (20 points)
has_code_changes = 'code' in atomicity['file_types']
has_test_changes = stats['test_files'] > 0
if has_code_changes and not has_test_changes and stats['insertions'] > 100:
score -= 15
issues.append("No tests included for significant code changes")
elif has_code_changes and not has_test_changes:
score -= 5
issues.append("No tests included")
# Size appropriateness (10 points)
if stats['files_changed'] > 20:
score -= 5
issues.append(f"Very large commit ({stats['files_changed']} files)")
if stats['insertions'] > 1000:
score -= 5
issues.append(f"Very large changeset ({stats['insertions']} insertions)")
# Determine quality level
if score >= 90:
quality = "excellent"
elif score >= 70:
quality = "good"
elif score >= 50:
quality = "fair"
else:
quality = "poor"
return max(0, score), quality, issues
################################################################################
# Main execution
################################################################################
def main():
if len(sys.argv) < 2:
print(json.dumps({"error": "Usage: commit-reviewer.py <commit-sha>"}))
sys.exit(2)
commit_sha = sys.argv[1]
# Check if git repository
if not git_command(['rev-parse', '--git-dir']):
print(json.dumps({"error": "Not a git repository"}))
sys.exit(2)
# Check if commit exists
if not commit_exists(commit_sha):
print(json.dumps({"error": f"Commit not found: {commit_sha}"}))
sys.exit(1)
# Gather commit information
info = get_commit_info(commit_sha)
stats = get_commit_stats(commit_sha)
message_analysis = analyze_message(info['subject'], info['body'])
atomicity_analysis = analyze_atomicity(commit_sha, stats)
# Calculate quality score
score, quality, issues = calculate_score(message_analysis, stats, atomicity_analysis)
# Build output
output = {
'commit': info['sha'][:8],
'author': info['author'],
'date': info['date'],
'message': {
'subject': message_analysis['subject'],
'body': message_analysis['body'],
'subject_length': message_analysis['subject_length'],
'has_body': message_analysis['has_body'],
'conventional': message_analysis['conventional'],
'type': message_analysis['type'],
'scope': message_analysis['scope'],
},
'changes': {
'files_changed': stats['files_changed'],
'insertions': stats['insertions'],
'deletions': stats['deletions'],
'test_files': stats['test_files'],
'doc_files': stats['doc_files'],
},
'quality': {
'atomic': atomicity_analysis['atomic'],
'message_quality': quality,
'test_coverage': stats['test_files'] > 0,
'issues': issues,
},
'score': score,
}
print(json.dumps(output, indent=2))
sys.exit(0)
if __name__ == '__main__':
main()