346 lines
12 KiB
Python
Executable File
346 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
################################################################################
|
|
# Commit Reviewer Script
|
|
#
|
|
# Purpose: Analyze commit quality including message, changes, and atomicity
|
|
# Version: 1.0.0
|
|
# Usage: ./commit-reviewer.py <commit-sha>
|
|
# Returns: JSON with comprehensive quality analysis
|
|
# Exit Codes:
|
|
# 0 = Success
|
|
# 1 = Commit not found
|
|
# 2 = Script execution error
|
|
################################################################################
|
|
|
|
import sys
|
|
import json
|
|
import subprocess
|
|
import re
|
|
from typing import Dict, List, Tuple, Any
|
|
|
|
################################################################################
|
|
# Git operations
|
|
################################################################################
|
|
|
|
def git_command(args: List[str]) -> str:
    """Run ``git`` with *args* and return its standard output.

    Args:
        args: Command-line arguments passed to git, without the leading
            ``git`` itself (e.g. ``['rev-parse', 'HEAD']``).

    Returns:
        The captured stdout with surrounding whitespace stripped, or an
        empty string when git exits with a non-zero status or cannot be
        started at all. Callers treat "" as failure.
    """
    try:
        result = subprocess.run(
            ['git', *args],
            capture_output=True,
            # Decode explicitly instead of relying on the locale so that
            # non-ASCII author names/messages never raise while decoding.
            encoding='utf-8',
            errors='replace',
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: git ran but reported failure.
        # OSError: git could not be launched (e.g. not installed).
        return ""
    return result.stdout.strip()
|
|
|
|
def commit_exists(sha: str) -> bool:
    """Return True when *sha* resolves to a commit in the current repository.

    Args:
        sha: Any revision expression (hash, branch, tag, ``HEAD~1``, ...).

    Returns:
        True if git can resolve the revision to a commit, False otherwise.
    """
    # The ^{commit} suffix makes rev-parse fail for objects that are not
    # commits (trees, blobs) while still peeling annotated tags.
    return bool(git_command(['rev-parse', '--verify', f'{sha}^{{commit}}']))
|
|
|
|
def get_commit_info(sha: str) -> Dict[str, str]:
    """Collect the identifying metadata of a commit.

    Args:
        sha: Revision to describe.

    Returns:
        Dict with the keys ``sha`` (as printed by ``git rev-parse``),
        ``author`` ("name <email>"), ``date`` (short date format),
        ``subject`` and ``body``. A value is "" when its git call fails.
    """
    def _log_field(*options: str) -> str:
        # Single-entry `git log` lookup restricted to the requested commit.
        return git_command(['log', '-1', *options, sha])

    resolved_sha = git_command(['rev-parse', sha])
    author = _log_field('--format=%an <%ae>')
    date = _log_field('--format=%ad', '--date=short')
    subject = _log_field('--format=%s')
    body = _log_field('--format=%b')

    return {
        'sha': resolved_sha,
        'author': author,
        'date': date,
        'subject': subject,
        'body': body,
    }
|
|
|
|
def get_commit_stats(sha: str) -> Dict[str, int]:
    """Compute change statistics for a commit.

    The numbers are read from ``git show --numstat``, whose lines have the
    form ``<added>\\t<deleted>\\t<path>``. Unlike ``--stat``, this output is
    not width-limited, so paths are never abbreviated to ``.../name`` and
    the test/doc classification always sees the complete path.

    Args:
        sha: Revision to inspect.

    Returns:
        Dict with the integer keys ``files_changed``, ``insertions``,
        ``deletions``, ``test_files`` and ``doc_files``. All values are 0
        when git produces no output.
    """
    numstat_raw = git_command(['show', '--numstat', '--format=', sha])

    files_changed = 0
    insertions = 0
    deletions = 0
    test_files = 0
    doc_files = 0

    for line in numstat_raw.split('\n'):
        fields = line.split('\t', 2)
        if len(fields) != 3:
            # Blank or unexpected line: not a per-file record.
            continue

        added, removed, filename = fields
        files_changed += 1

        # Binary files report "-" instead of line counts.
        if added.isdigit():
            insertions += int(added)
        if removed.isdigit():
            deletions += int(removed)

        lowered = filename.lower()

        # Count test files
        if 'test' in lowered or 'spec' in lowered:
            test_files += 1

        # Count doc files
        if filename.endswith('.md') or 'doc' in lowered:
            doc_files += 1

    return {
        'files_changed': files_changed,
        'insertions': insertions,
        'deletions': deletions,
        'test_files': test_files,
        'doc_files': doc_files,
    }
|
|
|
|
################################################################################
|
|
# Message analysis
|
|
################################################################################
|
|
|
|
def analyze_message(subject: str, body: str) -> Dict[str, Any]:
    """Analyze the quality of a commit message.

    Args:
        subject: First line of the commit message.
        body: Remaining message text (may be empty).

    Returns:
        Dict with the keys:
        ``subject`` / ``body`` (body is None when blank),
        ``subject_length`` and ``subject_ok`` (length <= 50),
        ``has_body`` and ``body_line_count`` (non-blank lines),
        ``conventional``, ``type`` and ``scope`` (Conventional Commits
        prefix, type lower-cased; None when absent),
        ``imperative`` (description starts with a known imperative verb),
        ``explanation`` (more than two non-blank body lines) and
        ``uses_bullets`` (any body line starts with ``-``, ``*`` or ``•``).
    """
    # Conventional Commits header: type, optional (scope), optional "!"
    # breaking-change marker, then ": description".
    conventional_pattern = (
        r'^(feat|fix|docs|style|refactor|perf|test|build|ci|chore|revert)'
        r'(?:\(([a-z0-9\-]+)\))?!?: (.+)'
    )
    header = re.match(conventional_pattern, subject, re.IGNORECASE)
    is_conventional = header is not None

    if header:
        commit_type = header.group(1).lower()
        commit_scope = header.group(2) or None
        description = header.group(3)
    else:
        commit_type = None
        commit_scope = None
        description = subject

    # Check subject length
    subject_length = len(subject)
    subject_ok = subject_length <= 50

    # Check imperative mood (basic heuristic). The check looks at the first
    # word of the description, i.e. the text after any "type(scope): "
    # prefix, so "feat: add x" is recognised as imperative and a past-tense
    # word later in the subject ("... when item removed") is not penalised.
    imperative_verbs = {'add', 'fix', 'update', 'remove', 'refactor', 'improve', 'implement'}
    first_word = re.match(r'[a-z]+', description.strip().lower())
    uses_imperative = first_word is not None and first_word.group(0) in imperative_verbs

    # Check if body exists and is useful
    stripped_body = body.strip()
    has_body = bool(stripped_body)
    body_lines = stripped_body.split('\n') if has_body else []
    body_line_count = sum(1 for line in body_lines if line.strip())

    # Body quality assessment
    has_explanation = body_line_count > 2
    uses_bullets = any(line.strip().startswith(('-', '*', '•')) for line in body_lines)

    return {
        'subject': subject,
        'body': body if has_body else None,
        'subject_length': subject_length,
        'subject_ok': subject_ok,
        'has_body': has_body,
        'body_line_count': body_line_count,
        'conventional': is_conventional,
        'type': commit_type,
        'scope': commit_scope,
        'imperative': uses_imperative,
        'explanation': has_explanation,
        'uses_bullets': uses_bullets,
    }
|
|
|
|
################################################################################
|
|
# Atomicity analysis
|
|
################################################################################
|
|
|
|
def analyze_atomicity(sha: str, stats: Dict[str, int]) -> Dict[str, Any]:
    """Estimate whether a commit is atomic (one logical change).

    Args:
        sha: Revision whose changed paths are inspected.
        stats: Change statistics; the keys ``files_changed`` and
            ``insertions`` are read.

    Returns:
        Dict with ``atomic`` (bool), ``file_types`` (sorted list drawn from
        'test', 'docs', 'code', 'config'), ``scopes`` (sorted top-level
        directories of the changed paths) and ``issues`` (human-readable
        reasons the commit was judged non-atomic).
    """
    code_extensions = ('.js', '.ts', '.py', '.go', '.rs', '.java')
    config_extensions = ('.json', '.yaml', '.yml', '.toml')

    # Get changed files
    name_output = git_command(['show', '--name-only', '--format=', sha])
    changed_files = [path for path in name_output.split('\n') if path.strip()]

    # Analyze file types
    file_types = set()
    scopes = set()

    for filepath in changed_files:
        lowered = filepath.lower()

        # Determine file type; the first matching category wins.
        if 'test' in lowered or 'spec' in lowered:
            file_types.add('test')
        elif filepath.endswith('.md') or 'doc' in lowered:
            file_types.add('docs')
        elif filepath.endswith(code_extensions):
            file_types.add('code')
        elif filepath.endswith(config_extensions):
            file_types.add('config')

        # Determine scope from path; files in the repository root have none.
        parts = filepath.split('/')
        if len(parts) > 1:
            scopes.add(parts[0])

    # Check for multiple types (excluding test + code as acceptable)
    suspicious_type_mix = False
    if 'docs' in file_types and 'code' in file_types:
        suspicious_type_mix = True
    if 'config' in file_types and len(file_types) > 2:
        suspicious_type_mix = True

    # Check for multiple scopes
    multiple_scopes = len(scopes) > 2

    # Size check (too large likely non-atomic)
    too_large = stats['files_changed'] > 15 or stats['insertions'] > 500

    # Determine atomicity
    is_atomic = not (suspicious_type_mix or multiple_scopes or too_large)

    issues = []
    if suspicious_type_mix:
        # Sort so the message is stable; set iteration order is not.
        issues.append(f"Mixes {' and '.join(sorted(file_types))}")
    if multiple_scopes:
        issues.append(f"Affects multiple scopes: {', '.join(sorted(scopes))}")
    if too_large:
        # Report both figures: either one can trigger the size check.
        issues.append(
            f"Large commit: {stats['files_changed']} files, "
            f"{stats['insertions']} insertions"
        )

    return {
        'atomic': is_atomic,
        'file_types': sorted(file_types),
        'scopes': sorted(scopes),
        'issues': issues,
    }
|
|
|
|
################################################################################
|
|
# Quality scoring
|
|
################################################################################
|
|
|
|
def calculate_score(message: Dict[str, Any], stats: Dict[str, int], atomicity: Dict[str, Any]) -> Tuple[int, str, List[str]]:
    """Derive an overall quality score for a commit.

    Starts from 100 and subtracts a fixed penalty for every finding.

    Args:
        message: Message analysis; reads ``conventional``, ``subject_ok``,
            ``subject_length``, ``imperative``, ``has_body``, ``explanation``.
        stats: Change statistics; reads ``files_changed``, ``insertions``,
            ``test_files``.
        atomicity: Atomicity analysis; reads ``atomic``, ``issues``,
            ``file_types``.

    Returns:
        Tuple of (score clamped to be >= 0, quality label, list of issues).
        The label is "excellent" (>= 90), "good" (>= 70), "fair" (>= 50)
        or "poor".
    """
    findings: List[str] = []
    penalty = 0

    file_count = stats['files_changed']
    added_lines = stats['insertions']

    # --- Message quality ---------------------------------------------------
    if not message['conventional']:
        penalty += 10
        findings.append("Not using conventional commits format")

    if not message['subject_ok']:
        penalty += 5
        findings.append(f"Subject too long ({message['subject_length']} chars, should be ≤50)")

    if not message['imperative']:
        penalty += 5
        findings.append("Subject not in imperative mood")

    if message['has_body']:
        if not message['explanation']:
            penalty += 5
            findings.append("Commit body too brief")
    elif file_count > 3:
        # A missing body only counts against commits touching several files.
        penalty += 10
        findings.append("No commit body explaining changes")

    # --- Atomicity -----------------------------------------------------------
    if not atomicity['atomic']:
        penalty += 20
        findings.extend(atomicity['issues'])

    # --- Test coverage -------------------------------------------------------
    untested_code = 'code' in atomicity['file_types'] and stats['test_files'] <= 0
    if untested_code:
        if added_lines > 100:
            penalty += 15
            findings.append("No tests included for significant code changes")
        else:
            penalty += 5
            findings.append("No tests included")

    # --- Size appropriateness ------------------------------------------------
    if file_count > 20:
        penalty += 5
        findings.append(f"Very large commit ({file_count} files)")

    if added_lines > 1000:
        penalty += 5
        findings.append(f"Very large changeset ({added_lines} insertions)")

    # --- Quality level ---------------------------------------------------------
    score = 100 - penalty
    for threshold, label in ((90, "excellent"), (70, "good"), (50, "fair")):
        if score >= threshold:
            quality = label
            break
    else:
        quality = "poor"

    return max(0, score), quality, findings
|
|
|
|
################################################################################
|
|
# Main execution
|
|
################################################################################
|
|
|
|
def main():
    """Command-line entry point: review one commit and print a JSON report.

    Reads the commit revision from ``sys.argv[1]``, prints either the
    analysis or an ``{"error": ...}`` object to stdout, and terminates the
    process with exit status 0 (success), 1 (commit not found) or
    2 (usage error / not a git repository).
    """
    def _abort(reason: str, exit_code: int) -> None:
        # Errors are reported as JSON on stdout, like the normal report.
        print(json.dumps({"error": reason}))
        sys.exit(exit_code)

    if len(sys.argv) < 2:
        _abort("Usage: commit-reviewer.py <commit-sha>", 2)

    commit_sha = sys.argv[1]

    # An empty result means git failed, i.e. we are outside a repository.
    if not git_command(['rev-parse', '--git-dir']):
        _abort("Not a git repository", 2)

    if not commit_exists(commit_sha):
        _abort(f"Commit not found: {commit_sha}", 1)

    # Gather the raw facts about the commit.
    info = get_commit_info(commit_sha)
    stats = get_commit_stats(commit_sha)
    message_analysis = analyze_message(info['subject'], info['body'])
    atomicity_analysis = analyze_atomicity(commit_sha, stats)

    # Turn the facts into a score, a quality label and a list of issues.
    score, quality, issues = calculate_score(message_analysis, stats, atomicity_analysis)

    # Only a subset of each analysis is exposed in the report.
    message_keys = (
        'subject', 'body', 'subject_length', 'has_body',
        'conventional', 'type', 'scope',
    )
    change_keys = (
        'files_changed', 'insertions', 'deletions', 'test_files', 'doc_files',
    )

    output = {
        'commit': info['sha'][:8],
        'author': info['author'],
        'date': info['date'],
        'message': {key: message_analysis[key] for key in message_keys},
        'changes': {key: stats[key] for key in change_keys},
        'quality': {
            'atomic': atomicity_analysis['atomic'],
            'message_quality': quality,
            'test_coverage': stats['test_files'] > 0,
            'issues': issues,
        },
        'score': score,
    }

    print(json.dumps(output, indent=2))
    sys.exit(0)
|
|
|
|
# Run the reviewer only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|