#!/usr/bin/env python3
################################################################################
# Commit Reviewer Script
#
# Purpose: Analyze commit quality including message, changes, and atomicity
# Version: 1.0.0
# Usage: ./commit-reviewer.py <commit-sha>
# Returns: JSON with comprehensive quality analysis
# Exit Codes:
#   0 = Success
#   1 = Commit not found
#   2 = Script execution error
################################################################################

import sys
import json
import subprocess
import re
from typing import Dict, List, Tuple, Any

# Conventional-commit header: type, optional "(scope)", optional "!" marking a
# breaking change, then ": " and a non-empty description.
_CONVENTIONAL_RE = re.compile(
    r'^(feat|fix|docs|style|refactor|perf|test|build|ci|chore|revert)'
    r'(?:\(([a-z0-9\-]+)\))?(!)?: (.+)',
    re.IGNORECASE,
)

# Small heuristic vocabularies for the imperative-mood check; they are not
# exhaustive, so an unlisted verb is reported as "not imperative".
_IMPERATIVE_VERBS = frozenset(
    ['add', 'fix', 'update', 'remove', 'refactor', 'improve', 'implement']
)
_PAST_TENSE_VERBS = frozenset(
    ['added', 'fixed', 'updated', 'removed', 'refactored', 'improved',
     'implemented']
)

_INSERTIONS_RE = re.compile(r'(\d+) insertion')
_DELETIONS_RE = re.compile(r'(\d+) deletion')

_CODE_EXTENSIONS = ('.js', '.ts', '.py', '.go', '.rs', '.java')
_CONFIG_EXTENSIONS = ('.json', '.yaml', '.yml', '.toml')


################################################################################
# Git operations
################################################################################

def git_command(args: List[str]) -> str:
    """Run ``git`` with ``args`` and return its stripped standard output.

    Args:
        args: Arguments passed to the ``git`` executable (no shell involved).

    Returns:
        The command's stdout with surrounding whitespace removed, or an empty
        string when git exits non-zero or cannot be started at all (for
        example when it is not installed). Callers treat "" as "no result".
    """
    try:
        result = subprocess.run(
            ['git'] + args,
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing or non-executable git binary, which would
        # otherwise escape as a traceback instead of a clean "" result.
        return ""
    return result.stdout.strip()


def commit_exists(sha: str) -> bool:
    """Return True when ``sha`` resolves to a commit in the current repository.

    Args:
        sha: A revision given by the user (hash, branch, tag, ...).

    Returns:
        False for empty input, for input that starts with ``-`` (it would be
        parsed by git as an option rather than a revision), and for anything
        that does not peel to a commit object.
    """
    if not sha or sha.startswith('-'):
        return False
    # "^{commit}" makes rev-parse fail for trees/blobs instead of accepting
    # any object that merely exists.
    return bool(git_command(['rev-parse', '--verify', '--quiet', sha + '^{commit}']))


def get_commit_info(sha: str) -> Dict[str, str]:
    """Collect the commit's metadata via ``git rev-parse`` and ``git log -1``.

    Returns:
        A dict with keys ``sha``, ``author`` ("name <email>"), ``date``
        (short date format), ``subject`` and ``body``. A value is "" when the
        corresponding git call fails.
    """
    def log_field(*format_args: str) -> str:
        return git_command(['log', '-1', *format_args, sha])

    return {
        'sha': git_command(['rev-parse', sha]),
        'author': log_field('--format=%an <%ae>'),
        'date': log_field('--format=%ad', '--date=short'),
        'subject': log_field('--format=%s'),
        'body': log_field('--format=%b'),
    }


def _parse_stat_output(stats_raw: str) -> Dict[str, int]:
    """Turn ``git show --stat`` text into file, line, test and doc counts."""
    files_changed = 0
    insertions = 0
    deletions = 0
    test_files = 0
    doc_files = 0

    for line in stats_raw.split('\n'):
        if '|' in line:
            # Per-file line: "<path> | <count> <graph>". Split on the last
            # "|" so a pipe inside the path does not truncate the name.
            files_changed += 1
            filename = line.rpartition('|')[0].strip()
            lowered = filename.lower()

            # Count test files
            if 'test' in lowered or 'spec' in lowered:
                test_files += 1

            # Count doc files
            if filename.endswith('.md') or 'doc' in lowered:
                doc_files += 1
            continue

        # Summary line: "5 files changed, 234 insertions(+), 12 deletions(-)".
        # Only non-file lines are inspected so a path that happens to contain
        # "3 insertion" cannot be mistaken for the totals.
        found = _INSERTIONS_RE.search(line)
        if found:
            insertions = int(found.group(1))
        found = _DELETIONS_RE.search(line)
        if found:
            deletions = int(found.group(1))

    return {
        'files_changed': files_changed,
        'insertions': insertions,
        'deletions': deletions,
        'test_files': test_files,
        'doc_files': doc_files,
    }


def get_commit_stats(sha: str) -> Dict[str, int]:
    """Get commit statistics parsed from ``git show --stat``.

    Returns:
        A dict with ``files_changed``, ``insertions``, ``deletions``,
        ``test_files`` (path contains "test" or "spec") and ``doc_files``
        (path ends in ".md" or contains "doc"). All values are 0 when git
        produces no output.
    """
    return _parse_stat_output(git_command(['show', '--stat', '--format=', sha]))


################################################################################
# Message analysis
################################################################################

def analyze_message(subject: str, body: str) -> Dict[str, Any]:
    """Analyze commit message quality.

    Args:
        subject: First line of the commit message.
        body: Remainder of the commit message (may be empty).

    Returns:
        A dict describing the message: the subject/body themselves, subject
        length and whether it is at most 50 characters, whether the header is
        in conventional-commit form (with the parsed ``type`` and ``scope``),
        whether the description starts with a known imperative verb, and
        simple body metrics (non-blank line count, more than two such lines,
        presence of bullet lines).
    """
    # Check conventional commits format and extract type/scope in one pass.
    header = _CONVENTIONAL_RE.match(subject)
    is_conventional = header is not None
    commit_type = header.group(1).lower() if header else None
    commit_scope = header.group(2) if header else None

    # Check subject length
    subject_length = len(subject)
    subject_ok = subject_length <= 50

    # Check imperative mood (basic heuristics). The verb is the first word of
    # the description, i.e. after any "type(scope): " prefix; comparing whole
    # words keeps "address" from passing as "add" and keeps a later word such
    # as "removed" from flagging an otherwise imperative subject.
    description = header.group(4) if header else subject
    words = description.lower().split()
    first_word = words[0] if words else ''
    uses_imperative = first_word in _IMPERATIVE_VERBS
    uses_past_tense = first_word in _PAST_TENSE_VERBS

    # Check if body exists and is useful
    has_body = bool(body.strip())
    body_lines = body.strip().split('\n') if has_body else []
    body_line_count = sum(1 for line in body_lines if line.strip())

    # Body quality assessment
    has_explanation = body_line_count > 2
    uses_bullets = any(
        line.strip().startswith(('-', '*', '•')) for line in body_lines
    )

    return {
        'subject': subject,
        'body': body if has_body else None,
        'subject_length': subject_length,
        'subject_ok': subject_ok,
        'has_body': has_body,
        'body_line_count': body_line_count,
        'conventional': is_conventional,
        'type': commit_type,
        'scope': commit_scope,
        'imperative': uses_imperative and not uses_past_tense,
        'explanation': has_explanation,
        'uses_bullets': uses_bullets,
    }


################################################################################
# Atomicity analysis
################################################################################

def analyze_atomicity(sha: str, stats: Dict[str, int]) -> Dict[str, Any]:
    """Analyze if commit is atomic.

    Args:
        sha: Revision whose changed paths are listed with
            ``git show --name-only``.
        stats: Statistics dict; ``files_changed`` and ``insertions`` are read.

    Returns:
        A dict with ``atomic`` (bool), the sorted ``file_types`` and
        ``scopes`` (top-level directories) that were touched, and a list of
        human-readable ``issues`` explaining why the commit is not atomic.
    """
    # Get changed files
    listing = git_command(['show', '--name-only', '--format=', sha])
    changed_files = [path for path in listing.split('\n') if path.strip()]

    # Analyze file types
    file_types = set()
    scopes = set()

    for filepath in changed_files:
        lowered = filepath.lower()

        # Determine file type (first matching category wins)
        if 'test' in lowered or 'spec' in lowered:
            file_types.add('test')
        elif filepath.endswith('.md') or 'doc' in lowered:
            file_types.add('docs')
        elif filepath.endswith(_CODE_EXTENSIONS):
            file_types.add('code')
        elif filepath.endswith(_CONFIG_EXTENSIONS):
            file_types.add('config')

        # Determine scope from path: only files inside a directory count.
        parts = filepath.split('/')
        if len(parts) > 1:
            scopes.add(parts[0])

    # Check for multiple types (excluding test + code as acceptable)
    docs_with_code = 'docs' in file_types and 'code' in file_types
    config_with_many = 'config' in file_types and len(file_types) > 2
    suspicious_type_mix = docs_with_code or config_with_many

    # Check for multiple scopes
    multiple_scopes = len(scopes) > 2

    # Size check (too large likely non-atomic)
    too_large = stats['files_changed'] > 15 or stats['insertions'] > 500

    issues = []
    if suspicious_type_mix:
        # Sorted so the message is stable; set iteration order is not.
        issues.append(f"Mixes {' and '.join(sorted(file_types))}")
    if multiple_scopes:
        issues.append(f"Affects multiple scopes: {', '.join(sorted(scopes))}")
    if too_large:
        issues.append(f"Large commit: {stats['files_changed']} files")

    return {
        'atomic': not (suspicious_type_mix or multiple_scopes or too_large),
        'file_types': sorted(file_types),
        'scopes': sorted(scopes),
        'issues': issues,
    }


################################################################################
# Quality scoring
################################################################################

def calculate_score(message: Dict[str, Any], stats: Dict[str, int],
                    atomicity: Dict[str, Any]) -> Tuple[int, str, List[str]]:
    """Calculate overall quality score (0-100).

    Starts from 100 and subtracts a penalty for each problem found.

    Args:
        message: Result of ``analyze_message``.
        stats: Result of ``get_commit_stats``.
        atomicity: Result of ``analyze_atomicity``.

    Returns:
        ``(score, quality, issues)`` where ``score`` is clamped to be
        non-negative, ``quality`` is "excellent" (>= 90), "good" (>= 70),
        "fair" (>= 50) or "poor", and ``issues`` lists every penalty reason.
    """
    score = 100
    issues = []

    def penalize(points: int, reason: str) -> None:
        nonlocal score
        score -= points
        issues.append(reason)

    # Message quality (40 points)
    if not message['conventional']:
        penalize(10, "Not using conventional commits format")

    if not message['subject_ok']:
        penalize(5, f"Subject too long ({message['subject_length']} chars, should be ≤50)")

    if not message['imperative']:
        penalize(5, "Subject not in imperative mood")

    if not message['has_body'] and stats['files_changed'] > 3:
        penalize(10, "No commit body explaining changes")
    elif message['has_body'] and not message['explanation']:
        penalize(5, "Commit body too brief")

    # Atomicity (30 points)
    if not atomicity['atomic']:
        score -= 20
        issues.extend(atomicity['issues'])

    # Test coverage (20 points)
    untested_code = 'code' in atomicity['file_types'] and stats['test_files'] <= 0
    if untested_code and stats['insertions'] > 100:
        penalize(15, "No tests included for significant code changes")
    elif untested_code:
        penalize(5, "No tests included")

    # Size appropriateness (10 points)
    if stats['files_changed'] > 20:
        penalize(5, f"Very large commit ({stats['files_changed']} files)")

    if stats['insertions'] > 1000:
        penalize(5, f"Very large changeset ({stats['insertions']} insertions)")

    # Determine quality level
    if score >= 90:
        quality = "excellent"
    elif score >= 70:
        quality = "good"
    elif score >= 50:
        quality = "fair"
    else:
        quality = "poor"

    return max(0, score), quality, issues


################################################################################
# Main execution
################################################################################

def main():
    """Review the commit named on the command line and print a JSON report.

    Exits with 2 on a usage error or outside a git repository, 1 when the
    commit cannot be found, and 0 after printing the report.
    """
    if len(sys.argv) < 2:
        print(json.dumps({"error": "Usage: commit-reviewer.py <commit-sha>"}))
        sys.exit(2)

    commit_sha = sys.argv[1]

    # Check if git repository
    if not git_command(['rev-parse', '--git-dir']):
        print(json.dumps({"error": "Not a git repository"}))
        sys.exit(2)

    # Check if commit exists
    if not commit_exists(commit_sha):
        print(json.dumps({"error": f"Commit not found: {commit_sha}"}))
        sys.exit(1)

    # Gather commit information
    info = get_commit_info(commit_sha)
    stats = get_commit_stats(commit_sha)
    message_analysis = analyze_message(info['subject'], info['body'])
    atomicity_analysis = analyze_atomicity(commit_sha, stats)

    # Calculate quality score
    score, quality, issues = calculate_score(message_analysis, stats, atomicity_analysis)

    # Build output
    message_keys = ('subject', 'body', 'subject_length', 'has_body',
                    'conventional', 'type', 'scope')
    change_keys = ('files_changed', 'insertions', 'deletions', 'test_files',
                   'doc_files')
    output = {
        'commit': info['sha'][:8],
        'author': info['author'],
        'date': info['date'],
        'message': {key: message_analysis[key] for key in message_keys},
        'changes': {key: stats[key] for key in change_keys},
        'quality': {
            'atomic': atomicity_analysis['atomic'],
            'message_quality': quality,
            'test_coverage': stats['test_files'] > 0,
            'issues': issues,
        },
        'score': score,
    }

    print(json.dumps(output, indent=2))
    sys.exit(0)


if __name__ == '__main__':
    main()