284 lines
10 KiB
Python
Executable File
284 lines
10 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Split Analyzer - Determine if changes should be split
|
|
|
|
Purpose: Analyze git changes to determine if they should be split into multiple commits
|
|
Version: 1.0.0
|
|
Usage: ./split-analyzer.py [--verbose] [--threshold N]
|
|
Returns:
|
|
Exit 0: Should split
|
|
Exit 1: Already atomic
|
|
Exit 2: Error occurred
|
|
|
|
Dependencies: git, python3
|
|
"""
|
|
|
|
import sys
|
|
import subprocess
|
|
import re
|
|
import json
|
|
from collections import defaultdict
|
|
from typing import List, Dict, Tuple, Optional
|
|
|
|
# Conventional commit type identifiers, one per line for easier diffing.
COMMIT_TYPES = [
    'feat',
    'fix',
    'docs',
    'style',
    'refactor',
    'test',
    'chore',
    'perf',
    'ci',
    'build',
]
|
|
|
|
class SplitAnalyzer:
    """Heuristically decide whether pending git changes should be split.

    Each changed file (staged or unstaged) is assigned a conventional-commit
    type and a scope derived from its path. The change set is flagged for
    splitting when it spans several types, several scopes, more files than
    ``threshold``, or a known combination of mixed concerns.

    Attributes:
        threshold: Maximum number of files before the change counts as large.
        verbose: When true, ``log`` writes debug lines to stderr.
        files: Changed file paths collected by the last ``analyze`` call.
        types: Mapping of detected commit type -> list of file paths.
        scopes: Mapping of detected scope -> list of file paths.
        concerns: Mixed-concern messages found by the last ``analyze`` call.
    """

    def __init__(self, threshold: int = 10, verbose: bool = False):
        self.threshold = threshold
        self.verbose = verbose
        self.files = []
        self.types = defaultdict(list)
        self.scopes = defaultdict(list)
        self.concerns = []

    def log(self, message: str):
        """Print message to stderr if verbose mode is enabled."""
        if self.verbose:
            print(f"[DEBUG] {message}", file=sys.stderr)

    def run_git_command(self, args: List[str]) -> str:
        """Execute ``git <args>`` and return its stdout.

        On any failure (non-zero git exit status, or git not being
        launchable at all) an error is printed to stderr and the process
        exits with status 2.
        """
        try:
            result = subprocess.run(
                ['git'] + args,
                capture_output=True,
                text=True,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            print(f"Error running git command: {e}", file=sys.stderr)
            # Surface git's own explanation; the exception text only has
            # the command line and the exit status.
            if e.stderr:
                print(e.stderr.strip(), file=sys.stderr)
            sys.exit(2)
        except OSError as e:
            # Raised when the git executable is missing or cannot be run.
            print(f"Error running git command: {e}", file=sys.stderr)
            sys.exit(2)
        return result.stdout

    def get_changed_files(self) -> List[str]:
        """Return the sorted, de-duplicated paths of staged and unstaged changes."""
        # -z makes git emit NUL-terminated, unquoted paths, so names with
        # spaces or non-ASCII characters are not wrapped in quotes/escapes.
        staged = self.run_git_command(['diff', '--cached', '--name-only', '-z'])
        unstaged = self.run_git_command(['diff', '--name-only', '-z'])

        files = set()
        files.update(filter(None, staged.split('\0')))
        files.update(filter(None, unstaged.split('\0')))

        # Sorted so that analysis and reporting order is deterministic.
        return sorted(files)

    def get_file_diff(self, file_path: str) -> str:
        """Return the staged diff for ``file_path``, else the unstaged diff.

        Best effort: if git fails for this file an empty string is returned
        so the remaining files can still be analyzed.
        """
        try:
            # '--' stops git from interpreting the path as a revision or an
            # option (e.g. a deleted file, or a name that matches a branch).
            diff = self.run_git_command(['diff', '--cached', '--', file_path])
            if not diff:
                diff = self.run_git_command(['diff', '--', file_path])
            return diff
        except SystemExit:
            # run_git_command already reported the failure and called
            # sys.exit(2); a single unreadable diff should not abort the run.
            return ""

    def detect_type_from_diff(self, file_path: str, diff: str) -> str:
        """Guess a conventional-commit type from the file path and diff text.

        Path-based rules (docs, test, ci, build) are tried first; otherwise
        keywords in the added lines and the added/removed line counts decide.
        Returns one of 'docs', 'test', 'ci', 'build', 'feat', 'fix',
        'refactor', 'perf', 'style' or 'chore'.
        """
        build_files = ('package.json', 'pom.xml', 'build.gradle', 'Makefile', 'CMakeLists.txt')
        is_build_file = file_path.endswith(build_files)

        # Documentation files. Build manifests are excluded because
        # 'CMakeLists.txt' also ends in '.txt' and would otherwise be
        # reported as docs and never reach the build check below.
        if not is_build_file and file_path.endswith(('.md', '.txt', '.rst', '.adoc')):
            return 'docs'

        # Test files
        if any(pattern in file_path for pattern in ('test/', 'tests/', 'spec/', '__tests__', '.test.', '.spec.')):
            return 'test'

        # CI/CD files
        if any(pattern in file_path for pattern in ('.github/', '.gitlab-ci', 'jenkins', '.circleci')):
            return 'ci'

        # Build files
        if is_build_file:
            return 'build'

        # Nothing to inspect beyond the path
        if not diff:
            return 'chore'

        diff_lines = diff.split('\n')
        # '+++' / '---' are the diff's file header lines, not content.
        added_lines = [line for line in diff_lines if line.startswith('+') and not line.startswith('+++')]
        removed_lines = [line for line in diff_lines if line.startswith('-') and not line.startswith('---')]

        # Join once instead of once per keyword check.
        added_text = ' '.join(added_lines)
        added_lower = added_text.lower()

        # New definitions plus "creation" wording suggest a feature
        # (this check is case-sensitive, unlike the ones below).
        if any(keyword in added_text for keyword in ('function ', 'class ', 'def ', 'const ', 'let ', 'var ')):
            if any(keyword in added_text for keyword in ('new ', 'add', 'implement', 'create')):
                return 'feat'

        # Bug fix patterns
        if any(keyword in added_lower for keyword in ('fix', 'bug', 'error', 'issue', 'null', 'undefined')):
            return 'fix'

        # Refactoring
        if any(keyword in added_lower for keyword in ('refactor', 'rename', 'move', 'extract')):
            return 'refactor'

        # Performance
        if any(keyword in added_lower for keyword in ('performance', 'optimize', 'cache', 'memoize')):
            return 'perf'

        # Equal numbers of additions and deletions might indicate formatting
        if len(added_lines) == len(removed_lines):
            return 'style'

        # Mostly-new code defaults to feat, anything else to chore
        if len(added_lines) > len(removed_lines) * 2:
            return 'feat'

        return 'chore'

    def extract_scope_from_path(self, file_path: str) -> str:
        """Derive a scope name from the first meaningful path component.

        Common container directories (src, lib, app, packages, tests, test)
        are skipped. The component's extension is dropped, and leading dots
        are stripped so '.github/...' yields 'github' rather than an empty
        scope. Returns 'root' when no usable component exists.
        """
        skip_prefixes = ('src', 'lib', 'app', 'packages', 'tests', 'test')

        for part in file_path.split('/'):
            if not part or part in ('.', '..') or part in skip_prefixes:
                continue
            # Strip leading dots before removing the extension; otherwise
            # dot-directories and dotfiles would produce '' as their scope.
            scope = part.lstrip('.').split('.')[0]
            if scope:
                return scope

        return 'root'

    def detect_mixed_concerns(self) -> List[str]:
        """Return messages describing mixed concerns in ``self.types``/``self.scopes``."""
        concerns = []

        # Check for feature + unrelated changes
        has_feature = 'feat' in self.types
        has_refactor = 'refactor' in self.types
        has_style = 'style' in self.types

        if has_feature and has_refactor:
            concerns.append("Feature implementation mixed with refactoring")

        if has_feature and has_style:
            concerns.append("Feature implementation mixed with style changes")

        # Check for test + implementation in separate modules
        if 'test' in self.types:
            # Compare scope *names*: the mapped values are lists of paths,
            # which are unhashable and cannot be placed in a set.
            test_scopes = {scope for scope in self.scopes if 'test' in scope}
            impl_scopes = {scope for scope in self.scopes if 'test' not in scope}

            if test_scopes != impl_scopes and len(impl_scopes) > 1:
                concerns.append("Tests for multiple unrelated implementations")

        return concerns

    def _build_metrics(self) -> Dict:
        """Summarize the current analysis state as a JSON-serializable dict."""
        return {
            'file_count': len(self.files),
            'types_detected': list(self.types.keys()),
            'type_counts': {t: len(files) for t, files in self.types.items()},
            'scopes_detected': list(self.scopes.keys()),
            'scope_counts': {s: len(files) for s, files in self.scopes.items()},
            'concerns': self.concerns,
            'threshold': self.threshold,
        }

    def analyze(self) -> Tuple[bool, str, Dict]:
        """Analyze the pending changes and decide whether they should be split.

        Returns:
            ``(should_split, reason, metrics)`` where ``reason`` is a
            human-readable explanation and ``metrics`` holds the file count,
            detected types and scopes with per-key counts, the mixed-concern
            messages and the threshold. The same keys are present (with
            empty values) when no changes are detected.
        """
        # Start from a clean slate so repeated calls do not double-count.
        self.types = defaultdict(list)
        self.scopes = defaultdict(list)
        self.concerns = []

        # Get changed files
        self.files = self.get_changed_files()
        self.log(f"Found {len(self.files)} changed files")

        if not self.files:
            return False, "No changes detected", self._build_metrics()

        # Classify each file by type and scope
        for file_path in self.files:
            self.log(f"Analyzing: {file_path}")

            diff = self.get_file_diff(file_path)
            file_type = self.detect_type_from_diff(file_path, diff)
            scope = self.extract_scope_from_path(file_path)

            self.types[file_type].append(file_path)
            self.scopes[scope].append(file_path)

            self.log(f" Type: {file_type}, Scope: {scope}")

        # Check splitting criteria
        reasons = []

        # Check 1: Multiple types
        if len(self.types) > 1:
            type_list = ', '.join(self.types.keys())
            reasons.append(f"Multiple types detected: {type_list}")
            self.log(f"SPLIT REASON: Multiple types: {type_list}")

        # Check 2: Multiple scopes
        if len(self.scopes) > 1:
            scope_list = ', '.join(self.scopes.keys())
            reasons.append(f"Multiple scopes detected: {scope_list}")
            self.log(f"SPLIT REASON: Multiple scopes: {scope_list}")

        # Check 3: Too many files
        if len(self.files) > self.threshold:
            reasons.append(f"Large change: {len(self.files)} files (threshold: {self.threshold})")
            self.log(f"SPLIT REASON: Too many files: {len(self.files)} > {self.threshold}")

        # Check 4: Mixed concerns
        self.concerns = self.detect_mixed_concerns()
        if self.concerns:
            reasons.append(f"Mixed concerns: {'; '.join(self.concerns)}")
            self.log("SPLIT REASON: Mixed concerns detected")

        # Prepare detailed metrics
        metrics = self._build_metrics()

        # Determine result
        if reasons:
            should_split = True
            reason = '; '.join(reasons)
            self.log(f"RECOMMENDATION: Should split - {reason}")
        else:
            should_split = False
            reason = "Changes are atomic - single logical unit"
            self.log("RECOMMENDATION: Already atomic")

        return should_split, reason, metrics
|
|
|
|
def main():
    """Command-line entry point.

    Parses ``--verbose``, ``--threshold`` and ``--json``, runs the analysis
    and prints the result as JSON or as a human-readable summary. Always
    terminates through ``sys.exit``: status 0 when the changes should be
    split, status 1 when they should not.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Analyze if git changes should be split')
    parser.add_argument('--verbose', action='store_true', help='Verbose output')
    parser.add_argument('--threshold', type=int, default=10, help='File count threshold')
    parser.add_argument('--json', action='store_true', help='Output JSON format')

    args = parser.parse_args()

    analyzer = SplitAnalyzer(threshold=args.threshold, verbose=args.verbose)
    should_split, reason, metrics = analyzer.analyze()

    if args.json:
        # Output JSON format
        result = {
            'should_split': should_split,
            'reason': reason,
            'metrics': metrics,
            'recommendation': 'split' if should_split else 'atomic',
        }
        print(json.dumps(result, indent=2))
    else:
        # Output human-readable format. The metrics dict may be empty when
        # no changes were detected, so every key is read with a default
        # instead of indexing directly (which raised KeyError).
        print(f"Should split: {'YES' if should_split else 'NO'}")
        print(f"Reason: {reason}")
        print("\nMetrics:")
        print(f" Files: {metrics.get('file_count', 0)}")
        print(f" Types: {', '.join(metrics.get('types_detected', []))}")
        print(f" Scopes: {', '.join(metrics.get('scopes_detected', []))}")
        concerns = metrics.get('concerns')
        if concerns:
            print(f" Concerns: {', '.join(concerns)}")

    # Exit code: 0 = should split, 1 = atomic
    sys.exit(0 if should_split else 1)
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|