Initial commit

2025-11-29 18:20:25 +08:00
commit 0d6226e0d8
69 changed files with 20934 additions and 0 deletions
--- a/commands/atomic-commit/.scripts/split-analyzer.py
+++ b/commands/atomic-commit/.scripts/split-analyzer.py
@@ -0,0 +1,283 @@
+#!/usr/bin/env python3
+"""
+Split Analyzer - Determine if changes should be split
+
+Purpose: Analyze git changes to determine if they should be split into multiple commits
+Version: 1.0.0
+Usage: ./split-analyzer.py [--verbose] [--threshold N] [--json]
+Returns:
+  Exit 0: Should split
+  Exit 1: Already atomic
+  Exit 2: Error occurred
+
+Dependencies: git, python3
+"""
+
+import sys
+import subprocess
+import re
+import json
+from collections import defaultdict
+from typing import List, Dict, Tuple, Optional
+
+# Conventional commit types
+COMMIT_TYPES = ['feat', 'fix', 'docs', 'style', 'refactor', 'test', 'chore', 'perf', 'ci', 'build']
+
+class SplitAnalyzer:
+    def __init__(self, threshold: int = 10, verbose: bool = False):
+        self.threshold = threshold
+        self.verbose = verbose
+        self.files = []
+        self.types = defaultdict(list)
+        self.scopes = defaultdict(list)
+        self.concerns = []
+
+    def log(self, message: str):
+        """Print message if verbose mode enabled"""
+        if self.verbose:
+            print(f"[DEBUG] {message}", file=sys.stderr)
+
+    def run_git_command(self, args: List[str]) -> str:
+        """Execute git command and return output"""
+        try:
+            result = subprocess.run(
+                ['git'] + args,
+                capture_output=True,
+                text=True,
+                check=True
+            )
+            return result.stdout
+        except subprocess.CalledProcessError as e:
+            print(f"Error running git command: {e}", file=sys.stderr)
+            sys.exit(2)
+
+    def get_changed_files(self) -> List[str]:
+        """Get list of changed files (staged and unstaged)"""
+        # Staged files
+        staged = self.run_git_command(['diff', '--cached', '--name-only'])
+        # Unstaged files
+        unstaged = self.run_git_command(['diff', '--name-only'])
+
+        files = set()
+        files.update(filter(None, staged.split('\n')))
+        files.update(filter(None, unstaged.split('\n')))
+
+        return list(files)
+
+    def get_file_diff(self, file_path: str) -> str:
+        """Get diff for a specific file"""
+        try:
+            # Try staged first
+            diff = self.run_git_command(['diff', '--cached', file_path])
+            if not diff:
+                # Try unstaged
+                diff = self.run_git_command(['diff', file_path])
+            return diff
+        except:
+            return ""
+
+    def detect_type_from_diff(self, file_path: str, diff: str) -> str:
+        """Detect commit type from file path and diff content"""
+
+        # Documentation files
+        if any(file_path.endswith(ext) for ext in ['.md', '.txt', '.rst', '.adoc']):
+            return 'docs'
+
+        # Test files
+        if any(pattern in file_path for pattern in ['test/', 'tests/', 'spec/', '__tests__', '.test.', '.spec.']):
+            return 'test'
+
+        # CI/CD files
+        if any(pattern in file_path for pattern in ['.github/', '.gitlab-ci', 'jenkins', '.circleci']):
+            return 'ci'
+
+        # Build files
+        if any(file_path.endswith(ext) for ext in ['package.json', 'pom.xml', 'build.gradle', 'Makefile', 'CMakeLists.txt']):
+            return 'build'
+
+        # Analyze diff content
+        if not diff:
+            return 'chore'
+
+        # Look for new functionality
+        added_lines = [line for line in diff.split('\n') if line.startswith('+') and not line.startswith('+++')]
+
+        # Check for function/class additions (new features)
+        if any(keyword in ' '.join(added_lines) for keyword in ['function ', 'class ', 'def ', 'const ', 'let ', 'var ']):
+            if any(keyword in ' '.join(added_lines) for keyword in ['new ', 'add', 'implement', 'create']):
+                return 'feat'
+
+        # Check for bug fix patterns
+        if any(keyword in ' '.join(added_lines).lower() for keyword in ['fix', 'bug', 'error', 'issue', 'null', 'undefined']):
+            return 'fix'
+
+        # Check for refactoring
+        if any(keyword in ' '.join(added_lines).lower() for keyword in ['refactor', 'rename', 'move', 'extract']):
+            return 'refactor'
+
+        # Check for performance
+        if any(keyword in ' '.join(added_lines).lower() for keyword in ['performance', 'optimize', 'cache', 'memoize']):
+            return 'perf'
+
+        # Check for style changes (formatting only)
+        removed_lines = [line for line in diff.split('\n') if line.startswith('-') and not line.startswith('---')]
+        if len(added_lines) == len(removed_lines):
+            # Similar number of additions and deletions might indicate formatting
+            return 'style'
+
+        # Default to feat for new code, chore for modifications
+        if len(added_lines) > len(removed_lines) * 2:
+            return 'feat'
+
+        return 'chore'
+
+    def extract_scope_from_path(self, file_path: str) -> str:
+        """Extract scope from file path"""
+        parts = file_path.split('/')
+
+        # Skip common prefixes
+        skip_prefixes = ['src', 'lib', 'app', 'packages', 'tests', 'test']
+
+        for part in parts:
+            if part not in skip_prefixes and part != '.' and part != '..':
+                # Remove file extension
+                scope = part.split('.')[0]
+                return scope
+
+        return 'root'
+
+    def detect_mixed_concerns(self) -> List[str]:
+        """Detect mixed concerns in changes"""
+        concerns = []
+
+        # Check for feature + unrelated changes
+        has_feature = 'feat' in self.types
+        has_refactor = 'refactor' in self.types
+        has_style = 'style' in self.types
+
+        if has_feature and has_refactor:
+            concerns.append("Feature implementation mixed with refactoring")
+
+        if has_feature and has_style:
+            concerns.append("Feature implementation mixed with style changes")
+
+        # Check for test + implementation in separate modules
+        if 'test' in self.types:
+            test_scopes = set(self.scopes[scope] for scope in self.scopes if 'test' in scope)
+            impl_scopes = set(self.scopes[scope] for scope in self.scopes if 'test' not in scope)
+
+            if test_scopes != impl_scopes and len(impl_scopes) > 1:
+                concerns.append("Tests for multiple unrelated implementations")
+
+        return concerns
+
+    def analyze(self) -> Tuple[bool, str, Dict]:
+        """Analyze changes and determine if should split"""
+
+        # Get changed files
+        self.files = self.get_changed_files()
+        self.log(f"Found {len(self.files)} changed files")
+
+        if not self.files:
+            return False, "No changes detected", {}
+
+        # Analyze each file
+        for file_path in self.files:
+            self.log(f"Analyzing: {file_path}")
+
+            diff = self.get_file_diff(file_path)
+            file_type = self.detect_type_from_diff(file_path, diff)
+            scope = self.extract_scope_from_path(file_path)
+
+            self.types[file_type].append(file_path)
+            self.scopes[scope].append(file_path)
+
+            self.log(f"  Type: {file_type}, Scope: {scope}")
+
+        # Check splitting criteria
+        reasons = []
+
+        # Check 1: Multiple types
+        if len(self.types) > 1:
+            type_list = ', '.join(self.types.keys())
+            reasons.append(f"Multiple types detected: {type_list}")
+            self.log(f"SPLIT REASON: Multiple types: {type_list}")
+
+        # Check 2: Multiple scopes
+        if len(self.scopes) > 1:
+            scope_list = ', '.join(self.scopes.keys())
+            reasons.append(f"Multiple scopes detected: {scope_list}")
+            self.log(f"SPLIT REASON: Multiple scopes: {scope_list}")
+
+        # Check 3: Too many files
+        if len(self.files) > self.threshold:
+            reasons.append(f"Large change: {len(self.files)} files (threshold: {self.threshold})")
+            self.log(f"SPLIT REASON: Too many files: {len(self.files)} > {self.threshold}")
+
+        # Check 4: Mixed concerns
+        self.concerns = self.detect_mixed_concerns()
+        if self.concerns:
+            reasons.append(f"Mixed concerns: {'; '.join(self.concerns)}")
+            self.log(f"SPLIT REASON: Mixed concerns detected")
+
+        # Prepare detailed metrics
+        metrics = {
+            'file_count': len(self.files),
+            'types_detected': list(self.types.keys()),
+            'type_counts': {t: len(files) for t, files in self.types.items()},
+            'scopes_detected': list(self.scopes.keys()),
+            'scope_counts': {s: len(files) for s, files in self.scopes.items()},
+            'concerns': self.concerns,
+            'threshold': self.threshold
+        }
+
+        # Determine result
+        if reasons:
+            should_split = True
+            reason = '; '.join(reasons)
+            self.log(f"RECOMMENDATION: Should split - {reason}")
+        else:
+            should_split = False
+            reason = "Changes are atomic - single logical unit"
+            self.log(f"RECOMMENDATION: Already atomic")
+
+        return should_split, reason, metrics
+
+def main():
+    import argparse
+
+    parser = argparse.ArgumentParser(description='Analyze if git changes should be split')
+    parser.add_argument('--verbose', action='store_true', help='Verbose output')
+    parser.add_argument('--threshold', type=int, default=10, help='File count threshold')
+    parser.add_argument('--json', action='store_true', help='Output JSON format')
+
+    args = parser.parse_args()
+
+    analyzer = SplitAnalyzer(threshold=args.threshold, verbose=args.verbose)
+    should_split, reason, metrics = analyzer.analyze()
+
+    if args.json:
+        # Output JSON format
+        result = {
+            'should_split': should_split,
+            'reason': reason,
+            'metrics': metrics,
+            'recommendation': 'split' if should_split else 'atomic'
+        }
+        print(json.dumps(result, indent=2))
+    else:
+        # Output human-readable format
+        print(f"Should split: {'YES' if should_split else 'NO'}")
+        print(f"Reason: {reason}")
+        print(f"\nMetrics:")
+        print(f"  Files: {metrics['file_count']}")
+        print(f"  Types: {', '.join(metrics['types_detected'])}")
+        print(f"  Scopes: {', '.join(metrics['scopes_detected'])}")
+        if metrics['concerns']:
+            print(f"  Concerns: {', '.join(metrics['concerns'])}")
+
+    # Exit code: 0 = should split, 1 = atomic
+    sys.exit(0 if should_split else 1)
+
+if __name__ == '__main__':
+    main()