Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:20:25 +08:00
commit 0d6226e0d8
69 changed files with 20934 additions and 0 deletions

View File

@@ -0,0 +1,342 @@
#!/usr/bin/env python3
"""
Commit Planner - Create optimal commit sequence
Purpose: Generate optimal commit sequence from file groups
Version: 1.0.0
Usage: ./commit-planner.py [--input FILE] [--output plan|script]
Returns:
Exit 0: Success
Exit 1: Error
Exit 2: Invalid parameters
Dependencies: git, python3
"""
import sys
import json
import subprocess
from collections import defaultdict, deque
from typing import List, Dict, Set, Tuple, Optional
# Priority used to order commits when several are ready at the same time:
# lower number = committed earlier. Unknown types fall back to priority 99
# in CommitPlanner.topological_sort.
TYPE_PRIORITY = {
    'feat': 1,      # Features enable other changes
    'fix': 2,       # Fixes should be applied early
    'refactor': 3,  # Restructuring before additions
    'perf': 4,      # Performance after stability
    'test': 5,      # Tests after implementation
    'docs': 6,      # Documentation last
    'style': 7,     # Style changes last
    'chore': 8,     # Housekeeping last
    'ci': 9,        # CI changes last
    'build': 10     # Build changes last
}
class CommitPlanner:
    """Plan an optimal, dependency-respecting commit sequence.

    Takes commit suggestions (dicts with type/scope/subject/files), infers
    dependencies between them, topologically sorts them with type-priority
    tie-breaking, and renders the result as a readable plan or a bash script.
    """

    def __init__(self, verbose: bool = False):
        self.verbose = verbose
        self.commits = []                     # reserved for callers; unused internally
        self.dependencies = defaultdict(set)  # reserved; per-run graphs are local

    def log(self, message: str):
        """Print a debug message to stderr when verbose mode is enabled."""
        if self.verbose:
            print(f"[DEBUG] {message}", file=sys.stderr)

    def run_git_command(self, args: List[str]) -> str:
        """Execute a git command and return its stdout ('' on any failure)."""
        try:
            result = subprocess.run(
                ['git'] + args,
                capture_output=True,
                text=True,
                check=True
            )
            return result.stdout
        except (subprocess.CalledProcessError, FileNotFoundError, OSError) as e:
            # FileNotFoundError/OSError: git binary missing or unrunnable.
            # Keep the original best-effort contract: report and return ''.
            print(f"Error running git command: {e}", file=sys.stderr)
            return ""

    def load_suggestions(self, input_file: Optional[str] = None) -> List[Dict]:
        """Load commit suggestions from a JSON file (or stdin when None).

        Exits with status 1 on unreadable or invalid input.
        """
        try:
            if input_file:
                with open(input_file, 'r') as f:
                    data = json.load(f)
            else:
                # Read from stdin
                data = json.load(sys.stdin)
            return data.get('suggestions', [])
        except Exception as e:
            print(f"Error loading suggestions: {e}", file=sys.stderr)
            sys.exit(1)

    def detect_dependencies(self, commit1: Dict, commit2: Dict) -> bool:
        """Return True if commit2 depends on commit1."""
        scope1 = commit1.get('scope')
        scope2 = commit2.get('scope')
        # Test commits depend on the implementation commit with the same scope.
        # Require a real scope: two scope-less commits are not assumed related.
        if commit1['type'] != 'test' and commit2['type'] == 'test':
            if scope1 is not None and scope1 == scope2:
                return True
        # Docs depend on the features they document. Guard against a missing
        # scope: `None in str` raises TypeError.
        if commit1['type'] == 'feat' and commit2['type'] == 'docs':
            if scope1 and scope1 in commit2.get('subject', ''):
                return True
        # Fixes may depend on features in the same scope.
        if commit1['type'] == 'feat' and commit2['type'] == 'fix':
            if scope1 is not None and scope1 == scope2:
                return True
        # File-level dependencies: does any commit2 file import a commit1 file?
        files1 = set(commit1.get('files', []))
        files2 = set(commit2.get('files', []))
        for file2 in files2:
            for file1 in files1:
                if self.has_import_dependency(file1, file2):
                    return True
        return False

    def has_import_dependency(self, source_file: str, target_file: str) -> bool:
        """Return True if target_file appears to import from source_file.

        Reads the staged content of target_file via `git show :path` and
        looks for import/require statements naming source_file's module
        path. Best-effort: any failure is treated as "no dependency".
        """
        try:
            content = self.run_git_command(['show', f':{target_file}'])
            # Crude path -> module mapping; replaces extensions anywhere in
            # the path, which is acceptable for this heuristic.
            source_module = source_file.replace('/', '.').replace('.py', '').replace('.js', '').replace('.ts', '')
            if any(imp in content for imp in [f'import {source_module}', f'from {source_module}', f"require('{source_module}"]):
                return True
        except Exception:
            pass
        return False

    def build_dependency_graph(self, commits: List[Dict]) -> Dict[int, Set[int]]:
        """Build the dependency graph: graph[j] = indices commit j depends on."""
        graph = defaultdict(set)
        for i, commit1 in enumerate(commits):
            for j, commit2 in enumerate(commits):
                if i != j and self.detect_dependencies(commit1, commit2):
                    # commit2 depends on commit1
                    graph[j].add(i)
                    self.log(f"Dependency: Commit {j+1} depends on Commit {i+1}")
        return graph

    def topological_sort(self, commits: List[Dict], dependencies: Dict[int, Set[int]]) -> List[int]:
        """Kahn's algorithm with type-priority tie-breaking among ready nodes.

        Works on a private copy of the dependency sets: the original code
        mutated the caller's graph, so create_sequence later saw every
        dependency set empty and reported 'now' for every commit.
        """
        remaining = {node: set(dependencies.get(node, ())) for node in range(len(commits))}
        in_degree = {node: len(remaining[node]) for node in range(len(commits))}
        queue = deque(node for node in range(len(commits)) if in_degree[node] == 0)
        result = []
        while queue:
            # Among the currently-ready commits, pick the best type priority.
            ready = sorted(queue, key=lambda x: TYPE_PRIORITY.get(commits[x]['type'], 99))
            queue = deque(ready)
            node = queue.popleft()
            result.append(node)
            # Release nodes whose last dependency was just scheduled.
            for other_node in range(len(commits)):
                if node in remaining[other_node]:
                    remaining[other_node].remove(node)
                    in_degree[other_node] -= 1
                    if in_degree[other_node] == 0:
                        queue.append(other_node)
        if len(result) != len(commits):
            print("Error: Circular dependency detected", file=sys.stderr)
            sys.exit(1)
        return result

    def create_sequence(self, commits: List[Dict]) -> List[Dict]:
        """Create the ordered commit sequence with execution hints."""
        self.log(f"Planning sequence for {len(commits)} commits")
        dependencies = self.build_dependency_graph(commits)
        order = self.topological_sort(commits, dependencies)
        sequence = []
        for idx, commit_idx in enumerate(order):
            commit = commits[commit_idx].copy()
            commit['order'] = idx + 1
            commit['commit_id'] = commit_idx + 1
            commit['original_id'] = commit_idx
            deps = dependencies[commit_idx]
            if not deps:
                commit['can_execute'] = 'now'
            else:
                # 1-based positions of the dependencies in the final order.
                dep_ids = [order.index(d) + 1 for d in deps]
                # A commit can only run after ALL of its dependencies, i.e.
                # after the latest one (the original used min(), which could
                # claim a commit was runnable before some dependencies).
                commit['can_execute'] = f"after commit {max(dep_ids)}"
            sequence.append(commit)
        return sequence

    def format_plan(self, sequence: List[Dict]) -> str:
        """Render the sequence as a human-readable plan."""
        lines = []
        lines.append("=" * 60)
        lines.append("COMMIT SEQUENCE PLAN")
        lines.append("=" * 60)
        lines.append("")
        lines.append(f"Execution Order: {len(sequence)} commits in sequence")
        lines.append("")
        for commit in sequence:
            # Was `"" * 60` (an empty string); a dashed rule was clearly intended.
            lines.append("-" * 60)
            lines.append(f"COMMIT {commit['order']}: {commit['type']}" +
                         (f"({commit.get('scope', '')})" if commit.get('scope') else ""))
            lines.append(f"Files: {len(commit['files'])}")
            lines.append(f"Can execute: {commit['can_execute']}")
            lines.append("-" * 60)
            lines.append("")
            # Message
            lines.append("Message:")
            lines.append(f" {commit['subject']}")
            if commit.get('body'):
                lines.append("")
                for line in commit['body'].split('\n'):
                    lines.append(f" {line}")
            lines.append("")
            # Files to stage
            lines.append("Files to stage:")
            for file in commit['files']:
                lines.append(f" git add {file}")
            lines.append("")
            # Commit command
            commit_msg = commit['subject']
            if commit.get('body'):
                body = commit['body'].replace('"', '\\"')
                commit_cmd = f'git commit -m "{commit_msg}" -m "{body}"'
            else:
                commit_cmd = f'git commit -m "{commit_msg}"'
            lines.append("Command:")
            lines.append(f" {commit_cmd}")
            lines.append("")
        lines.append("=" * 60)
        lines.append(f"Total commits: {len(sequence)}")
        lines.append(f"Total files: {sum(len(c['files']) for c in sequence)}")
        lines.append("=" * 60)
        return '\n'.join(lines)

    def format_script(self, sequence: List[Dict]) -> str:
        """Render the sequence as an executable bash script."""
        # Local import: only the header timestamp needs it. Replaces the
        # original `subprocess.run(['date'])`, which spawned a process and
        # fails on platforms without a `date` binary.
        from datetime import datetime
        lines = [
            "#!/bin/bash",
            "# Atomic commit sequence",
            f"# Generated: {datetime.now().isoformat(timespec='seconds')}",
            f"# Total commits: {len(sequence)}",
            "",
            "set -e # Exit on error",
            "",
            'echo "🚀 Starting commit sequence..."',
            ""
        ]
        for commit in sequence:
            lines.append(f"# Commit {commit['order']}: {commit['type']}" +
                         (f"({commit.get('scope', '')})" if commit.get('scope') else ""))
            lines.append('echo ""')
            lines.append(f'echo "📝 Commit {commit["order"]}/{len(sequence)}: {commit["type"]}"')
            # Stage files
            for file in commit['files']:
                lines.append(f'git add "{file}"')
            # Commit (body is flattened to one line for the -m argument)
            commit_msg = commit['subject']
            if commit.get('body'):
                body = commit['body'].replace('"', '\\"').replace('\n', ' ')
                lines.append(f'git commit -m "{commit_msg}" -m "{body}"')
            else:
                lines.append(f'git commit -m "{commit_msg}"')
            lines.append(f'echo "✅ Commit {commit["order"]} complete"')
            lines.append("")
        lines.append('echo ""')
        lines.append('echo "🎉 All commits completed successfully!"')
        lines.append(f'echo "Total commits: {len(sequence)}"')
        lines.append(f'echo "Total files: {sum(len(c["files"]) for c in sequence)}"')
        return '\n'.join(lines)
def main():
    """CLI entry point: load suggestions, plan the sequence, emit output."""
    import argparse

    parser = argparse.ArgumentParser(description='Create optimal commit sequence')
    parser.add_argument('--input', help='Input JSON file (default: stdin)')
    parser.add_argument('--output', choices=['plan', 'script', 'json'], default='plan',
                        help='Output format')
    parser.add_argument('--verbose', action='store_true', help='Verbose output')
    opts = parser.parse_args()

    planner = CommitPlanner(verbose=opts.verbose)

    # Load and validate the suggestions before doing any planning work.
    suggestions = planner.load_suggestions(opts.input)
    if not suggestions:
        print("No commit suggestions provided", file=sys.stderr)
        sys.exit(1)

    ordered = planner.create_sequence(suggestions)

    # Emit in the requested format.
    if opts.output == 'json':
        payload = {
            'sequence': ordered,
            'summary': {
                'total_commits': len(ordered),
                'total_files': sum(len(c['files']) for c in ordered)
            }
        }
        print(json.dumps(payload, indent=2))
    elif opts.output == 'script':
        print(planner.format_script(ordered))
    else:  # plan
        print(planner.format_plan(ordered))

    sys.exit(0)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,328 @@
#!/bin/bash
# Dependency Checker - Analyze file dependencies
#
# Purpose: Check for dependencies between files to inform commit ordering
# Version: 1.0.0
# Usage: ./dependency-checker.sh [files...]
# If no files specified, analyzes all changed files
# Returns:
# Exit 0: Success
# Exit 1: Error
# Exit 2: Invalid parameters
#
# Dependencies: git, bash 4.0+, grep
set -euo pipefail

# Defaults; overridden by the command-line flags below.
VERBOSE=false
OUTPUT_FORMAT="text"

# Parse leading options; the first non-option argument starts the file list.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --verbose)
            VERBOSE=true
            shift
            ;;
        --format)
            # Output format: "text" (default) or "json".
            OUTPUT_FORMAT="${2:-text}"
            shift 2
            ;;
        *)
            # First positional argument: stop option parsing.
            break
            ;;
    esac
done

# Print a debug message to stderr when --verbose is set.
log() {
    if [[ "$VERBOSE" == "true" ]]; then
        echo "[DEBUG] $*" >&2
    fi
}
# Build the file list: positional arguments if given, otherwise every
# staged and unstaged changed file reported by git.
FILES=()
if [[ $# -eq 0 ]]; then
    log "Getting changed files from git..."
    while IFS= read -r file; do
        [[ -n "$file" ]] && FILES+=("$file")
    done < <(git diff --cached --name-only; git diff --name-only)
else
    FILES=("$@")
fi

# Bail out BEFORE the dedupe pipeline: expanding an empty array under
# `set -u` is an error on bash < 4.4, and piping nothing through printf
# would inject a bogus empty entry that defeats this very check.
if [[ ${#FILES[@]} -eq 0 ]]; then
    echo "No files to analyze" >&2
    exit 1
fi

# Remove duplicates (a file can appear both staged and unstaged).
mapfile -t FILES < <(printf '%s\n' "${FILES[@]}" | sort -u)

log "Analyzing ${#FILES[@]} files for dependencies..."

# dependencies[file] = comma-separated list of files it depends on.
declare -A dependencies
# file_types[file]   = detected language/category of the file.
declare -A file_types
# Classify a file by its path. Test files are matched FIRST: in the
# original ordering a path such as "tests/test_auth.py" hit the *.py arm
# and was typed "python", so the test-dependency pass below (which only
# looks at files typed "test") never fired for it.
detect_file_type() {
    local file="$1"
    case "$file" in
        *test*|*spec*)
            echo "test"
            ;;
        *.py)
            echo "python"
            ;;
        *.js|*.jsx)
            echo "javascript"
            ;;
        *.ts|*.tsx)
            echo "typescript"
            ;;
        *.go)
            echo "go"
            ;;
        *.java)
            echo "java"
            ;;
        *.rb)
            echo "ruby"
            ;;
        *.rs)
            echo "rust"
            ;;
        *.md|*.txt|*.rst)
            echo "docs"
            ;;
        *)
            echo "unknown"
            ;;
    esac
}
# Emit the module names imported by a Python file, one per line.
# Prefers the working-tree copy and falls back to the staged blob.
extract_python_imports() {
    local path="$1" src=""
    if [[ -f "$path" ]]; then
        src=$(cat "$path")
    else
        # Not on disk (e.g. staged-only): read it from the index.
        src=$(git show :"$path" 2>/dev/null || echo "")
    fi
    # Match "import module" and "from module import ..." lines.
    echo "$src" | grep -E "^(import|from) " | sed -E 's/^(import|from) ([a-zA-Z0-9_.]+).*/\2/' || true
}
# Emit the module specifiers imported by a JS/TS file, one per line,
# de-duplicated. Prefers the working-tree copy and falls back to the
# staged blob in the index.
extract_js_imports() {
    local file="$1"
    local content
    if [[ -f "$file" ]]; then
        content=$(cat "$file")
    else
        content=$(git show :"$file" 2>/dev/null || echo "")
    fi
    # Two syntaxes: ES "import ... from '<spec>'" and CommonJS "require('<spec>')".
    # NOTE(review): multi-line import statements will not match — confirm
    # whether that matters for the repos this runs against.
    {
        echo "$content" | grep -oE "import .* from ['\"]([^'\"]+)['\"]" | sed -E "s/.*from ['\"](.*)['\"].*/\1/" || true
        echo "$content" | grep -oE "require\(['\"]([^'\"]+)['\"]\)" | sed -E "s/.*require\(['\"]([^'\"]+)['\"]\).*/\1/" || true
    } | sort -u
}
# Emit the packages imported by a Go file, one per line. Prefers the
# working-tree copy and falls back to the staged blob.
extract_go_imports() {
    local file="$1"
    local content
    if [[ -f "$file" ]]; then
        content=$(cat "$file")
    else
        content=$(git show :"$file" 2>/dev/null || echo "")
    fi
    # Single-line form: import "pkg"
    echo "$content" | grep -oE 'import +"[^"]+"' | sed -E 's/import +"([^"]+)".*/\1/' || true
    # Block form: import ( "pkg1" "pkg2" ... ) — print every quoted token
    # inside the import ( ... ) region.
    echo "$content" | sed -n '/^import (/,/^)/p' | grep -oE '"[^"]+"' | tr -d '"' || true
}
# Translate an import specifier into a candidate local file path.
# Emits nothing when the import cannot refer to a local file.
import_to_file() {
    local import_path="$1"
    local file_type="$2"
    case "$file_type" in
        python)
            # module.submodule -> module/submodule.py
            # NOTE(review): package imports (directories with __init__.py)
            # will not resolve this way — only plain modules are mapped.
            echo "$import_path" | tr '.' '/' | sed 's|$|.py|'
            ;;
        javascript|typescript)
            # Only relative specifiers can be local files; anything else
            # resolves to node_modules and is ignored.
            if [[ "$import_path" == ./* ]] || [[ "$import_path" == ../* ]]; then
                echo "$import_path"
            else
                # node_modules - not a local file
                echo ""
            fi
            ;;
        go)
            # Treat github.com/* as external. NOTE(review): other remote
            # hosts (golang.org/x/..., gopkg.in/...) slip through and are
            # treated as local — confirm whether that is intended.
            if [[ "$import_path" == github.com/* ]]; then
                echo ""
            else
                echo "$import_path"
            fi
            ;;
        *)
            echo ""
            ;;
    esac
}
# Pass 1: for every analyzed file, extract its imports and record a
# dependency on any other analyzed file the import resolves to.
log "Extracting imports and dependencies..."
for file in "${FILES[@]}"; do
    file_type=$(detect_file_type "$file")
    file_types["$file"]="$file_type"
    log " $file: type=$file_type"
    # Pick the language-specific extractor; unknown types have no imports.
    case "$file_type" in
        python)
            imports=$(extract_python_imports "$file")
            ;;
        javascript|typescript)
            imports=$(extract_js_imports "$file")
            ;;
        go)
            imports=$(extract_go_imports "$file")
            ;;
        *)
            imports=""
            ;;
    esac
    if [[ -n "$imports" ]]; then
        log " Imports:"
        while IFS= read -r import_path; do
            [[ -z "$import_path" ]] && continue
            log " - $import_path"
            # Convert the import specifier to a candidate file path.
            imported_file=$(import_to_file "$import_path" "$file_type")
            if [[ -n "$imported_file" ]]; then
                # Substring match: the resolved path may be relative to the
                # importing file while FILES holds repo-relative paths.
                for other_file in "${FILES[@]}"; do
                    if [[ "$other_file" == *"$imported_file"* ]]; then
                        # Append to the comma-separated dependency list.
                        if [[ -z "${dependencies[$file]:-}" ]]; then
                            dependencies["$file"]="$other_file"
                        else
                            dependencies["$file"]="${dependencies[$file]},$other_file"
                        fi
                        log " Dependency: $file -> $other_file"
                    fi
                done
            fi
        done <<< "$imports"
    fi
done
# Pass 2: a test file depends on the implementation file it exercises.
# Guess the implementation name by stripping test markers from the path.
# Directory components must be removed BEFORE the bare "test" token:
# stripping "test" first mangles "tests/" into "s/", so the directory
# substitutions in the original never matched.
log "Detecting test dependencies..."
for file in "${FILES[@]}"; do
    if [[ "${file_types[$file]}" == "test" ]]; then
        impl_file="${file//tests\//}"
        impl_file="${impl_file//spec\//}"
        impl_file="${impl_file//.test/}"
        impl_file="${impl_file//.spec/}"
        impl_file="${impl_file//test/}"
        # An empty pattern would substring-match EVERY file.
        [[ -z "$impl_file" ]] && continue
        for other_file in "${FILES[@]}"; do
            if [[ "$other_file" == *"$impl_file"* ]] && [[ "$other_file" != "$file" ]]; then
                # Append to the comma-separated dependency list.
                if [[ -z "${dependencies[$file]:-}" ]]; then
                    dependencies["$file"]="$other_file"
                else
                    dependencies["$file"]="${dependencies[$file]},$other_file"
                fi
                log " Test dependency: $file -> $other_file"
            fi
        done
    fi
done
# Emit the results. This section runs at TOP LEVEL, so `local` is illegal
# here ("local: can only be used in a function"); the original used it for
# the loop-state flags and aborted under `set -e`. Plain variables are used
# instead.
if [[ "$OUTPUT_FORMAT" == "json" ]]; then
    echo "{"
    echo " \"files\": ["
    first=true
    for file in "${FILES[@]}"; do
        # Comma-separate array entries after the first one.
        if [[ "$first" == "true" ]]; then
            first=false
        else
            echo ","
        fi
        echo -n " {\"file\": \"$file\", \"type\": \"${file_types[$file]}\""
        if [[ -n "${dependencies[$file]:-}" ]]; then
            IFS=',' read -ra deps <<< "${dependencies[$file]}"
            echo -n ", \"depends_on\": ["
            first_dep=true
            for dep in "${deps[@]}"; do
                if [[ "$first_dep" == "true" ]]; then
                    first_dep=false
                else
                    echo -n ", "
                fi
                echo -n "\"$dep\""
            done
            echo -n "]"
        fi
        echo -n "}"
    done
    echo ""
    echo " ]"
    echo "}"
else
    echo "=== FILE DEPENDENCIES ==="
    echo ""
    echo "Files analyzed: ${#FILES[@]}"
    echo ""
    has_dependencies=false
    for file in "${FILES[@]}"; do
        if [[ -n "${dependencies[$file]:-}" ]]; then
            has_dependencies=true
            echo "File: $file"
            echo " Type: ${file_types[$file]}"
            echo " Depends on:"
            IFS=',' read -ra deps <<< "${dependencies[$file]}"
            for dep in "${deps[@]}"; do
                echo " - $dep"
            done
            echo ""
        fi
    done
    if [[ "$has_dependencies" == "false" ]]; then
        echo "No dependencies detected."
        echo "All files can be committed independently."
    else
        echo "Recommendation:"
        echo " Commit dependencies before dependent files."
        echo " Group dependent files in same commit if they form atomic unit."
    fi
fi
log "Dependency analysis complete"
exit 0

View File

@@ -0,0 +1,395 @@
#!/bin/bash
# File Grouper - Group changed files by type, scope, or feature
#
# Purpose: Group git changed files using different strategies
# Version: 1.0.0
# Usage: ./file-grouper.sh <strategy> [options]
# strategy: type|scope|feature
# options: --verbose, --format json|text
# Returns:
# Exit 0: Success
# Exit 1: No changes
# Exit 2: Invalid parameters
#
# Dependencies: git, bash 4.0+
set -euo pipefail

# Grouping strategy: first positional argument; defaults to "type".
STRATEGY="${1:-type}"
VERBOSE=false
FORMAT="text"

# Drop the strategy argument (`|| true` keeps `set -e` happy when no
# arguments were given at all), then parse the remaining flags.
shift || true
while [[ $# -gt 0 ]]; do
    case "$1" in
        --verbose)
            VERBOSE=true
            shift
            ;;
        --format)
            # Output format: "text" (default) or "json".
            FORMAT="${2:-text}"
            shift 2
            ;;
        *)
            echo "Unknown option: $1" >&2
            exit 2
            ;;
    esac
done

# Print a debug message to stderr when --verbose is set.
log() {
    if [[ "$VERBOSE" == "true" ]]; then
        echo "[DEBUG] $*" >&2
    fi
}
# Print the union of staged and unstaged changed files, one per line,
# de-duplicated. Prints nothing when there are no changes.
get_changed_files() {
    local files=()
    # Staged files
    while IFS= read -r file; do
        [[ -n "$file" ]] && files+=("$file")
    done < <(git diff --cached --name-only)
    # Unstaged files
    while IFS= read -r file; do
        [[ -n "$file" ]] && files+=("$file")
    done < <(git diff --name-only)
    # Guard the empty case: expanding an empty array under `set -u` fails
    # on bash < 4.4, and printf with no items emits a blank line that
    # callers would count as one (empty-named) file, bypassing their
    # "No changes detected" check.
    if [[ ${#files[@]} -gt 0 ]]; then
        printf '%s\n' "${files[@]}" | sort -u
    fi
}
# Infer a conventional-commit type for a file from its path and diff.
detect_type() {
    local file="$1"
    local diff
    # `git diff --cached <file>` exits 0 even when the file has no staged
    # changes, so the original `cmd1 || cmd2` fallback never ran and
    # unstaged-only files always analyzed an empty diff. Test emptiness
    # of the OUTPUT instead.
    diff=$(git diff --cached "$file" 2>/dev/null || echo "")
    if [[ -z "$diff" ]]; then
        diff=$(git diff "$file" 2>/dev/null || echo "")
    fi
    # Documentation files
    if [[ "$file" =~ \.(md|txt|rst|adoc)$ ]]; then
        echo "docs"
        return
    fi
    # Test files
    if [[ "$file" =~ (test|spec|__tests__)/ ]] || [[ "$file" =~ \.(test|spec)\. ]]; then
        echo "test"
        return
    fi
    # CI/CD configuration
    if [[ "$file" =~ \.github/|\.gitlab-ci|jenkins|\.circleci ]]; then
        echo "ci"
        return
    fi
    # Build-system files
    if [[ "$file" =~ (package\.json|pom\.xml|build\.gradle|Makefile|CMakeLists\.txt)$ ]]; then
        echo "build"
        return
    fi
    # No diff content to analyze: default to housekeeping.
    if [[ -z "$diff" ]]; then
        echo "chore"
        return
    fi
    # Added definitions plus "add"/"implement" wording -> feature.
    if echo "$diff" | grep -q "^+.*\(function\|class\|def\|const\|let\)"; then
        if echo "$diff" | grep -iq "^+.*\(new\|add\|implement\|create\)"; then
            echo "feat"
            return
        fi
    fi
    # Bug-fix vocabulary in added lines
    if echo "$diff" | grep -iq "^+.*\(fix\|bug\|error\|issue\|null\|undefined\)"; then
        echo "fix"
        return
    fi
    # Refactoring vocabulary
    if echo "$diff" | grep -iq "^+.*\(refactor\|rename\|move\|extract\)"; then
        echo "refactor"
        return
    fi
    # Performance vocabulary
    if echo "$diff" | grep -iq "^+.*\(performance\|optimize\|cache\|memoize\)"; then
        echo "perf"
        return
    fi
    # Default to chore
    echo "chore"
}
# Derive a scope name from the first meaningful path component.
extract_scope() {
    # Drop a leading ./ if present.
    local path="${1#./}"
    local segment candidate
    IFS='/' read -ra segments <<< "$path"
    for segment in "${segments[@]}"; do
        case "$segment" in
            src|lib|app|packages|tests|test|.)
                # Generic container directories carry no scope information.
                continue
                ;;
            *)
                # Strip everything from the first dot (the file extension).
                candidate="${segment%%.*}"
                echo "$candidate"
                return
                ;;
        esac
    done
    # Nothing but container directories: fall back to the root scope.
    echo "root"
}
# Group changed files by inferred commit type and print the groups.
# Exits 1 when there are no changed files.
group_by_type() {
    log "Grouping by type..."
    # groups[type] = comma-separated list of files of that type.
    declare -A groups
    local files
    mapfile -t files < <(get_changed_files)
    if [[ ${#files[@]} -eq 0 ]]; then
        echo "No changes detected" >&2
        exit 1
    fi
    # Classify every file and append it to its type bucket.
    for file in "${files[@]}"; do
        local type
        type=$(detect_type "$file")
        log " $file -> type:$type"
        if [[ -z "${groups[$type]:-}" ]]; then
            groups[$type]="$file"
        else
            groups[$type]="${groups[$type]},$file"
        fi
    done
    # Emit either machine-readable JSON or a human-readable listing.
    if [[ "$FORMAT" == "json" ]]; then
        echo "{"
        echo " \"strategy\": \"type\","
        echo " \"groups\": {"
        local first=true
        for type in "${!groups[@]}"; do
            # Comma-separate JSON entries after the first one.
            if [[ "$first" == "true" ]]; then
                first=false
            else
                echo ","
            fi
            IFS=',' read -ra file_list <<< "${groups[$type]}"
            echo -n " \"$type\": ["
            local first_file=true
            for f in "${file_list[@]}"; do
                if [[ "$first_file" == "true" ]]; then
                    first_file=false
                else
                    echo -n ", "
                fi
                echo -n "\"$f\""
            done
            echo -n "]"
        done
        echo ""
        echo " }"
        echo "}"
    else
        echo "=== FILE GROUPS (strategy: type) ==="
        echo ""
        local group_num=1
        for type in "${!groups[@]}"; do
            IFS=',' read -ra file_list <<< "${groups[$type]}"
            echo "Group $group_num: $type (${#file_list[@]} files)"
            for file in "${file_list[@]}"; do
                echo " $file"
            done
            echo ""
            ((group_num++))
        done
    fi
}
# Group changed files by path-derived scope and print the groups.
# Exits 1 when there are no changed files.
group_by_scope() {
    log "Grouping by scope..."
    # groups[scope] = comma-separated list of files in that scope.
    declare -A groups
    local files
    mapfile -t files < <(get_changed_files)
    if [[ ${#files[@]} -eq 0 ]]; then
        echo "No changes detected" >&2
        exit 1
    fi
    # Derive each file's scope and append it to that bucket.
    for file in "${files[@]}"; do
        local scope
        scope=$(extract_scope "$file")
        log " $file -> scope:$scope"
        if [[ -z "${groups[$scope]:-}" ]]; then
            groups[$scope]="$file"
        else
            groups[$scope]="${groups[$scope]},$file"
        fi
    done
    # Emit either machine-readable JSON or a human-readable listing.
    if [[ "$FORMAT" == "json" ]]; then
        echo "{"
        echo " \"strategy\": \"scope\","
        echo " \"groups\": {"
        local first=true
        for scope in "${!groups[@]}"; do
            # Comma-separate JSON entries after the first one.
            if [[ "$first" == "true" ]]; then
                first=false
            else
                echo ","
            fi
            IFS=',' read -ra file_list <<< "${groups[$scope]}"
            echo -n " \"$scope\": ["
            local first_file=true
            for f in "${file_list[@]}"; do
                if [[ "$first_file" == "true" ]]; then
                    first_file=false
                else
                    echo -n ", "
                fi
                echo -n "\"$f\""
            done
            echo -n "]"
        done
        echo ""
        echo " }"
        echo "}"
    else
        echo "=== FILE GROUPS (strategy: scope) ==="
        echo ""
        local group_num=1
        for scope in "${!groups[@]}"; do
            IFS=',' read -ra file_list <<< "${groups[$scope]}"
            echo "Group $group_num: $scope (${#file_list[@]} files)"
            for file in "${file_list[@]}"; do
                echo " $file"
            done
            echo ""
            ((group_num++))
        done
    fi
}
# Group changed files by a pseudo-feature key — the combination of commit
# type and scope — and print the groups. Exits 1 when there are no changes.
group_by_feature() {
    log "Grouping by feature..."
    # groups[type_scope] = comma-separated list of files for that feature.
    declare -A groups
    local files
    mapfile -t files < <(get_changed_files)
    if [[ ${#files[@]} -eq 0 ]]; then
        echo "No changes detected" >&2
        exit 1
    fi
    # The "feature" key is the type and scope joined with an underscore.
    for file in "${files[@]}"; do
        local type scope feature
        type=$(detect_type "$file")
        scope=$(extract_scope "$file")
        feature="${type}_${scope}"
        log " $file -> feature:$feature"
        if [[ -z "${groups[$feature]:-}" ]]; then
            groups[$feature]="$file"
        else
            groups[$feature]="${groups[$feature]},$file"
        fi
    done
    # Emit either machine-readable JSON or a human-readable listing.
    if [[ "$FORMAT" == "json" ]]; then
        echo "{"
        echo " \"strategy\": \"feature\","
        echo " \"groups\": {"
        local first=true
        for feature in "${!groups[@]}"; do
            # Comma-separate JSON entries after the first one.
            if [[ "$first" == "true" ]]; then
                first=false
            else
                echo ","
            fi
            IFS=',' read -ra file_list <<< "${groups[$feature]}"
            echo -n " \"$feature\": ["
            local first_file=true
            for f in "${file_list[@]}"; do
                if [[ "$first_file" == "true" ]]; then
                    first_file=false
                else
                    echo -n ", "
                fi
                echo -n "\"$f\""
            done
            echo -n "]"
        done
        echo ""
        echo " }"
        echo "}"
    else
        echo "=== FILE GROUPS (strategy: feature) ==="
        echo ""
        local group_num=1
        for feature in "${!groups[@]}"; do
            IFS=',' read -ra file_list <<< "${groups[$feature]}"
            # Split the key back into type and scope for display.
            # NOTE(review): a scope containing '_' would be truncated here —
            # confirm scopes never contain underscores.
            IFS='_' read -ra feature_parts <<< "$feature"
            local type="${feature_parts[0]}"
            local scope="${feature_parts[1]}"
            echo "Group $group_num: $type($scope) (${#file_list[@]} files)"
            for file in "${file_list[@]}"; do
                echo " $file"
            done
            echo ""
            ((group_num++))
        done
    fi
}
# Dispatch to the requested grouping strategy; reject anything else.
case "$STRATEGY" in
    type)    group_by_type ;;
    scope)   group_by_scope ;;
    feature) group_by_feature ;;
    *)
        echo "Invalid strategy: $STRATEGY" >&2
        echo "Valid strategies: type, scope, feature" >&2
        exit 2
        ;;
esac

View File

@@ -0,0 +1,283 @@
#!/usr/bin/env python3
"""
Split Analyzer - Determine if changes should be split
Purpose: Analyze git changes to determine if they should be split into multiple commits
Version: 1.0.0
Usage: ./split-analyzer.py [--verbose] [--threshold N]
Returns:
Exit 0: Should split
Exit 1: Already atomic
Exit 2: Error occurred
Dependencies: git, python3
"""
import sys
import subprocess
import re
import json
from collections import defaultdict
from typing import List, Dict, Tuple, Optional
# Conventional commit types.
# NOTE(review): currently unreferenced in this module — confirm it is kept
# intentionally (e.g. as documentation or for external callers) before removing.
COMMIT_TYPES = ['feat', 'fix', 'docs', 'style', 'refactor', 'test', 'chore', 'perf', 'ci', 'build']
class SplitAnalyzer:
    """Analyze pending git changes and decide whether to split them.

    Classifies each changed file by commit type and scope, then applies
    heuristics (multiple types/scopes, file-count threshold, mixed
    concerns) to recommend splitting into several commits.
    """

    def __init__(self, threshold: int = 10, verbose: bool = False):
        self.threshold = threshold       # max file count before a split is suggested
        self.verbose = verbose
        self.files: List[str] = []       # changed files discovered by analyze()
        self.types = defaultdict(list)   # commit type -> [file paths]
        self.scopes = defaultdict(list)  # scope name  -> [file paths]
        self.concerns: List[str] = []    # human-readable mixed-concern findings

    def log(self, message: str):
        """Print a debug message to stderr when verbose mode is enabled."""
        if self.verbose:
            print(f"[DEBUG] {message}", file=sys.stderr)

    def run_git_command(self, args: List[str], fatal: bool = True) -> str:
        """Run a git command and return its stdout.

        With fatal=True (the default, matching the original behavior) a
        failure prints an error and exits with status 2. With fatal=False
        it returns '' so callers can treat the output as best-effort —
        this replaces the original bare `except:` in get_file_diff, which
        silently swallowed even the SystemExit raised here.
        """
        try:
            result = subprocess.run(
                ['git'] + args,
                capture_output=True,
                text=True,
                check=True
            )
            return result.stdout
        except (subprocess.CalledProcessError, FileNotFoundError, OSError) as e:
            # FileNotFoundError/OSError: git binary missing or unrunnable.
            if fatal:
                print(f"Error running git command: {e}", file=sys.stderr)
                sys.exit(2)
            return ""

    def get_changed_files(self) -> List[str]:
        """Return the union of staged and unstaged changed files."""
        staged = self.run_git_command(['diff', '--cached', '--name-only'])
        unstaged = self.run_git_command(['diff', '--name-only'])
        files = set()
        files.update(filter(None, staged.split('\n')))
        files.update(filter(None, unstaged.split('\n')))
        return list(files)

    def get_file_diff(self, file_path: str) -> str:
        """Return the diff for one file, preferring staged over unstaged.

        Best-effort: returns '' when no diff can be obtained.
        """
        diff = self.run_git_command(['diff', '--cached', file_path], fatal=False)
        if not diff:
            diff = self.run_git_command(['diff', file_path], fatal=False)
        return diff

    def detect_type_from_diff(self, file_path: str, diff: str) -> str:
        """Infer a conventional-commit type from the file path and diff."""
        # Path-based classification first: these override diff content.
        if any(file_path.endswith(ext) for ext in ['.md', '.txt', '.rst', '.adoc']):
            return 'docs'
        if any(pattern in file_path for pattern in ['test/', 'tests/', 'spec/', '__tests__', '.test.', '.spec.']):
            return 'test'
        if any(pattern in file_path for pattern in ['.github/', '.gitlab-ci', 'jenkins', '.circleci']):
            return 'ci'
        if any(file_path.endswith(ext) for ext in ['package.json', 'pom.xml', 'build.gradle', 'Makefile', 'CMakeLists.txt']):
            return 'build'
        if not diff:
            return 'chore'
        # Content-based heuristics over the added/removed lines.
        added_lines = [line for line in diff.split('\n') if line.startswith('+') and not line.startswith('+++')]
        removed_lines = [line for line in diff.split('\n') if line.startswith('-') and not line.startswith('---')]
        # Hoisted: the original re-joined added_lines for every keyword check.
        added_text = ' '.join(added_lines)
        added_lower = added_text.lower()
        # New definitions combined with "add"/"implement" wording -> feature.
        if any(keyword in added_text for keyword in ['function ', 'class ', 'def ', 'const ', 'let ', 'var ']):
            if any(keyword in added_text for keyword in ['new ', 'add', 'implement', 'create']):
                return 'feat'
        # Bug-fix vocabulary
        if any(keyword in added_lower for keyword in ['fix', 'bug', 'error', 'issue', 'null', 'undefined']):
            return 'fix'
        # Refactoring vocabulary
        if any(keyword in added_lower for keyword in ['refactor', 'rename', 'move', 'extract']):
            return 'refactor'
        # Performance vocabulary
        if any(keyword in added_lower for keyword in ['performance', 'optimize', 'cache', 'memoize']):
            return 'perf'
        # Equal add/remove counts suggest pure reformatting. Require at least
        # one changed line so a diff with none (e.g. a mode change) falls
        # through to 'chore' instead of the original's spurious 'style'.
        if added_lines and len(added_lines) == len(removed_lines):
            return 'style'
        # Mostly new code reads as a feature; otherwise housekeeping.
        if len(added_lines) > len(removed_lines) * 2:
            return 'feat'
        return 'chore'

    def extract_scope_from_path(self, file_path: str) -> str:
        """Derive a scope name from the first meaningful path component."""
        parts = file_path.split('/')
        # Generic container directories carry no scope information.
        skip_prefixes = ['src', 'lib', 'app', 'packages', 'tests', 'test']
        for part in parts:
            if part not in skip_prefixes and part != '.' and part != '..':
                # Strip the file extension (everything from the first dot).
                return part.split('.')[0]
        return 'root'

    def detect_mixed_concerns(self) -> List[str]:
        """Return descriptions of mixed concerns found in the change set."""
        concerns = []
        has_feature = 'feat' in self.types
        if has_feature and 'refactor' in self.types:
            concerns.append("Feature implementation mixed with refactoring")
        if has_feature and 'style' in self.types:
            concerns.append("Feature implementation mixed with style changes")
        if 'test' in self.types:
            # Compare SCOPE NAMES. The original built sets of the file
            # lists themselves (self.scopes[scope]), which raised
            # "TypeError: unhashable type: 'list'" whenever this branch ran.
            test_scopes = {scope for scope in self.scopes if 'test' in scope}
            impl_scopes = {scope for scope in self.scopes if 'test' not in scope}
            if test_scopes != impl_scopes and len(impl_scopes) > 1:
                concerns.append("Tests for multiple unrelated implementations")
        return concerns

    def analyze(self) -> Tuple[bool, str, Dict]:
        """Classify all changed files and decide whether to split.

        Returns (should_split, reason, metrics); metrics is {} when there
        are no changes.
        """
        self.files = self.get_changed_files()
        self.log(f"Found {len(self.files)} changed files")
        if not self.files:
            return False, "No changes detected", {}
        # Classify every file by type and scope.
        for file_path in self.files:
            self.log(f"Analyzing: {file_path}")
            diff = self.get_file_diff(file_path)
            file_type = self.detect_type_from_diff(file_path, diff)
            scope = self.extract_scope_from_path(file_path)
            self.types[file_type].append(file_path)
            self.scopes[scope].append(file_path)
            self.log(f" Type: {file_type}, Scope: {scope}")
        reasons = []
        # Criterion 1: more than one commit type.
        if len(self.types) > 1:
            type_list = ', '.join(self.types.keys())
            reasons.append(f"Multiple types detected: {type_list}")
            self.log(f"SPLIT REASON: Multiple types: {type_list}")
        # Criterion 2: more than one scope.
        if len(self.scopes) > 1:
            scope_list = ', '.join(self.scopes.keys())
            reasons.append(f"Multiple scopes detected: {scope_list}")
            self.log(f"SPLIT REASON: Multiple scopes: {scope_list}")
        # Criterion 3: too many files.
        if len(self.files) > self.threshold:
            reasons.append(f"Large change: {len(self.files)} files (threshold: {self.threshold})")
            self.log(f"SPLIT REASON: Too many files: {len(self.files)} > {self.threshold}")
        # Criterion 4: mixed concerns.
        self.concerns = self.detect_mixed_concerns()
        if self.concerns:
            reasons.append(f"Mixed concerns: {'; '.join(self.concerns)}")
            self.log("SPLIT REASON: Mixed concerns detected")
        metrics = {
            'file_count': len(self.files),
            'types_detected': list(self.types.keys()),
            'type_counts': {t: len(files) for t, files in self.types.items()},
            'scopes_detected': list(self.scopes.keys()),
            'scope_counts': {s: len(files) for s, files in self.scopes.items()},
            'concerns': self.concerns,
            'threshold': self.threshold
        }
        if reasons:
            should_split = True
            reason = '; '.join(reasons)
            self.log(f"RECOMMENDATION: Should split - {reason}")
        else:
            should_split = False
            reason = "Changes are atomic - single logical unit"
            self.log("RECOMMENDATION: Already atomic")
        return should_split, reason, metrics
def main():
    """CLI entry point: analyze pending changes and report the verdict."""
    import argparse

    parser = argparse.ArgumentParser(description='Analyze if git changes should be split')
    parser.add_argument('--verbose', action='store_true', help='Verbose output')
    parser.add_argument('--threshold', type=int, default=10, help='File count threshold')
    parser.add_argument('--json', action='store_true', help='Output JSON format')
    opts = parser.parse_args()

    analyzer = SplitAnalyzer(threshold=opts.threshold, verbose=opts.verbose)
    should_split, reason, metrics = analyzer.analyze()

    if opts.json:
        # Machine-readable report.
        print(json.dumps({
            'should_split': should_split,
            'reason': reason,
            'metrics': metrics,
            'recommendation': 'split' if should_split else 'atomic'
        }, indent=2))
    else:
        # Human-readable report.
        print(f"Should split: {'YES' if should_split else 'NO'}")
        print(f"Reason: {reason}")
        print(f"\nMetrics:")
        print(f" Files: {metrics['file_count']}")
        print(f" Types: {', '.join(metrics['types_detected'])}")
        print(f" Scopes: {', '.join(metrics['scopes_detected'])}")
        if metrics['concerns']:
            print(f" Concerns: {', '.join(metrics['concerns'])}")

    # The exit status doubles as the verdict: 0 = should split, 1 = atomic.
    sys.exit(0 if should_split else 1)


if __name__ == '__main__':
    main()