Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 17:55:13 +08:00
commit 02de918865
11 changed files with 2014 additions and 0 deletions

View File

@@ -0,0 +1,402 @@
#!/usr/bin/env python3
"""
Analyze staged git changes and suggest commit messages.
Usage:
python analyze-diff.py # Analyze staged changes
python analyze-diff.py --json # Print the analysis as JSON
python analyze-diff.py --commit # Suggest a message, then commit after confirmation
"""
import subprocess
import sys
import re
from pathlib import Path
from typing import Dict, List, Tuple, Optional
from collections import defaultdict
class DiffAnalyzer:
    """Analyze staged git changes and suggest a conventional commit message.

    The analysis is heuristic: the scope is guessed from the changed file
    paths, the commit type from file statuses plus keyword counts in the
    added lines, and the description from the first identifier found in the
    added lines.
    """

    # Map file-path regexes to scopes. Patterns are tried in insertion order
    # against the lower-cased path and the first match wins.
    SCOPE_PATTERNS = {
        r'.*/(auth|login|oauth)': 'auth',
        r'.*/(api|endpoints|routes)': 'api',
        # Grouped so that "migrations" only matches at the start of a path
        # component instead of anywhere inside a file name.
        r'(^|/)(database|migrations)': 'database',
        r'.*/tests?/': 'test',
        r'.*/(ui|components|views)': 'ui',
        r'.*/docs?/': 'docs',
        r'.*/(config|settings)': 'config',
        r'.*\.github/': 'ci',
        r'Dockerfile|docker-compose': 'docker',
        r'.*/(deploy|infra|terraform)': 'ops',
    }

    # Keywords in added diff lines that suggest a commit type.
    TYPE_KEYWORDS = {
        'feat': [
            'add', 'create', 'implement', 'introduce', 'new',
            'class', 'function', 'feature', 'endpoint', 'component'
        ],
        'fix': [
            'fix', 'bug', 'issue', 'error', 'crash', 'correct',
            'resolve', 'patch', 'repair'
        ],
        'refactor': [
            'refactor', 'restructure', 'reorganize', 'extract',
            'rename', 'move', 'cleanup', 'simplify'
        ],
        'perf': [
            'optimize', 'performance', 'faster', 'speed', 'cache',
            'index', 'query', 'efficient'
        ],
        'style': [
            'format', 'lint', 'prettier', 'whitespace', 'indent'
        ],
        'test': [
            'test', 'spec', 'coverage', 'mock', 'fixture'
        ],
        'docs': [
            'readme', 'documentation', 'comment', 'docstring'
        ],
        'build': [
            'package.json', 'requirements.txt', 'dependencies',
            'dependency', 'upgrade', 'bump'
        ],
    }

    def __init__(self):
        self.git_root = self._get_git_root()

    def _get_git_root(self) -> Optional[Path]:
        """Return the repository root, or None when it cannot be determined.

        None is returned both when the working directory is not inside a
        repository and when the git executable cannot be started.
        """
        try:
            result = subprocess.run(
                ['git', 'rev-parse', '--show-toplevel'],
                capture_output=True,
                text=True,
                check=True
            )
        except (subprocess.CalledProcessError, OSError):
            return None
        return Path(result.stdout.strip())

    def _run_git(self, args: List[str]) -> str:
        """Run ``git <args>`` and return its stdout, or "" on any failure."""
        try:
            result = subprocess.run(
                ['git'] + args,
                capture_output=True,
                text=True,
                check=True
            )
        except (subprocess.CalledProcessError, OSError):
            return ""
        return result.stdout

    @staticmethod
    def _parse_name_status(output: str) -> List[Dict[str, str]]:
        """Parse ``git diff --name-status`` output into path/status records."""
        files = []
        for line in output.splitlines():
            if not line:
                continue
            fields = line.split('\t')
            # Renames and copies are reported as "R100<TAB>old<TAB>new";
            # the last field is the path that exists after the change.
            filepath = fields[-1] if len(fields) > 1 else ''
            files.append({
                'path': filepath,
                'status': fields[0],  # A=added, M=modified, D=deleted, R*=renamed
            })
        return files

    def get_staged_changes(self) -> Optional[Dict]:
        """Collect the staged file list, the full diff and the diffstat.

        Returns:
            A dict with keys ``files`` (list of ``{'path', 'status'}``),
            ``diff`` and ``stats``, or None when nothing is staged.
        """
        files_output = self._run_git(['diff', '--staged', '--name-status'])
        if not files_output.strip():
            return None
        return {
            'files': self._parse_name_status(files_output),
            'diff': self._run_git(['diff', '--staged']),
            'stats': self._run_git(['diff', '--staged', '--stat']),
        }

    def infer_scope(self, files: List[Dict]) -> Optional[str]:
        """Infer a scope from the changed file paths.

        The most frequent SCOPE_PATTERNS hit wins (ties go to the scope seen
        first). Without any hit, the first top-level directory that is not a
        generic source folder is used, truncated to 20 characters.
        """
        scopes = []
        for file in files:
            path = file['path'].lower()
            for pattern, scope in self.SCOPE_PATTERNS.items():
                if re.search(pattern, path):
                    scopes.append(scope)
                    break

        if scopes:
            # Iterating the list (not a set) keeps tie-breaking deterministic.
            return max(scopes, key=scopes.count)

        for file in files:
            parts = Path(file['path']).parts
            if len(parts) > 1 and parts[0] not in ('src', 'lib', 'app'):
                return parts[0][:20]
        return None

    def infer_type(self, files: List[Dict], diff: str) -> Tuple[str, float]:
        """Infer the commit type from file statuses and added lines.

        Returns:
            ``(type, confidence)`` where confidence is in the range 0-1.
        """
        has_new_files = any(f['status'] == 'A' for f in files)
        has_deletions = any(f['status'] == 'D' for f in files)
        only_tests = all('test' in f['path'].lower() for f in files)
        only_docs = all(
            f['path'].lower().endswith(('.md', '.txt', '.rst'))
            for f in files
        )
        if only_tests:
            return 'test', 0.9
        if only_docs:
            return 'docs', 0.9

        # Added lines only; "+++" is the new-file header, not content.
        added_lines = [
            line.lower() for line in diff.split('\n')
            if line.startswith('+') and not line.startswith('+++')
        ]

        type_scores = defaultdict(int)
        for commit_type, keywords in self.TYPE_KEYWORDS.items():
            hits = sum(
                1
                for keyword in keywords
                for line in added_lines
                if keyword in line
            )
            if hits:
                type_scores[commit_type] += hits

        # Adjust scores based on file status.
        if has_new_files:
            type_scores['feat'] += 5
        if has_deletions and not has_new_files:
            type_scores['refactor'] += 2

        if not type_scores:
            return 'chore', 0.3
        best_type, best_score = max(type_scores.items(), key=lambda x: x[1])
        confidence = min(best_score / 10, 1.0)  # Normalize to 0-1
        return best_type, confidence

    def generate_description(self,
                             files: List[Dict],
                             diff: str,
                             commit_type: str) -> str:
        """Generate a short description for the given commit type."""
        # Only genuinely added lines are searched: each pattern is anchored
        # to a leading "+" and skips the "+++" file header.
        added_patterns = [
            r'^\+(?!\+\+).*def (\w+)',       # Python functions
            r'^\+(?!\+\+).*function (\w+)',  # JS functions
            r'^\+(?!\+\+).*class (\w+)',     # Classes
            r'^\+(?!\+\+).*const (\w+)',     # Constants
        ]
        entities = []
        for pattern in added_patterns:
            matches = re.findall(pattern, diff, re.MULTILINE)
            entities.extend(matches[:3])  # Limit to first 3 per pattern

        diff_lower = diff.lower()

        if commit_type == 'feat' and entities:
            return f"add {entities[0]}"
        elif commit_type == 'fix':
            if 'null' in diff_lower and 'check' in diff_lower:
                return "prevent null pointer exception"
            elif 'error' in diff_lower:
                return "fix error handling"
            else:
                return "fix bug"
        elif commit_type == 'refactor':
            if entities:
                return f"extract {entities[0]} logic"
            return "restructure code"
        elif commit_type == 'perf':
            if 'cache' in diff_lower:
                return "add caching"
            elif 'index' in diff_lower:
                return "optimize database queries"
            return "improve performance"
        elif commit_type == 'docs':
            return "update documentation"
        elif commit_type == 'test':
            if entities:
                return f"add tests for {entities[0]}"
            return "add tests"

        # Default descriptions (also reached for 'feat' without entities).
        action = 'add' if any(f['status'] == 'A' for f in files) else 'update'
        if len(files) == 1:
            filename = Path(files[0]['path']).stem
            return f"{action} {filename}"
        else:
            return f"{action} {len(files)} files"

    def is_breaking_change(self, diff: str) -> Tuple[bool, Optional[str]]:
        """Detect whether the diff might be a breaking change.

        Returns:
            ``(True, reason)`` for the first indicator found, otherwise
            ``(False, None)``.
        """
        # Removal indicators are anchored to removed lines ("-" at line
        # start, excluding the "---" file header).
        breaking_indicators = [
            (r'^-(?!--).*public ', 'removed public API'),
            (r'^-(?!--).*export ', 'removed exports'),
            (r'BREAKING CHANGE', 'explicitly marked'),
            (r'^-(?!--).*@deprecated', 'removed deprecated feature'),
        ]
        for pattern, reason in breaking_indicators:
            if re.search(pattern, diff, re.IGNORECASE | re.MULTILINE):
                return True, reason
        return False, None

    def analyze(self) -> Optional[Dict]:
        """Analyze staged changes and return the suggestion.

        Returns:
            A dict with an ``error`` key when there is nothing to analyze,
            otherwise a dict with ``type``, ``scope``, ``description``,
            ``confidence``, ``breaking``, ``breaking_reason``,
            ``files_changed`` and ``stats``.
        """
        if not self.git_root:
            return {
                'error': 'Not in a git repository'
            }
        changes = self.get_staged_changes()
        if not changes:
            return {
                'error': 'No staged changes found. Use: git add <files>'
            }
        files = changes['files']
        diff = changes['diff']

        # Infer commit components.
        scope = self.infer_scope(files)
        commit_type, confidence = self.infer_type(files, diff)
        description = self.generate_description(files, diff, commit_type)
        is_breaking, breaking_reason = self.is_breaking_change(diff)
        return {
            'type': commit_type,
            'scope': scope,
            'description': description,
            'confidence': confidence,
            'breaking': is_breaking,
            'breaking_reason': breaking_reason,
            'files_changed': len(files),
            'stats': changes['stats'],
        }
def main():
    """Command-line entry point: analyze staged changes and print a suggestion.

    Flags:
        --json    print the raw analysis as JSON and exit.
        --commit  after showing the suggestion, ask for confirmation and run
                  ``git commit`` with the suggested message.

    Exits with status 1 when there is nothing to analyze, and with git's own
    exit status when the confirmed commit fails.
    """
    import argparse
    import json
    import shlex

    parser = argparse.ArgumentParser(
        description='Analyze git changes and suggest commits'
    )
    parser.add_argument(
        '--json',
        action='store_true',
        help='Output as JSON'
    )
    parser.add_argument(
        '--commit',
        action='store_true',
        help='Generate and execute git commit (interactive)'
    )
    args = parser.parse_args()

    analyzer = DiffAnalyzer()
    result = analyzer.analyze()
    if not result:
        print("No changes to analyze")
        sys.exit(1)
    if 'error' in result:
        print(f"Error: {result['error']}")
        sys.exit(1)
    if args.json:
        print(json.dumps(result, indent=2))
        sys.exit(0)

    # Pretty output
    print("📊 Analyzed your changes:\n")
    print(f"Files changed: {result['files_changed']}")
    print(result['stats'])
    print()

    # Build commit message: type(scope)!: description
    commit_msg = result['type']
    if result['scope']:
        commit_msg += f"({result['scope']})"
    if result['breaking']:
        commit_msg += "!"
    commit_msg += f": {result['description']}"

    print("💡 Suggested commit:\n")
    print(f" {commit_msg}")
    print()
    if result['confidence'] < 0.5:
        print("⚠️ Low confidence - please review and adjust")
        print()
    if result['breaking']:
        print(f"⚠️ Possible breaking change detected: {result['breaking_reason']}")
        print(" Consider adding BREAKING CHANGE: in commit body")
        print()

    # The message is built from file and identifier names, so quote it
    # before showing it as a command meant to be pasted into a shell.
    manual_command = f" git commit -m {shlex.quote(commit_msg)}"

    if args.commit:
        response = input("Execute this commit? [y/N]: ")
        if response.strip().lower() == 'y':
            completed = subprocess.run(['git', 'commit', '-m', commit_msg])
            # Hooks or an empty index can make the commit fail; do not
            # report success in that case.
            if completed.returncode != 0:
                print("✗ Commit failed", file=sys.stderr)
                sys.exit(completed.returncode)
            print("✓ Committed!")
        else:
            print("Copy and adjust as needed:")
            print(manual_command)
    else:
        print("To commit:")
        print(manual_command)
        print()
        print("To auto-commit next time:")
        print(" python analyze-diff.py --commit")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,251 @@
#!/usr/bin/env python3
"""
Generate changelog from conventional commits.
Usage:
python changelog.py # Since last tag
python changelog.py --from v1.0.0 # Since specific version
python changelog.py --version 2.0.0 # Add version header
"""
import subprocess
import sys
import re
from typing import List, Dict, Optional
from collections import defaultdict
from datetime import datetime
class Commit:
    """One git commit, split into its conventional-commit fields.

    After construction ``type``, ``scope`` and ``description`` hold the
    parsed header parts (all None when the header is not a conventional
    commit) and ``breaking`` tells whether the commit is flagged as breaking.
    """

    def __init__(self, hash: str, message: str):
        self.hash = hash
        self.message = message
        self.type = None
        self.scope = None
        self.breaking = False
        self.description = None
        self._parse()

    def _parse(self):
        """Fill in the header fields from the first line of the message."""
        first_line, _, _ = self.message.partition('\n')

        # Header shape: type(scope)!: description
        parsed = re.match(
            r'^(?P<type>\w+)(?:\((?P<scope>[^)]+)\))?(?P<breaking>!)?:\s*(?P<desc>.+)$',
            first_line,
        )
        if parsed is not None:
            fields = parsed.groupdict()
            self.type = fields['type']
            self.scope = fields['scope']
            self.breaking = fields['breaking'] is not None
            self.description = fields['desc']

        # A footer marker anywhere in the message also flags the commit.
        self.breaking = self.breaking or 'BREAKING CHANGE:' in self.message

    @property
    def is_valid(self):
        """True when the header parsed as a conventional commit."""
        return self.type is not None
class ChangelogGenerator:
    """Render conventional commits from ``git log`` as a Markdown changelog."""

    # Section heading for every commit type that is rendered.
    TYPE_HEADERS = {
        'feat': '### ✨ Features',
        'fix': '### 🐛 Bug Fixes',
        'perf': '### ⚡ Performance',
        'refactor': '### ♻️ Refactoring',
        'docs': '### 📚 Documentation',
        'style': '### 💄 Styling',
        'test': '### ✅ Tests',
        'build': '### 📦 Build',
        'ops': '### 🔧 Operations',
        'chore': '### 🏗️ Chores',
    }

    # Order in which the sections appear in the output.
    TYPE_ORDER = [
        'feat', 'fix', 'perf', 'refactor',
        'docs', 'style', 'test', 'build', 'ops', 'chore'
    ]

    def __init__(self, include_hash: bool = False):
        self.include_hash = include_hash

    def get_commits(self, from_ref: Optional[str] = None) -> List[Commit]:
        """Read commits from ``git log``, optionally limited to from_ref..HEAD.

        Returns an empty list when the git command fails.
        """
        git_args = ['git', 'log']
        if from_ref:
            git_args.append(f'{from_ref}..HEAD')
        git_args.append('--format=%H%n%B%n---END---')

        try:
            completed = subprocess.run(
                git_args, capture_output=True, text=True, check=True
            )
        except subprocess.CalledProcessError:
            return []

        # Each record is: hash line, message lines, then an ---END--- line.
        parsed = []
        sha = None
        body = []
        for raw in completed.stdout.split('\n'):
            if not sha:
                sha = raw.strip()
                continue
            if raw != '---END---':
                body.append(raw)
                continue
            if body:
                parsed.append(Commit(sha, '\n'.join(body)))
            sha, body = None, []
        return parsed

    def group_commits(self, commits: List[Commit]) -> Dict[str, List[Commit]]:
        """Split valid commits into a breaking list and per-type lists.

        Commits that are not valid conventional commits are ignored. A
        breaking commit appears both under ``breaking`` and under its type.
        """
        flagged = []
        buckets = defaultdict(list)
        for entry in commits:
            if entry.is_valid:
                if entry.breaking:
                    flagged.append(entry)
                buckets[entry.type].append(entry)
        return {'breaking': flagged, 'by_type': buckets}

    def format_commit(self, commit: Commit) -> str:
        """Format one commit as a Markdown bullet."""
        pieces = [f"**{commit.scope}**:"] if commit.scope else []
        pieces.append(commit.description)
        if self.include_hash:
            pieces.append(f"([`{commit.hash[:7]}`])")
        return '- ' + ' '.join(pieces)

    def generate(
        self,
        from_ref: Optional[str] = None,
        version: Optional[str] = None,
        date: Optional[str] = None
    ) -> str:
        """Build the complete changelog text.

        Returns the literal "No commits found." when git yields no commits.
        """
        commits = self.get_commits(from_ref)
        if not commits:
            return "No commits found."

        grouped = self.group_commits(commits)
        out = []

        # Optional version header, with the date appended when given.
        if version:
            title = f"## [{version}]"
            if date:
                title += f" - {date}"
            out += [title, ""]

        # Breaking changes come first, each followed by its footer details.
        breaking = grouped['breaking']
        if breaking:
            out += ["### ⚠️ BREAKING CHANGES", ""]
            for entry in breaking:
                out.append(self.format_commit(entry))
                for text in entry.message.split('\n'):
                    if not text.startswith('BREAKING CHANGE:'):
                        continue
                    detail = text.replace('BREAKING CHANGE:', '').strip()
                    if detail:
                        out.append(f" - {detail}")
            out.append("")

        # One section per known type, in TYPE_ORDER.
        per_type = grouped['by_type']
        for kind in self.TYPE_ORDER:
            members = per_type.get(kind)
            if not members:
                continue
            out += [self.TYPE_HEADERS[kind], ""]
            out.extend(self.format_commit(entry) for entry in members)
            out.append("")

        return '\n'.join(out)
def get_latest_tag() -> Optional[str]:
    """Return the most recent tag reachable from HEAD.

    Returns:
        The tag name, or None when there is no tag, the working directory is
        not a git repository, or the git executable cannot be started.
    """
    try:
        result = subprocess.run(
            ['git', 'describe', '--tags', '--abbrev=0'],
            capture_output=True,
            text=True,
            check=True
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing or non-executable git binary.
        return None
    return result.stdout.strip() or None
def main():
    """Command-line entry point: generate the changelog and emit it.

    Without --from, the changelog starts at the latest tag (or covers the
    whole history when there is no tag). The result goes to --output when
    given, otherwise to stdout; progress notes go to stderr.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Generate changelog')
    parser.add_argument('--from', dest='from_ref', help='Start from this ref')
    parser.add_argument('--version', help='Version for header')
    parser.add_argument('--date', help='Date for header (default: today)')
    parser.add_argument('--include-hash', action='store_true', help='Include commit hashes')
    parser.add_argument('--output', help='Output file (default: stdout)')
    args = parser.parse_args()

    # Get from ref
    from_ref = args.from_ref
    if not from_ref:
        from_ref = get_latest_tag()
    if from_ref:
        print(f"# Generating changelog since {from_ref}", file=sys.stderr)

    # Generate
    generator = ChangelogGenerator(include_hash=args.include_hash)
    date = args.date or datetime.now().strftime('%Y-%m-%d')
    changelog = generator.generate(from_ref, args.version, date)

    # Output
    if args.output:
        # The section headers contain emoji, so write UTF-8 explicitly
        # instead of relying on the platform's default encoding.
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(changelog)
        print(f"Changelog written to {args.output}", file=sys.stderr)
    else:
        print(changelog)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,84 @@
#!/usr/bin/env python3
"""
Simple commit message validator for git hooks.
Usage:
As git hook: cp validate.py .git/hooks/commit-msg && chmod +x .git/hooks/commit-msg
Standalone: python validate.py --message "feat: add feature"
From file: python validate.py commit-msg-file
"""
import sys
import re
def validate_commit(message):
    """Check a commit message against the conventional-commit header rules.

    Only the first line is inspected. Returns ``(True, None)`` when it is
    acceptable and ``(False, error_message)`` otherwise.
    """
    header = message.partition('\n')[0]

    # Merge/revert commits and the bootstrap commit bypass the format check.
    if header == 'chore: init' or header.startswith(('Merge branch', 'Revert')):
        return True, None

    # type, optional (scope), optional "!", ": ", then 1-100 characters.
    header_re = ''.join([
        r'^(feat|fix|refactor|perf|style|test|docs|build|ops|chore)',
        r'(\([a-z0-9-]+\))?',
        r'!?',
        r': ',
        r'.{1,100}$',
    ])
    if re.match(header_re, header) is None:
        problem = (
            f"Invalid format: {header}\n\n"
            "Expected: <type>(<scope>): <description>\n"
            "Example: feat(auth): add login\n\n"
            "Valid types: feat, fix, refactor, perf, style, test, docs, build, ops, chore"
        )
        return False, problem

    # Style rules for the description that follows the first ": ".
    summary = header.partition(': ')[2]
    if summary[0].isupper():
        return False, "Description should start with lowercase"
    if summary.endswith('.'):
        return False, "Description should not end with period"
    return True, None
def main():
    """Command-line entry point: validate one commit message.

    The message is taken from --message when given (even if empty), else
    from the file argument, else from stdin. Exits 0 when the message is
    valid and 1 otherwise.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Validate conventional commits')
    parser.add_argument('file', nargs='?', help='Commit message file')
    parser.add_argument('--message', help='Validate message directly')
    args = parser.parse_args()

    # Get message. Compare against None so that an explicit empty
    # --message is validated instead of silently waiting on stdin.
    if args.message is not None:
        message = args.message
    elif args.file:
        with open(args.file, encoding='utf-8', errors='replace') as f:
            message = f.read()
    else:
        message = sys.stdin.read()

    # Validate
    valid, error = validate_commit(message.strip())
    if valid:
        print("✓ Valid commit message")
        sys.exit(0)
    else:
        print(f"{error}")
        sys.exit(1)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,217 @@
#!/usr/bin/env python3
"""
Calculate next semantic version from commits.
Usage:
python version.py # Auto-detect current, suggest next
python version.py --current 1.2.3 # Start from specific version
python version.py --verbose # Show detailed analysis
"""
import subprocess
import sys
import re
from typing import Optional, Tuple
class Version:
    """A major.minor.patch version number."""

    def __init__(self, major: int, minor: int, patch: int):
        self.major = major
        self.minor = minor
        self.patch = patch

    def __str__(self):
        return '.'.join(str(part) for part in (self.major, self.minor, self.patch))

    @classmethod
    def parse(cls, version_str: str) -> 'Version':
        """Build a Version from text such as "1.2.3" or "v1.2.3".

        Raises:
            ValueError: when there are not exactly three dot-separated
                parts, or a part is not an integer.
        """
        bare = version_str.lstrip('v')
        fields = bare.split('.')
        if len(fields) != 3:
            raise ValueError(f"Invalid version: {bare}")
        major, minor, patch = (int(field) for field in fields)
        return cls(major, minor, patch)

    def bump_major(self) -> 'Version':
        """Return the next major version (minor and patch reset to 0)."""
        return Version(self.major + 1, 0, 0)

    def bump_minor(self) -> 'Version':
        """Return the next minor version (patch reset to 0)."""
        return Version(self.major, self.minor + 1, 0)

    def bump_patch(self) -> 'Version':
        """Return the next patch version."""
        return Version(self.major, self.minor, self.patch + 1)
class CommitAnalyzer:
    """Classify commits and derive the semantic-version bump they require.

    Each classified commit is recorded by its header (first message line) in
    one of ``breaking_commits``, ``feature_commits``, ``fix_commits`` or
    ``other_commits``.
    """

    def __init__(self):
        self.breaking_commits = []
        self.feature_commits = []
        self.fix_commits = []
        self.other_commits = []

    def analyze(self, from_ref: Optional[str] = None):
        """Classify every commit in from_ref..HEAD (or all of history)."""
        for commit in self._get_commits(from_ref):
            self._classify_commit(commit)

    def _get_commits(self, from_ref: Optional[str]) -> list:
        """Return the full commit messages, or [] when git fails.

        The whole message (not just the subject) is read so that
        ``BREAKING CHANGE:`` footers can be seen; records are separated by
        an ASCII record-separator byte, which cannot collide with text.
        """
        cmd = ['git', 'log', '--format=%B%x1e']
        if from_ref:
            cmd.insert(2, f'{from_ref}..HEAD')
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        except (subprocess.CalledProcessError, OSError):
            return []
        messages = (record.strip() for record in result.stdout.split('\x1e'))
        return [message for message in messages if message]

    def _classify_commit(self, message: str):
        """Record one commit message in the matching category list."""
        header = message.split('\n', 1)[0].strip()

        # Header shape: type(scope)!: ...  The "!" only counts as a breaking
        # marker in that exact position, not anywhere before a colon.
        match = re.match(r'^(?P<type>\w+)(?:\([^)]+\))?(?P<bang>!)?:\s*', header)
        if not match:
            self.other_commits.append(header)
            return

        has_breaking_footer = re.search(
            r'^BREAKING[ -]CHANGE:', message, re.MULTILINE
        ) is not None
        if match.group('bang') or has_breaking_footer:
            self.breaking_commits.append(header)
            return

        commit_type = match.group('type')
        if commit_type == 'feat':
            self.feature_commits.append(header)
        elif commit_type == 'fix':
            self.fix_commits.append(header)
        else:
            self.other_commits.append(header)

    def get_bump_type(self) -> Tuple[str, str]:
        """
        Get version bump type and reason.

        Breaking changes require a major bump and features a minor bump;
        fixes alone, or only other changes, result in a patch bump.

        Returns:
            (bump_type, reason) where bump_type is 'major', 'minor', or 'patch'
        """
        if self.breaking_commits:
            return 'major', f'{len(self.breaking_commits)} breaking change(s)'

        feat_count = len(self.feature_commits)
        fix_count = len(self.fix_commits)
        if feat_count:
            parts = [f'{feat_count} feature(s)']
            if fix_count:
                parts.append(f'{fix_count} fix(es)')
            return 'minor', ', '.join(parts)
        if fix_count:
            # Bug fixes without new features are backward-compatible patches.
            return 'patch', f'{fix_count} fix(es)'
        return 'patch', f'{len(self.other_commits)} other change(s)'

    def get_next_version(self, current: 'Version') -> Tuple['Version', str, str]:
        """
        Get next version.

        Returns:
            (next_version, bump_type, reason)
        """
        bump_type, reason = self.get_bump_type()
        if bump_type == 'major':
            next_version = current.bump_major()
        elif bump_type == 'minor':
            next_version = current.bump_minor()
        else:
            next_version = current.bump_patch()
        return next_version, bump_type, reason
def get_latest_tag() -> Optional[str]:
    """Return the most recent tag reachable from HEAD.

    Returns:
        The tag name, or None when there is no tag, the working directory is
        not a git repository, or the git executable cannot be started.
    """
    try:
        result = subprocess.run(
            ['git', 'describe', '--tags', '--abbrev=0'],
            capture_output=True,
            text=True,
            check=True
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing or non-executable git binary.
        return None
    return result.stdout.strip() or None
def main():
    """Command-line entry point: print the suggested next version.

    The current version comes from --current, else from the latest tag, else
    defaults to 0.0.0. Commits are analyzed since --from when given,
    otherwise since the tag matching the current version. With --verbose a
    full report is printed; otherwise only the next version.

    Exits with an error message (instead of a traceback) when the current
    version cannot be parsed as major.minor.patch.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Calculate next version')
    parser.add_argument('--current', help='Current version (default: latest tag)')
    parser.add_argument('--from', dest='from_ref', help='Analyze from this ref')
    parser.add_argument('--verbose', action='store_true', help='Show detailed analysis')
    args = parser.parse_args()

    # Get current version
    if args.current:
        try:
            current = Version.parse(args.current)
        except ValueError:
            parser.error(f"invalid --current value: {args.current!r} (expected MAJOR.MINOR.PATCH)")
        from_ref = args.from_ref or f'v{current}'
    else:
        tag = get_latest_tag()
        if tag:
            try:
                current = Version.parse(tag)
            except ValueError:
                print(
                    f"Error: latest tag '{tag}' is not a MAJOR.MINOR.PATCH version; "
                    "pass --current explicitly",
                    file=sys.stderr,
                )
                sys.exit(1)
            from_ref = args.from_ref or tag
        else:
            current = Version(0, 0, 0)
            from_ref = args.from_ref

    # Analyze commits
    analyzer = CommitAnalyzer()
    analyzer.analyze(from_ref)
    next_version, bump_type, reason = analyzer.get_next_version(current)

    if args.verbose:
        print("📊 Version Analysis\n")
        print(f"Current version: {current}")
        if from_ref:
            print(f"Analyzing since: {from_ref}")
        print()
        print("Commits found:")
        print(f"{len(analyzer.breaking_commits)} breaking change(s)")
        print(f"{len(analyzer.feature_commits)} feature(s)")
        print(f"{len(analyzer.fix_commits)} fix(es)")
        print(f"{len(analyzer.other_commits)} other change(s)")
        print()
        print(f"Bump type: {bump_type.upper()}")
        print(f"Reason: {reason}")
        print()
        print(f"Next version: {next_version}")
        if analyzer.breaking_commits:
            print()
            print("Breaking commits:")
            # Cap the listing so a long history does not flood the report.
            for commit in analyzer.breaking_commits[:5]:
                print(f"{commit}")
    else:
        print(next_version)


if __name__ == '__main__':
    main()