Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:20:25 +08:00
commit 0d6226e0d8
69 changed files with 20934 additions and 0 deletions

View File

@@ -0,0 +1,408 @@
#!/usr/bin/env python3
"""
============================================================================
SCRIPT: convention-recommender.py
PURPOSE: Generate project-specific commit convention recommendations
VERSION: 1.0.0
USAGE: ./convention-recommender.py --count N --branch BRANCH [--priority LEVEL]
RETURNS: JSON format with prioritized recommendations
EXIT CODES:
0 - Success
1 - Not a git repository
2 - No commit history
3 - Analysis failed
DEPENDENCIES: git, python3, style-analyzer.sh, pattern-detector.py, scope-extractor.sh
============================================================================
"""
import subprocess
import sys
import json
import argparse
import os
from typing import Dict, List, Tuple
def run_script(script_path: str, args: List[str]) -> Tuple[int, str]:
    """Execute a helper script and return its exit code and stripped stdout.

    The child process inherits the caller's working directory. The helper
    scripts inspect the git repository they are started in, so they have to
    run where the user invoked this tool, not in the directory that contains
    the scripts.

    Args:
        script_path: Path of the executable to run.
        args: Command-line arguments appended after the script path.

    Returns:
        ``(returncode, stdout)`` of the finished process. When the process
        cannot be started, ``(1, <JSON object with an "error" key>)``.
    """
    try:
        result = subprocess.run(
            [script_path, *args],
            capture_output=True,
            text=True,
            check=False,
        )
    except (OSError, ValueError, subprocess.SubprocessError) as exc:
        # Missing file, missing exec bit, malformed argument, ...
        return 1, json.dumps({'error': str(exc)})
    return result.returncode, result.stdout.strip()
def gather_analysis_data(count: int, branch: str, scripts_dir: str) -> Dict:
    """Run the helper analyzers and collect their parsed JSON output.

    Args:
        count: Number of commits each helper is asked to analyze.
        branch: Branch (or revision) passed to each helper.
        scripts_dir: Directory containing the helper scripts.

    Returns:
        Dict with up to three keys: ``'style'``, ``'patterns'`` and
        ``'scopes'``. A key is absent when its helper exits non-zero and is
        an empty dict when the helper's stdout is not valid JSON.
    """
    limit = str(count)
    # (result key, script file, argv). style-analyzer.sh takes positional
    # arguments, the other two take flags.
    jobs = (
        ('style', 'style-analyzer.sh', [limit, branch]),
        ('patterns', 'pattern-detector.py', ['--count', limit, '--branch', branch]),
        ('scopes', 'scope-extractor.sh',
         ['--count', limit, '--branch', branch, '--min-frequency', '2']),
    )

    collected = {}
    for key, script_name, argv in jobs:
        status, stdout = run_script(os.path.join(scripts_dir, script_name), argv)
        if status != 0:
            continue
        try:
            collected[key] = json.loads(stdout)
        except json.JSONDecodeError:
            collected[key] = {}
    return collected
def generate_recommendations(data: Dict, priority_filter: str) -> Dict:
    """Generate prioritized recommendations based on analysis data.

    Args:
        data: Analysis results. Reads ``data['style']['project_style']`` and
            ``data['scopes']``; missing keys fall back to zero / empty values.
        priority_filter: ``'high'``, ``'medium'``, ``'low'`` or ``'all'``.
            A falsy value behaves like ``'all'``.

    Returns:
        Dict with the lists ``high_priority``, ``medium_priority`` and
        ``low_priority``. With a filter other than ``'all'`` the dict holds
        only the single ``<filter>_priority`` key.
    """
    recommendations = {
        'high_priority': [],
        'medium_priority': [],
        'low_priority': []
    }
    style = data.get('style', {}).get('project_style', {})
    scopes = data.get('scopes', {})

    # HIGH PRIORITY RECOMMENDATIONS
    # 1. Conventional commits adoption
    conv_pct = style.get('conventional_commits_percentage', 0)
    if conv_pct < 50:
        recommendations['high_priority'].append({
            'id': 1,
            'title': 'Adopt Conventional Commits Format',
            'status': 'needs_improvement',
            'current_usage': conv_pct,
            'target_usage': 80,
            'action': 'Migrate to conventional commits format: <type>(<scope>): <subject>',
            'benefit': 'Enables automated changelog, semantic versioning, and better git history',
            'priority': 'high',
            'examples': [
                'feat(auth): implement OAuth2 authentication',
                'fix(api): handle null pointer in user endpoint',
                'docs: update API documentation'
            ]
        })
    elif conv_pct < 80:
        recommendations['medium_priority'].append({
            'id': 1,
            'title': 'Increase Conventional Commits Usage',
            'status': 'moderate',
            'current_usage': conv_pct,
            'target_usage': 90,
            'action': 'Encourage team to use conventional commits consistently',
            'benefit': 'Better consistency and tooling support',
            'priority': 'medium'
        })
    else:
        recommendations['high_priority'].append({
            'id': 1,
            'title': 'Continue Using Conventional Commits',
            'status': 'good',
            'current_usage': conv_pct,
            'target_usage': 90,
            'action': 'Maintain current practice',
            'benefit': 'Already well-adopted, enables automation',
            'priority': 'high'
        })

    # 2. Subject line length
    avg_length = style.get('average_subject_length', 0)
    if avg_length > 60:
        recommendations['high_priority'].append({
            'id': 2,
            'title': 'Reduce Subject Line Length',
            'status': 'needs_improvement',
            'current_value': avg_length,
            'target_value': 50,
            'action': 'Keep subject lines under 50 characters',
            'benefit': 'Better readability in git log, GitHub UI, and terminal',
            'priority': 'high'
        })
    elif avg_length > 50:
        recommendations['medium_priority'].append({
            'id': 2,
            'title': 'Optimize Subject Line Length',
            'status': 'moderate',
            'current_value': avg_length,
            'target_value': 50,
            'action': 'Aim for concise subject lines (under 50 chars)',
            'priority': 'medium'
        })

    # 3. Imperative mood
    imperative_pct = style.get('imperative_mood_percentage', 0)
    if imperative_pct < 80:
        recommendations['high_priority'].append({
            'id': 3,
            'title': 'Use Imperative Mood Consistently',
            'status': 'needs_improvement',
            'current_usage': imperative_pct,
            'target_usage': 90,
            'action': 'Use imperative mood: "add" not "added", "fix" not "fixed"',
            'benefit': 'Clearer, more professional commit messages',
            'priority': 'high',
            'examples': [
                '✓ add user authentication',
                '✗ added user authentication',
                '✓ fix null pointer exception',
                '✗ fixed null pointer exception'
            ]
        })

    # MEDIUM PRIORITY RECOMMENDATIONS
    # 4. Body usage
    body_pct = style.get('has_body_percentage', 0)
    if body_pct < 50:
        recommendations['medium_priority'].append({
            'id': 4,
            'title': 'Increase Body Usage for Complex Changes',
            'status': 'low',
            'current_usage': body_pct,
            'target_usage': 50,
            'action': 'Add commit body for non-trivial changes (>3 files, complex logic)',
            'benefit': 'Better context for code review and future reference',
            'priority': 'medium',
            'when_to_use': [
                'Multiple files changed (>3)',
                'Complex logic modifications',
                'Breaking changes',
                'Security-related changes'
            ]
        })

    # 5. Issue references (nothing is recommended at 25% usage or below)
    issue_pct = style.get('references_issues_percentage', 0)
    if issue_pct > 50:
        recommendations['medium_priority'].append({
            'id': 5,
            'title': 'Continue Issue Referencing Practice',
            'status': 'good',
            'current_usage': issue_pct,
            'action': 'Maintain consistent issue references',
            'benefit': 'Excellent traceability between commits and issues',
            'priority': 'medium'
        })
    elif issue_pct > 25:
        recommendations['medium_priority'].append({
            'id': 5,
            'title': 'Increase Issue References',
            'status': 'moderate',
            'current_usage': issue_pct,
            'target_usage': 60,
            'action': 'Reference related issues: "Closes #123", "Fixes #456", "Refs #789"',
            'benefit': 'Better traceability',
            'priority': 'medium'
        })

    # 6. Scope standardization (medium priority)
    # 'total_scopes' may be positive while the 'scopes' list is empty, so the
    # recommendation is only emitted when there is at least one name to show.
    scope_names = [
        entry['name']
        for entry in scopes.get('scopes', [])[:5]
        if entry.get('name')
    ]
    if scopes.get('total_scopes', 0) > 0 and scope_names:
        recommendations['medium_priority'].append({
            'id': 6,
            'title': 'Use Standard Project Scopes',
            'status': 'good',
            'action': f'Use these common scopes: {", ".join(scope_names)}',
            'benefit': 'Consistent scope usage across team',
            'priority': 'medium',
            'scopes': scope_names
        })

    # LOW PRIORITY RECOMMENDATIONS (always emitted)
    # 7. Co-author attribution
    recommendations['low_priority'].append({
        'id': 7,
        'title': 'Consider Co-Author Attribution',
        'status': 'optional',
        'action': 'Add co-authors for pair programming: Co-authored-by: Name <email>',
        'benefit': 'Team recognition and contribution tracking',
        'priority': 'low',
        'example': 'Co-authored-by: Jane Doe <jane@example.com>'
    })

    # 8. Breaking change documentation
    recommendations['low_priority'].append({
        'id': 8,
        'title': 'Document Breaking Changes',
        'status': 'important',
        'action': 'Use BREAKING CHANGE: footer when applicable',
        'benefit': 'Clear communication of breaking changes for semantic versioning',
        'priority': 'low',
        'example': 'BREAKING CHANGE: API now requires OAuth tokens instead of API keys'
    })

    # Filter by priority if specified
    if priority_filter and priority_filter != 'all':
        priority_key = f'{priority_filter}_priority'
        return {priority_key: recommendations.get(priority_key, [])}
    return recommendations
def generate_style_guide(data: Dict) -> Dict:
    """Build the project-specific style guide section of the report.

    Args:
        data: Analysis results. Reads ``common_types`` from
            ``data['style']['project_style']`` and ``scopes`` from
            ``data['scopes']``; both default to empty lists.

    Returns:
        Dict with the fixed keys ``format``, ``max_subject_length``,
        ``body_wrap`` and ``rules``, plus ``types`` (sorted by descending
        ``count``) and ``scopes`` (at most the first ten entries).
    """
    project_style = data.get('style', {}).get('project_style', {})

    # Commit types, most frequent first.
    ranked_types = sorted(
        project_style.get('common_types', []),
        key=lambda entry: entry.get('count', 0),
        reverse=True,
    )
    type_entries = []
    for entry in ranked_types:
        type_name = entry.get('type', 'unknown')
        type_entries.append({
            'name': type_name,
            'percentage': entry.get('percentage', 0),
            'description': get_type_description(type_name),
        })

    # Scopes, limited to the first ten reported.
    scope_entries = []
    for entry in data.get('scopes', {}).get('scopes', [])[:10]:
        scope_entries.append({
            'name': entry.get('name', 'unknown'),
            'percentage': entry.get('percentage', 0),
            'description': entry.get('description', 'Unknown'),
            'category': entry.get('category', 'other'),
        })

    guide = {
        'format': '<type>(<scope>): <subject>',
        'max_subject_length': 50,
        'body_wrap': 72,
        'types': type_entries,
        'scopes': scope_entries,
    }
    guide['rules'] = [
        'Use imperative mood ("add" not "added")',
        'Capitalize first letter of subject',
        'No period at end of subject line',
        'Use lowercase for scopes',
        'Wrap body at 72 characters',
        'Separate body and footer with blank line',
        'Use bullet points in body',
        'Reference issues when applicable',
    ]
    return guide
def get_type_description(type_name: str) -> str:
    """Return a short human-readable description of a commit type.

    Args:
        type_name: Commit type such as ``'feat'`` or ``'fix'``.

    Returns:
        The matching description, or ``'Other changes'`` for any type that is
        not in the table.
    """
    known_types = dict((
        ('feat', 'New features'),
        ('fix', 'Bug fixes'),
        ('docs', 'Documentation changes'),
        ('style', 'Formatting changes (no code change)'),
        ('refactor', 'Code restructuring'),
        ('perf', 'Performance improvements'),
        ('test', 'Test additions/updates'),
        ('build', 'Build system changes'),
        ('ci', 'CI/CD changes'),
        ('chore', 'Maintenance tasks'),
        ('revert', 'Revert previous commit'),
    ))
    try:
        return known_types[type_name]
    except KeyError:
        return 'Other changes'
def calculate_confidence(data: Dict) -> str:
    """Rate how trustworthy the recommendations are from the sample size.

    Args:
        data: Analysis results; reads ``commits_analyzed`` from
            ``data['style']['project_style']`` (defaults to 0).

    Returns:
        ``'high'`` for 100+ commits, ``'medium'`` for 50+, ``'low'`` for 20+,
        otherwise ``'very_low'``.
    """
    analyzed = data.get('style', {}).get('project_style', {}).get('commits_analyzed', 0)
    # Thresholds are checked from the largest down; the first match wins.
    for minimum, label in ((100, 'high'), (50, 'medium'), (20, 'low')):
        if analyzed >= minimum:
            return label
    return 'very_low'
def main():
    """Command-line entry point: gather analysis data and print the report.

    Parses ``--count``, ``--branch`` and ``--priority``, runs the helper
    scripts located next to this file, and prints the combined report as
    indented JSON on stdout before exiting with status 0. When no helper
    produced any data, an error object is printed to stderr and the process
    exits with status 3.
    """
    cli = argparse.ArgumentParser(description='Generate convention recommendations')
    cli.add_argument('--count', type=int, default=50, help='Number of commits to analyze')
    cli.add_argument('--branch', default='HEAD', help='Branch to analyze')
    cli.add_argument('--priority', choices=['high', 'medium', 'low', 'all'], default='all',
                     help='Filter by priority level')
    options = cli.parse_args()

    # Helper scripts live in the same directory as this file.
    here = os.path.dirname(os.path.abspath(__file__))

    data = gather_analysis_data(options.count, options.branch, here)
    if not data:
        print(json.dumps({'error': 'Failed to gather analysis data'}), file=sys.stderr)
        sys.exit(3)

    report = {
        'commits_analyzed': data.get('style', {}).get('project_style', {}).get('commits_analyzed', 0),
        'branch': options.branch,
        'consistency_score': data.get('patterns', {}).get('consistency_score', 0),
        'confidence': calculate_confidence(data),
        'recommendations': generate_recommendations(data, options.priority),
        'style_guide': generate_style_guide(data),
        'automation': {
            'commitlint': True,
            'changelog_generator': 'standard-version',
            'semantic_release': True,
        },
    }

    print(json.dumps(report, indent=2))
    sys.exit(0)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,421 @@
#!/usr/bin/env python3
"""
============================================================================
SCRIPT: pattern-detector.py
PURPOSE: Detect commit message patterns and conventions from git history
VERSION: 1.0.0
USAGE: ./pattern-detector.py --count N --branch BRANCH [--detailed]
RETURNS: JSON format with pattern detection results
EXIT CODES:
0 - Success
1 - Not a git repository
2 - No commit history
3 - Git command failed
DEPENDENCIES: git, python3
============================================================================
"""
import subprocess
import sys
import json
import re
import argparse
from collections import defaultdict
from typing import Dict, List, Tuple
def run_git_command(cmd: List[str]) -> Tuple[int, str]:
    """Run a command and report its exit status together with its stdout.

    Args:
        cmd: Full argument vector, program name first.

    Returns:
        ``(returncode, stripped stdout)``. If running the command raises,
        ``(1, <exception text>)`` is returned instead.
    """
    try:
        completed = subprocess.run(cmd, capture_output=True, text=True, check=False)
        status, text = completed.returncode, completed.stdout.strip()
    except Exception as exc:
        status, text = 1, str(exc)
    return status, text
def is_git_repository() -> bool:
    """Report whether ``git rev-parse --git-dir`` succeeds in the current directory."""
    status, _output = run_git_command(['git', 'rev-parse', '--git-dir'])
    return not status != 0
def has_commits() -> bool:
    """Report whether ``git log -1`` succeeds, i.e. at least one commit exists."""
    status, _output = run_git_command(['git', 'log', '-1'])
    return not status != 0
def get_commits(count: int, branch: str) -> List[Dict[str, str]]:
    """Fetch commit messages from git log.

    Each commit is printed as: hash line, subject line, body lines, then a
    separator line. Blank lines inside the body are kept so paragraph breaks
    survive; only leading and trailing blank lines are removed.

    Args:
        count: Maximum number of commits to read.
        branch: Branch or revision to read from.

    Returns:
        A list of dicts with the keys ``hash``, ``subject``, ``body`` and
        ``full`` (subject, blank line, body; or just the subject when the
        body is empty). An empty list is returned when git fails.
    """
    separator = '---COMMIT_SEPARATOR---'
    code, output = run_git_command([
        'git', 'log',
        f'-{count}',
        branch,
        f'--format=%H%n%s%n%b%n{separator}',
    ])
    if code != 0:
        return []

    commits: List[Dict[str, str]] = []
    record: List[str] = []

    def flush() -> None:
        # A usable record holds at least the hash line and the subject line.
        if len(record) >= 2:
            subject = record[1]
            body = '\n'.join(record[2:]).strip()
            commits.append({
                'hash': record[0],
                'subject': subject,
                'body': body,
                'full': subject + '\n\n' + body if body else subject,
            })
        record.clear()

    for line in output.split('\n'):
        if line == separator:
            flush()
        else:
            record.append(line)
    flush()  # tolerate a final record that lacks its separator
    return commits
def is_conventional_commit(subject: str) -> bool:
    """Check if a subject follows the Conventional Commits header format.

    Accepted shape: ``type``, an optional ``(scope)``, an optional ``!``
    breaking-change marker, then ``": "`` and a non-empty description, e.g.
    ``feat(auth): add login`` or ``feat(api)!: drop v1``.

    Args:
        subject: First line of the commit message.

    Returns:
        True when the subject matches the format.
    """
    pattern = r'^[a-z]+(\([^)]+\))?!?: .+'
    return bool(re.match(pattern, subject))
def has_prefix(subject: str) -> bool:
    """Report whether the subject starts with a bracketed prefix such as ``[PREFIX]``."""
    bracketed = re.match(r'^\[[^\]]+\]', subject)
    return bracketed is not None
def has_tag(subject: str) -> bool:
    """Report whether the subject begins with ``#``, as in ``#tag message``."""
    return subject[:1] == '#'
def is_imperative_mood(subject: str) -> bool:
    """Heuristically decide whether a subject is written in imperative mood.

    The subject is lower-cased; if it contains a colon, only the text after
    the first colon is considered. The first word of that text must be one of
    a fixed list of base-form verbs and must not be a listed past-tense or
    ``-ing`` form.

    Args:
        subject: First line of the commit message.

    Returns:
        True when the first word is a known imperative verb.
    """
    base_forms = {
        'add', 'fix', 'update', 'remove', 'delete', 'create', 'implement',
        'change', 'improve', 'optimize', 'refactor', 'enhance', 'correct',
        'resolve', 'merge', 'bump', 'revert', 'document', 'upgrade',
        'downgrade', 'rename', 'move', 'replace', 'extract', 'simplify',
    }
    inflected_forms = {
        'added', 'fixed', 'updated', 'removed', 'deleted',
        'created', 'implemented', 'changed', 'improved',
        'adding', 'fixing', 'updating',
    }

    lowered = subject.lower()
    _prefix, colon, remainder = lowered.partition(':')
    description = remainder.strip() if colon else lowered

    tokens = description.split()
    if not tokens:
        return False
    leading = tokens[0]
    return leading not in inflected_forms and leading in base_forms
def is_capitalized(subject: str) -> bool:
    """Report whether the subject's description starts with an uppercase letter.

    When the subject contains a colon, the description is the stripped text
    after the first colon; otherwise it is the whole subject.
    """
    _prefix, colon, remainder = subject.partition(':')
    description = remainder.strip() if colon else subject
    return bool(description) and description[0].isupper()
def has_no_period_end(subject: str) -> bool:
    """Report whether the subject's last character is something other than ``.``."""
    return subject[-1:] != '.'
def has_blank_line_before_body(full_message: str) -> bool:
    """Report whether the subject is followed by a blank line.

    Messages with fewer than three lines always pass; otherwise the second
    line must be empty or whitespace-only.
    """
    message_lines = full_message.split('\n')
    return len(message_lines) < 3 or message_lines[1].strip() == ''
def is_body_wrapped(body: str, max_width: int = 72) -> bool:
    """Check if body lines are wrapped at max_width.

    Lines that start (after stripping) with a bullet marker (``-``, ``*``,
    ``•``) or a URL scheme are allowed to exceed the limit.

    Args:
        body: Commit message body; an empty body counts as wrapped.
        max_width: Maximum allowed line length in characters.

    Returns:
        False if any non-exempt line is longer than ``max_width``.
    """
    if not body:
        return True
    # The tuple must never contain '': str.startswith('') is always True and
    # would exempt every line.
    exempt_starts = ('-', '*', '•', 'http://', 'https://')
    for line in body.split('\n'):
        if line.strip().startswith(exempt_starts):
            continue
        if len(line) > max_width:
            return False
    return True
def has_footer(full_message: str) -> bool:
    """Report whether the message contains a recognised footer marker.

    Recognised markers: ``BREAKING CHANGE:``, ``Closes #N``, ``Fixes #N``,
    ``Refs #N``, ``Co-authored-by:`` and ``Signed-off-by:``. The search
    covers the entire message text.
    """
    markers = (
        r'BREAKING CHANGE:',
        r'Closes #\d+',
        r'Fixes #\d+',
        r'Refs #\d+',
        r'Co-authored-by:',
        r'Signed-off-by:',
    )
    return any(re.search(marker, full_message) for marker in markers)
def references_issues(full_message: str) -> bool:
    """Report whether the message appears to reference an issue.

    A match is ``#`` followed by digits, or the words ``closes``, ``fixes``
    or ``refs`` (first letter in either case) anywhere in the text.
    """
    found = re.search(r'#\d+|[Cc]loses|[Ff]ixes|[Rr]efs', full_message)
    return found is not None
def mentions_breaking(full_message: str) -> bool:
    """Report whether the message carries a ``BREAKING CHANGE:`` or ``BREAKING-CHANGE:`` marker."""
    markers = ('BREAKING CHANGE:', 'BREAKING-CHANGE:')
    return any(marker in full_message for marker in markers)
def has_co_authors(full_message: str) -> bool:
    """Report whether the message contains a ``Co-authored-by:`` trailer."""
    return full_message.find('Co-authored-by:') != -1
def is_signed_off(full_message: str) -> bool:
    """Report whether the message contains a ``Signed-off-by:`` trailer."""
    return full_message.find('Signed-off-by:') != -1
def includes_rationale(body: str) -> bool:
    """Report whether the body contains wording that suggests a rationale.

    This is a case-insensitive substring search for a fixed list of cue
    fragments (``because``, ``to ``, ``for ``, ``why``, ``since``, ``as ``,
    ``in order to``); it does not respect word boundaries.
    """
    if not body:
        return False
    lowered = body.lower()
    for cue in ('because', 'to ', 'for ', 'why', 'since', 'as ', 'in order to'):
        if cue in lowered:
            return True
    return False
def mentions_impact(body: str) -> bool:
    """Report whether the body contains wording that suggests an impact.

    This is a case-insensitive substring search for ``affect``, ``impact``,
    ``change``, ``improve``, ``break`` or ``fix``.
    """
    if not body:
        return False
    lowered = body.lower()
    for cue in ('affect', 'impact', 'change', 'improve', 'break', 'fix'):
        if cue in lowered:
            return True
    return False
def analyze_patterns(commits: List[Dict[str, str]]) -> Dict:
    """Tally format, convention and content patterns across commits.

    Args:
        commits: Commit dicts with the keys ``subject``, ``body`` and
            ``full``.

    Returns:
        Dict with ``commits_analyzed``, ``patterns`` (per category, per
        pattern: ``count``, ``percentage``, ``strength``),
        ``consistency_score`` and ``dominant_pattern``. Only patterns that
        matched at least once appear in ``patterns``.
    """
    total = len(commits)
    tallies = {'format': {}, 'conventions': {}, 'content': {}}
    with_body = 0  # denominator for the body-specific conventions

    def bump(category, name):
        bucket = tallies[category]
        bucket[name] = bucket.get(name, 0) + 1

    subject_checks = (
        ('imperative_mood', is_imperative_mood),
        ('capitalized_subject', is_capitalized),
        ('no_period_end', has_no_period_end),
    )
    message_checks = (
        ('references_issues', references_issues),
        ('mentions_breaking', mentions_breaking),
        ('has_co_authors', has_co_authors),
        ('signed_off', is_signed_off),
    )
    body_content_checks = (
        ('includes_rationale', includes_rationale),
        ('mentions_impact', mentions_impact),
    )

    for commit in commits:
        subject, body, full = commit['subject'], commit['body'], commit['full']

        # Format: exactly one bucket per commit.
        if is_conventional_commit(subject):
            layout = 'conventional_commits'
        elif has_prefix(subject):
            layout = 'prefixed'
        elif has_tag(subject):
            layout = 'tagged'
        else:
            layout = 'simple_subject'
        bump('format', layout)

        # Conventions.
        for name, check in subject_checks:
            if check(subject):
                bump('conventions', name)
        if body:
            with_body += 1
            if has_blank_line_before_body(full):
                bump('conventions', 'blank_line_before_body')
            if is_body_wrapped(body):
                bump('conventions', 'wrapped_body')
        if has_footer(full):
            bump('conventions', 'has_footer')

        # Content.
        for name, check in message_checks:
            if check(full):
                bump('content', name)
        for name, check in body_content_checks:
            if check(body):
                bump('content', name)

    def percentage_of(count, denominator):
        if denominator > 0:
            return round((count / denominator * 100), 1)
        return 0

    strength_levels = (
        (95, "perfect"),
        (80, "strong"),
        (65, "dominant"),
        (45, "common"),
        (25, "moderate"),
        (10, "occasional"),
        (1, "rare"),
    )

    def strength_of(percentage):
        for floor, label in strength_levels:
            if percentage >= floor:
                return label
        return "absent"

    body_specific = ('blank_line_before_body', 'wrapped_body')
    results = {'format': {}, 'conventions': {}, 'content': {}}
    for category, counters in tallies.items():
        for pattern_name, count in counters.items():
            denominator = with_body if pattern_name in body_specific else total
            percentage = percentage_of(count, denominator)
            results[category][pattern_name] = {
                'count': count,
                'percentage': percentage,
                'strength': strength_of(percentage),
            }

    def pct(category, name):
        return results[category].get(name, {}).get('percentage', 0)

    # Consistency score weights: format 40%, conventions 40%, content 20%.
    format_score = pct('format', 'conventional_commits')
    convention_scores = [
        pct('conventions', 'imperative_mood'),
        pct('conventions', 'capitalized_subject'),
        pct('conventions', 'no_period_end'),
    ]
    avg_convention = sum(convention_scores) / len(convention_scores)
    content_scores = [
        pct('content', 'references_issues'),
        pct('content', 'includes_rationale'),
    ]
    avg_content = sum(content_scores) / len(content_scores)
    consistency_score = int(format_score * 0.4 + avg_convention * 0.4 + avg_content * 0.2)

    # Most frequent format bucket; the first one seen wins ties.
    formats = results['format']
    if formats:
        dominant_pattern = max(formats, key=lambda name: formats[name]['count'])
    else:
        dominant_pattern = "unknown"

    return {
        'commits_analyzed': total,
        'patterns': results,
        'consistency_score': consistency_score,
        'dominant_pattern': dominant_pattern,
    }
def main():
    """Command-line entry point: analyze commit patterns and print JSON.

    Exits with status 1 outside a git repository, 2 when there is no commit
    history, 3 when no commits could be fetched, and 0 after printing the
    results to stdout. Error objects are written to stderr.
    """
    cli = argparse.ArgumentParser(description='Detect commit message patterns')
    cli.add_argument('--count', type=int, default=50, help='Number of commits to analyze')
    cli.add_argument('--branch', default='HEAD', help='Branch to analyze')
    cli.add_argument('--detailed', action='store_true', help='Include detailed breakdown')
    options = cli.parse_args()

    def fail(message, status):
        print(json.dumps({'error': message}), file=sys.stderr)
        sys.exit(status)

    if not is_git_repository():
        fail('Not in a git repository', 1)
    if not has_commits():
        fail('No commit history found', 2)

    commits = get_commits(options.count, options.branch)
    if not commits:
        fail('Failed to fetch commits', 3)

    report = analyze_patterns(commits)
    report['branch'] = options.branch
    report['detailed'] = options.detailed

    print(json.dumps(report, indent=2))
    sys.exit(0)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,230 @@
#!/bin/bash
# ============================================================================
# SCRIPT: scope-extractor.sh
# PURPOSE: Extract and analyze scopes from commit messages
# VERSION: 1.0.0
# USAGE: ./scope-extractor.sh --count N --branch BRANCH [--min-frequency N]
# RETURNS: JSON format with scope analysis results
# EXIT CODES:
#   0 - Success
#   1 - Not a git repository
#   2 - No commit history
#   3 - Git command failed
#   4 - Invalid command line arguments
# DEPENDENCIES: git, awk, and standard text utilities (grep, sed, sort, uniq,
#               tr, wc, cut, mktemp)
# ============================================================================

# Escape a value so it can be placed between double quotes in JSON.
json_escape() {
    local value=$1
    value=${value//\\/\\\\}
    value=${value//\"/\\\"}
    value=${value//$'\t'/\\t}
    printf '%s' "$value"
}

# Print "part" as a percentage of "whole" with one decimal place.
# awk always prints a leading digit ("0.5"); bc would print ".5", which is
# not a valid JSON number.
percent() {
    LC_ALL=C awk -v part="$1" -v whole="$2" \
        'BEGIN { if (whole > 0) printf "%.1f", part * 100 / whole; else printf "0" }'
}

# Parse command line arguments
COUNT=50
BRANCH="HEAD"
MIN_FREQUENCY=2

while [[ $# -gt 0 ]]; do
    case $1 in
        --count|--branch|--min-frequency)
            # "shift 2" fails without shifting when the value is missing,
            # which would make this loop spin forever.
            if [[ $# -lt 2 ]]; then
                echo "{\"error\": \"Missing value for $1\"}" >&2
                exit 4
            fi
            case $1 in
                --count) COUNT="$2" ;;
                --branch) BRANCH="$2" ;;
                --min-frequency) MIN_FREQUENCY="$2" ;;
            esac
            shift 2
            ;;
        *)
            shift
            ;;
    esac
done

# Both values are used in numeric contexts below.
if ! [[ $COUNT =~ ^[0-9]+$ && $MIN_FREQUENCY =~ ^[0-9]+$ ]]; then
    echo '{"error": "--count and --min-frequency must be non-negative integers"}' >&2
    exit 4
fi

# Validate git repository
if ! git rev-parse --git-dir >/dev/null 2>&1; then
    echo '{"error": "Not in a git repository"}' >&2
    exit 1
fi

# Check if commits exist
if ! git log -1 >/dev/null 2>&1; then
    echo '{"error": "No commit history found"}' >&2
    exit 2
fi

# Get commit subjects
if ! SUBJECTS=$(git log --format='%s' -"$COUNT" "$BRANCH" 2>/dev/null); then
    echo '{"error": "Failed to fetch git log"}' >&2
    exit 3
fi

# Create temporary files for processing and clean them up on exit
TEMP_SCOPES=$(mktemp)
TEMP_RESULTS=$(mktemp)
trap 'rm -f "$TEMP_SCOPES" "$TEMP_RESULTS"' EXIT

# Extract scopes from conventional commit format: type(scope): subject
# Also handles nested scopes (type(parent/child): subject), multi-scopes
# (type(scope1,scope2): subject) and the "!" breaking-change marker.
TOTAL_COMMITS=$(printf '%s\n' "$SUBJECTS" | wc -l | tr -d '[:space:]')
SCOPED_COMMITS=0
SCOPED_RE='^[a-z]+\(([^)]+)\)!?: '

while IFS= read -r subject; do
    [ -z "$subject" ] && continue
    if [[ $subject =~ $SCOPED_RE ]]; then
        SCOPED_COMMITS=$((SCOPED_COMMITS + 1))
        # Split on commas and spaces, trim, and drop empty pieces.
        printf '%s\n' "${BASH_REMATCH[1]}" | tr ', ' '\n\n' |
            sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -e '/^$/d' >> "$TEMP_SCOPES"
    fi
done <<< "$SUBJECTS"

# No scopes found
if [ ! -s "$TEMP_SCOPES" ]; then
    cat << EOF
{
  "total_scopes": 0,
  "scoped_commits": 0,
  "scoped_percentage": 0,
  "scopes": [],
  "message": "No scopes detected in commit history"
}
EOF
    exit 0
fi

# Sort and count unique scopes, most frequent first
SCOPE_COUNTS=$(sort "$TEMP_SCOPES" | uniq -c | sort -rn)
SCOPED_PCT=$(percent "$SCOPED_COMMITS" "$TOTAL_COMMITS")
# Counted before the min-frequency filter is applied
UNIQUE_SCOPES=$(printf '%s\n' "$SCOPE_COUNTS" | wc -l | tr -d '[:space:]')

# Subjects of the last 10 commits, used to flag scopes as active
RECENT_SUBJECTS=$(git log --format='%s' -10 "$BRANCH" 2>/dev/null)

# Build JSON output
{
    echo "{"
    echo "  \"commits_analyzed\": $TOTAL_COMMITS,"
    echo "  \"scoped_commits\": $SCOPED_COMMITS,"
    echo "  \"scoped_percentage\": $SCOPED_PCT,"
    echo "  \"branch\": \"$(json_escape "$BRANCH")\","
    echo "  \"total_scopes\": $UNIQUE_SCOPES,"
    echo "  \"scopes\": ["

    FIRST=true
    while read -r count scope; do
        # Skip if below min frequency
        [ "$count" -lt "$MIN_FREQUENCY" ] && continue

        PCT=$(percent "$count" "$SCOPED_COMMITS")

        # Hierarchical scopes contain "/"; only the first two levels are kept
        HIERARCHY="null"
        if [[ $scope == */* ]]; then
            PARENT=$(printf '%s\n' "$scope" | cut -d'/' -f1)
            CHILD=$(printf '%s\n' "$scope" | cut -d'/' -f2)
            HIERARCHY="\"$(json_escape "$PARENT/$CHILD")\""
        fi

        # Categorize scope
        CATEGORY="other"
        DESCRIPTION="Other"
        case "$scope" in
            auth|security|login|oauth|session)
                CATEGORY="feature"
                DESCRIPTION="Authentication and authorization"
                ;;
            api|endpoint|backend|server|middleware)
                CATEGORY="backend"
                DESCRIPTION="API and backend services"
                ;;
            ui|component|style|frontend|view)
                CATEGORY="ui"
                DESCRIPTION="User interface"
                ;;
            db|database|schema|migration|query)
                CATEGORY="backend"
                DESCRIPTION="Database operations"
                ;;
            docs|readme|changelog|guide)
                CATEGORY="documentation"
                DESCRIPTION="Documentation"
                ;;
            test|e2e|unit|integration|spec)
                CATEGORY="testing"
                DESCRIPTION="Testing"
                ;;
            ci|cd|deploy|docker|k8s|pipeline)
                CATEGORY="infrastructure"
                DESCRIPTION="Infrastructure and deployment"
                ;;
            config|settings|env)
                CATEGORY="configuration"
                DESCRIPTION="Configuration"
                ;;
            core|utils|lib|common)
                CATEGORY="core"
                DESCRIPTION="Core functionality"
                ;;
        esac

        # Active = used in the last 10 commits. grep -c already prints "0"
        # when nothing matches (while exiting 1), so no "|| echo 0" fallback:
        # that would produce "0<newline>0". -F matches the scope literally.
        RECENT_USAGE=$(printf '%s\n' "$RECENT_SUBJECTS" | grep -cF -- "($scope)")
        ACTIVE="true"
        [ "$RECENT_USAGE" -eq 0 ] && ACTIVE="false"

        # Up to three example commits, JSON-escaped
        EXAMPLES=""
        while IFS= read -r example; do
            [ -z "$example" ] && continue
            EXAMPLES="${EXAMPLES:+$EXAMPLES,}\"$(json_escape "$example")\""
        done < <(git log --format='%s' --fixed-strings --grep="($scope)" -3 "$BRANCH" 2>/dev/null)

        # Add comma if not first
        if [ "$FIRST" = true ]; then
            FIRST=false
        else
            echo "    ,"
        fi

        # Write scope entry
        printf '    {\n'
        printf '      "name": "%s",\n' "$(json_escape "$scope")"
        printf '      "count": %s,\n' "$count"
        printf '      "percentage": %s,\n' "$PCT"
        printf '      "category": "%s",\n' "$CATEGORY"
        printf '      "description": "%s",\n' "$DESCRIPTION"
        printf '      "hierarchy": %s,\n' "$HIERARCHY"
        printf '      "active": %s,\n' "$ACTIVE"
        printf '      "recent_usage": %s,\n' "$RECENT_USAGE"
        printf '      "examples": [%s]\n' "$EXAMPLES"
        printf '    }\n'
    done <<< "$SCOPE_COUNTS"

    echo "  ]"
    echo "}"
} > "$TEMP_RESULTS"

# Output result
cat "$TEMP_RESULTS"
exit 0

View File

@@ -0,0 +1,232 @@
#!/bin/bash
# ============================================================================
# SCRIPT: style-analyzer.sh
# PURPOSE: Analyze git commit history for style patterns and conventions
# VERSION: 1.0.0
# USAGE: ./style-analyzer.sh [count] [branch]
# RETURNS: JSON format with style analysis results
# EXIT CODES:
#   0 - Success
#   1 - Not a git repository
#   2 - No commit history
#   3 - Git command failed
# DEPENDENCIES: bash 4+ (associative arrays), git, awk, grep, wc, tr
# ============================================================================

# Default parameters
COUNT="${1:-50}"
BRANCH="${2:-HEAD}"

# Escape a value so it can be placed between double quotes in JSON.
json_escape() {
    local value=$1
    value=${value//\\/\\\\}
    value=${value//\"/\\\"}
    value=${value//$'\t'/\\t}
    printf '%s' "$value"
}

# Validate git repository
if ! git rev-parse --git-dir >/dev/null 2>&1; then
    echo '{"error": "Not in a git repository"}' >&2
    exit 1
fi

# Check if commits exist
if ! git log -1 >/dev/null 2>&1; then
    echo '{"error": "No commit history found"}' >&2
    exit 2
fi

# Get commit subjects
if ! SUBJECTS=$(git log --format='%s' -"$COUNT" "$BRANCH" 2>/dev/null); then
    echo '{"error": "Failed to fetch git log"}' >&2
    exit 3
fi

# Get full commit messages (subject + body), one separator line per commit
FULL_MESSAGES=$(git log --format='%B%n---COMMIT_SEPARATOR---' -"$COUNT" "$BRANCH" 2>/dev/null)

# Count total commits analyzed
TOTAL_COMMITS=$(printf '%s\n' "$SUBJECTS" | wc -l | tr -d '[:space:]')

# Initialize counters
CONVENTIONAL_COUNT=0
IMPERATIVE_COUNT=0
HAS_BODY_COUNT=0
REFERENCES_ISSUES_COUNT=0
CAPITALIZED_COUNT=0
NO_PERIOD_COUNT=0

# Arrays for type and scope counting
declare -A TYPE_COUNT
declare -A SCOPE_COUNT

# Subject line lengths
LENGTHS=""
TOTAL_LENGTH=0

# type, optional (scope), optional "!" breaking marker, then ": "
CONVENTIONAL_RE='^([a-z]+)(\(([^)]+)\))?!?: '
# Base-form verbs; the description's first word must be one of them
IMPERATIVE_RE='^(add|fix|update|remove|delete|create|implement|change|improve|optimize|refactor|enhance|correct|resolve|merge|bump|revert|document|upgrade|downgrade|rename|move|replace|extract|simplify)([^[:alpha:]]|$)'

# Process each subject line
while IFS= read -r subject; do
    [ -z "$subject" ] && continue

    # Length analysis
    LENGTH=${#subject}
    LENGTHS="$LENGTHS $LENGTH"
    TOTAL_LENGTH=$((TOTAL_LENGTH + LENGTH))

    # Conventional commits format; DESCRIPTION is the text after the prefix
    DESCRIPTION="$subject"
    if [[ $subject =~ $CONVENTIONAL_RE ]]; then
        CONVENTIONAL_COUNT=$((CONVENTIONAL_COUNT + 1))
        TYPE="${BASH_REMATCH[1]}"
        SCOPE="${BASH_REMATCH[3]}"
        DESCRIPTION="${subject:${#BASH_REMATCH[0]}}"
        TYPE_COUNT[$TYPE]=$(( ${TYPE_COUNT[$TYPE]:-0} + 1 ))
        if [ -n "$SCOPE" ]; then
            SCOPE_COUNT[$SCOPE]=$(( ${SCOPE_COUNT[$SCOPE]:-0} + 1 ))
        fi
    fi

    # Imperative mood (heuristic): the description, with any conventional
    # prefix removed, starts with a base-form verb. Checking the description
    # lets plain "Add foo" count and stops "fix: fixed foo" from counting.
    if [[ ${DESCRIPTION,,} =~ $IMPERATIVE_RE ]]; then
        IMPERATIVE_COUNT=$((IMPERATIVE_COUNT + 1))
    fi

    # Check capitalization (first letter after type/scope)
    if echo "$subject" | grep -qE ': [A-Z]'; then
        CAPITALIZED_COUNT=$((CAPITALIZED_COUNT + 1))
    fi

    # Check no period at end
    if [[ $subject != *. ]]; then
        NO_PERIOD_COUNT=$((NO_PERIOD_COUNT + 1))
    fi
done <<< "$SUBJECTS"

# Analyze full messages for body and issue references.
# State must start initialised: the first line read is a subject, and the
# flags are tested when the first separator is reached.
ISSUE_RE='#[0-9]+|[Cc]loses|[Ff]ixes|[Rr]efs'
HAS_BODY=false
HAS_ISSUE_REF=false
IN_SUBJECT=true

while IFS= read -r line; do
    if [ "$line" = "---COMMIT_SEPARATOR---" ]; then
        [ "$HAS_BODY" = true ] && HAS_BODY_COUNT=$((HAS_BODY_COUNT + 1))
        [ "$HAS_ISSUE_REF" = true ] && REFERENCES_ISSUES_COUNT=$((REFERENCES_ISSUES_COUNT + 1))
        HAS_BODY=false
        HAS_ISSUE_REF=false
        IN_SUBJECT=true
        continue
    fi

    # Issue references count anywhere in the message, subject included
    # (e.g. "fix: handle timeout (#123)").
    if [[ $line =~ $ISSUE_RE ]]; then
        HAS_ISSUE_REF=true
    fi

    if [ "$IN_SUBJECT" = true ]; then
        IN_SUBJECT=false
        continue
    fi

    # Any non-empty line after the subject means the commit has a body
    if [ -n "$line" ]; then
        HAS_BODY=true
    fi
done <<< "$FULL_MESSAGES"

# Calculate average subject length (integer division)
if [ "$TOTAL_COMMITS" -gt 0 ]; then
    AVG_LENGTH=$((TOTAL_LENGTH / TOTAL_COMMITS))
else
    AVG_LENGTH=0
fi

# Calculate standard deviation (simplified)
SUM_SQUARED_DIFF=0
for length in $LENGTHS; do
    DIFF=$((length - AVG_LENGTH))
    SUM_SQUARED_DIFF=$((SUM_SQUARED_DIFF + DIFF * DIFF))
done

# awk prints a leading digit ("0.5"); bc would print ".5", which is not a
# valid JSON number.
if [ "$TOTAL_COMMITS" -gt 0 ]; then
    STDDEV=$(LC_ALL=C awk -v s="$SUM_SQUARED_DIFF" -v n="$TOTAL_COMMITS" \
        'BEGIN { printf "%.1f", sqrt(s / n) }')
else
    STDDEV=0
fi

# Print $1 as a percentage of TOTAL_COMMITS with one decimal place
calc_percentage() {
    if [ "$TOTAL_COMMITS" -gt 0 ]; then
        LC_ALL=C awk -v c="$1" -v n="$TOTAL_COMMITS" 'BEGIN { printf "%.1f", c * 100 / n }'
    else
        echo "0"
    fi
}

CONVENTIONAL_PCT=$(calc_percentage "$CONVENTIONAL_COUNT")
IMPERATIVE_PCT=$(calc_percentage "$IMPERATIVE_COUNT")
HAS_BODY_PCT=$(calc_percentage "$HAS_BODY_COUNT")
REFERENCES_ISSUES_PCT=$(calc_percentage "$REFERENCES_ISSUES_COUNT")
CAPITALIZED_PCT=$(calc_percentage "$CAPITALIZED_COUNT")
NO_PERIOD_PCT=$(calc_percentage "$NO_PERIOD_COUNT")

# Build common_types JSON array
COMMON_TYPES_JSON="["
FIRST=true
for type in "${!TYPE_COUNT[@]}"; do
    count=${TYPE_COUNT[$type]}
    pct=$(calc_percentage "$count")
    if [ "$FIRST" = true ]; then
        FIRST=false
    else
        COMMON_TYPES_JSON="$COMMON_TYPES_JSON,"
    fi
    COMMON_TYPES_JSON="$COMMON_TYPES_JSON{\"type\":\"$(json_escape "$type")\",\"count\":$count,\"percentage\":$pct}"
done
COMMON_TYPES_JSON="$COMMON_TYPES_JSON]"

# Build common_scopes JSON array
COMMON_SCOPES_JSON="["
FIRST=true
for scope in "${!SCOPE_COUNT[@]}"; do
    count=${SCOPE_COUNT[$scope]}
    pct=$(calc_percentage "$count")
    if [ "$FIRST" = true ]; then
        FIRST=false
    else
        COMMON_SCOPES_JSON="$COMMON_SCOPES_JSON,"
    fi
    COMMON_SCOPES_JSON="$COMMON_SCOPES_JSON{\"scope\":\"$(json_escape "$scope")\",\"count\":$count,\"percentage\":$pct}"
done
COMMON_SCOPES_JSON="$COMMON_SCOPES_JSON]"

# Get sample commits (first 3), JSON-escaped
SAMPLE_COMMITS=""
while IFS= read -r sample; do
    [ -z "$sample" ] && continue
    SAMPLE_COMMITS="${SAMPLE_COMMITS:+$SAMPLE_COMMITS,}\"$(json_escape "$sample")\""
done < <(git log --format='%s' -3 "$BRANCH" 2>/dev/null)

# Calculate consistency score (weighted average, truncated to an integer)
# Weights: conventional(30), imperative(25), capitalized(15), no_period(15), body(10), issues(5)
CONSISTENCY_SCORE=$(LC_ALL=C awk \
    -v conv="$CONVENTIONAL_PCT" -v imp="$IMPERATIVE_PCT" -v cap="$CAPITALIZED_PCT" \
    -v dot="$NO_PERIOD_PCT" -v bod="$HAS_BODY_PCT" -v ref="$REFERENCES_ISSUES_PCT" \
    'BEGIN { printf "%d", conv * 0.3 + imp * 0.25 + cap * 0.15 + dot * 0.15 + bod * 0.1 + ref * 0.05 }')

# Conventional commits count as "used" when more than half the commits follow them
if [ "$CONVENTIONAL_COUNT" -gt $((TOTAL_COMMITS / 2)) ]; then
    USES_CONVENTIONAL="true"
else
    USES_CONVENTIONAL="false"
fi

# Output JSON
cat << EOF
{
  "project_style": {
    "commits_analyzed": $TOTAL_COMMITS,
    "branch": "$(json_escape "$BRANCH")",
    "uses_conventional_commits": $USES_CONVENTIONAL,
    "conventional_commits_percentage": $CONVENTIONAL_PCT,
    "average_subject_length": $AVG_LENGTH,
    "subject_length_stddev": $STDDEV,
    "common_types": $COMMON_TYPES_JSON,
    "common_scopes": $COMMON_SCOPES_JSON,
    "imperative_mood_percentage": $IMPERATIVE_PCT,
    "capitalized_subject_percentage": $CAPITALIZED_PCT,
    "no_period_end_percentage": $NO_PERIOD_PCT,
    "has_body_percentage": $HAS_BODY_PCT,
    "references_issues_percentage": $REFERENCES_ISSUES_PCT,
    "consistency_score": $CONSISTENCY_SCORE,
    "sample_commits": [$SAMPLE_COMMITS]
  }
}
EOF
exit 0