364 lines
12 KiB
Python
364 lines
12 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
代码统计器 - 分析代码库统计信息,包括行数、函数、类、注释率等
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import re
|
||
from pathlib import Path
|
||
from typing import Dict, List, Any
|
||
import argparse
|
||
|
||
|
||
class CodeStats:
    """Collect line, comment and definition statistics for a source tree.

    Results accumulate in ``self.stats``: project-wide totals plus a
    ``by_extension`` mapping with the same counters per file extension.
    All counting is heuristic and line-based; no real parsing is done.
    """

    # Only files with these extensions are analyzed.
    SUPPORTED_EXTENSIONS = frozenset(
        {'.py', '.js', '.ts', '.java', '.cpp', '.c', '.h', '.sh'}
    )
    # Extensions whose comments start with '#'. Every other supported
    # extension is treated as using '//' and '/* ... */' comments.
    HASH_COMMENT_EXTENSIONS = frozenset({'.py', '.sh'})
    # Entries skipped when their *name* (not their full path) matches.
    IGNORED_NAMES = frozenset({
        '.git', '.svn', '.hg', '__pycache__', 'node_modules',
        'venv', 'env', '.venv', 'dist', 'build',
    })
    # Entries skipped when their name ends with one of these suffixes.
    IGNORED_SUFFIXES = ('.egg-info',)

    # Definition detectors, matched against the start of a stripped line so
    # that words inside strings or identifiers do not count.
    _DEF_RE = re.compile(r'^(?:async\s+)?def\s+\w+')
    _CLASS_RE = re.compile(
        r'^(?:(?:export|default|public|private|protected|abstract|final|static)\s+)*'
        r'class\s+\w+'
    )
    _IMPORT_RE = re.compile(r'^(?:import|from)\s+\S')

    def __init__(self, project_path: Path):
        """Create an analyzer with zeroed counters for *project_path*."""
        self.project_path = Path(project_path)
        self.stats: Dict[str, Any] = {
            'files': 0,
            'total_lines': 0,
            'code_lines': 0,
            'comment_lines': 0,
            'blank_lines': 0,
            'functions': 0,
            'classes': 0,
            'imports': 0,
            'by_extension': {},
        }

    @staticmethod
    def _has_inline_hash_comment(text: str) -> bool:
        """Return True if *text* contains a '#' outside of quoted strings."""
        quote = None
        escaped = False
        for ch in text:
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif quote:
                if ch == quote:
                    quote = None
            elif ch in ('"', "'"):
                quote = ch
            elif ch == '#':
                return True
        return False

    def analyze_file(self, file_path: Path) -> Dict[str, Any]:
        """Count lines, comments and definitions in a single file.

        Args:
            file_path: File to analyze; its suffix selects the comment style.

        Returns:
            A dict with the keys ``lines``, ``code``, ``comments``, ``blank``,
            ``functions``, ``classes`` and ``imports``. A line that holds code
            followed by a comment is counted as both code and comment. If the
            file cannot be read or decoded as UTF-8, a warning is printed to
            stderr and all counters are zero.
        """
        file_stats = {
            'lines': 0,
            'code': 0,
            'comments': 0,
            'blank': 0,
            'functions': 0,
            'classes': 0,
            'imports': 0,
        }
        file_path = Path(file_path)

        # Keep the try body minimal: only the read can fail here.
        try:
            content = file_path.read_text(encoding='utf-8')
        except (OSError, UnicodeDecodeError) as e:
            print(f"⚠️  警告: 无法读取文件 {file_path}: {e}", file=sys.stderr)
            return file_stats

        ext = file_path.suffix.lower()
        # Pick the comment syntax by extension so that '/*' inside a Python
        # or shell line (e.g. a glob) and '#include' in C are not misread.
        # Unknown extensions keep the permissive behavior of checking both.
        uses_c_comments = ext not in self.HASH_COMMENT_EXTENSIONS
        uses_hash_comments = (
            ext in self.HASH_COMMENT_EXTENSIONS
            or ext not in self.SUPPORTED_EXTENSIONS
        )

        in_block_comment = False
        for line in content.splitlines():
            file_stats['lines'] += 1
            stripped = line.strip()

            if not stripped:
                file_stats['blank'] += 1
                continue

            if in_block_comment:
                file_stats['comments'] += 1
                if '*/' in stripped:
                    in_block_comment = False
                continue

            has_trailing_comment = False

            if uses_c_comments:
                if stripped.startswith('//'):
                    file_stats['comments'] += 1
                    continue
                opener = stripped.find('/*')
                if opener != -1:
                    # Stay in block-comment state only when the comment is
                    # not closed again on this same line.
                    in_block_comment = '*/' not in stripped[opener + 2:]
                    if opener == 0:
                        file_stats['comments'] += 1
                        continue
                    has_trailing_comment = True

            if uses_hash_comments:
                if stripped.startswith('#'):
                    file_stats['comments'] += 1
                    continue
                if self._has_inline_hash_comment(stripped):
                    has_trailing_comment = True

            if has_trailing_comment:
                file_stats['comments'] += 1
            file_stats['code'] += 1

            # Definition detection runs for every code line, including those
            # that carry a trailing comment.
            if self._DEF_RE.match(stripped):
                file_stats['functions'] += 1
            elif self._CLASS_RE.match(stripped):
                file_stats['classes'] += 1
            elif self._IMPORT_RE.match(stripped):
                file_stats['imports'] += 1

        return file_stats

    def _is_ignored(self, item: Path) -> bool:
        """Return True if the entry's own name is on the ignore list."""
        name = item.name
        return name in self.IGNORED_NAMES or name.endswith(self.IGNORED_SUFFIXES)

    def _accumulate(self, ext: str, file_stats: Dict[str, Any]) -> None:
        """Add one file's counters to the totals and to its extension bucket."""
        totals = self.stats
        totals['files'] += 1
        totals['total_lines'] += file_stats['lines']
        totals['code_lines'] += file_stats['code']
        totals['comment_lines'] += file_stats['comments']
        totals['blank_lines'] += file_stats['blank']
        totals['functions'] += file_stats['functions']
        totals['classes'] += file_stats['classes']
        totals['imports'] += file_stats['imports']

        bucket = totals['by_extension'].setdefault(ext, {
            'files': 0,
            'lines': 0,
            'code': 0,
            'comments': 0,
            'blank': 0,
            'functions': 0,
            'classes': 0,
            'imports': 0,
        })
        bucket['files'] += 1
        for key in ('lines', 'code', 'comments', 'blank',
                    'functions', 'classes', 'imports'):
            bucket[key] += file_stats[key]

    def analyze_directory(self, directory: Path):
        """Recursively analyze every supported file below *directory*.

        Entries whose name is on the ignore list are skipped, as are
        symlinked directories (to avoid following cycles). A directory that
        cannot be listed produces a warning on stderr and is skipped.
        """
        try:
            # Sorted so that traversal order, and therefore the key order of
            # ``by_extension``, is deterministic.
            entries = sorted(Path(directory).iterdir())
        except OSError as e:
            print(f"⚠️  警告: 无法读取目录 {directory}: {e}", file=sys.stderr)
            return

        for item in entries:
            if self._is_ignored(item):
                continue

            if item.is_dir():
                if item.is_symlink():
                    continue
                self.analyze_directory(item)
            elif item.is_file():
                ext = item.suffix.lower()
                if ext in self.SUPPORTED_EXTENSIONS:
                    self._accumulate(ext, self.analyze_file(item))

    def calculate_complexity_score(self) -> float:
        """Return a heuristic complexity score in the range 0-100.

        Higher means more complex. The score is the mean of three factors,
        each in 0..1: average file size (saturating at 500 lines), comment
        ratio (1.0 when at most 10% of lines are comments, else 0.5) and
        functions per file (saturating at 20). Returns 0.0 when no files
        have been analyzed.
        """
        file_count = self.stats['files']
        if file_count == 0:
            return 0.0

        # 1. Average file size: larger files raise the score.
        avg_file_size = self.stats['total_lines'] / file_count
        size_score = min(avg_file_size / 500, 1.0)

        # 2. Comment ratio: a LOW ratio raises the score, matching the
        #    direction of the other two factors.
        comment_ratio = self.stats['comment_lines'] / max(self.stats['total_lines'], 1)
        comment_score = 0.5 if comment_ratio > 0.1 else 1.0

        # 3. Function density: more functions per file raise the score.
        func_density = self.stats['functions'] / file_count
        func_score = min(func_density / 20, 1.0)

        complexity = (size_score + comment_score + func_score) / 3 * 100
        return min(complexity, 100.0)

    def print_report(self):
        """Print the human-readable report for the collected stats to stdout."""
        stats = self.stats
        heavy_rule = "=" * 60
        light_rule = "-" * 60

        print("📊 代码统计报告")
        print(heavy_rule)
        print(f"项目路径: {self.project_path}")
        print(heavy_rule)
        print()

        if stats['files'] == 0:
            print("⚠️  未找到支持的代码文件")
            return

        # Files may exist yet contribute zero lines (empty or unreadable),
        # so guard every percentage against division by zero.
        total = max(stats['total_lines'], 1)

        # Overall totals.
        print("📁 总体统计:")
        print(light_rule)
        print(f"文件总数:     {stats['files']:,}")
        print(f"总行数:       {stats['total_lines']:,}")
        print(f"代码行数:     {stats['code_lines']:,} ({stats['code_lines']/total*100:.1f}%)")
        print(f"注释行数:     {stats['comment_lines']:,} ({stats['comment_lines']/total*100:.1f}%)")
        print(f"空行行数:     {stats['blank_lines']:,} ({stats['blank_lines']/total*100:.1f}%)")
        print()
        print(f"函数总数:     {stats['functions']:,}")
        print(f"类总数:       {stats['classes']:,}")
        print(f"导入语句:     {stats['imports']:,}")
        print()

        # Complexity score, colored green / yellow / red with ANSI codes.
        complexity = self.calculate_complexity_score()
        if complexity < 50:
            complexity_color = "\033[92m"
            complexity_level = "低"
        elif complexity < 75:
            complexity_color = "\033[93m"
            complexity_level = "中等"
        else:
            complexity_color = "\033[91m"
            complexity_level = "高"

        print(f"代码复杂度:   {complexity_color}{complexity:.1f} ({complexity_level})\033[0m")
        print()

        # Per-extension table.
        if stats['by_extension']:
            print("📂 按文件类型统计:")
            print(light_rule)
            print(f"{'类型':<10} {'文件数':>10} {'总行数':>12} {'代码行':>12} {'注释':>10} {'函数':>10} {'类':>8}")
            print(light_rule)

            for ext, ext_stats in sorted(stats['by_extension'].items()):
                print(f"{ext:<10} {ext_stats['files']:>10,} {ext_stats['lines']:>12,} {ext_stats['code']:>12,} "
                      f"{ext_stats['comments']:>10,} {ext_stats['functions']:>10,} {ext_stats['classes']:>8,}")

            print()

        # Health score: four checks worth up to 25 points each.
        health_score = 0
        health_issues = []

        comment_ratio = stats['comment_lines'] / total
        if comment_ratio >= 0.1:
            health_score += 25
        else:
            health_issues.append(f"注释率偏低 ({comment_ratio*100:.1f}%,建议>10%)")

        avg_file_size = stats['total_lines'] / stats['files']
        if avg_file_size <= 300:
            health_score += 25
        elif avg_file_size <= 500:
            health_score += 15
        else:
            health_issues.append(f"平均文件大小偏大 ({avg_file_size:.0f}行,建议<300)")

        blank_ratio = stats['blank_lines'] / total
        if 0.05 <= blank_ratio <= 0.2:
            health_score += 25
        else:
            health_issues.append(f"空白行比例异常 ({blank_ratio*100:.1f}%)")

        avg_funcs_per_file = stats['functions'] / stats['files']
        if avg_funcs_per_file <= 15:
            health_score += 25
        else:
            health_issues.append(f"平均函数数偏高 ({avg_funcs_per_file:.1f}个/文件)")

        print("🏥 代码健康度:")
        print(light_rule)
        print(f"健康评分:     {health_score}/100")

        if health_issues:
            print()
            print("⚠️  发现的问题:")
            for issue in health_issues:
                print(f"   - {issue}")
        else:
            print("✅ 代码健康状况良好")

        print()

        # Suggestions.
        print("💡 建议:")
        print(light_rule)

        if comment_ratio < 0.1:
            print("   - 增加代码注释,提高可维护性")

        if avg_file_size > 500:
            print("   - 考虑拆分大文件,遵循单一职责原则")

        language_count = len(stats['by_extension'])
        if language_count > 5:
            print("   - 项目包含多种语言,注意依赖管理")
        elif language_count == 1:
            print("   - 单一语言项目,结构清晰")

        if stats['classes'] > 0:
            avg_methods_per_class = stats['functions'] / stats['classes']
            if avg_methods_per_class > 20:
                print("   - 类的职责可能过重,考虑拆分类")

        if health_score >= 80:
            print("   - ✅ 代码质量良好,继续保持")

        print()
        print(heavy_rule)
        print("代码统计完成")
        print(heavy_rule)
|
||
|
||
|
||
def main():
    """Command-line entry point: parse arguments, analyze, emit the report.

    Exits with status 1 if the project path does not exist or is not a
    directory, otherwise with status 0. With ``--json`` the raw stats are
    emitted as JSON; with ``--output`` the chosen report format is written
    to that file instead of stdout.
    """
    # Local imports: only this entry point needs them.
    import contextlib
    import json

    parser = argparse.ArgumentParser(
        description='代码统计器 - 分析代码库统计信息',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
python3 code-stats.py .                    # 分析当前目录
python3 code-stats.py /path/to/project     # 分析指定项目
python3 code-stats.py . --json             # JSON格式输出
python3 code-stats.py . --output report.md # 保存报告
"""
    )

    parser.add_argument(
        'project_path',
        nargs='?',
        default='.',
        help='项目路径(默认:当前目录)'
    )

    parser.add_argument(
        '--json',
        action='store_true',
        help='JSON格式输出'
    )

    parser.add_argument(
        '-o', '--output',
        help='输出报告到文件'
    )

    args = parser.parse_args()

    project = Path(args.project_path)
    if not project.exists():
        print(f"❌ 错误: 路径不存在: {args.project_path}", file=sys.stderr)
        sys.exit(1)
    # A regular file would otherwise crash inside the directory walk.
    if not project.is_dir():
        print(f"❌ 错误: 路径不是目录: {args.project_path}", file=sys.stderr)
        sys.exit(1)

    analyzer = CodeStats(args.project_path)
    analyzer.analyze_directory(project)

    if args.json:
        report = json.dumps(analyzer.stats, indent=2)
        if args.output:
            # Honor --output for JSON as well instead of silently ignoring it.
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(report + "\n")
            print(f"✅ 报告已保存: {args.output}")
        else:
            print(report)
    elif args.output:
        # Capture the printed report into the file; stdout is restored
        # automatically even if print_report raises.
        with open(args.output, 'w', encoding='utf-8') as f, \
                contextlib.redirect_stdout(f):
            analyzer.print_report()
        print(f"✅ 报告已保存: {args.output}")
    else:
        analyzer.print_report()

    sys.exit(0)
|
||
|
||
|
||
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|