#!/usr/bin/env python3
"""
Code statistics tool - analyzes codebase metrics such as line counts,
functions, classes, and comment ratio.
"""
import os
import sys
import re
from pathlib import Path
from typing import Dict, List, Any
import argparse
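
# Programmatic use (a minimal sketch; the CLI entry point main() below is the
# intended interface):
#   stats = CodeStats(Path('.'))
#   stats.analyze_directory(Path('.'))
#   stats.print_report()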


class CodeStats:
    def __init__(self, project_path: Path):
        self.project_path = Path(project_path)
        self.stats: Dict[str, Any] = {
            'files': 0,
            'total_lines': 0,
            'code_lines': 0,
            'comment_lines': 0,
            'blank_lines': 0,
            'functions': 0,
            'classes': 0,
            'imports': 0,
            'by_extension': {}
        }
    def analyze_file(self, file_path: Path) -> Dict[str, Any]:
        """Analyze a single file."""
        file_stats = {
            'lines': 0,
            'code': 0,
            'comments': 0,
            'blank': 0,
            'functions': 0,
            'classes': 0,
            'imports': 0
        }
        try:
            content = file_path.read_text(encoding='utf-8')
            lines = content.splitlines()
            in_block_comment = False
            for line in lines:
                file_stats['lines'] += 1
                stripped = line.strip()
                # Blank line
                if not stripped:
                    file_stats['blank'] += 1
                    continue
                # Block comment detection (/* ... */); a /* ... */ pair on one
                # line does not leave the block-comment state set
                if '/*' in stripped and not in_block_comment:
                    file_stats['comments'] += 1
                    if '*/' not in stripped:
                        in_block_comment = True
                    continue
                if in_block_comment:
                    file_stats['comments'] += 1
                    if '*/' in stripped:
                        in_block_comment = False
                    continue
                # Full-line comment
                if stripped.startswith('#') or stripped.startswith('//'):
                    file_stats['comments'] += 1
                    continue
                # Inline comment: count the line as both code and comment
                if '#' in stripped and not stripped.startswith('"') and not stripped.startswith("'"):
                    file_stats['comments'] += 1
                    file_stats['code'] += 1
                    continue
                # Code line
                file_stats['code'] += 1
                # Function / class / import detection
                if 'def ' in line and 'class ' not in line:
                    file_stats['functions'] += 1
                elif 'class ' in line:
                    file_stats['classes'] += 1
                elif 'import ' in line or 'from ' in line:
                    file_stats['imports'] += 1
            return file_stats
        except Exception as e:
            print(f"⚠️ Warning: could not read file {file_path}: {e}", file=sys.stderr)
            return file_stats
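
    # Note: the counters above are substring heuristics rather than a parser;
    # e.g. a string literal containing 'def ' is counted as a function, and
    # '//' and '/* ... */' only make sense for C-family and JS/TS sources.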
    def analyze_directory(self, directory: Path):
        """Recursively analyze a directory."""
        ignore_patterns = {
            '.git', '.svn', '.hg', '__pycache__', 'node_modules',
            'venv', 'env', '.venv', 'dist', 'build'
        }
        for item in directory.iterdir():
            # Skip ignored entries; match on the entry name (not the full path)
            # so e.g. "environment.py" is not dropped by the 'env' rule
            if item.name in ignore_patterns or item.name.endswith('.egg-info'):
                continue
            if item.is_dir():
                self.analyze_directory(item)
            elif item.is_file():
                # Supported file types
                supported_ext = {'.py', '.js', '.ts', '.java', '.cpp', '.c', '.h', '.sh'}
                ext = item.suffix.lower()
                if ext in supported_ext:
                    file_stats = self.analyze_file(item)
                    # Update global totals
                    self.stats['files'] += 1
                    self.stats['total_lines'] += file_stats['lines']
                    self.stats['code_lines'] += file_stats['code']
                    self.stats['comment_lines'] += file_stats['comments']
                    self.stats['blank_lines'] += file_stats['blank']
                    self.stats['functions'] += file_stats['functions']
                    self.stats['classes'] += file_stats['classes']
                    self.stats['imports'] += file_stats['imports']
                    # Group by extension
                    if ext not in self.stats['by_extension']:
                        self.stats['by_extension'][ext] = {
                            'files': 0,
                            'lines': 0,
                            'code': 0,
                            'comments': 0,
                            'blank': 0,
                            'functions': 0,
                            'classes': 0,
                            'imports': 0
                        }
                    ext_stats = self.stats['by_extension'][ext]
                    ext_stats['files'] += 1
                    ext_stats['lines'] += file_stats['lines']
                    ext_stats['code'] += file_stats['code']
                    ext_stats['comments'] += file_stats['comments']
                    ext_stats['blank'] += file_stats['blank']
                    ext_stats['functions'] += file_stats['functions']
                    ext_stats['classes'] += file_stats['classes']
                    ext_stats['imports'] += file_stats['imports']
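
    # Resulting shape of self.stats['by_extension'] (values illustrative):
    #   {'.py': {'files': 3, 'lines': 420, 'code': 300, 'comments': 60,
    #            'blank': 60, 'functions': 25, 'classes': 4, 'imports': 18}}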
    def calculate_complexity_score(self) -> float:
        """Compute a rough code complexity score (0-100)."""
        if self.stats['files'] == 0:
            return 0.0
        # Based on three factors:
        # 1. Average file size
        avg_file_size = self.stats['total_lines'] / self.stats['files']
        size_score = min(avg_file_size / 500, 1.0)  # penalized above 500 lines per file
        # 2. Comment ratio
        comment_ratio = self.stats['comment_lines'] / max(self.stats['total_lines'], 1)
        comment_score = 1.0 if comment_ratio > 0.1 else 0.5  # penalized when the comment ratio is low
        # 3. Function density
        func_density = self.stats['functions'] / max(self.stats['files'], 1)
        func_score = min(func_density / 20, 1.0)  # penalized when there are too many functions
        # Combined score, 0-100
        complexity = (size_score + comment_score + func_score) / 3 * 100
        return min(complexity, 100.0)
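
    # Worked example of the formula above (illustrative numbers, not real data):
    #   10 files, 4000 total lines, 500 comment lines, 120 functions
    #   size_score    = min((4000 / 10) / 500, 1.0) = 0.8
    #   comment_score = 1.0   (comment ratio 500/4000 = 12.5% > 10%)
    #   func_score    = min((120 / 10) / 20, 1.0)   = 0.6
    #   complexity    = (0.8 + 1.0 + 0.6) / 3 * 100 = 80.0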
    def print_report(self):
        """Print the analysis report."""
        print("📊 Code Statistics Report")
        print("=" * 60)
        print(f"Project path: {self.project_path}")
        print("=" * 60)
        print()
        if self.stats['files'] == 0:
            print("⚠️ No supported source files found")
            return
        # Overall statistics
        total = max(self.stats['total_lines'], 1)  # guard against all-empty files
        print("📁 Overall statistics:")
        print("-" * 60)
        print(f"Files: {self.stats['files']:,}")
        print(f"Total lines: {self.stats['total_lines']:,}")
        print(f"Code lines: {self.stats['code_lines']:,} ({self.stats['code_lines']/total*100:.1f}%)")
        print(f"Comment lines: {self.stats['comment_lines']:,} ({self.stats['comment_lines']/total*100:.1f}%)")
        print(f"Blank lines: {self.stats['blank_lines']:,} ({self.stats['blank_lines']/total*100:.1f}%)")
        print()
        print(f"Functions: {self.stats['functions']:,}")
        print(f"Classes: {self.stats['classes']:,}")
        print(f"Imports: {self.stats['imports']:,}")
        print()
        # Complexity score
        complexity = self.calculate_complexity_score()
        if complexity < 50:
            complexity_color = "\033[92m"  # green
            complexity_level = "low"
        elif complexity < 75:
            complexity_color = "\033[93m"  # yellow
            complexity_level = "medium"
        else:
            complexity_color = "\033[91m"  # red
            complexity_level = "high"
        print(f"Complexity: {complexity_color}{complexity:.1f} ({complexity_level})\033[0m")
        print()
        # Per-extension statistics
        if self.stats['by_extension']:
            print("📂 By file type:")
            print("-" * 60)
            print(f"{'Type':<10} {'Files':>10} {'Lines':>12} {'Code':>12} {'Comments':>10} {'Funcs':>10} {'Classes':>8}")
            print("-" * 60)
            for ext, stats in sorted(self.stats['by_extension'].items()):
                print(f"{ext:<10} {stats['files']:>10,} {stats['lines']:>12,} {stats['code']:>12,} "
                      f"{stats['comments']:>10,} {stats['functions']:>10,} {stats['classes']:>8,}")
            print()
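
        # Health score rubric: four checks, up to 25 points each (comment
        # ratio, average file size with partial credit, blank-line ratio,
        # functions per file).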
        # Health score
        health_score = 0
        health_issues = []
        # Comment ratio
        comment_ratio = self.stats['comment_lines'] / max(self.stats['total_lines'], 1)
        if comment_ratio >= 0.1:
            health_score += 25
        else:
            health_issues.append(f"Comment ratio is low ({comment_ratio*100:.1f}%, >10% recommended)")
        # Average file size
        avg_file_size = self.stats['total_lines'] / self.stats['files']
        if avg_file_size <= 300:
            health_score += 25
        elif avg_file_size <= 500:
            health_score += 15
        else:
            health_issues.append(f"Average file size is large ({avg_file_size:.0f} lines, <300 recommended)")
        # Blank-line ratio
        blank_ratio = self.stats['blank_lines'] / max(self.stats['total_lines'], 1)
        if 0.05 <= blank_ratio <= 0.2:
            health_score += 25
        else:
            health_issues.append(f"Unusual blank-line ratio ({blank_ratio*100:.1f}%)")
        # Function distribution
        avg_funcs_per_file = self.stats['functions'] / self.stats['files']
        if avg_funcs_per_file <= 15:
            health_score += 25
        else:
            health_issues.append(f"High average function count ({avg_funcs_per_file:.1f} per file)")
        print("🏥 Code health:")
        print("-" * 60)
        print(f"Health score: {health_score}/100")
        if health_issues:
            print()
            print("⚠️ Issues found:")
            for issue in health_issues:
                print(f"  - {issue}")
        else:
            print("✅ Code health looks good")
        print()
        # Suggestions
        print("💡 Suggestions:")
        print("-" * 60)
        if comment_ratio < 0.1:
            print("  - Add more comments to improve maintainability")
        if avg_file_size > 500:
            print("  - Consider splitting large files, following the single-responsibility principle")
        if len(self.stats['by_extension']) > 5:
            print("  - The project mixes several languages; watch dependency management")
        elif len(self.stats['by_extension']) == 1:
            print("  - Single-language project; structure is clear")
        if self.stats['classes'] > 0:
            avg_methods_per_class = self.stats['functions'] / max(self.stats['classes'], 1)
            if avg_methods_per_class > 20:
                print("  - Classes may carry too many responsibilities; consider splitting them")
        if health_score >= 80:
            print("  - ✅ Code quality is good, keep it up")
        print()
        print("=" * 60)
        print("Code statistics complete")
        print("=" * 60)


def main():
    parser = argparse.ArgumentParser(
        description='Code statistics tool - analyze codebase metrics',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python3 code-stats.py .                      # analyze the current directory
  python3 code-stats.py /path/to/project       # analyze a specific project
  python3 code-stats.py . --json               # JSON output
  python3 code-stats.py . --output report.md   # save the report to a file
"""
    )
    parser.add_argument(
        'project_path',
        nargs='?',
        default='.',
        help='project path (default: current directory)'
    )
    parser.add_argument(
        '--json',
        action='store_true',
        help='output as JSON'
    )
    parser.add_argument(
        '-o', '--output',
        help='write the report to a file'
    )
    args = parser.parse_args()
    if not os.path.exists(args.project_path):
        print(f"❌ Error: path does not exist: {args.project_path}")
        sys.exit(1)
    analyzer = CodeStats(args.project_path)
    analyzer.analyze_directory(Path(args.project_path))
    if args.json:
        import json
        print(json.dumps(analyzer.stats, indent=2))
    elif args.output:
        # Redirect stdout to the output file while printing the report
        with open(args.output, 'w', encoding='utf-8') as f:
            old_stdout = sys.stdout
            sys.stdout = f
            try:
                analyzer.print_report()
            finally:
                sys.stdout = old_stdout
        print(f"✅ Report saved: {args.output}")
    else:
        analyzer.print_report()
    sys.exit(0)


if __name__ == '__main__':
    main()
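
# Shape of the --json output (keys come from CodeStats.stats; values are
# illustrative, not from a real run):
# {
#   "files": 2,
#   "total_lines": 240,
#   "code_lines": 170,
#   "comment_lines": 30,
#   "blank_lines": 40,
#   "functions": 12,
#   "classes": 2,
#   "imports": 8,
#   "by_extension": {".py": {"files": 2, "lines": 240, "...": "..."}}
# }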