#!/usr/bin/env python3
"""Code statistics tool.

Walks a project tree and reports line counts (code / comment / blank),
function, class and import counts, a rough complexity score and a simple
health score, either as a human-readable report or as JSON.
"""

import argparse
import contextlib
import fnmatch
import json
import os
import re
import sys
from pathlib import Path
from typing import Any, Dict, List, Tuple

# Only files with these (lower-cased) suffixes are analyzed.
_SUPPORTED_EXTENSIONS = frozenset(
    {'.py', '.js', '.ts', '.java', '.cpp', '.c', '.h', '.sh'}
)

# Extensions that use `//` line comments and `/* ... */` block comments.
_C_STYLE_EXTENSIONS = frozenset({'.js', '.ts', '.java', '.cpp', '.c', '.h'})

# Glob patterns matched against a single directory entry NAME (never the
# whole path), so a project that merely lives under ".../build/..." or that
# contains "environment.py" is not skipped by accident.
_IGNORE_PATTERNS = (
    '.git', '.svn', '.hg', '__pycache__', 'node_modules',
    'venv', 'env', '.venv', 'dist', 'build', '*.egg-info',
)

# (key in the global totals, key in a per-file / per-extension dict)
_COUNTER_KEYS = (
    ('total_lines', 'lines'),
    ('code_lines', 'code'),
    ('comment_lines', 'comments'),
    ('blank_lines', 'blank'),
    ('functions', 'functions'),
    ('classes', 'classes'),
    ('imports', 'imports'),
)

# Definition detectors. They are applied with `.match()` to the stripped code
# part of a line (trailing comment removed), so they are anchored at the
# start of the statement instead of matching a keyword anywhere in the line.
_FUNCTION_RE = re.compile(r'(?:async\s+)?def\s+\w+\s*\(')
_CLASS_RE = re.compile(r'(?:template\s*<[^>]*>\s*)?(?:\w+\s+)*class\s+\w+')
_IMPORT_RE = re.compile(r'(?:import\s+\S|from\s+\S+\s+import\b|#\s*include\b)')


def _is_ignored(name: str) -> bool:
    """Return True when a directory entry name matches an ignore pattern."""
    return any(fnmatch.fnmatch(name, pattern) for pattern in _IGNORE_PATTERNS)


def _comment_syntax(ext: str) -> Tuple[Tuple[str, ...], Tuple[str, ...], bool]:
    """Return the comment syntax used for a file extension.

    Returns:
        A tuple ``(line_markers, inline_markers, has_block)``:
        markers that start a full-line comment, markers that start a trailing
        comment after code, and whether ``/* ... */`` blocks are recognised.
    """
    if ext == '.py':
        return ('#',), ('#',), False
    if ext == '.sh':
        # In shell a mid-line '#' only starts a comment at the beginning of a
        # word; this keeps `$#` and `${#var}` from being counted as comments.
        return ('#',), (' #', '\t#'), False
    if ext in _C_STYLE_EXTENSIONS:
        return ('//',), ('//',), True
    # Unknown extension: accept every style.
    return ('#', '//'), ('#', '//'), True


def _find_comment(text: str, markers: Tuple[str, ...]) -> Tuple[int, str]:
    """Locate the first comment marker that is not inside a string literal.

    The scan is per line and only tracks simple quoting ('...', "...", `...`
    with backslash escapes); multi-line strings are not tracked.

    Returns:
        ``(index, marker)`` of the first match, or ``(-1, '')`` if none.
    """
    quote = ''
    i = 0
    length = len(text)
    while i < length:
        ch = text[i]
        if quote:
            if ch == '\\':
                i += 2  # skip the escaped character
                continue
            if ch == quote:
                quote = ''
        elif ch in '"\'`':
            quote = ch
        else:
            for marker in markers:
                if text.startswith(marker, i):
                    return i, marker
        i += 1
    return -1, ''


class CodeStats:
    """Collect line, comment and definition statistics for a project tree.

    Attributes:
        project_path: The root path given to the constructor.
        stats: Aggregated totals ('files', 'total_lines', 'code_lines',
            'comment_lines', 'blank_lines', 'functions', 'classes',
            'imports') plus a per-extension breakdown in 'by_extension'.
    """

    def __init__(self, project_path: Path):
        self.project_path = Path(project_path)
        self.stats: Dict[str, Any] = {
            'files': 0,
            'total_lines': 0,
            'code_lines': 0,
            'comment_lines': 0,
            'blank_lines': 0,
            'functions': 0,
            'classes': 0,
            'imports': 0,
            'by_extension': {},
        }

    def analyze_file(self, file_path: Path) -> Dict[str, Any]:
        """Analyze a single file and return its counters.

        Comment syntax is chosen from the file extension. A line that holds
        code followed by a trailing comment is counted both as a code line
        and as a comment line, so code + comments + blank may exceed the
        total line count.

        Args:
            file_path: File to read. It is decoded as UTF-8; undecodable
                bytes are replaced so they cannot abort the line count.

        Returns:
            A dict with the keys 'lines', 'code', 'comments', 'blank',
            'functions', 'classes' and 'imports'. If the file cannot be
            read, a warning is printed to stderr and all counters are zero.
        """
        file_stats = {
            'lines': 0,
            'code': 0,
            'comments': 0,
            'blank': 0,
            'functions': 0,
            'classes': 0,
            'imports': 0,
        }
        path = Path(file_path)

        try:
            content = path.read_text(encoding='utf-8', errors='replace')
        except OSError as e:
            print(f"⚠️ 警告: 无法读取文件 {file_path}: {e}", file=sys.stderr)
            return file_stats

        line_markers, inline_markers, has_block = _comment_syntax(path.suffix.lower())
        if has_block:
            inline_markers = inline_markers + ('/*',)

        in_block_comment = False
        for line in content.splitlines():
            file_stats['lines'] += 1
            stripped = line.strip()

            # Blank lines are counted as blank even inside a block comment.
            if not stripped:
                file_stats['blank'] += 1
                continue

            # Continuation (or end) of a /* ... */ block.
            if in_block_comment:
                file_stats['comments'] += 1
                if '*/' in stripped:
                    in_block_comment = False
                continue

            # Full-line comment.
            if stripped.startswith(line_markers):
                file_stats['comments'] += 1
                continue

            # Block comment opening at the start of the line; it stays open
            # only if it is not closed again on the same line.
            if has_block and stripped.startswith('/*'):
                file_stats['comments'] += 1
                in_block_comment = '*/' not in stripped[2:]
                continue

            # Code line, possibly followed by a trailing comment.
            file_stats['code'] += 1
            code_part = stripped
            pos, marker = _find_comment(stripped, inline_markers)
            if pos > 0:
                file_stats['comments'] += 1
                code_part = stripped[:pos].rstrip()
                if marker == '/*' and '*/' not in stripped[pos + 2:]:
                    in_block_comment = True

            # Definition detection runs on the code part only, so a trailing
            # comment neither hides nor fakes a definition.
            if _FUNCTION_RE.match(code_part):
                file_stats['functions'] += 1
            elif _CLASS_RE.match(code_part):
                file_stats['classes'] += 1
            elif _IMPORT_RE.match(code_part):
                file_stats['imports'] += 1

        return file_stats

    def _record_file(self, path: Path) -> None:
        """Analyze one supported file and fold its counters into self.stats."""
        ext = path.suffix.lower()
        if ext not in _SUPPORTED_EXTENSIONS:
            return

        file_stats = self.analyze_file(path)

        ext_stats = self.stats['by_extension'].setdefault(ext, {
            'files': 0,
            'lines': 0,
            'code': 0,
            'comments': 0,
            'blank': 0,
            'functions': 0,
            'classes': 0,
            'imports': 0,
        })
        self.stats['files'] += 1
        ext_stats['files'] += 1
        for total_key, file_key in _COUNTER_KEYS:
            self.stats[total_key] += file_stats[file_key]
            ext_stats[file_key] += file_stats[file_key]

    def analyze_directory(self, directory: Path):
        """Recursively analyze a directory and accumulate into self.stats.

        Entries whose name matches an ignore pattern are skipped, as are
        symlinked directories (to avoid cycles). A directory that cannot be
        listed produces a warning on stderr instead of aborting the run.
        If ``directory`` is actually a regular file, that file is analyzed.

        Args:
            directory: Directory (or single file) to analyze.
        """
        directory = Path(directory)
        if directory.is_file():
            self._record_file(directory)
            return

        try:
            entries = sorted(directory.iterdir())
        except OSError as e:
            print(f"⚠️ 警告: 无法读取目录 {directory}: {e}", file=sys.stderr)
            return

        for item in entries:
            if _is_ignored(item.name):
                continue
            if item.is_dir():
                if not item.is_symlink():
                    self.analyze_directory(item)
            elif item.is_file():
                self._record_file(item)

    def calculate_complexity_score(self) -> float:
        """Return a rough complexity score in the range 0-100 (higher is worse).

        The score is the mean of three terms, each in [0, 1]:
        average file size (saturating at 500 lines), a comment term (0.5 when
        more than 10% of lines are comments, otherwise 1.0) and function
        density (saturating at 20 functions per file).

        Returns:
            0.0 when no files have been analyzed, otherwise the score.
        """
        files = self.stats['files']
        if files == 0:
            return 0.0

        # 1. Average file size: larger files raise the score.
        avg_file_size = self.stats['total_lines'] / files
        size_score = min(avg_file_size / 500, 1.0)

        # 2. Comment ratio: a LOW ratio must raise the score, like the other
        #    two terms do, so the poorly commented case gets the larger value.
        comment_ratio = self.stats['comment_lines'] / max(self.stats['total_lines'], 1)
        comment_score = 0.5 if comment_ratio > 0.1 else 1.0

        # 3. Function density: many functions per file raise the score.
        func_density = self.stats['functions'] / files
        func_score = min(func_density / 20, 1.0)

        complexity = (size_score + comment_score + func_score) / 3 * 100
        return min(complexity, 100.0)

    def print_report(self):
        """Print the human-readable report to sys.stdout.

        ANSI colour codes are emitted only when sys.stdout is a terminal, so
        a report redirected to a file stays plain text.
        """
        stats = self.stats
        rule = "-" * 60
        banner = "=" * 60

        print("📊 代码统计报告")
        print(banner)
        print(f"项目路径: {self.project_path}")
        print(banner)
        print()

        if stats['files'] == 0:
            print("⚠️ 未找到支持的代码文件")
            return

        # Guard the percentages: files may exist while every one is empty.
        total = max(stats['total_lines'], 1)
        code_pct = stats['code_lines'] / total * 100
        comment_pct = stats['comment_lines'] / total * 100
        blank_pct = stats['blank_lines'] / total * 100

        # Overall totals
        print("📁 总体统计:")
        print(rule)
        print(f"文件总数: {stats['files']:,}")
        print(f"总行数: {stats['total_lines']:,}")
        print(f"代码行数: {stats['code_lines']:,} ({code_pct:.1f}%)")
        print(f"注释行数: {stats['comment_lines']:,} ({comment_pct:.1f}%)")
        print(f"空行行数: {stats['blank_lines']:,} ({blank_pct:.1f}%)")
        print()
        print(f"函数总数: {stats['functions']:,}")
        print(f"类总数: {stats['classes']:,}")
        print(f"导入语句: {stats['imports']:,}")
        print()

        # Complexity score
        complexity = self.calculate_complexity_score()
        if complexity < 50:
            complexity_color = "\033[92m"  # green
            complexity_level = "低"
        elif complexity < 75:
            complexity_color = "\033[93m"  # yellow
            complexity_level = "中等"
        else:
            complexity_color = "\033[91m"  # red
            complexity_level = "高"

        color_reset = "\033[0m"
        is_tty = getattr(sys.stdout, 'isatty', None)
        if not (callable(is_tty) and is_tty()):
            complexity_color = color_reset = ""
        print(f"代码复杂度: {complexity_color}{complexity:.1f} ({complexity_level}){color_reset}")
        print()

        # Per-extension table
        if stats['by_extension']:
            print("📂 按文件类型统计:")
            print(rule)
            print(f"{'类型':<10} {'文件数':>10} {'总行数':>12} {'代码行':>12} {'注释':>10} {'函数':>10} {'类':>8}")
            print(rule)
            for ext, ext_stats in sorted(stats['by_extension'].items()):
                print(f"{ext:<10} {ext_stats['files']:>10,} {ext_stats['lines']:>12,} {ext_stats['code']:>12,} "
                      f"{ext_stats['comments']:>10,} {ext_stats['functions']:>10,} {ext_stats['classes']:>8,}")
            print()

        # Health score: four checks worth up to 25 points each.
        health_score = 0
        health_issues: List[str] = []

        comment_ratio = stats['comment_lines'] / total
        if comment_ratio >= 0.1:
            health_score += 25
        else:
            health_issues.append(f"注释率偏低 ({comment_ratio*100:.1f}%,建议>10%)")

        avg_file_size = stats['total_lines'] / stats['files']
        if avg_file_size <= 300:
            health_score += 25
        elif avg_file_size <= 500:
            health_score += 15
        else:
            health_issues.append(f"平均文件大小偏大 ({avg_file_size:.0f}行,建议<300)")

        blank_ratio = stats['blank_lines'] / total
        if 0.05 <= blank_ratio <= 0.2:
            health_score += 25
        else:
            health_issues.append(f"空白行比例异常 ({blank_ratio*100:.1f}%)")

        avg_funcs_per_file = stats['functions'] / stats['files']
        if avg_funcs_per_file <= 15:
            health_score += 25
        else:
            health_issues.append(f"平均函数数偏高 ({avg_funcs_per_file:.1f}个/文件)")

        print("🏥 代码健康度:")
        print(rule)
        print(f"健康评分: {health_score}/100")
        if health_issues:
            print()
            print("⚠️ 发现的问题:")
            for issue in health_issues:
                print(f" - {issue}")
        else:
            print("✅ 代码健康状况良好")
        print()

        # Recommendations
        print("💡 建议:")
        print(rule)
        if comment_ratio < 0.1:
            print(" - 增加代码注释,提高可维护性")
        if avg_file_size > 500:
            print(" - 考虑拆分大文件,遵循单一职责原则")
        if len(stats['by_extension']) > 5:
            print(" - 项目包含多种语言,注意依赖管理")
        elif len(stats['by_extension']) == 1:
            print(" - 单一语言项目,结构清晰")
        if stats['classes'] > 0:
            avg_methods_per_class = stats['functions'] / max(stats['classes'], 1)
            if avg_methods_per_class > 20:
                print(" - 类的职责可能过重,考虑拆分类")
        if health_score >= 80:
            print(" - ✅ 代码质量良好,继续保持")
        print()
        print(banner)
        print("代码统计完成")
        print(banner)


def main():
    """Command-line entry point.

    Parses the arguments, analyzes the given path and emits either the text
    report or JSON. With ``--output`` the result (text or JSON) is written to
    that file instead of stdout. Exits with status 1 if the path does not
    exist, otherwise with status 0.
    """
    parser = argparse.ArgumentParser(
        description='代码统计器 - 分析代码库统计信息',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
  python3 code-stats.py .                    # 分析当前目录
  python3 code-stats.py /path/to/project     # 分析指定项目
  python3 code-stats.py . --json             # JSON格式输出
  python3 code-stats.py . --output report.md # 保存报告
        """
    )
    parser.add_argument(
        'project_path',
        nargs='?',
        default='.',
        help='项目路径(默认:当前目录)'
    )
    parser.add_argument(
        '--json',
        action='store_true',
        help='JSON格式输出'
    )
    parser.add_argument(
        '-o', '--output',
        help='输出报告到文件'
    )
    args = parser.parse_args()

    if not os.path.exists(args.project_path):
        print(f"❌ 错误: 路径不存在: {args.project_path}", file=sys.stderr)
        sys.exit(1)

    analyzer = CodeStats(args.project_path)
    analyzer.analyze_directory(Path(args.project_path))

    if args.json:
        payload = json.dumps(analyzer.stats, indent=2)
        if args.output:
            # Honour --output for JSON as well instead of silently ignoring it.
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(payload + "\n")
            print(f"✅ 报告已保存: {args.output}")
        else:
            print(payload)
    elif args.output:
        # redirect_stdout restores sys.stdout even if print_report raises.
        with open(args.output, 'w', encoding='utf-8') as f, contextlib.redirect_stdout(f):
            analyzer.print_report()
        print(f"✅ 报告已保存: {args.output}")
    else:
        analyzer.print_report()

    sys.exit(0)


if __name__ == '__main__':
    main()