Initial commit

2025-11-29 18:24:37 +08:00
commit 8cd5c7679d
61 changed files with 6788 additions and 0 deletions
--- a/skills/toolkit/python/analysis/code-stats.py
+++ b/skills/toolkit/python/analysis/code-stats.py
@@ -0,0 +1,363 @@
+#!/usr/bin/env python3
+"""
+代码统计器 - 分析代码库统计信息，包括行数、函数、类、注释率等
+"""
+
+import os
+import sys
+import re
+from pathlib import Path
+from typing import Dict, List, Any
+import argparse
+
+
+class CodeStats:
+    """Collect line, comment, and definition statistics for a source tree.
+
+    Results accumulate in ``self.stats``: project-wide totals plus a
+    per-extension breakdown under ``'by_extension'``.
+
+    Classification is a line-based heuristic, not a parser:
+
+    * ``#`` comments are recognized only in ``.py``/``.sh`` files, ``//`` and
+      ``/* */`` comments only in the C-family extensions (files with any
+      other extension passed straight to ``analyze_file`` get both styles).
+    * A line holding both code and a comment is counted once as code AND
+      once as a comment, so code + comments + blank may exceed total lines.
+    * Python docstrings and multi-line string literals count as code.
+    * Only ``def``-style functions are counted as functions.
+    """
+
+    # Glob patterns compared against the LAST path component only, so that
+    # 'env' skips a directory named "env" but not "environment.py", and a
+    # project living under ".../build/..." is still analyzed.
+    IGNORE_PATTERNS = (
+        '.git', '.svn', '.hg', '__pycache__', 'node_modules',
+        'venv', 'env', '.venv', 'dist', 'build', '*.egg-info',
+    )
+
+    # File suffixes (lower-cased) that analyze_directory picks up.
+    SUPPORTED_EXTENSIONS = frozenset(
+        {'.py', '.js', '.ts', '.java', '.cpp', '.c', '.h', '.sh'}
+    )
+
+    # Suffixes whose line-comment marker is '#'.
+    HASH_COMMENT_EXTENSIONS = frozenset({'.py', '.sh'})
+
+    # Suffixes that use '//' line comments and '/* ... */' block comments.
+    SLASH_COMMENT_EXTENSIONS = frozenset(
+        {'.js', '.ts', '.java', '.cpp', '.c', '.h'}
+    )
+
+    # (key in self.stats, key in a per-file result) pairs used when summing.
+    _TOTAL_KEYS = (
+        ('total_lines', 'lines'),
+        ('code_lines', 'code'),
+        ('comment_lines', 'comments'),
+        ('blank_lines', 'blank'),
+        ('functions', 'functions'),
+        ('classes', 'classes'),
+        ('imports', 'imports'),
+    )
+
+    # Patterns are applied to a stripped line with comments already removed.
+    _FUNCTION_RE = re.compile(r'^(?:async\s+)?def\s+\w')
+    _CLASS_RE = re.compile(r'^(?:[A-Za-z]+\s+)*class\s+[A-Za-z_$]')
+    _IMPORT_RE = re.compile(r'^(?:import\s+\S|from\s+\S+\s+import\b)')
+
+    def __init__(self, project_path: Path):
+        """Create an analyzer for *project_path* with all counters at zero."""
+        self.project_path = Path(project_path)
+        self.stats: Dict[str, Any] = {
+            'files': 0,
+            'total_lines': 0,
+            'code_lines': 0,
+            'comment_lines': 0,
+            'blank_lines': 0,
+            'functions': 0,
+            'classes': 0,
+            'imports': 0,
+            'by_extension': {}
+        }
+
+    @staticmethod
+    def _empty_counts() -> Dict[str, int]:
+        """Return a fresh per-file counter dict with every value at zero."""
+        return {
+            'lines': 0,
+            'code': 0,
+            'comments': 0,
+            'blank': 0,
+            'functions': 0,
+            'classes': 0,
+            'imports': 0
+        }
+
+    @staticmethod
+    def _strip_block_comments(text: str):
+        """Remove ``/* ... */`` spans from *text*.
+
+        Returns ``(remaining_code, still_open)`` where ``still_open`` is True
+        when the last ``/*`` on the line has no closing ``*/``.
+        """
+        kept = []
+        pos = 0
+        while True:
+            start = text.find('/*', pos)
+            if start == -1:
+                kept.append(text[pos:])
+                return ''.join(kept).strip(), False
+            kept.append(text[pos:start])
+            end = text.find('*/', start + 2)
+            if end == -1:
+                return ''.join(kept).strip(), True
+            pos = end + 2
+
+    @staticmethod
+    def _strip_hash_comment(text: str) -> str:
+        """Return *text* without a trailing ``#`` comment.
+
+        A ``#`` starts a comment only when it is outside single/double quotes
+        and sits at the start of the line or after whitespace; this keeps
+        ``'#fff'``, ``$#`` and ``${#var}`` from being mistaken for comments.
+        """
+        quote = None
+        escaped = False
+        for idx, ch in enumerate(text):
+            if escaped:
+                escaped = False
+            elif ch == '\\':
+                escaped = True
+            elif quote:
+                if ch == quote:
+                    quote = None
+            elif ch in '"\'':
+                quote = ch
+            elif ch == '#' and (idx == 0 or text[idx - 1].isspace()):
+                return text[:idx].rstrip()
+        return text
+
+    def _count_definitions(self, code: str, counts: Dict[str, int]) -> None:
+        """Bump function/class/import counters if *code* opens a definition."""
+        if self._FUNCTION_RE.match(code):
+            counts['functions'] += 1
+        elif self._CLASS_RE.match(code):
+            counts['classes'] += 1
+        elif self._IMPORT_RE.match(code):
+            counts['imports'] += 1
+
+    def analyze_file(self, file_path: Path) -> Dict[str, Any]:
+        """Count lines, comments, blanks and definitions in one file.
+
+        Args:
+            file_path: File to read as UTF-8 text.
+
+        Returns:
+            Dict with keys ``lines``, ``code``, ``comments``, ``blank``,
+            ``functions``, ``classes`` and ``imports``. If the file cannot be
+            read or decoded, a warning goes to stderr and all counts are zero.
+        """
+        file_stats = self._empty_counts()
+        file_path = Path(file_path)
+
+        try:
+            content = file_path.read_text(encoding='utf-8')
+        except (OSError, UnicodeDecodeError) as e:
+            print(f"⚠️  警告: 无法读取文件 {file_path}: {e}", file=sys.stderr)
+            return file_stats
+
+        # Pick comment syntax by extension; unknown extensions accept both.
+        ext = file_path.suffix.lower()
+        hash_style = ext not in self.SLASH_COMMENT_EXTENSIONS
+        slash_style = ext not in self.HASH_COMMENT_EXTENSIONS
+
+        in_block_comment = False
+
+        for raw_line in content.splitlines():
+            file_stats['lines'] += 1
+            text = raw_line.strip()
+
+            if not text:
+                file_stats['blank'] += 1
+                continue
+
+            has_comment = False
+
+            if in_block_comment:
+                has_comment = True
+                end = text.find('*/')
+                if end == -1:
+                    file_stats['comments'] += 1
+                    continue
+                in_block_comment = False
+                text = text[end + 2:].strip()
+
+            if slash_style and '/*' in text:
+                # A comment opened and closed on the same line must not
+                # leave the block-comment state switched on.
+                has_comment = True
+                text, in_block_comment = self._strip_block_comments(text)
+
+            if slash_style and text.startswith('//'):
+                has_comment = True
+                text = ''
+            elif hash_style and text:
+                code_part = self._strip_hash_comment(text)
+                if code_part != text:
+                    has_comment = True
+                    text = code_part
+
+            if has_comment:
+                file_stats['comments'] += 1
+
+            if text:
+                # Whatever is left after removing comments is code; mixed
+                # lines therefore still take part in definition detection.
+                file_stats['code'] += 1
+                self._count_definitions(text, file_stats)
+
+        return file_stats
+
+    def _is_ignored(self, item: Path) -> bool:
+        """Tell whether *item*'s own name matches one of IGNORE_PATTERNS."""
+        return any(item.match(pattern) for pattern in self.IGNORE_PATTERNS)
+
+    def _record(self, ext: str, file_stats: Dict[str, Any]) -> None:
+        """Add one file's counts to the global and per-extension totals."""
+        self.stats['files'] += 1
+        for total_key, file_key in self._TOTAL_KEYS:
+            self.stats[total_key] += file_stats[file_key]
+
+        ext_stats = self.stats['by_extension'].setdefault(
+            ext, {'files': 0, **self._empty_counts()}
+        )
+        ext_stats['files'] += 1
+        for key, value in file_stats.items():
+            ext_stats[key] += value
+
+    def analyze_directory(self, directory: Path):
+        """Recursively analyze *directory*, accumulating into ``self.stats``.
+
+        Entries whose name matches IGNORE_PATTERNS are skipped, as are files
+        whose suffix is not in SUPPORTED_EXTENSIONS. Symlinked directories
+        are not followed so that link cycles cannot cause endless recursion.
+        A directory that cannot be listed produces a warning on stderr and
+        is skipped.
+        """
+        try:
+            # Sorted so traversal order (and warning order) is reproducible.
+            entries = sorted(Path(directory).iterdir())
+        except OSError as e:
+            print(f"⚠️  警告: 无法读取目录 {directory}: {e}", file=sys.stderr)
+            return
+
+        for item in entries:
+            if self._is_ignored(item):
+                continue
+
+            if item.is_dir():
+                if not item.is_symlink():
+                    self.analyze_directory(item)
+                continue
+
+            if not item.is_file():
+                continue
+
+            ext = item.suffix.lower()
+            if ext in self.SUPPORTED_EXTENSIONS:
+                self._record(ext, self.analyze_file(item))
+
+    def calculate_complexity_score(self) -> float:
+        """Return a 0-100 complexity score; higher means more complex.
+
+        The score is the mean of three terms, each in [0, 1]:
+        average file size (saturating at 500 lines/file), a comment term
+        (0.5 when at least 10% of lines are comments, otherwise 1.0, so
+        sparse comments raise the score), and functions per file
+        (saturating at 20). Returns 0.0 when no files were analyzed.
+        """
+        if self.stats['files'] == 0:
+            return 0.0
+
+        # 1. Average file size: larger files push the score up.
+        avg_file_size = self.stats['total_lines'] / self.stats['files']
+        size_score = min(avg_file_size / 500, 1.0)
+
+        # 2. Comment ratio: too few comments push the score up. The 10%
+        #    threshold matches the health check in print_report.
+        comment_ratio = self.stats['comment_lines'] / max(self.stats['total_lines'], 1)
+        comment_score = 0.5 if comment_ratio >= 0.1 else 1.0
+
+        # 3. Function density: many functions per file push the score up.
+        func_density = self.stats['functions'] / max(self.stats['files'], 1)
+        func_score = min(func_density / 20, 1.0)
+
+        complexity = (size_score + comment_score + func_score) / 3 * 100
+        return min(complexity, 100.0)
+
+    def print_report(self):
+        """Print the human-readable report to ``sys.stdout``.
+
+        Sections: overall totals, complexity score, per-extension table,
+        health score with detected issues, and recommendations. ANSI colour
+        is emitted only when stdout is a terminal, so redirected or saved
+        reports stay free of escape sequences.
+        """
+        print("📊 代码统计报告")
+        print("=" * 60)
+        print(f"项目路径: {self.project_path}")
+        print("=" * 60)
+        print()
+
+        totals = self.stats
+        if totals['files'] == 0:
+            print("⚠️  未找到支持的代码文件")
+            return
+
+        # All analyzed files may be empty (total_lines == 0); use a
+        # non-zero base for every ratio to avoid ZeroDivisionError.
+        line_base = max(totals['total_lines'], 1)
+
+        # Overall totals
+        print("📁 总体统计:")
+        print("-" * 60)
+        print(f"文件总数: {totals['files']:,}")
+        print(f"总行数: {totals['total_lines']:,}")
+        print(f"代码行数: {totals['code_lines']:,} ({totals['code_lines'] / line_base * 100:.1f}%)")
+        print(f"注释行数: {totals['comment_lines']:,} ({totals['comment_lines'] / line_base * 100:.1f}%)")
+        print(f"空行行数: {totals['blank_lines']:,} ({totals['blank_lines'] / line_base * 100:.1f}%)")
+        print()
+        print(f"函数总数: {totals['functions']:,}")
+        print(f"类总数: {totals['classes']:,}")
+        print(f"导入语句: {totals['imports']:,}")
+        print()
+
+        # Complexity score
+        complexity = self.calculate_complexity_score()
+        if complexity < 50:
+            complexity_color = "\033[92m"  # green
+            complexity_level = "低"
+        elif complexity < 75:
+            complexity_color = "\033[93m"  # yellow
+            complexity_level = "中等"
+        else:
+            complexity_color = "\033[91m"  # red
+            complexity_level = "高"
+
+        use_color = getattr(sys.stdout, 'isatty', lambda: False)()
+        if use_color:
+            print(f"代码复杂度: {complexity_color}{complexity:.1f} ({complexity_level})\033[0m")
+        else:
+            print(f"代码复杂度: {complexity:.1f} ({complexity_level})")
+        print()
+
+        # Per-extension table
+        if totals['by_extension']:
+            print("📂 按文件类型统计:")
+            print("-" * 60)
+            print(f"{'类型':<10} {'文件数':>10} {'总行数':>12} {'代码行':>12} {'注释':>10} {'函数':>10} {'类':>8}")
+            print("-" * 60)
+
+            for ext, stats in sorted(totals['by_extension'].items()):
+                print(f"{ext:<10} {stats['files']:>10,} {stats['lines']:>12,} {stats['code']:>12,} "
+                      f"{stats['comments']:>10,} {stats['functions']:>10,} {stats['classes']:>8,}")
+
+            print()
+
+        # Health score: four checks worth up to 25 points each.
+        health_score = 0
+        health_issues: List[str] = []
+
+        comment_ratio = totals['comment_lines'] / line_base
+        if comment_ratio >= 0.1:
+            health_score += 25
+        else:
+            health_issues.append(f"注释率偏低 ({comment_ratio*100:.1f}%，建议>10%)")
+
+        avg_file_size = totals['total_lines'] / totals['files']
+        if avg_file_size <= 300:
+            health_score += 25
+        elif avg_file_size <= 500:
+            health_score += 15
+        else:
+            health_issues.append(f"平均文件大小偏大 ({avg_file_size:.0f}行，建议<300)")
+
+        blank_ratio = totals['blank_lines'] / line_base
+        if 0.05 <= blank_ratio <= 0.2:
+            health_score += 25
+        else:
+            health_issues.append(f"空白行比例异常 ({blank_ratio*100:.1f}%)")
+
+        avg_funcs_per_file = totals['functions'] / totals['files']
+        if avg_funcs_per_file <= 15:
+            health_score += 25
+        else:
+            health_issues.append(f"平均函数数偏高 ({avg_funcs_per_file:.1f}个/文件)")
+
+        print("🏥 代码健康度:")
+        print("-" * 60)
+        print(f"健康评分: {health_score}/100")
+
+        if health_issues:
+            print()
+            print("⚠️  发现的问题:")
+            for issue in health_issues:
+                print(f"  - {issue}")
+        else:
+            print("✅ 代码健康状况良好")
+
+        print()
+
+        # Recommendations
+        print("💡 建议:")
+        print("-" * 60)
+
+        if comment_ratio < 0.1:
+            print("  - 增加代码注释，提高可维护性")
+
+        if avg_file_size > 500:
+            print("  - 考虑拆分大文件，遵循单一职责原则")
+
+        language_count = len(totals['by_extension'])
+        if language_count > 5:
+            print("  - 项目包含多种语言，注意依赖管理")
+        elif language_count == 1:
+            print("  - 单一语言项目，结构清晰")
+
+        if totals['classes'] > 0:
+            # Rough proxy: every counted function is attributed to a class.
+            avg_methods_per_class = totals['functions'] / totals['classes']
+            if avg_methods_per_class > 20:
+                print("  - 类的职责可能过重，考虑拆分类")
+
+        if health_score >= 80:
+            print("  - ✅ 代码质量良好，继续保持")
+
+        print()
+        print("=" * 60)
+        print("代码统计完成")
+        print("=" * 60)
+
+
+def main():
+    """Command-line entry point: parse arguments, analyze, emit the result.
+
+    The result is either the text report or, with ``--json``, the raw
+    statistics as JSON. With ``-o/--output`` the chosen format is written
+    to that file instead of stdout. Exits with status 1 (message on
+    stderr) when the project path is missing or not a directory, or when
+    the output file cannot be written; exits with status 0 otherwise.
+    """
+    parser = argparse.ArgumentParser(
+        description='代码统计器 - 分析代码库统计信息',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+示例:
+  python3 code-stats.py .                    # 分析当前目录
+  python3 code-stats.py /path/to/project     # 分析指定项目
+  python3 code-stats.py . --json             # JSON格式输出
+  python3 code-stats.py . --output report.md # 保存报告
+        """
+    )
+
+    parser.add_argument(
+        'project_path',
+        nargs='?',
+        default='.',
+        help='项目路径（默认：当前目录）'
+    )
+
+    parser.add_argument(
+        '--json',
+        action='store_true',
+        help='JSON格式输出'
+    )
+
+    parser.add_argument(
+        '-o', '--output',
+        help='输出报告到文件'
+    )
+
+    args = parser.parse_args()
+
+    project_path = Path(args.project_path)
+    if not project_path.exists():
+        print(f"❌ 错误: 路径不存在: {args.project_path}", file=sys.stderr)
+        sys.exit(1)
+    if not project_path.is_dir():
+        # A plain file would otherwise fail later while listing entries.
+        print(f"❌ 错误: 路径不是目录: {args.project_path}", file=sys.stderr)
+        sys.exit(1)
+
+    analyzer = CodeStats(project_path)
+    analyzer.analyze_directory(project_path)
+
+    def emit():
+        """Write the selected output format to the current sys.stdout."""
+        if args.json:
+            import json
+            print(json.dumps(analyzer.stats, indent=2))
+        else:
+            analyzer.print_report()
+
+    if args.output:
+        from contextlib import redirect_stdout
+
+        try:
+            # redirect_stdout restores sys.stdout even if emit() raises.
+            with open(args.output, 'w', encoding='utf-8') as f, redirect_stdout(f):
+                emit()
+        except OSError as e:
+            print(f"❌ 错误: 无法写入文件 {args.output}: {e}", file=sys.stderr)
+            sys.exit(1)
+        print(f"✅ 报告已保存: {args.output}")
+    else:
+        emit()
+
+    sys.exit(0)
+
+
+if __name__ == '__main__':
+    main()