#!/usr/bin/env python3 """ Simple Project Metrics Collector Just collects basic data - NO decision making or pattern matching The project-analyzer agent does the intelligent analysis """ import json from pathlib import Path from collections import defaultdict from typing import Dict class ProjectMetricsCollector: """Collects basic project metrics for agent analysis""" def __init__(self, project_root: str = "."): self.root = Path(project_root).resolve() def collect_metrics(self) -> Dict: """Collect basic project metrics""" return { 'file_analysis': self._analyze_files(), 'directory_structure': self._get_directory_structure(), 'key_files': self._find_key_files(), 'project_stats': self._get_basic_stats() } def _analyze_files(self) -> Dict: """Count files by category""" type_categories = { 'code': {'.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.go', '.rs', '.c', '.cpp', '.php', '.rb'}, 'markup': {'.html', '.xml', '.svg'}, 'stylesheet': {'.css', '.scss', '.sass', '.less'}, 'document': {'.md', '.txt', '.pdf', '.doc', '.docx', '.odt'}, 'latex': {'.tex', '.bib', '.cls', '.sty'}, 'spreadsheet': {'.xlsx', '.xls', '.ods', '.csv'}, 'image': {'.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp'}, 'video': {'.mp4', '.avi', '.mov', '.mkv'}, 'data': {'.json', '.yaml', '.yml', '.toml', '.xml'}, 'notebook': {'.ipynb'}, } counts = defaultdict(int) files_by_type = defaultdict(list) for item in self.root.rglob('*'): if item.is_file() and not self._is_ignored(item): suffix = item.suffix.lower() categorized = False for category, extensions in type_categories.items(): if suffix in extensions: counts[category] += 1 files_by_type[category].append(str(item.relative_to(self.root))) categorized = True break if not categorized: counts['other'] += 1 total = sum(counts.values()) or 1 return { 'counts': dict(counts), 'percentages': {k: round((v / total) * 100, 1) for k, v in counts.items()}, 'total_files': total, 'sample_files': {k: v[:5] for k, v in files_by_type.items()} # First 5 of each type } def _get_directory_structure(self) -> Dict: """Get top-level directory structure""" dirs = [] for item in self.root.iterdir(): if item.is_dir() and not self._is_ignored(item): file_count = sum(1 for _ in item.rglob('*') if _.is_file()) dirs.append({ 'name': item.name, 'file_count': file_count }) return { 'top_level_directories': sorted(dirs, key=lambda x: x['file_count'], reverse=True), 'total_directories': len(dirs) } def _find_key_files(self) -> Dict: """Find common configuration and important files""" key_patterns = { # Programming 'package.json': 'Node.js project', 'requirements.txt': 'Python project', 'Cargo.toml': 'Rust project', 'go.mod': 'Go project', 'pom.xml': 'Java Maven project', 'build.gradle': 'Java Gradle project', # Configuration '.eslintrc*': 'ESLint config', 'tsconfig.json': 'TypeScript config', 'jest.config.js': 'Jest testing', 'pytest.ini': 'Pytest config', # CI/CD '.github/workflows': 'GitHub Actions', '.gitlab-ci.yml': 'GitLab CI', 'Jenkinsfile': 'Jenkins', # Hooks '.pre-commit-config.yaml': 'Pre-commit hooks', '.husky': 'Husky hooks', # Documentation 'README.md': 'README', 'CONTRIBUTING.md': 'Contribution guide', 'LICENSE': 'License file', # LaTeX 'main.tex': 'LaTeX main', '*.bib': 'Bibliography', # Build tools 'Makefile': 'Makefile', 'CMakeLists.txt': 'CMake', 'docker-compose.yml': 'Docker Compose', 'Dockerfile': 'Docker', } found = {} for pattern, description in key_patterns.items(): matches = list(self.root.glob(pattern)) if matches: found[pattern] = { 'description': description, 'count': len(matches), 'paths': [str(m.relative_to(self.root)) for m in matches[:3]] } return found def _get_basic_stats(self) -> Dict: """Get basic project statistics""" total_files = 0 total_dirs = 0 total_size = 0 max_depth = 0 for item in self.root.rglob('*'): if self._is_ignored(item): continue if item.is_file(): total_files += 1 try: total_size += item.stat().st_size except: pass depth = len(item.relative_to(self.root).parts) max_depth = max(max_depth, depth) elif item.is_dir(): total_dirs += 1 return { 'total_files': total_files, 'total_directories': total_dirs, 'total_size_bytes': total_size, 'total_size_mb': round(total_size / (1024 * 1024), 2), 'deepest_nesting': max_depth } def _is_ignored(self, path: Path) -> bool: """Check if path should be ignored""" ignore_patterns = { 'node_modules', '.git', '__pycache__', '.venv', 'venv', 'dist', 'build', '.cache', '.pytest_cache', 'coverage', '.next', '.nuxt', 'out', 'target' } parts = path.parts return any(pattern in parts for pattern in ignore_patterns) def generate_report(self) -> Dict: """Generate complete metrics report""" metrics = self.collect_metrics() return { 'project_root': str(self.root), 'scan_purpose': 'Basic metrics collection for intelligent agent analysis', 'metrics': metrics, 'note': 'This is raw data. The project-analyzer agent will interpret it intelligently.' } if __name__ == '__main__': import sys path = sys.argv[1] if len(sys.argv) > 1 else '.' collector = ProjectMetricsCollector(path) report = collector.generate_report() print(json.dumps(report, indent=2))