Initial commit
This commit is contained in:
@@ -0,0 +1,198 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple Project Metrics Collector
|
||||
Just collects basic data - NO decision making or pattern matching
|
||||
The project-analyzer agent does the intelligent analysis
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
from typing import Dict
|
||||
|
||||
class ProjectMetricsCollector:
    """Collect raw, uninterpreted metrics about a project directory tree.

    The collector only counts and lists things (files per category,
    top-level directories, well-known config files, size/depth totals).
    It makes no judgement about the project; the returned dictionaries are
    plain JSON-serialisable data meant to be interpreted by a separate
    consumer.

    Args:
        project_root: Directory to scan. Resolved to an absolute path.
    """

    # Path components that cause a path to be skipped wherever they occur
    # *below* the project root.
    _IGNORED_NAMES = frozenset({
        'node_modules', '.git', '__pycache__', '.venv', 'venv',
        'dist', 'build', '.cache', '.pytest_cache', 'coverage',
        '.next', '.nuxt', 'out', 'target',
    })

    # Category -> file extensions. Declaration order matters: when an
    # extension is listed twice ('.svg', '.xml') the first category wins,
    # so '.svg' and '.xml' are both counted as 'markup'.
    _TYPE_CATEGORIES = {
        'code': {'.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.go', '.rs', '.c', '.cpp', '.php', '.rb'},
        'markup': {'.html', '.xml', '.svg'},
        'stylesheet': {'.css', '.scss', '.sass', '.less'},
        'document': {'.md', '.txt', '.pdf', '.doc', '.docx', '.odt'},
        'latex': {'.tex', '.bib', '.cls', '.sty'},
        'spreadsheet': {'.xlsx', '.xls', '.ods', '.csv'},
        'image': {'.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp'},
        'video': {'.mp4', '.avi', '.mov', '.mkv'},
        'data': {'.json', '.yaml', '.yml', '.toml', '.xml'},
        'notebook': {'.ipynb'},
    }

    # Glob pattern (relative to the project root) -> human-readable label.
    _KEY_FILE_PATTERNS = {
        # Programming
        'package.json': 'Node.js project',
        'requirements.txt': 'Python project',
        'Cargo.toml': 'Rust project',
        'go.mod': 'Go project',
        'pom.xml': 'Java Maven project',
        'build.gradle': 'Java Gradle project',

        # Configuration
        '.eslintrc*': 'ESLint config',
        'tsconfig.json': 'TypeScript config',
        'jest.config.js': 'Jest testing',
        'pytest.ini': 'Pytest config',

        # CI/CD
        '.github/workflows': 'GitHub Actions',
        '.gitlab-ci.yml': 'GitLab CI',
        'Jenkinsfile': 'Jenkins',

        # Hooks
        '.pre-commit-config.yaml': 'Pre-commit hooks',
        '.husky': 'Husky hooks',

        # Documentation
        'README.md': 'README',
        'CONTRIBUTING.md': 'Contribution guide',
        'LICENSE': 'License file',

        # LaTeX
        'main.tex': 'LaTeX main',
        '*.bib': 'Bibliography',

        # Build tools
        'Makefile': 'Makefile',
        'CMakeLists.txt': 'CMake',
        'docker-compose.yml': 'Docker Compose',
        'Dockerfile': 'Docker',
    }

    def __init__(self, project_root: str = "."):
        self.root = Path(project_root).resolve()

    def collect_metrics(self) -> Dict:
        """Run every collector and return their results keyed by section."""
        return {
            'file_analysis': self._analyze_files(),
            'directory_structure': self._get_directory_structure(),
            'key_files': self._find_key_files(),
            'project_stats': self._get_basic_stats(),
        }

    def _analyze_files(self) -> Dict:
        """Count non-ignored files by category.

        Returns:
            A dict with ``counts`` (category -> number of files, with
            uncategorised files under ``'other'``), ``percentages``
            (category -> share of all counted files, one decimal),
            ``total_files`` (0 for an empty project) and ``sample_files``
            (category -> up to five relative paths, sorted for
            reproducible output; ``'other'`` has no samples).
        """
        # Flatten the table once; setdefault keeps the first category that
        # declares an extension, matching declaration-order precedence.
        suffix_to_category = {}
        for category, extensions in self._TYPE_CATEGORIES.items():
            for extension in extensions:
                suffix_to_category.setdefault(extension, category)

        counts = defaultdict(int)
        files_by_type = defaultdict(list)

        for item in self.root.rglob('*'):
            if self._is_ignored(item) or not item.is_file():
                continue
            category = suffix_to_category.get(item.suffix.lower())
            if category is None:
                counts['other'] += 1
                continue
            counts[category] += 1
            files_by_type[category].append(str(item.relative_to(self.root)))

        # A non-empty ``counts`` implies total >= 1, so the division below
        # is safe without faking a total of 1 for empty projects.
        total = sum(counts.values())

        return {
            'counts': dict(counts),
            'percentages': {k: round((v / total) * 100, 1) for k, v in counts.items()},
            'total_files': total,
            'sample_files': {k: sorted(v)[:5] for k, v in files_by_type.items()},  # First 5 of each type
        }

    def _get_directory_structure(self) -> Dict:
        """Describe the non-ignored directories directly under the root.

        Returns:
            A dict with ``top_level_directories`` (list of
            ``{'name', 'file_count'}`` sorted by descending file count, ties
            broken by name) and ``total_directories`` (length of that list).
            ``file_count`` is recursive and skips ignored paths.
        """
        dirs = []
        for item in self.root.iterdir():
            if not item.is_dir() or self._is_ignored(item):
                continue
            file_count = sum(
                1
                for child in item.rglob('*')
                if not self._is_ignored(child) and child.is_file()
            )
            dirs.append({'name': item.name, 'file_count': file_count})

        dirs.sort(key=lambda entry: (-entry['file_count'], entry['name']))
        return {
            'top_level_directories': dirs,
            'total_directories': len(dirs),
        }

    def _find_key_files(self) -> Dict:
        """Find well-known configuration and project files at the root.

        Each pattern is globbed relative to the project root (not
        recursively).

        Returns:
            A dict mapping every pattern that matched to its
            ``description``, match ``count`` and up to three relative
            ``paths`` (sorted for reproducible output).
        """
        found = {}
        for pattern, description in self._KEY_FILE_PATTERNS.items():
            matches = sorted(self.root.glob(pattern))
            if matches:
                found[pattern] = {
                    'description': description,
                    'count': len(matches),
                    'paths': [str(m.relative_to(self.root)) for m in matches[:3]],
                }
        return found

    def _get_basic_stats(self) -> Dict:
        """Compute totals for non-ignored files and directories.

        Returns:
            A dict with ``total_files``, ``total_directories``,
            ``total_size_bytes``, ``total_size_mb`` (two decimals) and
            ``deepest_nesting`` (largest number of path components, relative
            to the root, of any counted file).
        """
        total_files = 0
        total_dirs = 0
        total_size = 0
        max_depth = 0

        for item in self.root.rglob('*'):
            if self._is_ignored(item):
                continue

            if item.is_file():
                total_files += 1
                try:
                    total_size += item.stat().st_size
                except OSError:
                    # Best effort: a file that vanished or cannot be
                    # stat'ed still counts as a file, just with no size.
                    pass

                depth = len(item.relative_to(self.root).parts)
                max_depth = max(max_depth, depth)
            elif item.is_dir():
                total_dirs += 1

        return {
            'total_files': total_files,
            'total_directories': total_dirs,
            'total_size_bytes': total_size,
            'total_size_mb': round(total_size / (1024 * 1024), 2),
            'deepest_nesting': max_depth,
        }

    def _is_ignored(self, path: Path) -> bool:
        """Return True if any component of *path* below the root is ignored.

        Only the part of the path relative to the project root is examined,
        so a project that itself lives under a directory called e.g.
        ``build`` or ``target`` is still scanned. Paths that are not
        located under the root fall back to checking all of their
        components.
        """
        try:
            parts = path.relative_to(self.root).parts
        except ValueError:
            parts = path.parts
        return not self._IGNORED_NAMES.isdisjoint(parts)

    def generate_report(self) -> Dict:
        """Return the collected metrics wrapped with descriptive metadata."""
        metrics = self.collect_metrics()

        return {
            'project_root': str(self.root),
            'scan_purpose': 'Basic metrics collection for intelligent agent analysis',
            'metrics': metrics,
            'note': 'This is raw data. The project-analyzer agent will interpret it intelligently.',
        }
|
||||
|
||||
if __name__ == '__main__':
    import sys

    # The first command-line argument selects the project root; without one
    # the current working directory is scanned.
    cli_args = sys.argv[1:]
    target = cli_args[0] if cli_args else '.'

    # Emit the full report as pretty-printed JSON on stdout.
    print(json.dumps(ProjectMetricsCollector(target).generate_report(), indent=2))
|
||||
Reference in New Issue
Block a user