#!/usr/bin/env python3
"""
Report Generator for CLAUDE.md Audits
Generates reports in multiple formats: Markdown, JSON, and Refactored CLAUDE.md
"""

import json
import re
from pathlib import Path
from typing import Dict, List
from datetime import datetime

from analyzer import AuditResults, Finding, Severity, Category


class ReportGenerator:
    """Generates audit reports in multiple formats"""

    def __init__(self, results: AuditResults, original_file_path: Path):
        self.results = results
        self.original_file_path = Path(original_file_path)
        self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    def generate_markdown_report(self) -> str:
        """Generate comprehensive markdown audit report"""
        report = []

        # Header
        report.append("# CLAUDE.md Audit Report\n")
        report.append(f"**File**: `{self.original_file_path}`\n")
        report.append(f"**Generated**: {self.timestamp}\n")
        report.append(f"**Tier**: {self.results.metadata.get('tier', 'Unknown')}\n")
        report.append("\n---\n")

        # Executive Summary
        report.append("\n## Executive Summary\n")
        report.append(self._generate_summary_table())

        # Score Dashboard
        report.append("\n## Score Dashboard\n")
        report.append(self._generate_score_dashboard())

        # File Metrics
        report.append("\n## File Metrics\n")
        report.append(self._generate_metrics_section())

        # Findings by Severity
        report.append("\n## Findings\n")
        report.append(self._generate_findings_by_severity())

        # Findings by Category
        report.append("\n## Findings by Category\n")
        report.append(self._generate_findings_by_category())

        # Detailed Findings
        report.append("\n## Detailed Findings\n")
        report.append(self._generate_detailed_findings())

        # Recommendations
        report.append("\n## Priority Recommendations\n")
        report.append(self._generate_recommendations())

        # Footer
        report.append("\n---\n")
        report.append("\n*Generated by claude-md-auditor v1.0.0*\n")
        report.append("*Based on official Anthropic documentation, community best practices, and academic research*\n")

        return "\n".join(report)

    def generate_json_report(self) -> str:
        """Generate JSON audit report for CI/CD integration"""
        report_data = {
            "metadata": {
                "file": str(self.original_file_path),
                "generated_at": self.timestamp,
                "tier": self.results.metadata.get('tier', 'Unknown'),
                "analyzer_version": "1.0.0"
            },
            "metrics": self.results.metadata,
            "scores": self.results.scores,
            "findings": [
                {
                    "severity": f.severity.value,
                    "category": f.category.value,
                    "title": f.title,
                    "description": f.description,
                    "line_number": f.line_number,
                    "code_snippet": f.code_snippet,
                    "impact": f.impact,
                    "remediation": f.remediation,
                    "source": f.source
                }
                for f in self.results.findings
            ],
            "summary": {
                "total_findings": len(self.results.findings),
                "critical": self.results.scores['critical_count'],
                "high": self.results.scores['high_count'],
                "medium": self.results.scores['medium_count'],
                "low": self.results.scores['low_count'],
                "overall_health": self.results.scores['overall']
            }
        }

        return json.dumps(report_data, indent=2)

    def generate_refactored_claude_md(self, original_content: str) -> str:
        """Generate improved CLAUDE.md based on findings"""
        refactored = []

        # Add header comment
        refactored.append("# CLAUDE.md")
        refactored.append("")
        refactored.append(f"")
        refactored.append("")
        refactored.append("")

        # Add tier information if known
        tier = self.results.metadata.get('tier', 'Unknown')
        if tier != 'Unknown':
            refactored.append(f"")
            refactored.append("")

        # Generate improved structure
        refactored.append(self._generate_refactored_structure(original_content))

        # Add footer
        refactored.append("")
        refactored.append("---")
        refactored.append("")
refactored.append(f"**Last Updated**: {datetime.now().strftime('%Y-%m-%d')}") refactored.append("**Maintained By**: [Team/Owner]") refactored.append("") refactored.append("") return "\n".join(refactored) # ========== PRIVATE HELPER METHODS ========== def _generate_summary_table(self) -> str: """Generate executive summary table""" critical = self.results.scores['critical_count'] high = self.results.scores['high_count'] medium = self.results.scores['medium_count'] low = self.results.scores['low_count'] overall = self.results.scores['overall'] # Determine health status if critical > 0: status = "🚨 **CRITICAL ISSUES** - Immediate action required" health = "Poor" elif high > 3: status = "âš ī¸ **HIGH PRIORITY** - Address this sprint" health = "Fair" elif high > 0 or medium > 5: status = "📋 **MODERATE** - Schedule improvements" health = "Good" else: status = "✅ **HEALTHY** - Minor optimizations available" health = "Excellent" lines = [ "| Metric | Value |", "|--------|-------|", f"| **Overall Health** | {overall}/100 ({health}) |", f"| **Status** | {status} |", f"| **Critical Issues** | {critical} |", f"| **High Priority** | {high} |", f"| **Medium Priority** | {medium} |", f"| **Low Priority** | {low} |", f"| **Total Findings** | {critical + high + medium + low} |", ] return "\n".join(lines) def _generate_score_dashboard(self) -> str: """Generate score dashboard""" scores = self.results.scores lines = [ "| Category | Score | Status |", "|----------|-------|--------|", f"| **Security** | {scores['security']}/100 | {self._score_status(scores['security'])} |", f"| **Official Compliance** | {scores['official_compliance']}/100 | {self._score_status(scores['official_compliance'])} |", f"| **Best Practices** | {scores['best_practices']}/100 | {self._score_status(scores['best_practices'])} |", f"| **Research Optimization** | {scores['research_optimization']}/100 | {self._score_status(scores['research_optimization'])} |", ] return "\n".join(lines) def _score_status(self, score: int) -> str: """Convert score to status emoji""" if score >= 90: return "✅ Excellent" elif score >= 75: return "đŸŸĸ Good" elif score >= 60: return "🟡 Fair" elif score >= 40: return "🟠 Poor" else: return "🔴 Critical" def _generate_metrics_section(self) -> str: """Generate file metrics section""" meta = self.results.metadata lines = [ "| Metric | Value | Recommendation |", "|--------|-------|----------------|", f"| **Lines** | {meta['line_count']} | 100-300 lines ideal |", f"| **Characters** | {meta['character_count']:,} | Keep concise |", f"| **Est. Tokens** | {meta['token_estimate']:,} | < 3,000 recommended |", f"| **Context Usage (200K)** | {meta['context_usage_200k']}% | < 2% ideal |", f"| **Context Usage (1M)** | {meta['context_usage_1m']}% | Reference only |", ] # Add size assessment line_count = meta['line_count'] if line_count < 50: lines.append("") lines.append("âš ī¸ **Assessment**: File may be too sparse. Consider adding more project context.") elif line_count > 500: lines.append("") lines.append("🚨 **Assessment**: File exceeds recommended length. 
        elif 100 <= line_count <= 300:
            lines.append("")
            lines.append("✅ **Assessment**: File length is in optimal range (100-300 lines).")

        return "\n".join(lines)

    def _generate_findings_by_severity(self) -> str:
        """Generate findings breakdown by severity"""
        severity_counts = {
            Severity.CRITICAL: self.results.scores['critical_count'],
            Severity.HIGH: self.results.scores['high_count'],
            Severity.MEDIUM: self.results.scores['medium_count'],
            Severity.LOW: self.results.scores['low_count'],
        }

        lines = [
            "| Severity | Count | Description |",
            "|----------|-------|-------------|",
            f"| 🚨 **Critical** | {severity_counts[Severity.CRITICAL]} | Security risks, immediate action required |",
            f"| ⚠️ **High** | {severity_counts[Severity.HIGH]} | Significant issues, fix this sprint |",
            f"| 📋 **Medium** | {severity_counts[Severity.MEDIUM]} | Moderate issues, schedule for next quarter |",
            f"| ℹ️ **Low** | {severity_counts[Severity.LOW]} | Minor improvements, backlog |",
        ]

        return "\n".join(lines)

    def _generate_findings_by_category(self) -> str:
        """Generate findings breakdown by category"""
        category_counts = {}
        for finding in self.results.findings:
            cat = finding.category.value
            category_counts[cat] = category_counts.get(cat, 0) + 1

        lines = [
            "| Category | Count | Description |",
            "|----------|-------|-------------|",
        ]

        category_descriptions = {
            "security": "Security vulnerabilities and sensitive information",
            "official_compliance": "Compliance with official Anthropic documentation",
            "best_practices": "Community best practices and field experience",
            "research_optimization": "Research-based optimizations (lost in the middle, etc.)",
            "structure": "Document structure and organization",
            "maintenance": "Maintenance indicators and staleness",
        }

        for cat, desc in category_descriptions.items():
            count = category_counts.get(cat, 0)
            lines.append(f"| **{cat.replace('_', ' ').title()}** | {count} | {desc} |")

        return "\n".join(lines)

    def _generate_detailed_findings(self) -> str:
        """Generate detailed findings section"""
        if not self.results.findings:
            return "_No findings. CLAUDE.md is in excellent condition!_ ✅\n"

        lines = []

        # Group by severity
        severity_order = [Severity.CRITICAL, Severity.HIGH, Severity.MEDIUM, Severity.LOW, Severity.INFO]

        for severity in severity_order:
            findings = [f for f in self.results.findings if f.severity == severity]
            if not findings:
                continue

            severity_emoji = {
                Severity.CRITICAL: "🚨",
                Severity.HIGH: "⚠️",
                Severity.MEDIUM: "📋",
                Severity.LOW: "ℹ️",
                Severity.INFO: "💡"
            }

            lines.append(f"\n### {severity_emoji[severity]} {severity.value.upper()} Priority\n")

            for i, finding in enumerate(findings, 1):
                lines.append(f"#### {i}. {finding.title}\n")
                lines.append(f"**Category**: {finding.category.value.replace('_', ' ').title()}")
                lines.append(f"**Source**: {finding.source.title()} Guidance\n")

                if finding.line_number:
                    lines.append(f"**Location**: Line {finding.line_number}\n")

                lines.append(f"**Description**: {finding.description}\n")

                if finding.code_snippet:
                    lines.append("**Code**:")
                    lines.append("```")
                    lines.append(finding.code_snippet)
                    lines.append("```\n")

                if finding.impact:
                    lines.append(f"**Impact**: {finding.impact}\n")

                if finding.remediation:
                    lines.append(f"**Remediation**:\n{finding.remediation}\n")

                lines.append("---\n")

        return "\n".join(lines)

    def _generate_recommendations(self) -> str:
        """Generate prioritized recommendations"""
        lines = []

        critical = [f for f in self.results.findings if f.severity == Severity.CRITICAL]
        high = [f for f in self.results.findings if f.severity == Severity.HIGH]

        if critical:
            lines.append("### 🚨 Priority 0: IMMEDIATE ACTION (Critical)\n")
            for i, finding in enumerate(critical, 1):
                lines.append(f"{i}. **{finding.title}**")
                lines.append(f" - {finding.description}")
                if finding.line_number:
                    lines.append(f" - Line: {finding.line_number}")
            lines.append("")

        if high:
            lines.append("### ⚠️ Priority 1: THIS SPRINT (High)\n")
            for i, finding in enumerate(high, 1):
                lines.append(f"{i}. **{finding.title}**")
                lines.append(f" - {finding.description}")
            lines.append("")

        # General recommendations
        lines.append("### 💡 General Recommendations\n")

        if self.results.metadata['line_count'] > 300:
            lines.append("- **Reduce file length**: Use @imports for detailed documentation")

        if self.results.metadata['token_estimate'] > 5000:
            lines.append("- **Optimize token usage**: Aim for < 3,000 tokens (≈200 lines)")

        official_score = self.results.scores['official_compliance']
        if official_score < 80:
            lines.append("- **Improve official compliance**: Review official Anthropic documentation")

        lines.append("- **Regular maintenance**: Schedule quarterly CLAUDE.md reviews")
        lines.append("- **Team collaboration**: Share CLAUDE.md improvements via PR")
        lines.append("- **Validate effectiveness**: Test that Claude follows standards without prompting")

        return "\n".join(lines)

    def _generate_refactored_structure(self, original_content: str) -> str:
        """Generate refactored CLAUDE.md structure"""
        lines = []

        # Detect project name from original (look for # header)
        project_match = re.search(r'^#\s+(.+)$', original_content, re.MULTILINE)
        project_name = project_match.group(1) if project_match else "Project Name"

        lines.append(f"# {project_name}")
        lines.append("")

        # Add critical standards section at top (optimal positioning)
        lines.append("## 🚨 CRITICAL: Must-Follow Standards")
        lines.append("")
        lines.append("")
        lines.append("")
        lines.append("- [Add critical security requirements]")
        lines.append("- [Add critical quality gates]")
        lines.append("- [Add critical workflow requirements]")
        lines.append("")

        # Project overview
        lines.append("## 📋 Project Overview")
        lines.append("")
        lines.append("**Tech Stack**: [List technologies]")
        lines.append("**Architecture**: [Architecture pattern]")
        lines.append("**Purpose**: [Project purpose]")
        lines.append("")

        # Development workflow
        lines.append("## 🔧 Development Workflow")
        lines.append("")
        lines.append("### Git Workflow")
        lines.append("- Branch pattern: `feature/{name}`, `bugfix/{name}`")
        lines.append("- Conventional commit messages required")
        lines.append("- PRs require: tests + review + passing CI")
        lines.append("")

        # Code standards
        lines.append("## 📝 Code Standards")
        lines.append("")
lines.append("### TypeScript/JavaScript") lines.append("- TypeScript strict mode: enabled") lines.append("- No `any` types (use `unknown` if needed)") lines.append("- Explicit return types required") lines.append("") lines.append("### Testing") lines.append("- Minimum coverage: 80%") lines.append("- Testing trophy: 70% integration, 20% unit, 10% E2E") lines.append("- Test naming: 'should [behavior] when [condition]'") lines.append("") # Common tasks (bottom position for recency attention) lines.append("## 📌 REFERENCE: Common Tasks") lines.append("") lines.append("") lines.append("") lines.append("### Build & Test") lines.append("```bash") lines.append("npm run build # Build production") lines.append("npm test # Run tests") lines.append("npm run lint # Run linter") lines.append("```") lines.append("") lines.append("### Key File Locations") lines.append("- Config: `/config/app.config.ts`") lines.append("- Types: `/src/types/index.ts`") lines.append("- Utils: `/src/utils/index.ts`") lines.append("") # Import detailed docs lines.append("## 📚 Detailed Documentation (Imports)") lines.append("") lines.append("") lines.append("") lines.append("") lines.append("") return "\n".join(lines) def generate_report(results: AuditResults, file_path: Path, format: str = "markdown") -> str: """ Generate audit report in specified format Args: results: AuditResults from analyzer file_path: Path to original CLAUDE.md format: "markdown", "json", or "refactored" Returns: Report content as string """ generator = ReportGenerator(results, file_path) if format == "json": return generator.generate_json_report() elif format == "refactored": # Read original content for refactoring try: with open(file_path, 'r') as f: original = f.read() except: original = "" return generator.generate_refactored_claude_md(original) else: # markdown (default) return generator.generate_markdown_report() if __name__ == "__main__": import sys from analyzer import analyze_file if len(sys.argv) < 2: print("Usage: python report_generator.py [format]") print("Formats: markdown (default), json, refactored") sys.exit(1) file_path = Path(sys.argv[1]) report_format = sys.argv[2] if len(sys.argv) > 2 else "markdown" # Run analysis results = analyze_file(str(file_path)) # Generate report report = generate_report(results, file_path, report_format) print(report)