#!/usr/bin/env python3
"""
Report Generator for CLAUDE.md Audits

Generates reports in multiple formats: Markdown, JSON, and refactored CLAUDE.md.
"""

import json
import re
from datetime import datetime
from pathlib import Path

from analyzer import AuditResults, Severity
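
# A minimal usage sketch (hedged; assumes analyzer.analyze_file exists, as in
# the __main__ block below, and returns the AuditResults imported above):
#
#     from analyzer import analyze_file
#     results = analyze_file("CLAUDE.md")
#     generator = ReportGenerator(results, Path("CLAUDE.md"))
#     print(generator.generate_markdown_report())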


class ReportGenerator:
    """Generates audit reports in multiple formats"""

    def __init__(self, results: AuditResults, original_file_path: Path):
        self.results = results
        self.original_file_path = Path(original_file_path)
        self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    def generate_markdown_report(self) -> str:
        """Generate comprehensive markdown audit report"""
        report = []

        # Header
        report.append("# CLAUDE.md Audit Report\n")
        report.append(f"**File**: `{self.original_file_path}`\n")
        report.append(f"**Generated**: {self.timestamp}\n")
        report.append(f"**Tier**: {self.results.metadata.get('tier', 'Unknown')}\n")
        report.append("\n---\n")

        # Executive Summary
        report.append("\n## Executive Summary\n")
        report.append(self._generate_summary_table())

        # Score Dashboard
        report.append("\n## Score Dashboard\n")
        report.append(self._generate_score_dashboard())

        # File Metrics
        report.append("\n## File Metrics\n")
        report.append(self._generate_metrics_section())

        # Findings by Severity
        report.append("\n## Findings\n")
        report.append(self._generate_findings_by_severity())

        # Findings by Category
        report.append("\n## Findings by Category\n")
        report.append(self._generate_findings_by_category())

        # Detailed Findings
        report.append("\n## Detailed Findings\n")
        report.append(self._generate_detailed_findings())

        # Recommendations
        report.append("\n## Priority Recommendations\n")
        report.append(self._generate_recommendations())

        # Footer
        report.append("\n---\n")
        report.append("\n*Generated by claude-md-auditor v1.0.0*\n")
        report.append("*Based on official Anthropic documentation, community best practices, and academic research*\n")

        return "\n".join(report)

    def generate_json_report(self) -> str:
        """Generate JSON audit report for CI/CD integration"""
        report_data = {
            "metadata": {
                "file": str(self.original_file_path),
                "generated_at": self.timestamp,
                "tier": self.results.metadata.get('tier', 'Unknown'),
                "analyzer_version": "1.0.0"
            },
            "metrics": self.results.metadata,
            "scores": self.results.scores,
            "findings": [
                {
                    "severity": f.severity.value,
                    "category": f.category.value,
                    "title": f.title,
                    "description": f.description,
                    "line_number": f.line_number,
                    "code_snippet": f.code_snippet,
                    "impact": f.impact,
                    "remediation": f.remediation,
                    "source": f.source
                }
                for f in self.results.findings
            ],
            "summary": {
                "total_findings": len(self.results.findings),
                "critical": self.results.scores['critical_count'],
                "high": self.results.scores['high_count'],
                "medium": self.results.scores['medium_count'],
                "low": self.results.scores['low_count'],
                "overall_health": self.results.scores['overall']
            }
        }

        return json.dumps(report_data, indent=2)
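
    # A minimal sketch of gating CI on the JSON summary above (hedged example;
    # the exit policy is illustrative, not part of this module):
    #
    #     summary = json.loads(generator.generate_json_report())["summary"]
    #     if summary["critical"] > 0:
    #         raise SystemExit(1)  # fail the build on critical findings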

    def generate_refactored_claude_md(self, original_content: str) -> str:
        """Generate improved CLAUDE.md based on findings"""
        refactored = []

        # Add header comment
        refactored.append("# CLAUDE.md")
        refactored.append("")
        refactored.append(f"<!-- Refactored: {self.timestamp} -->")
        refactored.append("<!-- Based on official Anthropic guidelines and best practices -->")
        refactored.append("")

        # Add tier information if known
        tier = self.results.metadata.get('tier', 'Unknown')
        if tier != 'Unknown':
            refactored.append(f"<!-- Tier: {tier} -->")
            refactored.append("")

        # Generate improved structure
        refactored.append(self._generate_refactored_structure(original_content))

        # Add footer
        refactored.append("")
        refactored.append("---")
        refactored.append("")
        refactored.append(f"**Last Updated**: {datetime.now().strftime('%Y-%m-%d')}")
        refactored.append("**Maintained By**: [Team/Owner]")
        refactored.append("")
        refactored.append("<!-- Follow official guidance: Keep lean, be specific, use structure -->")

        return "\n".join(refactored)
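
    # A minimal sketch of writing the refactored output next to the original
    # (the "CLAUDE.refactored.md" filename is illustrative only):
    #
    #     original = Path("CLAUDE.md").read_text(encoding="utf-8")
    #     refactored = generator.generate_refactored_claude_md(original)
    #     Path("CLAUDE.refactored.md").write_text(refactored, encoding="utf-8")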

    # ========== PRIVATE HELPER METHODS ==========

    def _generate_summary_table(self) -> str:
        """Generate executive summary table"""
        critical = self.results.scores['critical_count']
        high = self.results.scores['high_count']
        medium = self.results.scores['medium_count']
        low = self.results.scores['low_count']
        overall = self.results.scores['overall']

        # Determine health status
        if critical > 0:
            status = "🚨 **CRITICAL ISSUES** - Immediate action required"
            health = "Poor"
        elif high > 3:
            status = "⚠️ **HIGH PRIORITY** - Address this sprint"
            health = "Fair"
        elif high > 0 or medium > 5:
            status = "📋 **MODERATE** - Schedule improvements"
            health = "Good"
        else:
            status = "✅ **HEALTHY** - Minor optimizations available"
            health = "Excellent"

        lines = [
            "| Metric | Value |",
            "|--------|-------|",
            f"| **Overall Health** | {overall}/100 ({health}) |",
            f"| **Status** | {status} |",
            f"| **Critical Issues** | {critical} |",
            f"| **High Priority** | {high} |",
            f"| **Medium Priority** | {medium} |",
            f"| **Low Priority** | {low} |",
            f"| **Total Findings** | {critical + high + medium + low} |",
        ]

        return "\n".join(lines)

    def _generate_score_dashboard(self) -> str:
        """Generate score dashboard"""
        scores = self.results.scores

        lines = [
            "| Category | Score | Status |",
            "|----------|-------|--------|",
            f"| **Security** | {scores['security']}/100 | {self._score_status(scores['security'])} |",
            f"| **Official Compliance** | {scores['official_compliance']}/100 | {self._score_status(scores['official_compliance'])} |",
            f"| **Best Practices** | {scores['best_practices']}/100 | {self._score_status(scores['best_practices'])} |",
            f"| **Research Optimization** | {scores['research_optimization']}/100 | {self._score_status(scores['research_optimization'])} |",
        ]

        return "\n".join(lines)

    def _score_status(self, score: int) -> str:
        """Convert score to status emoji"""
        if score >= 90:
            return "✅ Excellent"
        elif score >= 75:
            return "🟢 Good"
        elif score >= 60:
            return "🟡 Fair"
        elif score >= 40:
            return "🟠 Poor"
        else:
            return "🔴 Critical"

    def _generate_metrics_section(self) -> str:
        """Generate file metrics section"""
        meta = self.results.metadata

        lines = [
            "| Metric | Value | Recommendation |",
            "|--------|-------|----------------|",
            f"| **Lines** | {meta['line_count']} | 100-300 lines ideal |",
            f"| **Characters** | {meta['character_count']:,} | Keep concise |",
            f"| **Est. Tokens** | {meta['token_estimate']:,} | < 3,000 recommended |",
            f"| **Context Usage (200K)** | {meta['context_usage_200k']}% | < 2% ideal |",
            f"| **Context Usage (1M)** | {meta['context_usage_1m']}% | Reference only |",
        ]

        # Add size assessment
        line_count = meta['line_count']
        if line_count < 50:
            lines.append("")
            lines.append("⚠️ **Assessment**: File may be too sparse. Consider adding more project context.")
        elif line_count > 500:
            lines.append("")
            lines.append("🚨 **Assessment**: File exceeds recommended length. Use @imports for detailed docs.")
        elif 100 <= line_count <= 300:
            lines.append("")
            lines.append("✅ **Assessment**: File length is in optimal range (100-300 lines).")

        return "\n".join(lines)

    def _generate_findings_by_severity(self) -> str:
        """Generate findings breakdown by severity"""
        severity_counts = {
            Severity.CRITICAL: self.results.scores['critical_count'],
            Severity.HIGH: self.results.scores['high_count'],
            Severity.MEDIUM: self.results.scores['medium_count'],
            Severity.LOW: self.results.scores['low_count'],
        }

        lines = [
            "| Severity | Count | Description |",
            "|----------|-------|-------------|",
            f"| 🚨 **Critical** | {severity_counts[Severity.CRITICAL]} | Security risks, immediate action required |",
            f"| ⚠️ **High** | {severity_counts[Severity.HIGH]} | Significant issues, fix this sprint |",
            f"| 📋 **Medium** | {severity_counts[Severity.MEDIUM]} | Moderate issues, schedule for next quarter |",
            f"| ℹ️ **Low** | {severity_counts[Severity.LOW]} | Minor improvements, backlog |",
        ]

        return "\n".join(lines)

    def _generate_findings_by_category(self) -> str:
        """Generate findings breakdown by category"""
        category_counts = {}
        for finding in self.results.findings:
            cat = finding.category.value
            category_counts[cat] = category_counts.get(cat, 0) + 1

        lines = [
            "| Category | Count | Description |",
            "|----------|-------|-------------|",
        ]

        category_descriptions = {
            "security": "Security vulnerabilities and sensitive information",
            "official_compliance": "Compliance with official Anthropic documentation",
            "best_practices": "Community best practices and field experience",
            "research_optimization": "Research-based optimizations (lost in the middle, etc.)",
            "structure": "Document structure and organization",
            "maintenance": "Maintenance indicators and staleness",
        }

        for cat, desc in category_descriptions.items():
            count = category_counts.get(cat, 0)
            lines.append(f"| **{cat.replace('_', ' ').title()}** | {count} | {desc} |")

        return "\n".join(lines)

    def _generate_detailed_findings(self) -> str:
        """Generate detailed findings section"""
        if not self.results.findings:
            return "_No findings. CLAUDE.md is in excellent condition!_ ✅\n"

        lines = []

        # Group by severity
        severity_order = [Severity.CRITICAL, Severity.HIGH, Severity.MEDIUM, Severity.LOW, Severity.INFO]
        severity_emoji = {
            Severity.CRITICAL: "🚨",
            Severity.HIGH: "⚠️",
            Severity.MEDIUM: "📋",
            Severity.LOW: "ℹ️",
            Severity.INFO: "💡"
        }

        for severity in severity_order:
            findings = [f for f in self.results.findings if f.severity == severity]
            if not findings:
                continue

            lines.append(f"\n### {severity_emoji[severity]} {severity.value.upper()} Priority\n")

            for i, finding in enumerate(findings, 1):
                lines.append(f"#### {i}. {finding.title}\n")
                lines.append(f"**Category**: {finding.category.value.replace('_', ' ').title()}")
                lines.append(f"**Source**: {finding.source.title()} Guidance\n")

                if finding.line_number:
                    lines.append(f"**Location**: Line {finding.line_number}\n")

                lines.append(f"**Description**: {finding.description}\n")

                if finding.code_snippet:
                    lines.append("**Code**:")
                    lines.append("```")
                    lines.append(finding.code_snippet)
                    lines.append("```\n")

                if finding.impact:
                    lines.append(f"**Impact**: {finding.impact}\n")

                if finding.remediation:
                    lines.append(f"**Remediation**:\n{finding.remediation}\n")

                lines.append("---\n")

        return "\n".join(lines)

    def _generate_recommendations(self) -> str:
        """Generate prioritized recommendations"""
        lines = []

        critical = [f for f in self.results.findings if f.severity == Severity.CRITICAL]
        high = [f for f in self.results.findings if f.severity == Severity.HIGH]

        if critical:
            lines.append("### 🚨 Priority 0: IMMEDIATE ACTION (Critical)\n")
            for i, finding in enumerate(critical, 1):
                lines.append(f"{i}. **{finding.title}**")
                lines.append(f"   - {finding.description}")
                if finding.line_number:
                    lines.append(f"   - Line: {finding.line_number}")
                lines.append("")

        if high:
            lines.append("### ⚠️ Priority 1: THIS SPRINT (High)\n")
            for i, finding in enumerate(high, 1):
                lines.append(f"{i}. **{finding.title}**")
                lines.append(f"   - {finding.description}")
                lines.append("")

        # General recommendations
        lines.append("### 💡 General Recommendations\n")

        if self.results.metadata['line_count'] > 300:
            lines.append("- **Reduce file length**: Use @imports for detailed documentation")

        if self.results.metadata['token_estimate'] > 5000:
            lines.append("- **Optimize token usage**: Aim for < 3,000 tokens (≈200 lines)")

        official_score = self.results.scores['official_compliance']
        if official_score < 80:
            lines.append("- **Improve official compliance**: Review official Anthropic documentation")

        lines.append("- **Regular maintenance**: Schedule quarterly CLAUDE.md reviews")
        lines.append("- **Team collaboration**: Share CLAUDE.md improvements via PR")
        lines.append("- **Validate effectiveness**: Test that Claude follows standards without prompting")

        return "\n".join(lines)

    def _generate_refactored_structure(self, original_content: str) -> str:
        """Generate refactored CLAUDE.md structure"""
        lines = []

        # Detect project name from original (look for # header)
        project_match = re.search(r'^#\s+(.+)$', original_content, re.MULTILINE)
        project_name = project_match.group(1) if project_match else "Project Name"

        lines.append(f"# {project_name}")
        lines.append("")

        # Add critical standards section at top (optimal positioning)
        lines.append("## 🚨 CRITICAL: Must-Follow Standards")
        lines.append("")
        lines.append("<!-- Place non-negotiable standards here (top position = highest attention) -->")
        lines.append("")
        lines.append("- [Add critical security requirements]")
        lines.append("- [Add critical quality gates]")
        lines.append("- [Add critical workflow requirements]")
        lines.append("")

        # Project overview
        lines.append("## 📋 Project Overview")
        lines.append("")
        lines.append("**Tech Stack**: [List technologies]")
        lines.append("**Architecture**: [Architecture pattern]")
        lines.append("**Purpose**: [Project purpose]")
        lines.append("")

        # Development workflow
        lines.append("## 🔧 Development Workflow")
        lines.append("")
        lines.append("### Git Workflow")
        lines.append("- Branch pattern: `feature/{name}`, `bugfix/{name}`")
        lines.append("- Conventional commit messages required")
        lines.append("- PRs require: tests + review + passing CI")
        lines.append("")

        # Code standards
        lines.append("## 📝 Code Standards")
        lines.append("")
        lines.append("### TypeScript/JavaScript")
        lines.append("- TypeScript strict mode: enabled")
        lines.append("- No `any` types (use `unknown` if needed)")
        lines.append("- Explicit return types required")
        lines.append("")
        lines.append("### Testing")
        lines.append("- Minimum coverage: 80%")
        lines.append("- Testing trophy: 70% integration, 20% unit, 10% E2E")
        lines.append("- Test naming: 'should [behavior] when [condition]'")
        lines.append("")

        # Common tasks (bottom position for recency attention)
        lines.append("## 📌 REFERENCE: Common Tasks")
        lines.append("")
        lines.append("<!-- Bottom position = recency attention, good for frequently accessed info -->")
        lines.append("")
        lines.append("### Build & Test")
        lines.append("```bash")
        lines.append("npm run build    # Build production")
        lines.append("npm test         # Run tests")
        lines.append("npm run lint     # Run linter")
        lines.append("```")
        lines.append("")
        lines.append("### Key File Locations")
        lines.append("- Config: `/config/app.config.ts`")
        lines.append("- Types: `/src/types/index.ts`")
        lines.append("- Utils: `/src/utils/index.ts`")
        lines.append("")

        # Import detailed docs
        lines.append("## 📚 Detailed Documentation (Imports)")
        lines.append("")
        lines.append("<!-- Use imports to keep this file lean (<300 lines) -->")
        lines.append("")
        lines.append("<!-- Example:")
        lines.append("@docs/architecture.md")
        lines.append("@docs/testing-strategy.md")
        lines.append("@docs/deployment.md")
        lines.append("-->")
        lines.append("")

        return "\n".join(lines)


def generate_report(results: AuditResults, file_path: Path, format: str = "markdown") -> str:
    """
    Generate audit report in specified format

    Args:
        results: AuditResults from analyzer
        file_path: Path to original CLAUDE.md
        format: "markdown", "json", or "refactored"

    Returns:
        Report content as string
    """
    generator = ReportGenerator(results, file_path)

    if format == "json":
        return generator.generate_json_report()
    elif format == "refactored":
        # Read original content for refactoring; fall back to an empty
        # document if the file cannot be read
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                original = f.read()
        except OSError:
            original = ""
        return generator.generate_refactored_claude_md(original)
    else:  # markdown (default)
        return generator.generate_markdown_report()
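
# A minimal sketch of exercising all three report formats through the function
# above (hedged; mirrors the CLI entry point below):
#
#     for fmt in ("markdown", "json", "refactored"):
#         print(generate_report(results, Path("CLAUDE.md"), fmt))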


if __name__ == "__main__":
    import sys

    from analyzer import analyze_file

    if len(sys.argv) < 2:
        print("Usage: python report_generator.py <path-to-CLAUDE.md> [format]")
        print("Formats: markdown (default), json, refactored")
        sys.exit(1)

    file_path = Path(sys.argv[1])
    report_format = sys.argv[2] if len(sys.argv) > 2 else "markdown"

    # Run analysis
    results = analyze_file(str(file_path))

    # Generate report
    report = generate_report(results, file_path, report_format)

    print(report)