#!/usr/bin/env python3
"""
Report Generator for CLAUDE.md Audits
Generates reports in multiple formats: Markdown, JSON, and Refactored CLAUDE.md
"""
import json
import re
from pathlib import Path
from datetime import datetime
from analyzer import AuditResults, Finding, Severity, Category
class ReportGenerator:
"""Generates audit reports in multiple formats"""
def __init__(self, results: AuditResults, original_file_path: Path):
self.results = results
self.original_file_path = Path(original_file_path)
self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
def generate_markdown_report(self) -> str:
"""Generate comprehensive markdown audit report"""
report = []
# Header
report.append("# CLAUDE.md Audit Report\n")
report.append(f"**File**: `{self.original_file_path}`\n")
report.append(f"**Generated**: {self.timestamp}\n")
report.append(f"**Tier**: {self.results.metadata.get('tier', 'Unknown')}\n")
report.append("\n---\n")
# Executive Summary
report.append("\n## Executive Summary\n")
report.append(self._generate_summary_table())
# Score Dashboard
report.append("\n## Score Dashboard\n")
report.append(self._generate_score_dashboard())
# File Metrics
report.append("\n## File Metrics\n")
report.append(self._generate_metrics_section())
# Findings by Severity
report.append("\n## Findings\n")
report.append(self._generate_findings_by_severity())
# Findings by Category
report.append("\n## Findings by Category\n")
report.append(self._generate_findings_by_category())
# Detailed Findings
report.append("\n## Detailed Findings\n")
report.append(self._generate_detailed_findings())
# Recommendations
report.append("\n## Priority Recommendations\n")
report.append(self._generate_recommendations())
# Footer
report.append("\n---\n")
report.append("\n*Generated by claude-md-auditor v1.0.0*\n")
report.append("*Based on official Anthropic documentation, community best practices, and academic research*\n")
return "\n".join(report)
def generate_json_report(self) -> str:
"""Generate JSON audit report for CI/CD integration"""
report_data = {
"metadata": {
"file": str(self.original_file_path),
"generated_at": self.timestamp,
"tier": self.results.metadata.get('tier', 'Unknown'),
"analyzer_version": "1.0.0"
},
"metrics": self.results.metadata,
"scores": self.results.scores,
"findings": [
{
"severity": f.severity.value,
"category": f.category.value,
"title": f.title,
"description": f.description,
"line_number": f.line_number,
"code_snippet": f.code_snippet,
"impact": f.impact,
"remediation": f.remediation,
"source": f.source
}
for f in self.results.findings
],
"summary": {
"total_findings": len(self.results.findings),
"critical": self.results.scores['critical_count'],
"high": self.results.scores['high_count'],
"medium": self.results.scores['medium_count'],
"low": self.results.scores['low_count'],
"overall_health": self.results.scores['overall']
}
}
return json.dumps(report_data, indent=2)
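    # The JSON report is intended for CI/CD gating. A consuming step might look
    # roughly like this (illustrative sketch; thresholds and wiring are up to the caller):
    #
    #   data = json.loads(report_json)        # report_json = generate_json_report()
    #   if data["summary"]["critical"] > 0:
    #       raise SystemExit(1)               # fail the pipeline on any critical finding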
def generate_refactored_claude_md(self, original_content: str) -> str:
"""Generate improved CLAUDE.md based on findings"""
refactored = []
# Add header comment
refactored.append("# CLAUDE.md")
refactored.append("")
refactored.append(f"<!-- Refactored: {self.timestamp} -->")
refactored.append("<!-- Based on official Anthropic guidelines and best practices -->")
refactored.append("")
# Add tier information if known
tier = self.results.metadata.get('tier', 'Unknown')
if tier != 'Unknown':
refactored.append(f"<!-- Tier: {tier} -->")
refactored.append("")
# Generate improved structure
refactored.append(self._generate_refactored_structure(original_content))
# Add footer
refactored.append("")
refactored.append("---")
refactored.append("")
refactored.append(f"**Last Updated**: {datetime.now().strftime('%Y-%m-%d')}")
refactored.append("**Maintained By**: [Team/Owner]")
refactored.append("")
refactored.append("<!-- Follow official guidance: Keep lean, be specific, use structure -->")
return "\n".join(refactored)
# ========== PRIVATE HELPER METHODS ==========
def _generate_summary_table(self) -> str:
"""Generate executive summary table"""
critical = self.results.scores['critical_count']
high = self.results.scores['high_count']
medium = self.results.scores['medium_count']
low = self.results.scores['low_count']
overall = self.results.scores['overall']
# Determine health status
if critical > 0:
status = "🚨 **CRITICAL ISSUES** - Immediate action required"
health = "Poor"
elif high > 3:
status = "⚠️ **HIGH PRIORITY** - Address this sprint"
health = "Fair"
elif high > 0 or medium > 5:
status = "📋 **MODERATE** - Schedule improvements"
health = "Good"
else:
status = "✅ **HEALTHY** - Minor optimizations available"
health = "Excellent"
lines = [
"| Metric | Value |",
"|--------|-------|",
f"| **Overall Health** | {overall}/100 ({health}) |",
f"| **Status** | {status} |",
f"| **Critical Issues** | {critical} |",
f"| **High Priority** | {high} |",
f"| **Medium Priority** | {medium} |",
f"| **Low Priority** | {low} |",
f"| **Total Findings** | {critical + high + medium + low} |",
]
return "\n".join(lines)
def _generate_score_dashboard(self) -> str:
"""Generate score dashboard"""
scores = self.results.scores
lines = [
"| Category | Score | Status |",
"|----------|-------|--------|",
f"| **Security** | {scores['security']}/100 | {self._score_status(scores['security'])} |",
f"| **Official Compliance** | {scores['official_compliance']}/100 | {self._score_status(scores['official_compliance'])} |",
f"| **Best Practices** | {scores['best_practices']}/100 | {self._score_status(scores['best_practices'])} |",
f"| **Research Optimization** | {scores['research_optimization']}/100 | {self._score_status(scores['research_optimization'])} |",
]
return "\n".join(lines)
def _score_status(self, score: int) -> str:
"""Convert score to status emoji"""
if score >= 90:
return "✅ Excellent"
elif score >= 75:
return "🟢 Good"
elif score >= 60:
return "🟡 Fair"
elif score >= 40:
return "🟠 Poor"
else:
return "🔴 Critical"
def _generate_metrics_section(self) -> str:
"""Generate file metrics section"""
meta = self.results.metadata
lines = [
"| Metric | Value | Recommendation |",
"|--------|-------|----------------|",
f"| **Lines** | {meta['line_count']} | 100-300 lines ideal |",
f"| **Characters** | {meta['character_count']:,} | Keep concise |",
f"| **Est. Tokens** | {meta['token_estimate']:,} | < 3,000 recommended |",
f"| **Context Usage (200K)** | {meta['context_usage_200k']}% | < 2% ideal |",
f"| **Context Usage (1M)** | {meta['context_usage_1m']}% | Reference only |",
]
# Add size assessment
line_count = meta['line_count']
if line_count < 50:
lines.append("")
lines.append("⚠️ **Assessment**: File may be too sparse. Consider adding more project context.")
elif line_count > 500:
lines.append("")
lines.append("🚨 **Assessment**: File exceeds recommended length. Use @imports for detailed docs.")
elif 100 <= line_count <= 300:
lines.append("")
lines.append("✅ **Assessment**: File length is in optimal range (100-300 lines).")
return "\n".join(lines)
def _generate_findings_by_severity(self) -> str:
"""Generate findings breakdown by severity"""
severity_counts = {
Severity.CRITICAL: self.results.scores['critical_count'],
Severity.HIGH: self.results.scores['high_count'],
Severity.MEDIUM: self.results.scores['medium_count'],
Severity.LOW: self.results.scores['low_count'],
}
lines = [
"| Severity | Count | Description |",
"|----------|-------|-------------|",
f"| 🚨 **Critical** | {severity_counts[Severity.CRITICAL]} | Security risks, immediate action required |",
f"| ⚠️ **High** | {severity_counts[Severity.HIGH]} | Significant issues, fix this sprint |",
f"| 📋 **Medium** | {severity_counts[Severity.MEDIUM]} | Moderate issues, schedule for next quarter |",
f"| **Low** | {severity_counts[Severity.LOW]} | Minor improvements, backlog |",
]
return "\n".join(lines)
def _generate_findings_by_category(self) -> str:
"""Generate findings breakdown by category"""
category_counts = {}
for finding in self.results.findings:
cat = finding.category.value
category_counts[cat] = category_counts.get(cat, 0) + 1
lines = [
"| Category | Count | Description |",
"|----------|-------|-------------|",
]
category_descriptions = {
"security": "Security vulnerabilities and sensitive information",
"official_compliance": "Compliance with official Anthropic documentation",
"best_practices": "Community best practices and field experience",
"research_optimization": "Research-based optimizations (lost in the middle, etc.)",
"structure": "Document structure and organization",
"maintenance": "Maintenance indicators and staleness",
}
for cat, desc in category_descriptions.items():
count = category_counts.get(cat, 0)
lines.append(f"| **{cat.replace('_', ' ').title()}** | {count} | {desc} |")
return "\n".join(lines)
def _generate_detailed_findings(self) -> str:
"""Generate detailed findings section"""
if not self.results.findings:
return "_No findings. CLAUDE.md is in excellent condition!_ ✅\n"
lines = []
# Group by severity
severity_order = [Severity.CRITICAL, Severity.HIGH, Severity.MEDIUM, Severity.LOW, Severity.INFO]
        severity_emoji = {
            Severity.CRITICAL: "🚨",
            Severity.HIGH: "⚠️",
            Severity.MEDIUM: "📋",
            Severity.LOW: "",
            Severity.INFO: "💡"
        }
        for severity in severity_order:
            findings = [f for f in self.results.findings if f.severity == severity]
            if not findings:
                continue
lines.append(f"\n### {severity_emoji[severity]} {severity.value.upper()} Priority\n")
for i, finding in enumerate(findings, 1):
lines.append(f"#### {i}. {finding.title}\n")
lines.append(f"**Category**: {finding.category.value.replace('_', ' ').title()}")
lines.append(f"**Source**: {finding.source.title()} Guidance\n")
if finding.line_number:
lines.append(f"**Location**: Line {finding.line_number}\n")
lines.append(f"**Description**: {finding.description}\n")
if finding.code_snippet:
lines.append("**Code**:")
lines.append("```")
lines.append(finding.code_snippet)
lines.append("```\n")
if finding.impact:
lines.append(f"**Impact**: {finding.impact}\n")
if finding.remediation:
lines.append(f"**Remediation**:\n{finding.remediation}\n")
lines.append("---\n")
return "\n".join(lines)
def _generate_recommendations(self) -> str:
"""Generate prioritized recommendations"""
lines = []
critical = [f for f in self.results.findings if f.severity == Severity.CRITICAL]
high = [f for f in self.results.findings if f.severity == Severity.HIGH]
if critical:
lines.append("### 🚨 Priority 0: IMMEDIATE ACTION (Critical)\n")
for i, finding in enumerate(critical, 1):
lines.append(f"{i}. **{finding.title}**")
lines.append(f" - {finding.description}")
if finding.line_number:
lines.append(f" - Line: {finding.line_number}")
lines.append("")
if high:
lines.append("### ⚠️ Priority 1: THIS SPRINT (High)\n")
for i, finding in enumerate(high, 1):
lines.append(f"{i}. **{finding.title}**")
lines.append(f" - {finding.description}")
lines.append("")
# General recommendations
lines.append("### 💡 General Recommendations\n")
if self.results.metadata['line_count'] > 300:
lines.append("- **Reduce file length**: Use @imports for detailed documentation")
if self.results.metadata['token_estimate'] > 5000:
lines.append("- **Optimize token usage**: Aim for < 3,000 tokens (≈200 lines)")
official_score = self.results.scores['official_compliance']
if official_score < 80:
lines.append("- **Improve official compliance**: Review official Anthropic documentation")
lines.append("- **Regular maintenance**: Schedule quarterly CLAUDE.md reviews")
lines.append("- **Team collaboration**: Share CLAUDE.md improvements via PR")
lines.append("- **Validate effectiveness**: Test that Claude follows standards without prompting")
return "\n".join(lines)
def _generate_refactored_structure(self, original_content: str) -> str:
"""Generate refactored CLAUDE.md structure"""
lines = []
# Detect project name from original (look for # header)
        project_match = re.search(r'^#\s+(.+)$', original_content, re.MULTILINE)
project_name = project_match.group(1) if project_match else "Project Name"
lines.append(f"# {project_name}")
lines.append("")
# Add critical standards section at top (optimal positioning)
lines.append("## 🚨 CRITICAL: Must-Follow Standards")
lines.append("")
lines.append("<!-- Place non-negotiable standards here (top position = highest attention) -->")
lines.append("")
lines.append("- [Add critical security requirements]")
lines.append("- [Add critical quality gates]")
lines.append("- [Add critical workflow requirements]")
lines.append("")
# Project overview
lines.append("## 📋 Project Overview")
lines.append("")
lines.append("**Tech Stack**: [List technologies]")
lines.append("**Architecture**: [Architecture pattern]")
lines.append("**Purpose**: [Project purpose]")
lines.append("")
# Development workflow
lines.append("## 🔧 Development Workflow")
lines.append("")
lines.append("### Git Workflow")
lines.append("- Branch pattern: `feature/{name}`, `bugfix/{name}`")
lines.append("- Conventional commit messages required")
lines.append("- PRs require: tests + review + passing CI")
lines.append("")
# Code standards
lines.append("## 📝 Code Standards")
lines.append("")
lines.append("### TypeScript/JavaScript")
lines.append("- TypeScript strict mode: enabled")
lines.append("- No `any` types (use `unknown` if needed)")
lines.append("- Explicit return types required")
lines.append("")
lines.append("### Testing")
lines.append("- Minimum coverage: 80%")
lines.append("- Testing trophy: 70% integration, 20% unit, 10% E2E")
lines.append("- Test naming: 'should [behavior] when [condition]'")
lines.append("")
# Common tasks (bottom position for recency attention)
lines.append("## 📌 REFERENCE: Common Tasks")
lines.append("")
lines.append("<!-- Bottom position = recency attention, good for frequently accessed info -->")
lines.append("")
lines.append("### Build & Test")
lines.append("```bash")
lines.append("npm run build # Build production")
lines.append("npm test # Run tests")
lines.append("npm run lint # Run linter")
lines.append("```")
lines.append("")
lines.append("### Key File Locations")
lines.append("- Config: `/config/app.config.ts`")
lines.append("- Types: `/src/types/index.ts`")
lines.append("- Utils: `/src/utils/index.ts`")
lines.append("")
# Import detailed docs
lines.append("## 📚 Detailed Documentation (Imports)")
lines.append("")
lines.append("<!-- Use imports to keep this file lean (<300 lines) -->")
lines.append("")
lines.append("<!-- Example:")
lines.append("@docs/architecture.md")
lines.append("@docs/testing-strategy.md")
lines.append("@docs/deployment.md")
lines.append("-->")
lines.append("")
return "\n".join(lines)
def generate_report(results: AuditResults, file_path: Path, output_format: str = "markdown") -> str:
    """
    Generate an audit report in the specified format.
    Args:
        results: AuditResults from the analyzer
        file_path: Path to the original CLAUDE.md
        output_format: "markdown", "json", or "refactored"
    Returns:
        Report content as a string
    """
    generator = ReportGenerator(results, file_path)
    if output_format == "json":
        return generator.generate_json_report()
    elif output_format == "refactored":
        # Read the original content so the refactored version can reuse its structure
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                original = f.read()
        except (OSError, UnicodeDecodeError):
            original = ""
        return generator.generate_refactored_claude_md(original)
    else:  # markdown (default)
        return generator.generate_markdown_report()
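# Programmatic usage sketch (assumes analyzer.analyze_file accepts a path string and
# returns AuditResults, as in the CLI entry point below):
#
#   from analyzer import analyze_file
#   results = analyze_file("CLAUDE.md")
#   Path("claude-md-audit.md").write_text(generate_report(results, Path("CLAUDE.md"), "markdown"))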
if __name__ == "__main__":
import sys
from analyzer import analyze_file
if len(sys.argv) < 2:
print("Usage: python report_generator.py <path-to-CLAUDE.md> [format]")
print("Formats: markdown (default), json, refactored")
sys.exit(1)
file_path = Path(sys.argv[1])
report_format = sys.argv[2] if len(sys.argv) > 2 else "markdown"
# Run analysis
results = analyze_file(str(file_path))
# Generate report
report = generate_report(results, file_path, report_format)
print(report)