Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 17:55:11 +08:00
commit f9707d7bd8
16 changed files with 3770 additions and 0 deletions

View File

@@ -0,0 +1,247 @@
#!/usr/bin/env python3
"""
Claude.md Validator
Validates existing claude.md files for completeness, accuracy, and quality.
Checks for TODO markers, outdated information, and missing key sections.
Usage:
python validate_claude_md.py <path> [--strict] [--auto-fix]
Examples:
python validate_claude_md.py /path/to/repo
python validate_claude_md.py /path/to/src/claude.md
python validate_claude_md.py /path/to/repo --strict
"""
import os
import sys
import argparse
from pathlib import Path
from typing import List, Dict, Tuple
import re
REQUIRED_SECTIONS = [
'Overview',
'Purpose' # Alternative to Overview
]
RECOMMENDED_SECTIONS = [
'Directory Structure',
'Key Files',
'Important Patterns',
'Dependencies',
'Usage'
]
def find_claude_md_files(root_path: Path) -> List[Path]:
"""Find all claude.md files in the directory tree."""
claude_md_files = []
for dirpath, dirnames, filenames in os.walk(root_path):
# Skip common ignored directories
dirnames[:] = [d for d in dirnames if not d.startswith('.') and d not in {
'node_modules', '__pycache__', 'venv', 'env', 'dist', 'build'
}]
if 'claude.md' in filenames:
claude_md_files.append(Path(dirpath) / 'claude.md')
return claude_md_files
def validate_claude_md(file_path: Path, strict: bool = False) -> Dict:
"""Validate a single claude.md file."""
issues = []
warnings = []
try:
with open(file_path, 'r') as f:
content = f.read()
except Exception as e:
return {
'valid': False,
'issues': [f"Could not read file: {e}"],
'warnings': [],
'stats': {}
}
lines = content.split('\n')
# Check for empty or very short files
if len(content.strip()) < 50:
issues.append("File is too short (less than 50 characters)")
# Check for TODO markers
todo_count = len(re.findall(r'TODO|FIXME|XXX', content, re.IGNORECASE))
if todo_count > 0:
if strict:
issues.append(f"Found {todo_count} TODO/FIXME markers")
else:
warnings.append(f"Found {todo_count} TODO/FIXME markers")
# Check for required sections
has_overview = any(re.search(r'^##?\s+(Overview|Purpose)', line, re.IGNORECASE) for line in lines)
if not has_overview:
issues.append("Missing required section: Overview or Purpose")
# Check for recommended sections
found_sections = []
for section in RECOMMENDED_SECTIONS:
if any(re.search(rf'^##?\s+{section}', line, re.IGNORECASE) for line in lines):
found_sections.append(section)
missing_recommended = set(RECOMMENDED_SECTIONS) - set(found_sections)
if missing_recommended and strict:
warnings.append(f"Missing recommended sections: {', '.join(missing_recommended)}")
# Check for placeholder text
if '<!-- TODO' in content or 'Description' in content and 'TODO' in content:
if strict:
issues.append("Contains placeholder TODO comments that need completion")
else:
warnings.append("Contains placeholder TODO comments")
# Check for minimal content in sections
sections = re.split(r'^##?\s+', content, flags=re.MULTILINE)[1:] # Split by headers
for section in sections:
lines_in_section = [l.strip() for l in section.split('\n')[1:] if l.strip() and not l.strip().startswith('<!--')]
if len(lines_in_section) < 2:
section_name = section.split('\n')[0]
warnings.append(f"Section '{section_name}' has minimal content")
# Check for broken links (basic check)
broken_link_pattern = r'\[([^\]]+)\]\(([^\)]+)\)'
links = re.findall(broken_link_pattern, content)
for link_text, link_url in links:
if link_url.startswith('./') or link_url.startswith('../'):
# Check if relative path exists
target_path = file_path.parent / link_url
if not target_path.exists():
warnings.append(f"Potentially broken relative link: {link_url}")
# Check age (if git is available)
stats = {
'line_count': len(lines),
'word_count': len(content.split()),
'todo_count': todo_count,
'sections_found': len(found_sections)
}
return {
'valid': len(issues) == 0,
'issues': issues,
'warnings': warnings,
'stats': stats
}
def print_validation_results(results: Dict[Path, Dict], strict: bool):
"""Print validation results in a readable format."""
print("\n" + "="*70)
print("CLAUDE.MD VALIDATION RESULTS")
print("="*70)
total_files = len(results)
valid_files = sum(1 for r in results.values() if r['valid'])
files_with_warnings = sum(1 for r in results.values() if r['warnings'])
print(f"\n📊 Summary:")
print(f" Total files checked: {total_files}")
print(f" Valid files: {valid_files}")
print(f" Files with issues: {total_files - valid_files}")
print(f" Files with warnings: {files_with_warnings}")
# Show files with issues
files_with_issues = {p: r for p, r in results.items() if not r['valid']}
if files_with_issues:
print(f"\n❌ Files with issues:")
print("-" * 70)
for file_path, result in files_with_issues.items():
print(f"\n📄 {file_path}")
for issue in result['issues']:
print(f"{issue}")
if result['warnings']:
for warning in result['warnings']:
print(f" ⚠️ {warning}")
# Show files with warnings only
files_with_only_warnings = {
p: r for p, r in results.items()
if r['valid'] and r['warnings']
}
if files_with_only_warnings:
print(f"\n⚠️ Files with warnings:")
print("-" * 70)
for file_path, result in files_with_only_warnings.items():
print(f"\n📄 {file_path}")
for warning in result['warnings']:
print(f" ⚠️ {warning}")
# Show fully valid files
fully_valid = {
p: r for p, r in results.items()
if r['valid'] and not r['warnings']
}
if fully_valid:
print(f"\n✅ Fully valid files:")
print("-" * 70)
for file_path in fully_valid.keys():
print(f" 📄 {file_path}")
print("\n" + "="*70)
def main():
parser = argparse.ArgumentParser(
description='Validate claude.md files'
)
parser.add_argument(
'path',
type=str,
help='Path to directory or specific claude.md file'
)
parser.add_argument(
'--strict',
action='store_true',
help='Enable strict validation (TODOs become errors)'
)
args = parser.parse_args()
path = Path(args.path).resolve()
if not path.exists():
print(f"Error: Path does not exist: {path}")
sys.exit(1)
# Find claude.md files
if path.is_file() and path.name == 'claude.md':
files_to_validate = [path]
elif path.is_dir():
files_to_validate = find_claude_md_files(path)
if not files_to_validate:
print(f"No claude.md files found in {path}")
sys.exit(0)
else:
print(f"Error: Path must be a directory or a claude.md file")
sys.exit(1)
print(f"Validating {len(files_to_validate)} claude.md file(s)...")
# Validate each file
results = {}
for file_path in files_to_validate:
results[file_path] = validate_claude_md(file_path, strict=args.strict)
# Print results
print_validation_results(results, args.strict)
# Exit with error code if any files have issues
if any(not r['valid'] for r in results.values()):
sys.exit(1)
if __name__ == '__main__':
main()