Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:57:46 +08:00
commit 0958e2ff2c
17 changed files with 2762 additions and 0 deletions

View File

@@ -0,0 +1,267 @@
#!/usr/bin/env python3
"""
Markdown Documentation Validator
Validates markdown documentation files for common issues:
- Proper heading hierarchy (no skipped levels)
- Code block formatting (language tags, closing backticks)
- Link syntax validation
- Basic structure checks
Usage:
python validate_markdown.py <file_path>
python validate_markdown.py <directory_path>
"""
import re
import sys
from pathlib import Path
from typing import List, Tuple
class ValidationError:
"""Represents a validation error with line number and description."""
def __init__(self, line_num: int, error_type: str, message: str):
self.line_num = line_num
self.error_type = error_type
self.message = message
def __str__(self):
return f"Line {self.line_num}: [{self.error_type}] {self.message}"
class MarkdownValidator:
"""Validates markdown documentation files."""
def __init__(self, file_path: Path):
self.file_path = file_path
self.errors: List[ValidationError] = []
self.lines: List[str] = []
self.heading_levels: List[int] = []
def validate(self) -> List[ValidationError]:
"""Run all validation checks and return list of errors."""
try:
with open(self.file_path, 'r', encoding='utf-8') as f:
self.lines = f.readlines()
except Exception as e:
return [ValidationError(0, "FILE_ERROR", f"Cannot read file: {e}")]
self._check_heading_hierarchy()
self._check_code_blocks()
self._check_links()
self._check_basic_structure()
return sorted(self.errors, key=lambda e: e.line_num)
def _check_heading_hierarchy(self):
"""Check that heading levels don't skip (e.g., h1 -> h3)."""
prev_level = 0
for i, line in enumerate(self.lines, 1):
# Match ATX-style headings (# Heading)
match = re.match(r'^(#{1,6})\s+(.+)', line)
if match:
level = len(match.group(1))
self.heading_levels.append(level)
# Check for skipped levels
if level > prev_level + 1:
self.errors.append(ValidationError(
i,
"HEADING_SKIP",
f"Heading level skipped (h{prev_level} -> h{level}). Use h{prev_level + 1} instead."
))
# Check for title case (common mistake)
title = match.group(2).strip()
if self._is_title_case(title) and level <= 3:
self.errors.append(ValidationError(
i,
"TITLE_CASE",
f"Heading appears to use title case. Use sentence case instead: '{title}'"
))
prev_level = level
def _is_title_case(self, text: str) -> bool:
"""Check if text appears to be in title case."""
# Skip if too short or starts with code
if len(text.split()) < 3 or text.startswith('`'):
return False
words = text.split()
# Articles and short words that should be lowercase in sentence case
small_words = {'a', 'an', 'the', 'and', 'but', 'or', 'for', 'nor', 'on', 'at', 'to', 'from', 'by', 'with'}
capitalized_count = sum(1 for word in words[1:] if word and word[0].isupper() and word.lower() not in small_words)
# If more than 50% of non-first words are capitalized, likely title case
return capitalized_count > len(words[1:]) * 0.5
def _check_code_blocks(self):
"""Check code block formatting."""
in_code_block = False
code_block_start = 0
for i, line in enumerate(self.lines, 1):
# Check for code block fences
if line.strip().startswith('```'):
if not in_code_block:
# Starting a code block
in_code_block = True
code_block_start = i
# Check for language tag
fence_content = line.strip()[3:].strip()
if not fence_content:
self.errors.append(ValidationError(
i,
"CODE_BLOCK_LANG",
"Code block missing language identifier (e.g., ```python, ```bash, ```markdown)"
))
else:
# Ending a code block
in_code_block = False
# Check for unclosed code block
if in_code_block:
self.errors.append(ValidationError(
code_block_start,
"CODE_BLOCK_UNCLOSED",
"Code block opened but never closed"
))
def _check_links(self):
"""Check link syntax."""
for i, line in enumerate(self.lines, 1):
# Find all markdown links [text](url)
links = re.finditer(r'\[([^\]]+)\]\(([^)]+)\)', line)
for match in links:
link_text = match.group(1)
link_url = match.group(2)
# Check for "click here" anti-pattern
if link_text.lower() in ['click here', 'here', 'read more', 'more']:
self.errors.append(ValidationError(
i,
"LINK_TEXT",
f"Non-descriptive link text: '{link_text}'. Use descriptive text that tells where the link goes."
))
# Check for empty link text
if not link_text.strip():
self.errors.append(ValidationError(
i,
"LINK_EMPTY",
"Link has empty text"
))
# Check for empty URL
if not link_url.strip():
self.errors.append(ValidationError(
i,
"LINK_EMPTY_URL",
f"Link '{link_text}' has empty URL"
))
def _check_basic_structure(self):
"""Check basic document structure."""
# Check for at least one H1
if not any(line.startswith('# ') for line in self.lines):
self.errors.append(ValidationError(
1,
"NO_H1",
"Document should have at least one top-level heading (# Title)"
))
# Check for multiple H1s (often unintended)
h1_count = sum(1 for line in self.lines if line.startswith('# '))
if h1_count > 1:
self.errors.append(ValidationError(
1,
"MULTIPLE_H1",
f"Document has {h1_count} top-level headings. Consider using only one H1 as the document title."
))
def validate_file(file_path: Path) -> Tuple[Path, List[ValidationError]]:
"""Validate a single markdown file."""
validator = MarkdownValidator(file_path)
errors = validator.validate()
return file_path, errors
def validate_directory(dir_path: Path) -> List[Tuple[Path, List[ValidationError]]]:
"""Validate all markdown files in a directory."""
results = []
for md_file in dir_path.rglob('*.md'):
file_path, errors = validate_file(md_file)
if errors:
results.append((file_path, errors))
return results
def print_results(results: List[Tuple[Path, List[ValidationError]]]):
"""Print validation results."""
if not results:
print("✓ No validation errors found!")
return
total_errors = sum(len(errors) for _, errors in results)
print(f"\n{'='*70}")
print(f"Found {total_errors} validation error(s) in {len(results)} file(s)")
print(f"{'='*70}\n")
for file_path, errors in results:
print(f"\n{file_path}:")
print("-" * 70)
for error in errors:
print(f" {error}")
print(f"\n{'='*70}")
print(f"Total: {total_errors} error(s)")
print(f"{'='*70}\n")
def main():
"""Main entry point."""
if len(sys.argv) != 2:
print("Usage: python validate_markdown.py <file_or_directory>")
sys.exit(1)
path = Path(sys.argv[1])
if not path.exists():
print(f"Error: Path does not exist: {path}")
sys.exit(1)
results = []
if path.is_file():
if path.suffix == '.md':
file_path, errors = validate_file(path)
if errors:
results.append((file_path, errors))
else:
print(f"Error: Not a markdown file: {path}")
sys.exit(1)
elif path.is_dir():
results = validate_directory(path)
else:
print(f"Error: Invalid path: {path}")
sys.exit(1)
print_results(results)
# Exit with error code if validation errors found
sys.exit(1 if results else 0)
if __name__ == '__main__':
main()