Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 09:07:22 +08:00
commit fab98d059b
179 changed files with 46209 additions and 0 deletions

View File

@@ -0,0 +1,346 @@
#!/usr/bin/env python3
"""
Validate command examples and code blocks in markdown documentation.
Purpose: Extract code blocks from markdown files and validate syntax/formatting.
Author: Generated by documentation methodology experiment
Version: 1.0
"""
import re
import sys
import subprocess
from pathlib import Path
from typing import List, Tuple, Dict
from dataclasses import dataclass
@dataclass
class CodeBlock:
    """One fenced code block taken from a markdown document."""

    # Language tag that followed the opening fence.
    language: str
    # Text of the block.
    content: str
    # Line in the markdown file associated with the block.
    line_number: int
    # Markdown file the block belongs to.
    file_path: Path
@dataclass
class ValidationResult:
    """Outcome of checking a single code block."""

    # The block that was checked.
    code_block: CodeBlock
    # Whether the block is considered valid.
    is_valid: bool
    # Accompanying message; defaults to the empty string.
    error_message: str = ""
class MarkdownValidator:
    """Extract fenced code blocks from markdown files and syntax-check them.

    Each supported language tag maps to a private ``_validate_*`` method.
    Blocks with no tag, or with a tag that has no validator, are reported as
    valid (the latter with an explanatory message) rather than as failures.
    """

    # Opening fence: three backticks at column 0 plus an optional language tag.
    _FENCE_OPEN = re.compile(r'^```(\w+)?')
    # Seconds allowed for each external tool (shellcheck, gofmt) invocation.
    _TOOL_TIMEOUT = 5

    def __init__(self):
        self.supported_validators = {
            'bash': self._validate_bash,
            'sh': self._validate_bash,
            'shell': self._validate_bash,
            'python': self._validate_python,
            'go': self._validate_go,
            'json': self._validate_json,
            'yaml': self._validate_yaml,
            'yml': self._validate_yaml,
        }

    def extract_code_blocks(self, file_path: Path) -> List[CodeBlock]:
        """Return every fenced code block in ``file_path``, in document order.

        A block starts at a line beginning with three backticks (optionally
        followed by a language tag) and ends at the next line beginning with
        three backticks. ``line_number`` is the 1-based line of the opening
        fence. A fence that is still open at end of file is closed implicitly
        so that its content is validated instead of being silently dropped.
        """
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.read().split('\n')

        code_blocks = []
        in_code_block = False
        current_language = ""
        current_content = []
        start_line = 0

        for line_num, line in enumerate(lines, start=1):
            if not in_code_block:
                opening = self._FENCE_OPEN.match(line)
                if opening:
                    in_code_block = True
                    current_language = opening.group(1) or ''
                    current_content = []
                    start_line = line_num
                continue

            if line.startswith('```'):
                code_blocks.append(CodeBlock(
                    language=current_language,
                    content='\n'.join(current_content),
                    line_number=start_line,
                    file_path=file_path,
                ))
                in_code_block = False
                continue

            current_content.append(line)

        if in_code_block:
            # Unterminated fence: the block runs to the end of the document.
            # Drop the empty element that split('\n') yields after a final
            # newline so the content does not gain a spurious trailing '\n'.
            if current_content and current_content[-1] == '':
                current_content.pop()
            code_blocks.append(CodeBlock(
                language=current_language,
                content='\n'.join(current_content),
                line_number=start_line,
                file_path=file_path,
            ))

        return code_blocks

    def validate_code_block(self, code_block: CodeBlock) -> ValidationResult:
        """Validate one block with the validator registered for its language.

        Blocks without a language tag are accepted unchecked. Blocks whose
        (case-insensitive) tag has no validator are accepted with a message
        saying validation is not supported.
        """
        if not code_block.language:
            return ValidationResult(
                code_block=code_block,
                is_valid=True,
                error_message="",
            )

        language = code_block.language.lower()
        validator = self.supported_validators.get(language)
        if validator is None:
            return ValidationResult(
                code_block=code_block,
                is_valid=True,
                error_message=f"Validation not supported for language: {language}",
            )
        return validator(code_block)

    def _validate_bash(self, code_block: CodeBlock) -> ValidationResult:
        """Validate shell syntax with shellcheck, or heuristics if it is absent.

        Comment-only and empty blocks are accepted immediately. When
        shellcheck runs, its verdict is authoritative; the quote/bracket
        balance heuristics are used only when shellcheck is missing, times
        out, or cannot process the request.
        """
        content = code_block.content

        meaningful = [line.strip() for line in content.split('\n') if line.strip()]
        if all(line.startswith('#') for line in meaningful):
            return ValidationResult(code_block=code_block, is_valid=True)

        # Documentation snippets rarely carry a shebang, so tell shellcheck
        # which dialect to assume based on the fence tag.
        dialect = 'sh' if code_block.language.lower() == 'sh' else 'bash'
        ran, message = self._run_shellcheck(content, dialect)
        if not ran:
            message = self._shell_balance_error(content)

        if message:
            return ValidationResult(
                code_block=code_block,
                is_valid=False,
                error_message=message,
            )
        return ValidationResult(code_block=code_block, is_valid=True)

    @staticmethod
    def _run_shellcheck(content: str, dialect: str) -> Tuple[bool, str]:
        """Run shellcheck on ``content``; return ``(ran, error_message)``.

        ``ran`` is False when shellcheck is not installed, timed out, or
        exited with a status other than 0/1 (e.g. it rejected an option).
        ``error_message`` is empty when no error-level finding was reported.
        """
        try:
            proc = subprocess.run(
                [
                    'shellcheck',
                    f'--shell={dialect}',   # no shebang needed (avoids SC2148)
                    '--severity=error',     # style/info/warning are not syntax errors
                    '--format=gcc',         # one diagnostic per line
                    '-',
                ],
                input=content.encode('utf-8'),
                capture_output=True,
                timeout=MarkdownValidator._TOOL_TIMEOUT,
            )
        except (subprocess.TimeoutExpired, FileNotFoundError):
            return False, ""

        if proc.returncode == 0:
            return True, ""
        if proc.returncode != 1:
            # Exit status 1 means "issues found"; anything else is a tool
            # failure, so let the caller fall back to the heuristics.
            return False, ""

        report = proc.stdout.decode('utf-8', errors='replace')
        first = next(
            (ln.strip() for ln in report.split('\n') if ln.strip()),
            "Shellcheck validation failed",
        )
        return True, f"shellcheck: {first}"

    @staticmethod
    def _shell_balance_error(content: str) -> str:
        """Return a message for the first quote/bracket imbalance, or ``""``.

        The scan is quote-aware: characters inside quotes or ``#`` comments
        are never counted as brackets, backslash escapes are honoured outside
        single quotes, and quotes nested in the other quote type are ignored.
        Here-documents are not understood, so their bodies are scanned like
        ordinary shell text.
        """
        opened = {'{': 0, '[': 0, '(': 0}
        closed = {'}': 0, ']': 0, ')': 0}
        quote = ''  # '' outside quotes, else "'", '"' or "$'" (ANSI-C quoting)
        i, n = 0, len(content)

        while i < n:
            ch = content[i]
            if quote == "'":
                # Plain single quotes: nothing is special except the closer.
                if ch == "'":
                    quote = ''
            elif quote:
                # Double quotes and $'...' allow backslash escapes.
                if ch == '\\':
                    i += 1
                elif ch == quote[-1]:
                    quote = ''
            elif ch == '\\':
                i += 1  # skip the escaped character
            elif ch == '"':
                quote = '"'
            elif ch == "'":
                quote = "$'" if i > 0 and content[i - 1] == '$' else "'"
            elif ch == '#' and (i == 0 or content[i - 1] in ' \t\n;'):
                # Comment: ignore everything up to the end of the line.
                newline = content.find('\n', i)
                if newline == -1:
                    break
                i = newline
            elif ch in opened:
                opened[ch] += 1
            elif ch in closed:
                closed[ch] += 1
            i += 1

        if quote in ("'", "$'"):
            return "Unmatched single quote"
        if quote == '"':
            return "Unmatched double quote"
        if opened['{'] != closed['}']:
            return "Unmatched curly braces"
        if opened['['] != closed[']']:
            return "Unmatched square brackets"
        # `case` patterns end with a lone ')', so counting is meaningless there.
        if not re.search(r'\besac\b', content) and opened['('] != closed[')']:
            return "Unmatched parentheses"
        return ""

    def _validate_python(self, code_block: CodeBlock) -> ValidationResult:
        """Validate Python syntax by parsing the block with ``ast.parse``."""
        import ast

        try:
            ast.parse(code_block.content)
        except SyntaxError as e:
            return ValidationResult(
                code_block=code_block,
                is_valid=False,
                error_message=f"Python syntax error: {e.msg} at line {e.lineno}",
            )
        except ValueError as e:
            # Raised by some interpreter versions for source with null bytes.
            return ValidationResult(
                code_block=code_block,
                is_valid=False,
                error_message=f"Python syntax error: {e}",
            )
        return ValidationResult(code_block=code_block, is_valid=True)

    def _validate_go(self, code_block: CodeBlock) -> ValidationResult:
        """Validate Go syntax by piping the block through ``gofmt -e``.

        If gofmt is missing or times out the block is accepted, with a
        message saying why validation was skipped.
        """
        try:
            result = subprocess.run(
                ['gofmt', '-e'],
                input=code_block.content.encode('utf-8'),
                capture_output=True,
                timeout=self._TOOL_TIMEOUT,
            )
        except FileNotFoundError:
            return ValidationResult(
                code_block=code_block,
                is_valid=True,
                error_message="gofmt not available",
            )
        except subprocess.TimeoutExpired:
            return ValidationResult(
                code_block=code_block,
                is_valid=True,
                error_message="gofmt timed out, skipping validation",
            )

        if result.returncode != 0:
            error = result.stderr.decode('utf-8', errors='replace')
            return ValidationResult(
                code_block=code_block,
                is_valid=False,
                error_message=f"gofmt: {error.strip()}",
            )
        return ValidationResult(code_block=code_block, is_valid=True)

    def _validate_json(self, code_block: CodeBlock) -> ValidationResult:
        """Validate JSON syntax with ``json.loads``."""
        import json

        try:
            json.loads(code_block.content)
        except json.JSONDecodeError as e:
            return ValidationResult(
                code_block=code_block,
                is_valid=False,
                error_message=f"JSON error: {e.msg} at line {e.lineno}",
            )
        return ValidationResult(code_block=code_block, is_valid=True)

    def _validate_yaml(self, code_block: CodeBlock) -> ValidationResult:
        """Validate YAML syntax with PyYAML's safe loader.

        Multi-document streams (``---`` separators) are accepted. If PyYAML
        is not installed the block is accepted with a message saying so.
        """
        try:
            import yaml
        except ImportError:
            return ValidationResult(
                code_block=code_block,
                is_valid=True,
                error_message="PyYAML not installed, skipping validation",
            )

        try:
            # safe_load_all is lazy; iterate to force every document to parse.
            for _ in yaml.safe_load_all(code_block.content):
                pass
        except yaml.YAMLError as e:
            return ValidationResult(
                code_block=code_block,
                is_valid=False,
                error_message=f"YAML error: {str(e)}",
            )
        return ValidationResult(code_block=code_block, is_valid=True)

    def validate_file(self, file_path: Path) -> List[ValidationResult]:
        """Extract and validate all code blocks in a file, in document order."""
        return [
            self.validate_code_block(code_block)
            for code_block in self.extract_code_blocks(file_path)
        ]
def print_results(results: List[ValidationResult], verbose: bool = False):
    """Print per-block findings followed by a summary to stdout.

    Args:
        results: Validation results to report, in the order to print them.
        verbose: When true, also list every valid block, together with any
            note its validator attached (for example that validation was
            skipped because a tool was unavailable). Invalid blocks are
            always listed.
    """
    total_blocks = len(results)
    invalid_blocks = sum(1 for r in results if not r.is_valid)
    valid_blocks = total_blocks - invalid_blocks

    for result in results:
        block = result.code_block
        location = f"{block.file_path}:{block.line_number}"
        if not result.is_valid:
            print(location)
            print(f" Language: {block.language}")
            print(f" Error: {result.error_message}")
            print()
        elif verbose:
            print(f"{location} ({block.language})")
            if result.error_message:
                # A valid result carrying a message means validation was
                # skipped or limited; surface that instead of hiding it.
                print(f" Note: {result.error_message}")

    print("\nValidation Summary:")
    print(f" Total code blocks: {total_blocks}")
    print(f" Valid: {valid_blocks}")
    print(f" Invalid: {invalid_blocks}")
    if invalid_blocks == 0:
        print("\n✅ All code blocks validated successfully!")
    else:
        print(f"\n{invalid_blocks} code block(s) have validation errors")
def main():
    """Command-line entry point: validate the given markdown files.

    Exits with status 1 if a listed file does not exist or if any code block
    fails validation, and with status 0 otherwise. Files whose suffix is not
    ``.md`` are skipped with a warning on stderr.
    """
    import argparse

    cli = argparse.ArgumentParser(
        description='Validate code blocks in markdown documentation'
    )
    cli.add_argument('files', nargs='+', type=Path, help='Markdown files to validate')
    cli.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Show all validation results (not just errors)'
    )
    options = cli.parse_args()

    checker = MarkdownValidator()
    collected = []
    for markdown_path in options.files:
        if not markdown_path.exists():
            print(f"Error: File not found: {markdown_path}", file=sys.stderr)
            sys.exit(1)
        if markdown_path.suffix != '.md':
            print(f"Warning: Skipping non-markdown file: {markdown_path}", file=sys.stderr)
            continue
        collected += checker.validate_file(markdown_path)

    print_results(collected, verbose=options.verbose)

    # A non-zero exit status signals that at least one block failed.
    sys.exit(0 if all(outcome.is_valid for outcome in collected) else 1)


if __name__ == '__main__':
    main()