#!/usr/bin/env python3
"""
Validate command examples and code blocks in markdown documentation.

Purpose: Extract code blocks from markdown files and validate syntax/formatting.
Author: Generated by documentation methodology experiment
Version: 1.0
"""

import argparse
import ast
import json
import re
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Tuple

# Opening fence of a markdown code block, with an optional language tag.
_FENCE_START = re.compile(r'^```(\w+)?')

# File suffixes (compared lower-case) that main() treats as markdown.
_MARKDOWN_SUFFIXES = ('.md', '.markdown')

# Seconds to wait for an external tool (shellcheck, gofmt) before giving up.
_TOOL_TIMEOUT = 5

# Status markers used in the report, written as escapes so they survive
# re-encoding of this file: U+2705 (check mark) and U+274C (cross mark).
_OK_MARK = "\u2705"
_FAIL_MARK = "\u274c"


@dataclass
class CodeBlock:
    """Represents a code block found in markdown."""

    # Language tag taken from the opening fence ('' when none was given).
    language: str
    # Text between the fences, lines joined with '\n'.
    content: str
    # 1-based line number of the opening fence.
    line_number: int
    # Markdown file the block was extracted from.
    file_path: Path


@dataclass
class ValidationResult:
    """Result of validating a code block."""

    code_block: CodeBlock
    is_valid: bool
    # Failure description, or an informational note when validation was skipped.
    error_message: str = ""


class MarkdownValidator:
    """Extract and validate code blocks from markdown files."""

    def __init__(self):
        # Maps a lower-case fence language tag to the method that validates it.
        self.supported_validators = {
            'bash': self._validate_bash,
            'sh': self._validate_bash,
            'shell': self._validate_bash,
            'python': self._validate_python,
            'go': self._validate_go,
            'json': self._validate_json,
            'yaml': self._validate_yaml,
            'yml': self._validate_yaml,
        }

    def extract_code_blocks(self, file_path: Path) -> List[CodeBlock]:
        """Extract all fenced code blocks from a markdown file.

        Args:
            file_path: Markdown file to read (decoded as UTF-8).

        Returns:
            One CodeBlock per fence, in document order. A fence that is
            still open at end of file is returned too, containing every
            line after its opening fence, so that it is validated rather
            than silently dropped.
        """
        code_blocks = []

        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.read().split('\n')

        in_code_block = False
        current_language = ""
        current_content = []
        start_line = 0

        for line_num, line in enumerate(lines, start=1):
            if not in_code_block:
                start_match = _FENCE_START.match(line)
                if start_match:
                    in_code_block = True
                    current_language = start_match.group(1) or ''
                    current_content = []
                    start_line = line_num
                continue

            # Inside a block: any line starting with a fence closes it.
            if line.startswith('```'):
                code_blocks.append(CodeBlock(
                    language=current_language,
                    content='\n'.join(current_content),
                    line_number=start_line,
                    file_path=file_path,
                ))
                in_code_block = False
                current_language = ""
                current_content = []
                continue

            current_content.append(line)

        if in_code_block:
            # Unterminated fence: keep the block instead of discarding it.
            code_blocks.append(CodeBlock(
                language=current_language,
                content='\n'.join(current_content),
                line_number=start_line,
                file_path=file_path,
            ))

        return code_blocks

    def validate_code_block(self, code_block: CodeBlock) -> ValidationResult:
        """Validate a single code block based on its language.

        Blocks with no language tag, or with a tag that has no registered
        validator, are reported as valid (the latter with an explanatory
        message) because nothing can be checked for them.
        """
        if not code_block.language:
            # No language specified, skip validation
            return ValidationResult(
                code_block=code_block,
                is_valid=True,
                error_message=""
            )

        language = code_block.language.lower()
        validator = self.supported_validators.get(language)
        if validator is None:
            # Language not supported for validation, skip
            return ValidationResult(
                code_block=code_block,
                is_valid=True,
                error_message=f"Validation not supported for language: {language}"
            )

        return validator(code_block)

    @staticmethod
    def _find_bash_imbalance(content: str) -> str:
        """Return a message for the first quote/bracket imbalance, or ''.

        The scan tracks shell quoting so that characters inside quotes and
        ``#`` comments are not counted: an apostrophe inside double quotes
        or in a comment is not an unmatched quote. This is a heuristic and
        not a shell parser; here-documents, for example, are not
        recognised.
        """
        counts = {'{': 0, '}': 0, '[': 0, ']': 0, '(': 0, ')': 0}
        quote = ''  # '', "'", '"' or "$'" (ANSI-C quoting)
        i, n = 0, len(content)

        while i < n:
            ch = content[i]
            if quote == "'":
                # Plain single quotes: nothing is special except the closer.
                if ch == "'":
                    quote = ''
            elif quote:
                # "..." and $'...' honour backslash escapes.
                if ch == '\\':
                    i += 1
                elif ch == quote[-1]:
                    quote = ''
            elif ch == '\\':
                i += 1  # escaped character outside quotes
            elif ch == "'":
                quote = "$'" if i > 0 and content[i - 1] == '$' else "'"
            elif ch == '"':
                quote = '"'
            elif ch == '#' and (i == 0 or content[i - 1].isspace()
                                or content[i - 1] == ';'):
                # Comment: ignore the rest of the line.
                newline = content.find('\n', i)
                if newline == -1:
                    break
                i = newline
            elif ch in counts:
                counts[ch] += 1
            i += 1

        if quote in ("'", "$'"):
            return "Unmatched single quote"
        if quote == '"':
            return "Unmatched double quote"
        if counts['{'] != counts['}']:
            return "Unmatched curly braces"
        if counts['['] != counts[']']:
            return "Unmatched square brackets"

        # `case` patterns end with a lone ')', so counting parentheses
        # would reject every valid case statement.
        has_case = (re.search(r'\bcase\b', content)
                    and re.search(r'\besac\b', content))
        if not has_case and counts['('] != counts[')']:
            return "Unmatched parentheses"
        return ""

    def _validate_bash(self, code_block: CodeBlock) -> ValidationResult:
        """Validate bash/shell syntax using basic parsing, then shellcheck.

        A quote/bracket balance heuristic runs first. If the ``shellcheck``
        executable is available it is then run at error severity, so
        style, info and warning notes do not fail a snippet. Only exit
        status 1 (issues found) is treated as a validation failure; a
        missing tool, a timeout, or any other exit status leaves the
        result of the basic check in place.
        """
        content = code_block.content

        # Skip if it's just comments or examples (not executable)
        meaningful = [ln.strip() for ln in content.split('\n') if ln.strip()]
        if all(ln.startswith('#') for ln in meaningful):
            return ValidationResult(code_block=code_block, is_valid=True)

        problem = self._find_bash_imbalance(content)
        if problem:
            return ValidationResult(
                code_block=code_block,
                is_valid=False,
                error_message=problem
            )

        # Snippets have no shebang, so tell shellcheck the dialect explicitly.
        shell = 'sh' if code_block.language.lower() == 'sh' else 'bash'
        try:
            result = subprocess.run(
                ['shellcheck', f'--shell={shell}', '--severity=error',
                 '--format=gcc', '-'],
                input=content.encode('utf-8'),
                capture_output=True,
                timeout=_TOOL_TIMEOUT
            )
        except (subprocess.TimeoutExpired, FileNotFoundError):
            # shellcheck not available or timed out, basic validation passed
            return ValidationResult(code_block=code_block, is_valid=True)

        if result.returncode == 1:
            report = result.stdout.decode('utf-8', errors='replace')
            # gcc format prints one diagnostic per line; report the first.
            diagnostics = [ln for ln in report.split('\n') if ln.strip()]
            error_msg = (diagnostics[0] if diagnostics
                         else "Shellcheck validation failed")
            return ValidationResult(
                code_block=code_block,
                is_valid=False,
                error_message=f"shellcheck: {error_msg}"
            )

        return ValidationResult(code_block=code_block, is_valid=True)

    def _validate_python(self, code_block: CodeBlock) -> ValidationResult:
        """Validate Python syntax using ast.parse."""
        try:
            ast.parse(code_block.content)
        except SyntaxError as e:
            return ValidationResult(
                code_block=code_block,
                is_valid=False,
                error_message=f"Python syntax error: {e.msg} at line {e.lineno}"
            )
        except ValueError as e:
            # Some Python versions raise ValueError (not SyntaxError) for
            # source containing null bytes.
            return ValidationResult(
                code_block=code_block,
                is_valid=False,
                error_message=f"Python syntax error: {e}"
            )
        return ValidationResult(code_block=code_block, is_valid=True)

    def _validate_go(self, code_block: CodeBlock) -> ValidationResult:
        """Validate Go syntax using gofmt.

        When ``gofmt`` is missing or times out the block is reported as
        valid with a note, since nothing could be checked.
        """
        try:
            result = subprocess.run(
                ['gofmt', '-e'],
                input=code_block.content.encode('utf-8'),
                capture_output=True,
                timeout=_TOOL_TIMEOUT
            )
        except (subprocess.TimeoutExpired, FileNotFoundError):
            # gofmt not available, skip validation
            return ValidationResult(
                code_block=code_block,
                is_valid=True,
                error_message="gofmt not available"
            )

        if result.returncode != 0:
            error = result.stderr.decode('utf-8', errors='replace')
            return ValidationResult(
                code_block=code_block,
                is_valid=False,
                error_message=f"gofmt: {error.strip()}"
            )
        return ValidationResult(code_block=code_block, is_valid=True)

    def _validate_json(self, code_block: CodeBlock) -> ValidationResult:
        """Validate JSON syntax."""
        try:
            json.loads(code_block.content)
        except json.JSONDecodeError as e:
            return ValidationResult(
                code_block=code_block,
                is_valid=False,
                error_message=f"JSON error: {e.msg} at line {e.lineno}"
            )
        return ValidationResult(code_block=code_block, is_valid=True)

    def _validate_yaml(self, code_block: CodeBlock) -> ValidationResult:
        """Validate YAML syntax.

        Multi-document streams (documents separated by ``---``) are
        accepted. When PyYAML is not installed the block is reported as
        valid with a note.
        """
        try:
            import yaml
        except ImportError:
            return ValidationResult(
                code_block=code_block,
                is_valid=True,
                error_message="PyYAML not installed, skipping validation"
            )

        try:
            # safe_load_all is lazy; iterate to force every document to parse.
            for _ in yaml.safe_load_all(code_block.content):
                pass
        except yaml.YAMLError as e:
            return ValidationResult(
                code_block=code_block,
                is_valid=False,
                error_message=f"YAML error: {str(e)}"
            )
        return ValidationResult(code_block=code_block, is_valid=True)

    def validate_file(self, file_path: Path) -> List[ValidationResult]:
        """Extract and validate all code blocks in a file."""
        return [
            self.validate_code_block(code_block)
            for code_block in self.extract_code_blocks(file_path)
        ]


def print_results(results: List[ValidationResult], verbose: bool = False):
    """Print validation results.

    Failures are always listed; passing blocks are listed only when
    ``verbose`` is true. A summary with totals is printed in every case.
    """
    total_blocks = len(results)
    valid_blocks = sum(1 for r in results if r.is_valid)
    invalid_blocks = total_blocks - valid_blocks

    if verbose or invalid_blocks > 0:
        for result in results:
            block = result.code_block
            if not result.is_valid:
                print(f"{_FAIL_MARK} {block.file_path}:{block.line_number}")
                print(f" Language: {block.language}")
                print(f" Error: {result.error_message}")
                print()
            elif verbose:
                print(f"{_OK_MARK} {block.file_path}:{block.line_number} "
                      f"({block.language})")

    print("\nValidation Summary:")
    print(f" Total code blocks: {total_blocks}")
    print(f" Valid: {valid_blocks}")
    print(f" Invalid: {invalid_blocks}")

    if invalid_blocks == 0:
        print(f"\n{_OK_MARK} All code blocks validated successfully!")
    else:
        print(f"\n{_FAIL_MARK} {invalid_blocks} code block(s) have validation errors")


def main():
    """Main entry point.

    Exits with status 1 when a named file does not exist or when any code
    block fails validation, and with status 0 otherwise.
    """
    parser = argparse.ArgumentParser(
        description='Validate code blocks in markdown documentation'
    )
    parser.add_argument(
        'files',
        nargs='+',
        type=Path,
        help='Markdown files to validate'
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='Show all validation results (not just errors)'
    )

    args = parser.parse_args()

    validator = MarkdownValidator()
    all_results = []

    for file_path in args.files:
        if not file_path.exists():
            print(f"Error: File not found: {file_path}", file=sys.stderr)
            sys.exit(1)

        # Compare case-insensitively so README.MD and *.markdown are accepted.
        if file_path.suffix.lower() not in _MARKDOWN_SUFFIXES:
            print(f"Warning: Skipping non-markdown file: {file_path}", file=sys.stderr)
            continue

        all_results.extend(validator.validate_file(file_path))

    print_results(all_results, verbose=args.verbose)

    # Exit with error code if any validation failed
    if any(not r.is_valid for r in all_results):
        sys.exit(1)

    sys.exit(0)


if __name__ == '__main__':
    main()