#!/usr/bin/env python3
"""
PostToolUse hook: Format Markdown files and embedded code blocks.

Inspired by https://github.com/ultralytics/actions/blob/main/actions/update_markdown_code_blocks.py
"""

from __future__ import annotations

import hashlib
import json
import re
import shutil
import subprocess
import sys
from pathlib import Path
from tempfile import TemporaryDirectory

# Fence info-string alternatives per language. They are shared by extraction and write-back so that every
# fence spelling that gets extracted can also be located again when the formatted code is written back.
PYTHON_FENCE_PATTERN = r"(?:python|py|\{[ ]*\.py[ ]*\.annotate[ ]*\})"
BASH_FENCE_PATTERN = r"(?:bash|sh|shell)"
FENCE_PATTERNS = {"python": PYTHON_FENCE_PATTERN, "bash": BASH_FENCE_PATTERN}

# Group 1 is the fence indentation (spaces only), group 2 is the raw block body; the closing fence must
# carry the same indentation as the opening one (back-reference \1).
PYTHON_BLOCK_PATTERN = r"^( *)```" + PYTHON_FENCE_PATTERN + r"\n(.*?)\n\1```"
BASH_BLOCK_PATTERN = r"^( *)```" + BASH_FENCE_PATTERN + r"\n(.*?)\n\1```"
LANGUAGE_TAGS = {"python": ["python", "py", "{ .py .annotate }"], "bash": ["bash", "sh", "shell"]}


def check_prettier_version() -> bool:
    """Check if prettier is installed and warn if version differs from 3.6.2.

    Returns:
        (bool): True when `npx prettier --version` exits with status 0, otherwise False.
    """
    if not shutil.which("npx"):
        return False
    try:
        result = subprocess.run(["npx", "prettier", "--version"], capture_output=True, text=True, check=False, timeout=5)
    except Exception:
        # Best effort: a missing binary or a timeout simply means Prettier is treated as unavailable.
        return False
    if result.returncode != 0:
        return False
    version = result.stdout.strip()
    if "3.6.2" not in version:
        print(f"⚠️ Prettier version mismatch: expected 3.6.2, found {version}")
    return True


def extract_code_blocks(markdown_content: str) -> dict[str, list[tuple[str, str]]]:
    """Extract code blocks from markdown content.

    Args:
        markdown_content (str): Markdown text to inspect.

    Returns:
        (dict): Mapping of language names to lists of (indentation, block) pairs.
    """
    flags = re.DOTALL | re.MULTILINE
    python_blocks = re.compile(PYTHON_BLOCK_PATTERN, flags).findall(markdown_content)
    bash_blocks = re.compile(BASH_BLOCK_PATTERN, flags).findall(markdown_content)
    return {"python": python_blocks, "bash": bash_blocks}


def remove_indentation(code_block: str, num_spaces: int) -> str:
    """Remove indentation from a block of code.

    Only leading spaces are removed. A line indented by fewer than `num_spaces` spaces loses just the spaces
    it has, so non-whitespace characters are never cut off.

    Args:
        code_block (str): Code snippet to adjust.
        num_spaces (int): Leading space count to strip.

    Returns:
        (str): Code with indentation removed.
    """
    if num_spaces <= 0:
        return code_block
    indent = " " * num_spaces
    stripped_lines = []
    for line in code_block.split("\n"):
        if line.startswith(indent):
            stripped_lines.append(line[num_spaces:])
        else:
            # Fewer than num_spaces leading spaces: drop the ones present, keep all real content.
            stripped_lines.append(line.lstrip(" "))
    return "\n".join(stripped_lines)


def add_indentation(code_block: str, num_spaces: int) -> str:
    """Add indentation back to non-empty lines in a code block.

    Args:
        code_block (str): Code snippet to indent.
        num_spaces (int): Space count to prefix.

    Returns:
        (str): Code with indentation restored.
    """
    indent = " " * num_spaces
    lines = code_block.split("\n")
    # Blank and whitespace-only lines are left untouched so no trailing whitespace is introduced.
    return "\n".join([indent + line if line.strip() else line for line in lines])


def format_code_with_ruff(temp_dir: Path) -> None:
    """Format Python files in a temporary directory with Ruff.

    Runs `ruff format` followed by `ruff check --fix`. Failures are printed and never raised.

    Args:
        temp_dir (Path): Directory containing extracted Python blocks.
    """
    try:
        subprocess.run(["ruff", "format", "--line-length=120", str(temp_dir)], check=True)
        print("Completed ruff format ✅")
    except Exception as exc:
        print(f"ERROR running ruff format ❌ {exc}")

    try:
        subprocess.run(
            [
                "ruff",
                "check",
                "--fix",
                "--extend-select=F,I,D,UP,RUF,FA",
                "--target-version=py39",
                "--ignore=D100,D101,D103,D104,D203,D205,D212,D213,D401,D406,D407,D413,F821,F841,RUF001,RUF002,RUF012",
                str(temp_dir),
            ],
            check=True,
        )
        print("Completed ruff check ✅")
    except Exception as exc:
        print(f"ERROR running ruff check ❌ {exc}")


def format_bash_with_prettier(temp_dir: Path) -> None:
    """Format Bash files in a temporary directory with prettier-plugin-sh.

    The command goes through the shell because it relies on `$(npm root -g)` to locate the globally
    installed plugin. Failures are printed and never raised.

    Args:
        temp_dir (Path): Directory containing extracted Bash blocks.
    """
    try:
        result = subprocess.run(
            "npx prettier --write --print-width 120 --plugin=$(npm root -g)/prettier-plugin-sh/lib/index.cjs ./**/*.sh",
            shell=True,
            capture_output=True,
            text=True,
            cwd=temp_dir,
        )
        if result.returncode != 0:
            print(f"ERROR running prettier-plugin-sh ❌ {result.stderr}")
        else:
            print("Completed bash formatting ✅")
    except Exception as exc:
        print(f"ERROR running prettier-plugin-sh ❌ {exc}")


def generate_temp_filename(file_path: Path, index: int, code_type: str) -> str:
    """Generate a deterministic filename for a temporary code block.

    Args:
        file_path (Path): Source markdown path.
        index (int): Block index for uniqueness.
        code_type (str): Language identifier.

    Returns:
        (str): Safe filename for the temporary code file.
    """
    stem = file_path.stem
    code_letter = code_type[0]
    path_part = str(file_path.parent).replace("/", "_").replace("\\", "_").replace(" ", "-")
    # md5 only derives a short, stable disambiguator here; it is not used for security.
    hash_val = hashlib.md5(f"{file_path}_{index}".encode(), usedforsecurity=False).hexdigest()[:6]
    ext = ".py" if code_type == "python" else ".sh"
    filename = f"{stem}_{path_part}_{code_letter}{index}_{hash_val}{ext}"
    # Anything outside word characters, '-' and '.' becomes '_'.
    return re.sub(r"[^\w\-.]", "_", filename)


def process_markdown_file(
    file_path: Path,
    temp_dir: Path,
    process_python: bool = True,
    process_bash: bool = True,
) -> tuple[str, list[tuple[int, str, Path, str]]]:
    """Extract code blocks from a markdown file and store them as temporary files.

    Args:
        file_path (Path): Markdown path to process.
        temp_dir (Path): Directory to store temporary files.
        process_python (bool, optional): Enable Python block extraction.
        process_bash (bool, optional): Enable Bash block extraction.

    Returns:
        markdown_content (str): Original markdown content, or "" when the file cannot be read.
        temp_files (list): Extracted block metadata as (num_spaces, original_block, temp_path, code_type).
    """
    try:
        # Explicit UTF-8 so the result does not depend on the platform locale.
        markdown_content = file_path.read_text(encoding="utf-8")
    except Exception as exc:
        print(f"Error reading file {file_path}: {exc}")
        return "", []

    code_blocks_by_type = extract_code_blocks(markdown_content)
    temp_files: list[tuple[int, str, Path, str]] = []

    # Each language gets its own index offset when naming temporary files.
    code_types: list[tuple[str, int]] = []
    if process_python:
        code_types.append(("python", 0))
    if process_bash:
        code_types.append(("bash", 1000))

    for code_type, offset in code_types:
        for i, (indentation, code_block) in enumerate(code_blocks_by_type[code_type]):
            num_spaces = len(indentation)
            code_without_indentation = remove_indentation(code_block, num_spaces)
            temp_file_path = temp_dir / generate_temp_filename(file_path, i + offset, code_type)
            try:
                temp_file_path.write_text(code_without_indentation, encoding="utf-8")
            except Exception as exc:
                print(f"Error writing temp file {temp_file_path}: {exc}")
                continue
            # The untouched original block is kept so it can be located again during write-back.
            temp_files.append((num_spaces, code_block, temp_file_path, code_type))

    return markdown_content, temp_files


def update_markdown_file(file_path: Path, markdown_content: str, temp_files: list[tuple[int, str, Path, str]]) -> None:
    """Replace markdown code blocks with formatted versions.

    Every fence spelling accepted during extraction is matched here as well, and the fence line itself is
    written back exactly as it appeared in the document.

    Args:
        file_path (Path): Markdown file to update.
        markdown_content (str): Original content.
        temp_files (list): Metadata for formatted code blocks.
    """
    for num_spaces, original_code_block, temp_file_path, code_type in temp_files:
        try:
            formatted_code = temp_file_path.read_text(encoding="utf-8").rstrip("\n")
        except Exception as exc:
            print(f"Error reading temp file {temp_file_path}: {exc}")
            continue

        formatted_code_with_indentation = add_indentation(formatted_code, num_spaces)
        fence_indent = re.escape(" " * num_spaces)
        # Group 1 = opening fence line, group 2 = closing fence line; only the body between them changes.
        block_regex = re.compile(
            "(" + fence_indent + "```" + FENCE_PATTERNS[code_type] + "\n)"
            + re.escape(original_code_block)
            + "(\n" + fence_indent + "```)"
        )
        # A callable replacement keeps backslashes in the formatted code from being read as escapes.
        markdown_content = block_regex.sub(
            lambda match, body=formatted_code_with_indentation: match.group(1) + body + match.group(2),
            markdown_content,
        )

    try:
        file_path.write_text(markdown_content, encoding="utf-8")
    except Exception as exc:
        print(f"Error writing file {file_path}: {exc}")


def run_prettier(markdown_file: Path) -> None:
    """Format a markdown file with Prettier when available.

    Files under a `docs` path component (but not under `reference`) are formatted with a tab width of 4.

    Args:
        markdown_file (Path): Markdown file to format.
    """
    if not check_prettier_version():
        return
    is_docs = "docs" in markdown_file.parts and "reference" not in markdown_file.parts
    command = ["npx", "prettier", "--write", "--list-different", str(markdown_file)]
    if is_docs:
        command = ["npx", "prettier", "--tab-width", "4", "--write", "--list-different", str(markdown_file)]
    subprocess.run(command, capture_output=True, check=False, cwd=markdown_file.parent)


def format_markdown_file(markdown_file: Path) -> None:
    """Format markdown-embedded code and run Prettier on the file.

    Args:
        markdown_file (Path): Markdown file to process.
    """
    with TemporaryDirectory() as tmp_dir_name:
        temp_dir = Path(tmp_dir_name)
        markdown_content, temp_files = process_markdown_file(markdown_file, temp_dir)
        if not temp_files:
            # Nothing was extracted (or the file was unreadable): only Prettier runs on the Markdown.
            run_prettier(markdown_file)
            return

        has_python = any(code_type == "python" for *_, code_type in temp_files)
        has_bash = any(code_type == "bash" for *_, code_type in temp_files)
        if has_python:
            format_code_with_ruff(temp_dir)
        if has_bash:
            format_bash_with_prettier(temp_dir)
        # Write-back must happen while the temporary directory still exists.
        update_markdown_file(markdown_file, markdown_content, temp_files)

    run_prettier(markdown_file)


def read_markdown_path() -> Path | None:
    """Read the markdown path from stdin payload.

    The payload is JSON read from stdin; the path is taken from `tool_input.file_path`. Any payload that
    does not have that shape yields None instead of raising.

    Returns:
        markdown_path (Path | None): Markdown path when present and valid.
    """
    try:
        data = json.load(sys.stdin)
    except Exception:
        return None

    # Valid JSON is not necessarily an object; guard every level before indexing into it.
    tool_input = data.get("tool_input") if isinstance(data, dict) else None
    file_path = tool_input.get("file_path") if isinstance(tool_input, dict) else None
    if not isinstance(file_path, str) or not file_path:
        return None

    path = Path(file_path)
    if path.suffix.lower() != ".md" or not path.is_file():
        return None
    return path


def main() -> None:
    """Run markdown formatting hook."""
    markdown_file = read_markdown_path()
    if markdown_file:
        format_markdown_file(markdown_file)
    # Always exit 0, whether or not a file was formatted.
    sys.exit(0)


if __name__ == "__main__":
    main()