Files
gh-fcakyon-claude-codex-set…/hooks/scripts/markdown_formatting.py
2025-11-29 18:26:40 +08:00

297 lines
10 KiB
Python
Executable File

#!/usr/bin/env python3
"""
PostToolUse hook: Format Markdown files and embedded code blocks.
Inspired by https://github.com/ultralytics/actions/blob/main/actions/update_markdown_code_blocks.py
"""
from __future__ import annotations
import hashlib
import json
import re
import shutil
import subprocess
import sys
from pathlib import Path
from tempfile import TemporaryDirectory
# Regex sources for fenced code blocks. Group 1 captures the spaces before the
# opening fence and group 2 the block body; the closing fence must repeat the
# same indentation (back-reference \1). The "^" anchor and the multi-line body
# only work when compiled with re.MULTILINE | re.DOTALL.
PYTHON_BLOCK_PATTERN = r"^( *)```(?:python|py|\{[ ]*\.py[ ]*\.annotate[ ]*\})\n(.*?)\n\1```"
BASH_BLOCK_PATTERN = r"^( *)```(?:bash|sh|shell)\n(.*?)\n\1```"
# Literal fence tags per language.
# NOTE(review): the Python regex above tolerates any number of spaces inside
# "{ .py .annotate }", while only one exact spelling is listed here.
LANGUAGE_TAGS = {"python": ["python", "py", "{ .py .annotate }"], "bash": ["bash", "sh", "shell"]}
def check_prettier_version() -> bool:
    """Report whether Prettier can be run through ``npx``.

    A warning is printed when the reported version string does not contain
    ``3.6.2``; a mismatch does not make the check fail.

    Returns:
        (bool): True when ``npx prettier --version`` exits with code 0, False when
            ``npx`` is missing, the command fails, or anything raises.
    """
    if not shutil.which("npx"):
        return False
    try:
        result = subprocess.run(
            ["npx", "prettier", "--version"],
            capture_output=True,
            text=True,
            check=False,
            timeout=5,
        )
        if result.returncode != 0:
            return False
        version = result.stdout.strip()
        if "3.6.2" not in version:
            print(f"⚠️ Prettier version mismatch: expected 3.6.2, found {version}")
        return True
    except Exception:
        # Best-effort probe: any failure simply means "Prettier unavailable".
        return False
def extract_code_blocks(markdown_content: str) -> dict[str, list[tuple[str, str]]]:
    """Find fenced Python and Bash code blocks in markdown text.

    Args:
        markdown_content (str): Markdown text to scan.

    Returns:
        (dict): ``{"python": [...], "bash": [...]}`` where each list holds
            (indentation, block body) tuples in order of appearance.
    """
    flags = re.MULTILINE | re.DOTALL
    patterns = {"python": PYTHON_BLOCK_PATTERN, "bash": BASH_BLOCK_PATTERN}
    return {language: re.findall(pattern, markdown_content, flags) for language, pattern in patterns.items()}
def remove_indentation(code_block: str, num_spaces: int) -> str:
    """Remove up to ``num_spaces`` leading spaces from every line of a code block.

    Only actual space characters are removed. A line that is indented less than
    ``num_spaces`` (or starts with other characters such as a tab) keeps all of
    its non-space content instead of having its first characters cut off.

    Args:
        code_block (str): Code snippet to adjust.
        num_spaces (int): Maximum number of leading spaces to strip per line.

    Returns:
        (str): Code with the indentation removed.
    """
    if num_spaces <= 0:
        return code_block
    dedented = []
    for line in code_block.split("\n"):
        leading_spaces = len(line) - len(line.lstrip(" "))
        # Never cut deeper than the spaces that are really there.
        dedented.append(line[min(leading_spaces, num_spaces):])
    return "\n".join(dedented)
def add_indentation(code_block: str, num_spaces: int) -> str:
    """Prefix every non-blank line of a code block with spaces.

    Lines that are empty or contain only whitespace are left exactly as they are.

    Args:
        code_block (str): Code snippet to indent.
        num_spaces (int): Number of spaces to put in front of each line.

    Returns:
        (str): Code with the indentation applied.
    """
    prefix = " " * num_spaces
    indented = []
    for line in code_block.split("\n"):
        indented.append(prefix + line if line.strip() else line)
    return "\n".join(indented)
def format_code_with_ruff(temp_dir: Path) -> None:
    """Run ``ruff format`` and then ``ruff check --fix`` over a directory.

    Each step is independent: a failure is printed and the next step still runs.

    Args:
        temp_dir (Path): Directory containing extracted Python blocks.
    """
    target = str(temp_dir)
    format_command = ["ruff", "format", "--line-length=120", target]
    check_command = [
        "ruff",
        "check",
        "--fix",
        "--extend-select=F,I,D,UP,RUF,FA",
        "--target-version=py39",
        "--ignore=D100,D101,D103,D104,D203,D205,D212,D213,D401,D406,D407,D413,F821,F841,RUF001,RUF002,RUF012",
        target,
    ]

    try:
        subprocess.run(format_command, check=True)
        print("Completed ruff format ✅")
    except Exception as exc:
        print(f"ERROR running ruff format ❌ {exc}")

    try:
        subprocess.run(check_command, check=True)
        print("Completed ruff check ✅")
    except Exception as exc:
        print(f"ERROR running ruff check ❌ {exc}")
def format_bash_with_prettier(temp_dir: Path) -> None:
    """Run Prettier with the globally installed prettier-plugin-sh on ``*.sh`` files.

    The command is executed through the shell with ``temp_dir`` as working
    directory. Failures are printed and never raised.

    Args:
        temp_dir (Path): Directory containing extracted Bash blocks.
    """
    # The shell is needed for the $(npm root -g) substitution.
    command = "npx prettier --write --print-width 120 --plugin=$(npm root -g)/prettier-plugin-sh/lib/index.cjs ./**/*.sh"
    try:
        result = subprocess.run(command, shell=True, capture_output=True, text=True, cwd=temp_dir)
        if result.returncode == 0:
            print("Completed bash formatting ✅")
        else:
            print(f"ERROR running prettier-plugin-sh ❌ {result.stderr}")
    except Exception as exc:
        print(f"ERROR running prettier-plugin-sh ❌ {exc}")
def generate_temp_filename(file_path: Path, index: int, code_type: str) -> str:
    """Generate a deterministic, filesystem-safe filename for a temporary code block.

    The name is ``<stem>_<parent path>_<type letter><index>_<hash><ext>``. The
    descriptive ``<stem>_<parent path>`` prefix is truncated when necessary so the
    whole name stays within 255 UTF-8 bytes; the suffix that carries the index and
    hash is always kept intact.

    Args:
        file_path (Path): Source markdown path.
        index (int): Block index for uniqueness.
        code_type (str): Language identifier; ``"python"`` yields ``.py``, anything
            else yields ``.sh``.

    Returns:
        (str): Safe filename for the temporary code file.
    """
    max_name_bytes = 255  # common per-component filename limit
    unsafe_chars = re.compile(r"[^\w\-.]")

    ext = ".py" if code_type == "python" else ".sh"
    path_part = str(file_path.parent).replace("/", "_").replace("\\", "_").replace(" ", "-")
    hash_val = hashlib.md5(f"{file_path}_{index}".encode(), usedforsecurity=False).hexdigest()[:6]

    prefix = unsafe_chars.sub("_", f"{file_path.stem}_{path_part}")
    suffix = unsafe_chars.sub("_", f"_{code_type[0]}{index}_{hash_val}{ext}")

    # Deeply nested paths would otherwise produce a name the filesystem rejects.
    budget = max(max_name_bytes - len(suffix.encode()), 0)
    encoded_prefix = prefix.encode()
    if len(encoded_prefix) > budget:
        # errors="ignore" drops a multi-byte character that was cut in half.
        prefix = encoded_prefix[:budget].decode(errors="ignore")
    return prefix + suffix
def process_markdown_file(
    file_path: Path,
    temp_dir: Path,
    process_python: bool = True,
    process_bash: bool = True,
) -> tuple[str, list[tuple[int, str, Path, str]]]:
    """Extract code blocks from a markdown file and store them as temporary files.

    All files are read and written as UTF-8 so the result does not depend on the
    platform's default encoding.

    Args:
        file_path (Path): Markdown path to process.
        temp_dir (Path): Directory to store temporary files.
        process_python (bool, optional): Enable Python block extraction.
        process_bash (bool, optional): Enable Bash block extraction.

    Returns:
        markdown_content (str): Original markdown content, or "" when the file
            could not be read.
        temp_files (list): One (num_spaces, original block, temp file path,
            code type) tuple per block that was written successfully.
    """
    try:
        markdown_content = file_path.read_text(encoding="utf-8")
    except Exception as exc:
        print(f"Error reading file {file_path}: {exc}")
        return "", []

    code_blocks_by_type = extract_code_blocks(markdown_content)
    temp_files: list[tuple[int, str, Path, str]] = []

    # Bash indexes start at 1000 so they never collide with Python indexes.
    code_types: list[tuple[str, int]] = []
    if process_python:
        code_types.append(("python", 0))
    if process_bash:
        code_types.append(("bash", 1000))

    for code_type, offset in code_types:
        for i, (indentation, code_block) in enumerate(code_blocks_by_type[code_type]):
            num_spaces = len(indentation)
            code_without_indentation = remove_indentation(code_block, num_spaces)
            temp_file_path = temp_dir / generate_temp_filename(file_path, i + offset, code_type)
            try:
                temp_file_path.write_text(code_without_indentation, encoding="utf-8")
            except Exception as exc:
                print(f"Error writing temp file {temp_file_path}: {exc}")
                continue
            # Keep the original (still indented) block: it is the search key for replacement.
            temp_files.append((num_spaces, code_block, temp_file_path, code_type))
    return markdown_content, temp_files


def update_markdown_file(file_path: Path, markdown_content: str, temp_files: list[tuple[int, str, Path, str]]) -> None:
    """Replace markdown code blocks with formatted versions.

    Each temporary file is read back as UTF-8, re-indented, and substituted for
    the original block under every fence tag listed for its language. The
    resulting content is then written to ``file_path`` as UTF-8.

    Args:
        file_path (Path): Markdown file to update.
        markdown_content (str): Original content.
        temp_files (list): Metadata for formatted code blocks.
    """
    for num_spaces, original_code_block, temp_file_path, code_type in temp_files:
        try:
            formatted_code = temp_file_path.read_text(encoding="utf-8").rstrip("\n")
        except Exception as exc:
            print(f"Error reading temp file {temp_file_path}: {exc}")
            continue
        formatted_code_with_indentation = add_indentation(formatted_code, num_spaces)

        # The fence tag actually used is not recorded, so every known tag is tried.
        for lang in LANGUAGE_TAGS[code_type]:
            markdown_content = markdown_content.replace(
                f"{' ' * num_spaces}```{lang}\n{original_code_block}\n{' ' * num_spaces}```",
                f"{' ' * num_spaces}```{lang}\n{formatted_code_with_indentation}\n{' ' * num_spaces}```",
            )

    try:
        file_path.write_text(markdown_content, encoding="utf-8")
    except Exception as exc:
        print(f"Error writing file {file_path}: {exc}")
def run_prettier(markdown_file: Path) -> None:
    """Format a markdown file with Prettier when available.

    Files under a ``docs`` path component (but not under ``reference``) are
    formatted with ``--tab-width 4``. Prettier's output and exit code are ignored.

    Args:
        markdown_file (Path): Markdown file to format.
    """
    if not check_prettier_version():
        return

    # Prettier runs with cwd set to the file's directory, so a relative path
    # would be resolved a second time against that directory and not be found.
    target = markdown_file.absolute()

    is_docs = "docs" in markdown_file.parts and "reference" not in markdown_file.parts
    command = ["npx", "prettier"]
    if is_docs:
        command += ["--tab-width", "4"]
    command += ["--write", "--list-different", str(target)]
    subprocess.run(command, capture_output=True, check=False, cwd=target.parent)
def format_markdown_file(markdown_file: Path) -> None:
    """Format the code blocks embedded in a markdown file, then the file itself.

    Code blocks are extracted into a temporary directory, formatted by language,
    and written back. Prettier is run on the markdown file in every case, also
    when no code blocks were extracted.

    Args:
        markdown_file (Path): Markdown file to process.
    """
    with TemporaryDirectory() as scratch_name:
        workdir = Path(scratch_name)
        content, blocks = process_markdown_file(markdown_file, workdir)
        if blocks:
            # The last element of every entry is its language identifier.
            languages = {entry[-1] for entry in blocks}
            if "python" in languages:
                format_code_with_ruff(workdir)
            if "bash" in languages:
                format_bash_with_prettier(workdir)
            update_markdown_file(markdown_file, content, blocks)
        run_prettier(markdown_file)
def read_markdown_path() -> Path | None:
    """Read the markdown path from the JSON payload on stdin.

    The path is taken from ``payload["tool_input"]["file_path"]``. Payloads that
    are not valid JSON or do not have that shape are ignored instead of raising.

    Returns:
        markdown_path (Path | None): The path when it is a non-empty string with a
            ``.md`` suffix (case-insensitive) that exists on disk, otherwise None.
    """
    try:
        data = json.load(sys.stdin)
    except Exception:
        # Best-effort hook: an unreadable or malformed payload means nothing to do.
        return None

    # Valid JSON is not necessarily an object, and "tool_input" may be null.
    if not isinstance(data, dict):
        return None
    tool_input = data.get("tool_input")
    if not isinstance(tool_input, dict):
        return None
    file_path = tool_input.get("file_path")
    if not isinstance(file_path, str) or not file_path:
        return None

    path = Path(file_path)
    if path.suffix.lower() != ".md" or not path.exists():
        return None
    return path
def main() -> None:
    """Entry point of the hook: format the markdown file named on stdin, then exit 0."""
    target = read_markdown_path()
    if target is not None:
        format_markdown_file(target)
    # The hook always reports success, whether or not anything was formatted.
    sys.exit(0)
# Run the hook only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()