186 lines
5.0 KiB
Python
Executable File
186 lines
5.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
validate-links.py - Validate markdown links in documentation
|
|
|
|
Usage:
|
|
./validate-links.py [file.md] # Check one file
|
|
./validate-links.py [directory] # Check all .md files
|
|
|
|
Exit codes:
|
|
0 - All links valid
|
|
1 - One or more broken links found
2 - Target not found, or not a markdown file or directory
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# ANSI escape sequences used to colorize terminal output.
RED = '\033[0;31m'
GREEN = '\033[0;32m'
YELLOW = '\033[1;33m'
NC = '\033[0m'  # "no color": emitted after colored text to reset the terminal

# Module-wide tallies updated by validate_link() and reported by main().
total_links = valid_links = broken_links = 0

# Human-readable description of every broken link, in discovery order.
broken_list = []
|
|
|
|
|
|
def heading_to_anchor(heading):
    """Convert heading text to a GitHub-style anchor (slug).

    The conversion follows the rules GitHub applies to rendered headings:
    inline formatting is dropped, the text is lowercased, punctuation is
    removed and each space becomes a hyphen.  Letters and digits from any
    script, underscores and hyphens are preserved.

    Args:
        heading: Heading text without the leading ``#`` markers.

    Returns:
        The anchor string, without a leading ``#``.
    """
    # An inline link contributes only its visible text, never its URL.
    text = re.sub(r'\[([^\]]*)\]\([^)]*\)', r'\1', heading)

    # Backticks and asterisks are always formatting markers.
    text = re.sub(r'[`*]', '', text)

    # Underscores are formatting only when they wrap a run at word
    # boundaries (_italic_, __bold__); inside identifiers such as
    # snake_case they are literal and remain part of the anchor.
    text = re.sub(r'(?<!\w)_+(.+?)_+(?!\w)', r'\1', text)

    text = text.strip().lower()

    # Drop punctuation.  \w is Unicode-aware for str patterns, so accented
    # and non-Latin letters survive instead of being silently deleted.
    text = re.sub(r'[^\w\- ]', '', text)

    return text.replace(' ', '-')
|
|
|
|
|
|
def check_anchor(file_path, anchor):
    """Check whether an anchor exists in a markdown file.

    An anchor is considered present when it matches either

    * the slug of an ATX heading (``# Title``), where the n-th repeat of the
      same slug is addressed as ``slug-n`` (``usage``, ``usage-1``, ...), or
    * an explicit HTML anchor such as ``<a name="x">`` or ``<h2 id="x">``.

    Lines inside fenced code blocks are ignored, so a ``# comment`` in a
    shell snippet is not mistaken for a heading.

    Args:
        file_path: Path of the markdown file to scan (read as UTF-8).
        anchor: Anchor to look for, with or without leading ``#``.
            Compared case-insensitively.

    Returns:
        True if the anchor was found, False otherwise.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    wanted = anchor.lstrip('#').lower()

    # Compiled once up front; all three are applied to every line.
    fence_re = re.compile(r'^ {0,3}(`{3,}|~{3,})')
    heading_re = re.compile(r'^(#+)\s+(.+)$')
    html_anchor_re = re.compile(
        r'<[^>]*?\b(?:id|name)\s*=\s*["\']([^"\']+)["\']', re.IGNORECASE)

    open_fence = None   # marker that opened the current code fence, if any
    slug_counts = {}    # slug -> number of headings seen with that slug

    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            fence = fence_re.match(line)
            if fence:
                marker = fence.group(1)
                rest = line[fence.end():]
                if open_fence is None:
                    # A backtick run followed by more backticks on the same
                    # line is an inline code span, not a fence opener.
                    if not (marker[0] == '`' and '`' in rest):
                        open_fence = marker
                        continue
                elif (marker[0] == open_fence[0]
                        and len(marker) >= len(open_fence)
                        and not rest.strip()):
                    # Same character, at least as long, nothing after it.
                    open_fence = None
                    continue
            if open_fence is not None:
                continue

            if any(name.lower() == wanted
                   for name in html_anchor_re.findall(line)):
                return True

            match = heading_re.match(line)
            if not match:
                continue

            # Drop an optional ATX closing sequence ("## Title ##").
            heading_text = re.sub(r'\s+#+\s*$', '', match.group(2)).strip()
            slug = heading_to_anchor(heading_text)

            # The first heading keeps the bare slug; repeats get -1, -2, ...
            repeats = slug_counts.get(slug, 0)
            slug_counts[slug] = repeats + 1
            candidate = slug if repeats == 0 else f"{slug}-{repeats}"
            if candidate == wanted:
                return True

    return False
|
|
|
|
|
|
def _link_problem(file_path, target):
    """Return a description of why ``target`` is broken, or None if it is fine."""
    from urllib.parse import unquote

    if not target:
        return "Empty link target"

    # Anything with a URI scheme (http:, https:, mailto:, ftp:, tel:, ...)
    # or a protocol-relative //host is external and is not checked.  The
    # scheme must be at least two characters so "C:..." is not mistaken
    # for one.
    if target.startswith('//') or re.match(r'[A-Za-z][A-Za-z0-9+.-]+:', target):
        return None

    # Anchor-only link: look it up in the file that contains the link.
    if target.startswith('#'):
        if check_anchor(file_path, unquote(target)):
            return None
        return "Anchor not found"

    # File link, optionally followed by "#fragment".
    link_file, _, fragment = target.partition('#')
    link_file = unquote(link_file)  # "my%20file.md" -> "my file.md"

    if link_file.startswith('/'):
        # Absolute path from repo root (not supported in this simple
        # version): it is checked against the filesystem root as written.
        resolved_path = link_file
    else:
        resolved_path = os.path.join(os.path.dirname(file_path), link_file)
    resolved_path = os.path.normpath(resolved_path)

    # Directories are legitimate link targets too, so test for existence
    # rather than for a regular file.
    if not os.path.exists(resolved_path):
        return f"File not found: {resolved_path}"

    # Heading anchors only exist in markdown files.  Fragments on other
    # targets (e.g. "script.py#L10") cannot be verified here, and reading
    # a binary file as UTF-8 text would raise.
    is_markdown = (os.path.isfile(resolved_path)
                   and resolved_path.lower().endswith(('.md', '.markdown')))
    if fragment and is_markdown:
        if not check_anchor(resolved_path, '#' + unquote(fragment)):
            return f"Anchor not found in {resolved_path}"

    return None


def validate_link(file_path, link_text, link_url):
    """Validate a single markdown link and update the module counters.

    External links (any URI scheme, or protocol-relative ``//``) are
    counted as valid without being fetched.  Local links are resolved
    relative to the directory of ``file_path``; the target must exist and,
    when it is a markdown file and a fragment is given, the fragment must
    match an anchor in it.

    Args:
        file_path: Path of the markdown file that contains the link.
        link_text: The text between the square brackets (used in reports).
        link_url: The raw text between the parentheses.  May include an
            optional title (``path "Title"``) or ``<...>`` delimiters.

    Returns:
        True if the link is valid, False if it is broken.  Broken links are
        also appended to ``broken_list``.
    """
    global total_links, valid_links, broken_links

    total_links += 1

    # Reduce the raw "(...)" content to the bare destination: either the
    # part inside <...>, or the first whitespace-delimited token, which
    # drops an optional "title".
    target = link_url.strip()
    if target.startswith('<') and '>' in target:
        target = target[1:target.index('>')]
    else:
        pieces = target.split(None, 1)
        target = pieces[0] if pieces else ''

    problem = _link_problem(file_path, target)
    if problem is None:
        valid_links += 1
        return True

    broken_links += 1
    broken_list.append(f"{file_path}: [{link_text}]({link_url}) - {problem}")
    return False
|
|
|
|
|
|
def validate_file(file_path):
    """Validate all links in a markdown file.

    Prints a progress line, then passes every inline link or image
    (``[text](url)`` / ``![alt](url)``) found outside code to
    ``validate_link``.  Fenced code blocks and inline code spans are
    skipped, because link syntax shown there is sample text rather than a
    real link.

    Args:
        file_path: Path of the markdown file to scan (read as UTF-8).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    print(f"{YELLOW}Checking:{NC} {file_path}")

    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.read().splitlines()

    # Keep only the lines that are outside fenced code blocks.
    fence_re = re.compile(r'^ {0,3}(`{3,}|~{3,})')
    open_fence = None   # marker that opened the current code fence, if any
    prose_lines = []
    for line in lines:
        fence = fence_re.match(line)
        if fence:
            marker = fence.group(1)
            rest = line[fence.end():]
            if open_fence is None:
                # A backtick run followed by more backticks on the same
                # line is an inline code span, not a fence opener.
                if not (marker[0] == '`' and '`' in rest):
                    open_fence = marker
                    continue
            elif (marker[0] == open_fence[0]
                    and len(marker) >= len(open_fence)
                    and not rest.strip()):
                # Same character, at least as long, nothing after it.
                open_fence = None
                continue
        if open_fence is None:
            prose_lines.append(line)
    content = '\n'.join(prose_lines)

    # Remove inline code spans (`code`, ``co`de``) confined to one line.
    content = re.sub(r'(`+)[^\n]+?\1', '', content)

    # Find all markdown links: [text](url).  The text may be empty, which
    # is common for images written as ![](path).
    link_pattern = r'\[([^\]]*)\]\(([^)]+)\)'
    for match in re.finditer(link_pattern, content):
        validate_link(file_path, match.group(1), match.group(2))
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Validates the markdown file or directory tree named by the first
    command-line argument (the current directory when none is given),
    prints a summary, and exits with status 0 when every link is valid,
    1 when at least one link is broken, or 2 when the target is unusable.
    """
    target = sys.argv[1] if len(sys.argv) >= 2 else '.'

    print(f"{YELLOW}Link Validation Tool{NC}")
    print("====================")
    print("")

    target_path = Path(target)
    if not target_path.exists():
        print(f"{RED}Error:{NC} {target} not found")
        sys.exit(2)

    # Work out which markdown files to check.
    if target_path.is_file():
        if target_path.suffix != '.md':
            print(f"{RED}Error:{NC} Not a markdown file: {target}")
            sys.exit(2)
        markdown_files = [target_path]
    elif target_path.is_dir():
        # Lazy generator: files are validated as the tree is walked.
        markdown_files = target_path.rglob('*.md')
    else:
        print(f"{RED}Error:{NC} {target} is neither a file nor directory")
        sys.exit(2)

    for markdown_file in markdown_files:
        validate_file(str(markdown_file))

    # Summary
    print("")
    print("====================")
    print(f"{YELLOW}Summary{NC}")
    print("====================")
    print(f"Total links: {total_links}")
    print(f"{GREEN}Valid:{NC} {valid_links}")
    print(f"{RED}Broken:{NC} {broken_links}")

    has_failures = broken_links > 0
    if not has_failures:
        print(f"{GREEN}✓ All links valid!{NC}")
        sys.exit(0)

    print("")
    print("Details:")
    for entry in broken_list:
        print(f"{RED} ✗{NC} {entry}")
    sys.exit(1)
|
|
|
|
|
|
# Run the validator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|