Initial commit
This commit is contained in:
177
skills/cyberarian/scripts/index_docs.py
Executable file
177
skills/cyberarian/scripts/index_docs.py
Executable file
@@ -0,0 +1,177 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate and update the INDEX.md file by scanning all documents in docs/.
|
||||
Reads YAML frontmatter to extract metadata and organize the index.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
import yaml
|
||||
|
||||
|
||||
def extract_frontmatter(file_path: Path) -> dict:
    """Extract YAML frontmatter from a markdown file.

    Args:
        file_path: Path to the markdown file to inspect.

    Returns:
        The parsed frontmatter mapping, or an empty dict when the file has
        no frontmatter, the frontmatter is not a mapping, or parsing fails.
    """
    try:
        # BUGFIX: read as UTF-8 explicitly. read_text() without an encoding
        # uses the platform default (e.g. cp1252 on Windows) and can raise
        # UnicodeDecodeError on perfectly valid UTF-8 documents.
        content = file_path.read_text(encoding='utf-8')

        # Match YAML frontmatter between --- delimiters at the very start
        # of the file (re.match anchors at position 0).
        match = re.match(r'^---\s*\n(.*?)\n---\s*\n', content, re.DOTALL)
        if not match:
            return {}

        frontmatter_text = match.group(1)
        metadata = yaml.safe_load(frontmatter_text)

        # safe_load may return a scalar, list, or None for degenerate
        # frontmatter; only a mapping is usable as metadata.
        return metadata if isinstance(metadata, dict) else {}

    except Exception as e:
        # Best-effort by design: a single malformed or unreadable file
        # should not abort the whole scan.
        print(f"⚠️ Warning: Could not parse frontmatter in {file_path}: {e}")
        return {}
|
||||
|
||||
|
||||
def get_file_stats(file_path: Path) -> dict:
    """Return basic statistics for *file_path*.

    Args:
        file_path: Path of the file to stat.

    Returns:
        Dict with 'size' (bytes, int) and 'modified' (naive local datetime
        derived from the file's mtime).
    """
    info = file_path.stat()
    size_bytes = info.st_size
    modified_at = datetime.fromtimestamp(info.st_mtime)
    return {'size': size_bytes, 'modified': modified_at}
|
||||
|
||||
|
||||
def scan_documents(docs_path: Path) -> dict:
    """Scan all markdown documents in docs/ and extract metadata.

    Args:
        docs_path: Root docs/ directory; each immediate subdirectory is
            treated as a category.

    Returns:
        defaultdict mapping category name -> list of document-entry dicts
        (path, title, status, created, last_updated, tags, category,
        file_modified).
    """
    categories = defaultdict(list)

    # Skip these files/directories
    skip_files = {'README.md', 'INDEX.md', '.gitkeep'}
    skip_dirs = {'archive'}  # We'll handle archive separately

    for category_dir in docs_path.iterdir():
        if not category_dir.is_dir() or category_dir.name.startswith('.'):
            continue
        # BUGFIX: skip_dirs was defined but never applied, so archived
        # documents leaked into the index. Skip archive categories here and
        # archive subdirectories in the rglob loop below.
        if category_dir.name in skip_dirs:
            continue

        category_name = category_dir.name

        # Find all markdown files (recursively, so nested docs are indexed)
        for md_file in category_dir.rglob('*.md'):
            if md_file.name in skip_files:
                continue
            # Skip files nested under an archive/ subdirectory of a category.
            if skip_dirs.intersection(md_file.relative_to(category_dir).parts):
                continue

            # Extract metadata
            metadata = extract_frontmatter(md_file)
            stats = get_file_stats(md_file)

            # Build document entry; frontmatter values win, with sensible
            # fallbacks (file stem for title, mtime for last_updated).
            relative_path = md_file.relative_to(docs_path)
            doc_entry = {
                'path': str(relative_path),
                'title': metadata.get('title', md_file.stem),
                'status': metadata.get('status', 'unknown'),
                'created': metadata.get('created', 'unknown'),
                'last_updated': metadata.get('last_updated', stats['modified'].strftime('%Y-%m-%d')),
                'tags': metadata.get('tags', []),
                'category': category_name,
                'file_modified': stats['modified']
            }

            categories[category_name].append(doc_entry)

    return categories
|
||||
|
||||
|
||||
def generate_index(categories: dict) -> str:
    """Generate the INDEX.md content.

    Args:
        categories: Mapping of category name -> list of document entries,
            as produced by scan_documents().

    Returns:
        Complete markdown text for INDEX.md (header, summary, per-category
        document listings sorted newest-first).
    """
    total_docs = sum(len(docs) for docs in categories.values())

    index_lines = [
        "# Documentation Index",
        "",
        f"Auto-generated index of all documents. Last updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "",
        "Run `python scripts/index_docs.py` to regenerate this index.",
        "",
        "---",
        "",
        "## Summary",
        "",
        f"Total documents: {total_docs}",
        ""
    ]

    # Add category breakdown
    if categories:
        index_lines.append("By category:")
        for category in sorted(categories.keys()):
            count = len(categories[category])
            index_lines.append(f"- **{category}**: {count} document{'s' if count != 1 else ''}")
        index_lines.append("")

    index_lines.append("---")
    index_lines.append("")

    # Add documents by category
    if not categories:
        index_lines.append("_No documents found. Add documents to the category directories and regenerate the index._")
    else:
        for category in sorted(categories.keys()):
            docs = categories[category]
            # BUGFIX: coerce the sort key to str. YAML parses bare dates in
            # frontmatter into date/datetime objects, and comparing those
            # against the 'unknown' string fallback raised TypeError.
            docs.sort(key=lambda d: str(d['last_updated']), reverse=True)

            index_lines.append(f"## {category.replace('_', ' ').title()}")
            index_lines.append("")

            for doc in docs:
                # Format: [Title](path) - status | updated: date | tags
                title_link = f"[{doc['title']}]({doc['path']})"
                status_badge = f"**{doc['status']}**"
                updated = f"updated: {doc['last_updated']}"
                # BUGFIX: stringify tags — YAML tag lists may contain
                # non-string values (numbers, dates), which break join().
                tags = f"tags: [{', '.join(str(t) for t in doc['tags'])}]" if doc['tags'] else ""

                parts = [title_link, status_badge, updated]
                if tags:
                    parts.append(tags)

                index_lines.append(f"- {' | '.join(parts)}")

            index_lines.append("")

    return '\n'.join(index_lines)
|
||||
|
||||
|
||||
def main():
    """Main entry point: scan docs/ under the base path and rewrite INDEX.md."""
    # Optional CLI argument selects the project root; default is the CWD.
    base_path = Path(sys.argv[1]).resolve() if len(sys.argv) > 1 else Path.cwd()

    docs_path = base_path / 'docs'
    if not docs_path.exists():
        print(f"❌ Error: docs/ directory not found at {docs_path}")
        print("Run 'python scripts/init_docs_structure.py' first to initialize the structure.")
        sys.exit(1)

    print(f"Scanning documents in: {docs_path}")

    # Collect metadata, render the index, and write it in place.
    categories = scan_documents(docs_path)
    index_content = generate_index(categories)

    index_path = docs_path / 'INDEX.md'
    index_path.write_text(index_content)

    doc_count = sum(len(entries) for entries in categories.values())
    print(f"✅ Generated index with {doc_count} documents")
    print(f"✅ Updated: {index_path}")
|
||||
|
||||
|
||||
# Script entry point: run directly as `python scripts/index_docs.py [base_path]`.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user