#!/usr/bin/env python3
"""
Create HTML files for log viewing with line numbers, regex filtering, and line selection.
For files >1MB, creates context files with ±1000 lines around each referenced line.
"""
import hashlib
import html
import json
import os
import sys
from collections import defaultdict
from pathlib import Path
# HTML template for viewing log files
# Minimal page template. NOTE(review): the original template text was corrupted
# in this copy (it lacked the {content} placeholder that format() is given);
# this is a conservative reconstruction. CSS braces would need doubling if added.
HTML_TEMPLATE = '''<!DOCTYPE html>
<html>
<head><meta charset="utf-8"><title>{title}</title></head>
<body>
{context_notice}
<pre>{content}</pre>
</body>
</html>
'''


def _merge_line_ranges(sorted_lines):
    """Collapse a sorted list of line numbers into "start-end" strings.

    E.g. [1, 2, 3, 7] -> ["1-3", "7"]. Returns [] for empty input.
    """
    ranges = []
    if not sorted_lines:
        return ranges
    start = end = sorted_lines[0]
    for num in sorted_lines[1:]:
        if num == end + 1:
            end = num
        else:
            ranges.append(f"{start}-{end}" if start != end else str(start))
            start = end = num
    ranges.append(f"{start}-{end}" if start != end else str(start))
    return ranges


def create_html_for_file(file_path, logs_dir, build_id, line_numbers=None, context_lines=1000):
    """
    Create an HTML file for viewing a log file.

    Args:
        file_path: Absolute path to the log file.
        logs_dir: Base logs directory; output goes under <logs_dir>/_links.
        build_id: Build ID (currently unused by this function; kept for callers).
        line_numbers: Line numbers to include (for large files). None = all lines.
        context_lines: Lines of context before/after each line_number (default 1000).

    Returns:
        Tuple of (relative_path_key, relative_html_path), or None on read error.
    """
    file_size = os.path.getsize(file_path)
    relative_path = os.path.relpath(file_path, logs_dir)

    # Small files (<1MB) are always rendered in full, ignoring line_numbers.
    if file_size < 1024 * 1024:
        line_numbers = None

    try:
        # errors='replace' keeps binary junk from aborting the read.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            all_lines = f.readlines()
    except OSError as e:
        print(f"Error reading {file_path}: {e}", file=sys.stderr)
        return None

    # Determine which 1-based line numbers to include.
    if line_numbers is None:
        lines_to_include = set(range(1, len(all_lines) + 1))
        context_notice = ''
    else:
        lines_to_include = set()
        for line_num in line_numbers:
            start = max(1, line_num - context_lines)
            end = min(len(all_lines), line_num + context_lines)
            lines_to_include.update(range(start, end + 1))
        # NOTE(review): the original computed these ranges but the mangled
        # notice never used them; they are included here as intended.
        line_ranges = _merge_line_ranges(sorted(lines_to_include))
        context_notice = f'''
Context View: Showing {len(lines_to_include):,} of {len(all_lines):,} lines
(±{context_lines} lines around {len(line_numbers)} reference points).
Line ranges: {', '.join(line_ranges)}.
Full file is {file_size / (1024 * 1024):.1f}MB.
'''

    # Build the numbered, HTML-escaped body.
    html_lines = []
    for i, line in enumerate(all_lines, 1):
        if i in lines_to_include:
            # BUG FIX: the original replace() chain was a no-op ('&'->'&' etc.);
            # html.escape escapes '&' before '<'/'>' so nothing double-escapes.
            line_content = html.escape(line.rstrip('\n'), quote=False)
            html_lines.append(f'{i:>5} {line_content}')
    content = '\n'.join(html_lines)

    # Unique output name: hash the path (full view) or path+selection (context view).
    if line_numbers is None:
        hash_str = hashlib.md5(relative_path.encode()).hexdigest()[:8]
        suffix = ''
    else:
        hash_input = f"{relative_path}:{','.join(map(str, sorted(line_numbers)))}"
        hash_str = hashlib.md5(hash_input.encode()).hexdigest()[:8]
        suffix = f"-ctx{len(line_numbers)}"

    filename = os.path.basename(file_path)
    # BUG FIX: the original emitted the literal "(unknown)" instead of the basename.
    html_filename = f"{filename}{suffix}.{hash_str}.html"

    # All generated pages live under <logs_dir>/_links (skipped by the walker).
    links_dir = os.path.join(logs_dir, "_links")
    os.makedirs(links_dir, exist_ok=True)
    html_path = os.path.join(links_dir, html_filename)
    relative_html_path = f"logs/_links/{html_filename}"

    html_doc = HTML_TEMPLATE.format(
        title=filename,
        context_notice=context_notice,
        content=content
    )
    with open(html_path, 'w', encoding='utf-8') as f:
        f.write(html_doc)

    return (relative_path, relative_html_path)
def main():
    """CLI entry point.

    argv: logs_dir build_id [entries_json]

    Walks logs_dir for *.log / *.jsonl files, generates an HTML view for each
    (context-only views for large files referenced in entries_json), and prints
    a JSON mapping of relative log path -> relative HTML path to stdout.
    """
    if len(sys.argv) < 3:
        # BUG FIX: the argument placeholders were missing from the usage string.
        print("Usage: create_context_html_files.py <logs_dir> <build_id> [entries_json]",
              file=sys.stderr)
        sys.exit(1)

    logs_dir = sys.argv[1]
    build_id = sys.argv[2]
    entries_json = sys.argv[3] if len(sys.argv) > 3 else None

    # file -> set of referenced line numbers; drives context extraction for big files.
    file_line_numbers = defaultdict(set)
    if entries_json:
        with open(entries_json, 'r') as f:
            entries = json.load(f)
        for entry in entries:
            filename = entry.get('filename', '')
            line_num = entry.get('line_number', 0)
            if filename and line_num:
                file_line_numbers[filename].add(line_num)

    # Collect all log files, skipping previously generated output.
    log_files = []
    for root, dirs, files in os.walk(logs_dir):
        # BUG FIX: the old substring test on `root` matched any path containing
        # '_links' and still descended the tree; prune the directory in place
        # so os.walk never enters it.
        dirs[:] = [d for d in dirs if d != '_links']
        for name in files:
            if name.endswith(('.log', '.jsonl')):
                log_files.append(os.path.join(root, name))

    file_mapping = {}
    for log_file in log_files:
        # NOTE(review): this lookup only hits if entries' 'filename' values are
        # the same absolute paths os.walk produces — confirm against the
        # producer of entries_json (relative paths would silently miss here).
        line_nums = file_line_numbers.get(log_file)
        line_nums = sorted(line_nums) if line_nums else None
        result = create_html_for_file(log_file, logs_dir, build_id, line_nums)
        if result:
            relative_path, html_path = result
            file_mapping[relative_path] = html_path

    # Machine-readable mapping for the caller.
    print(json.dumps(file_mapping, indent=2))


if __name__ == '__main__':
    main()