#!/usr/bin/env python3
"""
Create HTML files for log viewing with line numbers, regex filtering, and
line selection. For files >1MB, creates context files with ±1000 lines
around each referenced line.
"""
import os
import sys
import json
import hashlib
from pathlib import Path
from collections import defaultdict

# HTML template for viewing log files.
# NOTE(review): the surrounding markup (tags/CSS/JS, including the regex
# filter UI that emits "Invalid regex pattern") appears to have been lost to
# extraction; only the placeholders and literal text survive. The literal is
# preserved byte-for-byte — restore the full template from version control.
HTML_TEMPLATE = ''' {title}
Invalid regex pattern
{context_notice}
{content}
'''


def create_html_for_file(file_path, logs_dir, build_id, line_numbers=None, context_lines=1000):
    """
    Create an HTML file for viewing a log file.

    Args:
        file_path: Absolute path to the log file
        logs_dir: Base logs directory
        build_id: Build ID (not used in the filename/content here; kept for
                  interface stability with callers)
        line_numbers: List of line numbers to include (for large files).
                      If None, includes all lines.
        context_lines: Number of lines before/after each line_number to
                      include (default 1000)

    Returns:
        Tuple of (relative_path_key, html_file_path) or None if the file
        could not be read.
    """
    file_size = os.path.getsize(file_path)
    relative_path = os.path.relpath(file_path, logs_dir)

    # For small files (<1MB), create full HTML with every line included.
    if file_size < 1024 * 1024:
        line_numbers = None  # Include all lines

    # Read the file; errors='replace' keeps going on bad bytes in logs.
    try:
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            all_lines = f.readlines()
    except Exception as e:
        print(f"Error reading {file_path}: {e}", file=sys.stderr)
        return None

    # Determine which lines to include.
    if line_numbers is None:
        # Include all lines (small file).
        lines_to_include = set(range(1, len(all_lines) + 1))
        context_notice = ''
    else:
        # Include context around each referenced line number (large file).
        lines_to_include = set()
        for line_num in line_numbers:
            start = max(1, line_num - context_lines)
            end = min(len(all_lines), line_num + context_lines)
            lines_to_include.update(range(start, end + 1))

        # Compress the included line set into "a-b" range strings.
        # NOTE(review): line_ranges is built but never rendered below —
        # presumably it appeared in the lost template markup; kept as-is.
        line_ranges = []
        sorted_lines = sorted(lines_to_include)
        if sorted_lines:
            range_start = sorted_lines[0]
            range_end = sorted_lines[0]
            for line in sorted_lines[1:]:
                if line == range_end + 1:
                    range_end = line
                else:
                    line_ranges.append(f"{range_start}-{range_end}" if range_start != range_end else str(range_start))
                    range_start = line
                    range_end = line
            line_ranges.append(f"{range_start}-{range_end}" if range_start != range_end else str(range_start))

        context_notice = f'''
Context View: Showing {len(lines_to_include):,} of {len(all_lines):,} lines (±{context_lines} lines around {len(line_numbers)} reference points). Full file is {file_size / (1024 * 1024):.1f}MB.
'''

    # Build HTML content with line numbers.
    html_lines = []
    for i, line in enumerate(all_lines, 1):
        if i in lines_to_include:
            # BUG FIX: the escape calls previously replaced each character
            # with itself (the entities had been de-escaped somewhere along
            # the way). Escape '&' first so entities are not double-escaped.
            line_content = (line.rstrip('\n')
                            .replace('&', '&amp;')
                            .replace('<', '&lt;')
                            .replace('>', '&gt;'))
            html_lines.append(f'{i:>5} {line_content}')
    content = '\n'.join(html_lines)

    # Generate a unique output filename based on path and line selection.
    if line_numbers is None:
        # For full files, use a simple hash of the relative path.
        hash_str = hashlib.md5(relative_path.encode()).hexdigest()[:8]
        suffix = ''
    else:
        # For context files, include the selected line numbers in the hash
        # so different selections for the same file get distinct outputs.
        hash_input = f"{relative_path}:{','.join(map(str, sorted(line_numbers)))}"
        hash_str = hashlib.md5(hash_input.encode()).hexdigest()[:8]
        suffix = f"-ctx{len(line_numbers)}"

    filename = os.path.basename(file_path)
    # BUG FIX: the output name previously embedded a literal "(unknown)"
    # placeholder instead of the log's basename.
    html_filename = f"{filename}{suffix}.{hash_str}.html"

    # Create the _links directory and write the HTML file there.
    links_dir = os.path.join(logs_dir, "_links")
    os.makedirs(links_dir, exist_ok=True)

    html_path = os.path.join(links_dir, html_filename)
    relative_html_path = f"logs/_links/{html_filename}"

    title = filename
    html = HTML_TEMPLATE.format(
        title=title,
        context_notice=context_notice,
        content=content
    )

    with open(html_path, 'w', encoding='utf-8') as f:
        f.write(html)

    return (relative_path, relative_html_path)


def main():
    """CLI entry point: emit a JSON map of log path -> generated HTML path."""
    if len(sys.argv) < 3:
        # BUG FIX: the argument names had been stripped from the usage line
        # (angle brackets were eaten as HTML tags).
        print("Usage: create_context_html_files.py <logs_dir> <build_id> [entries_json]", file=sys.stderr)
        sys.exit(1)

    logs_dir = sys.argv[1]
    build_id = sys.argv[2]
    entries_json = sys.argv[3] if len(sys.argv) > 3 else None

    # Load entries to get referenced line numbers per file.
    file_line_numbers = defaultdict(set)
    if entries_json:
        with open(entries_json, 'r') as f:
            entries = json.load(f)
        for entry in entries:
            filename = entry.get('filename', '')
            line_num = entry.get('line_number', 0)
            if filename and line_num:
                file_line_numbers[filename].add(line_num)

    # Collect all log files under logs_dir.
    log_files = []
    for root, dirs, files in os.walk(logs_dir):
        # Skip the _links output directory (substring match, so any nested
        # path containing '_links' is skipped too).
        if '_links' in root:
            continue
        for file in files:
            if file.endswith('.log') or file.endswith('.jsonl'):
                log_files.append(os.path.join(root, file))

    # Create HTML files.
    file_mapping = {}
    for log_file in log_files:
        # NOTE(review): entries are keyed by their 'filename' field while
        # log_file is an absolute path — confirm the two use the same form,
        # otherwise context lookups never match and every large file is
        # rendered without line selection.
        line_nums = file_line_numbers.get(log_file)
        if line_nums:
            line_nums = sorted(list(line_nums))
        else:
            line_nums = None

        result = create_html_for_file(log_file, logs_dir, build_id, line_nums)
        if result:
            relative_path, html_path = result
            file_mapping[relative_path] = html_path

    # Output JSON mapping to stdout.
    print(json.dumps(file_mapping, indent=2))


if __name__ == '__main__':
    main()