Local Path: .work/prow-job-extract-must-gather/{build_id}/logs/
{total_files:,}
Total Files
{total_size_human}
Total Size
'''
# Add stats for each file type
for file_type in sorted(type_counts.keys()):
count = type_counts[file_type]
html += f'''
{count:,}
{file_type}
'''
html += '''
'''
# Add filter buttons for each type
for file_type in sorted(type_counts.keys()):
count = type_counts[file_type]
html += f'''
'''
html += f'''
'''
# Add filter buttons for each top-level directory
for directory in sorted(dir_counts.keys()):
count = dir_counts[directory]
# Display name with proper formatting
display_name = directory if directory else '(root)'
html += f'''
'''
html += f'''
Invalid regex pattern
'''
# Add file items
for file in files:
icon = get_file_icon(file['type'])
# Use symlink path for iframe if available, otherwise use original
iframe_path = file.get('symlink_path', f"logs/{file['path']}")
original_path = f"logs/{file['path']}"
html += f'''
'''
return html
def _number_lines(content):
    """Return *content* HTML-escaped, one right-aligned line number per line."""
    lines = content.split('\n')
    # A trailing newline terminates the last line rather than starting a new
    # one; without this, "a\nb\n" would be rendered with an empty line 3.
    if len(lines) > 1 and lines[-1] == '':
        lines.pop()
    width = len(str(len(lines)))
    return '\n'.join(
        f'{number:>{width}} {html_module.escape(line)}'
        for number, line in enumerate(lines, 1)
    )


def create_txt_symlinks(logs_dir, files):
    """Create .html renderings of files under 1MB to prevent download dialogs.

    Despite the name, no symlinks are created: each small file is read as
    UTF-8 text (undecodable bytes are replaced), HTML-escaped, prefixed with
    line numbers and written to ``<logs_dir>/content/_links/``.

    Args:
        logs_dir: Directory that the ``path`` of every entry is relative to.
        files: List of dicts with at least ``path``, ``name`` and ``size``
            keys. The dicts are modified in place.

    Returns:
        The same ``files`` list. Every entry gains a ``symlink_path`` key:
        ``"logs/content/_links/<name>.<hash>.html"`` when a page was written,
        otherwise ``None`` (file too large, or it could not be rendered).

    NOTE(review): because the key is always set, possibly to ``None``, a
    lookup such as ``file.get('symlink_path', default)`` returns ``None``
    instead of the default -- confirm consumers handle that.
    """
    MAX_INLINE_SIZE = 1 * 1024 * 1024  # 1MB
    links_dir = os.path.join(logs_dir, 'content', '_links')

    # Create _links directory if it doesn't exist
    os.makedirs(links_dir, exist_ok=True)

    html_count = 0
    for file in files:
        if file['size'] >= MAX_INLINE_SIZE:
            file['symlink_path'] = None
            continue

        original_path = os.path.join(logs_dir, file['path'])
        # Files in different directories may share a name; a short hash of
        # the full relative path keeps the generated names distinct.
        path_hash = hashlib.md5(file['path'].encode()).hexdigest()[:8]
        html_name = f"{file['name']}.{path_hash}.html"
        html_path = os.path.join(links_dir, html_name)

        try:
            with open(original_path, 'r', encoding='utf-8', errors='replace') as f:
                content = f.read()

            numbered_content = _number_lines(content)

            # Wrap in HTML
            html_content = f'''
{html_module.escape(file['name'])}
Invalid regex pattern
{numbered_content}
'''
            with open(html_path, 'w', encoding='utf-8') as f:
                f.write(html_content)
        except Exception as e:
            # Best effort: report the failure and leave this file without an
            # inline page instead of aborting the whole run.
            print(f"WARNING: Could not create HTML for {file['path']}: {e}", file=sys.stderr)
            file['symlink_path'] = None
        else:
            # Store HTML path in file metadata
            file['symlink_path'] = f"logs/content/_links/{html_name}"
            html_count += 1

    print(f"Created {html_count:,} .html files for inline viewing")
    return files
def main():
    """Command-line entry point: scan a logs directory and write the HTML report.

    Reads five positional arguments from ``sys.argv``: the logs directory,
    the prowjob name, the build id, the target and the gcsweb URL. The
    report is written as ``must-gather-browser.html`` next to the logs
    directory, i.e. in its parent.

    Exits with status 1 when arguments are missing or the logs directory is
    not an existing directory.
    """
    if len(sys.argv) < 6:
        print(
            "Usage: generate_html_report.py "
            "<logs_dir> <prowjob_name> <build_id> <target> <gcsweb_url>",
            file=sys.stderr,
        )
        sys.exit(1)

    logs_dir, prowjob_name, build_id, target, gcsweb_url = sys.argv[1:6]

    # Validate logs directory (a regular file at this path is rejected too)
    if not os.path.isdir(logs_dir):
        print(f"ERROR: Logs directory not found: {logs_dir}", file=sys.stderr)
        sys.exit(1)

    print("Scanning directory tree...")
    files, type_counts, dir_counts, total_size = scan_directory(logs_dir)
    print(f"Found {len(files):,} files ({human_readable_size(total_size)})")

    print("Creating .html files for inline viewing...")
    files = create_txt_symlinks(logs_dir, files)

    print("Generating HTML report...")
    html = generate_html_report(
        files, type_counts, dir_counts, total_size,
        prowjob_name, build_id, target, gcsweb_url,
    )

    # Determine output path. normpath strips a trailing separator first:
    # dirname("x/logs/") would otherwise return "x/logs" itself and the
    # report would be written inside the logs directory.
    output_dir = os.path.dirname(os.path.normpath(logs_dir))
    output_file = os.path.join(output_dir, 'must-gather-browser.html')

    # Write as UTF-8 explicitly, matching the per-file pages, so the result
    # does not depend on the locale's default encoding.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html)

    print(f"Report generated: {output_file}")


if __name__ == '__main__':
    main()