#!/usr/bin/env python3
"""Generate an interactive HTML report for resource lifecycle analysis."""
import json
import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path


def generate_html_report(entries, prowjob_name, build_id, target, resource_name,
                         gcsweb_url, file_mapping=None):
    """Generate an interactive HTML report."""
    # Default to an empty dict if no file mapping was provided
    if file_mapping is None:
        file_mapping = {}

    # Calculate the time range covered by the entries
    timestamps = [e['timestamp'] for e in entries if e['timestamp']]
    if timestamps:
        min_time = min(timestamps)
        max_time = max(timestamps)
        time_range_seconds = (max_time - min_time).total_seconds()
    else:
        min_time = max_time = None
        time_range_seconds = 1

    total_entries = len(entries)
    audit_entries = len([e for e in entries if e['source'] == 'audit'])
    pod_entries = len([e for e in entries if e['source'] == 'pod'])

    # Parse resource_name to extract the searched resources.
    # resource_name can be a single name or a regex pattern like "res1|res2".
    if '|' in resource_name:
        # Regex pattern with multiple resources
        resources_list = sorted(resource_name.split('|'))
    else:
        # Single resource or pattern - just use as-is
        resources_list = [resource_name]

    # Group by verb
    verb_counts = {}
    for e in entries:
        verb = e.get('verb', 'unknown')
        verb_counts[verb] = verb_counts.get(verb, 0) + 1
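    # The template below assumes each entry is shaped roughly like this
    # (an illustrative sketch inferred from the keys consumed in this file;
    # the field values are hypothetical):
    #   {
    #     "timestamp": datetime or None,  # derived from "timestamp_str" in main()
    #     "source": "audit" or "pod", "verb": "update", "level": "info",
    #     "summary": "one-line event description",
    #     "filename": ".work/prow-job-analyze-resource/<build_id>/logs/...",
    #     "line_number": 42, "content": "raw matching line (often JSON)",
    #   }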
    html = f'''<!DOCTYPE html>
<html>
<head><title>Resource Lifecycle: {resource_name}</title></head>
<body>
<div id="file-viewer">File Viewer
<label>Height:</label>
</div>

<h1>Prow Job Resource Lifecycle Analysis</h1>

<div>Prow Job: {prowjob_name}</div>

<div>Build ID: {build_id}</div>

<div>Target: {target}</div>

<div>Resources: {', '.join(resources_list) if resources_list else resource_name}</div>

<div>GCS URL: <a href="{gcsweb_url}">View in gcsweb</a></div>

<div class="stats">
<div class="stat"><div class="stat-value">{total_entries}</div>
<div class="stat-label">Total Events</div></div>
<div class="stat"><div class="stat-value">{audit_entries}</div>
<div class="stat-label">Audit Log Events</div></div>
<div class="stat"><div class="stat-value">{pod_entries}</div>
<div class="stat-label">Pod Log Events</div></div>
'''

    # Add a stat box for each of the five most frequent verbs
    for verb, count in sorted(verb_counts.items(), key=lambda x: -x[1])[:5]:
        html += f'''<div class="stat"><div class="stat-value">{count}</div>
<div class="stat-label">{verb}</div></div>
'''
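    # For example, verb_counts of {'get': 120, 'watch': 80, 'update': 45,
    # 'create': 3, 'delete': 2, 'patch': 1} (hypothetical counts) would emit
    # stat boxes for the five most frequent verbs and drop 'patch'.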
    html += '''</div>

<h2>Timeline</h2>
'''

    if min_time and max_time:
        # Store min/max time as ISO strings for JavaScript
        min_time_iso = min_time.isoformat()
        max_time_iso = max_time.isoformat()
        html += f'''<div class="time-range">
<span>Start Time</span>
<span>{min_time.strftime('%H:%M:%S')}</span>
<span>End Time</span>
<span>{max_time.strftime('%H:%M:%S')}</span>
</div>
<div id="timeline" data-min-time="{min_time_iso}" data-max-time="{max_time_iso}">
'''
    else:
        min_time_iso = ""
        max_time_iso = ""
        html += '''<div id="timeline">
'''

    # Add timeline events, positioned proportionally within the time range.
    # For example, an event 90s into a 600s window lands at left: 15%.
    for idx, entry in enumerate(entries):
        if entry['timestamp'] and min_time and time_range_seconds > 0:
            position = ((entry['timestamp'] - min_time).total_seconds() / time_range_seconds) * 100
            color = {'info': '#58a6ff', 'warn': '#d29922', 'error': '#f85149'}.get(entry['level'], '#8b949e')
            html += f'''<div class="timeline-event" data-idx="{idx}" style="left: {position}%; background: {color};" title="{entry['summary']}"></div>
'''

    html += f'''</div>

<select id="verb-filter">
'''
    for verb in sorted(set(e.get('verb', '') for e in entries if e.get('verb'))):
        html += f'''<option value="{verb}">{verb}</option>
'''
    html += f'''</select>

<div id="entries">
'''
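    # Each entry rendered below links back to its source log file. When
    # create_context_html_files.py produced an HTML twin for a log, the link
    # points there instead; file_mapping might look like (hypothetical paths):
    #   {"gather-extra/pods/etcd.log": "html/gather-extra/pods/etcd.log.html"}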
    # Add entries
    for idx, entry in enumerate(entries):
        timestamp_display = entry['timestamp'].strftime('%Y-%m-%d %H:%M:%S.%f')[:-3] if entry['timestamp'] else 'No timestamp'
        level = entry.get('level', 'info')
        source = entry.get('source', 'audit')
        summary = entry.get('summary', '')
        full_filename = entry.get('filename', '')
        filename_short = full_filename.split('/')[-1]
        line_num = entry.get('line_number', '')
        verb = entry.get('verb', '')
        content = entry.get('content', '')

        # Calculate the relative path from the HTML report to the log file.
        # The HTML is at: .work/prow-job-analyze-resource/{build_id}/{resource_name}.html
        # The logs are at: .work/prow-job-analyze-resource/{build_id}/logs/...
        # So we need to extract everything after {build_id}/
        if full_filename and f'.work/prow-job-analyze-resource/{build_id}/' in full_filename:
            relative_path = full_filename.split(f'.work/prow-job-analyze-resource/{build_id}/')[-1]
        else:
            relative_path = ''

        # Get the file size for display
        file_size_str = ''
        if full_filename:
            try:
                file_size_bytes = os.path.getsize(full_filename)
                # Format the file size in human-readable units
                if file_size_bytes < 1024:
                    file_size_str = f'{file_size_bytes}B'
                elif file_size_bytes < 1024 * 1024:
                    file_size_str = f'{file_size_bytes / 1024:.1f}K'
                elif file_size_bytes < 1024 * 1024 * 1024:
                    file_size_str = f'{file_size_bytes / (1024 * 1024):.1f}M'
                else:
                    file_size_str = f'{file_size_bytes / (1024 * 1024 * 1024):.1f}G'
            except OSError:
                file_size_str = ''

        # Pretty-print JSON content for better display
        try:
            content_obj = json.loads(content)
            content_formatted = json.dumps(content_obj, indent=2)
        except (json.JSONDecodeError, TypeError):
            content_formatted = content

        # Create a source link if we have a relative path
        if relative_path:
            # The file_mapping keys are relative to the logs/ directory,
            # so strip the 'logs/' prefix from relative_path to match them
            mapping_key = relative_path[5:] if relative_path.startswith('logs/') else relative_path

            # Check if an HTML version exists in file_mapping
            html_path = file_mapping.get(mapping_key)
            if html_path:
                # Use the HTML version with a line anchor
                target_path = f'{html_path}#line-{line_num}'
            else:
                # Use the original file with a line anchor
                # (browsers may not support this for non-HTML files)
                target_path = f'{relative_path}#line-{line_num}'

            # Include the file size in the source display
            size_display = f' ({file_size_str})' if file_size_str else ''
            source_html = f'<a href="{target_path}">{filename_short}:{line_num}</a>{size_display}'
        else:
            source_html = f'{filename_short}:{line_num}'

        html += f'''<div class="entry" id="entry-{idx}" data-verb="{verb}">
<span class="timestamp">{timestamp_display}</span> <span class="level">{level}</span> <span class="source">{source}</span> {source_html}
<div class="summary">{summary}</div>
<details><summary>Show matching line</summary>
<pre>{content_formatted[:2000]}</pre>
</details>
</div>
'''
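    # A rendered source link might look like this (hypothetical values):
    #   <a href="html/audit/kube-apiserver.log.html#line-1234">kube-apiserver.log:1234 (2.3M)</a>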
    html += '''</div>
</body>
</html>
'''
    return html


def main():
    if len(sys.argv) < 7:
        print("Usage: generate_html_report.py <entries_file> <prowjob_name> <build_id> <target> <resource_pattern> <gcsweb_url>")
        sys.exit(1)

    entries_file = sys.argv[1]
    prowjob_name = sys.argv[2]
    build_id = sys.argv[3]
    target = sys.argv[4]
    resource_pattern = sys.argv[5]
    gcsweb_url = sys.argv[6]

    # Load entries
    with open(entries_file) as f:
        entries = json.load(f)

    # Convert timestamp strings to datetime objects
    for entry in entries:
        if entry['timestamp_str']:
            try:
                entry['timestamp'] = datetime.fromisoformat(entry['timestamp_str'].replace('Z', '+00:00'))
            except ValueError:
                entry['timestamp'] = None
        else:
            entry['timestamp'] = None

    # Create context-based HTML files for the log files
    # (full HTML for files <1MB, context view for files >1MB)
    logs_dir = f".work/prow-job-analyze-resource/{build_id}/logs"
    file_mapping = {}

    # Check if create_context_html_files.py exists
    create_inline_script = Path(__file__).parent / "create_context_html_files.py"
    if create_inline_script.exists():
        print("Creating context-based HTML files for log files...", file=sys.stderr)
        try:
            result = subprocess.run(
                ["python3", str(create_inline_script), logs_dir, build_id, entries_file],
                capture_output=True, text=True, check=True
            )
            # Parse the JSON the script writes to stdout
            file_mapping = json.loads(result.stdout)
            print(f"Created {len(file_mapping)} HTML files", file=sys.stderr)
        except subprocess.CalledProcessError as e:
            print(f"Warning: Failed to create HTML files: {e.stderr}", file=sys.stderr)
        except json.JSONDecodeError as e:
            print(f"Warning: Failed to parse file mapping JSON: {e}", file=sys.stderr)
    else:
        print(f"Warning: create_context_html_files.py not found at {create_inline_script}", file=sys.stderr)

    # Generate the HTML report
    html = generate_html_report(entries, prowjob_name, build_id, target,
                                resource_pattern, gcsweb_url, file_mapping)

    # Determine the output filename - use the first resource from the pattern
    # for safe filesystem naming
    if '|' in resource_pattern:
        first_resource = resource_pattern.split('|')[0]
    else:
        first_resource = resource_pattern
    # Sanitize the filename (remove regex special chars that might cause issues)
    filename_safe = first_resource.replace('.*', '').replace('[', '').replace(']', '').replace('(', '').replace(')', '')

    # Write the report to a file
    output_file = f".work/prow-job-analyze-resource/{build_id}/{filename_safe}.html"
    with open(output_file, 'w') as f:
        f.write(html)

    # Calculate statistics for the JSON output
    level_counts = {}
    audit_count = 0
    pod_count = 0
    for entry in entries:
        level = entry.get('level', 'unknown')
        level_counts[level] = level_counts.get(level, 0) + 1
        if entry.get('source') == 'audit':
            audit_count += 1
        elif entry.get('source') == 'pod':
            pod_count += 1

    # Parse resource_pattern to get the list of resources
    if '|' in resource_pattern:
        resources_list = sorted(resource_pattern.split('|'))
    else:
        resources_list = [resource_pattern]

    # Get the timestamp range
    timestamps = [e['timestamp'] for e in entries if e['timestamp']]
    if timestamps:
        first_timestamp = min(timestamps).isoformat()
        last_timestamp = max(timestamps).isoformat()
    else:
        first_timestamp = None
        last_timestamp = None

    # Output structured JSON to stdout for parsing
    result = {
        "success": True,
        "output_file": output_file,
        "prowjob_name": prowjob_name,
        "build_id": build_id,
        "target": target,
        "resources": resources_list,
        "total_entries": len(entries),
        "audit_entries": audit_count,
        "pod_entries": pod_count,
        "level_counts": level_counts,
        "inline_html_files": len(file_mapping),
        "first_timestamp": first_timestamp,
        "last_timestamp": last_timestamp
    }
    print(json.dumps(result, indent=2))


if __name__ == '__main__':
    main()
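# Illustrative invocation (all argument values below are hypothetical):
#   python3 generate_html_report.py entries.json \
#       periodic-ci-example-e2e 1234567890 e2e-target \
#       'my-resource|my-other-resource' \
#       'https://gcsweb.example.com/gcs/bucket/logs/...'
#
# On success the script prints a JSON summary to stdout with keys such as
# "success", "output_file", "total_entries", and "level_counts".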