Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:30:10 +08:00
commit f0bd18fb4e
824 changed files with 331919 additions and 0 deletions

View File

@@ -0,0 +1,370 @@
#!/usr/bin/env python3
"""
Enhanced PDF report generation for biomni conversation histories.
This script provides additional customization options for biomni reports:
- Custom styling and branding
- Formatted code blocks
- Section organization
- Metadata inclusion
- Export format options (PDF, HTML, Markdown)
Usage:
python generate_report.py --input conversation.json --output report.pdf
python generate_report.py --agent-object agent --output report.pdf --format html
"""
import argparse
import json
from pathlib import Path
from typing import Dict, List, Optional, Any
from datetime import datetime
def format_conversation_history(
messages: List[Dict[str, Any]],
include_metadata: bool = True,
include_code: bool = True,
include_timestamps: bool = False
) -> str:
"""
Format conversation history into structured markdown.
Args:
messages: List of conversation message dictionaries
include_metadata: Include metadata section
include_code: Include code blocks
include_timestamps: Include message timestamps
Returns:
Formatted markdown string
"""
sections = []
# Header
sections.append("# Biomni Analysis Report\n")
# Metadata
if include_metadata:
sections.append("## Metadata\n")
sections.append(f"- **Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
sections.append(f"- **Number of interactions**: {len(messages)}")
sections.append("\n---\n")
# Process messages
sections.append("## Analysis\n")
for i, msg in enumerate(messages, 1):
role = msg.get('role', 'unknown')
content = msg.get('content', '')
if role == 'user':
sections.append(f"### Task {i // 2 + 1}\n")
sections.append(f"**Query:**\n```\n{content}\n```\n")
elif role == 'assistant':
sections.append(f"**Response:**\n")
# Check if content contains code
if include_code and ('```' in content or 'import ' in content):
# Attempt to separate text and code
parts = content.split('```')
for j, part in enumerate(parts):
if j % 2 == 0:
# Text content
if part.strip():
sections.append(f"{part.strip()}\n")
else:
# Code content
# Check if language is specified
lines = part.split('\n', 1)
if len(lines) > 1 and lines[0].strip() in ['python', 'r', 'bash', 'sql']:
lang = lines[0].strip()
code = lines[1]
else:
lang = 'python' # Default to python
code = part
sections.append(f"```{lang}\n{code}\n```\n")
else:
sections.append(f"{content}\n")
sections.append("\n---\n")
return '\n'.join(sections)
def markdown_to_html(markdown_content: str, title: str = "Biomni Report") -> str:
"""
Convert markdown to styled HTML.
Args:
markdown_content: Markdown string
title: HTML page title
Returns:
HTML string
"""
# Simple markdown to HTML conversion
# For production use, consider using a library like markdown or mistune
html_template = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{title}</title>
<style>
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
line-height: 1.6;
max-width: 900px;
margin: 0 auto;
padding: 20px;
color: #333;
}}
h1 {{
color: #2c3e50;
border-bottom: 3px solid #3498db;
padding-bottom: 10px;
}}
h2 {{
color: #34495e;
margin-top: 30px;
border-bottom: 2px solid #95a5a6;
padding-bottom: 5px;
}}
h3 {{
color: #555;
}}
code {{
background-color: #f4f4f4;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Monaco', 'Menlo', 'Courier New', monospace;
}}
pre {{
background-color: #f8f8f8;
border: 1px solid #ddd;
border-radius: 5px;
padding: 15px;
overflow-x: auto;
}}
pre code {{
background-color: transparent;
padding: 0;
}}
hr {{
border: none;
border-top: 1px solid #ddd;
margin: 30px 0;
}}
.metadata {{
background-color: #ecf0f1;
padding: 15px;
border-radius: 5px;
margin-bottom: 20px;
}}
.task {{
background-color: #e8f4f8;
padding: 10px;
border-left: 4px solid #3498db;
margin: 20px 0;
}}
.footer {{
margin-top: 50px;
text-align: center;
color: #7f8c8d;
font-size: 0.9em;
}}
</style>
</head>
<body>
<div class="content">
{markdown_to_html_simple(markdown_content)}
</div>
<div class="footer">
<p>Generated with Biomni | Stanford SNAP Lab</p>
<p><a href="https://github.com/snap-stanford/biomni">github.com/snap-stanford/biomni</a></p>
</div>
</body>
</html>
"""
return html_template
def markdown_to_html_simple(md: str) -> str:
"""Simple markdown to HTML converter (basic implementation)."""
lines = md.split('\n')
html_lines = []
in_code_block = False
in_list = False
for line in lines:
# Code blocks
if line.startswith('```'):
if in_code_block:
html_lines.append('</code></pre>')
in_code_block = False
else:
lang = line[3:].strip()
html_lines.append(f'<pre><code class="language-{lang}">')
in_code_block = True
continue
if in_code_block:
html_lines.append(line)
continue
# Headers
if line.startswith('# '):
html_lines.append(f'<h1>{line[2:]}</h1>')
elif line.startswith('## '):
html_lines.append(f'<h2>{line[3:]}</h2>')
elif line.startswith('### '):
html_lines.append(f'<h3>{line[4:]}</h3>')
# Lists
elif line.startswith('- '):
if not in_list:
html_lines.append('<ul>')
in_list = True
html_lines.append(f'<li>{line[2:]}</li>')
else:
if in_list:
html_lines.append('</ul>')
in_list = False
# Horizontal rule
if line.strip() == '---':
html_lines.append('<hr>')
# Bold
elif '**' in line:
line = line.replace('**', '<strong>', 1).replace('**', '</strong>', 1)
html_lines.append(f'<p>{line}</p>')
# Regular paragraph
elif line.strip():
html_lines.append(f'<p>{line}</p>')
else:
html_lines.append('<br>')
if in_list:
html_lines.append('</ul>')
return '\n'.join(html_lines)
def generate_report(
conversation_data: Dict[str, Any],
output_path: Path,
format: str = 'markdown',
title: Optional[str] = None
):
"""
Generate formatted report from conversation data.
Args:
conversation_data: Conversation history dictionary
output_path: Output file path
format: Output format ('markdown', 'html', or 'pdf')
title: Report title
"""
messages = conversation_data.get('messages', [])
if not title:
title = f"Biomni Analysis - {datetime.now().strftime('%Y-%m-%d')}"
# Generate markdown
markdown_content = format_conversation_history(messages)
if format == 'markdown':
output_path.write_text(markdown_content)
print(f"✓ Markdown report saved to {output_path}")
elif format == 'html':
html_content = markdown_to_html(markdown_content, title)
output_path.write_text(html_content)
print(f"✓ HTML report saved to {output_path}")
elif format == 'pdf':
# For PDF generation, we'd typically use a library like weasyprint or reportlab
# This is a placeholder implementation
print("PDF generation requires additional dependencies (weasyprint or reportlab)")
print("Falling back to HTML format...")
html_path = output_path.with_suffix('.html')
html_content = markdown_to_html(markdown_content, title)
html_path.write_text(html_content)
print(f"✓ HTML report saved to {html_path}")
print(" To convert to PDF:")
print(f" 1. Install weasyprint: pip install weasyprint")
print(f" 2. Run: weasyprint {html_path} {output_path}")
else:
raise ValueError(f"Unsupported format: {format}")
def main():
"""Main entry point for CLI usage."""
parser = argparse.ArgumentParser(
description="Generate enhanced reports from biomni conversation histories"
)
parser.add_argument(
'--input',
type=Path,
required=True,
help='Input conversation history JSON file'
)
parser.add_argument(
'--output',
type=Path,
required=True,
help='Output report file path'
)
parser.add_argument(
'--format',
choices=['markdown', 'html', 'pdf'],
default='markdown',
help='Output format (default: markdown)'
)
parser.add_argument(
'--title',
type=str,
help='Report title (optional)'
)
args = parser.parse_args()
# Load conversation data
try:
with open(args.input, 'r') as f:
conversation_data = json.load(f)
except FileNotFoundError:
print(f"❌ Input file not found: {args.input}")
return 1
except json.JSONDecodeError:
print(f"❌ Invalid JSON in input file: {args.input}")
return 1
# Generate report
try:
generate_report(
conversation_data,
args.output,
format=args.format,
title=args.title
)
return 0
except Exception as e:
print(f"❌ Error generating report: {e}")
return 1
if __name__ == '__main__':
import sys
sys.exit(main())