Initial commit

2025-11-30 08:30:10 +08:00
commit f0bd18fb4e
824 changed files with 331919 additions and 0 deletions
--- a/skills/biomni/scripts/generate_report.py
+++ b/skills/biomni/scripts/generate_report.py
@@ -0,0 +1,370 @@
+#!/usr/bin/env python3
+"""
+Enhanced PDF report generation for biomni conversation histories.
+
+This script provides additional customization options for biomni reports:
+- Custom styling and branding
+- Formatted code blocks
+- Section organization
+- Metadata inclusion
+- Export format options (Markdown, HTML; PDF currently falls back to HTML plus conversion instructions)
+
+Usage:
+    python generate_report.py --input conversation.json --output report.pdf
+    python generate_report.py --input conversation.json --output report.html --format html
+"""
+
+import argparse
+import json
+from pathlib import Path
+from typing import Dict, List, Optional, Any
+from datetime import datetime
+
+
+def _render_assistant_content(content: str) -> List[str]:
+    """Split assistant text on ``` fences into prose and fenced-code sections."""
+    rendered = []
+    for index, part in enumerate(content.split('```')):
+        if index % 2 == 0:
+            # Even segments sit outside the fences: plain prose.
+            if part.strip():
+                rendered.append(f"{part.strip()}\n")
+            continue
+
+        # Odd segments sit inside a fence. Everything up to the first newline
+        # is the fence's info string; its first word is the language tag.
+        if '\n' in part:
+            info, code = part.split('\n', 1)
+            tokens = info.split()
+            lang = tokens[0] if tokens else 'python'  # Default to python
+        else:
+            lang, code = 'python', part
+
+        # Trailing whitespace is dropped so no blank line precedes the
+        # closing fence.
+        rendered.append(f"```{lang}\n{code.rstrip()}\n```\n")
+    return rendered
+
+
+def format_conversation_history(
+    messages: List[Dict[str, Any]],
+    include_metadata: bool = True,
+    include_code: bool = True,
+    include_timestamps: bool = False
+) -> str:
+    """
+    Format conversation history into structured markdown.
+
+    Only messages whose ``role`` is ``'user'`` or ``'assistant'`` are
+    rendered; any other role is skipped. Each user message opens a new
+    numbered task, so numbering stays correct even when the history does
+    not strictly alternate between user and assistant.
+
+    Args:
+        messages: List of conversation message dictionaries. ``role`` and
+            ``content`` are read from each one; ``None`` content is treated
+            as empty and non-string content is converted with ``str()``.
+        include_metadata: Include metadata section (generation time and
+            message count)
+        include_code: When True, fenced code blocks in assistant messages
+            are re-emitted as normalised fences that keep their declared
+            language (``python`` when none is given). When False, assistant
+            content is emitted verbatim.
+        include_timestamps: When True, a message's ``timestamp`` value is
+            printed under its heading if present.
+            NOTE(review): the ``timestamp`` key is an assumption about the
+            message schema - confirm against the producer of the history.
+
+    Returns:
+        Formatted markdown string
+    """
+    sections = ["# Biomni Analysis Report\n"]
+
+    # Metadata
+    if include_metadata:
+        sections.append("## Metadata\n")
+        sections.append(f"- **Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        sections.append(f"- **Number of interactions**: {len(messages)}")
+        sections.append("\n---\n")
+
+    # Process messages
+    sections.append("## Analysis\n")
+
+    task_number = 0
+    for msg in messages:
+        role = msg.get('role', 'unknown')
+        raw_content = msg.get('content', '')
+        content = '' if raw_content is None else str(raw_content)
+        stamp = msg.get('timestamp') if include_timestamps else None
+
+        if role == 'user':
+            # Count user turns directly instead of deriving the number from
+            # the message index, which breaks on non-alternating histories.
+            task_number += 1
+            sections.append(f"### Task {task_number}\n")
+            if stamp:
+                sections.append(f"*{stamp}*\n")
+            sections.append(f"**Query:**\n```\n{content}\n```\n")
+
+        elif role == 'assistant':
+            sections.append("**Response:**\n")
+            if stamp:
+                sections.append(f"*{stamp}*\n")
+
+            if include_code and '```' in content:
+                sections.extend(_render_assistant_content(content))
+            else:
+                sections.append(f"{content}\n")
+
+            sections.append("\n---\n")
+
+    return '\n'.join(sections)
+
+
+def markdown_to_html(markdown_content: str, title: str = "Biomni Report") -> str:
+    """
+    Convert markdown to a complete, styled HTML document.
+
+    The markdown body is converted by ``markdown_to_html_simple`` and placed
+    inside a fixed page template with embedded CSS and a footer.
+
+    Args:
+        markdown_content: Markdown string
+        title: HTML page title; HTML-escaped before insertion so characters
+            such as ``&`` and ``<`` cannot break the ``<title>`` element
+
+    Returns:
+        HTML string
+    """
+    # Simple markdown to HTML conversion
+    # For production use, consider using a library like markdown or mistune
+    from html import escape
+
+    safe_title = escape(title)
+    body_html = markdown_to_html_simple(markdown_content)
+
+    html_template = f"""
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>{safe_title}</title>
+    <style>
+        body {{
+            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
+            line-height: 1.6;
+            max-width: 900px;
+            margin: 0 auto;
+            padding: 20px;
+            color: #333;
+        }}
+        h1 {{
+            color: #2c3e50;
+            border-bottom: 3px solid #3498db;
+            padding-bottom: 10px;
+        }}
+        h2 {{
+            color: #34495e;
+            margin-top: 30px;
+            border-bottom: 2px solid #95a5a6;
+            padding-bottom: 5px;
+        }}
+        h3 {{
+            color: #555;
+        }}
+        code {{
+            background-color: #f4f4f4;
+            padding: 2px 6px;
+            border-radius: 3px;
+            font-family: 'Monaco', 'Menlo', 'Courier New', monospace;
+        }}
+        pre {{
+            background-color: #f8f8f8;
+            border: 1px solid #ddd;
+            border-radius: 5px;
+            padding: 15px;
+            overflow-x: auto;
+        }}
+        pre code {{
+            background-color: transparent;
+            padding: 0;
+        }}
+        hr {{
+            border: none;
+            border-top: 1px solid #ddd;
+            margin: 30px 0;
+        }}
+        .metadata {{
+            background-color: #ecf0f1;
+            padding: 15px;
+            border-radius: 5px;
+            margin-bottom: 20px;
+        }}
+        .task {{
+            background-color: #e8f4f8;
+            padding: 10px;
+            border-left: 4px solid #3498db;
+            margin: 20px 0;
+        }}
+        .footer {{
+            margin-top: 50px;
+            text-align: center;
+            color: #7f8c8d;
+            font-size: 0.9em;
+        }}
+    </style>
+</head>
+<body>
+    <div class="content">
+        {body_html}
+    </div>
+    <div class="footer">
+        <p>Generated with Biomni | Stanford SNAP Lab</p>
+        <p><a href="https://github.com/snap-stanford/biomni">github.com/snap-stanford/biomni</a></p>
+    </div>
+</body>
+</html>
+"""
+    return html_template
+
+
+def markdown_to_html_simple(md: str) -> str:
+    """
+    Simple markdown to HTML converter (basic implementation).
+
+    Supported constructs, processed line by line:
+    - ``` fenced code blocks (optional language becomes a ``language-*`` class)
+    - ``#``, ``##`` and ``###`` headings
+    - ``- `` bullet lists
+    - ``---`` horizontal rules
+    - ``**bold**`` spans inside headings, list items and paragraphs
+    - blank lines, emitted as ``<br>``
+
+    All text is HTML-escaped, so code or prose containing ``<``, ``>`` or
+    ``&`` cannot corrupt the page.
+
+    Args:
+        md: Markdown string
+
+    Returns:
+        HTML fragment (no ``<html>``/``<body>`` wrapper)
+    """
+    import html
+    import re
+
+    bold_pattern = re.compile(r'\*\*(.+?)\*\*')
+
+    def inline(text: str) -> str:
+        # Escape first so the <strong> tags inserted below survive intact.
+        return bold_pattern.sub(r'<strong>\1</strong>', html.escape(text, quote=False))
+
+    html_lines = []
+    code_lines = []
+    open_tag = ''
+    in_code_block = False
+    in_list = False
+
+    for line in md.split('\n'):
+        # Code blocks
+        if line.startswith('```'):
+            if in_code_block:
+                # Emit the whole block as one entry so no stray newline ends
+                # up directly inside <code>.
+                html_lines.append(open_tag + '\n'.join(code_lines) + '</code></pre>')
+                code_lines = []
+                in_code_block = False
+            else:
+                if in_list:
+                    html_lines.append('</ul>')
+                    in_list = False
+                lang = html.escape(line[3:].strip(), quote=True)
+                open_tag = f'<pre><code class="language-{lang}">' if lang else '<pre><code>'
+                in_code_block = True
+            continue
+
+        if in_code_block:
+            code_lines.append(html.escape(line, quote=False))
+            continue
+
+        # Lists
+        if line.startswith('- '):
+            if not in_list:
+                html_lines.append('<ul>')
+                in_list = True
+            html_lines.append(f'<li>{inline(line[2:])}</li>')
+            continue
+
+        # Any non-list line ends the current list, headings included.
+        if in_list:
+            html_lines.append('</ul>')
+            in_list = False
+
+        # Headers
+        if line.startswith('# '):
+            html_lines.append(f'<h1>{inline(line[2:])}</h1>')
+        elif line.startswith('## '):
+            html_lines.append(f'<h2>{inline(line[3:])}</h2>')
+        elif line.startswith('### '):
+            html_lines.append(f'<h3>{inline(line[4:])}</h3>')
+        # Horizontal rule
+        elif line.strip() == '---':
+            html_lines.append('<hr>')
+        # Regular paragraph (bold handled by inline())
+        elif line.strip():
+            html_lines.append(f'<p>{inline(line)}</p>')
+        else:
+            html_lines.append('<br>')
+
+    # Close anything the input left open.
+    if in_code_block:
+        html_lines.append(open_tag + '\n'.join(code_lines) + '</code></pre>')
+    if in_list:
+        html_lines.append('</ul>')
+
+    return '\n'.join(html_lines)
+
+
+def generate_report(
+    conversation_data: Dict[str, Any],
+    output_path: Path,
+    format: str = 'markdown',
+    title: Optional[str] = None
+):
+    """
+    Generate formatted report from conversation data.
+
+    Files are written as UTF-8 and the output directory is created when
+    missing. The 'pdf' format does not produce a PDF: it writes an HTML
+    file next to ``output_path`` (same name, ``.html`` suffix) and prints
+    instructions for converting it.
+
+    Args:
+        conversation_data: Conversation history dictionary with a
+            ``messages`` list; a bare list of messages is also accepted
+        output_path: Output file path
+        format: Output format ('markdown', 'html', or 'pdf')
+        title: Report title; defaults to "Biomni Analysis - <today>"
+
+    Raises:
+        ValueError: If ``format`` is not one of the supported values.
+    """
+    if format not in ('markdown', 'html', 'pdf'):
+        raise ValueError(f"Unsupported format: {format}")
+
+    # Some exports are just the message list without a wrapping object.
+    if isinstance(conversation_data, list):
+        messages = conversation_data
+    else:
+        messages = conversation_data.get('messages', [])
+
+    if not title:
+        title = f"Biomni Analysis - {datetime.now().strftime('%Y-%m-%d')}"
+
+    output_path = Path(output_path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    # Generate markdown
+    markdown_content = format_conversation_history(messages)
+
+    if format == 'markdown':
+        # Explicit UTF-8: the locale default cannot encode every character
+        # on all platforms.
+        output_path.write_text(markdown_content, encoding='utf-8')
+        print(f"✓ Markdown report saved to {output_path}")
+
+    elif format == 'html':
+        html_content = markdown_to_html(markdown_content, title)
+        output_path.write_text(html_content, encoding='utf-8')
+        print(f"✓ HTML report saved to {output_path}")
+
+    else:
+        # For PDF generation, we'd typically use a library like weasyprint or reportlab
+        # This is a placeholder implementation
+        print("PDF generation requires additional dependencies (weasyprint or reportlab)")
+        print("Falling back to HTML format...")
+
+        html_path = output_path.with_suffix('.html')
+        html_content = markdown_to_html(markdown_content, title)
+        html_path.write_text(html_content, encoding='utf-8')
+
+        print(f"✓ HTML report saved to {html_path}")
+        print("  To convert to PDF:")
+        print("    1. Install weasyprint: pip install weasyprint")
+        print(f"    2. Run: weasyprint {html_path} {output_path}")
+
+
+def main():
+    """
+    Main entry point for CLI usage.
+
+    Parses ``--input``, ``--output``, ``--format`` and ``--title``, loads
+    the conversation JSON (read as UTF-8) and delegates to
+    ``generate_report``.
+
+    Returns:
+        Exit status: 0 on success, 1 when the input file is missing,
+        unreadable or not valid JSON, or when report generation raises.
+    """
+    parser = argparse.ArgumentParser(
+        description="Generate enhanced reports from biomni conversation histories"
+    )
+
+    parser.add_argument(
+        '--input',
+        type=Path,
+        required=True,
+        help='Input conversation history JSON file'
+    )
+
+    parser.add_argument(
+        '--output',
+        type=Path,
+        required=True,
+        help='Output report file path'
+    )
+
+    parser.add_argument(
+        '--format',
+        choices=['markdown', 'html', 'pdf'],
+        default='markdown',
+        help='Output format (default: markdown)'
+    )
+
+    parser.add_argument(
+        '--title',
+        type=str,
+        help='Report title (optional)'
+    )
+
+    args = parser.parse_args()
+
+    # Load conversation data
+    try:
+        with open(args.input, 'r', encoding='utf-8') as f:
+            conversation_data = json.load(f)
+    except FileNotFoundError:
+        print(f"❌ Input file not found: {args.input}")
+        return 1
+    except (json.JSONDecodeError, UnicodeDecodeError):
+        print(f"❌ Invalid JSON in input file: {args.input}")
+        return 1
+    except OSError as e:
+        # e.g. the path is a directory or is not readable
+        print(f"❌ Could not read input file {args.input}: {e}")
+        return 1
+
+    # Generate report
+    try:
+        generate_report(
+            conversation_data,
+            args.output,
+            format=args.format,
+            title=args.title
+        )
+        return 0
+    except Exception as e:
+        # Top-level boundary: report any failure as a non-zero exit status.
+        print(f"❌ Error generating report: {e}")
+        return 1
+
+
+if __name__ == '__main__':
+    # Use main()'s return value as the process exit status.
+    raise SystemExit(main())
--- a/skills/biomni/scripts/setup_environment.py
+++ b/skills/biomni/scripts/setup_environment.py
@@ -0,0 +1,355 @@
+#!/usr/bin/env python3
+"""
+Interactive setup script for biomni environment configuration.
+
+This script helps users set up:
+1. Conda environment with required dependencies
+2. API keys for LLM providers
+3. Data lake directory configuration
+4. MCP server setup (optional; not yet implemented by this script)
+
+Usage:
+    python setup_environment.py
+"""
+
+import os
+import sys
+import subprocess
+from pathlib import Path
+from typing import Dict, Optional
+
+
+def check_conda_installed() -> bool:
+    """Report whether running ``conda --version`` succeeds on this system."""
+    probe = ['conda', '--version']
+    try:
+        subprocess.run(probe, capture_output=True, check=True)
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        # Non-zero exit status, or no conda executable on PATH.
+        return False
+    else:
+        return True
+
+
+def setup_conda_environment():
+    """
+    Guide user through conda environment setup.
+
+    Checks that conda is available, looks for an existing ``biomni_e1``
+    environment and, after confirmation, creates it with Python 3.10.
+
+    Returns:
+        True if the environment already exists or was created; False if
+        conda is missing, the user declined, or creation failed.
+    """
+    env_name = 'biomni_e1'
+
+    print("\n=== Conda Environment Setup ===")
+
+    if not check_conda_installed():
+        print("❌ Conda not found. Please install Miniconda or Anaconda:")
+        print("   https://docs.conda.io/en/latest/miniconda.html")
+        return False
+
+    print("✓ Conda is installed")
+
+    # Check if biomni_e1 environment exists
+    result = subprocess.run(
+        ['conda', 'env', 'list'],
+        capture_output=True,
+        text=True
+    )
+
+    # Compare the name column exactly. A substring test on the whole output
+    # would also match e.g. "biomni_e10" or a path containing the name.
+    existing_envs = {
+        fields[0]
+        for fields in (row.split() for row in result.stdout.splitlines())
+        if fields and not fields[0].startswith('#')
+    }
+
+    if env_name in existing_envs:
+        print("✓ biomni_e1 environment already exists")
+        return True
+
+    print("\nCreating biomni_e1 conda environment...")
+    print("This will install Python 3.10 and required dependencies.")
+
+    response = input("Proceed? [y/N]: ").strip().lower()
+    if response not in ('y', 'yes'):
+        print("Skipping conda environment setup")
+        return False
+
+    try:
+        # Create conda environment
+        subprocess.run(
+            ['conda', 'create', '-n', env_name, 'python=3.10', '-y'],
+            check=True
+        )
+    except subprocess.CalledProcessError as e:
+        print(f"❌ Failed to create conda environment: {e}")
+        return False
+
+    print("\n✓ Conda environment created successfully")
+    print("\nTo activate: conda activate biomni_e1")
+    print("Then install biomni: pip install biomni --upgrade")
+    return True
+
+
+def setup_api_keys() -> Dict[str, str]:
+    """
+    Prompt for an API key for each supported LLM provider.
+
+    Returns:
+        Mapping of environment-variable name to the key entered. Providers
+        skipped with an empty answer are left out; the mapping is empty
+        when every provider was skipped.
+    """
+    print("\n=== API Key Configuration ===")
+    print("Biomni supports multiple LLM providers.")
+    print("At minimum, configure one provider.")
+
+    # (display label, where to obtain a key, environment variable name)
+    providers = (
+        ("Anthropic Claude (Recommended)", "https://console.anthropic.com/", "ANTHROPIC_API_KEY"),
+        ("OpenAI", "https://platform.openai.com/api-keys", "OPENAI_API_KEY"),
+        ("Google Gemini", "https://makersuite.google.com/app/apikey", "GOOGLE_API_KEY"),
+        ("Groq", "https://console.groq.com/keys", "GROQ_API_KEY"),
+    )
+
+    collected = {}
+    for number, (label, url, env_var) in enumerate(providers, 1):
+        print(f"\n{number}. {label}")
+        print(f"   Get your API key from: {url}")
+        entered = input(f"   Enter {env_var} (or press Enter to skip): ").strip()
+        if entered:
+            collected[env_var] = entered
+
+    if not collected:
+        print("\n⚠️  No API keys configured. You'll need at least one to use biomni.")
+
+    return collected
+
+
+def save_api_keys(api_keys: Dict[str, str], method: str = 'env_file'):
+    """
+    Save API keys using specified method.
+
+    Args:
+        api_keys: Mapping of environment-variable name to key value.
+        method: ``'env_file'`` merges the keys into ``.env`` in the current
+            working directory. Existing ``KEY=value`` entries are kept
+            unless overridden; comment lines are not preserved. The file is
+            restricted to owner read/write because it holds secrets.
+            ``'shell_export'`` only prints ``export`` lines for the user to
+            add to their shell configuration; nothing is written.
+
+    Raises:
+        ValueError: If ``method`` is not one of the supported values.
+    """
+    if method == 'env_file':
+        env_file = Path.cwd() / '.env'
+
+        # Read existing .env if present
+        existing_vars = {}
+        if env_file.exists():
+            with open(env_file, 'r', encoding='utf-8') as f:
+                for line in f:
+                    line = line.strip()
+                    if not line or line.startswith('#') or '=' not in line:
+                        continue
+                    key, val = line.split('=', 1)
+                    existing_vars[key.strip()] = val.strip()
+
+        # Update with new keys
+        existing_vars.update(api_keys)
+
+        # Write to .env. Creating the file with mode 0o600 means the secrets
+        # are never readable by other users, even briefly.
+        fd = os.open(env_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+        with os.fdopen(fd, 'w', encoding='utf-8') as f:
+            f.write("# Biomni API Keys\n")
+            f.write("# Generated by setup_environment.py\n\n")
+            for key, value in existing_vars.items():
+                f.write(f"{key}={value}\n")
+
+        # The mode passed to os.open only applies to newly created files, so
+        # tighten a pre-existing .env as well.
+        try:
+            os.chmod(env_file, 0o600)
+        except OSError as e:
+            print(f"⚠️  Could not restrict permissions on {env_file}: {e}")
+
+        print(f"\n✓ API keys saved to {env_file}")
+        print("  Keys will be loaded automatically when biomni runs in this directory")
+
+    elif method == 'shell_export':
+        # Point zsh users at .zshrc; everyone else gets .bashrc as before.
+        shell_name = Path(os.environ.get('SHELL', '')).name
+        rc_name = '.zshrc' if shell_name == 'zsh' else '.bashrc'
+        shell_file = Path.home() / rc_name
+
+        print("\n📋 Add these lines to your shell configuration:")
+        for key, value in api_keys.items():
+            print(f"   export {key}=\"{value}\"")
+
+        print(f"\nThen run: source {shell_file}")
+
+    else:
+        raise ValueError(f"Unsupported method: {method}")
+
+
+def setup_data_directory() -> Optional[Path]:
+    """
+    Configure biomni data lake directory.
+
+    Offers ``biomni_data`` under the current working directory as the
+    default, optionally accepts a custom path, and creates the directory.
+
+    Returns:
+        The configured directory, or None if it could not be created.
+    """
+    print("\n=== Data Lake Configuration ===")
+    print("Biomni requires ~11GB for integrated biomedical databases.")
+
+    default_path = Path.cwd() / 'biomni_data'
+    print(f"\nDefault location: {default_path}")
+
+    response = input("Use default location? [Y/n]: ").strip().lower()
+
+    data_path = default_path
+    if response == 'n':
+        custom_path = input("Enter custom path: ").strip()
+        if custom_path:
+            data_path = Path(custom_path).expanduser().resolve()
+        else:
+            # An empty answer would otherwise resolve to the current
+            # directory itself, which is almost certainly not intended.
+            print("No path entered; using default location")
+
+    # Create directory if it doesn't exist
+    try:
+        data_path.mkdir(parents=True, exist_ok=True)
+    except OSError as e:
+        print(f"❌ Could not create data directory {data_path}: {e}")
+        return None
+
+    print(f"\n✓ Data directory configured: {data_path}")
+    print("  Data will be downloaded automatically on first use")
+
+    return data_path
+
+
+def test_installation(data_path: Path):
+    """
+    Test biomni installation with a simple query.
+
+    After confirmation, writes ``test_biomni.py`` to the current directory
+    and runs it with the current interpreter. The script is deleted on
+    success and kept for debugging when it fails.
+
+    Args:
+        data_path: Data lake directory passed to the agent in the
+            generated script.
+    """
+    print("\n=== Installation Test ===")
+    print("Testing biomni installation with a simple query...")
+
+    response = input("Run test? [Y/n]: ").strip().lower()
+    if response == 'n':
+        print("Skipping test")
+        return
+
+    # repr() yields a valid Python string literal for any path, including
+    # Windows paths with backslashes and paths containing quotes.
+    test_code = f'''
+import os
+from biomni.agent import A1
+
+# Use environment variables for API keys
+agent = A1(path={str(data_path)!r}, llm='claude-sonnet-4-20250514')
+
+# Simple test query
+result = agent.go("What is the primary function of the TP53 gene?")
+print("Test result:", result)
+'''
+
+    test_file = Path('test_biomni.py')
+    with open(test_file, 'w', encoding='utf-8') as f:
+        f.write(test_code)
+
+    print(f"\nTest script created: {test_file}")
+    print("Running test...")
+
+    try:
+        subprocess.run([sys.executable, str(test_file)], check=True)
+        print("\n✓ Test completed successfully!")
+        test_file.unlink()  # Clean up test file
+    except subprocess.CalledProcessError:
+        print("\n❌ Test failed. Check your configuration.")
+        print(f"   Test script saved as {test_file} for debugging")
+
+
+def generate_example_script(data_path: Path):
+    """
+    Generate example usage script.
+
+    Writes ``example_biomni_usage.py`` to the current working directory.
+
+    Args:
+        data_path: Data lake directory embedded in the generated script as
+            the agent's ``path`` argument.
+    """
+    # repr() yields a valid Python string literal for any path, including
+    # Windows paths with backslashes and paths containing quotes.
+    example_code = f'''#!/usr/bin/env python3
+"""
+Example biomni usage script
+
+This demonstrates basic biomni usage patterns.
+Modify this script for your research tasks.
+"""
+
+from biomni.agent import A1
+
+# Initialize agent
+agent = A1(
+    path={str(data_path)!r},
+    llm='claude-sonnet-4-20250514'  # or your preferred LLM
+)
+
+# Example 1: Simple gene query
+print("Example 1: Gene function query")
+result = agent.go("""
+What are the main functions of the BRCA1 gene?
+Include information about:
+- Molecular function
+- Associated diseases
+- Protein interactions
+""")
+print(result)
+print("-" * 80)
+
+# Example 2: Data analysis
+print("\\nExample 2: GWAS analysis")
+result = agent.go("""
+Explain how to analyze GWAS summary statistics for:
+1. Identifying genome-wide significant variants
+2. Mapping variants to genes
+3. Pathway enrichment analysis
+""")
+print(result)
+
+# Save conversation history
+agent.save_conversation_history("example_results.pdf")
+print("\\nResults saved to example_results.pdf")
+'''
+
+    example_file = Path('example_biomni_usage.py')
+    with open(example_file, 'w', encoding='utf-8') as f:
+        f.write(example_code)
+
+    print(f"\n✓ Example script created: {example_file}")
+
+
+def main():
+    """
+    Run the interactive setup from start to finish.
+
+    Order: conda environment, API keys and where to store them, data
+    directory, example script, optional installation test, then a summary
+    with next steps.
+    """
+    rule = "=" * 60
+    print(rule)
+    print("Biomni Environment Setup")
+    print(rule)
+
+    # Step 1: Conda environment
+    conda_success = setup_conda_environment()
+    if conda_success:
+        print("\n⚠️  Remember to activate the environment:")
+        print("   conda activate biomni_e1")
+        print("   pip install biomni --upgrade")
+
+    # Step 2: API keys
+    api_keys = setup_api_keys()
+    if api_keys:
+        print("\nHow would you like to store API keys?")
+        print("1. .env file (recommended, local to this directory)")
+        print("2. Shell export (add to .bashrc/.zshrc)")
+
+        choice = input("Choose [1/2]: ").strip()
+        # Any answer other than '2' selects the .env file.
+        storage = 'shell_export' if choice == '2' else 'env_file'
+        save_api_keys(api_keys, method=storage)
+
+    # Step 3: Data directory
+    data_path = setup_data_directory()
+
+    # Step 4: Generate example script
+    if data_path:
+        generate_example_script(data_path)
+
+    # Step 5: Test installation (optional) - needs both keys and a data path
+    if api_keys and data_path:
+        test_installation(data_path)
+
+    # Summary
+    print("\n" + rule)
+    print("Setup Complete!")
+    print(rule)
+
+    if conda_success:
+        print("✓ Conda environment: biomni_e1")
+    if api_keys:
+        print(f"✓ API keys configured: {', '.join(api_keys.keys())}")
+    if data_path:
+        print(f"✓ Data directory: {data_path}")
+
+    print("\nNext steps:")
+    if conda_success:
+        next_steps = (
+            "1. conda activate biomni_e1",
+            "2. pip install biomni --upgrade",
+            "3. Run example_biomni_usage.py to test",
+        )
+    else:
+        next_steps = (
+            "1. Install conda/miniconda",
+            "2. Run this script again",
+        )
+    for step in next_steps:
+        print(step)
+
+    print("\nFor documentation, see:")
+    print("  - GitHub: https://github.com/snap-stanford/biomni")
+    print("  - Paper: https://www.biorxiv.org/content/10.1101/2025.05.30.656746v1")
+
+
+if __name__ == "__main__":
+    # Turn Ctrl-C and any unexpected failure into a short message and exit
+    # status 1 instead of a traceback.
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("\n\nSetup interrupted by user")
+        raise SystemExit(1)
+    except Exception as err:
+        print(f"\n❌ Error during setup: {err}")
+        raise SystemExit(1)