Tool Composition and Workflows in ToolUniverse
Overview
ToolUniverse enables chaining multiple tools together to create complex scientific workflows. Tools can be composed sequentially or in parallel to solve multi-step research problems.
Sequential Tool Composition
Execute tools in sequence where each tool's output feeds into the next tool.
Basic Pattern
from tooluniverse import ToolUniverse

tu = ToolUniverse()
tu.load_tools()

# Step 1: Get disease-associated targets
targets = tu.run({
    "name": "OpenTargets_get_associated_targets_by_disease_efoId",
    "arguments": {"efoId": "EFO_0000537"}  # Hypertension
})

# Step 2: For each target, get protein structure
structures = []
for target in targets[:5]:  # First 5 targets
    structure = tu.run({
        "name": "AlphaFold_get_structure",
        "arguments": {"uniprot_id": target['uniprot_id']}
    })
    structures.append(structure)

# Step 3: Analyze structures (each structure from step 2 feeds the analysis tool)
for structure in structures:
    analysis = tu.run({
        "name": "ProteinAnalysis_calculate_properties",
        "arguments": {"structure": structure}
    })
Complex Workflow Examples
Drug Discovery Workflow
Complete workflow from disease to drug candidates:
# 1. Find disease-associated targets
print("Finding disease targets...")
targets = tu.run({
    "name": "OpenTargets_get_associated_targets_by_disease_efoId",
    "arguments": {"efoId": "EFO_0000616"}  # Breast cancer
})

# 2. Get target protein sequences
print("Retrieving protein sequences...")
sequences = []
for target in targets[:10]:
    seq = tu.run({
        "name": "UniProt_get_sequence",
        "arguments": {"uniprot_id": target['uniprot_id']}
    })
    sequences.append(seq)

# 3. Predict protein structures
print("Predicting structures...")
structures = []
for seq in sequences:
    structure = tu.run({
        "name": "AlphaFold_get_structure",
        "arguments": {"sequence": seq}
    })
    structures.append(structure)

# 4. Find binding sites
print("Identifying binding sites...")
binding_sites = []
for structure in structures:
    sites = tu.run({
        "name": "Fpocket_find_binding_sites",
        "arguments": {"structure": structure}
    })
    binding_sites.append(sites)

# 5. Screen compound libraries
print("Screening compounds...")
hits = []
for site in binding_sites:
    compounds = tu.run({
        "name": "ZINC_virtual_screening",
        "arguments": {
            "binding_site": site,
            "library": "lead-like",
            "top_n": 100
        }
    })
    hits.extend(compounds)

# 6. Calculate drug-likeness, keeping only compounds that pass Lipinski's rules
print("Evaluating drug-likeness...")
drug_candidates = []
for compound in hits:
    properties = tu.run({
        "name": "RDKit_calculate_drug_properties",
        "arguments": {"smiles": compound['smiles']}
    })
    if properties['lipinski_pass']:
        drug_candidates.append(compound)
print(f"Found {len(drug_candidates)} drug candidates")
Genomics Analysis Workflow
# 1. Download gene expression data
expression_data = tu.run({
    "name": "GEO_download_dataset",
    "arguments": {"geo_id": "GSE12345"}
})

# 2. Perform differential expression analysis
de_genes = tu.run({
    "name": "DESeq2_differential_expression",
    "arguments": {
        "data": expression_data,
        "condition1": "control",
        "condition2": "treated"
    }
})

# 3. Pathway enrichment analysis on the significant gene set
pathways = tu.run({
    "name": "KEGG_pathway_enrichment",
    "arguments": {
        "gene_list": de_genes['significant_genes'],
        "organism": "hsa"
    }
})

# 4. Find relevant literature for the top-ranked pathway
papers = tu.run({
    "name": "PubMed_search",
    "arguments": {
        "query": f"{pathways[0]['pathway_name']} AND cancer",
        "max_results": 20
    }
})

# 5. Summarize findings
summary = tu.run({
    "name": "LLM_summarize",
    "arguments": {
        "text": papers,
        "focus": "therapeutic implications"
    }
})
Clinical Genomics Workflow
# 1. Load patient variants
variants = tu.run({
    "name": "VCF_parse",
    "arguments": {"vcf_file": "patient_001.vcf"}
})

# 2. Annotate variants
annotated = tu.run({
    "name": "VEP_annotate_variants",
    "arguments": {"variants": variants}
})

# 3. Filter pathogenic variants
pathogenic = tu.run({
    "name": "ClinVar_filter_pathogenic",
    "arguments": {"variants": annotated}
})

# 4. Find disease associations for the genes carrying pathogenic variants
diseases = tu.run({
    "name": "OMIM_disease_lookup",
    "arguments": {"genes": pathogenic['affected_genes']}
})

# 5. Generate clinical report
report = tu.run({
    "name": "Report_generator",
    "arguments": {
        "variants": pathogenic,
        "diseases": diseases,
        "format": "clinical"
    }
})
Parallel Tool Execution
Execute multiple tools simultaneously when they don't depend on each other:
import concurrent.futures


def run_tool(tu, tool_config):
    """Run a single ToolUniverse tool call; thin wrapper for executor.submit."""
    return tu.run(tool_config)


# Define parallel tasks (independent of each other, so they can run concurrently)
tasks = [
    {"name": "PubMed_search", "arguments": {"query": "cancer", "max_results": 10}},
    {"name": "OpenTargets_get_diseases", "arguments": {"therapeutic_area": "oncology"}},
    {"name": "ChEMBL_search_compounds", "arguments": {"target": "EGFR"}}
]

# Execute in parallel; as_completed yields results in completion order,
# not submission order
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    futures = [executor.submit(run_tool, tu, task) for task in tasks]
    results = [future.result() for future in concurrent.futures.as_completed(futures)]
Output Processing Hooks
ToolUniverse supports post-processing hooks for:
- Summarization
- File saving
- Data transformation
- Visualization
# Example: Save results to file and summarize via post-processing hooks
result = tu.run(
    {
        "name": "some_tool",
        "arguments": {"param": "value"}
    },
    hooks={
        "save_to_file": {"filename": "results.json"},
        "summarize": {"format": "brief"}
    }
)
Best Practices
- Error Handling: Implement try-except blocks for each tool in workflow
- Data Validation: Verify output from each step before passing to next tool
- Checkpointing: Save intermediate results for long workflows
- Logging: Track progress through complex workflows
- Resource Management: Consider rate limits and computational resources
- Modularity: Break complex workflows into reusable functions
- Testing: Test each step individually before composing full workflow