#!/usr/bin/env python3
"""
Example workflow demonstrating tool composition in ToolUniverse.

This script shows a complete drug discovery workflow:
1. Find disease-associated targets
2. Retrieve protein sequences
3. Get structure predictions
4. Identify binding sites
5. Screen compound libraries
6. Calculate drug-likeness properties

It also contains a shorter genomics workflow (dataset download,
differential expression analysis, pathway enrichment).
"""

from typing import Optional

from tooluniverse import ToolUniverse

# Horizontal rule used for every banner printed by the workflows.
_RULE = "=" * 70


def _load_tool_universe(tu: Optional[ToolUniverse] = None) -> ToolUniverse:
    """Return *tu* unchanged, or build a ToolUniverse and load its tools."""
    if tu is not None:
        # The caller owns this instance; it is assumed to have its tools
        # loaded already, so load_tools() is not called again here.
        return tu
    tu = ToolUniverse()
    tu.load_tools()
    return tu


def _print_banner(title: str) -> None:
    """Print *title* framed by two horizontal rules."""
    print(_RULE)
    print(title)
    print(_RULE)


def _target_name(target) -> str:
    """Return the display name of a target record, or 'Unknown'."""
    return target.get('target_name', 'Unknown')


def drug_discovery_workflow(
    disease_efo_id: str,
    max_targets: int = 3,
    tu: Optional[ToolUniverse] = None,
) -> list:
    """
    Execute a drug discovery workflow for a given disease.

    Every step after target discovery is best-effort: a failing tool call is
    reported on stdout and that item is skipped, so one bad target does not
    abort the whole run.

    Args:
        disease_efo_id: EFO ID for the disease (e.g., "EFO_0000537" for hypertension)
        max_targets: Maximum number of targets to process. Negative values
            are treated as 0.
        tu: Optional ToolUniverse instance to reuse. When omitted, a new one
            is created and its tools are loaded.

    Returns:
        A list of dicts, each with a "compound" entry (the screening hit) and
        a "properties" entry (the drug-likeness result), for every hit whose
        properties report a truthy 'lipinski_pass'. The list is empty when
        the target lookup fails.
    """
    tu = _load_tool_universe(tu)

    _print_banner("DRUG DISCOVERY WORKFLOW")

    # Step 1: Find disease-associated targets
    print(f"\nStep 1: Finding targets for disease {disease_efo_id}...")
    try:
        targets = tu.run({
            "name": "OpenTargets_get_associated_targets_by_disease_efoId",
            "arguments": {"efoId": disease_efo_id}
        })
    except Exception as e:
        # Nothing downstream can run without targets, so stop here.
        print(f" ✗ Failed to find targets: {e}")
        return []
    if not isinstance(targets, (list, tuple)):
        # NOTE(review): the rest of this workflow assumes a sequence of dicts
        # with 'target_name' / 'uniprot_id' keys -- confirm against the
        # tool's actual response schema.
        print(f" ✗ Failed to find targets: unexpected response type {type(targets).__name__}")
        return []
    print(f"✓ Found {len(targets)} disease-associated targets")

    # Process top targets. Clamp so a negative limit cannot turn the slice
    # into "everything except the last N".
    top_targets = targets[:max(max_targets, 0)]
    print(f" Processing top {len(top_targets)} targets:")
    for idx, target in enumerate(top_targets, 1):
        print(f" {idx}. {_target_name(target)} ({target.get('uniprot_id', 'N/A')})")

    # Step 2: Get protein sequences
    print("\nStep 2: Retrieving protein sequences...")
    sequences = []
    for target in top_targets:
        try:
            seq = tu.run({
                "name": "UniProt_get_sequence",
                "arguments": {"uniprot_id": target['uniprot_id']}
            })
            sequences.append({
                "target": target,
                "sequence": seq
            })
            print(f" ✓ Retrieved sequence for {_target_name(target)}")
        except Exception as e:
            print(f" ✗ Failed to get sequence: {e}")

    # Step 3: Predict protein structures (only for targets with a sequence)
    print("\nStep 3: Predicting protein structures...")
    structures = []
    for seq_data in sequences:
        target = seq_data['target']
        try:
            structure = tu.run({
                "name": "AlphaFold_get_structure",
                "arguments": {"uniprot_id": target['uniprot_id']}
            })
            structures.append({
                "target": target,
                "structure": structure
            })
            print(f" ✓ Predicted structure for {_target_name(target)}")
        except Exception as e:
            print(f" ✗ Failed to predict structure: {e}")

    # Step 4: Find binding sites
    print("\nStep 4: Identifying binding sites...")
    binding_sites = []
    for struct_data in structures:
        target = struct_data['target']
        try:
            sites = tu.run({
                "name": "Fpocket_find_binding_sites",
                "arguments": {"structure": struct_data['structure']}
            })
            binding_sites.append({
                "target": target,
                "sites": sites
            })
            print(f" ✓ Found {len(sites)} binding sites for {_target_name(target)}")
        except Exception as e:
            print(f" ✗ Failed to find binding sites: {e}")

    # Step 5: Virtual screening (simplified)
    print("\nStep 5: Screening compound libraries...")
    all_hits = []
    for site_data in binding_sites:
        for site in site_data['sites'][:1]:  # Top site only
            try:
                compounds = tu.run({
                    "name": "ZINC_virtual_screening",
                    "arguments": {
                        "binding_site": site,
                        "library": "lead-like",
                        "top_n": 10
                    }
                })
                all_hits.extend(compounds)
                print(f" ✓ Found {len(compounds)} hit compounds for {_target_name(site_data['target'])}")
            except Exception as e:
                print(f" ✗ Screening failed: {e}")

    # Step 6: Calculate drug-likeness
    print("\nStep 6: Evaluating drug-likeness...")
    drug_candidates = []
    for compound in all_hits:
        try:
            properties = tu.run({
                "name": "RDKit_calculate_drug_properties",
                "arguments": {"smiles": compound['smiles']}
            })
            # Keep only the hits flagged as passing the Lipinski check.
            if properties.get('lipinski_pass', False):
                drug_candidates.append({
                    "compound": compound,
                    "properties": properties
                })
        except Exception as e:
            print(f" ✗ Property calculation failed: {e}")

    print(f"\n ✓ Identified {len(drug_candidates)} drug candidates passing Lipinski's Rule of Five")

    # Summary
    print("\n" + _RULE)
    print("WORKFLOW SUMMARY")
    print(_RULE)
    print(f"Disease targets processed: {len(top_targets)}")
    print(f"Protein structures predicted: {len(structures)}")
    print(f"Binding sites identified: {sum(len(s['sites']) for s in binding_sites)}")
    print(f"Compounds screened: {len(all_hits)}")
    print(f"Drug candidates identified: {len(drug_candidates)}")
    print(_RULE)

    return drug_candidates


def genomics_workflow(geo_id: str, tu: Optional[ToolUniverse] = None) -> None:
    """
    Execute a genomics analysis workflow.

    Each step feeds the next, so a failure in the download or differential
    expression step is reported on stdout and ends the workflow early.

    Args:
        geo_id: GEO dataset ID (e.g., "GSE12345")
        tu: Optional ToolUniverse instance to reuse. When omitted, a new one
            is created and its tools are loaded.
    """
    tu = _load_tool_universe(tu)

    _print_banner("GENOMICS ANALYSIS WORKFLOW")

    # Step 1: Download gene expression data
    print(f"\nStep 1: Downloading dataset {geo_id}...")
    try:
        expression_data = tu.run({
            "name": "GEO_download_dataset",
            "arguments": {"geo_id": geo_id}
        })
        print(" ✓ Downloaded expression data")
    except Exception as e:
        print(f" ✗ Failed: {e}")
        return

    # Step 2: Differential expression analysis
    print("\nStep 2: Performing differential expression analysis...")
    try:
        de_genes = tu.run({
            "name": "DESeq2_differential_expression",
            "arguments": {
                "data": expression_data,
                "condition1": "control",
                "condition2": "treated"
            }
        })
        # Read the gene list once so the count printed here and the list
        # passed to step 3 cannot disagree.
        significant_genes = de_genes.get('significant_genes', [])
        print(f" ✓ Found {len(significant_genes)} differentially expressed genes")
    except Exception as e:
        print(f" ✗ Failed: {e}")
        return

    # Step 3: Pathway enrichment
    print("\nStep 3: Running pathway enrichment analysis...")
    if not significant_genes:
        # Enrichment over an empty gene list has nothing to report.
        print(" ✗ No differentially expressed genes; skipping enrichment")
    else:
        try:
            pathways = tu.run({
                "name": "KEGG_pathway_enrichment",
                "arguments": {
                    "gene_list": significant_genes,
                    "organism": "hsa"
                }
            })
            print(f" ✓ Found {len(pathways)} enriched pathways")
            if pathways:
                print(f" Top pathway: {pathways[0].get('pathway_name', 'Unknown')}")
        except Exception as e:
            print(f" ✗ Failed: {e}")

    print("\n" + _RULE)


def main() -> None:
    """Run both example workflows against one shared ToolUniverse."""
    # Load the tools once and reuse the instance for both examples.
    tu = _load_tool_universe()

    # Example 1: Drug discovery workflow for hypertension
    print("EXAMPLE 1: Drug Discovery for Hypertension")
    drug_discovery_workflow("EFO_0000537", max_targets=2, tu=tu)

    print("\n\n")

    # Example 2: Genomics workflow
    print("EXAMPLE 2: Genomics Analysis")
    genomics_workflow("GSE12345", tu=tu)


if __name__ == "__main__":
    main()