Initial commit
This commit is contained in:
219
skills/tooluniverse/scripts/example_workflow.py
Executable file
219
skills/tooluniverse/scripts/example_workflow.py
Executable file
@@ -0,0 +1,219 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Example workflow demonstrating tool composition in ToolUniverse.
|
||||
|
||||
This script shows a complete drug discovery workflow:
|
||||
1. Find disease-associated targets
|
||||
2. Retrieve protein sequences
|
||||
3. Get structure predictions
|
||||
4. Identify binding sites
5. Screen compound libraries
6. Calculate drug-likeness properties
|
||||
"""
|
||||
|
||||
from tooluniverse import ToolUniverse
|
||||
|
||||
|
||||
def drug_discovery_workflow(disease_efo_id: str, max_targets: int = 3):
    """
    Execute a drug discovery workflow for a given disease.

    The workflow chains six tool calls, feeding each step's output into the
    next: target lookup, sequence retrieval, structure lookup, binding-site
    detection, virtual screening and drug-likeness filtering. Progress and a
    final summary are printed to stdout. A failure on an individual item in
    steps 2-6 is reported and that item is skipped; a failure of the initial
    target lookup ends the workflow early.

    Args:
        disease_efo_id: EFO ID for the disease (e.g., "EFO_0000537" for hypertension)
        max_targets: Maximum number of targets to process

    Returns:
        A list of dicts with keys "compound" and "properties", one for each
        screened hit whose properties report a truthy ``lipinski_pass``.
        The list is empty when the target lookup in step 1 fails.
    """
    tu = ToolUniverse()
    tu.load_tools()

    print("=" * 70)
    print("DRUG DISCOVERY WORKFLOW")
    print("=" * 70)

    # Step 1: Find disease-associated targets
    print(f"\nStep 1: Finding targets for disease {disease_efo_id}...")
    try:
        # NOTE(review): assumes the tool returns a list of dicts carrying
        # 'target_name' and 'uniprot_id' — confirm against the tool's schema.
        targets = tu.run({
            "name": "OpenTargets_get_associated_targets_by_disease_efoId",
            "arguments": {"efoId": disease_efo_id},
        })
        # Count and slice inside the guard so that a response of an unexpected
        # shape (e.g. None) is reported like any other step failure instead of
        # crashing the whole workflow.
        target_count = len(targets)
        top_targets = targets[:max_targets]
    except Exception as e:
        print(f" ✗ Failed to find targets: {e}")
        return []
    print(f"✓ Found {target_count} disease-associated targets")

    # Process top targets
    print(f" Processing top {len(top_targets)} targets:")
    for idx, target in enumerate(top_targets, 1):
        print(f" {idx}. {target.get('target_name', 'Unknown')} ({target.get('uniprot_id', 'N/A')})")

    # Step 2: Get protein sequences
    print("\nStep 2: Retrieving protein sequences...")
    sequences = []
    for target in top_targets:
        try:
            # A target without 'uniprot_id' raises KeyError here and is skipped.
            seq = tu.run({
                "name": "UniProt_get_sequence",
                "arguments": {"uniprot_id": target['uniprot_id']},
            })
            sequences.append({
                "target": target,
                "sequence": seq,
            })
            print(f" ✓ Retrieved sequence for {target.get('target_name', 'Unknown')}")
        except Exception as e:
            print(f" ✗ Failed to get sequence: {e}")

    # Step 3: Predict protein structures
    # Only targets whose sequence was retrieved in step 2 are considered.
    print("\nStep 3: Predicting protein structures...")
    structures = []
    for seq_data in sequences:
        try:
            structure = tu.run({
                "name": "AlphaFold_get_structure",
                "arguments": {"uniprot_id": seq_data['target']['uniprot_id']},
            })
            structures.append({
                "target": seq_data['target'],
                "structure": structure,
            })
            print(f" ✓ Predicted structure for {seq_data['target'].get('target_name', 'Unknown')}")
        except Exception as e:
            print(f" ✗ Failed to predict structure: {e}")

    # Step 4: Find binding sites
    print("\nStep 4: Identifying binding sites...")
    binding_sites = []
    for struct_data in structures:
        try:
            sites = tu.run({
                "name": "Fpocket_find_binding_sites",
                "arguments": {"structure": struct_data['structure']},
            })
            # Measure the result before recording it: if it has no length
            # (e.g. None), the entry must not reach step 5 or the summary,
            # both of which call len()/slice on it outside any guard.
            site_count = len(sites)
            binding_sites.append({
                "target": struct_data['target'],
                "sites": sites,
            })
            print(f" ✓ Found {site_count} binding sites for {struct_data['target'].get('target_name', 'Unknown')}")
        except Exception as e:
            print(f" ✗ Failed to find binding sites: {e}")

    # Step 5: Virtual screening (simplified)
    print("\nStep 5: Screening compound libraries...")
    all_hits = []
    for site_data in binding_sites:
        # NOTE(review): assumes 'sites' is a sliceable sequence — confirm.
        for site in site_data['sites'][:1]:  # Top site only
            try:
                compounds = tu.run({
                    "name": "ZINC_virtual_screening",
                    "arguments": {
                        "binding_site": site,
                        "library": "lead-like",
                        "top_n": 10,
                    },
                })
                # Validate before extending so a failed screen adds nothing.
                hit_count = len(compounds)
                all_hits.extend(compounds)
                print(f" ✓ Found {hit_count} hit compounds for {site_data['target'].get('target_name', 'Unknown')}")
            except Exception as e:
                print(f" ✗ Screening failed: {e}")

    # Step 6: Calculate drug-likeness
    print("\nStep 6: Evaluating drug-likeness...")
    drug_candidates = []
    for compound in all_hits:
        try:
            properties = tu.run({
                "name": "RDKit_calculate_drug_properties",
                "arguments": {"smiles": compound['smiles']},
            })

            # Keep only compounds the tool flags as passing; a missing flag
            # counts as a fail.
            if properties.get('lipinski_pass', False):
                drug_candidates.append({
                    "compound": compound,
                    "properties": properties,
                })
        except Exception as e:
            print(f" ✗ Property calculation failed: {e}")

    print(f"\n ✓ Identified {len(drug_candidates)} drug candidates passing Lipinski's Rule of Five")

    # Summary
    print("\n" + "=" * 70)
    print("WORKFLOW SUMMARY")
    print("=" * 70)
    print(f"Disease targets processed: {len(top_targets)}")
    print(f"Protein structures predicted: {len(structures)}")
    print(f"Binding sites identified: {sum(len(s['sites']) for s in binding_sites)}")
    print(f"Compounds screened: {len(all_hits)}")
    print(f"Drug candidates identified: {len(drug_candidates)}")
    print("=" * 70)

    return drug_candidates
|
||||
|
||||
|
||||
def genomics_workflow(
    geo_id: str,
    *,
    condition1: str = "control",
    condition2: str = "treated",
    organism: str = "hsa",
):
    """
    Execute a genomics analysis workflow.

    Runs three tool calls in sequence (dataset download, differential
    expression, pathway enrichment) and prints progress to stdout. A failure
    in the download or differential expression step ends the workflow early;
    a failure in the enrichment step is reported and the closing separator is
    still printed.

    Args:
        geo_id: GEO dataset ID (e.g., "GSE12345")
        condition1: First condition label passed to the differential
            expression tool.
        condition2: Second condition label passed to the differential
            expression tool.
        organism: Organism code passed to the pathway enrichment tool.

    Returns:
        None. Results are only printed.
    """
    tu = ToolUniverse()
    tu.load_tools()

    print("=" * 70)
    print("GENOMICS ANALYSIS WORKFLOW")
    print("=" * 70)

    # Step 1: Download gene expression data
    print(f"\nStep 1: Downloading dataset {geo_id}...")
    try:
        expression_data = tu.run({
            "name": "GEO_download_dataset",
            "arguments": {"geo_id": geo_id},
        })
        print(" ✓ Downloaded expression data")
    except Exception as e:
        print(f" ✗ Failed: {e}")
        return

    # Step 2: Differential expression analysis
    print("\nStep 2: Performing differential expression analysis...")
    try:
        de_genes = tu.run({
            "name": "DESeq2_differential_expression",
            "arguments": {
                "data": expression_data,
                "condition1": condition1,
                "condition2": condition2,
            },
        })
        # Extract the gene list once so the count reported here and the list
        # handed to step 3 always agree; a missing key is treated as "no
        # significant genes" in both places.
        significant_genes = de_genes.get('significant_genes', [])
        print(f" ✓ Found {len(significant_genes)} differentially expressed genes")
    except Exception as e:
        print(f" ✗ Failed: {e}")
        return

    # Step 3: Pathway enrichment
    print("\nStep 3: Running pathway enrichment analysis...")
    try:
        # NOTE(review): assumes the tool returns a sequence of dicts with a
        # 'pathway_name' key — confirm against the tool's schema.
        pathways = tu.run({
            "name": "KEGG_pathway_enrichment",
            "arguments": {
                "gene_list": significant_genes,
                "organism": organism,
            },
        })
        print(f" ✓ Found {len(pathways)} enriched pathways")
        if pathways:
            print(f" Top pathway: {pathways[0].get('pathway_name', 'Unknown')}")
    except Exception as e:
        print(f" ✗ Failed: {e}")

    print("\n" + "=" * 70)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo 1: run the drug discovery workflow for hypertension (EFO_0000537),
    # capped at two targets.
    print("EXAMPLE 1: Drug Discovery for Hypertension")
    candidates = drug_discovery_workflow(disease_efo_id="EFO_0000537", max_targets=2)

    # Blank lines to visually separate the output of the two demos.
    print("\n\n")

    # Demo 2: run the genomics workflow on a GEO dataset ID.
    print("EXAMPLE 2: Genomics Analysis")
    genomics_workflow(geo_id="GSE12345")
|
||||
Reference in New Issue
Block a user