12 KiB
12 KiB
Drug Information Queries
Overview
DrugBank provides comprehensive drug information with 200+ data fields per entry including chemical properties, pharmacology, mechanisms of action, and clinical data.
Database Contents
Drug Categories
- FDA-Approved Small Molecules: ~2,037 drugs
- Biotech/Biologic Drugs: ~241 entries
- Nutraceuticals: ~96 compounds
- Experimental Drugs: 6,000+ compounds
- Withdrawn/Discontinued: Historical drugs with safety data
Data Fields (200+ per entry)
- Identifiers: DrugBank ID, CAS number, UNII, PubChem CID
- Names: Generic, brand, synonyms, IUPAC
- Chemical: Structure (SMILES, InChI), formula, molecular weight
- Pharmacology: Indication, mechanism of action, pharmacodynamics
- Pharmacokinetics: Absorption, distribution, metabolism, excretion (ADME)
- Toxicity: LD50, adverse effects, contraindications
- Clinical: Dosage forms, routes of administration, half-life
- Targets: Proteins, enzymes, transporters, carriers
- Interactions: Drug-drug, drug-food interactions
- References: Citations to literature and clinical studies
XML Structure Navigation
Basic XML Structure
<drugbank>
<drug type="small molecule" created="..." updated="...">
<drugbank-id primary="true">DB00001</drugbank-id>
<name>Lepirudin</name>
<description>...</description>
<cas-number>...</cas-number>
<synthesis-reference>...</synthesis-reference>
<indication>...</indication>
<pharmacodynamics>...</pharmacodynamics>
<mechanism-of-action>...</mechanism-of-action>
<toxicity>...</toxicity>
<metabolism>...</metabolism>
<absorption>...</absorption>
<half-life>...</half-life>
<protein-binding>...</protein-binding>
<route-of-elimination>...</route-of-elimination>
<calculated-properties>...</calculated-properties>
<experimental-properties>...</experimental-properties>
<targets>...</targets>
<enzymes>...</enzymes>
<transporters>...</transporters>
<drug-interactions>...</drug-interactions>
</drug>
</drugbank>
Namespaces
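DrugBank XML elements are namespace-qualified (`http://www.drugbank.ca`), so map that namespace to a prefix and pass the map to every `find()`/`findall()` call: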
import xml.etree.ElementTree as ET
# Map the DrugBank namespace URI to the short prefix "db" so that path
# expressions such as 'db:drug' can refer to namespaced elements.
ns = {'db': 'http://www.drugbank.ca'}
# Query with the prefix map: the second argument to findall() tells
# ElementTree how to expand the "db:" prefix used in the path.
# NOTE: get_drugbank_root is provided by the drugbank_downloader package.
root = get_drugbank_root()
drugs = root.findall('db:drug', ns)
Query by Drug Identifier
Query by DrugBank ID
from drugbank_downloader import get_drugbank_root
def get_drug_by_id(drugbank_id):
    """Look up a single drug element by its primary DrugBank ID.

    Args:
        drugbank_id: ID string to match, e.g. ``'DB00001'``.

    Returns:
        The first ``<drug>`` element whose primary ``<drugbank-id>`` text
        equals ``drugbank_id``, or ``None`` when no drug matches.
    """
    tree_root = get_drugbank_root()
    namespaces = {'db': 'http://www.drugbank.ca'}

    def has_primary_id(entry):
        # Only the <drugbank-id primary="true"> child is compared.
        id_node = entry.find('db:drugbank-id[@primary="true"]', namespaces)
        return id_node is not None and id_node.text == drugbank_id

    candidates = tree_root.findall('db:drug', namespaces)
    return next((entry for entry in candidates if has_primary_id(entry)), None)
# Example usage
drug = get_drug_by_id('DB00001')
# Compare against None explicitly: the truth value of an Element reflects
# whether it has child elements, not whether the lookup succeeded, and
# relying on it is deprecated in ElementTree.
if drug is not None:
    # `ns` is the module-level prefix map {'db': 'http://www.drugbank.ca'}.
    name = drug.find('db:name', ns).text
    print(f"Drug: {name}")
Query by Name
def get_drug_by_name(drug_name):
    """Find a drug by primary name or synonym (case-insensitive).

    Args:
        drug_name: Name to look for. It is compared case-insensitively
            against each drug's ``<name>`` and against the entries of the
            drug's own ``<synonyms>`` list.

    Returns:
        The first matching ``<drug>`` element in document order, or ``None``
        when no drug matches.
    """
    root = get_drugbank_root()
    ns = {'db': 'http://www.drugbank.ca'}
    wanted = drug_name.lower()

    def matches(elem):
        # An empty element (e.g. <name/>) has text None; treat it as a
        # non-match instead of raising AttributeError on .lower().
        return (
            elem is not None
            and elem.text is not None
            and elem.text.lower() == wanted
        )

    for drug in root.findall('db:drug', ns):
        if matches(drug.find('db:name', ns)):
            return drug
        # Restrict the search to the drug's own <synonyms> list. A
        # descendant search ('.//db:synonym') also reaches synonyms nested
        # deeper in the entry (e.g. polypeptide synonyms under <targets>),
        # which would return a drug because something it binds to carries
        # the requested name.
        own_synonyms = drug.findall('db:synonyms/db:synonym', ns)
        if any(matches(synonym) for synonym in own_synonyms):
            return drug
    return None
# Example: matching is case-insensitive and covers synonyms as well as the
# primary name; the result is None when nothing matches.
drug = get_drug_by_name('Aspirin')
Query by CAS Number
def get_drug_by_cas(cas_number):
    """Return the first drug whose ``<cas-number>`` equals ``cas_number``.

    Args:
        cas_number: CAS registry number string; compared for exact equality
            with the element text.

    Returns:
        The matching ``<drug>`` element, or ``None`` when no drug carries
        that CAS number.
    """
    tree_root = get_drugbank_root()
    namespaces = {'db': 'http://www.drugbank.ca'}
    for entry in tree_root.findall('db:drug', namespaces):
        cas_node = entry.find('db:cas-number', namespaces)
        if cas_node is None:
            continue
        if cas_node.text == cas_number:
            return entry
    return None
Extract Specific Information
Basic Drug Information
def extract_basic_info(drug):
    """Extract essential drug information from a ``<drug>`` element.

    Args:
        drug: A ``<drug>`` element in the DrugBank namespace.

    Returns:
        dict with the keys ``drugbank_id``, ``name``, ``type``,
        ``cas_number``, ``description`` and ``indication``. ``type`` is the
        element's ``type`` attribute; every other value is the text of the
        corresponding child element, or ``None`` when that child is absent.
    """
    ns = {'db': 'http://www.drugbank.ca'}
    return {
        # ID and name go through get_text_safe like every other field, so a
        # missing child yields None instead of an AttributeError.
        'drugbank_id': get_text_safe(
            drug.find('db:drugbank-id[@primary="true"]', ns)
        ),
        'name': get_text_safe(drug.find('db:name', ns)),
        'type': drug.get('type'),
        'cas_number': get_text_safe(drug.find('db:cas-number', ns)),
        'description': get_text_safe(drug.find('db:description', ns)),
        'indication': get_text_safe(drug.find('db:indication', ns)),
    }
def get_text_safe(element):
    """Return ``element.text``, or ``None`` when ``element`` is ``None``.

    Intended for the result of ``Element.find()``, which is ``None`` when
    the requested child does not exist.
    """
    if element is None:
        return None
    return element.text
Chemical Properties
def extract_chemical_properties(drug):
    """Extract calculated and experimental chemical properties of a drug.

    Args:
        drug: A ``<drug>`` element in the DrugBank namespace.

    Returns:
        dict mapping property kind to its value text. Calculated properties
        are keyed by their ``<kind>`` text as-is; experimental properties
        are keyed by ``"<kind>_experimental"`` so both can coexist in one
        dict. A property without a usable ``<kind>`` is skipped, and a
        property without a ``<value>`` element is stored as ``None``.
    """
    ns = {'db': 'http://www.drugbank.ca'}
    properties = {}
    # (container tag, suffix appended to the property kind)
    sections = (
        ('db:calculated-properties', ''),
        ('db:experimental-properties', '_experimental'),
    )
    for container_tag, suffix in sections:
        container = drug.find(container_tag, ns)
        if container is None:
            continue
        for prop in container.findall('db:property', ns):
            kind = prop.find('db:kind', ns)
            # Without a kind there is no meaningful key; skip rather than
            # raise AttributeError on a malformed entry.
            if kind is None or kind.text is None:
                continue
            value = prop.find('db:value', ns)
            properties[f"{kind.text}{suffix}"] = (
                value.text if value is not None else None
            )
    return properties
# Common properties to extract:
# - SMILES
# - InChI
# - InChIKey
# - Molecular Formula
# - Molecular Weight
# - logP (partition coefficient)
# - Water Solubility
# - Melting Point
# - pKa
Pharmacology Information
def extract_pharmacology(drug):
    """Collect the free-text pharmacology fields of a ``<drug>`` element.

    Args:
        drug: A ``<drug>`` element in the DrugBank namespace.

    Returns:
        dict whose values are the text of the corresponding child element,
        or ``None`` when that child is absent.
    """
    namespaces = {'db': 'http://www.drugbank.ca'}
    # (result key, namespaced child tag), listed in output order
    field_paths = (
        ('indication', 'db:indication'),
        ('pharmacodynamics', 'db:pharmacodynamics'),
        ('mechanism_of_action', 'db:mechanism-of-action'),
        ('toxicity', 'db:toxicity'),
        ('metabolism', 'db:metabolism'),
        ('absorption', 'db:absorption'),
        ('half_life', 'db:half-life'),
        ('protein_binding', 'db:protein-binding'),
        ('route_of_elimination', 'db:route-of-elimination'),
        ('volume_of_distribution', 'db:volume-of-distribution'),
        ('clearance', 'db:clearance'),
    )
    return {
        key: get_text_safe(drug.find(path, namespaces))
        for key, path in field_paths
    }
External Identifiers
def extract_external_identifiers(drug):
    """Extract cross-references to other databases.

    Args:
        drug: A ``<drug>`` element in the DrugBank namespace.

    Returns:
        dict mapping each ``<resource>`` text to its ``<identifier>`` text.
        Entries without a usable ``<resource>`` are skipped; an entry
        without an ``<identifier>`` element maps to ``None``. When a
        resource appears more than once, the last occurrence wins.
    """
    ns = {'db': 'http://www.drugbank.ca'}
    identifiers = {}
    external_ids = drug.find('db:external-identifiers', ns)
    if external_ids is None:
        return identifiers
    for ext_id in external_ids.findall('db:external-identifier', ns):
        resource = ext_id.find('db:resource', ns)
        # No resource name means no key to file the identifier under; skip
        # rather than raise AttributeError on a malformed entry.
        if resource is None or resource.text is None:
            continue
        identifier = ext_id.find('db:identifier', ns)
        identifiers[resource.text] = (
            identifier.text if identifier is not None else None
        )
    return identifiers
# Common external databases:
# - PubChem Compound
# - PubChem Substance
# - ChEMBL
# - ChEBI
# - UniProtKB
# - KEGG Drug
# - PharmGKB
# - RxCUI (RxNorm)
# - ZINC
Building Drug Datasets
Create Drug Dictionary
def build_drug_database():
    """Build a dictionary of summary records for every drug.

    Returns:
        dict keyed by primary DrugBank ID. Each value is a dict with the
        keys ``id``, ``name``, ``type``, ``description``, ``cas`` and
        ``indication``; text fields are ``None`` when the child element is
        absent.

    Raises:
        AttributeError: If a drug has no primary ``<drugbank-id>`` element.
    """
    tree_root = get_drugbank_root()
    namespaces = {'db': 'http://www.drugbank.ca'}

    def text_of(entry, path):
        return get_text_safe(entry.find(path, namespaces))

    catalog = {}
    for entry in tree_root.findall('db:drug', namespaces):
        accession = entry.find('db:drugbank-id[@primary="true"]', namespaces).text
        catalog[accession] = {
            'id': accession,
            'name': text_of(entry, 'db:name'),
            'type': entry.get('type'),
            'description': text_of(entry, 'db:description'),
            'cas': text_of(entry, 'db:cas-number'),
            'indication': text_of(entry, 'db:indication'),
        }
    return catalog
# Create the searchable database once. It is keyed by primary DrugBank ID,
# so len() is the number of distinct primary IDs.
drugs = build_drug_database()
print(f"Total drugs: {len(drugs)}")
Export to DataFrame
import pandas as pd
def create_drug_dataframe():
    """Create a pandas DataFrame with one row per drug.

    Returns:
        DataFrame built from one dict per drug with the keys
        ``drugbank_id``, ``name``, ``type``, ``cas_number``,
        ``description`` and ``indication``. Text fields are ``None`` when
        the child element is absent.

    Raises:
        AttributeError: If a drug has no primary ``<drugbank-id>`` element.
    """
    tree_root = get_drugbank_root()
    namespaces = {'db': 'http://www.drugbank.ca'}

    def to_row(entry):
        # Keys are inserted in the order the columns should appear.
        row = {
            'drugbank_id': entry.find(
                'db:drugbank-id[@primary="true"]', namespaces
            ).text
        }
        row['name'] = get_text_safe(entry.find('db:name', namespaces))
        row['type'] = entry.get('type')
        row['cas_number'] = get_text_safe(entry.find('db:cas-number', namespaces))
        row['description'] = get_text_safe(entry.find('db:description', namespaces))
        row['indication'] = get_text_safe(entry.find('db:indication', namespaces))
        return row

    rows = [to_row(entry) for entry in tree_root.findall('db:drug', namespaces)]
    return pd.DataFrame(rows)
# Usage: build the table, then write it to CSV. index=False leaves the
# DataFrame's row index out of the file.
df = create_drug_dataframe()
df.to_csv('drugbank_drugs.csv', index=False)
Filter by Drug Type
def filter_by_type(drug_type='small molecule'):
    """List the drugs whose ``type`` attribute equals ``drug_type``.

    Args:
        drug_type: Value compared for exact equality with the ``type``
            attribute of each ``<drug>`` element.

    Returns:
        list of ``{'id': ..., 'name': ...}`` dicts in document order, where
        ``id`` is the primary DrugBank ID and ``name`` is ``None`` when the
        drug has no ``<name>`` element.
    """
    tree_root = get_drugbank_root()
    namespaces = {'db': 'http://www.drugbank.ca'}
    return [
        {
            'id': entry.find('db:drugbank-id[@primary="true"]', namespaces).text,
            'name': get_text_safe(entry.find('db:name', namespaces)),
        }
        for entry in tree_root.findall('db:drug', namespaces)
        if entry.get('type') == drug_type
    ]
# Get all biotech drugs: the argument is matched exactly against the
# `type` attribute of each <drug> element.
biotech_drugs = filter_by_type('biotech')
Search by Keyword
def search_drugs_by_keyword(keyword, field='indication'):
    """Find drugs whose ``field`` text contains ``keyword``.

    Args:
        keyword: Substring to look for; matching is case-insensitive.
        field: Name of the direct child element of ``<drug>`` to search
            (without namespace prefix).

    Returns:
        list of dicts in document order, each holding the drug's primary
        ``id``, its ``name``, and under the key ``field`` the first 200
        characters of the matched text.
    """
    tree_root = get_drugbank_root()
    namespaces = {'db': 'http://www.drugbank.ca'}
    needle = keyword.lower()
    hits = []
    for entry in tree_root.findall('db:drug', namespaces):
        node = entry.find(f'db:{field}', namespaces)
        # Skip drugs where the field is missing or empty.
        if node is None or not node.text:
            continue
        if needle not in node.text.lower():
            continue
        accession = entry.find('db:drugbank-id[@primary="true"]', namespaces).text
        label = get_text_safe(entry.find('db:name', namespaces))
        hits.append({
            'id': accession,
            'name': label,
            field: node.text[:200],  # First 200 chars
        })
    return hits
# Example: find drugs whose indication text contains 'cancer'
# (case-insensitive substring match).
cancer_drugs = search_drugs_by_keyword('cancer', 'indication')
Performance Optimization
Indexing for Faster Queries
def build_indexes():
    """Build lookup tables from ID, name and CAS number to drug element.

    Returns:
        dict with three sub-dicts:
        ``'id'`` maps primary DrugBank ID to element,
        ``'name'`` maps lower-cased primary name to element, and
        ``'cas'`` maps CAS number to element. Drugs with a missing or empty
        name / CAS number are left out of the respective index. When two
        drugs share a key, the later one in document order wins.
    """
    tree_root = get_drugbank_root()
    namespaces = {'db': 'http://www.drugbank.ca'}
    by_id, by_name, by_cas = {}, {}, {}
    for entry in tree_root.findall('db:drug', namespaces):
        accession = entry.find('db:drugbank-id[@primary="true"]', namespaces).text
        by_id[accession] = entry

        label = get_text_safe(entry.find('db:name', namespaces))
        if label:
            # Keys are lower-cased so lookups can be case-insensitive.
            by_name[label.lower()] = entry

        registry_no = get_text_safe(entry.find('db:cas-number', namespaces))
        if registry_no:
            by_cas[registry_no] = entry
    return {'id': by_id, 'name': by_name, 'cas': by_cas}
# Build once, query many times. Name keys are stored lower-cased, so pass a
# lower-case name; .get() returns None when the name is not indexed.
indexes = build_indexes()
drug = indexes['name'].get('aspirin')