Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:30:10 +08:00
commit f0bd18fb4e
824 changed files with 331919 additions and 0 deletions

View File

@@ -0,0 +1,278 @@
#!/usr/bin/env python3
"""
ChEMBL Database Query Examples
This script demonstrates common query patterns for the ChEMBL database
using the chembl_webresource_client Python library.
Requirements:
pip install chembl_webresource_client
pip install pandas (optional, for data manipulation)
"""
from chembl_webresource_client.new_client import new_client
def get_molecule_info(chembl_id):
"""
Retrieve detailed information about a molecule by ChEMBL ID.
Args:
chembl_id: ChEMBL identifier (e.g., 'CHEMBL25')
Returns:
Dictionary containing molecule information
"""
molecule = new_client.molecule
return molecule.get(chembl_id)
def search_molecules_by_name(name_pattern):
"""
Search for molecules by name pattern.
Args:
name_pattern: Name or pattern to search for
Returns:
List of matching molecules
"""
molecule = new_client.molecule
results = molecule.filter(pref_name__icontains=name_pattern)
return list(results)
def find_molecules_by_properties(max_mw=500, min_logp=None, max_logp=None):
"""
Find molecules based on physicochemical properties.
Args:
max_mw: Maximum molecular weight
min_logp: Minimum LogP value
max_logp: Maximum LogP value
Returns:
List of matching molecules
"""
molecule = new_client.molecule
filters = {
'molecule_properties__mw_freebase__lte': max_mw
}
if min_logp is not None:
filters['molecule_properties__alogp__gte'] = min_logp
if max_logp is not None:
filters['molecule_properties__alogp__lte'] = max_logp
results = molecule.filter(**filters)
return list(results)
def get_target_info(target_chembl_id):
"""
Retrieve information about a biological target.
Args:
target_chembl_id: ChEMBL target identifier (e.g., 'CHEMBL240')
Returns:
Dictionary containing target information
"""
target = new_client.target
return target.get(target_chembl_id)
def search_targets_by_name(target_name):
"""
Search for targets by name or keyword.
Args:
target_name: Target name or keyword (e.g., 'kinase', 'EGFR')
Returns:
List of matching targets
"""
target = new_client.target
results = target.filter(
target_type='SINGLE PROTEIN',
pref_name__icontains=target_name
)
return list(results)
def get_bioactivity_data(target_chembl_id, activity_type='IC50', max_value=100):
"""
Retrieve bioactivity data for a specific target.
Args:
target_chembl_id: ChEMBL target identifier
activity_type: Type of activity (IC50, Ki, EC50, etc.)
max_value: Maximum activity value in nM
Returns:
List of activity records
"""
activity = new_client.activity
results = activity.filter(
target_chembl_id=target_chembl_id,
standard_type=activity_type,
standard_value__lte=max_value,
standard_units='nM'
)
return list(results)
def find_similar_compounds(smiles, similarity_threshold=85):
"""
Find compounds similar to a query structure.
Args:
smiles: SMILES string of query molecule
similarity_threshold: Minimum similarity percentage (0-100)
Returns:
List of similar compounds
"""
similarity = new_client.similarity
results = similarity.filter(
smiles=smiles,
similarity=similarity_threshold
)
return list(results)
def substructure_search(smiles):
"""
Search for compounds containing a specific substructure.
Args:
smiles: SMILES string of substructure
Returns:
List of compounds containing the substructure
"""
substructure = new_client.substructure
results = substructure.filter(smiles=smiles)
return list(results)
def get_drug_info(molecule_chembl_id):
"""
Retrieve drug information including indications and mechanisms.
Args:
molecule_chembl_id: ChEMBL molecule identifier
Returns:
Tuple of (drug_info, mechanisms, indications)
"""
drug = new_client.drug
mechanism = new_client.mechanism
drug_indication = new_client.drug_indication
try:
drug_info = drug.get(molecule_chembl_id)
except:
drug_info = None
mechanisms = list(mechanism.filter(molecule_chembl_id=molecule_chembl_id))
indications = list(drug_indication.filter(molecule_chembl_id=molecule_chembl_id))
return drug_info, mechanisms, indications
def find_kinase_inhibitors(max_ic50=100):
"""
Find potent kinase inhibitors.
Args:
max_ic50: Maximum IC50 value in nM
Returns:
List of kinase inhibitor activities
"""
target = new_client.target
activity = new_client.activity
# Find kinase targets
kinase_targets = target.filter(
target_type='SINGLE PROTEIN',
pref_name__icontains='kinase'
)
# Get target IDs
target_ids = [t['target_chembl_id'] for t in kinase_targets[:10]] # Limit to first 10
# Find activities
results = activity.filter(
target_chembl_id__in=target_ids,
standard_type='IC50',
standard_value__lte=max_ic50,
standard_units='nM'
)
return list(results)
def get_compound_bioactivities(molecule_chembl_id):
"""
Get all bioactivity data for a specific compound.
Args:
molecule_chembl_id: ChEMBL molecule identifier
Returns:
List of all activity records for the compound
"""
activity = new_client.activity
results = activity.filter(
molecule_chembl_id=molecule_chembl_id,
pchembl_value__isnull=False
)
return list(results)
def export_to_dataframe(data):
"""
Convert ChEMBL data to pandas DataFrame (requires pandas).
Args:
data: List of ChEMBL records
Returns:
pandas DataFrame
"""
try:
import pandas as pd
return pd.DataFrame(data)
except ImportError:
print("pandas not installed. Install with: pip install pandas")
return None
# Example usage
if __name__ == "__main__":
print("ChEMBL Database Query Examples")
print("=" * 50)
# Example 1: Get information about aspirin
print("\n1. Getting information about aspirin (CHEMBL25)...")
aspirin = get_molecule_info('CHEMBL25')
print(f"Name: {aspirin.get('pref_name')}")
print(f"Formula: {aspirin.get('molecule_properties', {}).get('full_molformula')}")
# Example 2: Search for EGFR inhibitors
print("\n2. Searching for EGFR targets...")
egfr_targets = search_targets_by_name('EGFR')
if egfr_targets:
print(f"Found {len(egfr_targets)} EGFR-related targets")
print(f"First target: {egfr_targets[0]['pref_name']}")
# Example 3: Find potent activities for a target
print("\n3. Finding potent compounds for EGFR (CHEMBL203)...")
activities = get_bioactivity_data('CHEMBL203', 'IC50', max_value=10)
print(f"Found {len(activities)} compounds with IC50 <= 10 nM")
print("\n" + "=" * 50)
print("Examples completed successfully!")