Initial commit
This commit is contained in:
518
skills/clinpgx-database/scripts/query_clinpgx.py
Executable file
518
skills/clinpgx-database/scripts/query_clinpgx.py
Executable file
@@ -0,0 +1,518 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
ClinPGx API Query Helper Script
|
||||
|
||||
Provides ready-to-use functions for querying the ClinPGx database API.
|
||||
Includes rate limiting, error handling, and caching functionality.
|
||||
|
||||
ClinPGx API: https://api.clinpgx.org/
|
||||
Rate limit: 2 requests per second
|
||||
License: Creative Commons Attribution-ShareAlike 4.0 International
|
||||
"""
|
||||
|
||||
import requests
|
||||
import time
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
# API configuration
# Endpoint paths are appended directly to BASE_URL, so it keeps its trailing slash.
BASE_URL = "https://api.clinpgx.org/v1/"
# Pause (in seconds) applied after a request; 0.5s keeps callers at 2 requests/second.
RATE_LIMIT_DELAY = 0.5
|
||||
|
||||
|
||||
def rate_limited_request(
    url: str,
    params: Optional[Dict] = None,
    delay: float = RATE_LIMIT_DELAY,
    timeout: float = 10,
) -> requests.Response:
    """
    Make a single GET request, then pause to comply with the rate limit.

    Args:
        url: API endpoint URL
        params: Query parameters
        delay: Seconds to sleep after the request completes
            (default 0.5s for 2 req/sec)
        timeout: Seconds to wait for the server before giving up. Without a
            timeout an unresponsive server would block the caller forever.

    Returns:
        Response object (the status code is not checked here)

    Raises:
        requests.exceptions.RequestException: If the request itself fails
            (connection error, timeout, ...). No pause is applied in that case.
    """
    response = requests.get(url, params=params, timeout=timeout)
    # Sleep after the request so the next call cannot start too early.
    time.sleep(delay)
    return response
|
||||
|
||||
|
||||
def safe_api_call(url: str, params: Optional[Dict] = None, max_retries: int = 3) -> Optional[Dict]:
    """
    Make a GET API call with error handling, retries and rate-limit pauses.

    Behaviour per response:
        * 200: pause for RATE_LIMIT_DELAY, then return the decoded JSON body.
        * 404: report it, pause for RATE_LIMIT_DELAY, return None (no retry).
        * 429: exponential backoff (1s, 2s, 4s, ...) and retry; on the final
          attempt give up immediately instead of waiting for nothing.
        * Anything else, a network error, or an undecodable body: report it,
          wait 1s and retry until the attempts are used up.

    Args:
        url: API endpoint URL
        params: Query parameters
        max_retries: Maximum number of attempts

    Returns:
        JSON response data or None on failure
    """
    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1

        try:
            response = requests.get(url, params=params, timeout=10)
            status = response.status_code

            if status == 200:
                time.sleep(RATE_LIMIT_DELAY)
                return response.json()

            if status == 404:
                print(f"Resource not found: {url}")
                # A 404 still counts as a request, so throttle before returning.
                time.sleep(RATE_LIMIT_DELAY)
                return None

            if status == 429:
                if is_last_attempt:
                    # No retry follows; sleeping would only delay the caller.
                    print(f"Failed after {max_retries} attempts")
                    return None
                wait_time = 2 ** attempt  # Exponential backoff: 1s, 2s, 4s
                print(f"Rate limit exceeded. Waiting {wait_time}s before retry...")
                time.sleep(wait_time)
                continue

            # Raises HTTPError for 4xx/5xx responses; handled below.
            response.raise_for_status()
            # Only reached for a non-error status other than 200 (e.g. 204).
            print(f"Attempt {attempt + 1}/{max_retries} failed: unexpected status {status}")

        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers an invalid JSON body: depending on the installed
            # requests version the decode error is not a RequestException.
            print(f"Attempt {attempt + 1}/{max_retries} failed: {e}")

        if is_last_attempt:
            print(f"Failed after {max_retries} attempts")
            return None
        # Fixed pause before the next attempt so failures never retry back-to-back.
        time.sleep(1)

    # Only reached when max_retries <= 0 (the loop body never ran).
    return None
|
||||
|
||||
|
||||
def cached_query(cache_file: str, query_func, *args, **kwargs) -> Any:
    """
    Cache API results as JSON on disk to avoid repeated queries.

    If the cache file exists and holds valid JSON, its content is returned and
    query_func is not called. A cache file that cannot be read or parsed is
    treated as a cache miss and is overwritten by the fresh result.

    Args:
        cache_file: Path to cache file
        query_func: Function to call if cache miss
        *args, **kwargs: Arguments to pass to query_func

    Returns:
        Cached or freshly queried data. A result of None is returned but
        never written to the cache.
    """
    cache_path = Path(cache_file)

    if cache_path.exists():
        try:
            with open(cache_path, encoding="utf-8") as f:
                cached = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            print(f"Ignoring unreadable cache {cache_file}: {e}")
        else:
            print(f"Loading from cache: {cache_file}")
            return cached

    print("Cache miss. Querying API...")
    result = query_func(*args, **kwargs)

    if result is not None:
        # Create missing parent directories so nested cache paths work.
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        with open(cache_path, "w", encoding="utf-8") as f:
            json.dump(result, f, indent=2)
        print(f"Cached to: {cache_file}")

    return result
|
||||
|
||||
|
||||
# Core Query Functions
|
||||
|
||||
def get_gene_info(gene_symbol: str) -> Optional[Dict]:
    """
    Look up a single pharmacogene by its symbol.

    Args:
        gene_symbol: Gene symbol (e.g., "CYP2D6", "TPMT")

    Returns:
        Decoded JSON from the ``gene/<symbol>`` endpoint, or None if the
        call failed

    Example:
        >>> gene_data = get_gene_info("CYP2D6")
        >>> print(gene_data['symbol'], gene_data['name'])
    """
    return safe_api_call(f"{BASE_URL}gene/{gene_symbol}")
|
||||
|
||||
|
||||
def get_drug_info(drug_name: str) -> Optional[List[Dict]]:
    """
    Search the ``chemical`` endpoint for a drug/chemical by name.

    Args:
        drug_name: Drug name (e.g., "warfarin", "codeine")

    Returns:
        Decoded JSON response (expected to be a list of matching drugs),
        or None if the call failed

    Example:
        >>> drugs = get_drug_info("warfarin")
        >>> for drug in drugs:
        >>>     print(drug['name'], drug['id'])
    """
    return safe_api_call(f"{BASE_URL}chemical", {"name": drug_name})
|
||||
|
||||
|
||||
def get_gene_drug_pairs(gene: Optional[str] = None, drug: Optional[str] = None) -> Optional[List[Dict]]:
    """
    Query the ``geneDrugPair`` endpoint, optionally filtered by gene and/or drug.

    Args:
        gene: Gene symbol (optional)
        drug: Drug name (optional)

    Returns:
        Decoded JSON response (expected to be a list of gene-drug pairs),
        or None if the call failed

    Example:
        >>> # Get all pairs for CYP2D6
        >>> pairs = get_gene_drug_pairs(gene="CYP2D6")
        >>>
        >>> # Get specific gene-drug pair
        >>> pair = get_gene_drug_pairs(gene="CYP2D6", drug="codeine")
    """
    # Only filters with a truthy value are sent to the API.
    candidates = (("gene", gene), ("drug", drug))
    filters = {key: value for key, value in candidates if value}
    return safe_api_call(f"{BASE_URL}geneDrugPair", filters)
|
||||
|
||||
|
||||
def get_cpic_guidelines(gene: Optional[str] = None, drug: Optional[str] = None) -> Optional[List[Dict]]:
    """
    Query the ``guideline`` endpoint restricted to the CPIC source.

    Args:
        gene: Gene symbol (optional)
        drug: Drug name (optional)

    Returns:
        Decoded JSON response (expected to be a list of CPIC guidelines),
        or None if the call failed

    Example:
        >>> # Get all CPIC guidelines
        >>> guidelines = get_cpic_guidelines()
        >>>
        >>> # Get guideline for specific gene-drug
        >>> guideline = get_cpic_guidelines(gene="CYP2C19", drug="clopidogrel")
    """
    # The source filter is always sent; gene/drug only when given.
    query = {"source": "CPIC"}
    optional_filters = (("gene", gene), ("drug", drug))
    query.update({key: value for key, value in optional_filters if value})
    return safe_api_call(f"{BASE_URL}guideline", query)
|
||||
|
||||
|
||||
def get_alleles(gene: str) -> Optional[List[Dict]]:
    """
    Query the ``allele`` endpoint for the alleles of one pharmacogene.

    Args:
        gene: Gene symbol (e.g., "CYP2D6")

    Returns:
        Decoded JSON response (expected to be a list of alleles),
        or None if the call failed

    Example:
        >>> alleles = get_alleles("CYP2D6")
        >>> for allele in alleles:
        >>>     print(f"{allele['name']}: {allele['function']}")
    """
    return safe_api_call(f"{BASE_URL}allele", {"gene": gene})
|
||||
|
||||
|
||||
def get_allele_info(allele_name: str) -> Optional[Dict]:
    """
    Look up a single allele by name.

    Args:
        allele_name: Allele name (e.g., "CYP2D6*4"); it is placed in the
            URL path as given

    Returns:
        Decoded JSON from the ``allele/<name>`` endpoint, or None if the
        call failed

    Example:
        >>> allele = get_allele_info("CYP2D6*4")
        >>> print(allele['function'], allele['frequencies'])
    """
    return safe_api_call(f"{BASE_URL}allele/{allele_name}")
|
||||
|
||||
|
||||
def get_clinical_annotations(
    gene: Optional[str] = None,
    drug: Optional[str] = None,
    evidence_level: Optional[str] = None
) -> Optional[List[Dict]]:
    """
    Query the ``clinicalAnnotation`` endpoint with optional filters.

    Args:
        gene: Gene symbol (optional)
        drug: Drug name (optional)
        evidence_level: Filter by evidence level (1A, 1B, 2A, 2B, 3, 4);
            sent as the ``evidenceLevel`` query parameter

    Returns:
        Decoded JSON response (expected to be a list of clinical
        annotations), or None if the call failed

    Example:
        >>> # Get all annotations for CYP2D6
        >>> annotations = get_clinical_annotations(gene="CYP2D6")
        >>>
        >>> # Get high-quality evidence only
        >>> high_quality = get_clinical_annotations(evidence_level="1A")
    """
    # Map API parameter names to the caller's values; drop the unset ones.
    candidates = (
        ("gene", gene),
        ("drug", drug),
        ("evidenceLevel", evidence_level),
    )
    filters = {key: value for key, value in candidates if value}
    return safe_api_call(f"{BASE_URL}clinicalAnnotation", filters)
|
||||
|
||||
|
||||
def get_drug_labels(drug: str, source: Optional[str] = None) -> Optional[List[Dict]]:
    """
    Query the ``drugLabel`` endpoint for a drug, optionally for one source.

    Args:
        drug: Drug name
        source: Regulatory source (e.g., "FDA", "EMA"); omitted from the
            request when not given

    Returns:
        Decoded JSON response (expected to be a list of drug labels),
        or None if the call failed

    Example:
        >>> # Get all labels for warfarin
        >>> labels = get_drug_labels("warfarin")
        >>>
        >>> # Get only FDA labels
        >>> fda_labels = get_drug_labels("warfarin", source="FDA")
    """
    query = {"drug": drug, "source": source} if source else {"drug": drug}
    return safe_api_call(f"{BASE_URL}drugLabel", query)
|
||||
|
||||
|
||||
def search_variants(rsid: Optional[str] = None, chromosome: Optional[str] = None,
                    position: Optional[str] = None) -> Optional[List[Dict]]:
    """
    Look up genetic variants by rsID or by genomic position.

    When rsid is given it takes precedence: the ``variant/<rsid>`` endpoint
    is queried and chromosome/position are ignored.

    Args:
        rsid: dbSNP rsID (e.g., "rs4244285")
        chromosome: Chromosome number
        position: Genomic position

    Returns:
        Decoded JSON response, or None if the call failed

    Example:
        >>> # Search by rsID
        >>> variant = search_variants(rsid="rs4244285")
        >>>
        >>> # Search by position
        >>> variants = search_variants(chromosome="10", position="94781859")
    """
    if rsid:
        return safe_api_call(f"{BASE_URL}variant/{rsid}")

    # Positional search: send only the coordinates that were provided.
    coordinates = (("chromosome", chromosome), ("position", position))
    locus = {key: value for key, value in coordinates if value}
    return safe_api_call(f"{BASE_URL}variant", locus)
|
||||
|
||||
|
||||
def get_pathway_info(pathway_id: Optional[str] = None, drug: Optional[str] = None) -> Optional[Any]:
    """
    Fetch one pathway by ID, or query the ``pathway`` endpoint by drug.

    When pathway_id is given it takes precedence and drug is ignored.

    Args:
        pathway_id: ClinPGx pathway ID (optional)
        drug: Drug name (optional)

    Returns:
        Decoded JSON response, or None if the call failed

    Example:
        >>> # Get specific pathway
        >>> pathway = get_pathway_info(pathway_id="PA146123006")
        >>>
        >>> # Get all pathways for a drug
        >>> pathways = get_pathway_info(drug="warfarin")
    """
    if pathway_id:
        return safe_api_call(f"{BASE_URL}pathway/{pathway_id}")

    query = {"drug": drug} if drug else {}
    return safe_api_call(f"{BASE_URL}pathway", query)
|
||||
|
||||
|
||||
# Utility Functions
|
||||
|
||||
def export_to_dataframe(data: List[Dict], output_file: Optional[str] = None):
    """
    Build a pandas DataFrame from API results and optionally save it as CSV.

    pandas is imported lazily so the rest of the module works without it.

    Args:
        data: List of dictionaries from API
        output_file: Optional CSV output file path; when given, the frame is
            written there without the index column

    Returns:
        pandas DataFrame, or None if pandas is not installed

    Example:
        >>> pairs = get_gene_drug_pairs(gene="CYP2D6")
        >>> df = export_to_dataframe(pairs, "cyp2d6_pairs.csv")
        >>> print(df.head())
    """
    try:
        import pandas as pd
    except ImportError:
        print("pandas not installed. Install with: pip install pandas")
        return None

    frame = pd.DataFrame(data)
    if not output_file:
        return frame

    frame.to_csv(output_file, index=False)
    print(f"Data exported to: {output_file}")
    return frame
|
||||
|
||||
|
||||
def batch_gene_query(gene_list: List[str], delay: float = 0.5) -> Dict[str, Dict]:
    """
    Fetch gene information for several genes, pausing after each lookup.

    Each gene is looked up with get_gene_info; genes whose lookup returns
    nothing (None or an empty result) are left out of the returned mapping.

    Args:
        gene_list: List of gene symbols
        delay: Seconds to sleep after each lookup (default 0.5s)

    Returns:
        Dictionary mapping gene symbols to gene data

    Example:
        >>> genes = ["CYP2D6", "CYP2C19", "CYP2C9", "TPMT"]
        >>> results = batch_gene_query(genes)
        >>> for gene, data in results.items():
        >>>     print(f"{gene}: {data['name']}")
    """
    print(f"Querying {len(gene_list)} genes with {delay}s delay between requests...")

    found = {}
    for symbol in gene_list:
        print(f"Fetching: {symbol}")
        info = get_gene_info(symbol)
        if info:
            found[symbol] = info
        # Pause after every lookup, successful or not.
        time.sleep(delay)

    print(f"Completed: {len(found)}/{len(gene_list)} successful")
    return found
|
||||
|
||||
|
||||
def find_actionable_gene_drug_pairs(cpic_level: str = "A") -> Optional[List[Dict]]:
    """
    Query the ``geneDrugPair`` endpoint filtered by CPIC level.

    Args:
        cpic_level: CPIC recommendation level (A, B, C, D); sent as the
            ``cpicLevel`` query parameter

    Returns:
        Decoded JSON response (expected to be a list of gene-drug pairs),
        or None if the call failed

    Example:
        >>> # Get all Level A recommendations
        >>> actionable = find_actionable_gene_drug_pairs(cpic_level="A")
        >>> for pair in actionable:
        >>>     print(f"{pair['gene']} - {pair['drug']}")
    """
    return safe_api_call(f"{BASE_URL}geneDrugPair", {"cpicLevel": cpic_level})
|
||||
|
||||
|
||||
# Example Usage
|
||||
# Example usage: runs five live queries against the API when executed as a script.
if __name__ == "__main__":
    rule = "=" * 60  # horizontal separator printed around each heading

    print("ClinPGx API Query Examples\n")

    # Example 1: Get gene information
    print(rule, "Example 1: Get CYP2D6 gene information", rule, sep="\n")
    cyp2d6 = get_gene_info("CYP2D6")
    if cyp2d6:
        print(f"Gene: {cyp2d6.get('symbol')}")
        print(f"Name: {cyp2d6.get('name')}")
    print()

    # Example 2: Search for a drug
    print(rule, "Example 2: Search for warfarin", rule, sep="\n")
    warfarin = get_drug_info("warfarin")
    if warfarin:
        for drug in warfarin[:1]:  # Show first result
            print(f"Drug: {drug.get('name')}")
            print(f"ID: {drug.get('id')}")
    print()

    # Example 3: Get gene-drug pairs
    print(rule, "Example 3: Get CYP2C19-clopidogrel pair", rule, sep="\n")
    pair = get_gene_drug_pairs(gene="CYP2C19", drug="clopidogrel")
    if pair:
        print(f"Found {len(pair)} gene-drug pair(s)")
        if len(pair) > 0:
            print(f"Annotations: {pair[0].get('sources', [])}")
    print()

    # Example 4: Get CPIC guidelines
    print(rule, "Example 4: Get CPIC guidelines for CYP2C19", rule, sep="\n")
    guidelines = get_cpic_guidelines(gene="CYP2C19")
    if guidelines:
        print(f"Found {len(guidelines)} guideline(s)")
        for g in guidelines[:2]:  # Show first 2
            print(f"  - {g.get('name')}")
    print()

    # Example 5: Get alleles for a gene
    print(rule, "Example 5: Get CYP2D6 alleles", rule, sep="\n")
    alleles = get_alleles("CYP2D6")
    if alleles:
        print(f"Found {len(alleles)} allele(s)")
        for allele in alleles[:3]:  # Show first 3
            print(f"  - {allele.get('name')}: {allele.get('function')}")
    print()

    print(rule, "Examples completed!", rule, sep="\n")
|
||||
Reference in New Issue
Block a user