Files
gh-k-dense-ai-claude-scient…/skills/clinicaltrials-database/scripts/query_clinicaltrials.py
2025-11-30 08:30:10 +08:00

216 lines
6.8 KiB
Python

#!/usr/bin/env python3
"""
ClinicalTrials.gov API Query Helper
A comprehensive Python script for querying the ClinicalTrials.gov API v2.
Provides convenient functions for common query patterns including searching
by condition, intervention, location, sponsor, and retrieving specific trials.
API Documentation: https://clinicaltrials.gov/data-api/api
Rate Limit: ~50 requests per minute per IP address
"""
import requests
import json
from typing import Dict, List, Optional, Union
from urllib.parse import urlencode
# Root URL of the ClinicalTrials.gov API v2; the functions in this module
# append endpoint paths such as "/studies" to it.
BASE_URL = "https://clinicaltrials.gov/api/v2"
def search_studies(
    condition: Optional[str] = None,
    intervention: Optional[str] = None,
    location: Optional[str] = None,
    sponsor: Optional[str] = None,
    status: Optional[Union[str, List[str]]] = None,
    nct_ids: Optional[List[str]] = None,
    sort: str = "LastUpdatePostDate:desc",
    page_size: int = 10,
    page_token: Optional[str] = None,
    format: str = "json",
    timeout: float = 30.0,
) -> Union[Dict, str]:
    """
    Search for clinical trials using various filters.

    Sends a single GET request to ``{BASE_URL}/studies``. Filters that are
    None or empty are left out of the request entirely.

    Args:
        condition: Disease or condition (e.g., "lung cancer", "diabetes")
        intervention: Treatment or intervention (e.g., "Pembrolizumab", "exercise")
        location: Geographic location (e.g., "New York", "California")
        sponsor: Sponsor or collaborator name (e.g., "National Cancer Institute")
        status: Study status(es). Either a single string or an iterable of
            strings (joined with commas). Valid values:
            RECRUITING, NOT_YET_RECRUITING, ENROLLING_BY_INVITATION,
            ACTIVE_NOT_RECRUITING, SUSPENDED, TERMINATED, COMPLETED, WITHDRAWN
        nct_ids: NCT IDs to filter by. Either an iterable of IDs (joined with
            commas) or an already comma-separated string.
        sort: Sort order (e.g., "LastUpdatePostDate:desc", "EnrollmentCount:desc")
        page_size: Number of results per page (default: 10, max: 1000)
        page_token: Token for pagination (returned from previous query)
        format: Response format ("json" or "csv")
        timeout: Seconds to wait for the server before giving up; passed
            through to ``requests.get``. Defaults to 30 seconds.

    Returns:
        The decoded JSON document (a dictionary with studies and metadata)
        when ``format`` is "json"; otherwise the raw response body as text.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error status.
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    params = {}

    # Free-text query fields: only sent when the caller supplied a value
    if condition:
        params['query.cond'] = condition
    if intervention:
        params['query.intr'] = intervention
    if location:
        params['query.locn'] = location
    if sponsor:
        params['query.spons'] = sponsor

    # A plain string is sent unchanged; any other iterable (list, tuple, ...)
    # is collapsed into one comma-separated value.
    if status:
        params['filter.overallStatus'] = (
            status if isinstance(status, str) else ','.join(status)
        )

    # Same rule for IDs: joining a bare string would split it into characters.
    if nct_ids:
        params['filter.ids'] = (
            nct_ids if isinstance(nct_ids, str) else ','.join(nct_ids)
        )

    # Pagination and sorting
    params['sort'] = sort
    params['pageSize'] = page_size
    if page_token:
        params['pageToken'] = page_token

    params['format'] = format

    url = f"{BASE_URL}/studies"
    # Without an explicit timeout requests waits indefinitely on a stalled server
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()

    if format == "json":
        return response.json()
    return response.text
def get_study_details(
    nct_id: str,
    format: str = "json",
    timeout: float = 30.0,
) -> Union[Dict, str]:
    """
    Retrieve detailed information about a specific clinical trial.

    Sends a single GET request to ``{BASE_URL}/studies/{nct_id}``.

    Args:
        nct_id: The NCT ID of the trial (e.g., "NCT04852770"). Surrounding
            whitespace is ignored.
        format: Response format ("json" or "csv")
        timeout: Seconds to wait for the server before giving up; passed
            through to ``requests.get``. Defaults to 30 seconds.

    Returns:
        The decoded JSON document (a dictionary) when ``format`` is "json";
        otherwise the raw response body as text.

    Raises:
        ValueError: If ``nct_id`` is not a non-empty string.
        requests.exceptions.HTTPError: If the server answers with an error status.
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    # An empty ID would build ".../studies/" instead of a single-study URL,
    # so reject it before any network access.
    if not isinstance(nct_id, str) or not nct_id.strip():
        raise ValueError("nct_id must be a non-empty string")

    params = {'format': format}
    url = f"{BASE_URL}/studies/{nct_id.strip()}"
    # Without an explicit timeout requests waits indefinitely on a stalled server
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()

    if format == "json":
        return response.json()
    return response.text
def search_with_all_results(
    condition: Optional[str] = None,
    intervention: Optional[str] = None,
    location: Optional[str] = None,
    sponsor: Optional[str] = None,
    status: Optional[Union[str, List[str]]] = None,
    max_results: Optional[int] = None
) -> List[Dict]:
    """
    Search for clinical trials and automatically paginate through all results.

    Calls ``search_studies`` repeatedly, following ``nextPageToken`` until the
    server reports no further page or ``max_results`` studies were collected.

    Args:
        condition: Disease or condition to search for
        intervention: Treatment or intervention to search for
        location: Geographic location to search in
        sponsor: Sponsor or collaborator name
        status: Study status(es) to filter by
        max_results: Maximum number of results to retrieve (None for all).
            Zero or a negative number yields an empty list without
            contacting the API.

    Returns:
        List of matching studies, at most ``max_results`` long when a limit
        is given.
    """
    max_page_size = 1000  # largest page size used by this helper

    # Only None means "no limit"; a truthiness test would treat 0 as unlimited.
    if max_results is not None and max_results <= 0:
        return []

    # Do not request more rows per page than the caller wants in total. The
    # size is fixed up front so every page of one search uses the same value.
    if max_results is None:
        page_size = max_page_size
    else:
        page_size = min(max_page_size, max_results)

    all_studies: List[Dict] = []
    page_token = None

    while True:
        result = search_studies(
            condition=condition,
            intervention=intervention,
            location=location,
            sponsor=sponsor,
            status=status,
            page_size=page_size,
            page_token=page_token
        )
        all_studies.extend(result.get('studies', []))

        # Stop once the limit is reached; trim any overshoot from the last page
        if max_results is not None and len(all_studies) >= max_results:
            return all_studies[:max_results]

        # A missing or empty token means this was the last page
        page_token = result.get('nextPageToken')
        if not page_token:
            return all_studies
def extract_study_summary(study: Dict) -> Dict:
    """
    Condense a full study record into a handful of headline fields.

    Missing sections are treated as empty, so absent values come back as
    None (or an empty list for the phases).

    Args:
        study: A study dictionary from the API response

    Returns:
        Dictionary with the keys nct_id, title, status, phase, enrollment,
        brief_summary and last_update
    """
    section = study.get('protocolSection', {})
    ident = section.get('identificationModule', {})
    status_info = section.get('statusModule', {})
    text_info = section.get('descriptionModule', {})
    design = section.get('designModule', {})

    # Fall back to the brief title when the official one is missing or empty
    title = ident.get('officialTitle')
    if not title:
        title = ident.get('briefTitle')

    enrollment_info = design.get('enrollmentInfo', {})
    update_struct = status_info.get('lastUpdatePostDateStruct', {})

    summary = {}
    summary['nct_id'] = ident.get('nctId')
    summary['title'] = title
    summary['status'] = status_info.get('overallStatus')
    summary['phase'] = design.get('phases', [])
    summary['enrollment'] = enrollment_info.get('count')
    summary['brief_summary'] = text_info.get('briefSummary')
    summary['last_update'] = update_struct.get('date')
    return summary
# Demo: executed only when this file is run directly as a script
if __name__ == "__main__":
    # Example 1: a small page of lung cancer trials that are still recruiting
    print("Example 1: Searching for recruiting lung cancer trials...")
    example_query = {
        'condition': "lung cancer",
        'status': "RECRUITING",
        'page_size': 5,
    }
    results = search_studies(**example_query)
    print(f"Found {results.get('totalCount', 0)} total trials")
    print(f"Showing first {len(results.get('studies', []))} trials\n")

    # Example 2: fetch the full record of the first hit and print a summary
    returned_studies = results.get('studies')
    if returned_studies:
        identification = returned_studies[0]['protocolSection']['identificationModule']
        nct_id = identification['nctId']
        print(f"Example 2: Getting details for {nct_id}...")
        overview = extract_study_summary(get_study_details(nct_id))
        print(json.dumps(overview, indent=2))