Initial commit
This commit is contained in:
215
skills/clinicaltrials-database/scripts/query_clinicaltrials.py
Normal file
215
skills/clinicaltrials-database/scripts/query_clinicaltrials.py
Normal file
@@ -0,0 +1,215 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
ClinicalTrials.gov API Query Helper
|
||||
|
||||
A comprehensive Python script for querying the ClinicalTrials.gov API v2.
|
||||
Provides convenient functions for common query patterns including searching
|
||||
by condition, intervention, location, sponsor, and retrieving specific trials.
|
||||
|
||||
API Documentation: https://clinicaltrials.gov/data-api/api
|
||||
Rate Limit: ~50 requests per minute per IP address
|
||||
"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
from typing import Dict, List, Optional, Union
|
||||
from urllib.parse import urlencode
|
||||
|
||||
|
||||
# Root URL of the ClinicalTrials.gov API v2; the query helpers in this module
# append endpoint paths such as "/studies" to it.
BASE_URL = "https://clinicaltrials.gov/api/v2"
|
||||
|
||||
|
||||
def search_studies(
    condition: Optional[str] = None,
    intervention: Optional[str] = None,
    location: Optional[str] = None,
    sponsor: Optional[str] = None,
    status: Optional[Union[str, List[str]]] = None,
    nct_ids: Optional[Union[str, List[str]]] = None,
    sort: str = "LastUpdatePostDate:desc",
    page_size: int = 10,
    page_token: Optional[str] = None,
    format: str = "json",
    timeout: float = 30.0,
) -> Union[Dict, str]:
    """
    Search for clinical trials using various filters.

    Args:
        condition: Disease or condition (e.g., "lung cancer", "diabetes")
        intervention: Treatment or intervention (e.g., "Pembrolizumab", "exercise")
        location: Geographic location (e.g., "New York", "California")
        sponsor: Sponsor or collaborator name (e.g., "National Cancer Institute")
        status: Study status(es). Can be string or list. Valid values:
            RECRUITING, NOT_YET_RECRUITING, ENROLLING_BY_INVITATION,
            ACTIVE_NOT_RECRUITING, SUSPENDED, TERMINATED, COMPLETED, WITHDRAWN
        nct_ids: NCT IDs to filter by. Either a list of IDs or a single,
            already comma-separated string.
        sort: Sort order (e.g., "LastUpdatePostDate:desc", "EnrollmentCount:desc")
        page_size: Number of results per page (default: 10, max: 1000)
        page_token: Token for pagination (returned from previous query)
        format: Response format ("json" or "csv")
        timeout: Seconds to wait for the server before giving up (default: 30).

    Returns:
        The decoded JSON document (a dictionary with studies and metadata)
        when ``format`` is "json"; otherwise the raw response body as text.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    params = {}

    # Free-text query fields are only sent when a value was supplied.
    if condition:
        params['query.cond'] = condition
    if intervention:
        params['query.intr'] = intervention
    if location:
        params['query.locn'] = location
    if sponsor:
        params['query.spons'] = sponsor

    # Multi-valued filters are sent as one comma-separated value. A plain
    # string is passed through untouched: joining a str would split it into
    # single characters.
    if status:
        params['filter.overallStatus'] = (
            status if isinstance(status, str) else ','.join(status)
        )
    if nct_ids:
        params['filter.ids'] = (
            nct_ids if isinstance(nct_ids, str) else ','.join(nct_ids)
        )

    # Pagination and sorting
    params['sort'] = sort
    params['pageSize'] = page_size
    if page_token:
        params['pageToken'] = page_token

    params['format'] = format

    url = f"{BASE_URL}/studies"
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection.
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()

    if format == "json":
        return response.json()
    return response.text
|
||||
|
||||
|
||||
def get_study_details(
    nct_id: str,
    format: str = "json",
    timeout: float = 30.0,
) -> Union[Dict, str]:
    """
    Retrieve detailed information about a specific clinical trial.

    Args:
        nct_id: The NCT ID of the trial (e.g., "NCT04852770"). Surrounding
            whitespace is ignored.
        format: Response format ("json" or "csv")
        timeout: Seconds to wait for the server before giving up (default: 30).

    Returns:
        The decoded JSON document (a dictionary with the study information)
        when ``format`` is "json"; otherwise the raw response body as text.

    Raises:
        ValueError: If ``nct_id`` is not a non-empty string.
        requests.HTTPError: If the server answers with an error status code.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # Imported locally so this function does not depend on a change to the
    # module-level import block.
    from urllib.parse import quote

    # An empty ID would turn the URL into ".../studies/", i.e. a different
    # endpoint, so reject it before any request is made.
    cleaned_id = nct_id.strip() if isinstance(nct_id, str) else ""
    if not cleaned_id:
        raise ValueError("nct_id must be a non-empty string")

    # Escape the ID so characters such as "/" or "?" cannot alter the path.
    escaped_id = quote(cleaned_id, safe="")
    url = f"{BASE_URL}/studies/{escaped_id}"

    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection.
    response = requests.get(url, params={'format': format}, timeout=timeout)
    response.raise_for_status()

    if format == "json":
        return response.json()
    return response.text
|
||||
|
||||
|
||||
def search_with_all_results(
    condition: Optional[str] = None,
    intervention: Optional[str] = None,
    location: Optional[str] = None,
    sponsor: Optional[str] = None,
    status: Optional[Union[str, List[str]]] = None,
    max_results: Optional[int] = None
) -> List[Dict]:
    """
    Search for clinical trials and automatically paginate through all results.

    Args:
        condition: Disease or condition to search for
        intervention: Treatment or intervention to search for
        location: Geographic location to search in
        sponsor: Sponsor or collaborator name
        status: Study status(es) to filter by
        max_results: Maximum number of results to retrieve (None for all).
            Zero or a negative number yields an empty list without
            contacting the API.

    Returns:
        List of matching studies, at most ``max_results`` long when a limit
        is given.
    """
    # Only None means "no limit". A limit of zero (or less) asks for
    # nothing, so answer immediately instead of downloading every study.
    if max_results is not None and max_results <= 0:
        return []

    # Largest page size the search helper documents. Ask for fewer rows when
    # the caller wants fewer than one full page. The size is fixed up front
    # so every page of the same query uses the same value.
    max_page_size = 1000
    if max_results is None:
        page_size = max_page_size
    else:
        page_size = min(max_page_size, max_results)

    all_studies: List[Dict] = []
    page_token = None

    while True:
        result = search_studies(
            condition=condition,
            intervention=intervention,
            location=location,
            sponsor=sponsor,
            status=status,
            page_size=page_size,
            page_token=page_token
        )

        all_studies.extend(result.get('studies', []))

        # Stop as soon as the requested number of studies is reached.
        if max_results is not None and len(all_studies) >= max_results:
            return all_studies[:max_results]

        # A missing token means the last page has been consumed.
        page_token = result.get('nextPageToken')
        if not page_token:
            return all_studies
|
||||
|
||||
|
||||
def extract_study_summary(study: Dict) -> Dict:
    """
    Condense a full study record into a handful of headline fields.

    Args:
        study: A study dictionary from the API response

    Returns:
        Dictionary with the keys ``nct_id``, ``title``, ``status``,
        ``phase``, ``enrollment``, ``brief_summary`` and ``last_update``.
        Fields absent from ``study`` come back as None, except ``phase``,
        which defaults to an empty list.
    """
    section = study.get('protocolSection', {})
    ident = section.get('identificationModule', {})
    status_info = section.get('statusModule', {})
    narrative = section.get('descriptionModule', {})
    design = section.get('designModule', {})

    # Prefer the official title; fall back to the brief one when it is
    # missing or empty.
    title = ident.get('officialTitle')
    if not title:
        title = ident.get('briefTitle')

    enrollment_info = design.get('enrollmentInfo', {})
    last_update_struct = status_info.get('lastUpdatePostDateStruct', {})

    overview = {}
    overview['nct_id'] = ident.get('nctId')
    overview['title'] = title
    overview['status'] = status_info.get('overallStatus')
    overview['phase'] = design.get('phases', [])
    overview['enrollment'] = enrollment_info.get('count')
    overview['brief_summary'] = narrative.get('briefSummary')
    overview['last_update'] = last_update_struct.get('date')
    return overview
|
||||
|
||||
|
||||
# Example usage
|
||||
if __name__ == "__main__":
    # Example 1: run a small search for recruiting lung cancer trials and
    # report how many matched versus how many came back on this page.
    print("Example 1: Searching for recruiting lung cancer trials...")
    example_query = {
        "condition": "lung cancer",
        "status": "RECRUITING",
        "page_size": 5,
    }
    search_result = search_studies(**example_query)
    total_found = search_result.get('totalCount', 0)
    print(f"Found {total_found} total trials")
    page_of_studies = search_result.get('studies', [])
    print(f"Showing first {len(page_of_studies)} trials\n")

    # Example 2: fetch the full record of the first hit and print a
    # condensed summary of it as indented JSON.
    if page_of_studies:
        top_hit = page_of_studies[0]
        top_hit_id = top_hit['protocolSection']['identificationModule']['nctId']
        print(f"Example 2: Getting details for {top_hit_id}...")
        full_record = get_study_details(top_hit_id)
        print(json.dumps(extract_study_summary(full_record), indent=2))
|
||||
Reference in New Issue
Block a user