Initial commit

2025-11-30 08:30:10 +08:00
commit f0bd18fb4e
824 changed files with 331919 additions and 0 deletions
--- a/skills/clinicaltrials-database/scripts/query_clinicaltrials.py
+++ b/skills/clinicaltrials-database/scripts/query_clinicaltrials.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+"""
+ClinicalTrials.gov API Query Helper
+
+A comprehensive Python script for querying the ClinicalTrials.gov API v2.
+Provides convenient functions for common query patterns including searching
+by condition, intervention, location, sponsor, and retrieving specific trials.
+
+API Documentation: https://clinicaltrials.gov/data-api/api
+Rate Limit: ~50 requests per minute per IP address
+"""
+
+import requests
+import json
+from typing import Dict, List, Optional, Union
+from urllib.parse import urlencode
+
+
+BASE_URL = "https://clinicaltrials.gov/api/v2"
+
+
+def search_studies(
+    condition: Optional[str] = None,
+    intervention: Optional[str] = None,
+    location: Optional[str] = None,
+    sponsor: Optional[str] = None,
+    status: Optional[Union[str, List[str]]] = None,
+    nct_ids: Optional[List[str]] = None,
+    sort: str = "LastUpdatePostDate:desc",
+    page_size: int = 10,
+    page_token: Optional[str] = None,
+    format: str = "json"
+) -> Dict:
+    """
+    Search for clinical trials using various filters.
+
+    Args:
+        condition: Disease or condition (e.g., "lung cancer", "diabetes")
+        intervention: Treatment or intervention (e.g., "Pembrolizumab", "exercise")
+        location: Geographic location (e.g., "New York", "California")
+        sponsor: Sponsor or collaborator name (e.g., "National Cancer Institute")
+        status: Study status(es). Can be string or list. Valid values:
+                RECRUITING, NOT_YET_RECRUITING, ENROLLING_BY_INVITATION,
+                ACTIVE_NOT_RECRUITING, SUSPENDED, TERMINATED, COMPLETED, WITHDRAWN
+        nct_ids: List of NCT IDs to filter by
+        sort: Sort order (e.g., "LastUpdatePostDate:desc", "EnrollmentCount:desc")
+        page_size: Number of results per page (default: 10, max: 1000)
+        page_token: Token for pagination (returned from previous query)
+        format: Response format ("json" or "csv")
+
+    Returns:
+        Dictionary containing search results with studies and metadata
+    """
+    params = {}
+
+    # Build query parameters
+    if condition:
+        params['query.cond'] = condition
+    if intervention:
+        params['query.intr'] = intervention
+    if location:
+        params['query.locn'] = location
+    if sponsor:
+        params['query.spons'] = sponsor
+
+    # Handle status filter (can be list or string)
+    if status:
+        if isinstance(status, list):
+            params['filter.overallStatus'] = ','.join(status)
+        else:
+            params['filter.overallStatus'] = status
+
+    # Handle NCT IDs filter
+    if nct_ids:
+        params['filter.ids'] = ','.join(nct_ids)
+
+    # Add pagination and sorting
+    params['sort'] = sort
+    params['pageSize'] = page_size
+    if page_token:
+        params['pageToken'] = page_token
+
+    # Set format
+    params['format'] = format
+
+    url = f"{BASE_URL}/studies"
+    response = requests.get(url, params=params)
+    response.raise_for_status()
+
+    if format == "json":
+        return response.json()
+    else:
+        return response.text
+
+
+def get_study_details(nct_id: str, format: str = "json") -> Dict:
+    """
+    Retrieve detailed information about a specific clinical trial.
+
+    Args:
+        nct_id: The NCT ID of the trial (e.g., "NCT04852770")
+        format: Response format ("json" or "csv")
+
+    Returns:
+        Dictionary containing comprehensive study information
+    """
+    params = {'format': format}
+    url = f"{BASE_URL}/studies/{nct_id}"
+
+    response = requests.get(url, params=params)
+    response.raise_for_status()
+
+    if format == "json":
+        return response.json()
+    else:
+        return response.text
+
+
+def search_with_all_results(
+    condition: Optional[str] = None,
+    intervention: Optional[str] = None,
+    location: Optional[str] = None,
+    sponsor: Optional[str] = None,
+    status: Optional[Union[str, List[str]]] = None,
+    max_results: Optional[int] = None
+) -> List[Dict]:
+    """
+    Search for clinical trials and automatically paginate through all results.
+
+    Args:
+        condition: Disease or condition to search for
+        intervention: Treatment or intervention to search for
+        location: Geographic location to search in
+        sponsor: Sponsor or collaborator name
+        status: Study status(es) to filter by
+        max_results: Maximum number of results to retrieve (None for all)
+
+    Returns:
+        List of all matching studies
+    """
+    all_studies = []
+    page_token = None
+
+    while True:
+        result = search_studies(
+            condition=condition,
+            intervention=intervention,
+            location=location,
+            sponsor=sponsor,
+            status=status,
+            page_size=1000,  # Use max page size for efficiency
+            page_token=page_token
+        )
+
+        studies = result.get('studies', [])
+        all_studies.extend(studies)
+
+        # Check if we've reached the max or there are no more results
+        if max_results and len(all_studies) >= max_results:
+            return all_studies[:max_results]
+
+        # Check for next page
+        page_token = result.get('nextPageToken')
+        if not page_token:
+            break
+
+    return all_studies
+
+
+def extract_study_summary(study: Dict) -> Dict:
+    """
+    Extract key information from a study for quick overview.
+
+    Args:
+        study: A study dictionary from the API response
+
+    Returns:
+        Dictionary with essential study information
+    """
+    protocol = study.get('protocolSection', {})
+    identification = protocol.get('identificationModule', {})
+    status_module = protocol.get('statusModule', {})
+    description = protocol.get('descriptionModule', {})
+
+    return {
+        'nct_id': identification.get('nctId'),
+        'title': identification.get('officialTitle') or identification.get('briefTitle'),
+        'status': status_module.get('overallStatus'),
+        'phase': protocol.get('designModule', {}).get('phases', []),
+        'enrollment': protocol.get('designModule', {}).get('enrollmentInfo', {}).get('count'),
+        'brief_summary': description.get('briefSummary'),
+        'last_update': status_module.get('lastUpdatePostDateStruct', {}).get('date')
+    }
+
+
+# Example usage
+if __name__ == "__main__":
+    # Example 1: Search for recruiting lung cancer trials
+    print("Example 1: Searching for recruiting lung cancer trials...")
+    results = search_studies(
+        condition="lung cancer",
+        status="RECRUITING",
+        page_size=5
+    )
+    print(f"Found {results.get('totalCount', 0)} total trials")
+    print(f"Showing first {len(results.get('studies', []))} trials\n")
+
+    # Example 2: Get details for a specific trial
+    if results.get('studies'):
+        first_study = results['studies'][0]
+        nct_id = first_study['protocolSection']['identificationModule']['nctId']
+        print(f"Example 2: Getting details for {nct_id}...")
+        details = get_study_details(nct_id)
+        summary = extract_study_summary(details)
+        print(json.dumps(summary, indent=2))