Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:30:10 +08:00
commit f0bd18fb4e
824 changed files with 331919 additions and 0 deletions

View File

@@ -0,0 +1,335 @@
#!/usr/bin/env python3
"""
FDA API Usage Examples
Demonstrates common use cases for querying FDA databases.
Usage:
python fda_examples.py
"""
import os
from fda_query import FDAQuery
def example_drug_safety_profile(fda, drug_name):
    """
    Print a safety profile for a drug.

    Sections are printed in this order:
    - Total adverse events
    - Most common reactions (top 10)
    - Serious events
    - Recent recalls (first 3 listed)

    Args:
        fda: FDAQuery instance
        drug_name: Drug name, matched as a wildcard substring of
            patient.drug.medicinalproduct
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"DRUG SAFETY PROFILE: {drug_name}")
    print(f"{banner}\n")

    # Shared by the reaction count and the serious-event query below.
    drug_filter = f"patient.drug.medicinalproduct:*{drug_name}*"

    # 1. Count total adverse events (one record is enough to read meta.total)
    events = fda.query_drug_events(drug_name, limit=1)
    if "meta" in events and "results" in events["meta"]:
        total = events["meta"]["results"].get("total", 0)
        print(f"Total Adverse Event Reports: {total:,}")

    # 2. Most common reactions
    print("\nMost Common Adverse Reactions:")
    reactions = fda.count_by_field(
        "drug", "event",
        search=drug_filter,
        field="patient.reaction.reactionmeddrapt",
        exact=True,
    )
    if "results" in reactions:
        for i, item in enumerate(reactions["results"][:10], 1):
            print(f" {i}. {item['term']}: {item['count']:,} reports")

    # 3. Serious events
    # The operator is written with real spaces: the client hands the search
    # string to requests as a query parameter, which percent-encodes a
    # literal "+" as %2B instead of treating it as a space.
    serious_events = fda.query(
        "drug", "event",
        search=f"{drug_filter} AND serious:1",
        limit=1,
    )
    if "meta" in serious_events and "results" in serious_events["meta"]:
        serious_total = serious_events["meta"]["results"].get("total", 0)
        print(f"\nSerious Adverse Events: {serious_total:,}")

    # 4. Check for recent recalls
    recalls = fda.query_drug_recalls(drug_name=drug_name)
    recall_records = recalls.get("results") or []
    if recall_records:
        print(f"\nRecent Recalls: {len(recall_records)}")
        for recall in recall_records[:3]:
            # Classification values already carry the "Class" prefix
            # (e.g. "Class II"), so it is not repeated in the label.
            print(f" - {recall.get('reason_for_recall', 'Unknown')} "
                  f"(Classification: {recall.get('classification', 'Unknown')})")
    else:
        print("\nRecent Recalls: None found")
def example_device_surveillance(fda, device_name):
    """
    Print a surveillance summary for a medical device.

    Sections:
    - Total adverse event reports
    - Event type distribution (counts per event_type term)
    - A sample of up to three event records (type and date received)

    Args:
        fda: FDAQuery instance
        device_name: Device name passed to the device event queries
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"DEVICE SURVEILLANCE: {device_name}")
    print(f"{rule}\n")

    # Section 1: overall report count, read from the response metadata.
    count_probe = fda.query_device_events(device_name, limit=1)
    if "meta" in count_probe:
        meta = count_probe["meta"]
        if "results" in meta:
            report_total = meta["results"].get("total", 0)
            print(f"Total Adverse Event Reports: {report_total:,}")

    # Section 2: how the reports break down by event type.
    print("\nEvent Type Distribution:")
    type_counts = fda.count_by_field(
        "device",
        "event",
        search=f"device.brand_name:*{device_name}*",
        field="event_type",
        exact=False,
    )
    if "results" in type_counts:
        for bucket in type_counts["results"]:
            print(f" {bucket['term']}: {bucket['count']:,}")

    # Section 3: show a few individual records, if any came back.
    sample = fda.query_device_events(device_name, limit=5)
    if "results" not in sample or len(sample["results"]) == 0:
        return
    print("\nRecent Events (sample):")
    position = 0
    for record in sample["results"][:3]:
        position += 1
        kind = record.get("event_type", "Unknown")
        received = record.get("date_received", "Unknown")
        print(f" {position}. Type: {kind}, Date: {received}")
def example_food_recall_monitoring(fda, allergen):
    """
    Print food recalls whose recall reason mentions an allergen.

    Up to ten recalls are listed, each with its product description,
    classification, reason, initiation date and status.

    Args:
        fda: FDAQuery instance
        allergen: Allergen to monitor (e.g., "peanut", "milk", "soy")
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"ALLERGEN RECALL MONITORING: {allergen}")
    print(f"{rule}\n")

    # The allergen is used as the recall-reason filter.
    response = fda.query_food_recalls(reason=allergen)
    if "results" not in response or len(response["results"]) == 0:
        print(f"No recent recalls found for allergen: {allergen}")
        return

    matches = response["results"]
    print(f"Found {len(matches)} recalls mentioning '{allergen}':\n")

    # (label, record key) pairs for the indented detail lines.
    detail_fields = (
        ("Classification", "classification"),
        ("Reason", "reason_for_recall"),
        ("Date", "recall_initiation_date"),
        ("Status", "status"),
    )
    for entry in matches[:10]:
        print(f"Product: {entry.get('product_description', 'Unknown product')}")
        for label, key in detail_fields:
            print(f" {label}: {entry.get(key, 'Unknown')}")
        print()
def example_substance_lookup(fda, substance_name):
    """
    Print identifying details for substances matching a name.

    For each of the first three matches, prints whichever of these the
    record provides:
    - Name (the entry flagged "preferred", else the first listed name)
    - UNII (read from "approvalID")
    - CAS numbers (codes whose "codeSystem" contains "CAS")
    - Formula and molecular weight from "structure"
    - Substance class

    Args:
        fda: FDAQuery instance
        substance_name: Name to search for
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"SUBSTANCE INFORMATION: {substance_name}")
    print(f"{rule}\n")

    substances = fda.query_substance_by_name(substance_name)
    if "results" not in substances or len(substances["results"]) == 0:
        print(f"No substances found matching: {substance_name}")
        return

    for rank, record in enumerate(substances["results"][:3], 1):
        print(f"Match {rank}:")

        # Name: fall back to the first entry when none is flagged preferred.
        name_entries = record.get("names", [])
        if name_entries:
            display_name = name_entries[0].get("name")
            for entry in name_entries:
                if entry.get("preferred"):
                    display_name = entry["name"]
                    break
            print(f" Name: {display_name}")

        # UNII
        unii = record.get("approvalID")
        if unii:
            print(f" UNII: {unii}")

        # CAS numbers
        cas_numbers = []
        for code_entry in record.get("codes", []):
            if "CAS" in code_entry.get("codeSystem", ""):
                cas_numbers.append(code_entry["code"])
        if cas_numbers:
            print(f" CAS: {', '.join(cas_numbers)}")

        # Structure
        if "structure" in record:
            structure = record["structure"]
            formula = structure.get("formula")
            mol_weight = structure.get("molecularWeight")
            if formula:
                print(f" Formula: {formula}")
            if mol_weight:
                print(f" Molecular Weight: {mol_weight}")

        # Substance class
        substance_class = record.get("substanceClass")
        if substance_class:
            print(f" Class: {substance_class}")

        print()
def example_comparative_drug_analysis(fda, drug_list):
    """
    Compare safety profiles of multiple drugs.

    For each drug, looks up the total number of adverse event reports and
    the number flagged serious, then prints one table row per drug with
    both counts and the serious share as a percentage (0 when a drug has
    no reports).

    Args:
        fda: FDAQuery instance
        drug_list: List of drug names to compare
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print("COMPARATIVE DRUG ANALYSIS")
    print(f"{rule}\n")
    print(f"Comparing: {', '.join(drug_list)}\n")

    comparison = {}
    for drug in drug_list:
        # Get total events
        events = fda.query_drug_events(drug, limit=1)
        total = 0
        if "meta" in events and "results" in events["meta"]:
            total = events["meta"]["results"].get("total", 0)

        # Get serious events
        # The operator uses real spaces: the client sends the search string
        # as a requests query parameter, which percent-encodes a literal
        # "+" as %2B rather than treating it as a space.
        serious = fda.query(
            "drug", "event",
            search=f"patient.drug.medicinalproduct:*{drug}* AND serious:1",
            limit=1,
        )
        serious_total = 0
        if "meta" in serious and "results" in serious["meta"]:
            serious_total = serious["meta"]["results"].get("total", 0)

        # Guard against division by zero for drugs with no reports.
        serious_rate = (serious_total / total * 100) if total > 0 else 0
        comparison[drug] = {
            "total_events": total,
            "serious_events": serious_total,
            "serious_rate": serious_rate,
        }

    # Display comparison
    print(f"{'Drug':<20} {'Total Events':>15} {'Serious Events':>15} {'Serious %':>12}")
    print("-" * 65)
    for drug, data in comparison.items():
        print(f"{drug:<20} {data['total_events']:>15,} "
              f"{data['serious_events']:>15,} {data['serious_rate']:>11.2f}%")
def example_veterinary_analysis(fda, species, drug_name):
    """
    Summarize veterinary adverse event reports for a drug and species.

    Prints how many reports came back, how many have "serious_ae" equal
    to the string "true" (with percentage), and the ten most frequent
    "veddra_term_name" reaction terms.

    Args:
        fda: FDAQuery instance
        species: Animal species (e.g., "Dog", "Cat", "Horse")
        drug_name: Veterinary drug name
    """
    from collections import Counter

    rule = "=" * 60
    print(f"\n{rule}")
    print(f"VETERINARY DRUG ANALYSIS: {drug_name} in {species}")
    print(f"{rule}\n")

    events = fda.query_animal_events(species=species, drug_name=drug_name)
    if "results" not in events or len(events["results"]) == 0:
        print("No adverse events found")
        return

    reports = events["results"]
    print(f"Found {len(reports)} adverse event reports\n")

    # Tally serious reports and gather every named reaction term.
    serious_count = sum(1 for report in reports if report.get("serious_ae") == "true")
    reaction_terms = [
        entry["veddra_term_name"]
        for report in reports
        if "reaction" in report
        for entry in report["reaction"]
        if "veddra_term_name" in entry
    ]

    print(f"Serious Events: {serious_count} ({serious_count/len(reports)*100:.1f}%)")

    # Rank reaction terms by how often they occur.
    print("\nMost Common Reactions:")
    for term, occurrences in Counter(reaction_terms).most_common(10):
        print(f" {term}: {occurrences}")
def main():
    """Run each example analysis once, in a fixed order.

    Reads the API key from the FDA_API_KEY environment variable and warns
    when it is missing. Any exception raised by an example stops the
    remaining examples and is reported on stdout instead of propagating.
    """
    api_key = os.environ.get("FDA_API_KEY")
    if not api_key:
        for notice in (
            "Warning: No FDA_API_KEY found in environment.",
            "You can still use the API but with lower rate limits.",
            "Set FDA_API_KEY environment variable for better performance.\n",
        ):
            print(notice)

    # One client is shared by every example.
    fda = FDAQuery(api_key=api_key)

    # (example function, arguments after the client), in run order.
    scenarios = (
        (example_drug_safety_profile, ("aspirin",)),
        (example_device_surveillance, ("pacemaker",)),
        (example_food_recall_monitoring, ("undeclared peanut",)),
        (example_substance_lookup, ("ibuprofen",)),
        (example_comparative_drug_analysis, (["aspirin", "ibuprofen", "naproxen"],)),
        (example_veterinary_analysis, ("Dog", "flea collar")),
    )

    try:
        for run_example, arguments in scenarios:
            run_example(fda, *arguments)
    except Exception as e:
        print(f"\nError running examples: {e}")
        print("This may be due to API rate limits or connectivity issues.")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,440 @@
#!/usr/bin/env python3
"""
FDA API Query Helper
Comprehensive utility for querying FDA databases through openFDA API.
Includes error handling, rate limiting, caching, and common query patterns.
Usage:
from fda_query import FDAQuery
fda = FDAQuery(api_key="YOUR_API_KEY")
results = fda.query_drug_events(drug_name="aspirin", limit=100)
"""
import requests
import time
import json
import hashlib
from pathlib import Path
from datetime import datetime, timedelta
from collections import deque, Counter
from typing import Dict, List, Optional, Any
class RateLimiter:
    """Manage API rate limits with a 60-second sliding window.

    Each call to wait_if_needed() records a timestamp in ``requests``.
    Once the window already holds ``max_per_minute`` timestamps, the next
    call sleeps until the oldest one has aged out.
    """

    def __init__(self, max_per_minute: int = 240):
        """
        Args:
            max_per_minute: Maximum calls allowed in any 60-second window.

        Raises:
            ValueError: If max_per_minute is less than 1. A non-positive
                limit could never be satisfied and used to fail later
                with an IndexError on the empty window.
        """
        if max_per_minute < 1:
            raise ValueError("max_per_minute must be at least 1")
        self.max_per_minute = max_per_minute
        # Timestamps (time.monotonic()) of recent calls, oldest first.
        self.requests = deque()

    def wait_if_needed(self):
        """Wait if necessary to stay under rate limit, then record this call."""
        # A monotonic clock keeps the window correct when the wall clock
        # is adjusted while the process is running.
        now = time.monotonic()

        # Remove requests older than 1 minute
        while self.requests and now - self.requests[0] > 60:
            self.requests.popleft()

        # Check if at limit. The emptiness check keeps this safe even if
        # max_per_minute is reassigned to a non-positive value later.
        if self.requests and len(self.requests) >= self.max_per_minute:
            # Sleep until the oldest entry leaves the window, plus a small margin.
            sleep_time = 60 - (now - self.requests[0]) + 0.1
            if sleep_time > 0:
                print(f"Rate limit approaching. Waiting {sleep_time:.1f} seconds...")
                time.sleep(sleep_time)
            self.requests.popleft()

        # Stamp after any sleep so the entry reflects when the call proceeds.
        self.requests.append(time.monotonic())
class FDACache:
    """Simple file-based cache for FDA API responses.

    Each response is stored as ``<md5 of url + params>.json`` inside
    ``cache_dir``. An entry counts as fresh while the file's modification
    time is less than ``ttl`` seconds old.
    """

    def __init__(self, cache_dir: str = "fda_cache", ttl: int = 3600):
        """
        Args:
            cache_dir: Directory for cache files; created if missing,
                including any missing parent directories.
            ttl: Maximum age in seconds for an entry to be returned.
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.ttl = ttl

    def _get_cache_key(self, url: str, params: Dict) -> str:
        """Generate cache key from URL and params."""
        # sort_keys makes the key independent of dict insertion order.
        cache_string = f"{url}_{json.dumps(params, sort_keys=True)}"
        # MD5 only names the file here; it is not a security measure.
        return hashlib.md5(cache_string.encode()).hexdigest()

    def _cache_path(self, url: str, params: Dict) -> Path:
        """Return the file path that holds the entry for this request."""
        return self.cache_dir / f"{self._get_cache_key(url, params)}.json"

    def get(self, url: str, params: Dict) -> Optional[Dict]:
        """Get cached response if available and not expired.

        Returns:
            The cached data, or None when there is no entry, the entry is
            older than ``ttl``, or the file cannot be read or parsed.
        """
        cache_file = self._cache_path(url, params)
        try:
            age = time.time() - cache_file.stat().st_mtime
            if age >= self.ttl:
                return None
            with open(cache_file, "r", encoding="utf-8") as f:
                return json.load(f)
        except (OSError, ValueError):
            # Missing, removed mid-read, unreadable or corrupt: treat it
            # as a miss so the caller refetches instead of crashing.
            return None

    def set(self, url: str, params: Dict, data: Dict):
        """Cache response data."""
        with open(self._cache_path(url, params), "w", encoding="utf-8") as f:
            json.dump(data, f)
class FDAQuery:
    """Main class for querying FDA databases.

    Wraps the openFDA HTTP API with rate limiting, optional file caching
    and helpers that build search strings for common lookups. Methods
    return the decoded JSON response; failures are reported as a dict
    with an "error" key rather than raised.
    """

    BASE_URL = "https://api.fda.gov"
    # Largest "limit" value query() will send; larger values are clamped.
    MAX_LIMIT = 1000
    # Boolean/range keywords that callers may have joined with "+".
    _URL_STYLE_OPERATORS = ("AND", "OR", "NOT", "TO")

    def __init__(self, api_key: Optional[str] = None, use_cache: bool = True,
                 cache_ttl: int = 3600, rate_limit: int = 240):
        """
        Initialize FDA query client.

        Args:
            api_key: FDA API key (optional but recommended)
            use_cache: Whether to use response caching
            cache_ttl: Cache time-to-live in seconds
            rate_limit: Requests per minute limit
        """
        self.api_key = api_key
        self.rate_limiter = RateLimiter(max_per_minute=rate_limit)
        self.cache = FDACache(ttl=cache_ttl) if use_cache else None

    def _build_url(self, category: str, endpoint: str) -> str:
        """Build full API endpoint URL."""
        return f"{self.BASE_URL}/{category}/{endpoint}.json"

    @classmethod
    def _normalize_search(cls, search: str) -> str:
        """Turn URL-style operator joins ("a+AND+b") into spaced ones.

        The search string is sent as a requests query parameter, which
        encodes a space as "+" and a literal "+" as "%2B". A "+" that
        touches AND/OR/NOT/TO is therefore rewritten to a space; any
        other "+" is left alone.
        """
        pieces = search.split("+")
        normalized = pieces[0]
        for previous, current in zip(pieces, pieces[1:]):
            touches_operator = (previous in cls._URL_STYLE_OPERATORS
                                or current in cls._URL_STYLE_OPERATORS)
            normalized += (" " if touches_operator else "+") + current
        return normalized

    def _make_request(self, url: str, params: Dict, use_cache: bool = True) -> Dict:
        """
        Make API request with error handling, rate limiting, and caching.

        An HTTP 429 response is retried exactly once after a 60 second
        pause; a second 429 is returned as an error.

        Args:
            url: Full API endpoint URL
            params: Query parameters (not modified)
            use_cache: Whether to use cache for this request

        Returns:
            API response as dictionary, or a dict with an "error" key
            (404 responses also carry an empty "results" list)
        """
        # Copy so the caller's dict never gains the api_key entry.
        params = dict(params)
        if self.api_key:
            params["api_key"] = self.api_key

        caching = bool(use_cache and self.cache)

        # Check cache. An empty cached payload is still a hit.
        if caching:
            cached = self.cache.get(url, params)
            if cached is not None:
                return cached

        for attempt in range(2):
            # Rate limiting applies to the retry as well.
            self.rate_limiter.wait_if_needed()
            try:
                response = requests.get(url, params=params, timeout=30)
                response.raise_for_status()
                data = response.json()
            except requests.exceptions.HTTPError as e:
                status = response.status_code
                if status == 404:
                    return {"error": "No results found", "results": []}
                if status == 429:
                    if attempt == 0:
                        # Rate limit exceeded, wait and retry once
                        print("Rate limit exceeded. Waiting 60 seconds...")
                        time.sleep(60)
                        continue
                    break
                if status == 400:
                    return {"error": f"Invalid query: {response.text}"}
                return {"error": f"HTTP error {status}: {e}"}
            except requests.exceptions.RequestException as e:
                return {"error": f"Request error: {e}"}
            except ValueError as e:
                # Body was not valid JSON (requests versions that raise a
                # plain ValueError from response.json()).
                return {"error": f"Invalid JSON in response: {e}"}

            # Cache successful response
            if caching:
                self.cache.set(url, params, data)
            return data

        return {"error": "HTTP error 429: rate limit still exceeded after one retry"}

    def query(self, category: str, endpoint: str, search: Optional[str] = None,
              limit: int = 100, skip: int = 0, count: Optional[str] = None,
              sort: Optional[str] = None) -> Dict:
        """
        Generic query method for any FDA endpoint.

        Args:
            category: API category (drug, device, food, animalandveterinary, other)
            endpoint: Specific endpoint (event, label, enforcement, etc.)
            search: Search query string; operators may be written with
                spaces ("a AND b") or URL-style ("a+AND+b")
            limit: Maximum results to return (values above 1000 are clamped)
            skip: Number of results to skip (for pagination)
            count: Field to count/aggregate by
            sort: Field to sort by (e.g., "receivedate:desc")

        Returns:
            API response dictionary
        """
        url = self._build_url(category, endpoint)
        # Falsy arguments are left out so the API applies its defaults.
        params = {}
        if search:
            params["search"] = self._normalize_search(search)
        if limit:
            params["limit"] = min(limit, self.MAX_LIMIT)
        if skip:
            params["skip"] = skip
        if count:
            params["count"] = count
        if sort:
            params["sort"] = sort
        return self._make_request(url, params)

    def query_all(self, category: str, endpoint: str, search: str,
                  max_results: int = 5000, batch_size: int = 100) -> List[Dict]:
        """
        Query and retrieve all results with automatic pagination.

        Stops at max_results, on the first error response, or when a
        page comes back shorter than requested.

        Args:
            category: API category
            endpoint: Specific endpoint
            search: Search query string
            max_results: Maximum total results to retrieve
            batch_size: Results per request (clamped to 1-1000)

        Returns:
            List of all result records
        """
        # Keep the page size within what query() will actually send, so
        # the short-page test and the skip offset stay accurate.
        page_size = max(1, min(batch_size, self.MAX_LIMIT))
        all_results = []
        skip = 0
        while len(all_results) < max_results:
            # Never ask for more rows than are still needed.
            wanted = min(page_size, max_results - len(all_results))
            data = self.query(
                category=category,
                endpoint=endpoint,
                search=search,
                limit=wanted,
                skip=skip
            )
            if "error" in data or "results" not in data:
                break
            results = data["results"]
            if not results:
                break
            all_results.extend(results)
            if len(results) < wanted:
                break
            skip += len(results)
        return all_results[:max_results]

    # Drug-specific methods
    def query_drug_events(self, drug_name: str, limit: int = 100) -> Dict:
        """Query drug adverse events."""
        search = f"patient.drug.medicinalproduct:*{drug_name}*"
        return self.query("drug", "event", search=search, limit=limit)

    def query_drug_label(self, drug_name: str, brand: bool = True) -> Dict:
        """Query drug labeling information by brand name (default) or generic name."""
        field = "openfda.brand_name" if brand else "openfda.generic_name"
        search = f"{field}:{drug_name}"
        return self.query("drug", "label", search=search, limit=1)

    def query_drug_ndc(self, ndc: Optional[str] = None,
                       manufacturer: Optional[str] = None) -> Dict:
        """Query National Drug Code directory.

        ``ndc`` takes precedence when both arguments are given.

        Raises:
            ValueError: If neither ndc nor manufacturer is provided.
        """
        if ndc:
            search = f"product_ndc:{ndc}"
        elif manufacturer:
            search = f"labeler_name:*{manufacturer}*"
        else:
            raise ValueError("Must provide either ndc or manufacturer")
        return self.query("drug", "ndc", search=search, limit=100)

    def query_drug_recalls(self, drug_name: Optional[str] = None,
                           classification: Optional[str] = None) -> Dict:
        """Query drug recalls, newest report date first.

        Args:
            drug_name: Substring to match in the product description
            classification: Recall class without the "Class" prefix (e.g. "I")
        """
        search_parts = []
        if drug_name:
            search_parts.append(f"product_description:*{drug_name}*")
        if classification:
            # Quoted so "Class II" is matched as one phrase.
            search_parts.append(f'classification:"Class {classification}"')
        search = " AND ".join(search_parts) if search_parts else None
        return self.query("drug", "enforcement", search=search, limit=100,
                          sort="report_date:desc")

    # Device-specific methods
    def query_device_events(self, device_name: str, limit: int = 100) -> Dict:
        """Query device adverse events."""
        search = f"device.brand_name:*{device_name}*"
        return self.query("device", "event", search=search, limit=limit)

    def query_device_510k(self, applicant: Optional[str] = None,
                          device_name: Optional[str] = None) -> Dict:
        """Query 510(k) clearances.

        ``applicant`` takes precedence when both arguments are given.

        Raises:
            ValueError: If neither applicant nor device_name is provided.
        """
        if applicant:
            search = f"applicant:*{applicant}*"
        elif device_name:
            search = f"device_name:*{device_name}*"
        else:
            raise ValueError("Must provide either applicant or device_name")
        return self.query("device", "510k", search=search, limit=100)

    def query_device_classification(self, product_code: str) -> Dict:
        """Query device classification by product code."""
        search = f"product_code:{product_code}"
        return self.query("device", "classification", search=search, limit=1)

    # Food-specific methods
    def query_food_events(self, product_name: Optional[str] = None,
                          industry: Optional[str] = None) -> Dict:
        """Query food adverse events.

        ``product_name`` takes precedence over ``industry``. With neither,
        the search matches any record that has a report number.
        """
        if product_name:
            search = f"products.name_brand:*{product_name}*"
        elif industry:
            search = f"products.industry_name:*{industry}*"
        else:
            search = "_exists_:report_number"
        return self.query("food", "event", search=search, limit=100)

    def query_food_recalls(self, product: Optional[str] = None,
                           reason: Optional[str] = None,
                           classification: Optional[str] = None) -> Dict:
        """Query food recalls, newest report date first.

        Args:
            product: Substring to match in the product description
            reason: Substring to match in the recall reason
            classification: Recall class without the "Class" prefix (e.g. "I")
        """
        search_parts = []
        if product:
            search_parts.append(f"product_description:*{product}*")
        if reason:
            search_parts.append(f"reason_for_recall:*{reason}*")
        if classification:
            # Quoted so "Class II" is matched as one phrase.
            search_parts.append(f'classification:"Class {classification}"')
        search = " AND ".join(search_parts) if search_parts else "_exists_:recall_number"
        return self.query("food", "enforcement", search=search, limit=100,
                          sort="report_date:desc")

    # Animal & Veterinary methods
    def query_animal_events(self, species: Optional[str] = None,
                            drug_name: Optional[str] = None) -> Dict:
        """Query animal drug adverse events, optionally filtered by species and drug."""
        search_parts = []
        if species:
            search_parts.append(f"animal.species:*{species}*")
        if drug_name:
            search_parts.append(f"drug.brand_name:*{drug_name}*")
        search = " AND ".join(search_parts) if search_parts else "_exists_:unique_aer_id_number"
        return self.query("animalandveterinary", "event", search=search, limit=100)

    # Substance methods
    def query_substance_by_unii(self, unii: str) -> Dict:
        """Query substance by UNII code."""
        search = f"approvalID:{unii}"
        return self.query("other", "substance", search=search, limit=1)

    def query_substance_by_name(self, name: str) -> Dict:
        """Query substance by name."""
        search = f"names.name:*{name}*"
        return self.query("other", "substance", search=search, limit=10)

    # Analysis methods
    def count_by_field(self, category: str, endpoint: str,
                       search: str, field: str, exact: bool = True) -> Dict:
        """
        Count and aggregate results by a specific field.

        Args:
            category: API category
            endpoint: Specific endpoint
            search: Search query
            field: Field to count by
            exact: Use exact phrase matching (appends ".exact" to the
                field unless it is already present)

        Returns:
            Count results
        """
        count_field = f"{field}.exact" if exact and not field.endswith(".exact") else field
        return self.query(category, endpoint, search=search, count=count_field)

    def get_date_range_data(self, category: str, endpoint: str,
                            date_field: str, days_back: int = 30,
                            additional_search: Optional[str] = None) -> List[Dict]:
        """
        Get data for a specific date range.

        The range runs from ``days_back`` days before the current local
        date through today, both formatted as YYYYMMDD.

        Args:
            category: API category
            endpoint: Specific endpoint
            date_field: Date field name
            days_back: Number of days to look back
            additional_search: Additional search criteria

        Returns:
            List of results
        """
        end_date = datetime.now()
        start_date = end_date - timedelta(days=days_back)
        date_range = f"[{start_date.strftime('%Y%m%d')} TO {end_date.strftime('%Y%m%d')}]"
        search = f"{date_field}:{date_range}"
        if additional_search:
            search = f"{search} AND {additional_search}"
        return self.query_all(category, endpoint, search=search)
def main():
    """Example usage: query aspirin events, count reactions, fetch a label."""
    import os

    # The key is optional; os.environ.get yields None when it is unset.
    client = FDAQuery(api_key=os.environ.get("FDA_API_KEY"))

    # Example 1: drug adverse events
    print("Querying aspirin adverse events...")
    aspirin_events = client.query_drug_events("aspirin", limit=10)
    if "results" in aspirin_events:
        found = len(aspirin_events["results"])
        print(f"Found {found} events")

    # Example 2: reaction counts (top five terms)
    print("\nCounting reactions...")
    reaction_counts = client.count_by_field(
        "drug",
        "event",
        search="patient.drug.medicinalproduct:aspirin",
        field="patient.reaction.reactionmeddrapt",
    )
    if "results" in reaction_counts:
        for row in reaction_counts["results"][:5]:
            print(f" {row['term']}: {row['count']}")

    # Example 3: drug label, first 200 characters of the indications
    print("\nGetting drug label...")
    label_response = client.query_drug_label("Lipitor", brand=True)
    if "results" not in label_response or len(label_response["results"]) == 0:
        return
    first_label = label_response["results"][0]
    if "indications_and_usage" in first_label:
        print(f" Indications: {first_label['indications_and_usage'][0][:200]}...")


if __name__ == "__main__":
    main()