#!/usr/bin/env python3 """ USPTO PatentSearch API Helper Provides functions for searching and retrieving patent data using the USPTO PatentSearch API (ElasticSearch-based system, replaced legacy PatentsView in May 2025). Requires: - requests library: pip install requests - USPTO API key from https://account.uspto.gov/api-manager/ Environment variables: USPTO_API_KEY - Your USPTO API key """ import os import sys import json import requests from typing import Dict, List, Optional, Any from datetime import datetime class PatentSearchClient: """Client for USPTO PatentSearch API.""" BASE_URL = "https://search.patentsview.org/api/v1" def __init__(self, api_key: Optional[str] = None): """ Initialize client with API key. Args: api_key: USPTO API key (if not provided, uses USPTO_API_KEY env var) """ self.api_key = api_key or os.getenv("USPTO_API_KEY") if not self.api_key: raise ValueError("API key required. Set USPTO_API_KEY environment variable or pass to constructor.") self.headers = { "X-Api-Key": self.api_key, "Content-Type": "application/json" } def _request(self, endpoint: str, query: Dict, fields: Optional[List[str]] = None, sort: Optional[List[Dict]] = None, options: Optional[Dict] = None) -> Dict: """ Make a request to the PatentSearch API. Args: endpoint: API endpoint (e.g., "patent", "inventor") query: Query dictionary fields: List of fields to return sort: Sort specification options: Pagination and other options Returns: API response as dictionary """ url = f"{self.BASE_URL}/{endpoint}" data = {"q": query} if fields: data["f"] = fields if sort: data["s"] = sort if options: data["o"] = options response = requests.post(url, headers=self.headers, json=data) response.raise_for_status() return response.json() def search_patents(self, query: Dict, fields: Optional[List[str]] = None, sort: Optional[List[Dict]] = None, page: int = 1, per_page: int = 100) -> Dict: """ Search for patents. Args: query: Query dictionary (see PatentSearch API docs for syntax) fields: Fields to return (defaults to essential fields) sort: Sort specification page: Page number per_page: Results per page (max 1000) Returns: Search results with patents array Example: # Search by keyword results = client.search_patents({ "patent_abstract": {"_text_all": ["machine", "learning"]} }) # Search by date range results = client.search_patents({ "patent_date": {"_gte": "2024-01-01", "_lte": "2024-12-31"} }) """ if fields is None: fields = [ "patent_number", "patent_title", "patent_date", "patent_abstract", "assignee_organization", "inventor_name", "cpc_subclass_id" ] if sort is None: sort = [{"patent_date": "desc"}] options = {"page": page, "per_page": min(per_page, 1000)} return self._request("patent", query, fields, sort, options) def get_patent(self, patent_number: str) -> Optional[Dict]: """ Get details for a specific patent by number. Args: patent_number: Patent number (with or without commas) Returns: Patent data dictionary or None if not found """ # Remove commas from patent number patent_number = patent_number.replace(",", "") query = {"patent_number": patent_number} fields = [ "patent_number", "patent_title", "patent_date", "patent_abstract", "patent_type", "inventor_name", "assignee_organization", "cpc_subclass_id", "cited_patent_number", "citedby_patent_number" ] result = self._request("patent", query, fields) if result.get("patents"): return result["patents"][0] return None def search_by_inventor(self, inventor_name: str, **kwargs) -> Dict: """ Search patents by inventor name. Args: inventor_name: Inventor name (use _text_phrase for exact match) **kwargs: Additional search parameters Returns: Search results """ query = {"inventor_name": {"_text_phrase": inventor_name}} return self.search_patents(query, **kwargs) def search_by_assignee(self, assignee_name: str, **kwargs) -> Dict: """ Search patents by assignee/company name. Args: assignee_name: Assignee/company name **kwargs: Additional search parameters Returns: Search results """ query = {"assignee_organization": {"_text_any": assignee_name.split()}} return self.search_patents(query, **kwargs) def search_by_classification(self, cpc_code: str, **kwargs) -> Dict: """ Search patents by CPC classification code. Args: cpc_code: CPC subclass code (e.g., "H04N", "G06F") **kwargs: Additional search parameters Returns: Search results """ query = {"cpc_subclass_id": cpc_code} return self.search_patents(query, **kwargs) def search_by_date_range(self, start_date: str, end_date: str, **kwargs) -> Dict: """ Search patents by date range. Args: start_date: Start date (YYYY-MM-DD) end_date: End date (YYYY-MM-DD) **kwargs: Additional search parameters Returns: Search results """ query = { "patent_date": { "_gte": start_date, "_lte": end_date } } return self.search_patents(query, **kwargs) def advanced_search(self, keywords: List[str], assignee: Optional[str] = None, start_date: Optional[str] = None, end_date: Optional[str] = None, cpc_codes: Optional[List[str]] = None, **kwargs) -> Dict: """ Perform advanced search with multiple criteria. Args: keywords: List of keywords to search in abstract/title assignee: Assignee/company name start_date: Start date (YYYY-MM-DD) end_date: End date (YYYY-MM-DD) cpc_codes: List of CPC classification codes **kwargs: Additional search parameters Returns: Search results """ conditions = [] # Keyword search in abstract if keywords: conditions.append({ "patent_abstract": {"_text_all": keywords} }) # Assignee filter if assignee: conditions.append({ "assignee_organization": {"_text_any": assignee.split()} }) # Date range if start_date and end_date: conditions.append({ "patent_date": {"_gte": start_date, "_lte": end_date} }) # CPC classification if cpc_codes: conditions.append({ "cpc_subclass_id": cpc_codes }) query = {"_and": conditions} if len(conditions) > 1 else conditions[0] return self.search_patents(query, **kwargs) def main(): """Command-line interface for patent search.""" if len(sys.argv) < 2: print("Usage:") print(" python patent_search.py ") print(" python patent_search.py --inventor ") print(" python patent_search.py --assignee ") print(" python patent_search.py --keywords ...") sys.exit(1) client = PatentSearchClient() try: if sys.argv[1] == "--inventor": results = client.search_by_inventor(" ".join(sys.argv[2:])) elif sys.argv[1] == "--assignee": results = client.search_by_assignee(" ".join(sys.argv[2:])) elif sys.argv[1] == "--keywords": query = {"patent_abstract": {"_text_all": sys.argv[2:]}} results = client.search_patents(query) else: # Assume patent number patent = client.get_patent(sys.argv[1]) if patent: results = {"patents": [patent], "count": 1, "total_hits": 1} else: print(f"Patent {sys.argv[1]} not found") sys.exit(1) # Print results print(json.dumps(results, indent=2)) except Exception as e: print(f"Error: {e}", file=sys.stderr) sys.exit(1) if __name__ == "__main__": main()