Files
gh-k-dense-ai-claude-scient…/skills/uspto-database/scripts/patent_search.py
2025-11-30 08:30:10 +08:00

291 lines
8.9 KiB
Python

#!/usr/bin/env python3
"""
USPTO PatentSearch API Helper
Provides functions for searching and retrieving patent data using the USPTO
PatentSearch API (ElasticSearch-based system, replaced legacy PatentsView in May 2025).
Requires:
- requests library: pip install requests
- USPTO API key from https://account.uspto.gov/api-manager/
Environment variables:
USPTO_API_KEY - Your USPTO API key
"""
import os
import sys
import json
import requests
from typing import Dict, List, Optional, Any
from datetime import datetime
class PatentSearchClient:
    """Client for USPTO PatentSearch API.

    Every public method builds a query dictionary and POSTs it as JSON to
    ``BASE_URL/<endpoint>`` with the API key in the ``X-Api-Key`` header.

    NOTE(review): the query shape, field names and pagination keys used below
    are carried over unchanged; confirm them against the current PatentSearch
    API reference before relying on them.
    """

    BASE_URL = "https://search.patentsview.org/api/v1"

    # Upper bound applied to ``per_page`` in search_patents().
    MAX_PER_PAGE = 1000

    # Seconds to wait for the server. Without a timeout ``requests.post`` can
    # block indefinitely on a stalled connection.
    DEFAULT_TIMEOUT = 30.0

    def __init__(self, api_key: Optional[str] = None,
                 timeout: Optional[float] = DEFAULT_TIMEOUT):
        """
        Initialize client with API key.

        Args:
            api_key: USPTO API key (if not provided, uses USPTO_API_KEY env var)
            timeout: Per-request timeout in seconds; ``None`` disables it.

        Raises:
            ValueError: If no API key is passed and USPTO_API_KEY is unset/empty.
        """
        self.api_key = api_key or os.getenv("USPTO_API_KEY")
        if not self.api_key:
            raise ValueError("API key required. Set USPTO_API_KEY environment variable or pass to constructor.")
        self.timeout = timeout
        self.headers = {
            "X-Api-Key": self.api_key,
            "Content-Type": "application/json",
        }

    def _request(self, endpoint: str, query: Dict, fields: Optional[List[str]] = None,
                 sort: Optional[List[Dict]] = None, options: Optional[Dict] = None) -> Dict:
        """
        Make a request to the PatentSearch API.

        Args:
            endpoint: API endpoint (e.g., "patent", "inventor")
            query: Query dictionary
            fields: List of fields to return
            sort: Sort specification
            options: Pagination and other options

        Returns:
            API response as dictionary

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.Timeout: If the server does not answer within ``timeout``.
        """
        url = f"{self.BASE_URL}/{endpoint}"
        payload: Dict[str, Any] = {"q": query}
        # Optional sections are left out of the payload entirely when empty.
        if fields:
            payload["f"] = fields
        if sort:
            payload["s"] = sort
        if options:
            payload["o"] = options

        response = requests.post(url, headers=self.headers, json=payload,
                                 timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def search_patents(self, query: Dict, fields: Optional[List[str]] = None,
                       sort: Optional[List[Dict]] = None, page: int = 1,
                       per_page: int = 100) -> Dict:
        """
        Search for patents.

        Args:
            query: Query dictionary (see PatentSearch API docs for syntax)
            fields: Fields to return (defaults to essential fields)
            sort: Sort specification (defaults to newest patent_date first)
            page: Page number (values below 1 are treated as 1)
            per_page: Results per page (clamped to the range 1..1000)

        Returns:
            Search results with patents array

        Example:
            # Search by keyword
            results = client.search_patents({
                "patent_abstract": {"_text_all": ["machine", "learning"]}
            })

            # Search by date range
            results = client.search_patents({
                "patent_date": {"_gte": "2024-01-01", "_lte": "2024-12-31"}
            })
        """
        if fields is None:
            fields = [
                "patent_number", "patent_title", "patent_date",
                "patent_abstract", "assignee_organization",
                "inventor_name", "cpc_subclass_id",
            ]
        if sort is None:
            sort = [{"patent_date": "desc"}]

        # Clamp on both sides so zero/negative values never reach the server.
        options = {
            "page": max(page, 1),
            "per_page": max(1, min(per_page, self.MAX_PER_PAGE)),
        }
        return self._request("patent", query, fields, sort, options)

    def get_patent(self, patent_number: str) -> Optional[Dict]:
        """
        Get details for a specific patent by number.

        Args:
            patent_number: Patent number (with or without commas)

        Returns:
            Patent data dictionary or None if not found
        """
        # Normalise "7,654,321 " style input: drop commas and outer whitespace.
        patent_number = str(patent_number).replace(",", "").strip()
        query = {"patent_number": patent_number}
        fields = [
            "patent_number", "patent_title", "patent_date", "patent_abstract",
            "patent_type", "inventor_name", "assignee_organization",
            "cpc_subclass_id", "cited_patent_number", "citedby_patent_number",
        ]
        result = self._request("patent", query, fields)
        patents = result.get("patents")
        return patents[0] if patents else None

    def search_by_inventor(self, inventor_name: str, **kwargs) -> Dict:
        """
        Search patents by inventor name.

        Args:
            inventor_name: Inventor name, matched as a phrase via _text_phrase
            **kwargs: Additional search parameters passed to search_patents()

        Returns:
            Search results
        """
        query = {"inventor_name": {"_text_phrase": inventor_name}}
        return self.search_patents(query, **kwargs)

    def search_by_assignee(self, assignee_name: str, **kwargs) -> Dict:
        """
        Search patents by assignee/company name.

        Args:
            assignee_name: Assignee/company name; split on whitespace and
                matched with _text_any
            **kwargs: Additional search parameters passed to search_patents()

        Returns:
            Search results
        """
        query = {"assignee_organization": {"_text_any": assignee_name.split()}}
        return self.search_patents(query, **kwargs)

    def search_by_classification(self, cpc_code: str, **kwargs) -> Dict:
        """
        Search patents by CPC classification code.

        Args:
            cpc_code: CPC subclass code (e.g., "H04N", "G06F")
            **kwargs: Additional search parameters passed to search_patents()

        Returns:
            Search results
        """
        query = {"cpc_subclass_id": cpc_code}
        return self.search_patents(query, **kwargs)

    def search_by_date_range(self, start_date: str, end_date: str, **kwargs) -> Dict:
        """
        Search patents by date range.

        Args:
            start_date: Start date (YYYY-MM-DD)
            end_date: End date (YYYY-MM-DD)
            **kwargs: Additional search parameters passed to search_patents()

        Returns:
            Search results
        """
        query = {
            "patent_date": {
                "_gte": start_date,
                "_lte": end_date,
            }
        }
        return self.search_patents(query, **kwargs)

    @staticmethod
    def _build_advanced_query(keywords: Optional[List[str]],
                              assignee: Optional[str],
                              start_date: Optional[str],
                              end_date: Optional[str],
                              cpc_codes: Optional[List[str]]) -> Dict:
        """Combine the advanced_search() criteria into one query dictionary."""
        conditions: List[Dict] = []

        # Keyword search in abstract
        if keywords:
            conditions.append({"patent_abstract": {"_text_all": keywords}})

        # Assignee filter
        if assignee:
            conditions.append({"assignee_organization": {"_text_any": assignee.split()}})

        # Date range: each bound is applied on its own, so an open-ended range
        # (only a start or only an end) is honoured rather than dropped.
        date_range = {}
        if start_date:
            date_range["_gte"] = start_date
        if end_date:
            date_range["_lte"] = end_date
        if date_range:
            conditions.append({"patent_date": date_range})

        # CPC classification
        if cpc_codes:
            conditions.append({"cpc_subclass_id": cpc_codes})

        if not conditions:
            raise ValueError("At least one search criterion is required.")
        # A single condition is sent bare; several are wrapped in "_and".
        return conditions[0] if len(conditions) == 1 else {"_and": conditions}

    def advanced_search(self, keywords: List[str], assignee: Optional[str] = None,
                        start_date: Optional[str] = None, end_date: Optional[str] = None,
                        cpc_codes: Optional[List[str]] = None, **kwargs) -> Dict:
        """
        Perform advanced search with multiple criteria.

        Args:
            keywords: List of keywords to search in the abstract
            assignee: Assignee/company name
            start_date: Start date (YYYY-MM-DD); may be given without end_date
            end_date: End date (YYYY-MM-DD); may be given without start_date
            cpc_codes: List of CPC classification codes
            **kwargs: Additional search parameters passed to search_patents()

        Returns:
            Search results

        Raises:
            ValueError: If every criterion is empty or None.
        """
        query = self._build_advanced_query(keywords, assignee, start_date,
                                           end_date, cpc_codes)
        return self.search_patents(query, **kwargs)
def main():
    """Command-line interface for patent search.

    Reads the mode from ``sys.argv``: ``--inventor``, ``--assignee`` or
    ``--keywords`` followed by one or more terms, or a bare patent number.
    Prints the result as indented JSON on stdout. Exits with status 1 on a
    usage error, when the patent is not found, or when the request fails.
    """
    args = sys.argv[1:]
    if not args:
        print("Usage:")
        print(" python patent_search.py <patent_number>")
        print(" python patent_search.py --inventor <name>")
        print(" python patent_search.py --assignee <company>")
        print(" python patent_search.py --keywords <word1> <word2> ...")
        sys.exit(1)

    mode, terms = args[0], args[1:]

    # Reject a flag with nothing after it here, rather than sending an empty
    # query to the API.
    if mode in ("--inventor", "--assignee", "--keywords") and not terms:
        print(f"Error: {mode} requires at least one argument", file=sys.stderr)
        sys.exit(1)

    try:
        # Constructed inside the try so a missing API key is reported through
        # the same "Error: ..." path as request failures, not as a traceback.
        client = PatentSearchClient()

        if mode == "--inventor":
            results = client.search_by_inventor(" ".join(terms))
        elif mode == "--assignee":
            results = client.search_by_assignee(" ".join(terms))
        elif mode == "--keywords":
            query = {"patent_abstract": {"_text_all": terms}}
            results = client.search_patents(query)
        else:
            # Assume patent number
            patent = client.get_patent(mode)
            if patent:
                results = {"patents": [patent], "count": 1, "total_hits": 1}
            else:
                print(f"Patent {mode} not found")
                # SystemExit is not an Exception subclass, so it passes
                # through the handler below untouched.
                sys.exit(1)

        # Print results
        print(json.dumps(results, indent=2))
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()