Initial commit
This commit is contained in:
290
skills/uspto-database/scripts/patent_search.py
Normal file
290
skills/uspto-database/scripts/patent_search.py
Normal file
@@ -0,0 +1,290 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
USPTO PatentSearch API Helper
|
||||
|
||||
Provides functions for searching and retrieving patent data using the USPTO
|
||||
PatentSearch API (ElasticSearch-based system, replaced legacy PatentsView in May 2025).
|
||||
|
||||
Requires:
|
||||
- requests library: pip install requests
|
||||
- USPTO API key from https://account.uspto.gov/api-manager/
|
||||
|
||||
Environment variables:
|
||||
USPTO_API_KEY - Your USPTO API key
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import requests
|
||||
from typing import Dict, List, Optional, Any
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class PatentSearchClient:
    """Client for USPTO PatentSearch API.

    Every public method builds a query dictionary and hands it to
    :meth:`_request`, which POSTs it as JSON to ``BASE_URL/<endpoint>`` with
    the API key in the ``X-Api-Key`` header.

    NOTE(review): the field names used below (``patent_number``,
    ``assignee_organization``, ...) and the ``page``/``per_page`` options
    should be confirmed against the current PatentSearch API reference.
    """

    BASE_URL = "https://search.patentsview.org/api/v1"

    # Seconds to wait on the server. Without an explicit timeout a stalled
    # connection would block the caller indefinitely.
    TIMEOUT = 30

    # Largest page size forwarded to the API by search_patents().
    MAX_PER_PAGE = 1000

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize client with API key.

        Args:
            api_key: USPTO API key (if not provided, uses USPTO_API_KEY env var)

        Raises:
            ValueError: If no key is passed and USPTO_API_KEY is unset or empty.
        """
        self.api_key = api_key or os.getenv("USPTO_API_KEY")
        if not self.api_key:
            raise ValueError("API key required. Set USPTO_API_KEY environment variable or pass to constructor.")

        self.headers = {
            "X-Api-Key": self.api_key,
            "Content-Type": "application/json"
        }

    def _request(self, endpoint: str, query: Dict, fields: Optional[List[str]] = None,
                 sort: Optional[List[Dict]] = None, options: Optional[Dict] = None) -> Dict:
        """
        Make a request to the PatentSearch API.

        Args:
            endpoint: API endpoint (e.g., "patent", "inventor")
            query: Query dictionary, sent as ``q``
            fields: List of fields to return, sent as ``f`` when non-empty
            sort: Sort specification, sent as ``s`` when non-empty
            options: Pagination and other options, sent as ``o`` when non-empty

        Returns:
            API response as dictionary

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.Timeout: If no response arrives within ``TIMEOUT`` seconds.
        """
        url = f"{self.BASE_URL}/{endpoint}"

        # Only include the optional sections the caller actually supplied.
        data = {"q": query}
        if fields:
            data["f"] = fields
        if sort:
            data["s"] = sort
        if options:
            data["o"] = options

        response = requests.post(url, headers=self.headers, json=data,
                                 timeout=self.TIMEOUT)
        response.raise_for_status()

        return response.json()

    def search_patents(self, query: Dict, fields: Optional[List[str]] = None,
                       sort: Optional[List[Dict]] = None, page: int = 1,
                       per_page: int = 100) -> Dict:
        """
        Search for patents.

        Args:
            query: Query dictionary (see PatentSearch API docs for syntax)
            fields: Fields to return (defaults to essential fields)
            sort: Sort specification (defaults to newest patent_date first)
            page: Page number (values below 1 are treated as 1)
            per_page: Results per page (clamped to the range 1..1000)

        Returns:
            Search results with patents array

        Example:
            # Search by keyword
            results = client.search_patents({
                "patent_abstract": {"_text_all": ["machine", "learning"]}
            })

            # Search by date range
            results = client.search_patents({
                "patent_date": {"_gte": "2024-01-01", "_lte": "2024-12-31"}
            })
        """
        if fields is None:
            fields = [
                "patent_number", "patent_title", "patent_date",
                "patent_abstract", "assignee_organization",
                "inventor_name", "cpc_subclass_id"
            ]

        if sort is None:
            sort = [{"patent_date": "desc"}]

        # Clamp both ends so a zero/negative value never reaches the server.
        options = {
            "page": max(1, page),
            "per_page": max(1, min(per_page, self.MAX_PER_PAGE)),
        }

        return self._request("patent", query, fields, sort, options)

    def get_patent(self, patent_number: str) -> Optional[Dict]:
        """
        Get details for a specific patent by number.

        Args:
            patent_number: Patent number (with or without commas; surrounding
                whitespace is ignored)

        Returns:
            Patent data dictionary or None if not found
        """
        # Normalize "10,000,000" / " 10000000 " to the bare number.
        patent_number = patent_number.replace(",", "").strip()

        query = {"patent_number": patent_number}
        fields = [
            "patent_number", "patent_title", "patent_date", "patent_abstract",
            "patent_type", "inventor_name", "assignee_organization",
            "cpc_subclass_id", "cited_patent_number", "citedby_patent_number"
        ]

        result = self._request("patent", query, fields)

        # A missing or empty "patents" list means no match.
        if result.get("patents"):
            return result["patents"][0]
        return None

    def search_by_inventor(self, inventor_name: str, **kwargs) -> Dict:
        """
        Search patents by inventor name.

        Args:
            inventor_name: Inventor name, matched with the _text_phrase operator
            **kwargs: Additional parameters forwarded to search_patents()

        Returns:
            Search results
        """
        query = {"inventor_name": {"_text_phrase": inventor_name}}
        return self.search_patents(query, **kwargs)

    def search_by_assignee(self, assignee_name: str, **kwargs) -> Dict:
        """
        Search patents by assignee/company name.

        Args:
            assignee_name: Assignee/company name; split on whitespace and
                matched with the _text_any operator
            **kwargs: Additional parameters forwarded to search_patents()

        Returns:
            Search results
        """
        query = {"assignee_organization": {"_text_any": assignee_name.split()}}
        return self.search_patents(query, **kwargs)

    def search_by_classification(self, cpc_code: str, **kwargs) -> Dict:
        """
        Search patents by CPC classification code.

        Args:
            cpc_code: CPC subclass code (e.g., "H04N", "G06F")
            **kwargs: Additional parameters forwarded to search_patents()

        Returns:
            Search results
        """
        query = {"cpc_subclass_id": cpc_code}
        return self.search_patents(query, **kwargs)

    def search_by_date_range(self, start_date: str, end_date: str, **kwargs) -> Dict:
        """
        Search patents by date range.

        Args:
            start_date: Start date (YYYY-MM-DD), sent as the _gte bound
            end_date: End date (YYYY-MM-DD), sent as the _lte bound
            **kwargs: Additional parameters forwarded to search_patents()

        Returns:
            Search results
        """
        query = {
            "patent_date": {
                "_gte": start_date,
                "_lte": end_date
            }
        }
        return self.search_patents(query, **kwargs)

    def advanced_search(self, keywords: List[str], assignee: Optional[str] = None,
                        start_date: Optional[str] = None, end_date: Optional[str] = None,
                        cpc_codes: Optional[List[str]] = None, **kwargs) -> Dict:
        """
        Perform advanced search with multiple criteria.

        Each supplied criterion becomes one condition; several conditions are
        combined under ``_and``, a single condition is sent on its own.

        Args:
            keywords: List of keywords that must all appear in the abstract
                (may be empty if another criterion is given)
            assignee: Assignee/company name
            start_date: Start date (YYYY-MM-DD); may be given without end_date
            end_date: End date (YYYY-MM-DD); may be given without start_date
            cpc_codes: List of CPC classification codes
            **kwargs: Additional parameters forwarded to search_patents()

        Returns:
            Search results

        Raises:
            ValueError: If no search criterion was supplied at all.
        """
        conditions = []

        # Keyword search in abstract
        if keywords:
            conditions.append({
                "patent_abstract": {"_text_all": keywords}
            })

        # Assignee filter
        if assignee:
            conditions.append({
                "assignee_organization": {"_text_any": assignee.split()}
            })

        # Date range: honor an open-ended range instead of silently dropping
        # the filter when only one bound is supplied.
        date_bounds = {}
        if start_date:
            date_bounds["_gte"] = start_date
        if end_date:
            date_bounds["_lte"] = end_date
        if date_bounds:
            conditions.append({"patent_date": date_bounds})

        # CPC classification
        if cpc_codes:
            conditions.append({
                "cpc_subclass_id": cpc_codes
            })

        if not conditions:
            raise ValueError(
                "advanced_search requires at least one criterion "
                "(keywords, assignee, start_date/end_date or cpc_codes)."
            )

        query = conditions[0] if len(conditions) == 1 else {"_and": conditions}

        return self.search_patents(query, **kwargs)
|
||||
|
||||
|
||||
def _print_usage(stream=None):
    """Write the command-line usage summary to *stream* (stdout by default)."""
    print("Usage:", file=stream)
    print("  python patent_search.py <patent_number>", file=stream)
    print("  python patent_search.py --inventor <name>", file=stream)
    print("  python patent_search.py --assignee <company>", file=stream)
    print("  python patent_search.py --keywords <word1> <word2> ...", file=stream)


def main():
    """Command-line interface for patent search.

    Reads the mode from ``sys.argv``: ``--inventor``, ``--assignee`` or
    ``--keywords`` followed by one or more values, or a bare patent number.
    Prints the results as indented JSON on stdout. Exits with status 1 on a
    usage error, when the patent is not found, or when the search fails.
    """
    if len(sys.argv) < 2:
        _print_usage()
        sys.exit(1)

    mode, terms = sys.argv[1], sys.argv[2:]
    search_flags = ("--inventor", "--assignee", "--keywords")

    # Validate arguments before touching the network or the API key, so that
    # a typo gives a usage message rather than an empty or bogus search.
    if mode.startswith("--") and mode not in search_flags:
        print(f"Error: unknown option {mode}", file=sys.stderr)
        _print_usage(sys.stderr)
        sys.exit(1)
    if mode in search_flags and not terms:
        print(f"Error: {mode} requires at least one value", file=sys.stderr)
        _print_usage(sys.stderr)
        sys.exit(1)

    try:
        # Constructed inside the try so that a missing API key is reported
        # through the same clean "Error: ..." path as request failures.
        client = PatentSearchClient()

        if mode == "--inventor":
            results = client.search_by_inventor(" ".join(terms))
        elif mode == "--assignee":
            results = client.search_by_assignee(" ".join(terms))
        elif mode == "--keywords":
            query = {"patent_abstract": {"_text_all": terms}}
            results = client.search_patents(query)
        else:
            # Assume patent number
            patent = client.get_patent(mode)
            if not patent:
                # stderr keeps stdout reserved for JSON output. SystemExit is
                # not an Exception subclass, so it passes the handler below.
                print(f"Patent {mode} not found", file=sys.stderr)
                sys.exit(1)
            results = {"patents": [patent], "count": 1, "total_hits": 1}

        # Print results
        print(json.dumps(results, indent=2))

    except Exception as e:
        # Top-level boundary: report any failure briefly and exit non-zero.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user