Files
2025-11-30 08:30:10 +08:00

307 lines
8.2 KiB
Python

#!/usr/bin/env python3
"""
Helper functions for common OpenAlex query patterns.
Provides high-level functions for typical research queries.
"""
from typing import List, Dict, Optional, Any
from openalex_client import OpenAlexClient
def find_author_works(
    author_name: str,
    client: OpenAlexClient,
    limit: Optional[int] = None
) -> List[Dict[str, Any]]:
    """
    Find works by an author (two-step pattern).

    The author is resolved by name first (the top search hit is used), then
    the works endpoint is filtered by that author's ID. Progress messages
    are printed to stdout.

    Args:
        author_name: Author name to search for
        client: OpenAlexClient instance
        limit: Maximum number of works to return. ``None`` means no cap.
            Zero or a negative value returns an empty list without
            sending any request.

    Returns:
        List of works by the author. Empty when no author matches the
        search or when ``limit`` is not positive.
    """
    # A non-positive cap can never produce results. Checking it explicitly
    # also keeps limit=0 from being mistaken for "no limit" and triggering
    # an uncapped pagination run.
    if limit is not None and limit <= 0:
        return []

    # Step 1: Find author ID
    author_response = client._make_request(
        '/authors',
        params={'search': author_name, 'per-page': 1}
    )
    if not author_response.get('results'):
        print(f"No author found for: {author_name}")
        return []

    author = author_response['results'][0]
    author_id = author['id'].split('/')[-1]  # Extract ID from URL
    print(f"Found author: {author['display_name']} (ID: {author_id})")

    # Step 2: Get works by author
    works_params = {
        'filter': f'authorships.author.id:{author_id}',
        'per-page': 200
    }
    if limit is not None and limit <= 200:
        # The whole result fits in one page, so a single request suffices.
        works_params['per-page'] = limit
        response = client._make_request('/works', works_params)
        return response.get('results', [])

    # No cap, or more than one page worth of results: paginate.
    return client.paginate_all('/works', works_params, max_results=limit)
def find_institution_works(
    institution_name: str,
    client: OpenAlexClient,
    limit: Optional[int] = None
) -> List[Dict[str, Any]]:
    """
    Find works from an institution (two-step pattern).

    The institution is resolved by name first (the top search hit is used),
    then the works endpoint is filtered by that institution's ID. Progress
    messages are printed to stdout.

    Args:
        institution_name: Institution name to search for
        client: OpenAlexClient instance
        limit: Maximum number of works to return. ``None`` means no cap.
            Zero or a negative value returns an empty list without
            sending any request.

    Returns:
        List of works from the institution. Empty when no institution
        matches the search or when ``limit`` is not positive.
    """
    # A non-positive cap can never produce results. Checking it explicitly
    # also keeps limit=0 from being mistaken for "no limit" and triggering
    # an uncapped pagination run.
    if limit is not None and limit <= 0:
        return []

    # Step 1: Find institution ID
    inst_response = client._make_request(
        '/institutions',
        params={'search': institution_name, 'per-page': 1}
    )
    if not inst_response.get('results'):
        print(f"No institution found for: {institution_name}")
        return []

    institution = inst_response['results'][0]
    inst_id = institution['id'].split('/')[-1]  # Extract ID from URL
    print(f"Found institution: {institution['display_name']} (ID: {inst_id})")

    # Step 2: Get works from institution
    works_params = {
        'filter': f'authorships.institutions.id:{inst_id}',
        'per-page': 200
    }
    if limit is not None and limit <= 200:
        # The whole result fits in one page, so a single request suffices.
        works_params['per-page'] = limit
        response = client._make_request('/works', works_params)
        return response.get('results', [])

    # No cap, or more than one page worth of results: paginate.
    return client.paginate_all('/works', works_params, max_results=limit)
def find_highly_cited_recent_papers(
    topic: Optional[str] = None,
    years: str = ">2020",
    client: Optional[OpenAlexClient] = None,
    limit: int = 100
) -> List[Dict[str, Any]]:
    """
    Find highly cited recent papers, optionally filtered by topic.

    Args:
        topic: Optional search term for topic filtering
        years: Year filter (e.g., ">2020", "2020-2023")
        client: OpenAlexClient instance; a default-constructed client is
            created when omitted
        limit: Maximum number of papers to return. Zero or a negative
            value returns an empty list without sending any request.

    Returns:
        List of papers requested in descending citation-count order
    """
    # Without this guard a non-positive limit would be sent as the page
    # size, which is not a meaningful request.
    if limit <= 0:
        return []

    if client is None:
        client = OpenAlexClient()

    params = {
        'filter': f'publication_year:{years}',
        'sort': 'cited_by_count:desc',
        'per-page': min(limit, 200)
    }
    if topic:
        params['search'] = topic

    if limit <= 200:
        # Everything requested fits in a single page.
        response = client._make_request('/works', params)
        return response.get('results', [])

    return client.paginate_all('/works', params, max_results=limit)
def get_open_access_papers(
    search_term: str,
    client: OpenAlexClient,
    oa_status: str = "any",  # "any", "gold", "green", "hybrid", "bronze"
    limit: int = 100
) -> List[Dict[str, Any]]:
    """
    Find open access papers on a topic.

    Args:
        search_term: Search query
        client: OpenAlexClient instance
        oa_status: Type of OA ("any" for is_oa:true, or a specific status
            that is passed through as open_access.oa_status:<status>)
        limit: Maximum number of papers to return. Zero or a negative
            value returns an empty list without sending any request.

    Returns:
        List of open access papers
    """
    # Without this guard a non-positive limit would be sent as the page
    # size, which is not a meaningful request.
    if limit <= 0:
        return []

    if oa_status == "any":
        filter_str = "is_oa:true"
    else:
        filter_str = f"open_access.oa_status:{oa_status}"

    params = {
        'search': search_term,
        'filter': filter_str,
        'per-page': min(limit, 200)
    }

    if limit <= 200:
        # Everything requested fits in a single page.
        response = client._make_request('/works', params)
        return response.get('results', [])

    return client.paginate_all('/works', params, max_results=limit)
def get_publication_trends(
    search_term: Optional[str] = None,
    filter_params: Optional[Dict] = None,
    client: Optional[OpenAlexClient] = None
) -> List[Dict[str, Any]]:
    """
    Get works grouped by publication year.

    Args:
        search_term: Optional search query
        filter_params: Optional additional filters; each key/value pair is
            rendered as "key:value" and the pairs are comma-joined
        client: OpenAlexClient instance; a default-constructed client is
            created when omitted

    Returns:
        The 'group_by' list from the /works response, or an empty list
        when the response has no such key
    """
    api = OpenAlexClient() if client is None else client

    query = {'group_by': 'publication_year'}
    if search_term:
        query['search'] = search_term
    if filter_params:
        # Flatten the mapping into the single comma-separated filter string.
        query['filter'] = ','.join(
            f"{name}:{value}" for name, value in filter_params.items()
        )

    payload = api._make_request('/works', query)
    return payload.get('group_by', [])
def analyze_research_output(
    entity_type: str,  # 'author' or 'institution'
    entity_name: str,
    client: OpenAlexClient,
    years: str = ">2020"
) -> Dict[str, Any]:
    """
    Summarize research output for an author or institution.

    Args:
        entity_type: 'author' or 'institution'; any value other than
            'author' is handled as an institution
        entity_name: Name to search for (the top search hit is used)
        client: OpenAlexClient instance
        years: Year filter applied to every statistic

    Returns:
        Dictionary with the entity's display name and ID, total and open
        access work counts, the OA percentage rounded to one decimal, and
        the first ten groups by publication year and by topic. When the
        search finds nothing, a dictionary with a single 'error' key.
    """
    # Pick the lookup endpoint and the matching works-filter field.
    is_author = entity_type == 'author'
    endpoint = '/authors' if is_author else '/institutions'
    filter_prefix = (
        'authorships.author.id' if is_author else 'authorships.institutions.id'
    )

    # Step 1: resolve the entity by name
    lookup = client._make_request(
        endpoint,
        params={'search': entity_name, 'per-page': 1}
    )
    matches = lookup.get('results')
    if not matches:
        return {'error': f'No {entity_type} found for: {entity_name}'}

    entity = matches[0]
    entity_id = entity['id'].rsplit('/', 1)[-1]  # trailing segment of the ID URL

    # Step 2: gather statistics, all scoped to this entity and year range
    scope = {
        filter_prefix: entity_id,
        'publication_year': years
    }

    # Only the count from the response metadata is needed, so ask for one row.
    total_works = client.search_works(
        filter_params=scope,
        per_page=1
    )['meta']['count']

    by_year = client.group_by(
        'works',
        'publication_year',
        filter_params=dict(scope)
    )
    by_topic = client.group_by(
        'works',
        'topics.id',
        filter_params=scope
    )

    # Same count query again, narrowed to open access works.
    oa_count = client.search_works(
        filter_params=dict(scope, is_oa='true'),
        per_page=1
    )['meta']['count']

    if total_works > 0:
        oa_share = oa_count / total_works * 100
    else:
        oa_share = 0  # avoid dividing by zero when nothing matched

    # NOTE(review): the slices keep the first ten groups in whatever order
    # client.group_by returns them - confirm that ordering before treating
    # them as "most recent years" or "top topics".
    return {
        'entity_name': entity['display_name'],
        'entity_id': entity_id,
        'total_works': total_works,
        'open_access_works': oa_count,
        'open_access_percentage': round(oa_share, 1),
        'publications_by_year': by_year[:10],
        'top_topics': by_topic[:10]
    }
if __name__ == "__main__":
    # Demo run of two helpers. The e-mail below is a placeholder to be
    # replaced with a real address; presumably these calls issue real API
    # requests through OpenAlexClient - confirm before running offline.
    import json

    client = OpenAlexClient(email="your-email@example.com")

    # Author lookup, capped at five works
    print("\n=== Finding works by author ===")
    works = find_author_works("Einstein", client, limit=5)
    work_count = len(works)
    print(f"Found {work_count} works")

    # Institution-level summary, pretty-printed as JSON
    print("\n=== Analyzing institution research output ===")
    analysis = analyze_research_output('institution', 'MIT', client)
    report_text = json.dumps(analysis, indent=2)
    print(report_text)