#!/usr/bin/env python3
"""
Helper functions for common OpenAlex query patterns.

Provides high-level functions for typical research queries.
"""

from typing import List, Dict, Optional, Any

from openalex_client import OpenAlexClient

def find_author_works(
    author_name: str,
    client: OpenAlexClient,
    limit: Optional[int] = None
) -> List[Dict[str, Any]]:
    """
    Find all works by an author (two-step pattern).

    Args:
        author_name: Author name to search for
        client: OpenAlexClient instance
        limit: Maximum number of works to return

    Returns:
        List of works by the author
    """
    # Step 1: Find author ID
    author_response = client._make_request(
        '/authors',
        params={'search': author_name, 'per-page': 1}
    )

    if not author_response.get('results'):
        print(f"No author found for: {author_name}")
        return []

    author = author_response['results'][0]
    author_id = author['id'].split('/')[-1]  # Extract ID from URL

    print(f"Found author: {author['display_name']} (ID: {author_id})")

    # Step 2: Get works by author
    works_params = {
        'filter': f'authorships.author.id:{author_id}',
        'per-page': 200
    }

    if limit and limit <= 200:
        works_params['per-page'] = limit
        response = client._make_request('/works', works_params)
        return response.get('results', [])
    else:
        # Need pagination
        return client.paginate_all('/works', works_params, max_results=limit)

def find_institution_works(
    institution_name: str,
    client: OpenAlexClient,
    limit: Optional[int] = None
) -> List[Dict[str, Any]]:
    """
    Find all works from an institution (two-step pattern).

    Args:
        institution_name: Institution name to search for
        client: OpenAlexClient instance
        limit: Maximum number of works to return

    Returns:
        List of works from the institution
    """
    # Step 1: Find institution ID
    inst_response = client._make_request(
        '/institutions',
        params={'search': institution_name, 'per-page': 1}
    )

    if not inst_response.get('results'):
        print(f"No institution found for: {institution_name}")
        return []

    institution = inst_response['results'][0]
    inst_id = institution['id'].split('/')[-1]  # Extract ID from URL

    print(f"Found institution: {institution['display_name']} (ID: {inst_id})")

    # Step 2: Get works from institution
    works_params = {
        'filter': f'authorships.institutions.id:{inst_id}',
        'per-page': 200
    }

    if limit and limit <= 200:
        works_params['per-page'] = limit
        response = client._make_request('/works', works_params)
        return response.get('results', [])
    else:
        return client.paginate_all('/works', works_params, max_results=limit)

def find_highly_cited_recent_papers(
    topic: Optional[str] = None,
    years: str = ">2020",
    client: Optional[OpenAlexClient] = None,
    limit: int = 100
) -> List[Dict[str, Any]]:
    """
    Find highly cited recent papers, optionally filtered by topic.

    Args:
        topic: Optional search term for topic filtering
        years: Year filter (e.g., ">2020", "2020-2023")
        client: OpenAlexClient instance
        limit: Maximum number of papers to return

    Returns:
        List of highly cited papers sorted by citation count
    """
    if client is None:
        client = OpenAlexClient()

    params = {
        'filter': f'publication_year:{years}',
        'sort': 'cited_by_count:desc',
        'per-page': min(limit, 200)
    }

    if topic:
        params['search'] = topic

    if limit <= 200:
        response = client._make_request('/works', params)
        return response.get('results', [])
    else:
        return client.paginate_all('/works', params, max_results=limit)

def get_open_access_papers(
    search_term: str,
    client: OpenAlexClient,
    oa_status: str = "any",  # "any", "gold", "green", "hybrid", "bronze"
    limit: int = 100
) -> List[Dict[str, Any]]:
    """
    Find open access papers on a topic.

    Args:
        search_term: Search query
        client: OpenAlexClient instance
        oa_status: Type of OA ("any" for is_oa:true, or specific status)
        limit: Maximum number of papers to return

    Returns:
        List of open access papers
    """
    if oa_status == "any":
        filter_str = "is_oa:true"
    else:
        filter_str = f"open_access.oa_status:{oa_status}"

    params = {
        'search': search_term,
        'filter': filter_str,
        'per-page': min(limit, 200)
    }

    if limit <= 200:
        response = client._make_request('/works', params)
        return response.get('results', [])
    else:
        return client.paginate_all('/works', params, max_results=limit)

def get_publication_trends(
    search_term: Optional[str] = None,
    filter_params: Optional[Dict] = None,
    client: Optional[OpenAlexClient] = None
) -> List[Dict[str, Any]]:
    """
    Get publication counts by year.

    Args:
        search_term: Optional search query
        filter_params: Optional additional filters
        client: OpenAlexClient instance

    Returns:
        List of {year, count} dictionaries
    """
    if client is None:
        client = OpenAlexClient()

    params = {'group_by': 'publication_year'}

    if search_term:
        params['search'] = search_term

    if filter_params:
        # Join filters into OpenAlex syntax, e.g. {'is_oa': 'true'} -> "is_oa:true"
        filter_str = ','.join([f"{k}:{v}" for k, v in filter_params.items()])
        params['filter'] = filter_str

    response = client._make_request('/works', params)
    return response.get('group_by', [])

def analyze_research_output(
    entity_type: str,  # 'author' or 'institution'
    entity_name: str,
    client: OpenAlexClient,
    years: str = ">2020"
) -> Dict[str, Any]:
    """
    Analyze research output for an author or institution.

    Args:
        entity_type: 'author' or 'institution'
        entity_name: Name to search for
        client: OpenAlexClient instance
        years: Year filter

    Returns:
        Dictionary with analysis results
    """
    # Find entity ID
    if entity_type == 'author':
        endpoint = '/authors'
        filter_prefix = 'authorships.author.id'
    else:
        endpoint = '/institutions'
        filter_prefix = 'authorships.institutions.id'

    # Step 1: Find entity
    entity_response = client._make_request(
        endpoint,
        params={'search': entity_name, 'per-page': 1}
    )

    if not entity_response.get('results'):
        return {'error': f'No {entity_type} found for: {entity_name}'}

    entity = entity_response['results'][0]
    entity_id = entity['id'].split('/')[-1]

    # Step 2: Get statistics
    filter_params = {
        filter_prefix: entity_id,
        'publication_year': years
    }

    # Total works
    works_response = client.search_works(
        filter_params=filter_params,
        per_page=1
    )
    total_works = works_response['meta']['count']

    # Works by year
    trends = client.group_by(
        'works',
        'publication_year',
        filter_params={filter_prefix: entity_id, 'publication_year': years}
    )

    # Top topics
    topics = client.group_by(
        'works',
        'topics.id',
        filter_params=filter_params
    )

    # OA percentage
    oa_works = client.search_works(
        filter_params={**filter_params, 'is_oa': 'true'},
        per_page=1
    )
    oa_count = oa_works['meta']['count']
    oa_percentage = (oa_count / total_works * 100) if total_works > 0 else 0

    return {
        'entity_name': entity['display_name'],
        'entity_id': entity_id,
        'total_works': total_works,
        'open_access_works': oa_count,
        'open_access_percentage': round(oa_percentage, 1),
        'publications_by_year': trends[:10],  # First 10 year buckets
        'top_topics': topics[:10]  # Top 10 topics
    }

if __name__ == "__main__":
    # Example usage
    import json

    client = OpenAlexClient(email="your-email@example.com")

    # Find works by author
    print("\n=== Finding works by author ===")
    works = find_author_works("Einstein", client, limit=5)
    print(f"Found {len(works)} works")
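
    # Find works from an institution. A minimal, illustrative example; the
    # institution name and limit below are just placeholder values.
    print("\n=== Finding works from an institution ===")
    inst_works = find_institution_works("Massachusetts Institute of Technology", client, limit=5)
    print(f"Found {len(inst_works)} works")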
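
    # Highly cited recent papers. The topic and limit are illustrative; omit
    # the topic to rank across all recent works.
    print("\n=== Highly cited recent papers ===")
    top_papers = find_highly_cited_recent_papers(topic="machine learning", client=client, limit=5)
    for paper in top_papers:
        print(f"{paper.get('cited_by_count', 0):>8}  {paper.get('display_name')}")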

    # Analyze research output
    print("\n=== Analyzing institution research output ===")
    analysis = analyze_research_output('institution', 'MIT', client)
    print(json.dumps(analysis, indent=2))
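
    # Open access papers on a topic. The search term is a placeholder; any of
    # the statuses listed above ("gold", "green", "hybrid", "bronze", "any") works.
    print("\n=== Open access papers ===")
    oa_papers = get_open_access_papers("climate change", client, oa_status="gold", limit=5)
    print(f"Found {len(oa_papers)} gold OA papers")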
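
    # Publication trends by year. Illustrative search term; each bucket is a
    # group_by entry from the API with 'key' (year) and 'count' fields.
    print("\n=== Publication trends ===")
    trends = get_publication_trends(search_term="CRISPR", client=client)
    for bucket in trends[:5]:
        print(f"{bucket.get('key')}: {bucket.get('count')} works")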