Initial commit
This commit is contained in:
306
skills/openalex-database/scripts/query_helpers.py
Normal file
306
skills/openalex-database/scripts/query_helpers.py
Normal file
@@ -0,0 +1,306 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Helper functions for common OpenAlex query patterns.
|
||||
|
||||
Provides high-level functions for typical research queries.
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Optional, Any
|
||||
from openalex_client import OpenAlexClient
|
||||
|
||||
|
||||
def find_author_works(
    author_name: str,
    client: OpenAlexClient,
    limit: Optional[int] = None
) -> List[Dict[str, Any]]:
    """
    Find works by an author (two-step pattern).

    The name is first resolved through a ``/authors`` search (only the
    first hit is used); ``/works`` is then filtered by that author's ID.

    Args:
        author_name: Author name to search for
        client: OpenAlexClient instance
        limit: Maximum number of works to return. ``None`` means no cap;
            zero or a negative value yields an empty list.

    Returns:
        List of works by the author. Empty when no author matches the
        name or when ``limit`` is not positive.
    """
    # Zero is falsy, so a plain truthiness test would route it to the
    # uncapped pagination branch, and a negative value would be sent to
    # the API as 'per-page'. Asking for no works needs no request at all.
    if limit is not None and limit <= 0:
        return []

    # Step 1: Find author ID
    author_response = client._make_request(
        '/authors',
        params={'search': author_name, 'per-page': 1}
    )

    matches = author_response.get('results')
    if not matches:
        print(f"No author found for: {author_name}")
        return []

    author = matches[0]
    # The 'id' field is a URL; the last path segment is the bare ID.
    author_id = author['id'].rsplit('/', 1)[-1]

    print(f"Found author: {author['display_name']} (ID: {author_id})")

    # Step 2: Get works by author
    works_params = {
        'filter': f'authorships.author.id:{author_id}',
        'per-page': 200
    }

    if limit is not None and limit <= 200:
        # Fits in a single page: request exactly `limit` results.
        works_params['per-page'] = limit
        response = client._make_request('/works', works_params)
        return response.get('results', [])

    # No cap, or more than one page: let the client paginate.
    return client.paginate_all('/works', works_params, max_results=limit)
|
||||
|
||||
|
||||
def find_institution_works(
    institution_name: str,
    client: OpenAlexClient,
    limit: Optional[int] = None
) -> List[Dict[str, Any]]:
    """
    Find works from an institution (two-step pattern).

    The name is first resolved through an ``/institutions`` search (only
    the first hit is used); ``/works`` is then filtered by that
    institution's ID.

    Args:
        institution_name: Institution name to search for
        client: OpenAlexClient instance
        limit: Maximum number of works to return. ``None`` means no cap;
            zero or a negative value yields an empty list.

    Returns:
        List of works from the institution. Empty when no institution
        matches the name or when ``limit`` is not positive.
    """
    # Zero is falsy, so a plain truthiness test would route it to the
    # uncapped pagination branch, and a negative value would be sent to
    # the API as 'per-page'. Asking for no works needs no request at all.
    if limit is not None and limit <= 0:
        return []

    # Step 1: Find institution ID
    inst_response = client._make_request(
        '/institutions',
        params={'search': institution_name, 'per-page': 1}
    )

    matches = inst_response.get('results')
    if not matches:
        print(f"No institution found for: {institution_name}")
        return []

    institution = matches[0]
    # The 'id' field is a URL; the last path segment is the bare ID.
    inst_id = institution['id'].rsplit('/', 1)[-1]

    print(f"Found institution: {institution['display_name']} (ID: {inst_id})")

    # Step 2: Get works from institution
    works_params = {
        'filter': f'authorships.institutions.id:{inst_id}',
        'per-page': 200
    }

    if limit is not None and limit <= 200:
        # Fits in a single page: request exactly `limit` results.
        works_params['per-page'] = limit
        response = client._make_request('/works', works_params)
        return response.get('results', [])

    # No cap, or more than one page: let the client paginate.
    return client.paginate_all('/works', works_params, max_results=limit)
|
||||
|
||||
|
||||
def find_highly_cited_recent_papers(
    topic: Optional[str] = None,
    years: str = ">2020",
    client: Optional[OpenAlexClient] = None,
    limit: int = 100
) -> List[Dict[str, Any]]:
    """
    Find highly cited recent papers, optionally filtered by topic.

    Queries ``/works`` filtered by ``publication_year`` and sorted by
    ``cited_by_count`` in descending order.

    Args:
        topic: Optional search term for topic filtering
        years: Year filter (e.g., ">2020", "2020-2023")
        client: OpenAlexClient instance; a default one is created when
            omitted
        limit: Maximum number of papers to return. Zero or a negative
            value yields an empty list.

    Returns:
        List of highly cited papers sorted by citation count
    """
    # A non-positive limit would otherwise be sent to the API as
    # 'per-page'. Return before building a client or making a request.
    if limit <= 0:
        return []

    if client is None:
        client = OpenAlexClient()

    params = {
        'filter': f'publication_year:{years}',
        'sort': 'cited_by_count:desc',
        'per-page': min(limit, 200)
    }

    if topic:
        params['search'] = topic

    if limit <= 200:
        # Fits in a single page.
        response = client._make_request('/works', params)
        return response.get('results', [])

    # More than one page: let the client paginate up to `limit`.
    return client.paginate_all('/works', params, max_results=limit)
|
||||
|
||||
|
||||
def get_open_access_papers(
    search_term: str,
    client: OpenAlexClient,
    oa_status: str = "any",  # "any", "gold", "green", "hybrid", "bronze"
    limit: int = 100
) -> List[Dict[str, Any]]:
    """
    Find open access papers on a topic.

    Args:
        search_term: Search query
        client: OpenAlexClient instance
        oa_status: Type of OA ("any" for is_oa:true, or specific status).
            Any other value is placed verbatim into an
            ``open_access.oa_status`` filter.
        limit: Maximum number of papers to return. Zero or a negative
            value yields an empty list.

    Returns:
        List of open access papers
    """
    # A non-positive limit would otherwise be sent to the API as
    # 'per-page'. Nothing was asked for, so skip the request.
    if limit <= 0:
        return []

    if oa_status == "any":
        filter_str = "is_oa:true"
    else:
        filter_str = f"open_access.oa_status:{oa_status}"

    params = {
        'search': search_term,
        'filter': filter_str,
        'per-page': min(limit, 200)
    }

    if limit <= 200:
        # Fits in a single page.
        response = client._make_request('/works', params)
        return response.get('results', [])

    # More than one page: let the client paginate up to `limit`.
    return client.paginate_all('/works', params, max_results=limit)
|
||||
|
||||
|
||||
def get_publication_trends(
    search_term: Optional[str] = None,
    filter_params: Optional[Dict] = None,
    client: Optional[OpenAlexClient] = None
) -> List[Dict[str, Any]]:
    """
    Get publication counts grouped by publication year.

    Issues one ``/works`` request with ``group_by=publication_year``.

    Args:
        search_term: Optional search query; omitted from the request
            when empty
        filter_params: Optional mapping of filter name to value. Entries
            are rendered as ``name:value`` and joined with commas.
        client: OpenAlexClient instance; a default one is created when
            omitted

    Returns:
        The list stored under the response's ``group_by`` key, or an
        empty list when that key is absent. The shape of each entry is
        decided by the API (presumably one bucket per year with its
        count; confirm against the OpenAlex response schema).
    """
    api = OpenAlexClient() if client is None else client

    query = {'group_by': 'publication_year'}

    if search_term:
        query['search'] = search_term

    if filter_params:
        clauses = (f"{name}:{value}" for name, value in filter_params.items())
        query['filter'] = ','.join(clauses)

    payload = api._make_request('/works', query)
    return payload.get('group_by', [])
|
||||
|
||||
|
||||
def analyze_research_output(
    entity_type: str,  # 'author' or 'institution'
    entity_name: str,
    client: OpenAlexClient,
    years: str = ">2020"
) -> Dict[str, Any]:
    """
    Analyze research output for an author or institution.

    Resolves the name to an entity (the first search hit is used), then
    collects the total work count, the open-access share, and works
    grouped by publication year and by topic for the given year filter.

    Args:
        entity_type: 'author' or 'institution'
        entity_name: Name to search for
        client: OpenAlexClient instance
        years: Year filter

    Returns:
        Dictionary with analysis results. When ``entity_type`` is not
        supported, or no entity matches the name, a dictionary with a
        single ``'error'`` key is returned instead.
    """
    # Map each supported entity type to its search endpoint and the
    # works filter that selects by that entity's ID.
    lookup = {
        'author': ('/authors', 'authorships.author.id'),
        'institution': ('/institutions', 'authorships.institutions.id'),
    }

    # Reject unknown types explicitly. Falling back to the institution
    # endpoint would silently analyse the wrong kind of entity on a typo
    # or a case mismatch.
    if entity_type not in lookup:
        return {
            'error': (
                f"Unsupported entity_type: {entity_type!r} "
                "(expected 'author' or 'institution')"
            )
        }

    endpoint, filter_prefix = lookup[entity_type]

    # Step 1: Find entity
    entity_response = client._make_request(
        endpoint,
        params={'search': entity_name, 'per-page': 1}
    )

    if not entity_response.get('results'):
        return {'error': f'No {entity_type} found for: {entity_name}'}

    entity = entity_response['results'][0]
    # The 'id' field is a URL; the last path segment is the bare ID.
    entity_id = entity['id'].split('/')[-1]

    # Step 2: Get statistics
    filter_params = {
        filter_prefix: entity_id,
        'publication_year': years
    }

    # Total works: only the count in the response metadata is needed,
    # so a single result per page is requested.
    works_response = client.search_works(
        filter_params=filter_params,
        per_page=1
    )
    total_works = works_response['meta']['count']

    # Works by year
    trends = client.group_by(
        'works',
        'publication_year',
        filter_params={filter_prefix: entity_id, 'publication_year': years}
    )

    # Top topics
    topics = client.group_by(
        'works',
        'topics.id',
        filter_params=filter_params
    )

    # OA percentage
    oa_works = client.search_works(
        filter_params={**filter_params, 'is_oa': 'true'},
        per_page=1
    )
    oa_count = oa_works['meta']['count']
    # Guard against division by zero when the entity has no works.
    oa_percentage = (oa_count / total_works * 100) if total_works > 0 else 0

    return {
        'entity_name': entity['display_name'],
        'entity_id': entity_id,
        'total_works': total_works,
        'open_access_works': oa_count,
        'open_access_percentage': round(oa_percentage, 1),
        # First 10 groups in the order client.group_by returned them;
        # nothing here sorts by year, so these are not necessarily the
        # most recent years.
        'publications_by_year': trends[:10],
        # First 10 topic groups, likewise in returned order.
        'top_topics': topics[:10]
    }
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo run: performs live lookups against the API through the client
    # and prints the results. Replace the placeholder e-mail before use.
    import json

    demo_client = OpenAlexClient(email="your-email@example.com")

    # Author lookup, capped at five works.
    print("\n=== Finding works by author ===")
    einstein_works = find_author_works("Einstein", demo_client, limit=5)
    print(f"Found {len(einstein_works)} works")

    # Institution summary, pretty-printed as JSON.
    print("\n=== Analyzing institution research output ===")
    mit_report = analyze_research_output('institution', 'MIT', demo_client)
    report_text = json.dumps(mit_report, indent=2)
    print(report_text)
|
||||
Reference in New Issue
Block a user