#!/usr/bin/env python3
"""
Helper functions for common OpenAlex query patterns.

Provides high-level functions for typical research queries.
"""

import json
from typing import List, Dict, Optional, Any

from openalex_client import OpenAlexClient

# Page size used whenever a query needs more than one request. OpenAlex
# documents 200 as the largest accepted `per-page` value.
_MAX_PER_PAGE = 200

# entity_type -> (search endpoint, works filter key for that entity's ID)
_ENTITY_LOOKUP = {
    'author': ('/authors', 'authorships.author.id'),
    'institution': ('/institutions', 'authorships.institutions.id'),
}


def _find_entity(
    client: OpenAlexClient,
    endpoint: str,
    name: str
) -> Optional[Dict[str, Any]]:
    """Return the first search hit for *name* at *endpoint*, or None if there is none."""
    response = client._make_request(
        endpoint,
        params={'search': name, 'per-page': 1}
    )
    results = response.get('results')
    return results[0] if results else None


def _short_id(entity: Dict[str, Any]) -> str:
    """Return the last path segment of the entity's 'id' URL."""
    return entity['id'].split('/')[-1]


def _format_filter_value(value: Any) -> str:
    """Render a filter value as text, writing booleans in lowercase."""
    # str(True) is "True"; the rest of this module writes boolean filters in
    # lowercase (e.g. 'is_oa': 'true'), so keep bools consistent with that.
    if isinstance(value, bool):
        return 'true' if value else 'false'
    return f"{value}"


def _fetch_works(
    client: OpenAlexClient,
    params: Dict[str, Any],
    limit: Optional[int]
) -> List[Dict[str, Any]]:
    """
    Fetch up to *limit* works for the given query parameters.

    A limit that fits in one page is served by a single request; anything
    larger (or no limit at all) is delegated to ``client.paginate_all``.
    A non-positive limit returns an empty list without contacting the API.
    """
    # Work on a copy so the caller's dict is never modified.
    request_params = dict(params)

    if limit is not None and limit <= 0:
        return []

    if limit is not None and limit <= _MAX_PER_PAGE:
        request_params['per-page'] = limit
        response = client._make_request('/works', request_params)
        return response.get('results', [])

    # Need pagination
    request_params['per-page'] = _MAX_PER_PAGE
    return client.paginate_all('/works', request_params, max_results=limit)


def find_author_works(
    author_name: str,
    client: OpenAlexClient,
    limit: Optional[int] = None
) -> List[Dict[str, Any]]:
    """
    Find all works by an author (two-step pattern).

    Args:
        author_name: Author name to search for
        client: OpenAlexClient instance
        limit: Maximum number of works to return (None for no limit;
            zero or negative yields an empty list)

    Returns:
        List of works by the author; empty if no author matched
    """
    # Step 1: Find author ID
    author = _find_entity(client, '/authors', author_name)
    if author is None:
        print(f"No author found for: {author_name}")
        return []

    author_id = _short_id(author)  # Extract ID from URL
    print(f"Found author: {author['display_name']} (ID: {author_id})")

    # Step 2: Get works by author
    works_params = {'filter': f'authorships.author.id:{author_id}'}
    return _fetch_works(client, works_params, limit)


def find_institution_works(
    institution_name: str,
    client: OpenAlexClient,
    limit: Optional[int] = None
) -> List[Dict[str, Any]]:
    """
    Find all works from an institution (two-step pattern).

    Args:
        institution_name: Institution name to search for
        client: OpenAlexClient instance
        limit: Maximum number of works to return (None for no limit;
            zero or negative yields an empty list)

    Returns:
        List of works from the institution; empty if no institution matched
    """
    # Step 1: Find institution ID
    institution = _find_entity(client, '/institutions', institution_name)
    if institution is None:
        print(f"No institution found for: {institution_name}")
        return []

    inst_id = _short_id(institution)  # Extract ID from URL
    print(f"Found institution: {institution['display_name']} (ID: {inst_id})")

    # Step 2: Get works from institution
    works_params = {'filter': f'authorships.institutions.id:{inst_id}'}
    return _fetch_works(client, works_params, limit)


def find_highly_cited_recent_papers(
    topic: Optional[str] = None,
    years: str = ">2020",
    client: Optional[OpenAlexClient] = None,
    limit: int = 100
) -> List[Dict[str, Any]]:
    """
    Find highly cited recent papers, optionally filtered by topic.

    Args:
        topic: Optional search term for topic filtering
        years: Year filter (e.g., ">2020", "2020-2023")
        client: OpenAlexClient instance (a default one is created if omitted)
        limit: Maximum number of papers to return (zero or negative yields
            an empty list)

    Returns:
        List of papers, requested in descending citation-count order
    """
    if client is None:
        client = OpenAlexClient()

    params = {
        'filter': f'publication_year:{years}',
        'sort': 'cited_by_count:desc',
    }
    if topic:
        params['search'] = topic

    return _fetch_works(client, params, limit)


def get_open_access_papers(
    search_term: str,
    client: OpenAlexClient,
    oa_status: str = "any",  # "any", "gold", "green", "hybrid", "bronze"
    limit: int = 100
) -> List[Dict[str, Any]]:
    """
    Find open access papers on a topic.

    Args:
        search_term: Search query
        client: OpenAlexClient instance
        oa_status: Type of OA ("any" for is_oa:true, or specific status);
            any other value is passed through unchanged as the
            open_access.oa_status filter value
        limit: Maximum number of papers to return (zero or negative yields
            an empty list)

    Returns:
        List of open access papers
    """
    if oa_status == "any":
        filter_str = "is_oa:true"
    else:
        filter_str = f"open_access.oa_status:{oa_status}"

    params = {
        'search': search_term,
        'filter': filter_str,
    }
    return _fetch_works(client, params, limit)


def get_publication_trends(
    search_term: Optional[str] = None,
    filter_params: Optional[Dict] = None,
    client: Optional[OpenAlexClient] = None
) -> List[Dict[str, Any]]:
    """
    Get publication counts by year.

    Args:
        search_term: Optional search query
        filter_params: Optional additional filters, combined as
            comma-separated "key:value" pairs (booleans are written
            as "true"/"false")
        client: OpenAlexClient instance (a default one is created if omitted)

    Returns:
        The 'group_by' list from the /works response, or an empty list when
        the response has none. NOTE(review): presumably one entry per
        publication year with its count -- confirm the entry schema.
    """
    if client is None:
        client = OpenAlexClient()

    params = {'group_by': 'publication_year'}
    if search_term:
        params['search'] = search_term
    if filter_params:
        params['filter'] = ','.join(
            f"{key}:{_format_filter_value(value)}"
            for key, value in filter_params.items()
        )

    response = client._make_request('/works', params)
    return response.get('group_by', [])


def analyze_research_output(
    entity_type: str,  # 'author' or 'institution'
    entity_name: str,
    client: OpenAlexClient,
    years: str = ">2020"
) -> Dict[str, Any]:
    """
    Analyze research output for an author or institution.

    Args:
        entity_type: 'author' or 'institution'
        entity_name: Name to search for
        client: OpenAlexClient instance
        years: Year filter

    Returns:
        Dictionary with analysis results, or a dictionary holding a single
        'error' key when the entity type is unknown or no entity matched
    """
    # Reject unknown types explicitly instead of silently treating them
    # as institutions, which would return an unrelated analysis.
    lookup = _ENTITY_LOOKUP.get(entity_type)
    if lookup is None:
        return {
            'error': (
                f"Unknown entity_type: {entity_type!r} "
                "(expected 'author' or 'institution')"
            )
        }
    endpoint, filter_prefix = lookup

    # Step 1: Find entity
    entity = _find_entity(client, endpoint, entity_name)
    if entity is None:
        return {'error': f'No {entity_type} found for: {entity_name}'}

    entity_id = _short_id(entity)

    # Step 2: Get statistics
    filter_params = {
        filter_prefix: entity_id,
        'publication_year': years
    }

    # Total works (one result per page is enough; only meta.count is read)
    works_response = client.search_works(
        filter_params=filter_params,
        per_page=1
    )
    total_works = works_response['meta']['count']

    # Works by year
    trends = client.group_by(
        'works',
        'publication_year',
        filter_params=filter_params
    )

    # Top topics
    topics = client.group_by(
        'works',
        'topics.id',
        filter_params=filter_params
    )

    # OA percentage
    oa_works = client.search_works(
        filter_params={**filter_params, 'is_oa': 'true'},
        per_page=1
    )
    oa_count = oa_works['meta']['count']
    # Guard against division by zero when the entity has no matching works.
    oa_percentage = (oa_count / total_works * 100) if total_works > 0 else 0

    return {
        'entity_name': entity['display_name'],
        'entity_id': entity_id,
        'total_works': total_works,
        'open_access_works': oa_count,
        'open_access_percentage': round(oa_percentage, 1),
        # First 10 groups in the order client.group_by returned them.
        # NOTE(review): these are the ten most recent years only if
        # group_by returns groups newest-first -- confirm.
        'publications_by_year': trends[:10],
        # First 10 topic groups; NOTE(review): presumably ordered by
        # count, which would make these the top topics -- confirm.
        'top_topics': topics[:10]
    }


def main() -> None:
    """Run a small demonstration of the helpers against the live API."""
    client = OpenAlexClient(email="your-email@example.com")

    # Find works by author
    print("\n=== Finding works by author ===")
    works = find_author_works("Einstein", client, limit=5)
    print(f"Found {len(works)} works")

    # Analyze research output
    print("\n=== Analyzing institution research output ===")
    analysis = analyze_research_output('institution', 'MIT', client)
    print(json.dumps(analysis, indent=2))


if __name__ == "__main__":
    main()