Files
2025-11-30 08:30:10 +08:00

252 lines
7.5 KiB
Python

"""
KEGG REST API Helper Functions
This module provides Python functions for interacting with the KEGG REST API.
All functions return raw response text which can be parsed as needed.
API Base URL: https://rest.kegg.jp
Documentation: https://www.kegg.jp/kegg/rest/keggapi.html
IMPORTANT: KEGG API is made available only for academic use by academic users.
"""
import urllib.request
import urllib.parse
import urllib.error
from typing import Optional, List, Union
KEGG_BASE_URL = "https://rest.kegg.jp"
def kegg_info(database: str) -> str:
"""
Get database metadata and statistics.
Args:
database: KEGG database name (e.g., 'kegg', 'pathway', 'enzyme', 'genes')
Returns:
str: Database information and statistics
Example:
info = kegg_info('pathway')
"""
url = f"{KEGG_BASE_URL}/info/{database}"
try:
with urllib.request.urlopen(url) as response:
return response.read().decode('utf-8')
except urllib.error.HTTPError as e:
return f"Error: {e.code} - {e.reason}"
def kegg_list(database: str, org: Optional[str] = None) -> str:
"""
List entry identifiers and associated names.
Args:
database: KEGG database name or specific entry (e.g., 'pathway', 'enzyme', 'hsa:10458')
org: Optional organism code for pathway/module listings (e.g., 'hsa' for human)
Returns:
str: Tab-delimited list of entries
Examples:
pathways = kegg_list('pathway') # List all reference pathways
hsa_pathways = kegg_list('pathway', 'hsa') # List human pathways
genes = kegg_list('hsa:10458+ece:Z5100') # List specific genes
"""
if org:
url = f"{KEGG_BASE_URL}/list/{database}/{org}"
else:
url = f"{KEGG_BASE_URL}/list/{database}"
try:
with urllib.request.urlopen(url) as response:
return response.read().decode('utf-8')
except urllib.error.HTTPError as e:
return f"Error: {e.code} - {e.reason}"
def kegg_find(database: str, query: str, option: Optional[str] = None) -> str:
"""
Search for entries by keywords or molecular properties.
Args:
database: Database to search ('genes', 'compound', 'drug', etc.)
query: Search term or molecular property
option: Optional parameter for molecular searches:
'formula' - exact match to chemical formula
'exact_mass' - range search by exact mass (e.g., '174.05-174.15')
'mol_weight' - range search by molecular weight
Returns:
str: Tab-delimited search results
Examples:
# Keyword search
results = kegg_find('genes', 'shiga toxin')
# Formula search
compounds = kegg_find('compound', 'C7H10N4O2', 'formula')
# Mass range search
drugs = kegg_find('drug', '300-310', 'exact_mass')
"""
query_encoded = urllib.parse.quote(query)
if option:
url = f"{KEGG_BASE_URL}/find/{database}/{query_encoded}/{option}"
else:
url = f"{KEGG_BASE_URL}/find/{database}/{query_encoded}"
try:
with urllib.request.urlopen(url) as response:
return response.read().decode('utf-8')
except urllib.error.HTTPError as e:
return f"Error: {e.code} - {e.reason}"
def kegg_get(entries: Union[str, List[str]], option: Optional[str] = None) -> str:
"""
Retrieve full database entries or specific data formats.
Args:
entries: Single entry ID or list of entry IDs (max 10)
option: Optional output format:
'aaseq' or 'ntseq' - FASTA sequence
'mol' - MOL format (for compounds)
'kcf' - KCF format (for compounds)
'image' - PNG image (pathway maps, single entry only)
'kgml' - KGML format (pathway XML, single entry only)
'json' - JSON format (pathway only, single entry only)
Returns:
str: Entry data in requested format
Examples:
# Get pathway entry
pathway = kegg_get('hsa00010')
# Get multiple entries
genes = kegg_get(['hsa:10458', 'ece:Z5100'])
# Get sequence
sequence = kegg_get('hsa:10458', 'aaseq')
# Get pathway as JSON
pathway_json = kegg_get('hsa05130', 'json')
"""
if isinstance(entries, list):
entries_str = '+'.join(entries[:10]) # Max 10 entries
else:
entries_str = entries
if option:
url = f"{KEGG_BASE_URL}/get/{entries_str}/{option}"
else:
url = f"{KEGG_BASE_URL}/get/{entries_str}"
try:
with urllib.request.urlopen(url) as response:
return response.read().decode('utf-8')
except urllib.error.HTTPError as e:
return f"Error: {e.code} - {e.reason}"
def kegg_conv(target_db: str, source_db: str) -> str:
"""
Convert identifiers between KEGG and external databases.
Args:
target_db: Target database (e.g., 'ncbi-geneid', 'uniprot', 'pubchem')
source_db: Source database or entry (e.g., 'hsa', 'compound', 'hsa:10458')
Returns:
str: Tab-delimited conversion table
Examples:
# Convert all human genes to NCBI Gene IDs
conversions = kegg_conv('ncbi-geneid', 'hsa')
# Convert specific gene
gene_id = kegg_conv('ncbi-geneid', 'hsa:10458')
# Convert compounds to PubChem IDs
pubchem = kegg_conv('pubchem', 'compound')
"""
url = f"{KEGG_BASE_URL}/conv/{target_db}/{source_db}"
try:
with urllib.request.urlopen(url) as response:
return response.read().decode('utf-8')
except urllib.error.HTTPError as e:
return f"Error: {e.code} - {e.reason}"
def kegg_link(target_db: str, source_db: str) -> str:
"""
Find related entries across KEGG databases.
Args:
target_db: Target database (e.g., 'pathway', 'enzyme', 'genes')
source_db: Source database or entry (e.g., 'hsa', 'pathway', 'hsa:10458')
Returns:
str: Tab-delimited list of linked entries
Examples:
# Find pathways linked to human genes
links = kegg_link('pathway', 'hsa')
# Find genes in a specific pathway
genes = kegg_link('genes', 'hsa00010')
# Find pathways for a specific gene
pathways = kegg_link('pathway', 'hsa:10458')
"""
url = f"{KEGG_BASE_URL}/link/{target_db}/{source_db}"
try:
with urllib.request.urlopen(url) as response:
return response.read().decode('utf-8')
except urllib.error.HTTPError as e:
return f"Error: {e.code} - {e.reason}"
def kegg_ddi(drug_entries: Union[str, List[str]]) -> str:
"""
Check drug-drug interactions.
Args:
drug_entries: Single drug entry or list of drug entries (max 10)
Returns:
str: Drug interaction information
Example:
interactions = kegg_ddi(['D00001', 'D00002'])
"""
if isinstance(drug_entries, list):
entries_str = '+'.join(drug_entries[:10]) # Max 10 entries
else:
entries_str = drug_entries
url = f"{KEGG_BASE_URL}/ddi/{entries_str}"
try:
with urllib.request.urlopen(url) as response:
return response.read().decode('utf-8')
except urllib.error.HTTPError as e:
return f"Error: {e.code} - {e.reason}"
if __name__ == "__main__":
# Example usage
print("KEGG Info Example:")
print(kegg_info('pathway')[:200] + "...\n")
print("KEGG List Example (first 3 pathways):")
pathways = kegg_list('pathway')
print('\n'.join(pathways.split('\n')[:3]) + "\n")
print("KEGG Find Example:")
print(kegg_find('genes', 'p53')[:200] + "...")