252 lines
7.5 KiB
Python
252 lines
7.5 KiB
Python
"""
|
|
KEGG REST API Helper Functions
|
|
|
|
This module provides Python functions for interacting with the KEGG REST API.
|
|
All functions return raw response text which can be parsed as needed.
|
|
|
|
API Base URL: https://rest.kegg.jp
|
|
Documentation: https://www.kegg.jp/kegg/rest/keggapi.html
|
|
|
|
IMPORTANT: KEGG API is made available only for academic use by academic users.
|
|
"""
|
|
|
|
import urllib.request
|
|
import urllib.parse
|
|
import urllib.error
|
|
from typing import Optional, List, Union
|
|
|
|
|
|
KEGG_BASE_URL = "https://rest.kegg.jp"
|
|
|
|
|
|
def kegg_info(database: str) -> str:
|
|
"""
|
|
Get database metadata and statistics.
|
|
|
|
Args:
|
|
database: KEGG database name (e.g., 'kegg', 'pathway', 'enzyme', 'genes')
|
|
|
|
Returns:
|
|
str: Database information and statistics
|
|
|
|
Example:
|
|
info = kegg_info('pathway')
|
|
"""
|
|
url = f"{KEGG_BASE_URL}/info/{database}"
|
|
try:
|
|
with urllib.request.urlopen(url) as response:
|
|
return response.read().decode('utf-8')
|
|
except urllib.error.HTTPError as e:
|
|
return f"Error: {e.code} - {e.reason}"
|
|
|
|
|
|
def kegg_list(database: str, org: Optional[str] = None) -> str:
|
|
"""
|
|
List entry identifiers and associated names.
|
|
|
|
Args:
|
|
database: KEGG database name or specific entry (e.g., 'pathway', 'enzyme', 'hsa:10458')
|
|
org: Optional organism code for pathway/module listings (e.g., 'hsa' for human)
|
|
|
|
Returns:
|
|
str: Tab-delimited list of entries
|
|
|
|
Examples:
|
|
pathways = kegg_list('pathway') # List all reference pathways
|
|
hsa_pathways = kegg_list('pathway', 'hsa') # List human pathways
|
|
genes = kegg_list('hsa:10458+ece:Z5100') # List specific genes
|
|
"""
|
|
if org:
|
|
url = f"{KEGG_BASE_URL}/list/{database}/{org}"
|
|
else:
|
|
url = f"{KEGG_BASE_URL}/list/{database}"
|
|
|
|
try:
|
|
with urllib.request.urlopen(url) as response:
|
|
return response.read().decode('utf-8')
|
|
except urllib.error.HTTPError as e:
|
|
return f"Error: {e.code} - {e.reason}"
|
|
|
|
|
|
def kegg_find(database: str, query: str, option: Optional[str] = None) -> str:
|
|
"""
|
|
Search for entries by keywords or molecular properties.
|
|
|
|
Args:
|
|
database: Database to search ('genes', 'compound', 'drug', etc.)
|
|
query: Search term or molecular property
|
|
option: Optional parameter for molecular searches:
|
|
'formula' - exact match to chemical formula
|
|
'exact_mass' - range search by exact mass (e.g., '174.05-174.15')
|
|
'mol_weight' - range search by molecular weight
|
|
|
|
Returns:
|
|
str: Tab-delimited search results
|
|
|
|
Examples:
|
|
# Keyword search
|
|
results = kegg_find('genes', 'shiga toxin')
|
|
|
|
# Formula search
|
|
compounds = kegg_find('compound', 'C7H10N4O2', 'formula')
|
|
|
|
# Mass range search
|
|
drugs = kegg_find('drug', '300-310', 'exact_mass')
|
|
"""
|
|
query_encoded = urllib.parse.quote(query)
|
|
|
|
if option:
|
|
url = f"{KEGG_BASE_URL}/find/{database}/{query_encoded}/{option}"
|
|
else:
|
|
url = f"{KEGG_BASE_URL}/find/{database}/{query_encoded}"
|
|
|
|
try:
|
|
with urllib.request.urlopen(url) as response:
|
|
return response.read().decode('utf-8')
|
|
except urllib.error.HTTPError as e:
|
|
return f"Error: {e.code} - {e.reason}"
|
|
|
|
|
|
def kegg_get(entries: Union[str, List[str]], option: Optional[str] = None) -> str:
|
|
"""
|
|
Retrieve full database entries or specific data formats.
|
|
|
|
Args:
|
|
entries: Single entry ID or list of entry IDs (max 10)
|
|
option: Optional output format:
|
|
'aaseq' or 'ntseq' - FASTA sequence
|
|
'mol' - MOL format (for compounds)
|
|
'kcf' - KCF format (for compounds)
|
|
'image' - PNG image (pathway maps, single entry only)
|
|
'kgml' - KGML format (pathway XML, single entry only)
|
|
'json' - JSON format (pathway only, single entry only)
|
|
|
|
Returns:
|
|
str: Entry data in requested format
|
|
|
|
Examples:
|
|
# Get pathway entry
|
|
pathway = kegg_get('hsa00010')
|
|
|
|
# Get multiple entries
|
|
genes = kegg_get(['hsa:10458', 'ece:Z5100'])
|
|
|
|
# Get sequence
|
|
sequence = kegg_get('hsa:10458', 'aaseq')
|
|
|
|
# Get pathway as JSON
|
|
pathway_json = kegg_get('hsa05130', 'json')
|
|
"""
|
|
if isinstance(entries, list):
|
|
entries_str = '+'.join(entries[:10]) # Max 10 entries
|
|
else:
|
|
entries_str = entries
|
|
|
|
if option:
|
|
url = f"{KEGG_BASE_URL}/get/{entries_str}/{option}"
|
|
else:
|
|
url = f"{KEGG_BASE_URL}/get/{entries_str}"
|
|
|
|
try:
|
|
with urllib.request.urlopen(url) as response:
|
|
return response.read().decode('utf-8')
|
|
except urllib.error.HTTPError as e:
|
|
return f"Error: {e.code} - {e.reason}"
|
|
|
|
|
|
def kegg_conv(target_db: str, source_db: str) -> str:
|
|
"""
|
|
Convert identifiers between KEGG and external databases.
|
|
|
|
Args:
|
|
target_db: Target database (e.g., 'ncbi-geneid', 'uniprot', 'pubchem')
|
|
source_db: Source database or entry (e.g., 'hsa', 'compound', 'hsa:10458')
|
|
|
|
Returns:
|
|
str: Tab-delimited conversion table
|
|
|
|
Examples:
|
|
# Convert all human genes to NCBI Gene IDs
|
|
conversions = kegg_conv('ncbi-geneid', 'hsa')
|
|
|
|
# Convert specific gene
|
|
gene_id = kegg_conv('ncbi-geneid', 'hsa:10458')
|
|
|
|
# Convert compounds to PubChem IDs
|
|
pubchem = kegg_conv('pubchem', 'compound')
|
|
"""
|
|
url = f"{KEGG_BASE_URL}/conv/{target_db}/{source_db}"
|
|
try:
|
|
with urllib.request.urlopen(url) as response:
|
|
return response.read().decode('utf-8')
|
|
except urllib.error.HTTPError as e:
|
|
return f"Error: {e.code} - {e.reason}"
|
|
|
|
|
|
def kegg_link(target_db: str, source_db: str) -> str:
|
|
"""
|
|
Find related entries across KEGG databases.
|
|
|
|
Args:
|
|
target_db: Target database (e.g., 'pathway', 'enzyme', 'genes')
|
|
source_db: Source database or entry (e.g., 'hsa', 'pathway', 'hsa:10458')
|
|
|
|
Returns:
|
|
str: Tab-delimited list of linked entries
|
|
|
|
Examples:
|
|
# Find pathways linked to human genes
|
|
links = kegg_link('pathway', 'hsa')
|
|
|
|
# Find genes in a specific pathway
|
|
genes = kegg_link('genes', 'hsa00010')
|
|
|
|
# Find pathways for a specific gene
|
|
pathways = kegg_link('pathway', 'hsa:10458')
|
|
"""
|
|
url = f"{KEGG_BASE_URL}/link/{target_db}/{source_db}"
|
|
try:
|
|
with urllib.request.urlopen(url) as response:
|
|
return response.read().decode('utf-8')
|
|
except urllib.error.HTTPError as e:
|
|
return f"Error: {e.code} - {e.reason}"
|
|
|
|
|
|
def kegg_ddi(drug_entries: Union[str, List[str]]) -> str:
|
|
"""
|
|
Check drug-drug interactions.
|
|
|
|
Args:
|
|
drug_entries: Single drug entry or list of drug entries (max 10)
|
|
|
|
Returns:
|
|
str: Drug interaction information
|
|
|
|
Example:
|
|
interactions = kegg_ddi(['D00001', 'D00002'])
|
|
"""
|
|
if isinstance(drug_entries, list):
|
|
entries_str = '+'.join(drug_entries[:10]) # Max 10 entries
|
|
else:
|
|
entries_str = drug_entries
|
|
|
|
url = f"{KEGG_BASE_URL}/ddi/{entries_str}"
|
|
try:
|
|
with urllib.request.urlopen(url) as response:
|
|
return response.read().decode('utf-8')
|
|
except urllib.error.HTTPError as e:
|
|
return f"Error: {e.code} - {e.reason}"
|
|
|
|
|
|
if __name__ == "__main__":
|
|
# Example usage
|
|
print("KEGG Info Example:")
|
|
print(kegg_info('pathway')[:200] + "...\n")
|
|
|
|
print("KEGG List Example (first 3 pathways):")
|
|
pathways = kegg_list('pathway')
|
|
print('\n'.join(pathways.split('\n')[:3]) + "\n")
|
|
|
|
print("KEGG Find Example:")
|
|
print(kegg_find('genes', 'p53')[:200] + "...")
|