Initial commit
This commit is contained in:
369
skills/string-database/scripts/string_api.py
Normal file
369
skills/string-database/scripts/string_api.py
Normal file
@@ -0,0 +1,369 @@
|
||||
"""
|
||||
STRING Database REST API Helper Functions
|
||||
|
||||
This module provides Python functions for interacting with the STRING database API.
|
||||
All functions return the raw response body (TSV or JSON text, or PNG bytes for
network images), which can be parsed as needed.
|
||||
|
||||
API Base URL: https://string-db.org/api
|
||||
Documentation: https://string-db.org/help/api/
|
||||
|
||||
STRING provides protein-protein interaction data from over 40 sources covering
|
||||
5000+ genomes with ~59.3 million proteins and 20+ billion interactions.
|
||||
"""
|
||||
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
import urllib.error
|
||||
import json
|
||||
from typing import Optional, List, Union, Dict
|
||||
|
||||
|
||||
STRING_BASE_URL = "https://string-db.org/api"
|
||||
|
||||
|
||||
def string_map_ids(identifiers: Union[str, List[str]],
                   species: int = 9606,
                   limit: int = 1,
                   echo_query: int = 1,
                   caller_identity: str = "claude_scientific_skills") -> str:
    """
    Map protein names, synonyms, and identifiers to STRING IDs.

    The parameters are form-encoded and sent in the request body (POST) to
    the ``tsv/get_string_ids`` endpoint.

    Args:
        identifiers: Single protein identifier or list of identifiers
        species: NCBI taxon ID (default: 9606 for human)
        limit: Number of matches to return per identifier (default: 1)
        echo_query: Include query term in output (1) or not (0)
        caller_identity: Application identifier for tracking

    Returns:
        str: TSV format with mapping results. If the server answers with an
        HTTP error status, the string ``"Error: <code> - <reason>"`` is
        returned instead of raising.

    Examples:
        # Map single protein
        result = string_map_ids('TP53', species=9606)

        # Map multiple proteins
        result = string_map_ids(['TP53', 'BRCA1', 'EGFR'], species=9606)
    """
    if isinstance(identifiers, str):
        identifiers_str = identifiers
    else:
        # The STRING API documents a carriage return (sent as %0D) as the
        # separator between identifiers; urlencode() below does the encoding.
        identifiers_str = '\r'.join(identifiers)

    params = {
        'identifiers': identifiers_str,
        'species': species,
        'limit': limit,
        'echo_query': echo_query,
        'caller_identity': caller_identity,
    }

    url = f"{STRING_BASE_URL}/tsv/get_string_ids"
    # Passing ``data`` makes urlopen() issue a POST request.
    data = urllib.parse.urlencode(params).encode('utf-8')

    try:
        with urllib.request.urlopen(url, data=data) as response:
            return response.read().decode('utf-8')
    except urllib.error.HTTPError as e:
        return f"Error: {e.code} - {e.reason}"
|
||||
|
||||
|
||||
def string_network(identifiers: Union[str, List[str]],
                   species: int = 9606,
                   required_score: int = 400,
                   network_type: str = "functional",
                   add_nodes: int = 0,
                   caller_identity: str = "claude_scientific_skills") -> str:
    """
    Get protein-protein interaction network data.

    Args:
        identifiers: Protein identifier(s) - use STRING IDs for best results
        species: NCBI taxon ID (default: 9606 for human)
        required_score: Confidence threshold 0-1000 (default: 400 = medium confidence)
        network_type: 'functional' or 'physical' (default: functional)
        add_nodes: Number of additional nodes to add to network (0-10)
        caller_identity: Application identifier for tracking

    Returns:
        str: TSV format with interaction data. If the server answers with an
        HTTP error status, the string ``"Error: <code> - <reason>"`` is
        returned instead of raising.

    Examples:
        # Get network for single protein
        network = string_network('9606.ENSP00000269305')

        # Get network with multiple proteins
        network = string_network(['9606.ENSP00000269305', '9606.ENSP00000275493'])

        # Get network with additional interacting proteins
        network = string_network('TP53', add_nodes=5, required_score=700)
    """
    if isinstance(identifiers, str):
        identifiers_str = identifiers
    else:
        # Join with a raw carriage return and let urlencode() turn it into
        # %0D. Joining with the literal text '%0d' would be escaped a second
        # time (to '%250d'), so the server would see one long identifier.
        identifiers_str = '\r'.join(identifiers)

    params = {
        'identifiers': identifiers_str,
        'species': species,
        'required_score': required_score,
        'network_type': network_type,
        'add_nodes': add_nodes,
        'caller_identity': caller_identity,
    }

    url = f"{STRING_BASE_URL}/tsv/network?" + urllib.parse.urlencode(params)

    try:
        with urllib.request.urlopen(url) as response:
            return response.read().decode('utf-8')
    except urllib.error.HTTPError as e:
        return f"Error: {e.code} - {e.reason}"
|
||||
|
||||
|
||||
def string_network_image(identifiers: Union[str, List[str]],
                         species: int = 9606,
                         required_score: int = 400,
                         network_flavor: str = "evidence",
                         add_nodes: int = 0,
                         caller_identity: str = "claude_scientific_skills") -> bytes:
    """
    Get network visualization as PNG image.

    Args:
        identifiers: Protein identifier(s)
        species: NCBI taxon ID (default: 9606 for human)
        required_score: Confidence threshold 0-1000 (default: 400)
        network_flavor: 'evidence', 'confidence', or 'actions' (default: evidence)
        add_nodes: Number of additional nodes to add (0-10)
        caller_identity: Application identifier for tracking

    Returns:
        bytes: PNG image data. If the server answers with an HTTP error
        status, the message ``"Error: <code> - <reason>"`` is returned as
        encoded bytes instead of raising, so callers should check for the
        ``b"Error:"`` prefix before writing the result to a file.

    Example:
        # Get network image
        img_data = string_network_image(['TP53', 'MDM2', 'ATM'])
        with open('network.png', 'wb') as f:
            f.write(img_data)
    """
    if isinstance(identifiers, str):
        identifiers_str = identifiers
    else:
        # Join with a raw carriage return and let urlencode() turn it into
        # %0D. Joining with the literal text '%0d' would be escaped a second
        # time (to '%250d'), so the server would see one long identifier.
        identifiers_str = '\r'.join(identifiers)

    params = {
        'identifiers': identifiers_str,
        'species': species,
        'required_score': required_score,
        'network_flavor': network_flavor,
        'add_nodes': add_nodes,
        'caller_identity': caller_identity,
    }

    url = f"{STRING_BASE_URL}/image/network?" + urllib.parse.urlencode(params)

    try:
        with urllib.request.urlopen(url) as response:
            # Image payload: return the bytes untouched, no text decoding.
            return response.read()
    except urllib.error.HTTPError as e:
        return f"Error: {e.code} - {e.reason}".encode()
|
||||
|
||||
|
||||
def string_interaction_partners(identifiers: Union[str, List[str]],
                                species: int = 9606,
                                required_score: int = 400,
                                limit: int = 10,
                                caller_identity: str = "claude_scientific_skills") -> str:
    """
    Get all interaction partners for protein(s).

    Args:
        identifiers: Protein identifier(s)
        species: NCBI taxon ID (default: 9606 for human)
        required_score: Confidence threshold 0-1000 (default: 400)
        limit: Maximum number of partners to return (default: 10)
        caller_identity: Application identifier for tracking

    Returns:
        str: TSV format with interaction partners. If the server answers with
        an HTTP error status, the string ``"Error: <code> - <reason>"`` is
        returned instead of raising.

    Example:
        # Get top 20 interactors of TP53
        partners = string_interaction_partners('TP53', limit=20, required_score=700)
    """
    if isinstance(identifiers, str):
        identifiers_str = identifiers
    else:
        # Join with a raw carriage return and let urlencode() turn it into
        # %0D. Joining with the literal text '%0d' would be escaped a second
        # time (to '%250d'), so the server would see one long identifier.
        identifiers_str = '\r'.join(identifiers)

    params = {
        'identifiers': identifiers_str,
        'species': species,
        'required_score': required_score,
        'limit': limit,
        'caller_identity': caller_identity,
    }

    url = f"{STRING_BASE_URL}/tsv/interaction_partners?" + urllib.parse.urlencode(params)

    try:
        with urllib.request.urlopen(url) as response:
            return response.read().decode('utf-8')
    except urllib.error.HTTPError as e:
        return f"Error: {e.code} - {e.reason}"
|
||||
|
||||
|
||||
def string_enrichment(identifiers: Union[str, List[str]],
                      species: int = 9606,
                      caller_identity: str = "claude_scientific_skills") -> str:
    """
    Perform functional enrichment analysis (Gene Ontology, KEGG, Pfam, etc.).

    Args:
        identifiers: List of protein identifiers
        species: NCBI taxon ID (default: 9606 for human)
        caller_identity: Application identifier for tracking

    Returns:
        str: TSV format with enrichment results. If the server answers with
        an HTTP error status, the string ``"Error: <code> - <reason>"`` is
        returned instead of raising.

    Example:
        # Enrichment for a list of proteins
        proteins = ['TP53', 'MDM2', 'ATM', 'CHEK2', 'BRCA1']
        enrichment = string_enrichment(proteins, species=9606)
    """
    if isinstance(identifiers, str):
        identifiers_str = identifiers
    else:
        # Join with a raw carriage return and let urlencode() turn it into
        # %0D. Joining with the literal text '%0d' would be escaped a second
        # time (to '%250d'), so the server would see one long identifier.
        identifiers_str = '\r'.join(identifiers)

    params = {
        'identifiers': identifiers_str,
        'species': species,
        'caller_identity': caller_identity,
    }

    url = f"{STRING_BASE_URL}/tsv/enrichment?" + urllib.parse.urlencode(params)

    try:
        with urllib.request.urlopen(url) as response:
            return response.read().decode('utf-8')
    except urllib.error.HTTPError as e:
        return f"Error: {e.code} - {e.reason}"
|
||||
|
||||
|
||||
def string_ppi_enrichment(identifiers: Union[str, List[str]],
                          species: int = 9606,
                          required_score: int = 400,
                          caller_identity: str = "claude_scientific_skills") -> str:
    """
    Test if network has more interactions than expected by chance.

    Args:
        identifiers: List of protein identifiers
        species: NCBI taxon ID (default: 9606 for human)
        required_score: Confidence threshold 0-1000 (default: 400)
        caller_identity: Application identifier for tracking

    Returns:
        str: JSON text with PPI enrichment p-value (not parsed; pass it to
        ``json.loads`` if a Python object is needed). If the server answers
        with an HTTP error status, the string ``"Error: <code> - <reason>"``
        is returned instead of raising.

    Example:
        # Test if proteins are more connected than random
        proteins = ['TP53', 'MDM2', 'ATM', 'CHEK2']
        ppi_result = string_ppi_enrichment(proteins)
    """
    if isinstance(identifiers, str):
        identifiers_str = identifiers
    else:
        # Join with a raw carriage return and let urlencode() turn it into
        # %0D. Joining with the literal text '%0d' would be escaped a second
        # time (to '%250d'), so the server would see one long identifier.
        identifiers_str = '\r'.join(identifiers)

    params = {
        'identifiers': identifiers_str,
        'species': species,
        'required_score': required_score,
        'caller_identity': caller_identity,
    }

    url = f"{STRING_BASE_URL}/json/ppi_enrichment?" + urllib.parse.urlencode(params)

    try:
        with urllib.request.urlopen(url) as response:
            return response.read().decode('utf-8')
    except urllib.error.HTTPError as e:
        return f"Error: {e.code} - {e.reason}"
|
||||
|
||||
|
||||
def string_homology(identifiers: Union[str, List[str]],
                    species: int = 9606,
                    caller_identity: str = "claude_scientific_skills") -> str:
    """
    Get homology/similarity scores between proteins.

    Args:
        identifiers: Protein identifier(s)
        species: NCBI taxon ID (default: 9606 for human)
        caller_identity: Application identifier for tracking

    Returns:
        str: TSV format with homology scores. If the server answers with an
        HTTP error status, the string ``"Error: <code> - <reason>"`` is
        returned instead of raising.

    Example:
        # Get homology data
        homology = string_homology(['TP53', 'TP63', 'TP73'])
    """
    if isinstance(identifiers, str):
        identifiers_str = identifiers
    else:
        # Join with a raw carriage return and let urlencode() turn it into
        # %0D. Joining with the literal text '%0d' would be escaped a second
        # time (to '%250d'), so the server would see one long identifier.
        identifiers_str = '\r'.join(identifiers)

    params = {
        'identifiers': identifiers_str,
        'species': species,
        'caller_identity': caller_identity,
    }

    url = f"{STRING_BASE_URL}/tsv/homology?" + urllib.parse.urlencode(params)

    try:
        with urllib.request.urlopen(url) as response:
            return response.read().decode('utf-8')
    except urllib.error.HTTPError as e:
        return f"Error: {e.code} - {e.reason}"
|
||||
|
||||
|
||||
def string_version() -> str:
    """
    Fetch the version information reported by the STRING API.

    Requests the ``tsv/version`` endpoint and hands back the body as text.

    Returns:
        str: The decoded (UTF-8) response body, or the string
        ``"Error: <code> - <reason>"`` when the server answers with an HTTP
        error status.

    Example:
        version = string_version()
    """
    url = f"{STRING_BASE_URL}/tsv/version"

    try:
        with urllib.request.urlopen(url) as reply:
            raw_body = reply.read()
            return raw_body.decode('utf-8')
    except urllib.error.HTTPError as http_err:
        # HTTP failures are reported as a string rather than raised.
        return f"Error: {http_err.code} - {http_err.reason}"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Small live demo: each step queries the STRING API and prints the
    # response (or the returned "Error: ..." string).

    # 1. Database version
    print("STRING Version:")
    version_text = string_version()
    print(version_text)
    print()

    # 2. Name -> STRING ID mapping for two human proteins
    print("Mapping protein names to STRING IDs:")
    id_table = string_map_ids(['TP53', 'BRCA1'], species=9606)
    print(id_table)
    print()

    # 3. TP53 network with three extra nodes; only the first 500
    #    characters are shown to keep the output short
    print("Getting interaction network:")
    tp53_net = string_network('TP53', species=9606, add_nodes=3)
    preview = tp53_net[:500]
    print(preview + "...")
|
||||
Reference in New Issue
Block a user