10 KiB
10 KiB
UniProt API Examples
Practical code examples for interacting with the UniProt REST API in multiple languages.
Python Examples
Example 1: Basic Search
import requests
# Query the UniProtKB search endpoint for reviewed human insulin entries
search_endpoint = "https://rest.uniprot.org/uniprotkb/search"
search_params = dict(
    query="insulin AND organism_id:9606 AND reviewed:true",
    format="json",
    size=10,
)
payload = requests.get(search_endpoint, params=search_params).json()

# Print "<accession>: <recommended full name>" for every hit
for entry in payload["results"]:
    accession = entry["primaryAccession"]
    full_name = entry["proteinDescription"]["recommendedName"]["fullName"]["value"]
    print(f"{accession}: {full_name}")
Example 2: Retrieve Protein Sequence
import requests
# Fetch the FASTA record for human insulin and print the response body
accession = "P01308"
fasta_url = f"https://rest.uniprot.org/uniprotkb/{accession}.fasta"
fasta_text = requests.get(fasta_url).text
print(fasta_text)
Example 3: Custom Fields
import requests
# Ask for selected columns only, returned as tab-separated text,
# for reviewed entries of the BRCA1 gene
tsv_text = requests.get(
    "https://rest.uniprot.org/uniprotkb/search",
    params={
        "query": "gene:BRCA1 AND reviewed:true",
        "format": "tsv",
        "fields": "accession,gene_names,organism_name,length,cc_function",
    },
).text
print(tsv_text)
Example 4: ID Mapping
import requests
import time
def map_uniprot_ids(ids, from_db, to_db, poll_interval=3, max_wait=None):
    """Map identifiers between databases with the UniProt ID-mapping service.

    Submits a mapping job, polls its status until it finishes, then
    downloads the results.

    Args:
        ids: Iterable of identifier strings to map.
        from_db: Name of the source database (e.g. "UniProtKB_AC-ID").
        to_db: Name of the target database (e.g. "PDB").
        poll_interval: Seconds to sleep between two status checks.
        max_wait: Optional upper bound, in seconds, on the total polling
            time. ``None`` (the default) polls without a limit.

    Returns:
        The decoded JSON body returned by the results endpoint.

    Raises:
        requests.HTTPError: If any request returns an HTTP error status.
        RuntimeError: If the job stops in a state other than NEW, RUNNING
            or FINISHED without providing results.
        TimeoutError: If ``max_wait`` elapses before the job finishes.
    """
    base_url = "https://rest.uniprot.org/idmapping"

    # Submit job
    response = requests.post(
        f"{base_url}/run",
        data={"from": from_db, "to": to_db, "ids": ",".join(ids)},
    )
    # Fail here with the HTTP error rather than with a KeyError on "jobId".
    response.raise_for_status()
    job_id = response.json()["jobId"]

    # Poll for completion
    status_url = f"{base_url}/status/{job_id}"
    deadline = None if max_wait is None else time.monotonic() + max_wait
    while True:
        response = requests.get(status_url)
        response.raise_for_status()
        status = response.json()
        if "results" in status or "failedIds" in status:
            break
        job_status = status.get("jobStatus")
        if job_status == "FINISHED":
            break
        if job_status not in ("NEW", "RUNNING"):
            # A failed job never gains a "results" key; stop instead of
            # polling forever.
            raise RuntimeError(
                f"ID mapping job {job_id} ended with status {job_status!r}"
            )
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"ID mapping job {job_id} did not finish within {max_wait} seconds"
            )
        time.sleep(poll_interval)

    # Get results
    response = requests.get(f"{base_url}/results/{job_id}")
    response.raise_for_status()
    return response.json()
# Map two UniProtKB accessions to their PDB identifiers and show the raw result
ids = ["P01308", "P04637"]
mapping = map_uniprot_ids(ids, from_db="UniProtKB_AC-ID", to_db="PDB")
print(mapping)
Example 5: Stream Large Results
import requests
# Stream all reviewed human proteins
url = "https://rest.uniprot.org/uniprotkb/stream"
params = {
    "query": "organism_id:9606 AND reviewed:true",
    "format": "fasta",
}

# The context manager releases the connection once the download is done.
with requests.get(url, params=params, stream=True) as response:
    # Stop before writing anything if the server answered with an error,
    # so an error message is never saved as FASTA.
    response.raise_for_status()
    # Write the raw bytes in chunks. Binary mode avoids decoding the body
    # and re-encoding it with the platform's default encoding.
    with open("human_proteins.fasta", "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:
                f.write(chunk)
Example 6: Pagination
import requests
def get_all_results(query, fields=None):
    """Return every result for a UniProtKB search, following pagination.

    Args:
        query: UniProt query string.
        fields: Optional iterable of field names; when given, only these
            fields are requested.

    Returns:
        A list with the ``results`` items of all pages, in page order.

    Raises:
        requests.HTTPError: If a page request returns an HTTP error status.
    """
    url = "https://rest.uniprot.org/uniprotkb/search"
    params = {
        "query": query,
        "format": "json",
        "size": 500,  # Max size per page
    }
    if fields:
        params["fields"] = ",".join(fields)

    all_results = []
    while url:
        response = requests.get(url, params=params)
        response.raise_for_status()
        all_results.extend(response.json()["results"])

        # UniProt announces the next page in the HTTP "Link" header
        # (rel="next"), not in the JSON body; requests parses that header
        # into response.links.
        url = response.links.get("next", {}).get("url")
        # The next-page URL already carries the query, size and cursor, so
        # the initial parameters must not be appended a second time.
        params = None

    return all_results
# Collect all reviewed human entries whose protein name matches "kinase"
kinase_query = "protein_name:kinase AND organism_id:9606 AND reviewed:true"
kinase_fields = ["accession", "gene_names", "protein_name"]
results = get_all_results(kinase_query, fields=kinase_fields)
print(f"Found {len(results)} proteins")
cURL Examples
Example 1: Simple Search
# Search for insulin proteins
# query=insulin is the search term, format=json selects JSON output and
# size=5 sets the page size to 5 results.
curl "https://rest.uniprot.org/uniprotkb/search?query=insulin&format=json&size=5"
Example 2: Get Protein Entry
# Get human insulin in FASTA format
# P01308 is the entry's accession; the .fasta suffix selects the output format.
curl "https://rest.uniprot.org/uniprotkb/P01308.fasta"
Example 3: Custom Fields
# Get specific fields in TSV format
# fields= takes a comma-separated list of the columns to return
# (here accession, gene_names and length) for entries matching gene:BRCA1.
curl "https://rest.uniprot.org/uniprotkb/search?query=gene:BRCA1&format=tsv&fields=accession,gene_names,length"
Example 4: ID Mapping - Submit Job
# Submit mapping job
# Sends a form-encoded POST: "from" and "to" name the source and target
# databases, "ids" is a comma-separated list of identifiers to map.
# The JSON response contains a jobId, which the results request needs.
curl -X POST "https://rest.uniprot.org/idmapping/run" \
-H "Content-Type: application/x-www-form-urlencoded" \
-d "from=UniProtKB_AC-ID&to=PDB&ids=P01308,P04637"
Example 5: ID Mapping - Get Results
# Get mapping results (replace JOB_ID with the jobId returned by the
# submit request)
# NOTE: the Python and JavaScript examples poll /idmapping/status/JOB_ID
# until the job has finished before requesting the results.
curl "https://rest.uniprot.org/idmapping/results/JOB_ID"
Example 6: Download All Results
# Download all human reviewed proteins
# --fail makes curl exit with an error on an HTTP failure instead of saving
# the server's error response into human_proteins.fasta.
curl --fail "https://rest.uniprot.org/uniprotkb/stream?query=organism_id:9606+AND+reviewed:true&format=fasta" \
  -o human_proteins.fasta
R Examples
Example 1: Basic Search
library(httr)
library(jsonlite)
# Query UniProtKB for insulin entries of organism 9606, returned as JSON
search_url <- "https://rest.uniprot.org/uniprotkb/search"
response <- GET(
  search_url,
  query = list(
    query = "insulin AND organism_id:9606",
    format = "json",
    size = 10
  )
)

# Parse the response body text into R structures
body_text <- content(response, "text")
parsed <- fromJSON(body_text)

# Keep only the accession and protein description columns of the results
wanted_cols <- c("primaryAccession", "proteinDescription")
proteins <- parsed$results[, wanted_cols]
print(proteins)
Example 2: Get Sequences
library(httr)
# Download one entry in FASTA format and write it to the console
accession <- "P01308"
fasta_url <- paste0("https://rest.uniprot.org/uniprotkb/", accession, ".fasta")
sequence <- content(GET(fasta_url), "text")
cat(sequence)
Example 3: Download to Data Frame
library(httr)
library(readr)
# Request selected columns as TSV for reviewed BRCA1 entries
search_url <- "https://rest.uniprot.org/uniprotkb/search"
response <- GET(
  search_url,
  query = list(
    query = "gene:BRCA1 AND reviewed:true",
    format = "tsv",
    fields = "accession,gene_names,organism_name,length"
  )
)

# Parse the tab-separated response text into a data frame
tsv_text <- content(response, "text")
data <- read_tsv(tsv_text)
print(data)
JavaScript Examples
Example 1: Fetch API
// Search UniProtKB (JSON, page size 10) and resolve to the array of results.
// The query is URL-encoded before being placed in the request URL.
async function searchUniProt(query) {
  const response = await fetch(
    `https://rest.uniprot.org/uniprotkb/search?query=${encodeURIComponent(query)}&format=json&size=10`
  );
  const { results } = await response.json();
  return results;
}
// Usage: log the results for insulin entries of organism 9606
const insulinSearch = searchUniProt("insulin AND organism_id:9606");
insulinSearch.then((hits) => {
  console.log(hits);
});
Example 2: Get Protein Entry
// Fetch a single entry by accession. The format becomes the URL suffix;
// "json" responses are parsed into an object, any other format is
// returned as text.
async function getProtein(accession, format = "json") {
  const response = await fetch(`https://rest.uniprot.org/uniprotkb/${accession}.${format}`);
  const wantsJson = format === "json";
  return wantsJson ? response.json() : response.text();
}
// Usage: print the FASTA text of entry P01308
const fastaRequest = getProtein("P01308", "fasta");
fastaRequest.then((fastaText) => {
  console.log(fastaText);
});
Example 3: ID Mapping
// Map identifiers between databases with the UniProt ID-mapping service.
// Submits a job, polls its status every 3 seconds until it finishes, then
// resolves to the parsed JSON of the results endpoint.
// Rejects with an Error if a request returns an HTTP error status or the
// job stops in a state other than NEW, RUNNING or FINISHED.
async function mapIds(ids, fromDb, toDb) {
  const baseUrl = "https://rest.uniprot.org/idmapping";

  // Submit job
  const submitResponse = await fetch(`${baseUrl}/run`, {
    method: "POST",
    body: new URLSearchParams({
      from: fromDb,
      to: toDb,
      ids: ids.join(",")
    })
  });
  if (!submitResponse.ok) {
    throw new Error(`ID mapping submission failed: HTTP ${submitResponse.status}`);
  }
  const { jobId } = await submitResponse.json();

  // Poll for completion
  const statusUrl = `${baseUrl}/status/${jobId}`;
  while (true) {
    const statusResponse = await fetch(statusUrl);
    if (!statusResponse.ok) {
      throw new Error(`ID mapping status check failed: HTTP ${statusResponse.status}`);
    }
    const status = await statusResponse.json();
    if ("results" in status || "failedIds" in status || status.jobStatus === "FINISHED") {
      break;
    }
    if (status.jobStatus !== "NEW" && status.jobStatus !== "RUNNING") {
      // A failed job never gains a "results" key; stop instead of polling forever.
      throw new Error(`ID mapping job ${jobId} ended with status ${status.jobStatus}`);
    }
    await new Promise(resolve => setTimeout(resolve, 3000));
  }

  // Get results
  const resultsResponse = await fetch(`${baseUrl}/results/${jobId}`);
  if (!resultsResponse.ok) {
    throw new Error(`ID mapping results request failed: HTTP ${resultsResponse.status}`);
  }
  return await resultsResponse.json();
}
// Usage: map two UniProtKB accessions to PDB and log the result
const pdbMapping = mapIds(["P01308", "P04637"], "UniProtKB_AC-ID", "PDB");
pdbMapping.then((mapping) => {
  console.log(mapping);
});
Advanced Examples
Example: Batch Processing with Rate Limiting
import requests
import time
from typing import List, Dict
class UniProtClient:
    """Small UniProt REST client that spaces out requests and fetches entries in batches."""

    def __init__(self, rate_limit=1.0, timeout=30.0):
        """Create a client.

        Args:
            rate_limit: Minimum number of seconds between two requests.
            timeout: Seconds to wait for the server before a request is
                abandoned.
        """
        self.base_url = "https://rest.uniprot.org"
        self.rate_limit = rate_limit
        self.timeout = timeout
        # -inf guarantees that the very first request is never delayed.
        self.last_request = float("-inf")

    def _rate_limit(self):
        """Enforce rate limiting by sleeping until ``rate_limit`` seconds have passed."""
        # A monotonic clock is used so that system clock adjustments cannot
        # shorten or stretch the measured interval.
        elapsed = time.monotonic() - self.last_request
        if elapsed < self.rate_limit:
            time.sleep(self.rate_limit - elapsed)
        self.last_request = time.monotonic()

    def batch_get_proteins(self, accessions: List[str],
                           batch_size: int = 100) -> List[Dict]:
        """Get proteins in batches.

        Args:
            accessions: Accessions to look up.
            batch_size: Number of accessions combined into one search
                request; must be at least 1.

        Returns:
            The ``results`` items of all successful batches. A batch that
            returns an HTTP error is reported with ``print`` and skipped.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        results = []
        for start in range(0, len(accessions), batch_size):
            batch = accessions[start:start + batch_size]
            query = " OR ".join(f"accession:{acc}" for acc in batch)
            self._rate_limit()
            response = requests.get(
                f"{self.base_url}/uniprotkb/search",
                params={
                    "query": query,
                    "format": "json",
                    "size": batch_size,
                },
                timeout=self.timeout,
            )
            if response.ok:
                data = response.json()
                results.extend(data.get('results', []))
            else:
                print(f"Error in batch {start//batch_size}: {response.status_code}")
        return results
# Usage: fetch four entries with at least 0.5 seconds between requests
accessions = ["P01308", "P04637", "P12345", "Q9Y6K9"]
client = UniProtClient(rate_limit=0.5)
proteins = client.batch_get_proteins(accessions)
Example: Download with Progress Bar
import requests
from tqdm import tqdm
def download_with_progress(query, output_file, format="fasta"):
    """Download results with progress bar.

    Args:
        query: UniProt query string.
        output_file: Path of the file to write; it is opened in binary mode.
        format: Output format requested from the stream endpoint. The name
            shadows the ``format`` builtin but is kept so that existing
            keyword callers keep working.

    Raises:
        requests.HTTPError: If the server returns an HTTP error status.
    """
    url = "https://rest.uniprot.org/uniprotkb/stream"
    params = {
        "query": query,
        "format": format,
    }
    # The context manager releases the connection once the download is done.
    with requests.get(url, params=params, stream=True) as response:
        # Stop before creating the output file if the request failed, so an
        # error message is never saved as data.
        response.raise_for_status()
        # Without a Content-Length header the size is unknown; total=None
        # tells tqdm so, instead of claiming a total of 0 bytes.
        total_size = int(response.headers.get('content-length', 0)) or None
        with open(output_file, 'wb') as f, \
                tqdm(total=total_size, unit='B', unit_scale=True) as pbar:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
                pbar.update(len(chunk))
# Usage: save all entries matching the query to human_proteome.fasta
download_with_progress(
    query="organism_id:9606 AND reviewed:true",
    output_file="human_proteome.fasta",
)
Resources
- API Documentation: https://www.uniprot.org/help/api
- Interactive API Explorer: https://www.uniprot.org/api-documentation
- Python client (Unipressed): https://github.com/multimeric/Unipressed
- Bioservices package: https://bioservices.readthedocs.io/