# UniProt API Examples

Practical code examples for interacting with the UniProt REST API in multiple languages.

## Python Examples

### Example 1: Basic Search

```python
import requests

# Search for human insulin proteins (reviewed entries only)
url = "https://rest.uniprot.org/uniprotkb/search"
params = {
    "query": "insulin AND organism_id:9606 AND reviewed:true",
    "format": "json",
    "size": 10,
}

response = requests.get(url, params=params, timeout=30)
# Fail loudly on an HTTP error instead of hitting a confusing KeyError below.
response.raise_for_status()
data = response.json()

# Print "<accession>: <recommended full name>" for every hit on this page.
for result in data["results"]:
    name = result["proteinDescription"]["recommendedName"]["fullName"]["value"]
    print(f"{result['primaryAccession']}: {name}")
```

### Example 2: Retrieve Protein Sequence

```python
import requests

# Get human insulin sequence in FASTA format
accession = "P01308"
url = f"https://rest.uniprot.org/uniprotkb/{accession}.fasta"

response = requests.get(url, timeout=30)
# Without this check an error page would be printed as if it were a sequence.
response.raise_for_status()
print(response.text)
```

### Example 3: Custom Fields

```python
import requests

# Get specific fields only, as tab-separated text
url = "https://rest.uniprot.org/uniprotkb/search"
params = {
    "query": "gene:BRCA1 AND reviewed:true",
    "format": "tsv",
    "fields": "accession,gene_names,organism_name,length,cc_function",
}

response = requests.get(url, params=params, timeout=30)
# Stop on HTTP errors so an error body is never mistaken for TSV rows.
response.raise_for_status()
print(response.text)
```

### Example 4: ID Mapping

```python
import time

import requests


def map_uniprot_ids(ids, from_db, to_db):
    """Map identifiers from one database to another via the ID mapping service.

    Submits a job to ``/idmapping/run``, polls ``/idmapping/status/<jobId>``
    every 3 seconds until the job is done, then fetches
    ``/idmapping/results/<jobId>``.

    Args:
        ids: Iterable of identifier strings; they are joined with commas.
        from_db: Name of the source database (e.g. ``"UniProtKB_AC-ID"``).
        to_db: Name of the target database (e.g. ``"PDB"``).

    Returns:
        The decoded JSON body of the results endpoint.
        NOTE(review): the results endpoint may be paginated; only the first
        response is returned here - confirm for large jobs.

    Raises:
        requests.HTTPError: If any request returns an error status.
        RuntimeError: If the job reports a status other than NEW, RUNNING
            or FINISHED.
    """
    # Submit job
    submit_url = "https://rest.uniprot.org/idmapping/run"
    data = {
        "from": from_db,
        "to": to_db,
        "ids": ",".join(ids),
    }

    response = requests.post(submit_url, data=data, timeout=30)
    response.raise_for_status()
    job_id = response.json()["jobId"]

    # Poll for completion
    status_url = f"https://rest.uniprot.org/idmapping/status/{job_id}"
    while True:
        response = requests.get(status_url, timeout=30)
        response.raise_for_status()
        status = response.json()

        if "results" in status or "failedIds" in status:
            break

        # A pending job answers with {"jobStatus": ...}. Anything other than
        # a pending/finished state means the job will never produce results,
        # so raise instead of polling forever.
        job_status = status.get("jobStatus")
        if job_status == "FINISHED":
            break
        if job_status not in (None, "NEW", "RUNNING"):
            raise RuntimeError(
                f"ID mapping job {job_id} ended with status {job_status}"
            )

        time.sleep(3)

    # Get results
    results_url = f"https://rest.uniprot.org/idmapping/results/{job_id}"
    response = requests.get(results_url, timeout=30)
    response.raise_for_status()
    return response.json()


# Map UniProt IDs to PDB
ids = ["P01308", "P04637"]
mapping = map_uniprot_ids(ids, "UniProtKB_AC-ID", "PDB")
print(mapping)
```

### Example 5: Stream Large Results

```python
import requests

# Stream all reviewed human proteins
url = "https://rest.uniprot.org/uniprotkb/stream"
params = {
    "query": "organism_id:9606 AND reviewed:true",
    "format": "fasta",
}

# The context manager releases the connection even if writing fails.
with requests.get(url, params=params, stream=True, timeout=60) as response:
    response.raise_for_status()

    # Process in chunks. The raw bytes are written as-is: decoding each chunk
    # with a guessed charset and re-encoding it with the platform default can
    # corrupt data, and iter_content yields bytes anyway when no encoding is
    # known, which a text-mode file would reject.
    with open("human_proteins.fasta", "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)
```

### Example 6: Pagination

```python
import requests


def get_all_results(query, fields=None):
    """Get all results for *query*, following pagination to the last page.

    Args:
        query: UniProtKB search query string.
        fields: Optional list of return-field names; joined with commas and
            sent as the ``fields`` parameter.

    Returns:
        A list with the ``results`` entries of every page, in order.

    Raises:
        requests.HTTPError: If any page request returns an error status.
    """
    url = "https://rest.uniprot.org/uniprotkb/search"
    params = {
        "query": query,
        "format": "json",
        "size": 500,  # Max size per page
    }
    if fields:
        params["fields"] = ",".join(fields)

    all_results = []
    with requests.Session() as session:
        while url:
            response = session.get(url, params=params, timeout=60)
            response.raise_for_status()
            all_results.extend(response.json()["results"])

            # The next page is advertised in the HTTP "Link" header
            # (rel="next"), not in the JSON body; requests exposes the parsed
            # header as response.links. No "next" link means this was the
            # last page.
            url = response.links.get("next", {}).get("url")
            # The next-page URL already carries the query, size and cursor,
            # so the original parameters must not be sent again.
            params = None

    return all_results


# Get all human kinases
results = get_all_results(
    "protein_name:kinase AND organism_id:9606 AND reviewed:true",
    fields=["accession", "gene_names", "protein_name"],
)
print(f"Found {len(results)} proteins")
```

## cURL Examples

### Example 1: Simple Search

```bash
# Search for insulin proteins (first 5 hits, JSON)
curl "https://rest.uniprot.org/uniprotkb/search?query=insulin&format=json&size=5"
```

### Example 2: Get Protein Entry

```bash
# Get human insulin in FASTA format
curl "https://rest.uniprot.org/uniprotkb/P01308.fasta"
```

### Example 3: Custom Fields

```bash
# Get specific fields in TSV format
curl "https://rest.uniprot.org/uniprotkb/search?query=gene:BRCA1&format=tsv&fields=accession,gene_names,length"
```

### Example 4: ID Mapping - Submit Job

```bash
# Submit mapping job (the JSON reply contains the jobId used to fetch results)
curl -X POST "https://rest.uniprot.org/idmapping/run" \
  -H "Content-Type: application/x-www-form-urlencoded" \
  -d "from=UniProtKB_AC-ID&to=PDB&ids=P01308,P04637"
```

### Example 5: ID Mapping - Get Results

```bash
# Get mapping results (replace JOB_ID with the jobId returned by the submit call)
curl "https://rest.uniprot.org/idmapping/results/JOB_ID"
```

### Example 6: Download All Results

```bash
# Download all human reviewed proteins
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error body into the FASTA file.
curl --fail "https://rest.uniprot.org/uniprotkb/stream?query=organism_id:9606+AND+reviewed:true&format=fasta" \
  -o human_proteins.fasta
```

## R Examples

### Example 1: Basic Search

```r
library(httr)
library(jsonlite)

# Search for insulin proteins
url <- "https://rest.uniprot.org/uniprotkb/search"
query_params <- list(
  query = "insulin AND organism_id:9606",
  format = "json",
  size = 10
)

response <- GET(url, query = query_params)
# Abort on HTTP errors rather than trying to parse an error body.
stop_for_status(response)
# An explicit encoding avoids httr having to guess it.
data <- fromJSON(content(response, "text", encoding = "UTF-8"))

# Extract accessions and names
proteins <- data$results[, c("primaryAccession", "proteinDescription")]
print(proteins)
```

### Example 2: Get Sequences

```r
library(httr)

# Get protein sequence
accession <- "P01308"
url <- paste0("https://rest.uniprot.org/uniprotkb/", accession, ".fasta")

response <- GET(url)
# Abort on HTTP errors so an error page is not printed as a sequence.
stop_for_status(response)
sequence <- content(response, "text", encoding = "UTF-8")
cat(sequence)
```

### Example 3: Download to Data Frame

```r
library(httr)
library(readr)

# Get data as TSV
url <- "https://rest.uniprot.org/uniprotkb/search"
query_params <- list(
  query = "gene:BRCA1 AND reviewed:true",
  format = "tsv",
  fields = "accession,gene_names,organism_name,length"
)

response <- GET(url, query = query_params)
# Abort on HTTP errors so an error body is never parsed as a table.
stop_for_status(response)
# Parse the tab-separated response text into a data frame.
data <- read_tsv(content(response, "text", encoding = "UTF-8"))
print(data)
```

## JavaScript Examples

### Example 1: Fetch API

```javascript
// Search for proteins.
// Resolves to the `results` array of the first page (up to 10 entries);
// rejects if the server answers with an HTTP error status.
async function searchUniProt(query) {
  const url = `https://rest.uniprot.org/uniprotkb/search?query=${encodeURIComponent(query)}&format=json&size=10`;

  const response = await fetch(url);
  // fetch() only rejects on network failure, so HTTP errors need an explicit check.
  if (!response.ok) {
    throw new Error(`UniProt search failed: ${response.status} ${response.statusText}`);
  }
  const data = await response.json();

  return data.results;
}

// Usage
searchUniProt("insulin AND organism_id:9606")
  .then(results => console.log(results))
  .catch(error => console.error(error));
```

### Example 2: Get Protein Entry

```javascript
// Fetch one UniProtKB entry.
// `format` is used as the URL extension; "json" resolves to the parsed object,
// any other format resolves to the response text.
async function getProtein(accession, format = "json") {
  const url = `https://rest.uniprot.org/uniprotkb/${encodeURIComponent(accession)}.${format}`;

  const response = await fetch(url);
  // fetch() only rejects on network failure, so HTTP errors need an explicit check.
  if (!response.ok) {
    throw new Error(`UniProt request failed: ${response.status} ${response.statusText}`);
  }

  if (format === "json") {
    return await response.json();
  } else {
    return await response.text();
  }
}

// Usage
getProtein("P01308", "fasta")
  .then(sequence => console.log(sequence))
  .catch(error => console.error(error));
```

### Example 3: ID Mapping

```javascript
// Map identifiers between databases with the ID mapping service.
// Submits a job, polls its status every 3 seconds, then resolves to the JSON
// body of the results endpoint. Rejects on an HTTP error or when the job
// reports a status other than NEW, RUNNING or FINISHED.
async function mapIds(ids, fromDb, toDb) {
  // Submit job
  const submitUrl = "https://rest.uniprot.org/idmapping/run";
  const formData = new URLSearchParams({
    from: fromDb,
    to: toDb,
    ids: ids.join(",")
  });

  const submitResponse = await fetch(submitUrl, {
    method: "POST",
    body: formData
  });
  if (!submitResponse.ok) {
    throw new Error(`ID mapping submit failed: ${submitResponse.status}`);
  }
  const { jobId } = await submitResponse.json();

  // Poll for completion
  const statusUrl = `https://rest.uniprot.org/idmapping/status/${jobId}`;
  while (true) {
    const statusResponse = await fetch(statusUrl);
    if (!statusResponse.ok) {
      throw new Error(`ID mapping status check failed: ${statusResponse.status}`);
    }
    const status = await statusResponse.json();

    if ("results" in status || "failedIds" in status || status.jobStatus === "FINISHED") {
      break;
    }

    // Any other non-pending status means the job will never produce results;
    // stop here instead of polling forever.
    if (status.jobStatus && !["NEW", "RUNNING"].includes(status.jobStatus)) {
      throw new Error(`ID mapping job ${jobId} ended with status ${status.jobStatus}`);
    }

    await new Promise(resolve => setTimeout(resolve, 3000));
  }

  // Get results
  const resultsUrl = `https://rest.uniprot.org/idmapping/results/${jobId}`;
  const resultsResponse = await fetch(resultsUrl);
  if (!resultsResponse.ok) {
    throw new Error(`ID mapping results request failed: ${resultsResponse.status}`);
  }
  return await resultsResponse.json();
}

// Usage
mapIds(["P01308", "P04637"], "UniProtKB_AC-ID", "PDB")
  .then(mapping => console.log(mapping))
  .catch(error => console.error(error));
```

## Advanced Examples

### Example: Batch Processing with Rate Limiting

```python
import requests
import time
from typing import List, Dict


class UniProtClient:
    """Small UniProt REST client that spaces out its requests.

    Args:
        rate_limit: Minimum number of seconds between two requests.
    """

    def __init__(self, rate_limit=1.0):
        self.base_url = "https://rest.uniprot.org"
        self.rate_limit = rate_limit
        # Monotonic-clock stamp of the most recent request (0 = none yet).
        self.last_request = 0

    def _rate_limit(self):
        """Enforce rate limiting by sleeping until ``rate_limit`` has elapsed."""
        # The monotonic clock is used because wall-clock time can jump
        # backwards, which would turn the wait into an arbitrarily long sleep.
        elapsed = time.monotonic() - self.last_request
        if elapsed < self.rate_limit:
            time.sleep(self.rate_limit - elapsed)
        self.last_request = time.monotonic()

    def batch_get_proteins(self, accessions: List[str],
                           batch_size: int = 100) -> List[Dict]:
        """Get proteins in batches.

        Each batch is sent as one search query of the form
        ``accession:A OR accession:B ...``. A batch that fails is reported
        with ``print`` and skipped; the remaining batches are still fetched.

        Args:
            accessions: Accession strings to look up.
            batch_size: Number of accessions per request; must be at least 1.

        Returns:
            The ``results`` entries of all successful batches, concatenated.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        results = []

        for start in range(0, len(accessions), batch_size):
            batch = accessions[start:start + batch_size]
            query = " OR ".join(f"accession:{acc}" for acc in batch)

            self._rate_limit()

            response = requests.get(
                f"{self.base_url}/uniprotkb/search",
                params={
                    "query": query,
                    "format": "json",
                    "size": batch_size,
                },
                timeout=30,
            )

            if response.ok:
                data = response.json()
                results.extend(data.get('results', []))
            else:
                print(f"Error in batch {start // batch_size}: {response.status_code}")

        return results

# Usage: allow at most one request every 0.5 seconds
client = UniProtClient(rate_limit=0.5)
accessions = ["P01308", "P04637", "P12345", "Q9Y6K9"]
proteins = client.batch_get_proteins(accessions)
```

### Example: Download with Progress Bar

```python
import requests
from tqdm import tqdm


def download_with_progress(query, output_file, format="fasta"):
    """Download results with progress bar.

    Streams every entry matching *query* from the ``/uniprotkb/stream``
    endpoint into *output_file*, updating a tqdm bar as bytes arrive.

    Args:
        query: UniProtKB search query string.
        output_file: Path of the file to write (opened in binary mode).
        format: Value sent as the ``format`` request parameter.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = "https://rest.uniprot.org/uniprotkb/stream"
    params = {
        "query": query,
        "format": format,
    }

    # The context manager releases the connection even if writing fails.
    with requests.get(url, params=params, stream=True, timeout=60) as response:
        # Without this check an error body would be saved as the download.
        response.raise_for_status()

        # A missing Content-Length header yields 0; passing None instead lets
        # tqdm show a running byte count rather than a bar with a total of 0.
        total_size = int(response.headers.get('content-length', 0)) or None

        with open(output_file, 'wb') as f, \
                tqdm(total=total_size, unit='B', unit_scale=True) as pbar:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
                pbar.update(len(chunk))


# Usage
download_with_progress(
    "organism_id:9606 AND reviewed:true",
    "human_proteome.fasta",
)
```

## Resources

- API Documentation: https://www.uniprot.org/help/api
- Interactive API Explorer: https://www.uniprot.org/api-documentation
- Python client (Unipressed): https://github.com/multimeric/Unipressed
- Bioservices package: https://bioservices.readthedocs.io/