Initial commit
This commit is contained in:
161
skills/gget/scripts/gene_analysis.py
Executable file
161
skills/gget/scripts/gene_analysis.py
Executable file
@@ -0,0 +1,161 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Gene Analysis Script
|
||||
Quick analysis of a gene: search, info, sequences, expression, correlated genes, and disease/drug associations
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import gget
|
||||
|
||||
|
||||
def _fasta_text(records):
    """Return FASTA content as one string suitable for ``file.write``.

    A ``str`` is passed through unchanged. Any other iterable is treated as a
    sequence of FASTA lines and joined with newlines (newline-terminated when
    non-empty).
    """
    if isinstance(records, str):
        return records
    text = "\n".join(str(line) for line in records)
    return text + "\n" if text else text


def _pick_column(frame, *candidates):
    """Return the first name in *candidates* that is a column of *frame*.

    Falls back to the first candidate when none is present, so a missing
    column still surfaces as the usual ``KeyError`` at lookup time.
    """
    for name in candidates:
        if name in frame.columns:
            return name
    return candidates[0]


def analyze_gene(gene_name, species="homo_sapiens", output_prefix=None):
    """
    Perform comprehensive analysis of a gene.

    Runs, in order: gene search, detailed info lookup, nucleotide and protein
    sequence retrieval, ARCHS4 tissue expression, ARCHS4 correlated genes, and
    Open Targets disease and drug associations. Results are written to files
    named ``<output_prefix>_*``. Steps 4-7 are best-effort: a failure prints a
    warning and the analysis continues.

    Args:
        gene_name: Gene symbol to analyze
        species: Species name (default: homo_sapiens)
        output_prefix: Prefix for output files (default: gene_name, lowercased)

    Returns:
        False if the search found no match for the gene, True otherwise.
    """
    if output_prefix is None:
        output_prefix = gene_name.lower()

    # Paths actually written, so the closing summary never lists a file that
    # a failed best-effort step did not produce.
    written = []

    print(f"Analyzing gene: {gene_name}")
    print("=" * 60)

    # Step 1: Search for the gene
    print("\n1. Searching for gene...")
    search_results = gget.search([gene_name], species=species, limit=1)

    if search_results is None or len(search_results) == 0:
        print(f"Error: Gene '{gene_name}' not found in {species}")
        return False

    gene_id = search_results["ensembl_id"].iloc[0]
    print(f" Found: {gene_id}")
    print(f" Description: {search_results['ensembl_description'].iloc[0]}")

    # Step 2: Get detailed information
    print("\n2. Getting detailed information...")
    gene_info = gget.info([gene_id], pdb=True)
    info_path = f"{output_prefix}_info.csv"
    gene_info.to_csv(info_path, index=False)
    written.append(info_path)
    print(f" Saved to: {info_path}")

    if "uniprot_id" in gene_info.columns and gene_info["uniprot_id"].iloc[0]:
        print(f" UniProt ID: {gene_info['uniprot_id'].iloc[0]}")
    if "pdb_id" in gene_info.columns and gene_info["pdb_id"].iloc[0]:
        print(f" PDB IDs: {gene_info['pdb_id'].iloc[0]}")

    # Step 3: Get sequences
    print("\n3. Retrieving sequences...")
    nucleotide_seq = gget.seq([gene_id])
    protein_seq = gget.seq([gene_id], translate=True)

    # NOTE(review): gget.seq is documented to return a list of FASTA lines in
    # the Python API; writing that list directly raises TypeError, so it is
    # normalised to text first. Confirm against the installed gget version.
    sequence_outputs = (
        ("Nucleotide", nucleotide_seq, f"{output_prefix}_nucleotide.fasta"),
        ("Protein", protein_seq, f"{output_prefix}_protein.fasta"),
    )
    for label, records, fasta_path in sequence_outputs:
        if records is None or len(records) == 0:
            print(f" Warning: No {label.lower()} sequence returned")
            continue
        with open(fasta_path, "w", encoding="utf-8") as f:
            f.write(_fasta_text(records))
        written.append(fasta_path)
        print(f" {label} sequence saved to: {fasta_path}")

    # Step 4: Get tissue expression
    print("\n4. Getting tissue expression...")
    try:
        tissue_expr = gget.archs4(gene_name, which="tissue")
        tissue_path = f"{output_prefix}_tissue_expression.csv"
        tissue_expr.to_csv(tissue_path, index=False)
        written.append(tissue_path)
        print(f" Saved to: {tissue_path}")

        # Show top tissues
        # NOTE(review): the tissue label column may be named "id" rather than
        # "tissue" in gget's output - accept either; confirm.
        tissue_col = _pick_column(tissue_expr, "tissue", "id")
        top_tissues = tissue_expr.nlargest(5, "median")
        print("\n Top expressing tissues:")
        for _, row in top_tissues.iterrows():
            print(f" {row[tissue_col]}: median = {row['median']:.2f}")
    except Exception as e:
        print(f" Warning: Could not retrieve ARCHS4 data: {e}")

    # Step 5: Find correlated genes
    print("\n5. Finding correlated genes...")
    try:
        correlated = gget.archs4(gene_name, which="correlation")
        correlated_path = f"{output_prefix}_correlated_genes.csv"
        correlated.to_csv(correlated_path, index=False)
        written.append(correlated_path)
        print(f" Saved to: {correlated_path}")

        # Show top correlated
        # NOTE(review): the coefficient column may be "pearson_correlation"
        # rather than "correlation" - accept either; confirm.
        corr_col = _pick_column(correlated, "correlation", "pearson_correlation")
        print("\n Top 10 correlated genes:")
        for _, row in correlated.head(10).iterrows():
            print(f" {row['gene_symbol']}: r = {row[corr_col]:.3f}")
    except Exception as e:
        print(f" Warning: Could not retrieve correlation data: {e}")

    # Step 6: Get disease associations
    print("\n6. Getting disease associations...")
    try:
        diseases = gget.opentargets(gene_id, resource="diseases", limit=10)
        diseases_path = f"{output_prefix}_diseases.csv"
        diseases.to_csv(diseases_path, index=False)
        written.append(diseases_path)
        print(f" Saved to: {diseases_path}")

        # NOTE(review): Open Targets columns may be "name"/"score" rather than
        # "disease_name"/"overall_score" - accept either; confirm.
        name_col = _pick_column(diseases, "disease_name", "name")
        score_col = _pick_column(diseases, "overall_score", "score")
        print("\n Top 5 disease associations:")
        for _, row in diseases.head(5).iterrows():
            print(f" {row[name_col]}: score = {row[score_col]:.3f}")
    except Exception as e:
        print(f" Warning: Could not retrieve disease data: {e}")

    # Step 7: Get drug associations
    print("\n7. Getting drug associations...")
    try:
        drugs = gget.opentargets(gene_id, resource="drugs", limit=10)
        if len(drugs) > 0:
            drugs_path = f"{output_prefix}_drugs.csv"
            drugs.to_csv(drugs_path, index=False)
            written.append(drugs_path)
            print(f" Saved to: {drugs_path}")
            print(f"\n Found {len(drugs)} drug associations")
        else:
            print(" No drug associations found")
    except Exception as e:
        print(f" Warning: Could not retrieve drug data: {e}")

    print("\n" + "=" * 60)
    print("Analysis complete!")
    print(f"\nOutput files (prefix: {output_prefix}):")
    for path in written:
        print(f" - {path}")

    return True
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses the gene symbol plus the optional species and output-prefix flags,
    runs ``analyze_gene``, and exits with status 0 when it reports success and
    1 otherwise (including on Ctrl-C or any raised exception).
    """
    cli = argparse.ArgumentParser(
        description="Perform comprehensive analysis of a gene using gget"
    )
    cli.add_argument("gene", help="Gene symbol to analyze")
    cli.add_argument(
        "-s", "--species", default="homo_sapiens", help="Species (default: homo_sapiens)"
    )
    cli.add_argument(
        "-o", "--output", help="Output prefix for files (default: gene name)"
    )
    options = cli.parse_args()

    try:
        finished = analyze_gene(options.gene, options.species, options.output)
    except KeyboardInterrupt:
        print("\n\nAnalysis interrupted by user")
        exit_status = 1
    except Exception as exc:
        print(f"\n\nError: {exc}")
        exit_status = 1
    else:
        exit_status = 0 if finished else 1

    # Single exit point; SystemExit is raised outside the handlers above.
    sys.exit(exit_status)
|
||||
|
||||
|
||||
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user