# gget Workflow Examples Extended workflow examples demonstrating how to combine multiple gget modules for common bioinformatics tasks. ## Table of Contents 1. [Complete Gene Analysis Pipeline](#complete-gene-analysis-pipeline) 2. [Comparative Structural Biology](#comparative-structural-biology) 3. [Cancer Genomics Analysis](#cancer-genomics-analysis) 4. [Single-Cell Expression Analysis](#single-cell-expression-analysis) 5. [Building Reference Transcriptomes](#building-reference-transcriptomes) 6. [Mutation Impact Assessment](#mutation-impact-assessment) 7. [Drug Target Discovery](#drug-target-discovery) --- ## Complete Gene Analysis Pipeline Comprehensive analysis of a gene from discovery to functional annotation. ```python import gget import pandas as pd # Step 1: Search for genes of interest print("Step 1: Searching for GABA receptor genes...") search_results = gget.search(["GABA", "receptor", "alpha"], species="homo_sapiens", andor="and") print(f"Found {len(search_results)} genes") # Step 2: Get detailed information print("\nStep 2: Getting detailed information...") gene_ids = search_results["ensembl_id"].tolist()[:5] # Top 5 genes gene_info = gget.info(gene_ids, pdb=True) print(gene_info[["ensembl_id", "gene_name", "uniprot_id", "description"]]) # Step 3: Retrieve sequences print("\nStep 3: Retrieving sequences...") nucleotide_seqs = gget.seq(gene_ids) protein_seqs = gget.seq(gene_ids, translate=True) # Save sequences with open("gaba_receptors_nt.fasta", "w") as f: f.write(nucleotide_seqs) with open("gaba_receptors_aa.fasta", "w") as f: f.write(protein_seqs) # Step 4: Get expression data print("\nStep 4: Getting tissue expression...") for gene_id, gene_name in zip(gene_ids, gene_info["gene_name"]): expr_data = gget.archs4(gene_name, which="tissue") print(f"\n{gene_name} expression:") print(expr_data.head()) # Step 5: Find correlated genes print("\nStep 5: Finding correlated genes...") correlated = gget.archs4(gene_info["gene_name"].iloc[0], which="correlation") correlated_top = correlated.head(20) print(correlated_top) # Step 6: Enrichment analysis on correlated genes print("\nStep 6: Performing enrichment analysis...") gene_list = correlated_top["gene_symbol"].tolist() enrichment = gget.enrichr(gene_list, database="ontology", plot=True) print(enrichment.head(10)) # Step 7: Get disease associations print("\nStep 7: Getting disease associations...") for gene_id, gene_name in zip(gene_ids[:3], gene_info["gene_name"][:3]): diseases = gget.opentargets(gene_id, resource="diseases", limit=5) print(f"\n{gene_name} disease associations:") print(diseases) # Step 8: Check for orthologs print("\nStep 8: Finding orthologs...") orthologs = gget.bgee(gene_ids[0], type="orthologs") print(orthologs) print("\nComplete gene analysis pipeline finished!") ``` --- ## Comparative Structural Biology Compare protein structures across species and analyze functional motifs. ```python import gget # Define genes for comparison human_gene = "ENSG00000169174" # PCSK9 mouse_gene = "ENSMUSG00000044254" # Pcsk9 print("Comparative Structural Biology Workflow") print("=" * 50) # Step 1: Get gene information print("\n1. Getting gene information...") human_info = gget.info([human_gene]) mouse_info = gget.info([mouse_gene]) print(f"Human: {human_info['gene_name'].iloc[0]}") print(f"Mouse: {mouse_info['gene_name'].iloc[0]}") # Step 2: Retrieve protein sequences print("\n2. Retrieving protein sequences...") human_seq = gget.seq(human_gene, translate=True) mouse_seq = gget.seq(mouse_gene, translate=True) # Save to file for alignment with open("pcsk9_sequences.fasta", "w") as f: f.write(human_seq) f.write("\n") f.write(mouse_seq) # Step 3: Align sequences print("\n3. Aligning sequences...") alignment = gget.muscle("pcsk9_sequences.fasta") print("Alignment completed. Visualizing in ClustalW format:") print(alignment) # Step 4: Get existing structures from PDB print("\n4. Searching PDB for existing structures...") # Search by sequence using BLAST pdb_results = gget.blast(human_seq, database="pdbaa", limit=5) print("Top PDB matches:") print(pdb_results[["Description", "Max Score", "Query Coverage"]]) # Download top structure if len(pdb_results) > 0: # Extract PDB ID from description (usually format: "PDB|XXXX|...") pdb_id = pdb_results.iloc[0]["Description"].split("|")[1] print(f"\nDownloading PDB structure: {pdb_id}") gget.pdb(pdb_id, save=True) # Step 5: Predict AlphaFold structures print("\n5. Predicting structures with AlphaFold...") # Note: This requires gget setup alphafold and is computationally intensive # Uncomment to run: # human_structure = gget.alphafold(human_seq, plot=True) # mouse_structure = gget.alphafold(mouse_seq, plot=True) print("(AlphaFold prediction skipped - uncomment to run)") # Step 6: Identify functional motifs print("\n6. Identifying functional motifs with ELM...") # Note: Requires gget setup elm # Uncomment to run: # human_ortholog_df, human_regex_df = gget.elm(human_seq) # print("Human PCSK9 functional motifs:") # print(human_regex_df) print("(ELM analysis skipped - uncomment to run)") # Step 7: Get orthology information print("\n7. Getting orthology information from Bgee...") orthologs = gget.bgee(human_gene, type="orthologs") print("PCSK9 orthologs:") print(orthologs) print("\nComparative structural biology workflow completed!") ``` --- ## Cancer Genomics Analysis Analyze cancer-associated genes and their mutations. ```python import gget import matplotlib.pyplot as plt print("Cancer Genomics Analysis Workflow") print("=" * 50) # Step 1: Search for cancer-related genes print("\n1. Searching for breast cancer genes...") genes = gget.search(["breast", "cancer", "BRCA"], species="homo_sapiens", andor="or", limit=20) print(f"Found {len(genes)} genes") # Focus on specific genes target_genes = ["BRCA1", "BRCA2", "TP53", "PIK3CA", "ESR1"] print(f"\nAnalyzing: {', '.join(target_genes)}") # Step 2: Get gene information print("\n2. Getting gene information...") gene_search = [] for gene in target_genes: result = gget.search([gene], species="homo_sapiens", limit=1) if len(result) > 0: gene_search.append(result.iloc[0]) gene_df = pd.DataFrame(gene_search) gene_ids = gene_df["ensembl_id"].tolist() # Step 3: Get disease associations print("\n3. Getting disease associations from OpenTargets...") for gene_id, gene_name in zip(gene_ids, target_genes): print(f"\n{gene_name} disease associations:") diseases = gget.opentargets(gene_id, resource="diseases", limit=3) print(diseases[["disease_name", "overall_score"]]) # Step 4: Get drug associations print("\n4. Getting drug associations...") for gene_id, gene_name in zip(gene_ids[:3], target_genes[:3]): print(f"\n{gene_name} drug associations:") drugs = gget.opentargets(gene_id, resource="drugs", limit=3) if len(drugs) > 0: print(drugs[["drug_name", "drug_type", "max_phase_for_all_diseases"]]) # Step 5: Search cBioPortal for studies print("\n5. Searching cBioPortal for breast cancer studies...") studies = gget.cbio_search(["breast", "cancer"]) print(f"Found {len(studies)} studies") print(studies[:5]) # Step 6: Create cancer genomics heatmap print("\n6. Creating cancer genomics heatmap...") if len(studies) > 0: # Select relevant studies selected_studies = studies[:2] # Top 2 studies gget.cbio_plot( selected_studies, target_genes, stratification="cancer_type", variation_type="mutation_occurrences", show=False ) print("Heatmap saved to ./gget_cbio_figures/") # Step 7: Query COSMIC database (requires setup) print("\n7. Querying COSMIC database...") # Note: Requires COSMIC account and database download # Uncomment to run: # for gene in target_genes[:2]: # cosmic_results = gget.cosmic( # gene, # cosmic_tsv_path="cosmic_cancer.tsv", # limit=10 # ) # print(f"\n{gene} mutations in COSMIC:") # print(cosmic_results) print("(COSMIC query skipped - requires database download)") # Step 8: Enrichment analysis print("\n8. Performing pathway enrichment...") enrichment = gget.enrichr(target_genes, database="pathway", plot=True) print("\nTop enriched pathways:") print(enrichment.head(10)) print("\nCancer genomics analysis completed!") ``` --- ## Single-Cell Expression Analysis Analyze single-cell RNA-seq data for specific cell types and tissues. ```python import gget import scanpy as sc print("Single-Cell Expression Analysis Workflow") print("=" * 50) # Note: Requires gget setup cellxgene # Step 1: Define genes and cell types of interest genes_of_interest = ["ACE2", "TMPRSS2", "CD4", "CD8A"] tissue = "lung" cell_types = ["type ii pneumocyte", "macrophage", "t cell"] print(f"\nAnalyzing genes: {', '.join(genes_of_interest)}") print(f"Tissue: {tissue}") print(f"Cell types: {', '.join(cell_types)}") # Step 2: Get metadata first print("\n1. Retrieving metadata...") metadata = gget.cellxgene( gene=genes_of_interest, tissue=tissue, species="homo_sapiens", meta_only=True ) print(f"Found {len(metadata)} datasets") print(metadata.head()) # Step 3: Download count matrices print("\n2. Downloading single-cell data...") # Note: This can be a large download adata = gget.cellxgene( gene=genes_of_interest, tissue=tissue, species="homo_sapiens", census_version="stable" ) print(f"AnnData shape: {adata.shape}") print(f"Genes: {adata.n_vars}") print(f"Cells: {adata.n_obs}") # Step 4: Basic QC and filtering with scanpy print("\n3. Performing quality control...") sc.pp.filter_cells(adata, min_genes=200) sc.pp.filter_genes(adata, min_cells=3) print(f"After QC - Cells: {adata.n_obs}, Genes: {adata.n_vars}") # Step 5: Normalize and log-transform print("\n4. Normalizing data...") sc.pp.normalize_total(adata, target_sum=1e4) sc.pp.log1p(adata) # Step 6: Calculate gene expression statistics print("\n5. Calculating expression statistics...") for gene in genes_of_interest: if gene in adata.var_names: expr = adata[:, gene].X.toarray().flatten() print(f"\n{gene} expression:") print(f" Mean: {expr.mean():.3f}") print(f" Median: {np.median(expr):.3f}") print(f" % expressing: {(expr > 0).sum() / len(expr) * 100:.1f}%") # Step 7: Get tissue expression from ARCHS4 for comparison print("\n6. Getting bulk tissue expression from ARCHS4...") for gene in genes_of_interest: tissue_expr = gget.archs4(gene, which="tissue") lung_expr = tissue_expr[tissue_expr["tissue"] == "lung"] if len(lung_expr) > 0: print(f"\n{gene} in lung (ARCHS4):") print(f" Median: {lung_expr['median'].iloc[0]:.3f}") # Step 8: Enrichment analysis print("\n7. Performing enrichment analysis...") enrichment = gget.enrichr(genes_of_interest, database="celltypes", plot=True) print("\nTop cell type associations:") print(enrichment.head(10)) # Step 9: Get disease associations print("\n8. Getting disease associations...") for gene in genes_of_interest: gene_search = gget.search([gene], species="homo_sapiens", limit=1) if len(gene_search) > 0: gene_id = gene_search["ensembl_id"].iloc[0] diseases = gget.opentargets(gene_id, resource="diseases", limit=3) print(f"\n{gene} disease associations:") print(diseases[["disease_name", "overall_score"]]) print("\nSingle-cell expression analysis completed!") ``` --- ## Building Reference Transcriptomes Prepare reference data for RNA-seq analysis pipelines. ```bash #!/bin/bash # Reference transcriptome building workflow echo "Reference Transcriptome Building Workflow" echo "==========================================" # Step 1: List available species echo -e "\n1. Listing available species..." gget ref --list_species > available_species.txt echo "Available species saved to available_species.txt" # Step 2: Download reference files for human echo -e "\n2. Downloading human reference files..." SPECIES="homo_sapiens" RELEASE=110 # Specify release for reproducibility # Download GTF annotation echo "Downloading GTF annotation..." gget ref -w gtf -r $RELEASE -d $SPECIES -o human_ref_gtf.json # Download cDNA sequences echo "Downloading cDNA sequences..." gget ref -w cdna -r $RELEASE -d $SPECIES -o human_ref_cdna.json # Download protein sequences echo "Downloading protein sequences..." gget ref -w pep -r $RELEASE -d $SPECIES -o human_ref_pep.json # Step 3: Build kallisto index (if kallisto is installed) echo -e "\n3. Building kallisto index..." if command -v kallisto &> /dev/null; then # Get cDNA FASTA file from download CDNA_FILE=$(ls *.cdna.all.fa.gz) if [ -f "$CDNA_FILE" ]; then kallisto index -i transcriptome.idx $CDNA_FILE echo "Kallisto index created: transcriptome.idx" else echo "cDNA FASTA file not found" fi else echo "kallisto not installed, skipping index building" fi # Step 4: Download genome for alignment-based methods echo -e "\n4. Downloading genome sequence..." gget ref -w dna -r $RELEASE -d $SPECIES -o human_ref_dna.json # Step 5: Get gene information for genes of interest echo -e "\n5. Getting information for specific genes..." gget search -s $SPECIES "TP53 BRCA1 BRCA2" -o key_genes.csv echo -e "\nReference transcriptome building completed!" ``` ```python # Python version import gget import json print("Reference Transcriptome Building Workflow") print("=" * 50) # Configuration species = "homo_sapiens" release = 110 genes_of_interest = ["TP53", "BRCA1", "BRCA2", "MYC", "EGFR"] # Step 1: Get reference information print("\n1. Getting reference information...") ref_info = gget.ref(species, release=release) # Save reference information with open("reference_info.json", "w") as f: json.dump(ref_info, f, indent=2) print("Reference information saved to reference_info.json") # Step 2: Download specific files print("\n2. Downloading reference files...") # GTF annotation gget.ref(species, which="gtf", release=release, download=True) # cDNA sequences gget.ref(species, which="cdna", release=release, download=True) # Step 3: Get information for genes of interest print(f"\n3. Getting information for {len(genes_of_interest)} genes...") gene_data = [] for gene in genes_of_interest: result = gget.search([gene], species=species, limit=1) if len(result) > 0: gene_data.append(result.iloc[0]) # Get detailed info if gene_data: gene_ids = [g["ensembl_id"] for g in gene_data] detailed_info = gget.info(gene_ids) detailed_info.to_csv("genes_of_interest_info.csv", index=False) print("Gene information saved to genes_of_interest_info.csv") # Step 4: Get sequences print("\n4. Retrieving sequences...") sequences_nt = gget.seq(gene_ids) sequences_aa = gget.seq(gene_ids, translate=True) with open("key_genes_nucleotide.fasta", "w") as f: f.write(sequences_nt) with open("key_genes_protein.fasta", "w") as f: f.write(sequences_aa) print("\nReference transcriptome building completed!") print(f"Files created:") print(" - reference_info.json") print(" - genes_of_interest_info.csv") print(" - key_genes_nucleotide.fasta") print(" - key_genes_protein.fasta") ``` --- ## Mutation Impact Assessment Analyze the impact of genetic mutations on protein structure and function. ```python import gget import pandas as pd print("Mutation Impact Assessment Workflow") print("=" * 50) # Define mutations to analyze mutations = [ {"gene": "TP53", "mutation": "c.818G>A", "description": "R273H hotspot"}, {"gene": "EGFR", "mutation": "c.2573T>G", "description": "L858R activating"}, ] # Step 1: Get gene information print("\n1. Getting gene information...") for mut in mutations: results = gget.search([mut["gene"]], species="homo_sapiens", limit=1) if len(results) > 0: mut["ensembl_id"] = results["ensembl_id"].iloc[0] print(f"{mut['gene']}: {mut['ensembl_id']}") # Step 2: Get sequences print("\n2. Retrieving wild-type sequences...") for mut in mutations: # Get nucleotide sequence nt_seq = gget.seq(mut["ensembl_id"]) mut["wt_sequence"] = nt_seq # Get protein sequence aa_seq = gget.seq(mut["ensembl_id"], translate=True) mut["wt_protein"] = aa_seq # Step 3: Generate mutated sequences print("\n3. Generating mutated sequences...") # Create mutation dataframe for gget mutate mut_df = pd.DataFrame({ "seq_ID": [m["gene"] for m in mutations], "mutation": [m["mutation"] for m in mutations] }) # For each mutation for mut in mutations: # Extract sequence from FASTA lines = mut["wt_sequence"].split("\n") seq = "".join(lines[1:]) # Create single mutation df single_mut = pd.DataFrame({ "seq_ID": [mut["gene"]], "mutation": [mut["mutation"]] }) # Generate mutated sequence mutated = gget.mutate([seq], mutations=single_mut) mut["mutated_sequence"] = mutated print("Mutated sequences generated") # Step 4: Get existing structure information print("\n4. Getting structure information...") for mut in mutations: # Get info with PDB IDs info = gget.info([mut["ensembl_id"]], pdb=True) if "pdb_id" in info.columns and pd.notna(info["pdb_id"].iloc[0]): pdb_ids = info["pdb_id"].iloc[0].split(";") print(f"\n{mut['gene']} PDB structures: {', '.join(pdb_ids[:3])}") # Download first structure if len(pdb_ids) > 0: pdb_id = pdb_ids[0].strip() mut["pdb_id"] = pdb_id gget.pdb(pdb_id, save=True) else: print(f"\n{mut['gene']}: No PDB structure available") mut["pdb_id"] = None # Step 5: Predict structures with AlphaFold (optional) print("\n5. Predicting structures with AlphaFold...") # Note: Requires gget setup alphafold and is computationally intensive # Uncomment to run: # for mut in mutations: # print(f"Predicting {mut['gene']} wild-type structure...") # wt_structure = gget.alphafold(mut["wt_protein"]) # # print(f"Predicting {mut['gene']} mutant structure...") # # Would need to translate mutated sequence first # # mutant_structure = gget.alphafold(mutated_protein) print("(AlphaFold prediction skipped - uncomment to run)") # Step 6: Find functional motifs print("\n6. Identifying functional motifs...") # Note: Requires gget setup elm # Uncomment to run: # for mut in mutations: # ortholog_df, regex_df = gget.elm(mut["wt_protein"]) # print(f"\n{mut['gene']} functional motifs:") # print(regex_df) print("(ELM analysis skipped - uncomment to run)") # Step 7: Get disease associations print("\n7. Getting disease associations...") for mut in mutations: diseases = gget.opentargets( mut["ensembl_id"], resource="diseases", limit=5 ) print(f"\n{mut['gene']} ({mut['description']}) disease associations:") print(diseases[["disease_name", "overall_score"]]) # Step 8: Query COSMIC for mutation frequency print("\n8. Querying COSMIC database...") # Note: Requires COSMIC database download # Uncomment to run: # for mut in mutations: # cosmic_results = gget.cosmic( # mut["mutation"], # cosmic_tsv_path="cosmic_cancer.tsv", # limit=10 # ) # print(f"\n{mut['gene']} {mut['mutation']} in COSMIC:") # print(cosmic_results) print("(COSMIC query skipped - requires database download)") print("\nMutation impact assessment completed!") ``` --- ## Drug Target Discovery Identify and validate potential drug targets for specific diseases. ```python import gget import pandas as pd print("Drug Target Discovery Workflow") print("=" * 50) # Step 1: Search for disease-related genes disease = "alzheimer" print(f"\n1. Searching for {disease} disease genes...") genes = gget.search([disease], species="homo_sapiens", limit=50) print(f"Found {len(genes)} potential genes") # Step 2: Get detailed information print("\n2. Getting detailed gene information...") gene_ids = genes["ensembl_id"].tolist()[:20] # Top 20 gene_info = gget.info(gene_ids[:10]) # Limit to avoid timeout # Step 3: Get disease associations from OpenTargets print("\n3. Getting disease associations...") disease_scores = [] for gene_id, gene_name in zip(gene_info["ensembl_id"], gene_info["gene_name"]): diseases = gget.opentargets(gene_id, resource="diseases", limit=10) # Filter for Alzheimer's disease alzheimer = diseases[diseases["disease_name"].str.contains("Alzheimer", case=False, na=False)] if len(alzheimer) > 0: disease_scores.append({ "ensembl_id": gene_id, "gene_name": gene_name, "disease_score": alzheimer["overall_score"].max() }) disease_df = pd.DataFrame(disease_scores).sort_values("disease_score", ascending=False) print("\nTop disease-associated genes:") print(disease_df.head(10)) # Step 4: Get tractability information print("\n4. Assessing target tractability...") top_targets = disease_df.head(5) for _, row in top_targets.iterrows(): tractability = gget.opentargets( row["ensembl_id"], resource="tractability" ) print(f"\n{row['gene_name']} tractability:") print(tractability) # Step 5: Get expression data print("\n5. Getting tissue expression data...") for _, row in top_targets.iterrows(): # Brain expression from OpenTargets expression = gget.opentargets( row["ensembl_id"], resource="expression", filter_tissue="brain" ) print(f"\n{row['gene_name']} brain expression:") print(expression) # Tissue expression from ARCHS4 tissue_expr = gget.archs4(row["gene_name"], which="tissue") brain_expr = tissue_expr[tissue_expr["tissue"].str.contains("brain", case=False, na=False)] print(f"ARCHS4 brain expression:") print(brain_expr) # Step 6: Check for existing drugs print("\n6. Checking for existing drugs...") for _, row in top_targets.iterrows(): drugs = gget.opentargets(row["ensembl_id"], resource="drugs", limit=5) print(f"\n{row['gene_name']} drug associations:") if len(drugs) > 0: print(drugs[["drug_name", "drug_type", "max_phase_for_all_diseases"]]) else: print("No drugs found") # Step 7: Get protein-protein interactions print("\n7. Getting protein-protein interactions...") for _, row in top_targets.iterrows(): interactions = gget.opentargets( row["ensembl_id"], resource="interactions", limit=10 ) print(f"\n{row['gene_name']} interacts with:") if len(interactions) > 0: print(interactions[["gene_b_symbol", "interaction_score"]]) # Step 8: Enrichment analysis print("\n8. Performing pathway enrichment...") gene_list = top_targets["gene_name"].tolist() enrichment = gget.enrichr(gene_list, database="pathway", plot=True) print("\nTop enriched pathways:") print(enrichment.head(10)) # Step 9: Get structure information print("\n9. Getting structure information...") for _, row in top_targets.iterrows(): info = gget.info([row["ensembl_id"]], pdb=True) if "pdb_id" in info.columns and pd.notna(info["pdb_id"].iloc[0]): pdb_ids = info["pdb_id"].iloc[0].split(";") print(f"\n{row['gene_name']} PDB structures: {', '.join(pdb_ids[:3])}") else: print(f"\n{row['gene_name']}: No PDB structure available") # Could predict with AlphaFold print(f" Consider AlphaFold prediction") # Step 10: Generate target summary report print("\n10. Generating target summary report...") report = [] for _, row in top_targets.iterrows(): report.append({ "Gene": row["gene_name"], "Ensembl ID": row["ensembl_id"], "Disease Score": row["disease_score"], "Target Status": "High Priority" }) report_df = pd.DataFrame(report) report_df.to_csv("drug_targets_report.csv", index=False) print("\nTarget report saved to drug_targets_report.csv") print("\nDrug target discovery workflow completed!") ``` --- ## Tips for Workflow Development ### Error Handling ```python import gget def safe_gget_call(func, *args, **kwargs): """Wrapper for gget calls with error handling""" try: result = func(*args, **kwargs) return result except Exception as e: print(f"Error in {func.__name__}: {str(e)}") return None # Usage result = safe_gget_call(gget.search, ["ACE2"], species="homo_sapiens") if result is not None: print(result) ``` ### Rate Limiting ```python import time import gget def rate_limited_queries(gene_ids, delay=1): """Query multiple genes with rate limiting""" results = [] for i, gene_id in enumerate(gene_ids): print(f"Querying {i+1}/{len(gene_ids)}: {gene_id}") result = gget.info([gene_id]) results.append(result) if i < len(gene_ids) - 1: # Don't sleep after last query time.sleep(delay) return pd.concat(results, ignore_index=True) ``` ### Caching Results ```python import os import pickle import gget def cached_gget(cache_file, func, *args, **kwargs): """Cache gget results to avoid repeated queries""" if os.path.exists(cache_file): print(f"Loading from cache: {cache_file}") with open(cache_file, "rb") as f: return pickle.load(f) result = func(*args, **kwargs) with open(cache_file, "wb") as f: pickle.dump(result, f) print(f"Saved to cache: {cache_file}") return result # Usage result = cached_gget("ace2_info.pkl", gget.info, ["ENSG00000130234"]) ``` --- These workflows demonstrate how to combine multiple gget modules for comprehensive bioinformatics analyses. Adapt them to your specific research questions and data types.