Initial commit

2025-11-29 18:02:37 +08:00
commit c1d9dee646
38 changed files with 11210 additions and 0 deletions
--- a/skills/phylo_from_buscos/templates/local/02_compleasm_first.sh
+++ b/skills/phylo_from_buscos/templates/local/02_compleasm_first.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# run_compleasm_first.sh
+#
+# Runs compleasm on the FIRST genome listed in genome_list.txt only, so the
+# lineage database is downloaded once before the remaining genomes are
+# processed by the parallel script.
+#
+# Template placeholders to replace before running:
+#   TOTAL_THREADS (value on the assignment line) and LINEAGE (-l argument).
+source ~/.bashrc
+conda activate phylo
+
+# User-specified total CPU threads
+TOTAL_THREADS=TOTAL_THREADS  # Replace with total cores you want to use (e.g., 16, 32, 64)
+echo "Processing first genome with ${TOTAL_THREADS} CPU threads to download lineage database..."
+
+# Create output directory
+mkdir -p 01_busco_results || exit 1
+
+# Process FIRST genome only
+if [[ ! -r genome_list.txt ]]; then
+  echo "ERROR: genome_list.txt not found or not readable" >&2
+  exit 1
+fi
+first_genome=$(head -n 1 genome_list.txt)
+if [[ -z "${first_genome}" ]]; then
+  echo "ERROR: first line of genome_list.txt is empty" >&2
+  exit 1
+fi
+genome_name=$(basename "${first_genome}" .fasta)
+echo "Processing: ${genome_name}"
+
+# Stop on failure so the success message below is only printed when
+# compleasm actually finished.
+if ! compleasm run \
+  -a "${first_genome}" \
+  -o "01_busco_results/${genome_name}_compleasm" \
+  -l LINEAGE \
+  -t "${TOTAL_THREADS}"; then
+  echo "ERROR: compleasm failed for ${genome_name}" >&2
+  exit 1
+fi
+
+echo ""
+echo "First genome complete! Lineage database is now cached."
+echo "Now run the parallel script for remaining genomes: bash run_compleasm_parallel.sh"
--- a/skills/phylo_from_buscos/templates/local/02_compleasm_parallel.sh
+++ b/skills/phylo_from_buscos/templates/local/02_compleasm_parallel.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+# run_compleasm_parallel.sh
+#
+# Runs compleasm on every genome in genome_list.txt except the first one,
+# processing several genomes concurrently with GNU parallel.
+#
+# Template placeholders to replace before running: TOTAL_THREADS,
+# THREADS_PER_JOB and LINEAGE.
+source ~/.bashrc
+conda activate phylo
+
+# Threading configuration (adjust based on your system)
+TOTAL_THREADS=TOTAL_THREADS      # Total cores to use (e.g., 64)
+THREADS_PER_JOB=THREADS_PER_JOB  # Threads per genome (e.g., 16)
+
+# Fail early on unreplaced placeholders or zero, which would otherwise break
+# the division below.
+for setting in "${TOTAL_THREADS}" "${THREADS_PER_JOB}"; do
+  if [[ ! "${setting}" =~ ^[1-9][0-9]*$ ]]; then
+    echo "ERROR: thread settings must be positive integers (got '${setting}')" >&2
+    exit 1
+  fi
+done
+
+CONCURRENT_JOBS=$((TOTAL_THREADS / THREADS_PER_JOB))  # Calculated automatically
+# Integer division yields 0 when THREADS_PER_JOB exceeds TOTAL_THREADS, and
+# "parallel -j 0" means "as many jobs as possible"; fall back to one job.
+if (( CONCURRENT_JOBS < 1 )); then
+  CONCURRENT_JOBS=1
+fi
+
+echo "Configuration:"
+echo "  Total threads:      ${TOTAL_THREADS}"
+echo "  Threads per genome: ${THREADS_PER_JOB}"
+echo "  Concurrent genomes: ${CONCURRENT_JOBS}"
+echo ""
+
+if [[ ! -r genome_list.txt ]]; then
+  echo "ERROR: genome_list.txt not found or not readable" >&2
+  exit 1
+fi
+
+# Create output directory
+mkdir -p 01_busco_results || exit 1
+
+# Process remaining genomes (skip first one) in parallel.
+# parallel exits non-zero when any job fails, so the success message below
+# is only printed when every genome finished.
+if ! tail -n +2 genome_list.txt | parallel -j "${CONCURRENT_JOBS}" '
+  genome_name=$(basename {} .fasta)
+  echo "Processing ${genome_name} with THREADS_PER_JOB threads..."
+
+  compleasm run \
+    -a {} \
+    -o "01_busco_results/${genome_name}_compleasm" \
+    -l LINEAGE \
+    -t THREADS_PER_JOB
+'; then
+  echo "ERROR: one or more compleasm jobs failed" >&2
+  exit 1
+fi
+
+echo ""
+echo "All genomes processed!"
--- a/skills/phylo_from_buscos/templates/local/08a_partition_search.sh
+++ b/skills/phylo_from_buscos/templates/local/08a_partition_search.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Partition-scheme search: runs IQ-TREE with -m TESTMERGEONLY on
+# FcC_supermatrix.fas using the partitions in partition_def.txt.
+#
+# Template placeholder to replace before running: MODEL_SET (-mset argument).
+source ~/.bashrc
+conda activate phylo
+
+# Abort if the directory is missing so iqtree never runs in the wrong place.
+cd 06_concatenation || { echo "ERROR: cannot cd to 06_concatenation" >&2; exit 1; }
+
+# NOTE(review): TESTMERGEONLY looks like a selection-only mode; confirm that
+# -bb/-alrt are actually used in this step.
+if ! iqtree \
+  -s FcC_supermatrix.fas \
+  -spp partition_def.txt \
+  -nt 18 \
+  -safe \
+  -pre partition_search \
+  -m TESTMERGEONLY \
+  -mset MODEL_SET \
+  -msub nuclear \
+  -rcluster 10 \
+  -bb 1000 \
+  -alrt 1000; then
+  echo "ERROR: iqtree partition search failed" >&2
+  exit 1
+fi
+
+echo "Partition search complete! Best scheme: partition_search.best_scheme.nex"
--- a/skills/phylo_from_buscos/templates/local/08c_gene_trees_parallel.sh
+++ b/skills/phylo_from_buscos/templates/local/08c_gene_trees_parallel.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Builds one IQ-TREE gene tree per *_trimmed.fas alignment in trimmed_aa,
+# running several alignments at once with GNU parallel.
+source ~/.bashrc
+conda activate phylo
+
+cd trimmed_aa || { echo "ERROR: cannot cd to trimmed_aa" >&2; exit 1; }
+
+# Create list of alignments (glob instead of parsing ls output)
+shopt -s nullglob
+alignments=(*_trimmed.fas)
+if (( ${#alignments[@]} == 0 )); then
+  echo "ERROR: no *_trimmed.fas files found in trimmed_aa" >&2
+  exit 1
+fi
+printf '%s\n' "${alignments[@]}" > locus_alignments.txt
+
+# Run IQ-TREE in parallel (adjust -j for number of concurrent jobs).
+# "&&" keeps iqtree's exit status as the job status, so parallel can report
+# failed jobs through its own exit code.
+if ! parallel -j 4 '
+  prefix=$(basename {} _trimmed.fas)
+  iqtree -s {} -m MFP -bb 1000 -bnni -czb -pre "${prefix}" -nt 1 &&
+    echo "Tree complete: ${prefix}"
+' < locus_alignments.txt; then
+  echo "ERROR: one or more gene tree jobs failed" >&2
+  exit 1
+fi
+
+echo "All gene trees complete!"
--- a/skills/phylo_from_buscos/templates/local/08c_gene_trees_serial.sh
+++ b/skills/phylo_from_buscos/templates/local/08c_gene_trees_serial.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# Builds one IQ-TREE gene tree per *_trimmed.fas alignment in trimmed_aa,
+# one alignment at a time.
+source ~/.bashrc
+conda activate phylo
+
+cd trimmed_aa || { echo "ERROR: cannot cd to trimmed_aa" >&2; exit 1; }
+
+# Without nullglob an unmatched pattern would be passed to iqtree literally.
+shopt -s nullglob
+failed=0
+processed=0
+for locus in *_trimmed.fas; do
+    prefix=${locus%_trimmed.fas}
+    echo "Processing ${prefix}..."
+    # Keep going after a failure so the remaining loci are still attempted.
+    if ! iqtree -s "${locus}" -m MFP -bb 1000 -bnni -czb -pre "${prefix}" -nt 1; then
+        echo "ERROR: iqtree failed for ${prefix}" >&2
+        failed=$((failed + 1))
+    fi
+    processed=$((processed + 1))
+done
+
+if (( processed == 0 )); then
+    echo "ERROR: no *_trimmed.fas files found in trimmed_aa" >&2
+    exit 1
+fi
+if (( failed > 0 )); then
+    echo "ERROR: ${failed} of ${processed} gene trees failed" >&2
+    exit 1
+fi
+
+echo "All gene trees complete!"