Initial commit
This commit is contained in:
@@ -0,0 +1,26 @@
|
||||
#!/bin/bash
# run_compleasm_first.sh
#
# Runs compleasm on the FIRST genome listed in genome_list.txt, by itself,
# before the parallel script handles the remaining genomes. The messages
# printed below describe this run as the step that downloads and caches the
# lineage database.
#
# Template values to fill in before running:
#   TOTAL_THREADS (right-hand side of the assignment) - CPU threads to use
#   LINEAGE (argument of `-l`)                         - lineage to assess
#
# Input:  genome_list.txt in the current directory, one genome path per line.
# Output: 01_busco_results/<genome_name>_compleasm
# Exit:   non-zero if the configuration or input is invalid or compleasm fails.
source ~/.bashrc
conda activate phylo

# User-specified total CPU threads
TOTAL_THREADS=TOTAL_THREADS  # Replace with total cores you want to use (e.g., 16, 32, 64)

# Stop early with a clear message if the placeholder above was not replaced.
if ! [[ "${TOTAL_THREADS}" =~ ^[1-9][0-9]*$ ]]; then
  echo "Error: total threads must be a positive integer (got '${TOTAL_THREADS}')." >&2
  exit 1
fi

# -s: file exists and is not empty.
if [[ ! -s genome_list.txt ]]; then
  echo "Error: genome_list.txt is missing or empty." >&2
  exit 1
fi

echo "Processing first genome with ${TOTAL_THREADS} CPU threads to download lineage database..."

# Create output directory
mkdir -p 01_busco_results || exit 1

# Process FIRST genome only
first_genome=$(head -n 1 genome_list.txt)
if [[ -z "${first_genome}" ]]; then
  echo "Error: the first line of genome_list.txt is empty." >&2
  exit 1
fi

# Strip the directory part and a trailing .fasta to name the output folder.
genome_name=$(basename -- "${first_genome}" .fasta)
echo "Processing: ${genome_name}"

# Only report success when compleasm itself succeeded.
if ! compleasm run \
  -a "${first_genome}" \
  -o "01_busco_results/${genome_name}_compleasm" \
  -l LINEAGE \
  -t "${TOTAL_THREADS}"; then
  echo "Error: compleasm failed for ${first_genome}." >&2
  exit 1
fi

echo ""
echo "First genome complete! Lineage database is now cached."
echo "Now run the parallel script for remaining genomes: bash run_compleasm_parallel.sh"
|
||||
@@ -0,0 +1,33 @@
|
||||
#!/bin/bash
# run_compleasm_parallel.sh
#
# Runs compleasm on every genome in genome_list.txt except the first one,
# several genomes at a time via GNU parallel.
#
# Template values to fill in before running:
#   TOTAL_THREADS   (right-hand side of the assignment) - total cores to use
#   THREADS_PER_JOB (right-hand side of the assignment) - threads per genome
#   LINEAGE         (argument of `-l`)                   - lineage to assess
#
# Input:  genome_list.txt in the current directory, one genome path per line.
# Output: 01_busco_results/<genome_name>_compleasm for each genome.
# Exit:   non-zero if the configuration is invalid or any genome fails.
source ~/.bashrc
conda activate phylo

# Threading configuration (adjust based on your system)
TOTAL_THREADS=TOTAL_THREADS  # Total cores to use (e.g., 64)
THREADS_PER_JOB=THREADS_PER_JOB  # Threads per genome (e.g., 16)

# Stop early with a clear message if a placeholder above was not replaced.
if ! [[ "${TOTAL_THREADS}" =~ ^[1-9][0-9]*$ ]]; then
  echo "Error: total threads must be a positive integer (got '${TOTAL_THREADS}')." >&2
  exit 1
fi
if ! [[ "${THREADS_PER_JOB}" =~ ^[1-9][0-9]*$ ]]; then
  echo "Error: threads per genome must be a positive integer (got '${THREADS_PER_JOB}')." >&2
  exit 1
fi

CONCURRENT_JOBS=$((TOTAL_THREADS / THREADS_PER_JOB))  # Calculated automatically

# Integer division yields 0 when threads-per-job exceeds the total; GNU
# parallel treats "-j 0" as "run as many jobs as possible", so clamp to 1.
if ((CONCURRENT_JOBS < 1)); then
  CONCURRENT_JOBS=1
fi

# The job body below is single-quoted, so it is expanded by the shell that
# parallel starts, not by this script; export the value so that shell sees it.
export THREADS_PER_JOB

echo "Configuration:"
echo "  Total threads: ${TOTAL_THREADS}"
echo "  Threads per genome: ${THREADS_PER_JOB}"
echo "  Concurrent genomes: ${CONCURRENT_JOBS}"
echo ""

if [[ ! -r genome_list.txt ]]; then
  echo "Error: genome_list.txt is missing or not readable." >&2
  exit 1
fi

# Create output directory
mkdir -p 01_busco_results || exit 1

# Process remaining genomes (skip first one) in parallel.
# parallel substitutes {} with each input line; its exit status is non-zero
# when any job failed, which gates the final success message.
if ! tail -n +2 genome_list.txt | parallel -j "${CONCURRENT_JOBS}" '
  genome_name=$(basename {} .fasta)
  echo "Processing ${genome_name} with ${THREADS_PER_JOB} threads..."

  compleasm run \
    -a {} \
    -o "01_busco_results/${genome_name}_compleasm" \
    -l LINEAGE \
    -t "${THREADS_PER_JOB}"
'; then
  echo "Error: compleasm failed for one or more genomes." >&2
  exit 1
fi

echo ""
echo "All genomes processed!"
|
||||
@@ -0,0 +1,20 @@
|
||||
#!/bin/bash
# Runs an IQ-TREE partition-scheme search on the concatenated supermatrix in
# 06_concatenation, writing outputs under the prefix "partition_search".
#
# Template value to fill in before running:
#   MODEL_SET (argument of `-mset`) - substitution models to consider
#
# Inputs (inside 06_concatenation): FcC_supermatrix.fas, partition_def.txt
# Exit: non-zero if the directory is missing or iqtree fails.
source ~/.bashrc
conda activate phylo

# Abort rather than run iqtree in the wrong directory.
cd 06_concatenation || {
  echo "Error: cannot enter directory 06_concatenation." >&2
  exit 1
}

# NOTE(review): -m TESTMERGEONLY requests a model/partition-merging search;
# confirm whether -bb/-alrt have any effect in this mode.
if ! iqtree \
  -s FcC_supermatrix.fas \
  -spp partition_def.txt \
  -nt 18 \
  -safe \
  -pre partition_search \
  -m TESTMERGEONLY \
  -mset MODEL_SET \
  -msub nuclear \
  -rcluster 10 \
  -bb 1000 \
  -alrt 1000; then
  echo "Error: iqtree partition search failed." >&2
  exit 1
fi

echo "Partition search complete! Best scheme: partition_search.best_scheme.nex"
|
||||
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
# Builds one IQ-TREE gene tree per *_trimmed.fas alignment in trimmed_aa,
# running several alignments at a time via GNU parallel.
#
# Side effect: writes the alignment list to trimmed_aa/locus_alignments.txt.
# Exit: non-zero if the directory is missing, no alignments are found, or any
#       iqtree run fails.
source ~/.bashrc
conda activate phylo

# Abort rather than run in the wrong directory.
cd trimmed_aa || {
  echo "Error: cannot enter directory trimmed_aa." >&2
  exit 1
}

# Create list of alignments.
# Expand the glob directly instead of parsing `ls`; nullglob makes an
# unmatched pattern expand to nothing so the empty case can be detected.
shopt -s nullglob
alignments=(*_trimmed.fas)
shopt -u nullglob

if ((${#alignments[@]} == 0)); then
  echo "Error: no *_trimmed.fas alignments found in trimmed_aa." >&2
  exit 1
fi
printf '%s\n' "${alignments[@]}" > locus_alignments.txt

# Run IQ-TREE in parallel (adjust -j for number of concurrent jobs).
# "&&" keeps iqtree's exit status as the job status, so a failed tree is not
# reported as complete and parallel returns non-zero.
if ! parallel -j 4 '
  prefix=$(basename {} _trimmed.fas)
  iqtree -s {} -m MFP -bb 1000 -bnni -czb -pre "${prefix}" -nt 1 &&
    echo "Tree complete: ${prefix}"
' < locus_alignments.txt; then
  echo "Error: iqtree failed for one or more alignments." >&2
  exit 1
fi

echo "All gene trees complete!"
|
||||
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash
# Builds one IQ-TREE gene tree per *_trimmed.fas alignment in trimmed_aa,
# one alignment after another.
#
# Exit: non-zero if the directory is missing, no alignments are found, or any
#       iqtree run fails (remaining alignments are still attempted).
source ~/.bashrc
conda activate phylo

# Abort rather than run in the wrong directory.
cd trimmed_aa || {
  echo "Error: cannot enter directory trimmed_aa." >&2
  exit 1
}

# nullglob makes an unmatched pattern expand to nothing, so the literal
# string "*_trimmed.fas" is never handed to iqtree.
shopt -s nullglob
alignments=(*_trimmed.fas)
shopt -u nullglob

if ((${#alignments[@]} == 0)); then
  echo "Error: no *_trimmed.fas alignments found in trimmed_aa." >&2
  exit 1
fi

failed=0
for locus in "${alignments[@]}"; do
  # Output prefix is the file name without the _trimmed.fas suffix.
  prefix=${locus%_trimmed.fas}
  echo "Processing ${prefix}..."
  if ! iqtree -s "${locus}" -m MFP -bb 1000 -bnni -czb -pre "${prefix}" -nt 1; then
    echo "Error: iqtree failed for ${locus}." >&2
    failed=$((failed + 1))
  fi
done

if ((failed > 0)); then
  echo "Error: ${failed} gene tree(s) failed." >&2
  exit 1
fi

echo "All gene trees complete!"
|
||||
Reference in New Issue
Block a user