Initial commit
This commit is contained in:
@@ -0,0 +1,28 @@
|
||||
#!/bin/bash
#SBATCH --job-name=compleasm_first
#SBATCH --cpus-per-task=TOTAL_THREADS # Replace with total available CPUs (e.g., 64)
#SBATCH --mem-per-cpu=6G
#SBATCH --time=24:00:00
#SBATCH --output=logs/compleasm_first.%j.out
#SBATCH --error=logs/compleasm_first.%j.err

# Purpose: run compleasm on the FIRST genome listed in genome_list.txt so that
#          the lineage database is downloaded once, before the array job
#          (run_compleasm_parallel.job) processes the remaining genomes.
# Inputs:  genome_list.txt - one genome FASTA path per line (first line is used)
# Outputs: 01_busco_results/<genome_name>_compleasm/
# Before submitting, replace the TOTAL_THREADS and LINEAGE placeholders.
#
# NOTE: the logs/ directory named in --output/--error should already exist at
# submission time; the mkdir below runs too late to help this job's own logs.

die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

source ~/.bashrc
conda activate phylo || die "could not activate conda environment 'phylo'"

# Strict mode is switched on only after the environment setup, because
# ~/.bashrc and the conda hooks are outside this script's control.
set -euo pipefail

mkdir -p logs
mkdir -p 01_busco_results

# Fall back to a single thread when not running under Slurm.
threads="${SLURM_CPUS_PER_TASK:-1}"
genome_list="genome_list.txt"

# Process FIRST genome only (downloads lineage database)
[[ -s "${genome_list}" ]] || die "${genome_list} is missing or empty"
first_genome=$(head -n 1 "${genome_list}")
[[ -n "${first_genome}" ]] || die "first line of ${genome_list} is empty"
[[ -f "${first_genome}" ]] || die "genome file not found: ${first_genome}"
genome_name=$(basename "${first_genome}" .fasta)

echo "Processing first genome: ${genome_name} with ${threads} threads..."
echo "This will download the BUSCO lineage database for subsequent runs."

# Under 'set -e' a compleasm failure aborts the script here, so the success
# message below is only printed when the run actually finished.
compleasm run \
  -a "${first_genome}" \
  -o "01_busco_results/${genome_name}_compleasm" \
  -l LINEAGE \
  -t "${threads}"

echo "First genome complete! Lineage database is now cached."
echo "Submit the parallel job for remaining genomes: sbatch run_compleasm_parallel.job"
|
||||
@@ -0,0 +1,25 @@
|
||||
#!/bin/bash
#SBATCH --job-name=compleasm_parallel
#SBATCH --array=2-NUM_GENOMES # Start from genome 2 (first genome already processed)
#SBATCH --cpus-per-task=THREADS_PER_JOB # e.g., 16 for 64-core system with 4 concurrent jobs
#SBATCH --mem-per-cpu=6G
#SBATCH --time=48:00:00
#SBATCH --output=logs/compleasm.%A_%a.out
#SBATCH --error=logs/compleasm.%A_%a.err

# Purpose: Slurm array job; each task runs compleasm on the genome found on
#          line SLURM_ARRAY_TASK_ID of genome_list.txt. The array starts at 2
#          because line 1 is handled by the separate "first genome" job.
# Inputs:  genome_list.txt - one genome FASTA path per line
# Outputs: 01_busco_results/<genome_name>_compleasm/
# Before submitting, replace NUM_GENOMES, THREADS_PER_JOB and LINEAGE.

die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

source ~/.bashrc
conda activate phylo || die "could not activate conda environment 'phylo'"

# Strict mode is switched on only after the environment setup, because
# ~/.bashrc and the conda hooks are outside this script's control.
set -euo pipefail

mkdir -p 01_busco_results

# An empty task id would turn the sed expression below into plain "p", which
# prints every line of the list, so refuse to continue without one.
task_id="${SLURM_ARRAY_TASK_ID:?must be run as a Slurm array task}"
# Fall back to a single thread when the variable is not provided.
threads="${SLURM_CPUS_PER_TASK:-1}"
genome_list="genome_list.txt"

# Get genome for this array task (skipping the first one)
[[ -s "${genome_list}" ]] || die "${genome_list} is missing or empty"
genome=$(sed -n "${task_id}p" "${genome_list}")
# An array index past the end of the list yields an empty string.
[[ -n "${genome}" ]] || die "no entry on line ${task_id} of ${genome_list}"
[[ -f "${genome}" ]] || die "genome file not found: ${genome}"
genome_name=$(basename "${genome}" .fasta)

echo "Processing ${genome_name} with ${threads} threads..."

compleasm run \
  -a "${genome}" \
  -o "01_busco_results/${genome_name}_compleasm" \
  -l LINEAGE \
  -t "${threads}"
|
||||
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
#SBATCH --job-name=iqtree_partition
#SBATCH --cpus-per-task=18
#SBATCH --mem-per-cpu=4G
#SBATCH --time=72:00:00
#SBATCH --output=logs/partition_search.out
#SBATCH --error=logs/partition_search.err

# Purpose: run the IQ-TREE partition-scheme search on the concatenated
#          supermatrix inside 06_concatenation/.
# Inputs:  06_concatenation/FcC_supermatrix.fas, 06_concatenation/partition_def.txt
# Before submitting, replace the MODEL_SET placeholder.

die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

source ~/.bashrc
conda activate phylo || die "could not activate conda environment 'phylo'"

# Strict mode is switched on only after the environment setup, because
# ~/.bashrc and the conda hooks are outside this script's control.
set -euo pipefail

# Use organized directory structure. Abort if the directory is missing so
# that iqtree never runs (and writes partition_search.*) in the wrong place.
cd 06_concatenation || die "cannot cd to 06_concatenation"

[[ -f FcC_supermatrix.fas ]] || die "missing alignment: FcC_supermatrix.fas"
[[ -f partition_def.txt ]] || die "missing partition file: partition_def.txt"

# Fall back to a single thread when not running under Slurm.
threads="${SLURM_CPUS_PER_TASK:-1}"

# NOTE(review): -m TESTMERGEONLY presumably stops after model/partition
# selection, in which case -bb/-alrt would have no effect - confirm against
# the IQ-TREE documentation for the installed version.
iqtree \
  -s FcC_supermatrix.fas \
  -spp partition_def.txt \
  -nt "${threads}" \
  -safe \
  -pre partition_search \
  -m TESTMERGEONLY \
  -mset MODEL_SET \
  -msub nuclear \
  -rcluster 10 \
  -bb 1000 \
  -alrt 1000

# Output: partition_search.best_scheme.nex
|
||||
@@ -0,0 +1,28 @@
|
||||
#!/bin/bash
#SBATCH --job-name=iqtree_genes
#SBATCH --array=1-NUM_LOCI
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=4G
#SBATCH --time=2:00:00
#SBATCH --output=logs/%A_%a.genetree.out

# Purpose: Slurm array job; each task infers one gene tree with IQ-TREE from
#          the alignment found on line SLURM_ARRAY_TASK_ID of
#          trimmed_aa/locus_alignments.txt.
# Inputs:  trimmed_aa/*_trimmed.fas
# Outputs: trimmed_aa/<locus>.* (IQ-TREE files, prefix = name minus _trimmed.fas)
# Before submitting, replace the NUM_LOCI placeholder.

die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

source ~/.bashrc
conda activate phylo || die "could not activate conda environment 'phylo'"

# Strict mode is switched on only after the environment setup, because
# ~/.bashrc and the conda hooks are outside this script's control.
set -euo pipefail

cd trimmed_aa || die "cannot cd to trimmed_aa"

# An empty task id would turn the sed expression below into plain "p", which
# prints every line of the list, so refuse to continue without one.
task_id="${SLURM_ARRAY_TASK_ID:?must be run as a Slurm array task}"
list_file="locus_alignments.txt"

# Create list of alignments if not present (or present but empty).
# Array tasks start concurrently, so the list is written to a per-job
# temporary file and renamed into place: other tasks never observe a
# half-written list. Tasks that race here all produce identical content.
if [[ ! -s "${list_file}" ]]; then
  shopt -s nullglob
  alignments=( *_trimmed.fas )
  shopt -u nullglob
  (( ${#alignments[@]} > 0 )) || die "no *_trimmed.fas files found in $(pwd)"

  tmp_list="${list_file}.${SLURM_JOB_ID:-$$}.tmp"
  printf '%s\n' "${alignments[@]}" > "${tmp_list}"
  mv -f -- "${tmp_list}" "${list_file}"
fi

locus=$(sed -n "${task_id}p" "${list_file}")
# An array index past the end of the list yields an empty string.
[[ -n "${locus}" ]] || die "no entry on line ${task_id} of ${list_file}"
[[ -f "${locus}" ]] || die "alignment not found: ${locus}"

iqtree \
  -s "${locus}" \
  -m MFP \
  -bb 1000 \
  -bnni \
  -czb \
  -pre "$(basename "${locus}" _trimmed.fas)" \
  -nt 1
|
||||
Reference in New Issue
Block a user