Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:02:37 +08:00
commit c1d9dee646
38 changed files with 11210 additions and 0 deletions

View File

@@ -0,0 +1,28 @@
#!/bin/bash
#SBATCH --job-name=compleasm_first
#SBATCH --cpus-per-task=TOTAL_THREADS # Replace with total available CPUs (e.g., 64)
#SBATCH --mem-per-cpu=6G
#SBATCH --time=24:00:00
#SBATCH --output=logs/compleasm_first.%j.out
#SBATCH --error=logs/compleasm_first.%j.err
#
# Run compleasm on the FIRST genome listed in genome_list.txt (one path per
# line), using every CPU allocated to the job. Results are written to
# 01_busco_results/<genome_name>_compleasm, where <genome_name> is the file
# name with a trailing ".fasta" removed.
#
# Running a single genome first lets compleasm fetch the lineage database once
# before the array job processes the remaining genomes.
#
# Template placeholders to replace before submitting: TOTAL_THREADS, LINEAGE.
# Exit status: 0 on success, 1 on bad input, otherwise compleasm's own status.

source ~/.bashrc
conda activate phylo

# Strict mode is switched on only after the environment is loaded.
# NOTE(review): presumably ~/.bashrc / conda hooks are not written for
# `set -u`; confirm before moving this line up.
set -euo pipefail

# NOTE(review): SLURM opens the --output/--error files before this script
# starts, so logs/ must already exist at submission time - confirm on your
# cluster. This mkdir still helps subsequent submissions.
mkdir -p logs
mkdir -p 01_busco_results

# Fall back to one thread when the script is run outside of SLURM.
threads="${SLURM_CPUS_PER_TASK:-1}"

# Process FIRST genome only (downloads lineage database)
if [[ ! -r genome_list.txt ]]; then
  echo "ERROR: genome_list.txt not found or not readable in $(pwd)" >&2
  exit 1
fi

first_genome=$(head -n 1 genome_list.txt)
if [[ -z "${first_genome}" ]]; then
  echo "ERROR: first line of genome_list.txt is empty" >&2
  exit 1
fi

genome_name=$(basename "${first_genome}" .fasta)

echo "Processing first genome: ${genome_name} with ${threads} threads..."
echo "This will download the BUSCO lineage database for subsequent runs."

# Capture the exit status explicitly so the success message below is only
# printed when compleasm actually finished without error.
status=0
compleasm run \
  -a "${first_genome}" \
  -o "01_busco_results/${genome_name}_compleasm" \
  -l LINEAGE \
  -t "${threads}" || status=$?

if (( status != 0 )); then
  echo "ERROR: compleasm failed for ${genome_name} (exit status ${status})" >&2
  exit "${status}"
fi

echo "First genome complete! Lineage database is now cached."
echo "Submit the parallel job for remaining genomes: sbatch run_compleasm_parallel.job"

View File

@@ -0,0 +1,25 @@
#!/bin/bash
#SBATCH --job-name=compleasm_parallel
#SBATCH --array=2-NUM_GENOMES # Start from genome 2 (first genome already processed)
#SBATCH --cpus-per-task=THREADS_PER_JOB # e.g., 16 for 64-core system with 4 concurrent jobs
#SBATCH --mem-per-cpu=6G
#SBATCH --time=48:00:00
#SBATCH --output=logs/compleasm.%A_%a.out
#SBATCH --error=logs/compleasm.%A_%a.err
#
# SLURM array job: each task runs compleasm on one genome from
# genome_list.txt. The array task ID is used directly as the 1-based line
# number in that list; the array starts at 2 because line 1 is handled by a
# separate job. Results go to 01_busco_results/<genome_name>_compleasm, where
# <genome_name> is the file name with a trailing ".fasta" removed.
#
# Template placeholders to replace before submitting: NUM_GENOMES,
# THREADS_PER_JOB, LINEAGE.
# Exit status: 0 on success, 1 on bad input, otherwise compleasm's own status.

source ~/.bashrc
conda activate phylo

# Strict mode is switched on only after the environment is loaded.
# NOTE(review): presumably ~/.bashrc / conda hooks are not written for
# `set -u`; confirm before moving this line up.
set -euo pipefail

mkdir -p 01_busco_results

# Without a task ID the sed expression below would degrade to "p" and print
# the entire list, so refuse to run outside of an array job.
: "${SLURM_ARRAY_TASK_ID:?must be run as a SLURM array task (sbatch --array)}"
threads="${SLURM_CPUS_PER_TASK:-1}"

if [[ ! -r genome_list.txt ]]; then
  echo "ERROR: genome_list.txt not found or not readable in $(pwd)" >&2
  exit 1
fi

# Get genome for this array task (skipping the first one)
genome=$(sed -n "${SLURM_ARRAY_TASK_ID}p" genome_list.txt)

# An array index beyond the end of the list yields an empty string; stop here
# instead of handing compleasm an empty assembly path.
if [[ -z "${genome}" ]]; then
  echo "ERROR: genome_list.txt has no entry on line ${SLURM_ARRAY_TASK_ID}" >&2
  exit 1
fi

genome_name=$(basename "${genome}" .fasta)

echo "Processing ${genome_name} with ${threads} threads..."

compleasm run \
  -a "${genome}" \
  -o "01_busco_results/${genome_name}_compleasm" \
  -l LINEAGE \
  -t "${threads}"

View File

@@ -0,0 +1,27 @@
#!/bin/bash
#SBATCH --job-name=iqtree_partition
#SBATCH --cpus-per-task=18
#SBATCH --mem-per-cpu=4G
#SBATCH --time=72:00:00
#SBATCH --output=logs/partition_search.out
#SBATCH --error=logs/partition_search.err
#
# Run the IQ-TREE partition-scheme search on the concatenated supermatrix.
# Reads FcC_supermatrix.fas and partition_def.txt from 06_concatenation/ and
# writes all output there with the prefix "partition_search".
#
# Template placeholder to replace before submitting: MODEL_SET.

source ~/.bashrc
conda activate phylo

# Strict mode is switched on only after the environment is loaded.
# NOTE(review): presumably ~/.bashrc / conda hooks are not written for
# `set -u`; confirm before moving this line up.
set -euo pipefail

# Use organized directory structure. Abort if the directory is missing so
# iqtree is never started from the wrong working directory.
cd 06_concatenation || {
  echo "ERROR: cannot change into 06_concatenation from $(pwd)" >&2
  exit 1
}

# Fall back to one thread when the script is run outside of SLURM.
threads="${SLURM_CPUS_PER_TASK:-1}"

# NOTE(review): IQ-TREE documents -m TESTMERGEONLY as stopping after the
# model/partition-merging search, so -bb/-alrt may have no effect in this
# run - confirm against the IQ-TREE command reference before relying on
# support values from this step.
iqtree \
  -s FcC_supermatrix.fas \
  -spp partition_def.txt \
  -nt "${threads}" \
  -safe \
  -pre partition_search \
  -m TESTMERGEONLY \
  -mset MODEL_SET \
  -msub nuclear \
  -rcluster 10 \
  -bb 1000 \
  -alrt 1000
# Output: partition_search.best_scheme.nex

View File

@@ -0,0 +1,28 @@
#!/bin/bash
#SBATCH --job-name=iqtree_genes
#SBATCH --array=1-NUM_LOCI
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=4G
#SBATCH --time=2:00:00
#SBATCH --output=logs/%A_%a.genetree.out
#
# SLURM array job: each task infers one gene tree with IQ-TREE from a single
# alignment in trimmed_aa/. The array task ID is the 1-based line number in
# trimmed_aa/locus_alignments.txt, which is created on demand from the
# *_trimmed.fas files in that directory. Output files use the alignment name
# without its "_trimmed.fas" suffix as prefix.
#
# Template placeholder to replace before submitting: NUM_LOCI.

source ~/.bashrc
conda activate phylo

# Strict mode is switched on only after the environment is loaded.
# NOTE(review): presumably ~/.bashrc / conda hooks are not written for
# `set -u`; confirm before moving this line up.
set -euo pipefail

# Without a task ID the sed expression below would degrade to "p" and print
# the entire list, so refuse to run outside of an array job.
: "${SLURM_ARRAY_TASK_ID:?must be run as a SLURM array task (sbatch --array)}"

cd trimmed_aa || {
  echo "ERROR: cannot change into trimmed_aa from $(pwd)" >&2
  exit 1
}

# Create list of alignments if not present
if [[ ! -f locus_alignments.txt ]]; then
  # Expand the glob directly instead of parsing `ls`; nullglob makes a
  # directory without alignments produce an empty array rather than the
  # literal pattern.
  shopt -s nullglob
  alignments=(*_trimmed.fas)
  shopt -u nullglob

  if (( ${#alignments[@]} == 0 )); then
    echo "ERROR: no *_trimmed.fas alignments found in $(pwd)" >&2
    exit 1
  fi

  # Several array tasks can reach this point at the same time. Each one
  # writes its own temporary file and then renames it into place, so no task
  # ever reads a half-written list.
  tmp_list="locus_alignments.txt.tmp.${SLURM_ARRAY_TASK_ID}"
  printf '%s\n' "${alignments[@]}" > "${tmp_list}"
  mv -f "${tmp_list}" locus_alignments.txt
fi

locus=$(sed -n "${SLURM_ARRAY_TASK_ID}p" locus_alignments.txt)

# An array index beyond the end of the list yields an empty string; stop here
# instead of starting iqtree without an alignment.
if [[ -z "${locus}" ]]; then
  echo "ERROR: locus_alignments.txt has no entry on line ${SLURM_ARRAY_TASK_ID}" >&2
  exit 1
fi

if [[ ! -f "${locus}" ]]; then
  echo "ERROR: alignment '${locus}' listed in locus_alignments.txt does not exist" >&2
  exit 1
fi

iqtree \
  -s "${locus}" \
  -m MFP \
  -bb 1000 \
  -bnni \
  -czb \
  -pre "$(basename "${locus}" _trimmed.fas)" \
  -nt 1