Initial commit

2025-11-29 18:02:37 +08:00
commit c1d9dee646
38 changed files with 11210 additions and 0 deletions
--- a/skills/phylo_from_buscos/scripts/convert_fasconcat_to_partition.py
+++ b/skills/phylo_from_buscos/scripts/convert_fasconcat_to_partition.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+"""
+Convert FASconCAT info file to IQ-TREE partition format
+
+Usage:
+    python convert_fasconcat_to_partition.py FcC_info.xls [output_file.txt]
+
+Author: Bruno de Medeiros (Field Museum)
+Based on tutorials by Paul Frandsen (BYU)
+"""
+
+import sys
+
+
+def convert_fcc_to_partition(fcc_file, output_file="partition_def.txt", data_type="AA"):
+    """
+    Convert FASconCAT info file to IQ-TREE partition format
+
+    The first two lines of the info file are treated as headers and skipped.
+    Every following tab-separated row with at least three columns
+    (locus, start, end) becomes one output line of the form
+    ``<data_type>, <locus> = <start>-<end>``. Blank rows and rows with
+    fewer than three columns are ignored silently; rows whose start/end
+    columns are not whole numbers are skipped with a warning on stderr,
+    because they cannot form a valid partition range.
+
+    Args:
+        fcc_file: Path to FcC_info.xls file from FASconCAT
+        output_file: Path to output partition definition file
+        data_type: Label written in front of every partition
+            (default "AA", the value this script has always emitted)
+
+    Exits the process with status 1 if fcc_file does not exist.
+    """
+
+    try:
+        with open(fcc_file, 'r') as f:
+            lines = f.readlines()
+    except FileNotFoundError:
+        print(f"Error: File '{fcc_file}' not found", file=sys.stderr)
+        sys.exit(1)
+
+    partition_lines = []
+
+    # Skip first two header lines (FASconCAT INFO and column headers);
+    # numbering starts at 3 so warnings point at the real line in the file.
+    for line_number, line in enumerate(lines[2:], start=3):
+        line = line.strip()
+        if not line:
+            continue
+
+        parts = line.split('\t')
+        if len(parts) < 3:
+            continue
+
+        locus, start, end = (field.strip() for field in parts[:3])
+        if not (start.isdecimal() and end.isdecimal()):
+            print(
+                f"Warning: skipping line {line_number} of '{fcc_file}': "
+                f"non-numeric range '{start}-{end}'",
+                file=sys.stderr,
+            )
+            continue
+
+        partition_lines.append(f"{data_type}, {locus} = {start}-{end}\n")
+
+    with open(output_file, 'w') as out:
+        out.writelines(partition_lines)
+
+    print(f"Partition file created: {output_file}")
+    print(f"Number of partitions: {len(partition_lines)}")
+
+
+def main():
+    """Command-line entry point.
+
+    Takes the FASconCAT info file as the first positional argument and an
+    optional output path as the second; prints usage and exits with
+    status 1 when no argument is given.
+    """
+    cli_args = sys.argv[1:]
+
+    if not cli_args:
+        print("Usage: python convert_fasconcat_to_partition.py FcC_info.xls [output_file.txt]")
+        print("\nConverts FASconCAT info file to IQ-TREE partition format")
+        sys.exit(1)
+
+    source_path = cli_args[0]
+    if len(cli_args) > 1:
+        target_path = cli_args[1]
+    else:
+        target_path = "partition_def.txt"
+
+    convert_fcc_to_partition(source_path, target_path)
+
+
+if __name__ == "__main__":
+    main()
--- a/skills/phylo_from_buscos/scripts/download_ncbi_genomes.py
+++ b/skills/phylo_from_buscos/scripts/download_ncbi_genomes.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python3
+"""
+Download genomes from NCBI using BioProject or Assembly accessions
+
+Usage:
+    python download_ncbi_genomes.py --bioprojects PRJNA12345 PRJEB67890
+    python download_ncbi_genomes.py --assemblies GCA_123456789.1 GCF_987654321.1
+
+Requires: ncbi-datasets-pylib (pip install ncbi-datasets-pylib)
+
+Author: Bruno de Medeiros (Field Museum)
+Based on tutorials by Paul Frandsen (BYU)
+"""
+
+import argparse
+import sys
+import subprocess
+
+
+def download_using_cli(accessions, output_file="genomes.zip"):
+    """
+    Download genomes using NCBI datasets CLI
+
+    Runs ``datasets download genome accession <accessions...> --filename
+    <output_file>`` and reports the outcome on stdout/stderr.
+
+    Args:
+        accessions: List of BioProject or Assembly accessions (a single
+            accession given as a plain string is accepted as well)
+        output_file: Name of output zip file
+
+    Returns:
+        True if the command ran and exited successfully; False if there was
+        nothing to download, the command failed, or the ``datasets``
+        executable could not be found.
+    """
+    import shlex  # local import: only needed to echo the command safely
+
+    # A bare string would otherwise be rejected (or split into characters).
+    if isinstance(accessions, str):
+        accessions = [accessions]
+    accessions = list(accessions)
+
+    # Without this guard the CLI is invoked with no accession at all and
+    # fails with an unrelated-looking usage error.
+    if not accessions:
+        print("Error: no accessions to download", file=sys.stderr)
+        return False
+
+    cmd = ["datasets", "download", "genome", "accession", *accessions, "--filename", output_file]
+
+    # Quote each argument so the echoed line can be copy-pasted into a shell.
+    print(f"Running: {' '.join(shlex.quote(part) for part in cmd)}")
+    print("")
+
+    try:
+        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Error downloading genomes: {e}", file=sys.stderr)
+        print(e.stderr, file=sys.stderr)
+        return False
+    except FileNotFoundError:
+        print("Error: 'datasets' command not found", file=sys.stderr)
+        print("Install with: conda install -c conda-forge ncbi-datasets-cli", file=sys.stderr)
+        return False
+
+    print(result.stdout)
+    print(f"\nDownload complete: {output_file}")
+    print("Extract with: unzip " + output_file)
+    return True
+
+
+def get_bioproject_assemblies(bioprojects):
+    """
+    Look up the assemblies that belong to the given BioProjects.
+
+    Uses ``get_assembly_metadata_by_bioproject_accessions`` from
+    ncbi-datasets-pylib and echoes every hit while collecting it. If the
+    library cannot be imported, an installation hint is printed to stderr
+    and the process exits with status 1.
+
+    Args:
+        bioprojects: List of BioProject accessions
+
+    Returns:
+        List of tuples (assembly_accession, organism_name)
+    """
+    try:
+        from ncbi.datasets.metadata.genome import (
+            get_assembly_metadata_by_bioproject_accessions as fetch_metadata,
+        )
+    except ImportError:
+        for message in (
+            "Error: ncbi-datasets-pylib not installed",
+            "Install with: pip install ncbi-datasets-pylib",
+        ):
+            print(message, file=sys.stderr)
+        sys.exit(1)
+
+    print(f"Fetching assembly information for {len(bioprojects)} BioProject(s)...")
+    print("")
+
+    found = []
+    for record in fetch_metadata(bioprojects):
+        # NOTE(review): the attribute names below are taken as-is from this
+        # script; confirm them against the installed ncbi-datasets-pylib version.
+        pair = (record.accession, record.organism.organism_name)
+        found.append(pair)
+        print(f"  {pair[1]}: {pair[0]}")
+
+    print(f"\nFound {len(found)} assemblies")
+
+    return found
+
+
+def main():
+    """
+    Parse the command line and download (or only list) the requested genomes.
+
+    Exactly one of --bioprojects / --assemblies is required. In BioProject
+    mode the assemblies are resolved first; with --list-only their
+    accessions are printed and nothing is downloaded.
+
+    Exits with status 0 when the download succeeds and 1 when it fails or
+    when the BioProjects resolve to no assemblies at all.
+    """
+    parser = argparse.ArgumentParser(
+        description="Download genomes from NCBI using BioProject or Assembly accessions"
+    )
+
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument(
+        "--bioprojects",
+        nargs="+",
+        help="BioProject accessions (e.g., PRJNA12345 PRJEB67890)"
+    )
+    group.add_argument(
+        "--assemblies",
+        nargs="+",
+        help="Assembly accessions (e.g., GCA_123456789.1 GCF_987654321.1)"
+    )
+
+    parser.add_argument(
+        "-o", "--output",
+        default="genomes.zip",
+        help="Output zip file name (default: genomes.zip)"
+    )
+
+    parser.add_argument(
+        "--list-only",
+        action="store_true",
+        help="List assemblies without downloading (BioProject mode only)"
+    )
+
+    args = parser.parse_args()
+
+    if args.bioprojects:
+        assemblies = get_bioproject_assemblies(args.bioprojects)
+
+        if args.list_only:
+            print("\nAssembly accessions (use with --assemblies to download):")
+            for acc, _name in assemblies:
+                print(acc)
+            return
+
+        # Nothing resolved: stop here instead of invoking the downloader
+        # with an empty accession list.
+        if not assemblies:
+            print("Error: no assemblies found for the given BioProject(s)", file=sys.stderr)
+            sys.exit(1)
+
+        accessions = [acc for acc, _name in assemblies]
+    else:
+        # The mutually exclusive group is required, so --assemblies is set here.
+        if args.list_only:
+            print(
+                "Warning: --list-only only applies to --bioprojects; downloading anyway",
+                file=sys.stderr,
+            )
+        accessions = args.assemblies
+
+    success = download_using_cli(accessions, args.output)
+    sys.exit(0 if success else 1)
+
+
+if __name__ == "__main__":
+    main()
--- a/skills/phylo_from_buscos/scripts/extract_orthologs.sh
+++ b/skills/phylo_from_buscos/scripts/extract_orthologs.sh
@@ -0,0 +1,88 @@
+#!/bin/bash
+# Extract and reorganize single-copy orthologs from compleasm output
+#
+# Usage: bash extract_orthologs.sh LINEAGE_NAME
+#   Example: bash extract_orthologs.sh metazoa
+#
+# Reads:  01_busco_results/<genome>_compleasm/<LINEAGE>_odb*/gene_marker.fasta
+# Writes: single_copy_orthologs/<genome>.fasta            (one file per genome)
+#         single_copy_orthologs/unaligned_aa/<locus>.fas  (one file per locus,
+#         one sequence per genome, header = genome name)
+#
+# Author: Bruno de Medeiros (Field Museum)
+# Based on tutorials by Paul Frandsen (BYU)
+
+if [ $# -lt 1 ]; then
+  echo "Usage: bash extract_orthologs.sh LINEAGE_NAME"
+  echo "  Example: bash extract_orthologs.sh metazoa"
+  exit 1
+fi
+
+LINEAGE="$1"
+
+echo "Extracting single-copy orthologs for lineage: ${LINEAGE}"
+
+# Create directory for ortholog FASTA files
+mkdir -p single_copy_orthologs
+
+# Copy gene_marker.fasta files and rename by species
+count=0
+for dir in 01_busco_results/*_compleasm; do
+  # Also skips the literal pattern that the glob leaves behind when nothing matches.
+  if [ ! -d "${dir}" ]; then
+    continue
+  fi
+
+  genome=$(basename "${dir}" _compleasm)
+
+  # Auto-detect the OrthoDB version (odb10, odb11, odb12, etc.)
+  odb_dirs=("${dir}/${LINEAGE}_odb"*)
+  if [ -d "${odb_dirs[0]}" ]; then
+    marker_file="${odb_dirs[0]}/gene_marker.fasta"
+  else
+    echo "  Warning: No OrthoDB directory found for ${genome}" >&2
+    continue
+  fi
+
+  if [ -f "${marker_file}" ]; then
+    cp "${marker_file}" "single_copy_orthologs/${genome}.fasta"
+    echo "  Extracted: ${genome}"
+    count=$((count + 1))
+  else
+    echo "  Warning: Marker file not found for ${genome}" >&2
+  fi
+done
+
+if [ "${count}" -eq 0 ]; then
+  echo "Error: No gene_marker.fasta files found. Check lineage name." >&2
+  exit 1
+fi
+
+echo "Extracted ${count} genomes"
+echo ""
+echo "Now generating per-locus unaligned FASTA files..."
+
+cd single_copy_orthologs || exit 1
+mkdir -p unaligned_aa
+cd unaligned_aa || exit 1
+
+# The awk program below appends (>>) to the per-locus files, so leftovers from
+# an earlier run would end up with every sequence duplicated. Start clean.
+rm -f -- *.fas
+
+# AWK script to split by ortholog ID.
+# Each record is one FASTA entry (RS=">"), each field one line of it (FS="\n"):
+#   $1      header; the ortholog ID is the part before the first "_"
+#   $2..$NF sequence lines, joined so wrapped (multi-line) records stay complete
+# The output header is the source file name with the literal ".fasta" removed,
+# i.e. the genome name, so no later header clean-up pass is needed.
+awk 'BEGIN{RS=">"; FS="\n"} {
+  if (NF > 1) {
+    split($1, b, "_");
+    fnme = b[1] ".fas";
+    n = split(FILENAME, a, "/");
+    species = a[n];
+    gsub(/\.fasta/, "", species);
+    seq = "";
+    for (i = 2; i <= NF; i++) seq = seq $i;
+    if (seq != "") {
+      print ">" species "\n" seq >> fnme;
+      close(fnme);
+    }
+  }
+}' ../*.fasta
+
+# Count the generated files with a glob instead of parsing ls output
+# (avoids the padded number that "wc -l" prints on macOS).
+shopt -s nullglob
+locus_files=(*.fas)
+shopt -u nullglob
+num_loci=${#locus_files[@]}
+
+echo "Unaligned ortholog files generated: ${num_loci} loci"
+echo ""
+echo "Output directory: single_copy_orthologs/unaligned_aa/"
--- a/skills/phylo_from_buscos/scripts/generate_qc_report.sh
+++ b/skills/phylo_from_buscos/scripts/generate_qc_report.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# Quality control report generator for compleasm results
+#
+# Usage: bash generate_qc_report.sh [output_file.csv]
+#
+# Reads every 01_busco_results/<genome>_compleasm/summary.txt and writes one
+# CSV row per genome with the gene counts and the completeness percentage,
+# computed as (single-copy + duplicated) / total * 100.
+#
+# Author: Bruno de Medeiros (Field Museum)
+# Based on tutorials by Paul Frandsen (BYU)
+
+OUTPUT_FILE="${1:-qc_report.csv}"
+
+# get_count CATEGORY SUMMARY_FILE
+# Print the gene count of the first line starting with "CATEGORY:".
+# Lines look like "S:80.93%, 2283"; the count is the second comma-separated
+# field. Prints nothing when the category is absent.
+get_count() {
+  awk -F',' -v prefix="$1:" '
+    index($0, prefix) == 1 { gsub(/[[:space:]]/, "", $2); print $2; exit }
+  ' "$2"
+}
+
+# is_count VALUE -> succeeds only for a non-empty string of digits
+is_count() {
+  [[ "$1" =~ ^[0-9]+$ ]]
+}
+
+echo "Genome,Complete_SCO,Fragmented,Duplicated,Missing,Completeness(%)" > "${OUTPUT_FILE}"
+
+count=0
+for dir in 01_busco_results/*_compleasm; do
+  # Also skips the literal pattern that the glob leaves behind when nothing matches.
+  if [ ! -d "${dir}" ]; then
+    continue
+  fi
+
+  genome=$(basename "${dir}" _compleasm)
+  summary="${dir}/summary.txt"
+
+  if [ ! -f "${summary}" ]; then
+    echo "Warning: Summary file not found for ${genome}" >&2
+    continue
+  fi
+
+  # Parse completeness statistics from compleasm format
+  # compleasm uses: S: (single-copy), D: (duplicated), F: (fragmented), M: (missing)
+  complete=$(get_count S "${summary}")
+  duplicated=$(get_count D "${summary}")
+  fragmented=$(get_count F "${summary}")
+  missing=$(get_count M "${summary}")
+  # NOTE(review): compleasm summaries appear to carry an additional "I:" line
+  # (second fragmented class) - confirm against your compleasm version. It is
+  # counted in the total when present and treated as 0 otherwise.
+  other_fragmented=$(get_count I "${summary}")
+  if ! is_count "${other_fragmented}"; then
+    other_fragmented=0
+  fi
+
+  # Every value takes part in the arithmetic below, so all four must be numbers.
+  if ! is_count "${complete}" || ! is_count "${duplicated}" \
+     || ! is_count "${fragmented}" || ! is_count "${missing}"; then
+    echo "Warning: Could not parse statistics for ${genome}" >&2
+    continue
+  fi
+
+  total=$((complete + duplicated + fragmented + other_fragmented + missing))
+  if [ "${total}" -eq 0 ]; then
+    echo "Warning: No genes reported for ${genome}" >&2
+    continue
+  fi
+
+  # awk is already required for parsing above; using it here as well gives
+  # full floating-point precision before rounding to two decimals.
+  completeness=$(awk -v c="${complete}" -v d="${duplicated}" -v t="${total}" \
+    'BEGIN { printf "%.2f", (c + d) / t * 100 }')
+
+  echo "${genome},${complete},${fragmented},${duplicated},${missing},${completeness}" >> "${OUTPUT_FILE}"
+  count=$((count + 1))
+done
+
+if [ "${count}" -eq 0 ]; then
+  echo "Error: No compleasm summaries could be processed (looked in 01_busco_results/*_compleasm)" >&2
+  exit 1
+fi
+
+echo "QC report generated: ${OUTPUT_FILE}"
+echo "Genomes analyzed: ${count}"
--- a/skills/phylo_from_buscos/scripts/predownloaded_aliscore_alicut/ALICUT_V2.31.pl
+++ b/skills/phylo_from_buscos/scripts/predownloaded_aliscore_alicut/ALICUT_V2.31.pl
@@ -0,0 +1,742 @@
+#!/usr/bin/perl
+use strict       ;
+use File::Copy   ;
+use Tie::File    ;
+use Fcntl        ;
+use Term::Cap ;
+use Term::ANSIColor qw(:constants);
+use Getopt::Std  ;
+
+# updated on 13th february , 2009 by patrick kueck
+# updated on  2nd april    , 2009 by patrick kueck
+# updated on 15th june     , 2009 by patrick kueck
+# updated on 26th july     , 2009 by patrick kueck
+# updated on  7th september, 2011 by patrick kueck (alicut v2.3)
+# updated on 22.2.2017, by patrick kueck (alicut v2.31) -> correction of initial warning due to line 547, changed some terminal prints, argv handling commands
+
+# Option state. Element [0] of each array is the current setting; an option is
+# toggled by reversing its array ( ('no','yes') <-> ('yes','no') ).
+my @answer_remain_stems = ( 'no', 'yes' ) ;
+my @answer_codons       = ( 'no', 'yes' ) ;
+my @answer_third_pos    = ( 'no', 'yes' ) ;
+
+# Apply command-line switches first, then show the interactive menu.
+# argv_handling itself finishes by calling &menu with the same three array
+# references, so the second call below only runs if that call returns.
+&argv_handling ( \@answer_remain_stems, \@answer_codons, \@answer_third_pos ) ;
+&menu          ( \@answer_remain_stems, \@answer_codons, \@answer_third_pos ) ;
+
+
+
+# Evaluate the command-line switches (-r, -c, -3, -h, -p, -s) and then open the menu.
+#
+# Arguments: three array references holding the option state for
+#            remain-stems, remove-codon and remove-3rd-position
+#            (element [0] is the current 'no'/'yes' value).
+# All of @ARGV is glued into one string and split on "-", so every switch is
+# expected to be a single character preceded by a dash.
+sub argv_handling{
+	
+	my $aref_remain_stems = $_[0] ;
+	my $aref_codons       = $_[1] ;
+	my $aref_third_pos    = $_[2] ;
+	
+	# "-r -s" arrives as ("-r","-s") and becomes "-r-s"
+	my ( $commandline )   = join "", @ARGV ;
+		
+	# removes the first whitespace match only (no /g modifier)
+	$commandline =~ s/ |\s+// ;
+	my @commands = split "-", $commandline ;
+	# drop whatever precedes the first dash (normally the empty string)
+	shift @commands ;
+		
+	# Commands are processed in sorted order; for lowercase input this puts the
+	# option toggles (3, c, r) before the start command (s).
+	for my $single_command ( sort @commands ){
+			
+			if		( $single_command =~ /^r$/i ) { @$aref_remain_stems = ( reverse @$aref_remain_stems) }
+			elsif	( $single_command =~ /^c$/i ) { @$aref_codons       = ( reverse @$aref_codons      ) }
+			elsif	( $single_command =~ /^3$/i ) { @$aref_third_pos    = ( reverse @$aref_third_pos   ) }
+			elsif	( $single_command =~ /^h$/i ) { &help }
+			elsif	( $single_command =~ /^p$/i ) { &preface }
+			elsif	( $single_command =~ /^s$/i ) {  
+													# non-interactive run: show banner and settings, then process
+													# (start ends the program with exit)
+													&header ;
+													&commands( \$aref_remain_stems->[0], \$aref_codons->[0], \$aref_third_pos->[0]) ;
+													&start (\$aref_remain_stems->[0], \$aref_codons->[0], \$aref_third_pos->[0])
+			}
+			else	{ print "\n\t!COMMAND-ERROR!: unknown command \"-", $single_command, "\"\n" }
+	}
+		
+	# no start command given: continue interactively with the toggled options
+	&menu ( \@$aref_remain_stems, \@$aref_codons, \@$aref_third_pos)
+}
+
+# Print the right-aligned ALICUT welcome banner to STDOUT. Takes no arguments.
+sub header{
+	
+	# the same rule line frames the banner at the top and at the bottom
+	my $rule = "------------------------------------------------------------" ;
+	
+	printf "\n%68s\n", $rule ;
+	printf "%49s\n"  , "Welcome to ALICUT V2.31 !" ;
+	printf "%60s\n"  , "a Perlscript to cut ALISCORE identified RSS" ;
+	printf "%57s\n"  , "written by Patrick Kueck (ZFMK, Bonn)" ;
+	printf "%68s\n\n", $rule ;
+}
+
+# Print the current option settings to STDOUT.
+#
+# Arguments: three scalar references to the current values of the
+#            remain-stems, remove-codon and remove-3rd-position options.
+sub commands{
+	
+	my ( $sref_rem_stems, $sref_reo_codon, $sref_th_posit ) = @_ ;
+	
+	my $rule = "\n\t------------------------------------------------------------" ;
+	
+	# assemble the whole table first, then emit it with a single print
+	my $table = $rule
+	          . "\n\tRemain Stem Position   :\t" . $$sref_rem_stems
+	          . "\n\tRemove Codon           :\t" . $$sref_reo_codon
+	          . "\n\tRemove 3rd Position    :\t" . $$sref_th_posit
+	          . $rule . "\n" ;
+	
+	print $table ;
+}
+
+# Print the help text, wait for <return>, then go back to the main menu.
+#
+# Called as "&help" (no parentheses), so it sees the caller's @_ and hands the
+# same option references on to &menu at the end.
+# The text names this script (ALICUT_V2.31.pl), this version (V2.31) and the
+# info file that start() actually writes (ALICUT_info.xls).
+sub help{
+	
+	print
+ <<info;
+    
+	-------------------------------------------------------------------
+	-------------------------------------------------------------------
+	
+	General Information and Usage:
+	-------------------------------
+	ALICUT V2.31 removes ALISCORE identified RSS positions 
+	in given FASTA file(s) which are listed in the FASTA file cor-
+	responding ALISCORE "List" outfile(s). If structure sequences
+	are implemented, ALICUT V2.31 automatically replaces brackets 
+	of non rss positions by dots when they are paired with rss 
+	identified positions.
+	
+	
+	
+	Start ALICUT under default
+	-------------------------------------------------------------------
+	To remove all ALISCORE identified RSS positions:
+	
+	Type <s> return (via Menu) or
+	Type <perl ALICUT_V2.31.pl -s> <enter> (via command line)
+	
+	
+	
+	R-Option (Remain Stems)
+	-------------------------------------------------------------------
+	To remain all stem positions of identified rss within FASTA file(s): 
+	
+	Type <r> <return> <s> <enter> (via Menu)
+	Type <perl ALICUT_V2.31.pl -r -s> <enter> (via command line)
+	
+	
+	
+	C-Option (Remove Codon)
+	-------------------------------------------------------------------
+	To translate ALISCORE identified RSS positions of amino-acid data
+	into nucleotide triplet positions before exclusion of randomised
+	sequence sections:
+	
+	Type <c> return <s> return (via Menu) or
+	Type <perl ALICUT_V2.31.pl -c -s> <enter> (via command line)
+	
+	Note: 
+	This option is only useful if you have analysed amino-acid 
+	data, but wish to exclude nucleotide positions from the amino-acid 
+	data corresponding nucleotide data.
+	Be aware, that the name of the nucleotide data file has to be named 
+	equal to the ALISCORE analysed amino-acid data file. The C-option
+	can not be applied on amino-acid sequences. Otherwise, ALICUT
+	excludes the original ALISCORE identified sequence sections.
+	
+	
+	
+	3-Option (Remove 3rd position)
+	-------------------------------------------------------------------
+	To remove ALISCORE identified RSS only if its sequence position is 
+	up to a multiple of 3:
+	
+	Type <3> <return> <s> <return> (via Menu)
+	Type <perl ALICUT_V2.31.pl -3 -s> <enter> (via command line)
+	
+	Note: 
+	The 3-Option can be combined with the C-option. In this case,
+	positions of the ALISCORE "List" outfile(s) are translated into
+	codon positions from which only the 3rd positions are excluded.
+	The 3-Option can only be applied on nucleotide data. Otherwise, 
+	ALICUT excludes the original ALISCORE identified sequence sections.
+	
+	
+	
+	ALICUT IN and OUT files
+	-------------------------------------------------------------------
+	ALICUT V2.31 needs the original ALISCORE FASTA infile(s) and "List"
+	outfile(s) in the same folder as ALICUT V2.31.
+	
+	The "List" outfile(s) must contain the identified RSS positions
+	in one single line, separated by whitespace.
+	
+	e.g. 1 3 5 6 8 9 10 11 123 127 10000 10001
+	
+	ALICUT V2.31 can handle unlimited FASTA files in one single run.
+	The sole condition is that the Prefix of the ALISCORE "List" 
+	outfile(s) are identic with the associated FASTA infile(s). 
+	ALICUT V2.31 first searches for the ALISCORE "List" outfile(s), 
+	removes the Suffix "_List_random.txt" and searches for the 
+	"List" associated FASTA file(s).
+	
+	e.g. COI.fas_List_random.txt (ALISCORE "List" outfile)
+	     COI.fas                 (Associated FASTA infile)
+	
+	If both files are detected, ALICUT V2.31 excludes the RSS identified 
+	positions of the "List" file(s) in the associated
+	FASTA file(s) and saves the changes in a new FASTA outfile,
+	named "ALICUT_FASTAinputname.fas".
+	
+	Under the C- and 3-Option, removed sequence positions differ from
+	the original "List" position numbers. Under both options, ALICUT 
+	prints the actually removed positions in separate "ALICUT_LIST" 
+	outfile(s).
+	
+	ALICUT V2.31 generates also an info file "ALICUT_info.xls". This file 
+	informs about the number and percentage of removed positions, number 
+	of single sequences, single parameter settings, and sequence states 
+	of each restricted FASTA file. 
+	If structure sequences are identified by ALICUT, ALICUT generates
+	structure info file(s) which lists remaining stem pairs and loop 
+	positions, as well as percentages of both structure elements.
+	
+	-------------------------------------------------------------------
+	-------------------------------------------------------------------
+	
+	
+info
+;
+
+	print  "\tBACK to ALICUT MAIN-Menu:\t\t type <return>\n"                    ;
+	print  "\n\t------------------------------------------------------------\n\t"  ;
+
+	# the input itself is ignored; this only waits for <return>
+	chomp ( my $answer_xy = <STDIN> );
+
+	&menu ;
+	
+}
+
+# Print version, author and licence information, wait for <return>, then go
+# back to the main menu.
+#
+# Called as "&preface" (no parentheses), so it sees the caller's @_ and hands
+# the same option references on to &menu at the end.
+sub preface{
+
+print
+<<preface
+	
+	-----------------------ALICUT PREFACE-----------------------
+	
+	Version     : 2.31
+	Language    : PERL
+	Last Update : 22nd February, 2017
+	Author      : Patrick Kueck, ZFMK Bonn GERMANY
+	e-mail      : patrick_kueck\@web.de
+	Homepage    : http://www.zfmk.de
+	
+	This program is free software; you can redistribute it 
+	and/or modify it under the terms of the GNU General Public 
+	License as published by the Free Software Foundation ; 
+	either version 2 of the License, or (at your option) any 
+	later version.
+
+	This program is distributed in the hope that it will be 
+	useful, but WITHOUT ANY WARRANTY; without even the 
+	implied warranty of MERCHANTABILITY or FITNESS FOR A 
+	PARTICULAR PURPOSE. See the GNU General Public License for 
+	more details. 
+
+	You should have received a copy of the GNU General Public 
+	License along with this program; if not, write to the Free 
+	Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, 
+	USA.
+	
+	For further free downloadable programs visit:
+	www.zfmk.de/web/Forschung/Abteilungen/AG_Wgele/index.en.html
+	
+	------------------------------------------------------------
+
+preface
+; 
+
+	print  "\tBACK to ALICUT MAIN-Menu:\t\t type <return>\n"                       ;
+	print  "\n\t------------------------------------------------------------\n\t"  ;
+
+	# the input itself is ignored; this only waits for <return>
+	chomp ( my $answer_xy = <STDIN> );
+
+	&menu;
+}
+
+# Interactive main menu: show banner, command list and current settings, read
+# one command and dispatch it.
+#
+# Arguments: three array references holding the option state for
+#            remain-stems, remove-codon and remove-3rd-position
+#            (element [0] is the current 'no'/'yes' value).
+# The recursive "&menu" calls below are written without parentheses, so they
+# reuse the current @_ and the option state is carried along.
+sub menu{
+	
+	my $aref_remain_stems = $_[0] ;
+	my $aref_remove_codon = $_[1] ;
+	my $aref_third_posit  = $_[2] ;
+	
+	&header ;
+	
+	print "\n\tSTART ALICUT:\t\ttype <s> <return>"                                        ;
+	print "\n\tQUIT  ALICUT:\t\ttype <q> <return>"                                        ;
+	print "\n\tREMAIN STEMS:\t\ttype <r> <return>"                                        ;
+	print "\n\tREMOVE CODON:\t\ttype <c> <return>"                                        ;
+	print "\n\tREMOVE   3rd:\t\ttype <3> <return>"                                        ;
+	print "\n\tHELP:\t\t\ttype <h> <return>"                                              ;
+	print "\n\tPREFACE:\t\ttype <p> <return>"                                             ;
+	
+	&commands ( \$aref_remain_stems->[0], \$aref_remove_codon->[0], \$aref_third_posit->[0] );
+	
+	# NOTE(review): &commandline is not defined in the part of the file visible
+	# here; presumably it reads one line from the user - confirm.
+	my       $answer_opening =  &commandline ;
+	
+	# keep asking until one of the known single-character commands is entered
+	until  ( $answer_opening =~ /^s$|^r$|^c$|^p$|^h$|^1$|^2$|^q$|^3$/i ){ 
+		
+		print "\n\t!COMMAND-ERROR!: unknown command \"$answer_opening\"!\n" ;
+
+		$answer_opening =  &commandline ;
+	}
+
+	# r / c / 3 toggle an option by reversing its array and redraw the menu
+	# NOTE(review): the undocumented commands 1 and 2 call &error1 / &error2,
+	# which are not defined in the part of the file visible here - confirm they exist.
+	$answer_opening =~ /^s$/i      and do { &start ( \$aref_remain_stems->[0], \$aref_remove_codon->[0], \$aref_third_posit->[0] ) } ;
+	$answer_opening =~ /^r$/i      and do { @$aref_remain_stems = (reverse @$aref_remain_stems ); &menu                            } ;
+	$answer_opening =~ /^c$/i      and do { @$aref_remove_codon = (reverse @$aref_remove_codon ); &menu                            } ;
+	$answer_opening =~ /^3$/i      and do { @$aref_third_posit  = (reverse @$aref_third_posit  ); &menu                            } ;
+	$answer_opening =~ /^q$/i      and do {                                                        exit                            } ;
+	$answer_opening =~ /^h$/i      and do {                                                       &help                            } ;
+	$answer_opening =~ /^1$/       and do {                                                       &error1                          } ;
+	$answer_opening =~ /^2$/       and do {                                                       &error2                          } ;
+	$answer_opening =~ /^p$/i      and do {                                                       &preface                         }
+}
+
+# Main processing routine. For every ALISCORE list file (*List_*.txt) in the
+# current directory: read the cut positions, load the FASTA file the list
+# belongs to, validate it, remove the (optionally translated) positions and
+# write the result to ALICUT_[codon_][3rd_]<fasta name>. One summary row per
+# processed file is appended to ALICUT_info.xls. Ends the program with exit.
+#
+# Arguments: three scalar references to the current 'no'/'yes' values of the
+#            remain-stems, remove-codon and remove-3rd-position options.
+#
+# Files that are skipped (unreadable, empty, wrong list format) are not
+# counted in the final "correctly handled" number.
+sub start{
+	
+	my $sref_stems_remain = $_[0] ;
+	my $sref_codon_remove = $_[1] ;
+	my $sref_third_remove = $_[2] ;
+	
+	# number of list/FASTA pairs that were processed completely
+	my $j = 0  ;
+	
+	open  OUTinfo, ">>ALICUT_info.xls" or die "\n\t!FILE-ERROR!: Can not open File ALICUT_info.xls\n" ;
+	print OUTinfo  "\nUsed List File\tUsed Fasta file\tremove triplets\tremove 3rd position\tnumber taxa\tbp before\tbp after\tremaining bp [%]\tsequence type\n"  ;
+	
+	
+	
+	# Read IN of all List_random.txt files within the same folder as ALICUT and handle it
+	READING:
+	foreach my $file ( <*List_*.txt> ) {
+		
+		# Read in of the ALISCORE-list outfile
+		# (tie_linefeeds may jump to the next file via "next READING")
+		&tie_linefeeds ( \$file ) ;
+		( open IN, "<$file" ) or die "\n\t!FILE-ERROR!: Can not open listfile $file!\n" ;
+		my $line = <IN> ; chomp $line ;
+		
+		# check for correct aliscore list format
+		unless ( $line =~ /^(\d+ )+\d+$|^\d+$/ ) { warn "\t!FILE-WARN!: $file has no ALISCORE list format!\n" ; next READING }
+		
+		# Total number of randomized identified positions
+		my @cut_positions = split " ", $line  ; close IN ;
+		
+		
+		
+		# "filename.fas_List_random.txt" to "filename.fas"
+		( my $file_fasta = $file ) =~ s/_List_.+//  ;
+		
+		# Read in of the original ALISCORE fasta infile which belongs to the listfile
+		&tie_linefeeds ( \$file_fasta ) ;
+		( open INfas, "<$file_fasta" ) or warn "\t!FILE-WARN!: Can not find $file_fasta!\n" and next READING ;
+		
+		chomp ( my @inputfile = <INfas> ) ; close INfas ;
+		
+		# explicit block: written as a statement modifier with "and next", the
+		# jump would happen before the warning could be printed
+		if ( 0 == @inputfile ){ warn  "\t!FILE-WARN!: File $file_fasta is empty!\n" ; next READING }
+		
+		# Handle the FASTA file in the way that sequencename and sequence alternate in each line
+		@inputfile                   = fas_bearbeiten ( @inputfile ) ;
+		
+		# Generate a hash: key=>taxon, value => sequence
+		my %sequence                 = @inputfile ;
+		my @values                   = values %sequence ;
+		
+		# Determine basepositions before and after cut. Output of cuttings as total number and in percent
+		my $number_sequences         = keys %sequence ;
+		my $number_characters_before = length $values[0] ;
+		
+		
+		
+		# Check for correct FASTA format and handling of structure sequence
+		# ('nt' unless one of the amino-acid-only letters below is found)
+		my $sequence_state = 'nt' ;
+		SEQUENCE_CHECK:
+		for my $raw_taxon ( keys %sequence ){
+				
+				# if whitespace are between ">" and the next sign within a sequence name, delete these whitespaces
+				$raw_taxon =~ s/^\>\s*/\>/g ;
+			
+				# if whitespaces between last sign and newline in sequence name, delete these whitespaces
+				$raw_taxon =~ s/\s*$//g ;
+			
+				die    "\n\t!FILE-ERROR!: $raw_taxon in $file_fasta is not in FASTA format!\n"                     if           $raw_taxon                  !~ /^\>/                             ;
+				die    "\n\t!FILE-ERROR!: Sequence name missing in $file_fasta!\n"                                 if           $raw_taxon                  =~ /^\>$/                            ;
+				die    "\n\t!FILE-ERROR!: Sequence name $raw_taxon in $file_fasta involves forbidden signs!\n"     if           $raw_taxon                  !~ /\w/                              ;
+				die    "\n\t!FILE-ERROR!: Sequences of $file_fasta have no equal length!\n"                        if length    $sequence{$raw_taxon}       != $number_characters_before         ;
+				die    "\n\t!FILE-ERROR!: Sequence missing in $file_fasta!\n"                                      if           $sequence{$raw_taxon}       =~ /^\n$|^$/                         ;
+				die    "\n\t!FILE-ERROR!: Sequence length in $file_fasta is too short to cut all positions!\n"     if           $number_characters_before   <  $cut_positions[ $#cut_positions ] ;
+				
+				
+				
+				# Structure handling: a string containing "(" ... ")" is treated as
+				# a structure string and may consist of "(", ")" and "." only
+				if ( $sequence{$raw_taxon} =~ /.*\(.*\).*/ ){
+					
+					$sequence{$raw_taxon}  =~ s/-/./g  ;
+					my @strc_elements      =  split "" , $sequence{$raw_taxon} ;
+					
+					for my $str_sign ( @strc_elements ){ 
+						
+						unless ( $str_sign =~ /\(|\)|\./ ){ die "\n\t!FILE-ERROR!: Structure string of $file_fasta involves forbidden signs in $raw_taxon!\n" }
+					}
+					
+					my $structurestring       =  $sequence{$raw_taxon} ; 
+					   $structurestring       =~ s/-/./g ;
+					   $sequence{$raw_taxon}  =  &structure_handling ( \$structurestring, \$$sref_stems_remain, \@cut_positions, \$file_fasta ); next SEQUENCE_CHECK ;
+				}
+		
+				
+				
+				# Check for correct sequence states (sequence is upper-cased first)
+				$sequence{$raw_taxon}   =~ s/(\w+)/\U$1/ig ;
+				my @seq_elements           = split "" , $sequence{$raw_taxon} ;
+				
+				for my $seq_sign ( @seq_elements ){ 
+					
+					unless ( $seq_sign =~ /A|C|G|T|U|-|N|Y|X|R|W|S|K|M|D|V|H|B|Q|E|I|L|F|P|\?/ ){ die "\n\t!FILE-ERROR!: Sequence of $file_fasta involves forbidden signs in $raw_taxon!\n" }
+				}
+				
+				# any of these letters marks the whole file as amino-acid data
+				if ( $sequence{$raw_taxon}  =~ /I|E|L|Q|F|P/ ) { $sequence_state = 'aa' }
+		}
+		
+		
+		
+		# Translate cut positions; @fasta_cut receives the zero-based indices
+		# of the positions that remain, @cut_positions may be rewritten
+		my @fasta_cut;
+		&translate_cut_positions( \$$sref_codon_remove, \$$sref_third_remove, \@cut_positions, \$number_characters_before, \@fasta_cut, \$sequence_state, \$file_fasta );
+		
+		
+		# Calculate percent of remaining positions
+		my $number_cut_positions     = @cut_positions ;
+		my $number_characters_after  = $number_characters_before-$number_cut_positions ;
+		
+		# decimal comma instead of decimal point in the info file
+		my $percent_left =  sprintf "%.1f", ( $number_characters_after / $number_characters_before ) * 100 ;
+		   $percent_left =~ s/\./,/g ;
+		   
+		
+		# Assume uncut positions to $final and print out to the option dependent outfile
+		my $file_out ;
+		if    ( $$sref_codon_remove =~ /yes/ && $$sref_third_remove =~ /yes/ ){ $file_out = "ALICUT_codon_3rd_$file_fasta" }
+		elsif ( $$sref_codon_remove =~ /yes/ && $$sref_third_remove =~ /no/  ){ $file_out = "ALICUT_codon_$file_fasta"     }
+		elsif ( $$sref_codon_remove =~ /no/  && $$sref_third_remove =~ /yes/ ){ $file_out = "ALICUT_3rd_$file_fasta"       }
+		else                                                                  { $file_out = "ALICUT_$file_fasta"           }
+		
+		open OUT, ">$file_out" or die "\n\t!FILE-ERROR!: Can not open File $file_out\n" ;
+		
+		for ( keys %sequence ){
+			
+			my @bases = split "", $sequence{$_}          ;
+			my @final = map { $bases[$_] } @fasta_cut    ;
+			my $final = $_."\n".( join "", @final )."\n" ;
+			
+			print OUT "$final" ;
+		}
+		close OUT;
+		
+		
+		
+		# Print Out of extra infos to ALICUT_info
+		print OUTinfo  "$file\t$file_fasta\t$$sref_codon_remove\t$$sref_third_remove\t$number_sequences\t$number_characters_before\t$number_characters_after\t$percent_left\t$sequence_state\n" ;
+		print          "\tDone  : $file cut to $file_out\n" ;
+		
+		# Set counter +1 (only files that reached this point were handled)
+		$j++ ;
+	}
+	
+	close OUTinfo  ;
+	
+	
+	# Print OUT number of right handled FASTA files
+	printf "\n%68s\n",   "------------------------------------------------------------" ;
+	printf "%42s\n",     "$j FASTA file(s) correctly handled!"                          ;
+	printf "%57s\n",     "Further infos are printed out in ALICUT_info.xls!"            ;
+	printf "\n%63s\n",   "ALICUT V2.31 Finished! Thank you and good bye!"               ;
+	printf "%68s\n",     "------------------------------------------------------------" ;
+	
+	
+	&set_timer ;
+	exit ;
+	
+	# Normalise the line endings of a file in place (CRLF and CR become LF).
+	# Argument: scalar reference to the file name.
+	# Jumps to the next list file of the caller's READING loop when the file
+	# cannot be opened or is empty.
+	sub tie_linefeeds{
+		
+		my $sref_filename = $_[0] ;
+		
+		( open IN , "<$$sref_filename" ) or warn "\tError: can not open $$sref_filename!\n" and next READING ;
+		
+		(tie ( my @data, 'Tie::File', $$sref_filename )) ;
+		
+		warn "\t!FILE-WARN!: $$sref_filename is empty!\n" and next READING if 0 == @data ;
+		
+		map { s/\r\n/\n/g } @data ;
+		map { s/\r/\n/g   } @data ;
+		
+		untie @data ; close IN ;
+		
+	}
+	
+	# Print the user CPU time consumed so far.
+	sub set_timer{
+		
+			my ( $user, $system, $cuser, $csystem ) = times ;
+	
+print <<TIME;
+
+			***  time used: $user sec  ***
+
+TIME
+
+		
+	}
+	
+	# Adapt the list of cut positions to the chosen options and derive the
+	# positions that remain.
+	# Arguments (all references): codon option, 3rd-position option, cut
+	# positions (array, rewritten in place), alignment length, remaining
+	# positions (array, filled with zero-based indices), sequence state
+	# ('nt' or 'aa'), FASTA file name (used in warnings only).
+	# For 'aa' data no translation is done and a warning is printed instead.
+	sub translate_cut_positions {
+		
+		my $sref_command_codon_remove = $_[0] ;
+		my $sref_command_third_remove = $_[1] ;
+		my $aref_cut_positions        = $_[2] ;
+		my $sref_number_characters    = $_[3] ;
+		my $aref_remaining_positions  = $_[4] ;
+		my $sref_sequence_state       = $_[5] ;
+		my $sref_filename             = $_[6] ;
+		
+		
+		# Translate identified RSS aminoacid positions to nucleotide triplet positions
+		# (position n becomes 3n-2, 3n-1, 3n)
+		if ( $$sref_command_codon_remove =~ /yes/ && $$sref_command_third_remove =~ /no/){
+			
+			unless ( $$sref_sequence_state =~ /aa/ ){
+				
+				my @fasta_old = @$aref_cut_positions ; @$aref_cut_positions = ();
+				for my $number( @fasta_old ){
+					
+					my $newno1 = ($number*3)-2;
+					my $newno2 = $newno1+1;
+					my $newno3 = $newno2+1;
+					
+					push @$aref_cut_positions, ( $newno1, $newno2, $newno3 )
+				}
+				
+				my $string_cutnumbers = join " ",  @$aref_cut_positions ;
+				open  OUTnewcut, ">ALICUT_cut_positions_codon.txt" or die "\n\t!FILE-ERROR!: Can not open File ALICUT_cut_positions_codon.txt" ;
+				print OUTnewcut  $string_cutnumbers ; close OUTnewcut ;
+			}
+			
+			else { warn "\n\t!FILE-WARN!: $$sref_filename include aa sequences!\n\tCodon positions not translated!" }
+		}
+		
+		# Translate identified RSS aminoacid positions to nucleotide triplet positions, but remove only third position
+		# (position n becomes 3n)
+		elsif ( $$sref_command_codon_remove =~ /yes/ && $$sref_command_third_remove =~ /yes/){
+			
+			unless ( $$sref_sequence_state =~ /aa/ ){
+			
+				my @fasta_old = @$aref_cut_positions ; @$aref_cut_positions = ();
+				for my $number( @fasta_old ){ 
+					
+					push @$aref_cut_positions, ($number*3) 
+				}
+				
+				my $string_cutnumbers = join " ",  @$aref_cut_positions ;
+				open  OUTnewcut, ">ALICUT_cut_positions_codon_3rd.txt" or die "\n\t!FILE-ERROR!: Can not open File ALICUT_cut_positions_codon_3rd.txt" ;
+				print OUTnewcut  $string_cutnumbers ; close OUTnewcut ;
+			}
+			
+			else { warn "\n\t!FILE-WARN!: $$sref_filename include aa sequences!\n\tCodon positions not translated!\n\t3rd codon position not removed!" }
+		}
+		
+		# Remove only identified RSS if third position of original sequence 
+		# (keeps the positions that are a multiple of 3)
+		elsif ( $$sref_command_codon_remove =~ /no/ && $$sref_command_third_remove =~ /yes/){
+			
+			unless ( $$sref_sequence_state =~ /aa/ ){
+				
+				my @fasta_old = @$aref_cut_positions ; @$aref_cut_positions = ();
+				for my $number( @fasta_old ){
+					
+					if ( $number % 3 == 0 ){ push @$aref_cut_positions, $number }
+				}
+				
+				my $string_cutnumbers = join " ",  @$aref_cut_positions ;
+				open  OUTnewcut, ">ALICUT_cut_positions_3rd.txt" or die "\n\t!FILE-ERROR!: Can not open File ALICUT_cut_positions_3rd.txt" ;
+				print OUTnewcut  $string_cutnumbers ; close OUTnewcut
+			}
+			
+			else { warn "\n\t!FILE-WARN!: $$sref_filename include aa sequences!\n\tNot only 3rd codon position removed!" }
+		}
+		
+		
+		# Examine remaining positions: every zero-based index of the alignment
+		# that is not listed (one-based) in the cut positions
+		my  ( %seen, @zahlenreihe ) ;
+		for ( 1 .. $$sref_number_characters ) { push @zahlenreihe, $_-1 }
+		
+		for my $value ( @$aref_cut_positions ){ $seen{$value-1}++ }
+		for           ( @zahlenreihe         ){ unless ( $seen{$_} ){ push @$aref_remaining_positions, $_ } }
+	}
+}
+
+sub fas_bearbeiten{
+	
+	# Normalise the lines of a FASTA file ("fas bearbeiten" = "process fas").
+	# Takes the raw lines as a list and returns a list that alternates between
+	# header lines (starting with '>') and the complete sequence of that
+	# record with blanks and line breaks removed.
+	my @records = @_ ;
+	
+	for ( @records ){
+		
+		s/(\>.*)/$1\t/ ;   # mark the end of a header line with a tab
+		s/ //g         ;   # drop blanks
+		s/\n//g        ;   # drop line breaks, sequence lines of a record fuse
+		s/\t/\n/g      ;   # header end marker becomes a line break
+		s/\>/\n\>/g    ;   # every header starts on a new line
+	}
+	
+	my $joined = join "", @records ;
+	my @lines  = split "\n", $joined ;
+	
+	# The line break inserted in front of the first header yields an empty leading element
+	shift @lines ;
+	
+	return @lines ;
+}
+
+sub structure_handling{
+	
+	# Evaluate a dot-bracket secondary structure string, write a summary file and
+	# return the structure string adjusted to the RSS cut positions.
+	#
+	# Arguments:
+	#   $_[0]  scalar ref: structure string built from '(', ')' and '.'
+	#   $_[1]  scalar ref: answer matching /yes/i -> paired (stem) positions are
+	#                      removed from the cut list; otherwise the partner of every
+	#                      cut stem position is turned into a loop character '.'
+	#   $_[2]  array ref : 1-based cut positions (modified in place in the /yes/i case)
+	#   $_[3]  scalar ref: file name, used for the name and the header of the info file
+	#
+	# Side effect: writes ALICUT_Struc_info_<filename>.txt into the current directory.
+	# Returns the structure string of positions 1 .. N-1.
+	my $sref_string        = $_[0] ;
+	my $sref_answer_remain = $_[1] ;
+	my $aref_cut_positions = $_[2] ;
+	my $sref_filename      = $_[3] ;
+	
+	my ( 
+		
+		@pair_infos            ,
+		@forward               ,
+		@structurestring       ,
+		@loops                 ,
+		@pairs                 ,
+		%structure_of_position ,
+		%seen_struc
+		
+	);
+	
+	
+	# Stem assignment: positions are counted from 1, opening brackets are kept on a
+	# stack and matched with the next closing bracket
+	my @structures = split "", $$sref_string ;
+	my  $i = 0 ;
+	CHECKING:
+	for ( @structures ){ $i++ ;
+		
+		$structure_of_position{$i} = $_ ;
+		
+		if ( $_  =~ /\(/ ){ push @forward, $i ; next CHECKING }
+		# NOTE(review): an unbalanced ')' pops from an empty stack and yields an undefined partner
+		if ( $_  =~ /\)/ ){ my $pair_1 = pop @forward; push @pairs, ( $pair_1, $i ); push @pair_infos, ( $pair_1.":".$i ); next CHECKING }
+		if ( $_  =~ /\./ ){ push @loops,   $i ; next CHECKING }
+	}
+	
+	@pair_infos  =  reverse @pair_infos ;
+	
+	
+	# Generate listfiles for structure_info file
+	my $pairlist =  join "\n\t\t\t\t\t", @pair_infos   ;
+	my $looplist =  join "\n\t\t\t\t\t", @loops        ;
+	
+	
+	# Number and proportion of stem and loop positions for structure info file.
+	# $N_stems counts pairs, every pair covers two characters.
+	# An empty structure string must not end in a division by zero.
+	my $N_total  =  @structures                                      ;
+	my $N_stems  =  @pair_infos                                      ;
+	my $N_loops  =  $N_total - ( $N_stems * 2 )                      ;
+	my $P_loops  =  $N_total ? ( $N_loops / $N_total ) * 100 : 0     ;
+	my $P_stems  =  $N_total ? 100 - $P_loops                : 0     ;
+	
+	
+	# Open structure info outfile, abort with a message if it can not be created
+	open OUTstruc, ">ALICUT_Struc_info_${$sref_filename}.txt" or die "\n\t!FILE-ERROR!: Can not open File ALICUT_Struc_info_${$sref_filename}.txt: $!" ;
+	
+	# Print out, each count together with its own percentage
+	print OUTstruc "\nOriginal structure information identified in $$sref_filename:\n\n"  ;
+	print OUTstruc "- Number of characters:\t\t\t$N_total\n"                              ;
+	print OUTstruc "- Number of single loop characters:\t$N_loops [$P_loops %]\n"         ;
+	print OUTstruc "- Number of paired stem characters:\t$N_stems [$P_stems %]\n"         ;
+	print OUTstruc "\n- Paired stem positions:\t\t$pairlist\n\n"                          ;
+	print OUTstruc "\n- Loop positions:\t\t\t$looplist\n"                                 ;
+	
+	close OUTstruc;
+	
+	if  ( $$sref_answer_remain =~ /yes/i ){
+		
+		my @cut_positions2 = ();
+		
+		# Keep rss identified stem positions within the MSA: drop them from the cut list
+		for ( @pairs ){ $seen_struc{$_} = 1                                                   }
+		for ( @$aref_cut_positions ){ unless ( $seen_struc{$_} ){ push @cut_positions2, $_  } }
+		@$aref_cut_positions = @cut_positions2                                                ;
+	}
+	
+	else{
+		
+		# @pairs is a flat list ( open, close, open, close, ... ) and converts directly into a hash
+		my %pair = @pairs;
+		
+		# Replace paired structure positions of rss identified positions by dots
+		for my $bp_for ( keys %pair ){
+			
+			for my $rss ( @$aref_cut_positions ){
+				
+				if ( $bp_for        == $rss ){ $structure_of_position{$pair{$bp_for}}  = "." ; last }
+				if ( $pair{$bp_for} == $rss ){ $structure_of_position{$bp_for}         = "." ; last }
+			}
+		}
+	}
+	
+	# NOTE(review): the loop stops at N-1, so the last position is not part of the
+	# returned string; kept as in the original, confirm against the caller
+	for    ( my $k=1; $k<=@structures-1; $k++ ){ push @structurestring, $structure_of_position{$k}   }
+	my     $structure_string_neu = join "", @structurestring                                       ;
+	return $structure_string_neu                                                                   ;
+	
+}
+
+sub commandline{
+
+	# Show the command prompt, read one line from STDIN and return it
+	# without the trailing line break. A separator line is printed after the input.
+	print  "\n\tCOMMAND:\t " ;
+	
+	my $typed_command = <STDIN> ;
+	chomp $typed_command ;
+
+	print  "\n\t------------------------------------------------------------\n" ;
+	
+	return $typed_command ;
+}	
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/skills/phylo_from_buscos/scripts/predownloaded_aliscore_alicut/Aliscore.02.2.pl
+++ b/skills/phylo_from_buscos/scripts/predownloaded_aliscore_alicut/Aliscore.02.2.pl
--- a/skills/phylo_from_buscos/scripts/predownloaded_aliscore_alicut/Aliscore_module.pm
+++ b/skills/phylo_from_buscos/scripts/predownloaded_aliscore_alicut/Aliscore_module.pm
--- a/skills/phylo_from_buscos/scripts/query_ncbi_assemblies.py
+++ b/skills/phylo_from_buscos/scripts/query_ncbi_assemblies.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+"""
+Query NCBI for available genome assemblies by taxon name
+
+Usage:
+    python query_ncbi_assemblies.py --taxon "Coleoptera"
+    python query_ncbi_assemblies.py --taxon "Drosophila" --max-results 50
+    python query_ncbi_assemblies.py --taxon "Apis" --refseq-only
+
+Requires: ncbi-datasets-pylib (pip install ncbi-datasets-pylib)
+
+Author: Bruno de Medeiros (Field Museum)
+"""
+
+import argparse
+import sys
+
+
+def query_assemblies_by_taxon(taxon, max_results=20, refseq_only=False):
+    """
+    Query NCBI for genome assemblies of a given taxon
+
+    Args:
+        taxon: Taxon name (e.g., "Coleoptera", "Drosophila melanogaster")
+        max_results: Maximum number of results to return
+        refseq_only: If True, only return RefSeq assemblies (GCF_*)
+
+    Returns:
+        List of dictionaries with assembly information
+    """
+    try:
+        from ncbi.datasets import GenomeApi
+        from ncbi.datasets.openapi import ApiClient, ApiException
+    except ImportError:
+        print("Error: ncbi-datasets-pylib not installed", file=sys.stderr)
+        print("Install with: pip install ncbi-datasets-pylib", file=sys.stderr)
+        sys.exit(1)
+
+    assemblies = []
+
+    print(f"Querying NCBI for '{taxon}' genome assemblies...")
+    print(f"(Limiting to {max_results} results)")
+    if refseq_only:
+        print("(RefSeq assemblies only)")
+    print("")
+
+    try:
+        with ApiClient() as api_client:
+            api = GenomeApi(api_client)
+
+            # Query genome assemblies for the taxon
+            genome_summary = api.genome_summary_by_taxon(
+                taxon=taxon,
+                limit=str(max_results),
+                filters_refseq_only=refseq_only
+            )
+
+            if not genome_summary.reports:
+                print(f"No assemblies found for taxon '{taxon}'")
+                return []
+
+            for report in genome_summary.reports:
+                assembly_info = {
+                    'accession': report.accession,
+                    'organism': report.organism.organism_name,
+                    'assembly_level': report.assembly_info.assembly_level,
+                    'assembly_name': report.assembly_info.assembly_name,
+                    'submission_date': report.assembly_info.release_date if hasattr(report.assembly_info, 'release_date') else 'N/A'
+                }
+                assemblies.append(assembly_info)
+
+    except ApiException as e:
+        print(f"Error querying NCBI: {e}", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        print(f"Unexpected error: {e}", file=sys.stderr)
+        sys.exit(1)
+
+    return assemblies
+
+
+def format_table(assemblies):
+    """
+    Format assemblies as a readable table
+
+    Args:
+        assemblies: List of assembly dictionaries
+    """
+    if not assemblies:
+        return
+
+    print(f"Found {len(assemblies)} assemblies:\n")
+
+    # Print header
+    print(f"{'#':<4} {'Accession':<20} {'Organism':<40} {'Level':<15} {'Assembly Name':<30}")
+    print("-" * 110)
+
+    # Print data rows
+    for i, asm in enumerate(assemblies, 1):
+        organism = asm['organism'][:38] + '..' if len(asm['organism']) > 40 else asm['organism']
+        assembly_name = asm['assembly_name'][:28] + '..' if len(asm['assembly_name']) > 30 else asm['assembly_name']
+
+        print(f"{i:<4} {asm['accession']:<20} {organism:<40} {asm['assembly_level']:<15} {assembly_name:<30}")
+
+    print("")
+
+
+def save_accessions(assemblies, output_file):
+    """
+    Save assembly accessions to a file
+
+    Args:
+        assemblies: List of assembly dictionaries
+        output_file: Output file path
+    """
+    with open(output_file, 'w') as f:
+        for asm in assemblies:
+            f.write(f"{asm['accession']}\n")
+
+    print(f"Accessions saved to: {output_file}")
+    print(f"You can download these assemblies using:")
+    print(f"  python download_ncbi_genomes.py --assemblies $(cat {output_file})")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Query NCBI for available genome assemblies by taxon name",
+        epilog="Example: python query_ncbi_assemblies.py --taxon 'Coleoptera' --max-results 50"
+    )
+
+    parser.add_argument(
+        "--taxon",
+        required=True,
+        help="Taxon name (e.g., 'Coleoptera', 'Drosophila melanogaster')"
+    )
+
+    parser.add_argument(
+        "--max-results",
+        type=int,
+        default=20,
+        help="Maximum number of results to return (default: 20)"
+    )
+
+    parser.add_argument(
+        "--refseq-only",
+        action="store_true",
+        help="Only return RefSeq assemblies (GCF_* accessions)"
+    )
+
+    parser.add_argument(
+        "--save",
+        metavar="FILE",
+        help="Save accessions to a file for later download"
+    )
+
+    args = parser.parse_args()
+
+    # Query NCBI
+    assemblies = query_assemblies_by_taxon(
+        taxon=args.taxon,
+        max_results=args.max_results,
+        refseq_only=args.refseq_only
+    )
+
+    # Display results
+    format_table(assemblies)
+
+    # Save if requested
+    if args.save and assemblies:
+        save_accessions(assemblies, args.save)
+
+
+if __name__ == "__main__":
+    main()
--- a/skills/phylo_from_buscos/scripts/rename_genomes.py
+++ b/skills/phylo_from_buscos/scripts/rename_genomes.py
@@ -0,0 +1,240 @@
+#!/usr/bin/env python3
+"""
+Rename genome files with clean, meaningful sample names for phylogenomics
+
+This script helps create a mapping between genome files (often with cryptic
+accession numbers) and clean species/sample names that will appear in the
+final phylogenetic tree.
+
+Usage:
+    # Interactive mode - prompts for names
+    python rename_genomes.py --interactive genome1.fasta genome2.fasta
+
+    # From mapping file (TSV: old_name<TAB>new_name)
+    python rename_genomes.py --mapping samples.tsv
+
+    # Create template mapping file
+    python rename_genomes.py --create-template *.fasta > samples.tsv
+
+Author: Bruno de Medeiros (Field Museum)
+Based on tutorials by Paul Frandsen (BYU)
+"""
+
+import argparse
+import os
+import sys
+import shutil
+from pathlib import Path
+
+
+def sanitize_name(name):
+    """
+    Sanitize a name to be phylogenomics-safe
+    - Replace spaces with underscores
+    - Remove special characters
+    - Keep only alphanumeric, underscore, hyphen
+    """
+    # Replace spaces with underscores
+    name = name.replace(' ', '_')
+    # Remove special characters except underscore and hyphen
+    name = ''.join(c for c in name if c.isalnum() or c in '_-')
+    return name
+
+
+def create_template(genome_files, output=sys.stdout):
+    """Create a template mapping file"""
+    output.write("# Sample mapping file\n")
+    output.write("# Format: original_filename<TAB>new_sample_name\n")
+    output.write("# Edit the second column with meaningful species/sample names\n")
+    output.write("# Recommended format: [ACCESSION]_[NAME] (e.g., GCA000123456_Penstemon_eatonii)\n")
+    output.write("# This keeps accession for traceability while having readable names in trees\n")
+    output.write("# Names should contain only letters, numbers, underscores, and hyphens\n")
+    output.write("#\n")
+
+    for gfile in genome_files:
+        basename = Path(gfile).stem  # Remove extension
+        output.write(f"{gfile}\t{basename}\n")
+
+
+def read_mapping(mapping_file):
+    """Read mapping from TSV file"""
+    mapping = {}
+    with open(mapping_file, 'r') as f:
+        for line in f:
+            line = line.strip()
+            # Skip comments and empty lines
+            if not line or line.startswith('#'):
+                continue
+
+            parts = line.split('\t')
+            if len(parts) != 2:
+                print(f"Warning: Skipping invalid line: {line}", file=sys.stderr)
+                continue
+
+            old_name, new_name = parts
+            new_name = sanitize_name(new_name)
+            mapping[old_name] = new_name
+
+    return mapping
+
+
+def interactive_rename(genome_files):
+    """Interactively ask for new names"""
+    mapping = {}
+
+    print("Enter new sample names for each genome file.")
+    print("Press Enter to keep the current name.")
+    print("Names will be sanitized (spaces→underscores, special chars removed)\n")
+
+    for gfile in genome_files:
+        current_name = Path(gfile).stem
+        new_name = input(f"{gfile} → [{current_name}]: ").strip()
+
+        if not new_name:
+            new_name = current_name
+
+        new_name = sanitize_name(new_name)
+        mapping[gfile] = new_name
+        print(f"  Will rename to: {new_name}.fasta\n")
+
+    return mapping
+
+
+def rename_files(mapping, dry_run=False, backup=True):
+    """Rename genome files according to mapping"""
+
+    renamed = []
+    errors = []
+
+    for old_file, new_name in mapping.items():
+        if not os.path.exists(old_file):
+            errors.append(f"File not found: {old_file}")
+            continue
+
+        # Get extension from original file
+        ext = Path(old_file).suffix
+        if not ext:
+            ext = '.fasta'
+
+        new_file = f"{new_name}{ext}"
+
+        # Check if target exists
+        if os.path.exists(new_file) and new_file != old_file:
+            errors.append(f"Target exists: {new_file}")
+            continue
+
+        # Skip if names are the same
+        if old_file == new_file:
+            print(f"Skip (no change): {old_file}")
+            continue
+
+        if dry_run:
+            print(f"[DRY RUN] Would rename: {old_file} → {new_file}")
+        else:
+            # Backup if requested
+            if backup:
+                backup_file = f"{old_file}.backup"
+                shutil.copy2(old_file, backup_file)
+                print(f"Backup created: {backup_file}")
+
+            # Rename
+            shutil.move(old_file, new_file)
+            print(f"Renamed: {old_file} → {new_file}")
+            renamed.append((old_file, new_file))
+
+    return renamed, errors
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Rename genome files with meaningful sample names for phylogenomics",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Create template mapping file
+  python rename_genomes.py --create-template *.fasta > samples.tsv
+  # Edit samples.tsv, then apply mapping
+  python rename_genomes.py --mapping samples.tsv
+
+  # Interactive renaming
+  python rename_genomes.py --interactive genome1.fasta genome2.fasta
+
+  # Dry run (preview changes)
+  python rename_genomes.py --mapping samples.tsv --dry-run
+        """
+    )
+
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument(
+        '--create-template',
+        nargs='+',
+        metavar='GENOME',
+        help='Create a template mapping file from genome files'
+    )
+    group.add_argument(
+        '--mapping',
+        metavar='FILE',
+        help='TSV file with mapping (old_name<TAB>new_name)'
+    )
+    group.add_argument(
+        '--interactive',
+        nargs='+',
+        metavar='GENOME',
+        help='Interactively rename genome files'
+    )
+
+    parser.add_argument(
+        '--dry-run',
+        action='store_true',
+        help='Show what would be renamed without actually renaming'
+    )
+
+    parser.add_argument(
+        '--no-backup',
+        action='store_true',
+        help='Do not create backup files'
+    )
+
+    args = parser.parse_args()
+
+    # Create template
+    if args.create_template:
+        create_template(args.create_template)
+        return
+
+    # Interactive mode
+    if args.interactive:
+        mapping = interactive_rename(args.interactive)
+    # Mapping file mode
+    elif args.mapping:
+        mapping = read_mapping(args.mapping)
+    else:
+        parser.error("No mode specified")
+
+    if not mapping:
+        print("No files to rename", file=sys.stderr)
+        return
+
+    # Perform renaming
+    renamed, errors = rename_files(
+        mapping,
+        dry_run=args.dry_run,
+        backup=not args.no_backup
+    )
+
+    # Summary
+    print("\n" + "="*60)
+    if args.dry_run:
+        print("DRY RUN - No files were actually renamed")
+    else:
+        print(f"Successfully renamed {len(renamed)} file(s)")
+
+    if errors:
+        print(f"\nErrors ({len(errors)}):")
+        for error in errors:
+            print(f"  - {error}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
--- a/skills/phylo_from_buscos/scripts/run_alicut.sh
+++ b/skills/phylo_from_buscos/scripts/run_alicut.sh
@@ -0,0 +1,247 @@
+#!/bin/bash
+
+# run_alicut.sh
+# Wrapper script for running ALICUT to remove Aliscore-identified RSS positions
+# Removes randomly similar sequence sections from alignments
+#
+# Usage:
+#   bash run_alicut.sh [aliscore_dir] [options]
+#
+# Options:
+#   -r         Remain stem positions (for RNA secondary structures)
+#   -c         Remove codon (translate AA positions to nucleotide triplets)
+#   -3         Remove only 3rd codon positions
+#   -s         Silent mode (non-interactive, use defaults)
+#
+# Requirements:
+#   - ALICUT_V2.31.pl in PATH or same directory
+#   - Perl with File::Copy, Tie::File, Term::Cap modules
+#   - Aliscore output directory with *_List_*.txt and original .fas file
+
+set -euo pipefail
+
+# Script directory
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Locate the ALICUT script: PATH first, then the script directory, then the
+# current directory.
+# The script is later run as "perl <script>" from inside the Aliscore output
+# directory. perl does not search PATH for a script name and a relative path
+# stops being valid after the directory change, so an absolute path is stored.
+if ALICUT_SCRIPT="$(command -v ALICUT_V2.31.pl 2>/dev/null)"; then
+    :
+elif [ -f "${SCRIPT_DIR}/ALICUT_V2.31.pl" ]; then
+    ALICUT_SCRIPT="${SCRIPT_DIR}/ALICUT_V2.31.pl"
+elif [ -f "./ALICUT_V2.31.pl" ]; then
+    ALICUT_SCRIPT="$(pwd)/ALICUT_V2.31.pl"
+else
+    echo "ERROR: ALICUT_V2.31.pl not found in PATH, script directory, or current directory" >&2
+    echo "Please download from: https://www.zfmk.de/en/research/research-centres-and-groups/alicut" >&2
+    exit 1
+fi
+
+# A relative PATH entry (e.g. ".") yields a relative result; anchor it to the start directory
+case "${ALICUT_SCRIPT}" in
+    /*) ;;
+    *) ALICUT_SCRIPT="$(pwd)/${ALICUT_SCRIPT}" ;;
+esac
+
+# Function to display usage
+# Prints the help text below to stdout and terminates the script with exit
+# status 0. $0 is expanded inside the text; \${dir} is escaped so that it is
+# printed literally.
+usage() {
+    cat <<EOF
+Usage: $0 [aliscore_dir] [options]
+
+Run ALICUT to remove Aliscore-identified randomly similar sequence sections.
+
+Arguments:
+  aliscore_dir   Directory containing Aliscore output files
+
+Options:
+  -r             Remain stem positions in RNA secondary structure alignments
+  -c             Remove entire codon (translates AA RSS positions to nt triplets)
+  -3             Remove only 3rd codon position of identified RSS
+  -s             Silent/scripted mode (non-interactive, use defaults)
+  -h             Display this help message
+
+Input Requirements:
+  The aliscore_dir must contain:
+    - Original FASTA alignment file (*.fas)
+    - Aliscore List file (*_List_random.txt or *_List_*.txt)
+
+Examples:
+  # Basic usage (interactive mode)
+  bash run_alicut.sh aliscore_alignment1
+
+  # Silent mode with defaults
+  bash run_alicut.sh aliscore_alignment1 -s
+
+  # Remain RNA stem positions
+  bash run_alicut.sh aliscore_16S -r -s
+
+  # Remove entire codons (for back-translation)
+  bash run_alicut.sh aliscore_protein1 -c -s
+
+  # Process all Aliscore output directories
+  for dir in aliscore_*/; do
+    bash run_alicut.sh "\${dir}" -s
+  done
+
+Output Files (in aliscore_dir):
+  - ALICUT_[alignment].fas        : Trimmed alignment
+  - ALICUT_info.xls               : Statistics (taxa, positions removed, etc.)
+  - ALICUT_Struc_info_*.txt       : Structure information (if RNA detected)
+
+Citation:
+  Kück P, Meusemann K, Dambach J, Thormann B, von Reumont BM, Wägele JW,
+  Misof B (2010) Parametric and non-parametric masking of randomness in
+  sequence alignments can be improved and leads to better resolved trees.
+  Front Zool 7:10. doi: 10.1186/1742-9994-7-10
+
+EOF
+    exit 0
+}
+
+# Parse command line arguments
+# ALICUT_OPTS collects the flags handed on to ALICUT as one space separated string.
+ALISCORE_DIR=""
+ALICUT_OPTS=""
+SILENT_MODE=false
+
+if [ $# -eq 0 ]; then
+    usage
+fi
+
+# A help request may be given in place of the directory argument
+case "$1" in
+    -h|--help)
+        usage
+        ;;
+esac
+
+ALISCORE_DIR="$1"
+shift
+
+# Validate directory exists
+if [ ! -d "${ALISCORE_DIR}" ]; then
+    echo "ERROR: Aliscore directory not found: ${ALISCORE_DIR}" >&2
+    exit 1
+fi
+
+# Parse ALICUT options
+while [ $# -gt 0 ]; do
+    case "$1" in
+        -h|--help)
+            usage
+            ;;
+        -r)
+            ALICUT_OPTS="${ALICUT_OPTS} -r"
+            shift
+            ;;
+        -c)
+            ALICUT_OPTS="${ALICUT_OPTS} -c"
+            shift
+            ;;
+        -3)
+            ALICUT_OPTS="${ALICUT_OPTS} -3"
+            shift
+            ;;
+        -s|--silent)
+            SILENT_MODE=true
+            ALICUT_OPTS="${ALICUT_OPTS} -s"
+            shift
+            ;;
+        *)
+            echo "ERROR: Unknown option: $1" >&2
+            # usage ends with "exit 0"; run it in a subshell so that the
+            # script itself can still fail with a non-zero status
+            (usage) >&2
+            exit 1
+            ;;
+    esac
+done
+
+# Change to Aliscore output directory
+cd "${ALISCORE_DIR}"
+
+echo "Processing Aliscore output in: ${ALISCORE_DIR}"
+
+# Find List file
+LIST_FILE=$(ls *_List_*.txt 2>/dev/null | head -n 1)
+if [ -z "${LIST_FILE}" ]; then
+    echo "ERROR: No Aliscore List file found (*_List_*.txt)"
+    echo "Make sure Aliscore completed successfully"
+    exit 1
+fi
+
+echo "Found List file: ${LIST_FILE}"
+
+# Find original FASTA file
+FASTA_FILE=$(find . -maxdepth 1 \( -name "*.fas" -o -name "*.fasta" \) -type f | head -n 1 | sed 's|^\./||')
+if [ -z "${FASTA_FILE}" ]; then
+    echo "ERROR: No FASTA alignment file found (*.fas or *.fasta)"
+    echo "ALICUT requires the original alignment file in the same directory as List file"
+    exit 1
+fi
+
+echo "Found FASTA file: ${FASTA_FILE}"
+
+# Check if List file contains RSS positions
+RSS_COUNT=$(wc -w < "${LIST_FILE}" || echo "0")
+if [ "${RSS_COUNT}" -eq 0 ]; then
+    echo "WARNING: List file is empty (no RSS positions identified)"
+    echo "Aliscore found no randomly similar sequences to remove"
+    echo "Skipping ALICUT - alignment is already clean"
+
+    # Create a symbolic link to indicate no trimming was needed
+    ln -sf "${FASTA_FILE}" "ALICUT_${FASTA_FILE}"
+    echo "Created symbolic link: ALICUT_${FASTA_FILE} -> ${FASTA_FILE}"
+
+    cd ..
+    exit 0
+fi
+
+echo "Found ${RSS_COUNT} RSS positions to remove"
+
+# Run ALICUT
+echo ""
+echo "Running ALICUT..."
+echo "Options: ${ALICUT_OPTS}"
+
+# Construct ALICUT command
+ALICUT_CMD="perl ${ALICUT_SCRIPT} ${ALICUT_OPTS}"
+
+if [ "${SILENT_MODE}" = true ]; then
+    echo "Command: ${ALICUT_CMD}"
+    eval ${ALICUT_CMD}
+else
+    echo "Running ALICUT in interactive mode..."
+    echo "Press 's' and Enter to start with current options"
+    echo ""
+    perl "${ALICUT_SCRIPT}" ${ALICUT_OPTS}
+fi
+
+# Check if ALICUT completed successfully
+if [ $? -eq 0 ]; then
+    echo ""
+    echo "ALICUT completed successfully"
+
+    # Find output file
+    OUTPUT_FILE=$(ls ALICUT_*.fas ALICUT_*.fasta 2>/dev/null | head -n 1)
+
+    if [ -n "${OUTPUT_FILE}" ]; then
+        echo ""
+        echo "Output files:"
+        ls -lh ALICUT_* 2>/dev/null
+
+        # Calculate and report trimming statistics (handle multi-line FASTA format)
+        if [ -f "${OUTPUT_FILE}" ]; then
+            ORIGINAL_LENGTH=$(awk '/^>/ {if (seq) {print seq; seq=""}; next} {seq = seq $0} END {if (seq) print seq}' "${FASTA_FILE}" | head -n 1 | wc -c)
+            TRIMMED_LENGTH=$(awk '/^>/ {if (seq) {print seq; seq=""}; next} {seq = seq $0} END {if (seq) print seq}' "${OUTPUT_FILE}" | head -n 1 | wc -c)
+            REMOVED_LENGTH=$((ORIGINAL_LENGTH - TRIMMED_LENGTH))
+            PERCENT_REMOVED=$(awk "BEGIN {printf \"%.1f\", (${REMOVED_LENGTH}/${ORIGINAL_LENGTH})*100}")
+
+            echo ""
+            echo "Trimming statistics:"
+            echo "  Original length: ${ORIGINAL_LENGTH} bp"
+            echo "  Trimmed length:  ${TRIMMED_LENGTH} bp"
+            echo "  Removed:         ${REMOVED_LENGTH} bp (${PERCENT_REMOVED}%)"
+        fi
+
+        # Check for info file
+        if [ -f "ALICUT_info.xls" ]; then
+            echo ""
+            echo "Detailed statistics in: ALICUT_info.xls"
+        fi
+    else
+        echo "WARNING: Expected output file ALICUT_*.fas not found"
+    fi
+else
+    echo "ERROR: ALICUT failed"
+    cd ..
+    exit 1
+fi
+
+# Return to parent directory
+cd ..
+
+echo ""
+echo "Done: ${ALISCORE_DIR}"
--- a/skills/phylo_from_buscos/scripts/run_aliscore.sh
+++ b/skills/phylo_from_buscos/scripts/run_aliscore.sh
@@ -0,0 +1,248 @@
+#!/bin/bash
+
+# run_aliscore.sh
+# Wrapper script for running Aliscore on aligned sequences
+# Identifies randomly similar sequence sections (RSS) in multiple sequence alignments
+#
+# Usage:
+#   bash run_aliscore.sh [alignment.fas] [options]
+#
+# Options:
+#   -d DIR     Base output directory for all Aliscore results (default: aliscore_output)
+#   -w INT     Window size (default: 4)
+#   -r INT     Number of random pairs to compare (default: 4*N taxa)
+#   -N         Treat gaps as ambiguous characters (recommended for amino acids)
+#   -t TREE    Tree file in Newick format for guided comparisons
+#   -l LEVEL   Node level for tree-based comparisons
+#   -o TAXA    Comma-separated list of outgroup taxa
+#
+# Array job usage:
+#   Set SLURM_ARRAY_TASK_ID or PBS_ARRAYID environment variable
+#   Create locus_list.txt with one alignment file per line
+#
+# Requirements:
+#   - Aliscore.02.2.pl in PATH or same directory
+#   - Perl with Tie::File and Fcntl modules
+
+set -euo pipefail
+
+# Script directory
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Locate the Aliscore script: PATH first, then the script directory, then the
+# current directory.
+# The script is later run as "perl <script>" from inside the output directory.
+# perl does not search PATH for a script name and a relative path stops being
+# valid after the directory change, so an absolute path is stored.
+if ALISCORE_SCRIPT="$(command -v Aliscore.02.2.pl 2>/dev/null)"; then
+    :
+elif [ -f "${SCRIPT_DIR}/Aliscore.02.2.pl" ]; then
+    ALISCORE_SCRIPT="${SCRIPT_DIR}/Aliscore.02.2.pl"
+elif [ -f "./Aliscore.02.2.pl" ]; then
+    ALISCORE_SCRIPT="$(pwd)/Aliscore.02.2.pl"
+else
+    echo "ERROR: Aliscore.02.2.pl not found in PATH, script directory, or current directory" >&2
+    echo "Please download from: https://www.zfmk.de/en/research/research-centres-and-groups/aliscore" >&2
+    exit 1
+fi
+
+# A relative PATH entry (e.g. ".") yields a relative result; anchor it to the start directory
+case "${ALISCORE_SCRIPT}" in
+    /*) ;;
+    *) ALISCORE_SCRIPT="$(pwd)/${ALISCORE_SCRIPT}" ;;
+esac
+
+# Function to display usage
+# Prints the help text below to stdout and terminates the script with exit
+# status 0. $0 is expanded inside the text; \$( ... ) is escaped so that it is
+# printed literally.
+usage() {
+    cat <<EOF
+Usage: $0 [alignment.fas] [options]
+
+Run Aliscore to identify randomly similar sequence sections in alignments.
+
+Options:
+  -d DIR     Base output directory for all Aliscore results (default: aliscore_output)
+  -w INT     Window size for sliding window analysis (default: 4)
+  -r INT     Number of random sequence pairs to compare (default: 4*N taxa)
+  -N         Treat gaps as ambiguous characters (recommended for amino acids)
+  -t FILE    Tree file in Newick format for phylogeny-guided comparisons
+  -l LEVEL   Node level limit for tree-based comparisons (default: all)
+  -o TAXA    Comma-separated list of outgroup taxa for focused comparisons
+  -h         Display this help message
+
+Array Job Mode:
+  If SLURM_ARRAY_TASK_ID or PBS_ARRAYID is set, reads alignment from locus_list.txt
+  Create locus_list.txt with: ls *.fas > locus_list.txt
+
+Examples:
+  # Basic run with defaults (outputs to aliscore_output/)
+  bash run_aliscore.sh alignment.fas
+
+  # Amino acid sequences with gaps as ambiguous
+  bash run_aliscore.sh protein_alignment.fas -N
+
+  # Custom output directory
+  bash run_aliscore.sh alignment.fas -d my_aliscore_results
+
+  # Custom window size and random pairs
+  bash run_aliscore.sh alignment.fas -w 6 -r 100
+
+  # Tree-guided analysis
+  bash run_aliscore.sh alignment.fas -t species.tre
+
+  # Array job on SLURM
+  ls aligned_aa/*.fas > locus_list.txt
+  sbatch --array=1-\$(wc -l < locus_list.txt) run_aliscore_array.job
+
+Output Files (in aliscore_output/aliscore_[alignment]/):
+  - [alignment]_List_random.txt   : Positions identified as RSS (for ALICUT)
+  - [alignment]_Profile_random.txt: Quality profile for each position
+  - [alignment].svg               : Visual plot of scoring profiles
+
+Citation:
+  Misof B, Misof K (2009) A Monte Carlo approach successfully identifies
+  randomness in multiple sequence alignments: a more objective means of data
+  exclusion. Syst Biol 58(1):21-34. doi: 10.1093/sysbio/syp006
+
+EOF
+    exit 0
+}
+
+# Parse command line arguments
+ALIGNMENT=""
+ALISCORE_OPTS=""
+BASE_OUTPUT_DIR="aliscore_output"
+
+if [ $# -eq 0 ]; then
+    usage
+fi
+
+# Check for array job mode
+ARRAY_MODE=false
+ARRAY_ID=""
+
+if [ -n "${SLURM_ARRAY_TASK_ID:-}" ]; then
+    ARRAY_MODE=true
+    ARRAY_ID="${SLURM_ARRAY_TASK_ID}"
+elif [ -n "${PBS_ARRAYID:-}" ]; then
+    ARRAY_MODE=true
+    ARRAY_ID="${PBS_ARRAYID}"
+fi
+
+# If in array mode, get alignment from locus list
+if [ "${ARRAY_MODE}" = true ]; then
+    if [ ! -f "locus_list.txt" ]; then
+        echo "ERROR: Array job mode requires locus_list.txt"
+        echo "Create with: ls *.fas > locus_list.txt"
+        exit 1
+    fi
+
+    ALIGNMENT=$(sed -n "${ARRAY_ID}p" locus_list.txt)
+
+    if [ -z "${ALIGNMENT}" ]; then
+        echo "ERROR: Could not read alignment for array index ${ARRAY_ID}"
+        exit 1
+    fi
+
+    echo "Array job ${ARRAY_ID}: Processing ${ALIGNMENT}"
+
+    # Remaining arguments are Aliscore options
+    shift $#  # Clear positional parameters
+    set -- "$@"  # Reset with remaining args
+else
+    # First argument is alignment file
+    ALIGNMENT="$1"
+    shift
+fi
+
+# Validate alignment file exists
+if [ ! -f "${ALIGNMENT}" ]; then
+    echo "ERROR: Alignment file not found: ${ALIGNMENT}"
+    exit 1
+fi
+
+# Parse Aliscore options
+while [ $# -gt 0 ]; do
+    case "$1" in
+        -h|--help)
+            usage
+            ;;
+        -d|--output-dir)
+            BASE_OUTPUT_DIR="$2"
+            shift 2
+            ;;
+        -w)
+            ALISCORE_OPTS="${ALISCORE_OPTS} -w $2"
+            shift 2
+            ;;
+        -r)
+            ALISCORE_OPTS="${ALISCORE_OPTS} -r $2"
+            shift 2
+            ;;
+        -N)
+            ALISCORE_OPTS="${ALISCORE_OPTS} -N"
+            shift
+            ;;
+        -t)
+            if [ ! -f "$2" ]; then
+                echo "ERROR: Tree file not found: $2"
+                exit 1
+            fi
+            ALISCORE_OPTS="${ALISCORE_OPTS} -t $2"
+            shift 2
+            ;;
+        -l)
+            ALISCORE_OPTS="${ALISCORE_OPTS} -l $2"
+            shift 2
+            ;;
+        -o)
+            ALISCORE_OPTS="${ALISCORE_OPTS} -o $2"
+            shift 2
+            ;;
+        *)
+            echo "ERROR: Unknown option: $1"
+            usage
+            ;;
+    esac
+done
+
+# Get alignment name without extension
+ALIGNMENT_NAME=$(basename "${ALIGNMENT}" .fas)
+ALIGNMENT_NAME=$(basename "${ALIGNMENT_NAME}" .fasta)
+
+# Create base output directory and specific directory for this alignment
+mkdir -p "${BASE_OUTPUT_DIR}"
+OUTPUT_DIR="${BASE_OUTPUT_DIR}/aliscore_${ALIGNMENT_NAME}"
+mkdir -p "${OUTPUT_DIR}"
+
+# Copy alignment to output directory
+cp "${ALIGNMENT}" "${OUTPUT_DIR}/"
+
+# Change to output directory
+cd "${OUTPUT_DIR}"
+
+# Run Aliscore
+echo "Running Aliscore on ${ALIGNMENT}..."
+echo "Options: ${ALISCORE_OPTS}"
+echo "Aliscore script: ${ALISCORE_SCRIPT}"
+
+# Construct and run Aliscore command
+ALISCORE_CMD="perl -I${SCRIPT_DIR} ${ALISCORE_SCRIPT} -i $(basename ${ALIGNMENT}) ${ALISCORE_OPTS}"
+echo "Command: ${ALISCORE_CMD}"
+
+eval ${ALISCORE_CMD}
+
+# Check if Aliscore completed successfully
+if [ $? -eq 0 ]; then
+    echo "Aliscore completed successfully for ${ALIGNMENT}"
+
+    # List output files
+    echo ""
+    echo "Output files in ${OUTPUT_DIR}:"
+    ls -lh *List*.txt *Profile*.txt *.svg 2>/dev/null || echo "  (some expected files not generated)"
+
+    # Report RSS positions if found
+    if [ -f "$(basename ${ALIGNMENT})_List_random.txt" ]; then
+        RSS_COUNT=$(wc -w < "$(basename ${ALIGNMENT})_List_random.txt")
+        echo ""
+        echo "Identified ${RSS_COUNT} randomly similar sequence positions"
+        echo "See: ${OUTPUT_DIR}/$(basename ${ALIGNMENT})_List_random.txt"
+    fi
+else
+    echo "ERROR: Aliscore failed for ${ALIGNMENT}"
+    cd ..
+    exit 1
+fi
+
+# Return to parent directory
+cd ..
+
+echo "Done: ${ALIGNMENT} -> ${OUTPUT_DIR}"
--- a/skills/phylo_from_buscos/scripts/run_aliscore_alicut_batch.sh
+++ b/skills/phylo_from_buscos/scripts/run_aliscore_alicut_batch.sh
@@ -0,0 +1,270 @@
+#!/bin/bash
+
+# run_aliscore_alicut_batch.sh
+# Batch processing script for Aliscore + ALICUT alignment trimming
+# Processes all alignments in a directory through both tools sequentially
+#
+# Usage:
+#   bash run_aliscore_alicut_batch.sh <alignment_dir> [options]
+#
+# This script:
+#   1. Runs Aliscore on all alignments to identify RSS
+#   2. Runs ALICUT on each Aliscore output to remove RSS
+#   3. Collects trimmed alignments in output directory
+#
+# Requirements:
+#   - run_aliscore.sh and run_alicut.sh in the same directory as this script
+#   - Aliscore.02.2.pl and ALICUT_V2.31.pl available
+
+set -euo pipefail
+
+# Script directory
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Function to display usage
+usage() {
+    cat <<EOF
+Usage: $0 [alignment_dir] [options]
+
+Batch process multiple alignments through Aliscore and ALICUT.
+
+Arguments:
+  alignment_dir   Directory containing aligned FASTA files (*.fas)
+
+Options:
+  -o DIR         Output directory for trimmed alignments (default: aliscore_alicut_trimmed)
+  -d DIR         Base directory for Aliscore outputs (default: aliscore_output)
+  -w INT         Aliscore window size (default: 4)
+  -r INT         Aliscore random pairs (default: 4*N)
+  -N             Aliscore: treat gaps as ambiguous (recommended for AA)
+  --remain-stems ALICUT: remain RNA stem positions
+  --remove-codon ALICUT: remove entire codons (for back-translation)
+  --remove-3rd   ALICUT: remove only 3rd codon positions
+  -h             Display this help message
+
+Examples:
+  # Basic usage for amino acid alignments
+  bash run_aliscore_alicut_batch.sh aligned_aa/ -N
+
+  # Custom window size
+  bash run_aliscore_alicut_batch.sh aligned_aa/ -w 6 -N
+
+  # With RNA structure preservation
+  bash run_aliscore_alicut_batch.sh aligned_rrna/ --remain-stems
+
+Output:
+  - aliscore_output/aliscore_[locus]/  : Individual Aliscore results per locus
+  - aliscore_alicut_trimmed/           : Final trimmed alignments
+  - aliscore_alicut_trimmed/trimming_summary.txt : Statistics for all loci
+
+EOF
+    exit 0
+}
+
+# Default parameters
+ALIGNMENT_DIR=""
+OUTPUT_DIR="aliscore_alicut_trimmed"
+ALISCORE_BASE_DIR="aliscore_output"
+ALISCORE_OPTS=""
+ALICUT_OPTS="-s"  # Silent mode by default
+
+if [ $# -eq 0 ]; then
+    usage
+fi
+
+ALIGNMENT_DIR="$1"
+shift
+
+# Validate alignment directory
+if [ ! -d "${ALIGNMENT_DIR}" ]; then
+    echo "ERROR: Alignment directory not found: ${ALIGNMENT_DIR}"
+    exit 1
+fi
+
+# Parse options
+while [ $# -gt 0 ]; do
+    case "$1" in
+        -h|--help)
+            usage
+            ;;
+        -o|--output)
+            OUTPUT_DIR="$2"
+            shift 2
+            ;;
+        -d|--aliscore-dir)
+            ALISCORE_BASE_DIR="$2"
+            shift 2
+            ;;
+        -w)
+            ALISCORE_OPTS="${ALISCORE_OPTS} -w $2"
+            shift 2
+            ;;
+        -r)
+            ALISCORE_OPTS="${ALISCORE_OPTS} -r $2"
+            shift 2
+            ;;
+        -N)
+            ALISCORE_OPTS="${ALISCORE_OPTS} -N"
+            shift
+            ;;
+        --remain-stems)
+            ALICUT_OPTS="${ALICUT_OPTS} -r"
+            shift
+            ;;
+        --remove-codon)
+            ALICUT_OPTS="${ALICUT_OPTS} -c"
+            shift
+            ;;
+        --remove-3rd)
+            ALICUT_OPTS="${ALICUT_OPTS} -3"
+            shift
+            ;;
+        *)
+            echo "ERROR: Unknown option: $1"
+            usage
+            ;;
+    esac
+done
+
+# Check for wrapper scripts
+RUN_ALISCORE="${SCRIPT_DIR}/run_aliscore.sh"
+RUN_ALICUT="${SCRIPT_DIR}/run_alicut.sh"
+
+if [ ! -f "${RUN_ALISCORE}" ]; then
+    echo "ERROR: run_aliscore.sh not found: ${RUN_ALISCORE}"
+    exit 1
+fi
+
+if [ ! -f "${RUN_ALICUT}" ]; then
+    echo "ERROR: run_alicut.sh not found: ${RUN_ALICUT}"
+    exit 1
+fi
+
+# Create output directory
+mkdir -p "${OUTPUT_DIR}"
+
+# Find all FASTA files
+ALIGNMENTS=($(find "${ALIGNMENT_DIR}" -maxdepth 1 -name "*.fas" -o -name "*.fasta"))
+
+if [ ${#ALIGNMENTS[@]} -eq 0 ]; then
+    echo "ERROR: No FASTA files found in ${ALIGNMENT_DIR}"
+    exit 1
+fi
+
+echo "Found ${#ALIGNMENTS[@]} alignments to process"
+echo "Aliscore options: ${ALISCORE_OPTS}"
+echo "ALICUT options: ${ALICUT_OPTS}"
+echo ""
+
+# Start the per-locus statistics table with its tab-separated column header
+SUMMARY_FILE="${OUTPUT_DIR}/trimming_summary.txt"
+printf '%b\n' "Locus\tOriginal_Length\tTrimmed_Length\tRemoved_Positions\tPercent_Removed\tRSS_Count" > "${SUMMARY_FILE}"
+
+# Tallies of alignments that made it through both steps and of those that did not
+FAIL_COUNT=0
+SUCCESS_COUNT=0
+
+for ALIGNMENT in "${ALIGNMENTS[@]}"; do
+    LOCUS=$(basename "${ALIGNMENT}" .fas)
+    LOCUS=$(basename "${LOCUS}" .fasta)
+
+    echo "=========================================="
+    echo "Processing: ${LOCUS}"
+    echo "=========================================="
+
+    # Step 1: Run Aliscore
+    echo ""
+    echo "Step 1/2: Running Aliscore..."
+
+    if bash "${RUN_ALISCORE}" "${ALIGNMENT}" -d "${ALISCORE_BASE_DIR}" ${ALISCORE_OPTS}; then
+        echo "Aliscore completed for ${LOCUS}"
+    else
+        echo "ERROR: Aliscore failed for ${LOCUS}"
+        FAIL_COUNT=$((FAIL_COUNT + 1))
+        continue
+    fi
+
+    # Step 2: Run ALICUT
+    echo ""
+    echo "Step 2/2: Running ALICUT..."
+
+    ALISCORE_DIR="${ALISCORE_BASE_DIR}/aliscore_${LOCUS}"
+
+    if [ ! -d "${ALISCORE_DIR}" ]; then
+        echo "ERROR: Aliscore output directory not found: ${ALISCORE_DIR}"
+        FAIL_COUNT=$((FAIL_COUNT + 1))
+        continue
+    fi
+
+    if bash "${RUN_ALICUT}" "${ALISCORE_DIR}" ${ALICUT_OPTS}; then
+        echo "ALICUT completed for ${LOCUS}"
+    else
+        echo "ERROR: ALICUT failed for ${LOCUS}"
+        FAIL_COUNT=$((FAIL_COUNT + 1))
+        continue
+    fi
+
+    # Copy trimmed alignment to output directory
+    TRIMMED_FILE=$(find "${ALISCORE_DIR}" -name "ALICUT_*.fas" -o -name "ALICUT_*.fasta" | head -n 1)
+
+    if [ -n "${TRIMMED_FILE}" ] && [ -f "${TRIMMED_FILE}" ]; then
+        cp "${TRIMMED_FILE}" "${OUTPUT_DIR}/${LOCUS}_trimmed.fas"
+        echo "Trimmed alignment: ${OUTPUT_DIR}/${LOCUS}_trimmed.fas"
+
+        # Calculate statistics (handle multi-line FASTA format)
+        ORIGINAL_LENGTH=$(awk '/^>/ {if (seq) {print seq; seq=""}; next} {seq = seq $0} END {if (seq) print seq}' "${ALIGNMENT}" | head -n 1 | tr -d ' ' | wc -c)
+        TRIMMED_LENGTH=$(awk '/^>/ {if (seq) {print seq; seq=""}; next} {seq = seq $0} END {if (seq) print seq}' "${TRIMMED_FILE}" | head -n 1 | tr -d ' ' | wc -c)
+        REMOVED_LENGTH=$((ORIGINAL_LENGTH - TRIMMED_LENGTH))
+        PERCENT_REMOVED=$(awk "BEGIN {printf \"%.2f\", (${REMOVED_LENGTH}/${ORIGINAL_LENGTH})*100}")
+
+        # Count RSS positions
+        LIST_FILE=$(find "${ALISCORE_DIR}" -name "*_List_*.txt" | head -n 1)
+        RSS_COUNT=$(wc -w < "${LIST_FILE}" 2>/dev/null || echo "0")
+
+        # Append to summary
+        echo -e "${LOCUS}\t${ORIGINAL_LENGTH}\t${TRIMMED_LENGTH}\t${REMOVED_LENGTH}\t${PERCENT_REMOVED}\t${RSS_COUNT}" >> "${SUMMARY_FILE}"
+
+        SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
+    else
+        echo "WARNING: Trimmed file not found for ${LOCUS}"
+        FAIL_COUNT=$((FAIL_COUNT + 1))
+    fi
+
+    echo ""
+done
+
+# Final report
+echo "=========================================="
+echo "BATCH PROCESSING COMPLETE"
+echo "=========================================="
+echo ""
+echo "Successfully processed: ${SUCCESS_COUNT}/${#ALIGNMENTS[@]} alignments"
+echo "Failed: ${FAIL_COUNT}/${#ALIGNMENTS[@]} alignments"
+echo ""
+echo "Output directory: ${OUTPUT_DIR}"
+echo "Trimmed alignments: ${OUTPUT_DIR}/*_trimmed.fas"
+echo "Summary statistics: ${SUMMARY_FILE}"
+echo ""
+
+# Display summary statistics
+if [ ${SUCCESS_COUNT} -gt 0 ]; then
+    echo "Overall trimming statistics:"
+    awk 'NR>1 {
+        total_orig += $2;
+        total_trim += $3;
+        total_removed += $4;
+        count++
+    }
+    END {
+        if (count > 0) {
+            avg_removed = (total_removed / total_orig) * 100;
+            printf "  Total positions before: %d\n", total_orig;
+            printf "  Total positions after:  %d\n", total_trim;
+            printf "  Total removed:          %d (%.2f%%)\n", total_removed, avg_removed;
+            printf "  Average per locus:      %.2f%% removed\n", avg_removed;
+        }
+    }' "${SUMMARY_FILE}"
+fi
+
+echo ""
+echo "Done!"