#!/usr/bin/env python3
|
|
"""
|
|
Tree operations helper script for common ETE toolkit tasks.
|
|
|
|
Provides command-line interface for basic tree operations like:
|
|
- Format conversion
|
|
- Rooting (outgroup, midpoint)
|
|
- Pruning
|
|
- Basic statistics
|
|
- ASCII visualization
|
|
"""
|
|
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
try:
|
|
from ete3 import Tree
|
|
except ImportError:
|
|
print("Error: ete3 not installed. Install with: pip install ete3")
|
|
sys.exit(1)
|
|
|
|
|
|
def load_tree(tree_file, format_num=0):
    """Load a tree from a file, terminating the program if that fails.

    Args:
        tree_file: Path of the tree file. It is converted with ``str()``
            before being handed to ``Tree``.
        format_num: Newick format number passed to ``Tree`` as ``format=``
            (default: 0).

    Returns:
        The ``Tree`` object built from ``tree_file``.

    Raises:
        SystemExit: With status 1 when constructing the tree raises any
            exception; the reason is reported on stderr first.
    """
    try:
        return Tree(str(tree_file), format=format_num)
    except Exception as e:
        # Diagnostics belong on stderr: several commands (leaves, ascii,
        # stats) print their actual result on stdout, which may be piped.
        print(f"Error loading tree: {e}", file=sys.stderr)
        sys.exit(1)
|
|
|
|
|
|
def convert_format(tree_file, output, in_format=0, out_format=1):
    """Rewrite a tree file using a different Newick format number.

    Args:
        tree_file: Input tree file, read via ``load_tree`` with ``in_format``.
        output: Destination path; converted with ``str()`` before writing.
        in_format: Newick format number used for reading (default: 0).
        out_format: Newick format number used for writing (default: 1).
    """
    source_tree = load_tree(tree_file, in_format)
    destination = str(output)
    source_tree.write(outfile=destination, format=out_format)

    summary = f"Converted {tree_file} (format {in_format}) → {output} (format {out_format})"
    print(summary)
|
|
|
|
|
|
def reroot_tree(tree_file, output, outgroup=None, midpoint=False, format_num=0):
    """Reroot a tree by midpoint or by a named outgroup and save the result.

    When both ``midpoint`` and ``outgroup`` are supplied, midpoint rooting
    takes precedence and ``outgroup`` is ignored.

    Args:
        tree_file: Input tree file, read via ``load_tree``.
        output: Destination path; converted with ``str()`` before writing.
        outgroup: Name of the node to use as outgroup, looked up with
            ``tree & outgroup``.
        midpoint: If true, root on the node returned by
            ``tree.get_midpoint_outgroup()``.
        format_num: Newick format number used for both reading and writing
            (default: 0).

    Raises:
        SystemExit: With status 1 when neither rooting method is requested,
            when the outgroup lookup fails, or when rerooting itself fails.
            The reason is reported on stderr.
    """
    # Validate the request before parsing the tree so that a bad invocation
    # fails fast instead of after loading a potentially large file.
    if not midpoint and not outgroup:
        print("Error: Must specify either --outgroup or --midpoint", file=sys.stderr)
        sys.exit(1)

    tree = load_tree(tree_file, format_num)

    if midpoint:
        try:
            tree.set_outgroup(tree.get_midpoint_outgroup())
        except Exception as e:
            # Report a clean CLI error rather than a traceback if the
            # midpoint node cannot be used as outgroup.
            print(f"Error: Could not midpoint-root tree: {e}", file=sys.stderr)
            sys.exit(1)
        print("Rerooted tree using midpoint method")
    else:
        # Keep the lookup and the rerooting in separate try blocks so that a
        # rerooting failure is not misreported as a missing outgroup.
        try:
            outgroup_node = tree & outgroup
        except Exception as e:
            print(f"Error: Could not find outgroup '{outgroup}': {e}", file=sys.stderr)
            sys.exit(1)
        try:
            tree.set_outgroup(outgroup_node)
        except Exception as e:
            print(f"Error: Could not reroot on outgroup '{outgroup}': {e}", file=sys.stderr)
            sys.exit(1)
        print(f"Rerooted tree using outgroup: {outgroup}")

    tree.write(outfile=str(output), format=format_num)
    print(f"Saved rerooted tree to: {output}")
|
|
|
|
|
|
def prune_tree(tree_file, output, keep_taxa, preserve_length=True, format_num=0):
    """Prune a tree down to a chosen set of taxa and save the result.

    Args:
        tree_file: Input tree file, read via ``load_tree``.
        output: Destination path; converted with ``str()`` before writing.
        keep_taxa: Either the path of an existing regular file listing one
            taxon name per line, or a comma-separated string of taxon names.
            Surrounding whitespace, blank entries and repeated names are
            discarded.
        preserve_length: Passed to ``tree.prune`` as
            ``preserve_branch_length`` (default: True).
        format_num: Newick format number used for both reading and writing
            (default: 0).

    Raises:
        SystemExit: With status 1 when no taxon name remains after parsing
            ``keep_taxa``, or when pruning or writing raises. The reason is
            reported on stderr.
    """
    taxa_file = Path(keep_taxa)
    try:
        # is_file() rather than exists(): a directory (including "" which
        # resolves to ".") must not be opened as a taxa list.
        reads_from_file = taxa_file.is_file()
    except (OSError, ValueError):
        # A long comma-separated list may not be a valid file name at all;
        # in that case it can only be an inline list.
        reads_from_file = False

    if reads_from_file:
        with open(taxa_file) as f:
            names = [line.strip() for line in f]
    else:
        names = [t.strip() for t in keep_taxa.split(",")]

    # Drop blanks (e.g. from "a,b,") and duplicates while keeping input order.
    taxa = list(dict.fromkeys(name for name in names if name))
    if not taxa:
        print("Error: No taxa specified to keep", file=sys.stderr)
        sys.exit(1)

    tree = load_tree(tree_file, format_num)

    print(f"Pruning tree to {len(taxa)} taxa")

    try:
        tree.prune(taxa, preserve_branch_length=preserve_length)
        tree.write(outfile=str(output), format=format_num)
    except Exception as e:
        print(f"Error pruning tree: {e}", file=sys.stderr)
        sys.exit(1)
    else:
        print(f"Pruned tree saved to: {output}")
        print(f"Retained {len(tree)} leaves")
|
|
|
|
|
|
def tree_stats(tree_file, format_num=0):
    """Print a summary report for a tree file.

    The report contains the leaf count, the total node count, the distance
    to and name of the leaf returned by ``tree.get_farthest_leaf()``, and
    mean/min/max figures for branch lengths (all non-root nodes) and support
    values (all non-leaf nodes).

    Args:
        tree_file: Input tree file, read via ``load_tree``.
        format_num: Newick format number used for reading (default: 0).
    """
    tree = load_tree(tree_file, format_num)

    def report(title, values, digits):
        # Emit one mean/min/max section; sections with no values are skipped.
        if not values:
            return
        print(f"\n{title}:")
        print(f" Mean: {sum(values)/len(values):.{digits}f}")
        print(f" Min: {min(values):.{digits}f}")
        print(f" Max: {max(values):.{digits}f}")

    print("\n=== Tree Statistics ===")
    print(f"File: {tree_file}")
    print(f"Number of leaves: {len(tree)}")
    node_count = sum(1 for _ in tree.traverse())
    print(f"Total nodes: {node_count}")

    deepest_leaf, depth = tree.get_farthest_leaf()
    print(f"Tree depth: {depth:.4f}")
    print(f"Farthest leaf: {deepest_leaf.name}")

    # Branch lengths of every node except the root.
    lengths = []
    for node in tree.traverse():
        if node.is_root():
            continue
        lengths.append(node.dist)
    report("Branch length statistics", lengths, 4)

    # Support values of internal (non-leaf) nodes.
    support_values = []
    for node in tree.traverse():
        if node.is_leaf():
            continue
        if hasattr(node, "support"):
            support_values.append(node.support)
    report("Support value statistics", support_values, 2)

    print()
|
|
|
|
|
|
def show_ascii(tree_file, format_num=0, show_internal=True):
    """Print an ASCII drawing of the tree stored in a file.

    Args:
        tree_file: Input tree file, read via ``load_tree``.
        format_num: Newick format number used for reading (default: 0).
        show_internal: Forwarded to ``tree.get_ascii`` as ``show_internal``
            (default: True).
    """
    loaded = load_tree(tree_file, format_num)
    drawing = loaded.get_ascii(show_internal=show_internal)
    print(drawing)
|
|
|
|
|
|
def list_leaves(tree_file, format_num=0):
    """Print the name of every leaf in the tree, one per line.

    Args:
        tree_file: Input tree file, read via ``load_tree``.
        format_num: Newick format number used for reading (default: 0).
    """
    loaded = load_tree(tree_file, format_num)
    leaf_iter = iter(loaded)
    while True:
        try:
            current = next(leaf_iter)
        except StopIteration:
            break
        print(current.name)
|
|
|
|
|
|
def main():
    """Parse the command line and run the requested tree operation.

    Sub-commands: ``convert``, ``reroot``, ``prune``, ``stats``, ``ascii``
    and ``leaves``. When no sub-command is given, the help text is printed
    and the program exits with status 1.
    """
    usage_examples = """
Examples:
# Convert format
%(prog)s convert input.nw output.nw --in-format 0 --out-format 1

# Midpoint root
%(prog)s reroot input.nw output.nw --midpoint

# Reroot with outgroup
%(prog)s reroot input.nw output.nw --outgroup "Outgroup_species"

# Prune tree
%(prog)s prune input.nw output.nw --keep-taxa "speciesA,speciesB,speciesC"

# Show statistics
%(prog)s stats input.nw

# Display as ASCII
%(prog)s ascii input.nw

# List all leaves
%(prog)s leaves input.nw
"""

    parser = argparse.ArgumentParser(
        description="ETE toolkit tree operations helper",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    commands = parser.add_subparsers(dest="command", help="Command to execute")

    def add_format_option(sub):
        # The same --format option is shared by every command except convert.
        sub.add_argument("--format", type=int, default=0, help="Newick format (default: 0)")

    # convert
    sub = commands.add_parser("convert", help="Convert tree format")
    sub.add_argument("input", help="Input tree file")
    sub.add_argument("output", help="Output tree file")
    sub.add_argument("--in-format", type=int, default=0, help="Input format (default: 0)")
    sub.add_argument("--out-format", type=int, default=1, help="Output format (default: 1)")

    # reroot
    sub = commands.add_parser("reroot", help="Reroot tree")
    sub.add_argument("input", help="Input tree file")
    sub.add_argument("output", help="Output tree file")
    sub.add_argument("--outgroup", help="Outgroup taxon name")
    sub.add_argument("--midpoint", action="store_true", help="Use midpoint rooting")
    add_format_option(sub)

    # prune
    sub = commands.add_parser("prune", help="Prune tree to specified taxa")
    sub.add_argument("input", help="Input tree file")
    sub.add_argument("output", help="Output tree file")
    sub.add_argument(
        "--keep-taxa",
        required=True,
        help="Taxa to keep (comma-separated or file path)",
    )
    sub.add_argument(
        "--no-preserve-length",
        action="store_true",
        help="Don't preserve branch lengths",
    )
    add_format_option(sub)

    # stats
    sub = commands.add_parser("stats", help="Display tree statistics")
    sub.add_argument("input", help="Input tree file")
    add_format_option(sub)

    # ascii
    sub = commands.add_parser("ascii", help="Display tree as ASCII art")
    sub.add_argument("input", help="Input tree file")
    add_format_option(sub)
    sub.add_argument(
        "--no-internal",
        action="store_true",
        help="Don't show internal node names",
    )

    # leaves
    sub = commands.add_parser("leaves", help="List all leaf names")
    sub.add_argument("input", help="Input tree file")
    add_format_option(sub)

    opts = parser.parse_args()

    if not opts.command:
        parser.print_help()
        sys.exit(1)

    # Map each sub-command to a thunk; only the selected one is evaluated.
    dispatch = {
        "convert": lambda: convert_format(
            opts.input, opts.output, opts.in_format, opts.out_format
        ),
        "reroot": lambda: reroot_tree(
            opts.input, opts.output, opts.outgroup, opts.midpoint, opts.format
        ),
        "prune": lambda: prune_tree(
            opts.input,
            opts.output,
            opts.keep_taxa,
            not opts.no_preserve_length,
            opts.format,
        ),
        "stats": lambda: tree_stats(opts.input, opts.format),
        "ascii": lambda: show_ascii(opts.input, opts.format, not opts.no_internal),
        "leaves": lambda: list_leaves(opts.input, opts.format),
    }
    run = dispatch.get(opts.command)
    if run is not None:
        run()
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|