405 lines
12 KiB
Plaintext
405 lines
12 KiB
Plaintext
---
|
|
title: "BioGeoBEARS Biogeographic Analysis"
|
|
author: "Generated by Claude Code"
|
|
date: "`r Sys.Date()`"
|
|
output:
|
|
html_document:
|
|
toc: true
|
|
toc_float: true
|
|
code_folding: show
|
|
theme: flatly
|
|
params:
|
|
tree_file: "tree.nwk"
|
|
geog_file: "geography.data"
|
|
max_range_size: 4
|
|
models: "DEC,DEC+J,DIVALIKE,DIVALIKE+J"
|
|
output_dir: "results"
|
|
---
|
|
|
|
```{r setup, include=FALSE}
|
|
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
|
|
library(BioGeoBEARS)
|
|
library(ape)
|
|
library(knitr)
|
|
library(kableExtra)
|
|
```
|
|
|
|
# Analysis Parameters
|
|
|
|
```{r parameters, echo=FALSE}
|
|
params_df <- data.frame(
|
|
Parameter = c("Tree file", "Geography file", "Max range size", "Models to test", "Output directory"),
|
|
Value = c(params$tree_file, params$geog_file, params$max_range_size, params$models, params$output_dir)
|
|
)
|
|
|
|
kable(params_df, caption = "Analysis Parameters") %>%
|
|
kable_styling(bootstrap_options = c("striped", "hover"))
|
|
```
|
|
|
|
# Input Data
|
|
|
|
## Phylogenetic Tree
|
|
|
|
```{r load-tree}
|
|
trfn <- params$tree_file
|
|
tr <- read.tree(trfn)
|
|
|
|
cat(paste("Number of tips:", length(tr$tip.label), "\n"))
|
|
cat(paste("Tree is rooted:", is.rooted(tr), "\n"))
|
|
cat(paste("Tree is ultrametric:", is.ultrametric(tr), "\n"))
|
|
|
|
# Plot tree
|
|
plot(tr, cex = 0.6, main = "Input Phylogeny")
|
|
```
|
|
|
|
## Geographic Distribution Data
|
|
|
|
```{r load-geography}
|
|
geogfn <- params$geog_file
|
|
tipranges <- getranges_from_LagrangePHYLIP(lgdata_fn = geogfn)
|
|
|
|
cat(paste("Number of species:", nrow(tipranges@df), "\n"))
|
|
cat(paste("Number of areas:", ncol(tipranges@df), "\n"))
|
|
cat(paste("Area names:", paste(names(tipranges@df), collapse = ", "), "\n\n"))
|
|
|
|
# Display geography matrix
|
|
kable(tipranges@df, caption = "Species Distribution Matrix (1 = present, 0 = absent)") %>%
|
|
kable_styling(bootstrap_options = c("striped", "hover"), font_size = 10) %>%
|
|
scroll_box(height = "400px")
|
|
```
|
|
|
|
## State Space Setup
|
|
|
|
```{r state-space}
|
|
max_range_size <- params$max_range_size
|
|
numareas <- ncol(tipranges@df)
|
|
|
|
num_states <- numstates_from_numareas(numareas = numareas,
|
|
maxareas = max_range_size,
|
|
include_null_range = TRUE)
|
|
|
|
cat(paste("Maximum range size:", max_range_size, "\n"))
|
|
cat(paste("Number of possible states:", num_states, "\n"))
|
|
```
|
|
|
|
# Model Fitting
|
|
|
|
```{r setup-output}
|
|
# Create output directory
|
|
if (!dir.exists(params$output_dir)) {
|
|
dir.create(params$output_dir, recursive = TRUE)
|
|
}
|
|
|
|
# Parse models to run
|
|
models_to_run <- unlist(strsplit(params$models, ","))
|
|
models_to_run <- trimws(models_to_run)
|
|
|
|
cat("Models to fit:\n")
|
|
for (model in models_to_run) {
|
|
cat(paste(" -", model, "\n"))
|
|
}
|
|
```
|
|
|
|
```{r model-fitting, results='hide'}
|
|
# Storage for results
|
|
results_list <- list()
|
|
model_comparison <- data.frame(
|
|
Model = character(),
|
|
LnL = numeric(),
|
|
nParams = integer(),
|
|
AIC = numeric(),
|
|
AICc = numeric(),
|
|
d = numeric(),
|
|
e = numeric(),
|
|
j = numeric(),
|
|
stringsAsFactors = FALSE
|
|
)
|
|
|
|
# Helper function to setup and run a model
|
|
run_biogeobears_model <- function(model_name, BioGeoBEARS_run_object) {
|
|
cat(paste("\n\nFitting model:", model_name, "\n"))
|
|
|
|
# Configure model based on name
|
|
if (grepl("DEC", model_name)) {
|
|
# DEC model (default settings)
|
|
BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","type"] = "free"
|
|
BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","type"] = "free"
|
|
} else if (grepl("DIVALIKE", model_name)) {
|
|
# DIVALIKE model (vicariance only, no subset sympatry)
|
|
BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","type"] = "fixed"
|
|
BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","init"] = 0.0
|
|
BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","est"] = 0.0
|
|
BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","type"] = "free"
|
|
} else if (grepl("BAYAREALIKE", model_name)) {
|
|
# BAYAREALIKE model (sympatry only, no vicariance)
|
|
BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","type"] = "free"
|
|
BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","type"] = "fixed"
|
|
BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","init"] = 0.0
|
|
BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","est"] = 0.0
|
|
}
|
|
|
|
# Add +J parameter if specified
|
|
if (grepl("\\+J", model_name)) {
|
|
BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","type"] = "free"
|
|
BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","init"] = 0.01
|
|
BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","est"] = 0.01
|
|
} else {
|
|
BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","type"] = "fixed"
|
|
BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","init"] = 0.0
|
|
BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","est"] = 0.0
|
|
}
|
|
|
|
# Run optimization
|
|
res <- bears_optim_run(BioGeoBEARS_run_object)
|
|
|
|
return(res)
|
|
}
|
|
|
|
# Base run object setup
|
|
BioGeoBEARS_run_object <- define_BioGeoBEARS_run()
|
|
BioGeoBEARS_run_object$trfn <- trfn
|
|
BioGeoBEARS_run_object$geogfn <- geogfn
|
|
BioGeoBEARS_run_object$max_range_size <- max_range_size
|
|
BioGeoBEARS_run_object$min_branchlength <- 0.000001
|
|
BioGeoBEARS_run_object$include_null_range <- TRUE
|
|
BioGeoBEARS_run_object$force_sparse <- FALSE
|
|
BioGeoBEARS_run_object$speedup <- TRUE
|
|
BioGeoBEARS_run_object$use_optimx <- TRUE
|
|
BioGeoBEARS_run_object$calc_ancprobs <- TRUE
|
|
BioGeoBEARS_run_object <- readfiles_BioGeoBEARS_run(BioGeoBEARS_run_object)
|
|
BioGeoBEARS_run_object <- calc_loglike_sp(BioGeoBEARS_run_object)
|
|
|
|
# Fit each model
|
|
for (model in models_to_run) {
|
|
tryCatch({
|
|
res <- run_biogeobears_model(model, BioGeoBEARS_run_object)
|
|
results_list[[model]] <- res
|
|
|
|
# Save result
|
|
save(res, file = file.path(params$output_dir, paste0(model, "_result.Rdata")))
|
|
|
|
# Extract parameters for comparison
|
|
params_table <- res$outputs@params_table
|
|
model_comparison <- rbind(model_comparison, data.frame(
|
|
Model = model,
|
|
LnL = res$outputs@loglikelihood,
|
|
nParams = sum(params_table$type == "free"),
|
|
AIC = res$outputs@AIC,
|
|
AICc = res$outputs@AICc,
|
|
d = params_table["d", "est"],
|
|
e = params_table["e", "est"],
|
|
j = params_table["j", "est"],
|
|
stringsAsFactors = FALSE
|
|
))
|
|
}, error = function(e) {
|
|
cat(paste("Error fitting model", model, ":", e$message, "\n"))
|
|
})
|
|
}
|
|
```
|
|
|
|
# Model Comparison
|
|
|
|
```{r model-comparison}
|
|
# Calculate AIC weights
|
|
if (nrow(model_comparison) > 0) {
|
|
model_comparison$delta_AIC <- model_comparison$AIC - min(model_comparison$AIC)
|
|
model_comparison$AIC_weight <- exp(-0.5 * model_comparison$delta_AIC) /
|
|
sum(exp(-0.5 * model_comparison$delta_AIC))
|
|
|
|
# Sort by AIC
|
|
model_comparison <- model_comparison[order(model_comparison$AIC), ]
|
|
|
|
kable(model_comparison, digits = 3,
|
|
caption = "Model Comparison (sorted by AIC)") %>%
|
|
kable_styling(bootstrap_options = c("striped", "hover")) %>%
|
|
row_spec(1, bold = TRUE, background = "#d4edda") # Highlight best model
|
|
|
|
# Model selection summary
|
|
best_model <- model_comparison$Model[1]
|
|
cat(paste("\n\nBest model by AIC:", best_model, "\n"))
|
|
cat(paste("AIC weight:", round(model_comparison$AIC_weight[1], 3), "\n"))
|
|
}
|
|
```
|
|
|
|
# Ancestral Range Reconstruction
|
|
|
|
## Best Model: `r if(exists('best_model')) best_model else 'TBD'`
|
|
|
|
```{r plot-best-model, fig.width=10, fig.height=12}
|
|
if (exists('best_model') && best_model %in% names(results_list)) {
|
|
res_best <- results_list[[best_model]]
|
|
|
|
# Create plots directory
|
|
plots_dir <- file.path(params$output_dir, "plots")
|
|
if (!dir.exists(plots_dir)) {
|
|
dir.create(plots_dir, recursive = TRUE)
|
|
}
|
|
|
|
# Plot with pie charts
|
|
pdf(file.path(plots_dir, paste0(best_model, "_pie.pdf")), width = 10, height = 12)
|
|
|
|
analysis_titletxt <- paste("BioGeoBEARS:", best_model)
|
|
|
|
plot_BioGeoBEARS_results(
|
|
results_object = res_best,
|
|
analysis_titletxt = analysis_titletxt,
|
|
addl_params = list("j"),
|
|
plotwhat = "pie",
|
|
label.offset = 0.5,
|
|
tipcex = 0.7,
|
|
statecex = 0.7,
|
|
splitcex = 0.6,
|
|
titlecex = 0.8,
|
|
plotsplits = TRUE,
|
|
include_null_range = TRUE,
|
|
tr = tr,
|
|
tipranges = tipranges
|
|
)
|
|
|
|
dev.off()
|
|
|
|
# Also create text plot
|
|
pdf(file.path(plots_dir, paste0(best_model, "_text.pdf")), width = 10, height = 12)
|
|
|
|
plot_BioGeoBEARS_results(
|
|
results_object = res_best,
|
|
analysis_titletxt = analysis_titletxt,
|
|
addl_params = list("j"),
|
|
plotwhat = "text",
|
|
label.offset = 0.5,
|
|
tipcex = 0.7,
|
|
statecex = 0.7,
|
|
splitcex = 0.6,
|
|
titlecex = 0.8,
|
|
plotsplits = TRUE,
|
|
include_null_range = TRUE,
|
|
tr = tr,
|
|
tipranges = tipranges
|
|
)
|
|
|
|
dev.off()
|
|
|
|
# Display in notebook (pie chart version)
|
|
plot_BioGeoBEARS_results(
|
|
results_object = res_best,
|
|
analysis_titletxt = analysis_titletxt,
|
|
addl_params = list("j"),
|
|
plotwhat = "pie",
|
|
label.offset = 0.5,
|
|
tipcex = 0.7,
|
|
statecex = 0.7,
|
|
splitcex = 0.6,
|
|
titlecex = 0.8,
|
|
plotsplits = TRUE,
|
|
include_null_range = TRUE,
|
|
tr = tr,
|
|
tipranges = tipranges
|
|
)
|
|
|
|
cat(paste("\n\nPlots saved to:", plots_dir, "\n"))
|
|
}
|
|
```
|
|
|
|
# Parameter Estimates
|
|
|
|
```{r parameter-estimates, fig.width=10, fig.height=6}
|
|
if (nrow(model_comparison) > 0) {
|
|
# Extract base models (without +J)
|
|
base_models <- model_comparison[!grepl("\\+J", model_comparison$Model), ]
|
|
j_models <- model_comparison[grepl("\\+J", model_comparison$Model), ]
|
|
|
|
par(mfrow = c(1, 3))
|
|
|
|
# Plot d (dispersal) estimates
|
|
barplot(model_comparison$d, names.arg = model_comparison$Model,
|
|
main = "Dispersal Rate (d)", ylab = "Rate", las = 2, cex.names = 0.8,
|
|
col = ifelse(model_comparison$Model == best_model, "darkgreen", "lightblue"))
|
|
|
|
# Plot e (extinction) estimates
|
|
barplot(model_comparison$e, names.arg = model_comparison$Model,
|
|
main = "Extinction Rate (e)", ylab = "Rate", las = 2, cex.names = 0.8,
|
|
col = ifelse(model_comparison$Model == best_model, "darkgreen", "lightblue"))
|
|
|
|
# Plot j (founder-event) estimates for +J models
|
|
j_vals <- model_comparison$j
|
|
j_vals[j_vals == 0] <- NA
|
|
barplot(j_vals, names.arg = model_comparison$Model,
|
|
main = "Founder-event Rate (j)", ylab = "Rate", las = 2, cex.names = 0.8,
|
|
col = ifelse(model_comparison$Model == best_model, "darkgreen", "lightblue"))
|
|
}
|
|
```
|
|
|
|
# Likelihood Ratio Tests
|
|
|
|
```{r lrt-tests}
|
|
# Compare models with and without +J
|
|
if (nrow(model_comparison) > 0) {
|
|
lrt_results <- data.frame(
|
|
Comparison = character(),
|
|
Model1 = character(),
|
|
Model2 = character(),
|
|
LRT_statistic = numeric(),
|
|
df = integer(),
|
|
p_value = numeric(),
|
|
stringsAsFactors = FALSE
|
|
)
|
|
|
|
base_model_names <- c("DEC", "DIVALIKE", "BAYAREALIKE")
|
|
|
|
for (base in base_model_names) {
|
|
j_model <- paste0(base, "+J")
|
|
|
|
if (base %in% model_comparison$Model && j_model %in% model_comparison$Model) {
|
|
lnl_base <- model_comparison[model_comparison$Model == base, "LnL"]
|
|
lnl_j <- model_comparison[model_comparison$Model == j_model, "LnL"]
|
|
|
|
lrt_stat <- 2 * (lnl_j - lnl_base)
|
|
df <- 1 # One additional parameter (j)
|
|
p_val <- pchisq(lrt_stat, df = df, lower.tail = FALSE)
|
|
|
|
lrt_results <- rbind(lrt_results, data.frame(
|
|
Comparison = paste(base, "vs", j_model),
|
|
Model1 = base,
|
|
Model2 = j_model,
|
|
LRT_statistic = lrt_stat,
|
|
df = df,
|
|
p_value = p_val,
|
|
stringsAsFactors = FALSE
|
|
))
|
|
}
|
|
}
|
|
|
|
if (nrow(lrt_results) > 0) {
|
|
lrt_results$Significant <- ifelse(lrt_results$p_value < 0.05, "Yes*", "No")
|
|
|
|
kable(lrt_results, digits = 4,
|
|
caption = "Likelihood Ratio Tests (nested model comparisons)") %>%
|
|
kable_styling(bootstrap_options = c("striped", "hover"))
|
|
|
|
cat("\n* p < 0.05 indicates significant improvement with +J parameter\n")
|
|
}
|
|
}
|
|
```
|
|
|
|
# Session Info
|
|
|
|
```{r session-info}
|
|
sessionInfo()
|
|
```
|
|
|
|
# Outputs
|
|
|
|
All results have been saved to: **`r params$output_dir`**
|
|
|
|
Files generated:
|
|
|
|
- `[MODEL]_result.Rdata` - R data files with complete model results
|
|
- `plots/[MODEL]_pie.pdf` - Phylogeny with pie charts showing ancestral range probabilities
|
|
- `plots/[MODEL]_text.pdf` - Phylogeny with text labels showing most likely ancestral ranges
|
|
- `biogeobears_analysis_template.html` - This HTML report
|
|
|
|
To load a saved result in R:
|
|
```r
|
|
load("results/DEC+J_result.Rdata")
|
|
```
|