--- title: "BioGeoBEARS Biogeographic Analysis" author: "Generated by Claude Code" date: "`r Sys.Date()`" output: html_document: toc: true toc_float: true code_folding: show theme: flatly params: tree_file: "tree.nwk" geog_file: "geography.data" max_range_size: 4 models: "DEC,DEC+J,DIVALIKE,DIVALIKE+J" output_dir: "results" --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE) library(BioGeoBEARS) library(ape) library(knitr) library(kableExtra) ``` # Analysis Parameters ```{r parameters, echo=FALSE} params_df <- data.frame( Parameter = c("Tree file", "Geography file", "Max range size", "Models to test", "Output directory"), Value = c(params$tree_file, params$geog_file, params$max_range_size, params$models, params$output_dir) ) kable(params_df, caption = "Analysis Parameters") %>% kable_styling(bootstrap_options = c("striped", "hover")) ``` # Input Data ## Phylogenetic Tree ```{r load-tree} trfn <- params$tree_file tr <- read.tree(trfn) cat(paste("Number of tips:", length(tr$tip.label), "\n")) cat(paste("Tree is rooted:", is.rooted(tr), "\n")) cat(paste("Tree is ultrametric:", is.ultrametric(tr), "\n")) # Plot tree plot(tr, cex = 0.6, main = "Input Phylogeny") ``` ## Geographic Distribution Data ```{r load-geography} geogfn <- params$geog_file tipranges <- getranges_from_LagrangePHYLIP(lgdata_fn = geogfn) cat(paste("Number of species:", nrow(tipranges@df), "\n")) cat(paste("Number of areas:", ncol(tipranges@df), "\n")) cat(paste("Area names:", paste(names(tipranges@df), collapse = ", "), "\n\n")) # Display geography matrix kable(tipranges@df, caption = "Species Distribution Matrix (1 = present, 0 = absent)") %>% kable_styling(bootstrap_options = c("striped", "hover"), font_size = 10) %>% scroll_box(height = "400px") ``` ## State Space Setup ```{r state-space} max_range_size <- params$max_range_size numareas <- ncol(tipranges@df) num_states <- numstates_from_numareas(numareas = numareas, maxareas = max_range_size, include_null_range = TRUE) cat(paste("Maximum range size:", max_range_size, "\n")) cat(paste("Number of possible states:", num_states, "\n")) ``` # Model Fitting ```{r setup-output} # Create output directory if (!dir.exists(params$output_dir)) { dir.create(params$output_dir, recursive = TRUE) } # Parse models to run models_to_run <- unlist(strsplit(params$models, ",")) models_to_run <- trimws(models_to_run) cat("Models to fit:\n") for (model in models_to_run) { cat(paste(" -", model, "\n")) } ``` ```{r model-fitting, results='hide'} # Storage for results results_list <- list() model_comparison <- data.frame( Model = character(), LnL = numeric(), nParams = integer(), AIC = numeric(), AICc = numeric(), d = numeric(), e = numeric(), j = numeric(), stringsAsFactors = FALSE ) # Helper function to setup and run a model run_biogeobears_model <- function(model_name, BioGeoBEARS_run_object) { cat(paste("\n\nFitting model:", model_name, "\n")) # Configure model based on name if (grepl("DEC", model_name)) { # DEC model (default settings) BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","type"] = "free" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","type"] = "free" } else if (grepl("DIVALIKE", model_name)) { # DIVALIKE model (vicariance only, no subset sympatry) BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","init"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","est"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","type"] = "free" } else if (grepl("BAYAREALIKE", model_name)) { # BAYAREALIKE model (sympatry only, no vicariance) BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["s","type"] = "free" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","init"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["v","est"] = 0.0 } # Add +J parameter if specified if (grepl("\\+J", model_name)) { BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","type"] = "free" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","init"] = 0.01 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","est"] = 0.01 } else { BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","type"] = "fixed" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","init"] = 0.0 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","est"] = 0.0 } # Run optimization res <- bears_optim_run(BioGeoBEARS_run_object) return(res) } # Base run object setup BioGeoBEARS_run_object <- define_BioGeoBEARS_run() BioGeoBEARS_run_object$trfn <- trfn BioGeoBEARS_run_object$geogfn <- geogfn BioGeoBEARS_run_object$max_range_size <- max_range_size BioGeoBEARS_run_object$min_branchlength <- 0.000001 BioGeoBEARS_run_object$include_null_range <- TRUE BioGeoBEARS_run_object$force_sparse <- FALSE BioGeoBEARS_run_object$speedup <- TRUE BioGeoBEARS_run_object$use_optimx <- TRUE BioGeoBEARS_run_object$calc_ancprobs <- TRUE BioGeoBEARS_run_object <- readfiles_BioGeoBEARS_run(BioGeoBEARS_run_object) BioGeoBEARS_run_object <- calc_loglike_sp(BioGeoBEARS_run_object) # Fit each model for (model in models_to_run) { tryCatch({ res <- run_biogeobears_model(model, BioGeoBEARS_run_object) results_list[[model]] <- res # Save result save(res, file = file.path(params$output_dir, paste0(model, "_result.Rdata"))) # Extract parameters for comparison params_table <- res$outputs@params_table model_comparison <- rbind(model_comparison, data.frame( Model = model, LnL = res$outputs@loglikelihood, nParams = sum(params_table$type == "free"), AIC = res$outputs@AIC, AICc = res$outputs@AICc, d = params_table["d", "est"], e = params_table["e", "est"], j = params_table["j", "est"], stringsAsFactors = FALSE )) }, error = function(e) { cat(paste("Error fitting model", model, ":", e$message, "\n")) }) } ``` # Model Comparison ```{r model-comparison} # Calculate AIC weights if (nrow(model_comparison) > 0) { model_comparison$delta_AIC <- model_comparison$AIC - min(model_comparison$AIC) model_comparison$AIC_weight <- exp(-0.5 * model_comparison$delta_AIC) / sum(exp(-0.5 * model_comparison$delta_AIC)) # Sort by AIC model_comparison <- model_comparison[order(model_comparison$AIC), ] kable(model_comparison, digits = 3, caption = "Model Comparison (sorted by AIC)") %>% kable_styling(bootstrap_options = c("striped", "hover")) %>% row_spec(1, bold = TRUE, background = "#d4edda") # Highlight best model # Model selection summary best_model <- model_comparison$Model[1] cat(paste("\n\nBest model by AIC:", best_model, "\n")) cat(paste("AIC weight:", round(model_comparison$AIC_weight[1], 3), "\n")) } ``` # Ancestral Range Reconstruction ## Best Model: `r if(exists('best_model')) best_model else 'TBD'` ```{r plot-best-model, fig.width=10, fig.height=12} if (exists('best_model') && best_model %in% names(results_list)) { res_best <- results_list[[best_model]] # Create plots directory plots_dir <- file.path(params$output_dir, "plots") if (!dir.exists(plots_dir)) { dir.create(plots_dir, recursive = TRUE) } # Plot with pie charts pdf(file.path(plots_dir, paste0(best_model, "_pie.pdf")), width = 10, height = 12) analysis_titletxt <- paste("BioGeoBEARS:", best_model) plot_BioGeoBEARS_results( results_object = res_best, analysis_titletxt = analysis_titletxt, addl_params = list("j"), plotwhat = "pie", label.offset = 0.5, tipcex = 0.7, statecex = 0.7, splitcex = 0.6, titlecex = 0.8, plotsplits = TRUE, include_null_range = TRUE, tr = tr, tipranges = tipranges ) dev.off() # Also create text plot pdf(file.path(plots_dir, paste0(best_model, "_text.pdf")), width = 10, height = 12) plot_BioGeoBEARS_results( results_object = res_best, analysis_titletxt = analysis_titletxt, addl_params = list("j"), plotwhat = "text", label.offset = 0.5, tipcex = 0.7, statecex = 0.7, splitcex = 0.6, titlecex = 0.8, plotsplits = TRUE, include_null_range = TRUE, tr = tr, tipranges = tipranges ) dev.off() # Display in notebook (pie chart version) plot_BioGeoBEARS_results( results_object = res_best, analysis_titletxt = analysis_titletxt, addl_params = list("j"), plotwhat = "pie", label.offset = 0.5, tipcex = 0.7, statecex = 0.7, splitcex = 0.6, titlecex = 0.8, plotsplits = TRUE, include_null_range = TRUE, tr = tr, tipranges = tipranges ) cat(paste("\n\nPlots saved to:", plots_dir, "\n")) } ``` # Parameter Estimates ```{r parameter-estimates, fig.width=10, fig.height=6} if (nrow(model_comparison) > 0) { # Extract base models (without +J) base_models <- model_comparison[!grepl("\\+J", model_comparison$Model), ] j_models <- model_comparison[grepl("\\+J", model_comparison$Model), ] par(mfrow = c(1, 3)) # Plot d (dispersal) estimates barplot(model_comparison$d, names.arg = model_comparison$Model, main = "Dispersal Rate (d)", ylab = "Rate", las = 2, cex.names = 0.8, col = ifelse(model_comparison$Model == best_model, "darkgreen", "lightblue")) # Plot e (extinction) estimates barplot(model_comparison$e, names.arg = model_comparison$Model, main = "Extinction Rate (e)", ylab = "Rate", las = 2, cex.names = 0.8, col = ifelse(model_comparison$Model == best_model, "darkgreen", "lightblue")) # Plot j (founder-event) estimates for +J models j_vals <- model_comparison$j j_vals[j_vals == 0] <- NA barplot(j_vals, names.arg = model_comparison$Model, main = "Founder-event Rate (j)", ylab = "Rate", las = 2, cex.names = 0.8, col = ifelse(model_comparison$Model == best_model, "darkgreen", "lightblue")) } ``` # Likelihood Ratio Tests ```{r lrt-tests} # Compare models with and without +J if (nrow(model_comparison) > 0) { lrt_results <- data.frame( Comparison = character(), Model1 = character(), Model2 = character(), LRT_statistic = numeric(), df = integer(), p_value = numeric(), stringsAsFactors = FALSE ) base_model_names <- c("DEC", "DIVALIKE", "BAYAREALIKE") for (base in base_model_names) { j_model <- paste0(base, "+J") if (base %in% model_comparison$Model && j_model %in% model_comparison$Model) { lnl_base <- model_comparison[model_comparison$Model == base, "LnL"] lnl_j <- model_comparison[model_comparison$Model == j_model, "LnL"] lrt_stat <- 2 * (lnl_j - lnl_base) df <- 1 # One additional parameter (j) p_val <- pchisq(lrt_stat, df = df, lower.tail = FALSE) lrt_results <- rbind(lrt_results, data.frame( Comparison = paste(base, "vs", j_model), Model1 = base, Model2 = j_model, LRT_statistic = lrt_stat, df = df, p_value = p_val, stringsAsFactors = FALSE )) } } if (nrow(lrt_results) > 0) { lrt_results$Significant <- ifelse(lrt_results$p_value < 0.05, "Yes*", "No") kable(lrt_results, digits = 4, caption = "Likelihood Ratio Tests (nested model comparisons)") %>% kable_styling(bootstrap_options = c("striped", "hover")) cat("\n* p < 0.05 indicates significant improvement with +J parameter\n") } } ``` # Session Info ```{r session-info} sessionInfo() ``` # Outputs All results have been saved to: **`r params$output_dir`** Files generated: - `[MODEL]_result.Rdata` - R data files with complete model results - `plots/[MODEL]_pie.pdf` - Phylogeny with pie charts showing ancestral range probabilities - `plots/[MODEL]_text.pdf` - Phylogeny with text labels showing most likely ancestral ranges - `biogeobears_analysis_template.html` - This HTML report To load a saved result in R: ```r load("results/DEC+J_result.Rdata") ```