From faa6adcecff49d3ff5c933e80046402f19bb0e45 Mon Sep 17 00:00:00 2001
From: Zhongwei Li <lizhongwei.nkcs@gmail.com>
Date: Sun, 30 Nov 2025 08:48:12 +0800
Subject: [PATCH] Initial commit

---
 .claude-plugin/plugin.json                    |  12 +
 README.md                                     |   3 +
 plugin.lock.json                              |  68 +++
 skills/testing-r-packages/README.md           | 199 ++++++++
 skills/testing-r-packages/SKILL.md            | 424 +++++++++++++++++
 .../testing-r-packages/references/advanced.md | 410 ++++++++++++++++
 skills/testing-r-packages/references/bdd.md   | 448 ++++++++++++++++++
 .../testing-r-packages/references/fixtures.md | 333 +++++++++++++
 .../testing-r-packages/references/mocking.md  | 251 ++++++++++
 .../references/snapshots.md                   | 184 +++++++
 10 files changed, 2332 insertions(+)
 create mode 100644 .claude-plugin/plugin.json
 create mode 100644 README.md
 create mode 100644 plugin.lock.json
 create mode 100644 skills/testing-r-packages/README.md
 create mode 100644 skills/testing-r-packages/SKILL.md
 create mode 100644 skills/testing-r-packages/references/advanced.md
 create mode 100644 skills/testing-r-packages/references/bdd.md
 create mode 100644 skills/testing-r-packages/references/fixtures.md
 create mode 100644 skills/testing-r-packages/references/mocking.md
 create mode 100644 skills/testing-r-packages/references/snapshots.md

diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
new file mode 100644
index 0000000..8399213
--- /dev/null
+++ b/.claude-plugin/plugin.json
@@ -0,0 +1,12 @@
+{
+  "name": "r-lib",
+  "description": "Collection of skills for R package developers",
+  "version": "0.0.0-2025.11.28",
+  "author": {
+    "name": "Garrick Aden-Buie (Posit, PBC)",
+    "email": "garrick@posit.co"
+  },
+  "skills": [
+    "./skills/testing-r-packages"
+  ]
+}
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..95ddee7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# r-lib
+
+Collection of skills for R package developers
diff --git a/plugin.lock.json b/plugin.lock.json
new file mode 100644
index 0000000..2fd9598
--- /dev/null
+++ b/plugin.lock.json
@@ -0,0 +1,68 @@
+{
+  "$schema": "internal://schemas/plugin.lock.v1.json",
+  "pluginId": "gh:posit-dev/skills:r-lib",
+  "normalized": {
+    "repo": null,
+    "ref": "refs/tags/v20251128.0",
+    "commit": "325af2c15ef275a830deeabc0fd9c346af2b4852",
+    "treeHash": "8cbb9eb178493c0fbb0c0830b98a2e7aa2c8f88b3fd50212791392fe52ea9bac",
+    "generatedAt": "2025-11-28T10:27:39.803032Z",
+    "toolVersion": "publish_plugins.py@0.2.0"
+  },
+  "origin": {
+    "remote": "git@github.com:zhongweili/42plugin-data.git",
+    "branch": "master",
+    "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
+    "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
+  },
+  "manifest": {
+    "name": "r-lib",
+    "description": "Collection of skills for R package developers"
+  },
+  "content": {
+    "files": [
+      {
+        "path": "README.md",
+        "sha256": "7481e2c74f66aad399e085e0c745e004bf5329d222637c0f1ceb7886ab8e47bc"
+      },
+      {
+        "path": ".claude-plugin/plugin.json",
+        "sha256": "56f131889594130519a3418428f9b066f60ae0ce48c5be5857dfb7afed0dbca3"
+      },
+      {
+        "path": "skills/testing-r-packages/README.md",
+        "sha256": "b41eb2f7ec053d9228b0eb91e9302c630159c5cb8ac53b4e918ced3b0c19f42c"
+      },
+      {
+        "path": "skills/testing-r-packages/SKILL.md",
+        "sha256": "2beeccacc6025f393f09a837edc9af9ce6dd836ea5111b9d51dee452235b1170"
+      },
+      {
+        "path": "skills/testing-r-packages/references/mocking.md",
+        "sha256": "c2f5cabf03ad713ada1f67a8e1d6af551d0da068c86c7eb8e27dd2fdc0d944bc"
+      },
+      {
+        "path": "skills/testing-r-packages/references/fixtures.md",
+        "sha256": "8ab18c239c528133c7aadfddc3969c8df9cb3dcbf14b3ea6b53dfe3a96c4e22a"
+      },
+      {
+        "path": "skills/testing-r-packages/references/bdd.md",
+        "sha256": "28373d276bc21f65304a22595e7315f6c7461e62ecaa2faa688e3c1eb7ad39ba"
+      },
+      {
+        "path": "skills/testing-r-packages/references/advanced.md",
+        "sha256": "97f68de6727c36159f35c9d849a213474a7308b051ee76332b4d247c05f14bb2"
+      },
+      {
+        "path": "skills/testing-r-packages/references/snapshots.md",
+        "sha256": "c4861fa9b77fd965f9e178f807da3f1fd101e258deb3330cb544a23cac291b9e"
+      }
+    ],
+    "dirSha256": "8cbb9eb178493c0fbb0c0830b98a2e7aa2c8f88b3fd50212791392fe52ea9bac"
+  },
+  "security": {
+    "scannedAt": null,
+    "scannerVersion": null,
+    "flags": []
+  }
+}
\ No newline at end of file
diff --git a/skills/testing-r-packages/README.md b/skills/testing-r-packages/README.md
new file mode 100644
index 0000000..cd77038
--- /dev/null
+++ b/skills/testing-r-packages/README.md
@@ -0,0 +1,199 @@
+# Testing R Packages
+
+Best practices for writing R package tests using testthat version 3+.
+
+## Overview
+
+This skill provides comprehensive guidance on modern R package testing with testthat 3, including:
+
+- **Test structure and organization** - File organization, test hierarchy, and naming conventions
+- **Core expectations** - All testthat expectation functions for equality, errors, types, and more
+- **Design principles** - Self-sufficient tests, cleanup with withr, and test-first workflows
+- **Snapshot testing** - Testing complex output, error messages, and visual diffs
+- **Test fixtures** - Constructor functions, static files, and helper organization
+- **Mocking** - Replacing external dependencies for reliable testing
+- **BDD-style testing** - Using `describe()` and `it()` for behavior-driven development
+- **Advanced topics** - Skipping tests, managing secrets, CRAN requirements, and parallel testing
+
+## When This Skill Activates
+
+Claude will use this skill when you:
+
+- Write or modify tests for R packages
+- Set up testing infrastructure with testthat
+- Debug failing tests
+- Organize test files and fixtures
+- Create snapshot tests for complex output
+- Mock external dependencies
+- Follow BDD patterns with describe/it
+- Prepare packages for CRAN submission
+
+## What You'll Learn
+
+### Test Structure
+
+Learn the modern testthat 3 workflow:
+- Initializing testing with `usethis::use_testthat(3)`
+- Organizing tests to mirror package structure
+- Using helper and setup files
+- Running tests at different scales
+
+### Expectations
+
+Master all core expectations:
+- Equality: `expect_equal()`, `expect_identical()`, `expect_all_equal()`
+- Errors: `expect_error()`, `expect_no_error()`, `expect_warning()`
+- Types: `expect_type()`, `expect_s3_class()`, `expect_r6_class()`
+- Structure: `expect_length()`, `expect_shape()`, `expect_named()`
+- Sets: `expect_contains()`, `expect_in()`, `expect_disjoint()`
+
+### Design Principles
+
+Follow five key principles:
+1. **Self-sufficient tests** - Each test contains all needed setup
+2. **Self-contained tests** - Use withr for automatic cleanup
+3. **Plan for failure** - Write tests that are easy to debug
+4. **Repetition is OK** - Clarity over DRY in tests
+5. **devtools::load_all() workflow** - Efficient interactive testing
+
+### Snapshot Testing
+
+Test complex output effectively:
+- Capture printed output, messages, and errors
+- Use transforms to remove variable elements
+- Create platform-specific variants
+- Review and accept snapshot changes
+
+### BDD-Style Testing
+
+Write specification-style tests:
+- Group related specs with `describe()`
+- Define individual specs with `it()`
+- Create nested hierarchies
+- Mark pending specs without code
+- Follow test-first development
+
+## File Organization
+
+The skill uses progressive disclosure with reference files:
+
+```
+testing-r-packages/
+├── SKILL.md              # Core workflows and common patterns
+└── references/
+    ├── bdd.md           # BDD-style testing with describe/it
+    ├── snapshots.md     # Comprehensive snapshot testing
+    ├── mocking.md       # Mocking strategies and patterns
+    ├── fixtures.md      # Test data management
+    └── advanced.md      # Advanced topics and edge cases
+```
+
+Core guidance loads automatically, while reference files load only when needed for specific scenarios.
+
+## Key Features
+
+### Modern testthat 3 Patterns
+
+- Edition system with `Config/testthat/edition: 3`
+- Improved snapshot testing
+- Better error messages with waldo
+- New expectations (`expect_no_error()`, `expect_contains()`, etc.)
+- `local_mocked_bindings()` for reliable mocking
+- Parallel test execution support
+
+### Comprehensive Coverage
+
+- Basic to advanced testing techniques
+- Real-world examples from tidyverse packages
+- Common patterns and anti-patterns
+- Platform and CRAN considerations
+- Integration with withr, waldo, and related packages
+
+### Best Practices
+
+- Test-first development workflows
+- Proper fixture management
+- Secrets and credentials handling
+- File system discipline
+- Custom expectations and helpers
+
+## Examples
+
+### Standard Testing
+
+```r
+test_that("str_length() counts characters", {
+  expect_equal(str_length("abc"), 3)
+  expect_equal(str_length(""), 0)
+})
+```
+
+### BDD-Style Testing
+
+```r
+describe("User authentication", {
+  describe("login()", {
+    it("accepts valid credentials", {
+      result <- login("user@example.com", "password123")
+      expect_true(result$authenticated)
+    })
+
+    it("rejects invalid credentials", {
+      expect_error(login("user@example.com", "wrong"), class = "auth_error")
+    })
+  })
+})
+```
+
+### Snapshot Testing
+
+```r
+test_that("error messages are helpful", {
+  expect_snapshot(error = TRUE, {
+    validate_input(NULL)
+    validate_input("wrong_type")
+  })
+})
+```
+
+### Testing with Fixtures
+
+```r
+test_that("processes CSV files", {
+  csv_path <- test_path("fixtures", "sample.csv")
+  result <- process_csv(csv_path)
+  expect_equal(nrow(result), 100)
+})
+```
+
+## Requirements
+
+- R package with tests initialized via `usethis::use_testthat(3)`
+- testthat version 3.0.0 or later
+- R 4.1+ for latest testthat features
+
+## Related Skills
+
+- **package-development** - General R package development workflows
+- **debugging** - Debugging R code and tests
+- **documentation** - Writing package documentation
+
+## Resources
+
+This skill synthesizes guidance from:
+- [R Packages: Testing Basics](https://r-pkgs.org/testing-basics.html)
+- [R Packages: Testing Design](https://r-pkgs.org/testing-design.html)
+- [R Packages: Testing Advanced](https://r-pkgs.org/testing-advanced.html)
+- [testthat 3.0.0 release notes](https://tidyverse.org/blog/2020/10/testthat-3-0-0/)
+- [testthat 3.1 release notes](https://tidyverse.org/blog/2021/10/testthat-3-1/)
+- [testthat 3.2.0 release notes](https://tidyverse.org/blog/2023/10/testthat-3-2-0/)
+- [testthat 3.3.0 release notes](https://tidyverse.org/blog/2025/11/testthat-3-3-0/)
+- testthat package documentation
+
+## Contributing
+
+Found an issue or have a suggestion? Please [open an issue](https://github.com/posit-dev/skills/issues) or submit a pull request.
+
+## License
+
+This skill is part of the [Posit Claude Skills](https://github.com/posit-dev/skills) repository and is licensed under the MIT License.
diff --git a/skills/testing-r-packages/SKILL.md b/skills/testing-r-packages/SKILL.md
new file mode 100644
index 0000000..1269bf9
--- /dev/null
+++ b/skills/testing-r-packages/SKILL.md
@@ -0,0 +1,424 @@
+---
+name: testing-r-packages
+description: >
+  Best practices for writing R package tests using testthat version 3+. Use when writing, organizing, or improving tests for R packages. Covers test structure, expectations, fixtures, snapshots, mocking, and modern testthat 3 patterns including self-sufficient tests, proper cleanup with withr, and snapshot testing.
+---
+
+# Testing R Packages with testthat
+
+Modern best practices for R package testing using testthat 3+.
+
+## Initial Setup
+
+Initialize testing with testthat 3rd edition:
+
+```r
+usethis::use_testthat(3)
+```
+
+This creates the `tests/testthat/` directory, adds testthat to `Suggests` and sets `Config/testthat/edition: 3` in `DESCRIPTION`, and creates `tests/testthat.R`.
+
+## File Organization
+
+**Mirror package structure:**
+- Code in `R/foofy.R` → tests in `tests/testthat/test-foofy.R`
+- Use `usethis::use_r("foofy")` and `usethis::use_test("foofy")` to create paired files
+
+**Special files:**
+- `helper-*.R` - Helper functions and custom expectations, sourced before tests
+- `setup-*.R` - Run before tests by `devtools::test()` and `R CMD check`, but not sourced by `load_all()`
+- `fixtures/` - Static test data files accessed via `test_path()`
+
+## Test Structure
+
+Tests follow a three-level hierarchy: **File → Test → Expectation**
+
+### Standard Syntax
+
+```r
+test_that("descriptive behavior", {
+  result <- my_function(input)
+  expect_equal(result, expected_value)
+})
+```
+
+**Test descriptions** should read naturally and describe behavior, not implementation.
+
+### BDD Syntax (describe/it)
+
+For behavior-driven development, use `describe()` and `it()`:
+
+```r
+describe("matrix()", {
+  it("can be multiplied by a scalar", {
+    m1 <- matrix(1:4, 2, 2)
+    m2 <- m1 * 2
+    expect_equal(matrix(1:4 * 2, 2, 2), m2)
+  })
+
+  it("can be transposed", {
+    m <- matrix(1:4, 2, 2)
+    expect_equal(t(m), matrix(c(1, 3, 2, 4), 2, 2))
+  })
+})
+```
+
+**Key features:**
+- `describe()` groups related specifications for a component
+- `it()` defines individual specifications (like `test_that()`)
+- Supports nesting for hierarchical organization
+- `it()` without code creates pending test placeholders
+
+**Use `describe()` to verify you implement the right things, use `test_that()` to ensure you do things right.**
+
+See [references/bdd.md](references/bdd.md) for comprehensive BDD patterns, nested specifications, and test-first workflows.
+
+## Running Tests
+
+Three scales of testing:
+
+**Micro** (interactive development):
+```r
+devtools::load_all()
+expect_equal(foofy(...), expected)
+```
+
+**Mezzo** (single file):
+```r
+testthat::test_file("tests/testthat/test-foofy.R")
+# RStudio: "Run Tests" button, or devtools::test_active_file()
+```
+
+**Macro** (full suite):
+```r
+devtools::test()    # Ctrl/Cmd + Shift + T
+devtools::check()   # Ctrl/Cmd + Shift + E
+```
+
+## Core Expectations
+
+### Equality
+
+```r
+expect_equal(10, 10 + 1e-7)      # Allows numeric tolerance
+expect_identical(10L, 10L)       # Exact match required
+expect_all_equal(x, expected)    # Every element matches (v3.3.0+)
+```
+
+### Errors, Warnings, Messages
+
+```r
+expect_error(1 / "a")
+expect_error(bad_call(), class = "specific_error_class")
+expect_no_error(valid_call())
+
+expect_warning(deprecated_func())
+expect_no_warning(safe_func())
+
+expect_message(informative_func())
+expect_no_message(quiet_func())
+```
+
+### Pattern Matching
+
+```r
+expect_match("Testing is fun!", "Testing")
+expect_match(text, "pattern", ignore.case = TRUE)
+```
+
+### Structure and Type
+
+```r
+expect_length(vector, 10)
+expect_type(obj, "list")
+expect_s3_class(model, "lm")
+expect_s4_class(obj, "MyS4Class")
+expect_r6_class(obj, "MyR6Class")      # v3.3.0+
+expect_shape(matrix, dim = c(10, 5))   # v3.3.0+
+```
+
+### Sets and Collections
+
+```r
+expect_setequal(x, y)           # Same elements, any order
+expect_contains(fruits, "apple") # Subset check (v3.2.0+)
+expect_in("apple", fruits)       # Element in set (v3.2.0+)
+expect_disjoint(set1, set2)      # No overlap (v3.3.0+)
+```
+
+### Logical
+
+```r
+expect_true(condition)
+expect_false(condition)
+expect_all_true(vector > 0)      # All elements TRUE (v3.3.0+)
+expect_all_false(vector < 0)     # All elements FALSE (v3.3.0+)
+```
+
+## Design Principles
+
+### 1. Self-Sufficient Tests
+
+Each test should contain all setup, execution, and teardown code:
+
+```r
+# Good: self-sufficient
+test_that("foofy() works", {
+  data <- data.frame(x = 1:3, y = letters[1:3])
+  result <- foofy(data)
+  expect_equal(result$x, 1:3)
+})
+
+# Bad: relies on ambient state
+dat <- data.frame(x = 1:3, y = letters[1:3])
+test_that("foofy() works", {
+  result <- foofy(dat)  # Where did 'dat' come from?
+  expect_equal(result$x, 1:3)
+})
+```
+
+### 2. Self-Contained Tests (Cleanup Side Effects)
+
+Use `withr` to manage state changes:
+
+```r
+test_that("function respects options", {
+  withr::local_options(my_option = "test_value")
+  withr::local_envvar(MY_VAR = "test")
+  withr::local_package("jsonlite")
+
+  result <- my_function()
+  expect_equal(result$setting, "test_value")
+  # Automatic cleanup after test
+})
+```
+
+**Common withr functions:**
+- `local_options()` - Temporarily set options
+- `local_envvar()` - Temporarily set environment variables
+- `local_tempfile()` - Create temp file with automatic cleanup
+- `local_tempdir()` - Create temp directory with automatic cleanup
+- `local_package()` - Temporarily attach package
+
+### 3. Plan for Test Failure
+
+Write tests assuming they will fail and need debugging:
+- Tests should run independently in fresh R sessions
+- Avoid hidden dependencies on earlier tests
+- Make test logic explicit and obvious
+
+### 4. Repetition is Acceptable
+
+Repeat setup code in tests rather than factoring it out. Test clarity is more important than avoiding duplication.
+
+### 5. Use `devtools::load_all()` Workflow
+
+During development:
+- Use `devtools::load_all()` instead of `library()`
+- Makes all functions available (including unexported)
+- Automatically attaches testthat
+- Eliminates need for `library()` calls in tests
+
+## Snapshot Testing
+
+For complex output that's difficult to verify programmatically, use snapshot tests. See [references/snapshots.md](references/snapshots.md) for complete guide.
+
+**Basic pattern:**
+
+```r
+test_that("error message is helpful", {
+  expect_snapshot(
+    error = TRUE,
+    validate_input(NULL)
+  )
+})
+```
+
+Snapshots stored in `tests/testthat/_snaps/`.
+
+**Workflow:**
+```r
+devtools::test()                    # Creates new snapshots
+testthat::snapshot_review('name')   # Review changes
+testthat::snapshot_accept('name')   # Accept changes
+```
+
+## Test Fixtures and Data
+
+Three approaches for test data:
+
+**1. Constructor functions** - Create data on-demand:
+```r
+new_sample_data <- function(n = 10) {
+  data.frame(id = seq_len(n), value = rnorm(n))
+}
+```
+
+**2. Local functions with cleanup** - Handle side effects:
+```r
+local_temp_csv <- function(data, env = parent.frame()) {
+  path <- withr::local_tempfile(fileext = ".csv", .local_envir = env)
+  write.csv(data, path, row.names = FALSE)
+  path
+}
+```
+
+**3. Static fixture files** - Store in `fixtures/` directory:
+```r
+data <- readRDS(test_path("fixtures", "sample_data.rds"))
+```
+
+See [references/fixtures.md](references/fixtures.md) for detailed fixture patterns.
+
+## Mocking
+
+Replace external dependencies during testing using `local_mocked_bindings()`. See [references/mocking.md](references/mocking.md) for comprehensive mocking strategies.
+
+**Basic pattern:**
+
+```r
+test_that("function works with mocked dependency", {
+  local_mocked_bindings(
+    external_api = function(...) list(status = "success", data = "mocked")
+  )
+
+  result <- my_function_that_calls_api()
+  expect_equal(result$status, "success")
+})
+```
+
+## Common Patterns
+
+### Testing Errors with Specific Classes
+
+```r
+test_that("validation catches errors", {
+  expect_error(
+    validate_input("wrong_type"),
+    class = "vctrs_error_cast"
+  )
+})
+```
+
+### Testing with Temporary Files
+
+```r
+test_that("file processing works", {
+  temp_file <- withr::local_tempfile(
+    lines = c("line1", "line2", "line3")
+  )
+
+  result <- process_file(temp_file)
+  expect_equal(length(result), 3)
+})
+```
+
+### Testing with Modified Options
+
+```r
+test_that("output respects width", {
+  withr::local_options(width = 40)
+
+  output <- capture_output(print(my_object))
+  expect_lte(max(nchar(strsplit(output, "\n")[[1]])), 40)
+})
+```
+
+### Testing Multiple Related Cases
+
+```r
+test_that("str_trunc() handles all directions", {
+  trunc <- function(direction) {
+    str_trunc("This string is moderately long", direction, width = 20)
+  }
+
+  expect_equal(trunc("right"), "This string is mo...")
+  expect_equal(trunc("left"), "...s moderately long")
+  expect_equal(trunc("center"), "This stri...ely long")
+})
+```
+
+### Custom Expectations in Helper Files
+
+```r
+# In tests/testthat/helper-expectations.R
+expect_valid_user <- function(user) {
+  expect_type(user, "list")
+  expect_named(user, c("id", "name", "email"))
+  expect_type(user$id, "integer")
+  expect_match(user$email, "@")
+}
+
+# In test file
+test_that("user creation works", {
+  user <- create_user("test@example.com")
+  expect_valid_user(user)
+})
+```
+
+## File System Discipline
+
+**Always write to temp directory:**
+
+```r
+# Good
+output <- withr::local_tempfile(fileext = ".csv")
+write.csv(data, output)
+
+# Bad - writes to package directory
+write.csv(data, "output.csv")
+```
+
+**Access test fixtures with `test_path()`:**
+
+```r
+# Good - works in all contexts
+data <- readRDS(test_path("fixtures", "data.rds"))
+
+# Bad - relative paths break
+data <- readRDS("fixtures/data.rds")
+```
+
+## Advanced Topics
+
+For advanced testing scenarios, see:
+
+- **[references/bdd.md](references/bdd.md)** - BDD-style testing with describe/it, nested specifications, test-first workflows
+- **[references/snapshots.md](references/snapshots.md)** - Snapshot testing, transforms, variants
+- **[references/mocking.md](references/mocking.md)** - Mocking strategies, webfakes, httptest2
+- **[references/fixtures.md](references/fixtures.md)** - Fixture patterns, database fixtures, helper files
+- **[references/advanced.md](references/advanced.md)** - Skipping tests, secrets management, CRAN requirements, custom expectations, parallel testing
+
+## testthat 3 Modernizations
+
+When working with testthat 3 code, prefer modern patterns:
+
+**Deprecated → Modern:**
+- `context()` → Remove (duplicates filename)
+- `expect_equivalent()` → `expect_equal(ignore_attr = TRUE)`
+- `with_mock()` → `local_mocked_bindings()`
+- `is_null()`, `is_true()`, `is_false()` → `expect_null()`, `expect_true()`, `expect_false()`
+
+**New in testthat 3:**
+- Edition system (`Config/testthat/edition: 3`)
+- Improved snapshot testing
+- `waldo::compare()` for better diff output
+- Unified condition handling
+- `local_mocked_bindings()` works with byte-compiled code
+- Parallel test execution support
+
+## Quick Reference
+
+**Initialize:** `usethis::use_testthat(3)`
+
+**Run tests:** `devtools::test()` or Ctrl/Cmd + Shift + T
+
+**Create test file:** `usethis::use_test("name")`
+
+**Review snapshots:** `testthat::snapshot_review()`
+
+**Accept snapshots:** `testthat::snapshot_accept()`
+
+**Find slow tests:** `devtools::test(reporter = "slow")`
+
+**Shuffle tests:** `devtools::test(shuffle = TRUE)`
diff --git a/skills/testing-r-packages/references/advanced.md b/skills/testing-r-packages/references/advanced.md
new file mode 100644
index 0000000..b394338
--- /dev/null
+++ b/skills/testing-r-packages/references/advanced.md
@@ -0,0 +1,410 @@
+# Advanced Testing Topics
+
+## Skipping Tests
+
+Skip tests conditionally when requirements aren't met:
+
+### Built-in Skip Functions
+
+```r
+test_that("API integration works", {
+  skip_if_offline()
+  skip_if_not_installed("httr2")
+  skip_on_cran()
+  skip_on_os("windows")
+
+  result <- call_external_api()
+  expect_true(result$success)
+})
+```
+
+**Common skip functions:**
+- `skip()` - Skip unconditionally with message
+- `skip_if()` - Skip if condition is TRUE
+- `skip_if_not()` - Skip if condition is FALSE
+- `skip_if_offline()` - Skip if no internet
+- `skip_if_not_installed(pkg)` - Skip if package unavailable
+- `skip_on_cran()` - Skip on CRAN checks
+- `skip_on_os(os)` - Skip on specific OS
+- `skip_on_ci()` - Skip on continuous integration
+- `skip_unless_r(version)` - Skip unless R version requirement met (testthat 3.3.0+)
+
+### Custom Skip Conditions
+
+```r
+skip_if_no_api_key <- function() {
+  if (Sys.getenv("API_KEY") == "") {
+    skip("API_KEY not available")
+  }
+}
+
+skip_if_slow <- function() {
+  if (!identical(Sys.getenv("RUN_SLOW_TESTS"), "true")) {
+    skip("Slow tests not enabled")
+  }
+}
+
+test_that("authenticated endpoint works", {
+  skip_if_no_api_key()
+
+  result <- call_authenticated_endpoint()
+  expect_equal(result$status, "success")
+})
+```
+
+## Testing Flaky Code
+
+### Retry with `try_again()`
+
+Test code that may fail occasionally (network calls, timing-dependent code):
+
+```r
+test_that("flaky network call succeeds eventually", {
+  result <- try_again(
+    times = 3,
+    {
+      response <- make_network_request()
+      expect_equal(response$status, 200)
+      response
+    }
+  )
+
+  expect_type(result, "list")
+})
+```
+
+### Mark Tests as Flaky
+
+```r
+test_that("timing-sensitive operation", {
+  skip_on_cran()  # Too unreliable for CRAN
+
+  start <- Sys.time()
+  result <- async_operation()
+  duration <- as.numeric(difftime(Sys.time(), start, units = "secs"))
+
+  expect_lt(duration, 2)  # Should complete in < 2 seconds
+})
+```
+
+## Managing Secrets in Tests
+
+### Environment Variables
+
+```r
+test_that("authenticated API works", {
+  # Skip if credentials unavailable
+  api_key <- Sys.getenv("MY_API_KEY")
+  skip_if(api_key == "", "MY_API_KEY not set")
+
+  result <- call_api(api_key)
+  expect_true(result$authenticated)
+})
+```
+
+### Local Configuration Files
+
+```r
+test_that("service integration works", {
+  config_path <- test_path("fixtures", "test_config.yml")
+  skip_if_not(file.exists(config_path), "Test config not found")
+
+  config <- yaml::read_yaml(config_path)
+  result <- connect_to_service(config)
+  expect_true(result$connected)
+})
+```
+
+**Never commit secrets:**
+- Add config files with secrets to `.gitignore`
+- Use environment variables in CI/CD
+- Provide example config files: `test_config.yml.example`
+
+### Testing Without Secrets
+
+Design tests to degrade gracefully:
+
+```r
+test_that("API client works", {
+  api_key <- Sys.getenv("API_KEY")
+
+  if (api_key == "") {
+    # Mock the API when credentials unavailable
+    local_mocked_bindings(
+      make_api_call = function(...) list(status = "success", data = "mocked")
+    )
+  }
+
+  result <- my_api_wrapper()
+  expect_equal(result$status, "success")
+})
+```
+
+## Custom Expectations
+
+Create domain-specific expectations for clearer tests:
+
+### Simple Custom Expectations
+
+```r
+# In helper-expectations.R
+expect_valid_email <- function(email) {
+  expect_match(email, "^[^@]+@[^@]+\\.[^@]+$")
+}
+
+expect_positive <- function(x) {
+  expect_true(all(x > 0), info = "All values should be positive")
+}
+
+expect_named_list <- function(object, names) {
+  expect_type(object, "list")
+  expect_named(object, names, ignore.order = TRUE)
+}
+```
+
+Usage:
+
+```r
+test_that("user validation works", {
+  user <- create_user("test@example.com")
+  expect_valid_email(user$email)
+})
+```
+
+### Complex Custom Expectations
+
+```r
+expect_valid_model <- function(model) {
+  act <- quasi_label(rlang::enquo(model))
+
+  expect(
+    inherits(act$val, "lm") && !is.null(act$val$coefficients),
+    sprintf("%s is not a valid linear model", act$lab)
+  )
+
+  invisible(act$val)
+}
+```
+
+## State Inspection
+
+Detect unintended global state changes:
+
+```r
+# In setup-state.R
+set_state_inspector(function() {
+  list(
+    options = options(),
+    env_vars = Sys.getenv(),
+    search = search()
+  )
+})
+```
+
+testthat will warn if state changes between tests.
+
+## CRAN-Specific Considerations
+
+### Time Limits
+
+Tests must complete in under 1 minute:
+
+```r
+test_that("slow operation completes", {
+  skip_on_cran()  # Takes 2 minutes
+
+  result <- expensive_computation()
+  expect_equal(result$status, "complete")
+})
+```
+
+### File System Discipline
+
+Only write to temp directory:
+
+```r
+test_that("file output works", {
+  # Good
+  output <- withr::local_tempfile(fileext = ".csv")
+  write.csv(data, output)
+
+  # Bad - writes to package directory
+  # write.csv(data, "output.csv")
+})
+```
+
+### No External Dependencies
+
+Avoid relying on:
+- Network access
+- External processes
+- System commands
+- Clipboard access
+
+```r
+test_that("external dependency", {
+  skip_on_cran()
+
+  # Code requiring network or system calls
+})
+```
+
+### Platform Differences
+
+Use `expect_equal()` for numeric comparisons (allows tolerance):
+
+```r
+test_that("calculation works", {
+  result <- complex_calculation()
+
+  # Good: tolerant to floating point differences
+  expect_equal(result, 1.234567)
+
+  # Bad: fails due to platform differences
+  # expect_identical(result, 1.234567)
+})
+```
+
+## Test Performance
+
+### Identify Slow Tests
+
+```r
+devtools::test(reporter = "slow")
+```
+
+The `SlowReporter` highlights performance bottlenecks.
+
+### Test Shuffling
+
+Detect unintended test dependencies:
+
+```r
+# Randomly reorder tests
+devtools::test(shuffle = TRUE)
+
+# Or call testthat directly
+test_dir("tests/testthat", shuffle = TRUE)
+```
+
+If tests fail when shuffled, they have unintended dependencies on execution order.
+
+## Parallel Testing
+
+Enable parallel test execution in `DESCRIPTION`:
+
+```
+Config/testthat/parallel: true
+```
+
+**Requirements for parallel tests:**
+- Tests must be independent
+- No shared state between tests
+- Use `local_*()` functions for all side effects
+- Snapshot tests work correctly in parallel (testthat 3.2.0+)
+
+## Testing Edge Cases
+
+### Boundary Conditions
+
+```r
+test_that("handles boundary conditions", {
+  expect_equal(my_func(0), expected_at_zero)
+  expect_equal(my_func(-1), expected_negative)
+  expect_equal(my_func(Inf), expected_infinite)
+  expect_true(is.nan(my_func(NaN)))
+})
+```
+
+### Empty Inputs
+
+```r
+test_that("handles empty inputs", {
+  expect_equal(process(character()), character())
+  expect_equal(process(NULL), NULL)
+  expect_equal(process(data.frame()), data.frame())
+})
+```
+
+### Type Validation
+
+```r
+test_that("validates input types", {
+  expect_error(my_func("string"), class = "vctrs_error_cast")
+  expect_error(my_func(list()), "must be atomic")
+  expect_no_error(my_func(1:10))
+})
+```
+
+## Debugging Failed Tests
+
+### Interactive Debugging
+
+```r
+# Run test interactively
+devtools::load_all()
+test_that("problematic test", {
+  # Add browser() to pause execution
+  browser()
+
+  result <- problematic_function()
+  expect_equal(result, expected)
+})
+```
+
+### Print Debugging in Tests
+
+```r
+test_that("debug output", {
+  data <- prepare_data()
+  str(data)  # Visible when test fails
+
+  result <- process(data)
+  print(result)
+
+  expect_equal(result, expected)
+})
+```
+
+### Capture Output for Inspection
+
+```r
+test_that("inspect messages", {
+  messages <- capture_messages(
+    result <- function_with_messages()
+  )
+
+  print(messages)  # See all messages when test fails
+  expect_match(messages, "Processing complete", all = FALSE)
+})
+```
+
+## Testing R6 Classes
+
+```r
+test_that("R6 class works", {
+  obj <- MyClass$new(value = 10)
+
+  expect_r6_class(obj, "MyClass")  # testthat 3.3.0+
+  expect_equal(obj$value, 10)
+
+  obj$increment()
+  expect_equal(obj$value, 11)
+})
+```
+
+## Testing S4 Classes
+
+```r
+test_that("S4 validity works", {
+  obj <- new("MyClass", slot1 = 10, slot2 = "test")
+
+  expect_s4_class(obj, "MyClass")
+  expect_equal(obj@slot1, 10)
+
+  expect_error(
+    new("MyClass", slot1 = -1),
+    "slot1 must be positive"
+  )
+})
+```
diff --git a/skills/testing-r-packages/references/bdd.md b/skills/testing-r-packages/references/bdd.md
new file mode 100644
index 0000000..17747bb
--- /dev/null
+++ b/skills/testing-r-packages/references/bdd.md
@@ -0,0 +1,448 @@
+# BDD-Style Testing with describe() and it()
+
+Behavior-Driven Development (BDD) testing uses `describe()` and `it()` to create specification-style tests that read like natural language descriptions of behavior.
+
+## When to Use BDD Syntax
+
+**Use BDD (`describe`/`it`) when:**
+- Documenting intended behavior of new features
+- Testing complex components with multiple related facets
+- Following test-first development workflows
+- Tests serve as specification documentation
+- You want hierarchical organization of related tests
+- A group of tests (in `it()` blocks) relies on a single fixture or on local options/envvars set up in the enclosing `describe()`
+
+**Use standard syntax (`test_that`) when:**
+- Writing straightforward unit tests
+- Testing implementation details
+- Converting existing test_that() tests (no need to change working code)
+
+**Key insight from testthat:** "Use `describe()` to verify you implement the right things, use `test_that()` to ensure you do things right."
+
+## Basic BDD Syntax
+
+### Simple Specifications
+
+```r
+describe("matrix()", {
+  it("can be multiplied by a scalar", {
+    m1 <- matrix(1:4, 2, 2)
+    m2 <- m1 * 2
+    expect_equal(matrix(1:4 * 2, 2, 2), m2)
+  })
+
+  it("can be transposed", {
+    m <- matrix(1:4, 2, 2)
+    expect_equal(t(m), matrix(c(1, 3, 2, 4), 2, 2))
+  })
+
+  it("can compute determinant", {
+    m <- matrix(c(1, 2, 3, 4), 2, 2)
+    expect_equal(det(m), -2)
+  })
+})
+```
+
+Each `it()` block:
+- Defines one specification (like `test_that()`)
+- Runs in its own environment
+- Has access to all expectations
+- Can use withr and other testing tools
+
+## Nested Specifications
+
+Group related specifications hierarchically:
+
+```r
+describe("User authentication", {
+  describe("login()", {
+    it("accepts valid credentials", {
+      result <- login("user@example.com", "password123")
+      expect_true(result$authenticated)
+      expect_type(result$token, "character")
+    })
+
+    it("rejects invalid email", {
+      expect_error(
+        login("invalid-email", "password"),
+        class = "validation_error"
+      )
+    })
+
+    it("rejects wrong password", {
+      expect_error(
+        login("user@example.com", "wrong"),
+        class = "auth_error"
+      )
+    })
+  })
+
+  describe("logout()", {
+    it("clears session token", {
+      session <- create_session()
+      logout(session)
+      expect_null(session$token)
+    })
+
+    it("invalidates refresh token", {
+      session <- create_session()
+      logout(session)
+      expect_error(refresh(session), "Invalid token")
+    })
+  })
+
+  describe("password_reset()", {
+    it("sends reset email", {
+      local_mocked_bindings(send_email = function(...) TRUE)
+      result <- password_reset("user@example.com")
+      expect_true(result$email_sent)
+    })
+
+    it("generates secure token", {
+      result <- password_reset("user@example.com")
+      expect_gte(nchar(result$token), 32)
+    })
+  })
+})
+```
+
+Nesting creates clear hierarchies:
+- Top level: Component or module
+- Second level: Functions or features
+- Third level: Specific behaviors
+
+## Pending Specifications
+
+Mark unimplemented tests by omitting the code:
+
+```r
+describe("division()", {
+  it("divides two numbers", {
+    expect_equal(division(10, 2), 5)
+  })
+
+  it("returns Inf for division by zero")  # Pending
+
+  it("handles complex numbers")  # Pending
+})
+```
+
+Pending tests:
+- Show up in test output as "SKIPPED"
+- Document planned functionality
+- Serve as TODO markers
+- Don't cause test failures
+
+## Complete Test File Example
+
+```r
+# tests/testthat/test-data-processor.R
+
+describe("DataProcessor", {
+  describe("initialization", {
+    it("creates processor with default config", {
+      proc <- DataProcessor$new()
+      expect_r6_class(proc, "DataProcessor")
+      expect_equal(proc$config$timeout, 30)
+    })
+
+    it("accepts custom configuration", {
+      proc <- DataProcessor$new(config = list(timeout = 60))
+      expect_equal(proc$config$timeout, 60)
+    })
+
+    it("validates configuration options", {
+      expect_error(
+        DataProcessor$new(config = list(timeout = -1)),
+        "timeout must be positive"
+      )
+    })
+  })
+
+  describe("process()", {
+    describe("with valid data", {
+      it("processes numeric data", {
+        proc <- DataProcessor$new()
+        result <- proc$process(data.frame(x = 1:10))
+        expect_s3_class(result, "data.frame")
+        expect_equal(nrow(result), 10)
+      })
+
+      it("handles missing values", {
+        proc <- DataProcessor$new()
+        data <- data.frame(x = c(1, NA, 3))
+        result <- proc$process(data)
+        expect_false(anyNA(result$x))
+      })
+
+      it("preserves column names", {
+        proc <- DataProcessor$new()
+        data <- data.frame(foo = 1:3, bar = 4:6)
+        result <- proc$process(data)
+        expect_named(result, c("foo", "bar"))
+      })
+    })
+
+    describe("with invalid data", {
+      it("rejects NULL input", {
+        proc <- DataProcessor$new()
+        expect_error(proc$process(NULL), "data cannot be NULL")
+      })
+
+      it("rejects empty data frame", {
+        proc <- DataProcessor$new()
+        expect_error(proc$process(data.frame()), "data cannot be empty")
+      })
+
+      it("rejects non-data.frame input", {
+        proc <- DataProcessor$new()
+        expect_error(proc$process(list()), class = "type_error")
+      })
+    })
+  })
+
+  describe("summary()", {
+    it("returns summary statistics", {
+      proc <- DataProcessor$new()
+      data <- data.frame(x = 1:10, y = 11:20)
+      proc$process(data)
+
+      summary <- proc$summary()
+      expect_type(summary, "list")
+      expect_named(summary, c("rows", "cols", "processed_at"))
+    })
+
+    it("throws error if no data processed", {
+      proc <- DataProcessor$new()
+      expect_error(proc$summary(), "No data has been processed")
+    })
+  })
+})
+```
+
+## Organizing Files with BDD
+
+### Single Component per File
+
+```r
+# tests/testthat/test-user-model.R
+describe("User model", {
+  describe("validation", { ... })
+  describe("persistence", { ... })
+  describe("relationships", { ... })
+})
+```
+
+### Multiple Related Components
+
+```r
+# tests/testthat/test-math-operations.R
+describe("arithmetic operations", {
+  describe("addition()", { ... })
+  describe("subtraction()", { ... })
+  describe("multiplication()", { ... })
+  describe("division()", { ... })
+})
+```
+
+### Hierarchical Domain Organization
+
+```r
+# tests/testthat/test-api-endpoints.R
+describe("API endpoints", {
+  describe("/users", {
+    describe("GET /users", { ... })
+    describe("POST /users", { ... })
+    describe("GET /users/:id", { ... })
+  })
+
+  describe("/posts", {
+    describe("GET /posts", { ... })
+    describe("POST /posts", { ... })
+  })
+})
+```
+
+## Mixing BDD and Standard Syntax
+
+You can use both styles in the same test file:
+
+```r
+# tests/testthat/test-calculator.R
+
+# BDD style for user-facing functionality
+describe("Calculator user interface", {
+  describe("button clicks", {
+    it("registers numeric input", { ... })
+    it("handles operator keys", { ... })
+  })
+})
+
+# Standard style for internal helpers
+test_that("parse_expression() tokenizes correctly", {
+  tokens <- parse_expression("2 + 3")
+  expect_equal(tokens, c("2", "+", "3"))
+})
+
+test_that("evaluate_tokens() handles operator precedence", {
+  result <- evaluate_tokens(c("2", "+", "3", "*", "4"))
+  expect_equal(result, 14)
+})
+```
+
+**Mixing guidelines:**
+- Use BDD for behavioral specifications
+- Use `test_that()` for implementation details
+- Keep related tests in the same style within a section
+- Don't nest `test_that()` inside `describe()` or vice versa
+
+## BDD with Test Fixtures
+
+Use the same fixture patterns as standard tests:
+
+```r
+describe("File processor", {
+  # Helper function for tests
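+  new_test_file <- function(content, env = parent.frame()) {
+    # Tie cleanup to the calling it() block; otherwise the file is deleted when this helper returns
+    withr::local_tempfile(lines = content, .local_envir = env)
+  }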
+
+  describe("read_file()", {
+    it("reads text files", {
+      file <- new_test_file(c("line1", "line2"))
+      result <- read_file(file)
+      expect_length(result, 2)
+    })
+
+    it("handles empty files", {
+      file <- new_test_file(character())
+      result <- read_file(file)
+      expect_equal(result, character())
+    })
+  })
+})
+```
+
+## BDD with Snapshot Tests
+
+Snapshots work naturally with BDD:
+
+```r
+describe("error messages", {
+  it("provides helpful validation errors", {
+    expect_snapshot(error = TRUE, {
+      validate_user(NULL)
+      validate_user(list())
+      validate_user(list(email = "invalid"))
+    })
+  })
+
+  it("shows clear permission errors", {
+    expect_snapshot(error = TRUE, {
+      check_permission("guest", "admin")
+    })
+  })
+})
+```
+
+## BDD with Mocking
+
+```r
+describe("API client", {
+  describe("fetch_user()", {
+    it("handles successful response", {
+      local_mocked_bindings(
+        http_get = function(url) {
+          list(status = 200, body = '{"id": 1, "name": "Test"}')
+        }
+      )
+
+      user <- fetch_user(1)
+      expect_equal(user$name, "Test")
+    })
+
+    it("handles 404 errors", {
+      local_mocked_bindings(
+        http_get = function(url) list(status = 404)
+      )
+
+      expect_error(fetch_user(999), class = "not_found_error")
+    })
+  })
+})
+```
+
+## Test-First Workflow with BDD
+
+1. **Write specifications first:**
+
+```r
+describe("order_total()", {
+  it("sums item prices")
+  it("applies tax")
+  it("applies discount codes")
+  it("handles free shipping threshold")
+})
+```
+
+2. **Implement one specification at a time:**
+
+```r
+describe("order_total()", {
+  it("sums item prices", {
+    order <- list(items = list(
+      list(price = 10),
+      list(price = 20)
+    ))
+    expect_equal(order_total(order), 30)
+  })
+
+  it("applies tax")
+  it("applies discount codes")
+  it("handles free shipping threshold")
+})
+```
+
+3. **Continue until all specs have implementations**
+
+This workflow ensures you:
+- Think about requirements before implementation
+- Have clear success criteria
+- Build incrementally
+- Maintain focus on behavior
+
+## Comparison: describe/it vs test_that
+
+**describe/it:**
+```r
+describe("str_length()", {
+  it("counts characters in string", {
+    expect_equal(str_length("abc"), 3)
+  })
+
+  it("handles empty strings", {
+    expect_equal(str_length(""), 0)
+  })
+})
+```
+
+**test_that:**
+```r
+test_that("str_length() counts characters", {
+  expect_equal(str_length("abc"), 3)
+})
+
+test_that("str_length() handles empty strings", {
+  expect_equal(str_length(""), 0)
+})
+```
+
+Key differences:
+- BDD groups related specs under `describe()`
+- BDD uses "it" instead of "test_that"
+- BDD enables nesting for hierarchy
+- BDD supports pending specs without code
+- Both produce identical test results
+
+Choose based on your preferences and project style.
diff --git a/skills/testing-r-packages/references/fixtures.md b/skills/testing-r-packages/references/fixtures.md
new file mode 100644
index 0000000..0b0cf1f
--- /dev/null
+++ b/skills/testing-r-packages/references/fixtures.md
@@ -0,0 +1,333 @@
+# Test Fixtures and Data Management
+
+Test fixtures arrange the environment into a known state for testing. testthat provides several approaches for managing test data and state.
+
+## Fixture Approaches
+
+### Constructor Helper Functions
+
+Create reusable test objects on-demand:
+
+```r
+# In tests/testthat/helper-fixtures.R or within test file
+new_sample_data <- function(n = 10) {
+  data.frame(
+    id = seq_len(n),
+    value = rnorm(n),
+    category = sample(letters[1:3], n, replace = TRUE)
+  )
+}
+
+test_that("function handles data correctly", {
+  data <- new_sample_data(5)
+  result <- process_data(data)
+  expect_equal(nrow(result), 5)
+})
+```
+
+**Advantages:**
+- Fresh data for each test
+- Parameterizable
+- No file I/O
+
+**Use when:**
+- Data is cheap to create
+- Multiple tests need similar but not identical data
+- Data should vary between tests
+
+### Local Functions with Cleanup
+
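+Handle side effects with withr's `local_*()` helpers (built on `withr::defer()`), passing the caller's environment so cleanup runs when the test finishes: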
+
+```r
+local_temp_csv <- function(data, pattern = "test", env = parent.frame()) {
+  path <- withr::local_tempfile(pattern = pattern, fileext = ".csv", .local_envir = env)
+  write.csv(data, path, row.names = FALSE)
+  path
+}
+
+test_that("CSV reading works", {
+  data <- data.frame(x = 1:3, y = letters[1:3])
+  csv_path <- local_temp_csv(data)
+
+  result <- read_my_csv(csv_path)
+  expect_equal(result, data)
+  # File automatically cleaned up after test
+})
+```
+
+**Advantages:**
+- Automatic cleanup
+- Encapsulates setup and teardown
+- Composable
+
+**Use when:**
+- Tests create side effects (files, connections)
+- Setup requires multiple steps
+- Cleanup logic is non-trivial
+
+### Static Fixture Files
+
+Store pre-created data files in `tests/testthat/fixtures/`:
+
+```
+tests/testthat/
+├── fixtures/
+│   ├── sample_data.rds
+│   ├── config.json
+│   └── expected_output.rds
+└── test-processing.R
+```
+
+Access with `test_path()`:
+
+```r
+test_that("function processes real data", {
+  data <- readRDS(test_path("fixtures", "sample_data.rds"))
+  result <- process_data(data)
+
+  expected <- readRDS(test_path("fixtures", "expected_output.rds"))
+  expect_equal(result, expected)
+})
+```
+
+**Advantages:**
+- Tests against real data
+- Expensive-to-create data computed once
+- Human-readable (for JSON, CSV, etc.)
+
+**Use when:**
+- Data is expensive to create
+- Data represents real-world cases
+- Multiple tests use identical data
+- Data is complex or represents edge cases
+
+## Helper Files
+
+Files in `tests/testthat/` starting with `helper-` are automatically sourced before tests run.
+
+```r
+# tests/testthat/helper-fixtures.R
+
+# Custom expectations
+expect_valid_user <- function(user) {
+  expect_type(user, "list")
+  expect_named(user, c("id", "name", "email"))
+  expect_type(user$id, "integer")
+}
+
+# Test data constructors
+new_user <- function(id = 1L, name = "Test User", email = "test@example.com") {
+  list(id = id, name = name, email = email)
+}
+
+# Shared fixtures
+standard_config <- function() {
+  list(
+    timeout = 30,
+    retries = 3,
+    verbose = FALSE
+  )
+}
+```
+
+Then use in tests:
+
+```r
+test_that("user validation works", {
+  user <- new_user()
+  expect_valid_user(user)
+})
+```
+
+## Setup Files
+
+Files starting with `setup-` run only during `R CMD check` and `devtools::test()`, not during `devtools::load_all()`.
+
+```r
+# tests/testthat/setup-options.R
+
+# Set options for test suite
+withr::local_options(
+  list(
+    reprex.clipboard = FALSE,
+    reprex.html_preview = FALSE,
+    usethis.quiet = TRUE
+  ),
+  .local_envir = teardown_env()
+)
+```
+
+**Use setup files for:**
+- Package-wide test options
+- Environment variable configuration
+- One-time expensive operations
+- Test suite initialization
+
+## Managing File System State
+
+### Use temp directories exclusively
+
+```r
+test_that("file writing works", {
+  # Good: write to temp directory
+  path <- withr::local_tempfile(lines = c("line1", "line2"))
+
+  # Bad: write to working directory
+  # writeLines(c("line1", "line2"), "test_file.txt")
+
+  result <- process_file(path)
+  expect_equal(result, expected)
+})
+```
+
+### Clean up automatically with withr
+
+```r
+test_that("directory operations work", {
+  # Create temp dir that auto-cleans
+  dir <- withr::local_tempdir()
+
+  # Create files in it
+  file.create(file.path(dir, "file1.txt"))
+  file.create(file.path(dir, "file2.txt"))
+
+  result <- process_directory(dir)
+  expect_length(result, 2)
+  # Directory automatically removed after test
+})
+```
+
+### Test files stored in fixtures
+
+```r
+test_that("file parsing handles edge cases", {
+  # Read from committed fixture
+  malformed <- test_path("fixtures", "malformed.csv")
+
+  expect_warning(
+    result <- robust_read_csv(malformed),
+    "Malformed"
+  )
+  expect_true(nrow(result) > 0)
+})
+```
+
+## Database Fixtures
+
+### In-memory SQLite
+
+```r
+test_that("database queries work", {
+  con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
+  withr::defer(DBI::dbDisconnect(con))
+
+  # Create schema
+  DBI::dbExecute(con, "CREATE TABLE users (id INTEGER, name TEXT)")
+  DBI::dbExecute(con, "INSERT INTO users VALUES (1, 'Alice'), (2, 'Bob')")
+
+  result <- query_users(con)
+  expect_equal(nrow(result), 2)
+})
+```
+
+### Fixture SQL scripts
+
+```r
+test_that("complex queries work", {
+  con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
+  withr::defer(DBI::dbDisconnect(con))
+
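+  # Load schema from fixture; RSQLite runs only the first statement per call, so split on ";"
+  schema <- strsplit(paste(readLines(test_path("fixtures", "schema.sql")), collapse = "\n"), ";")[[1]]
+  for (statement in schema[nzchar(trimws(schema))]) DBI::dbExecute(con, statement)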
+
+  result <- complex_query(con)
+  expect_s3_class(result, "data.frame")
+})
+```
+
+## Complex Object Fixtures
+
+### Save and load complex objects
+
+Create fixtures interactively:
+
+```r
+# Run once to create fixture
+complex_model <- expensive_training_function(data)
+saveRDS(complex_model, "tests/testthat/fixtures/trained_model.rds")
+```
+
+Use in tests:
+
+```r
+test_that("predictions work", {
+  model <- readRDS(test_path("fixtures", "trained_model.rds"))
+
+  new_data <- data.frame(x = 1:5, y = 6:10)
+  predictions <- predict(model, new_data)
+
+  expect_length(predictions, 5)
+  expect_type(predictions, "double")
+})
+```
+
+## Fixture Organization
+
+```
+tests/testthat/
+├── fixtures/
+│   ├── data/              # Input data
+│   │   ├── sample.csv
+│   │   └── users.json
+│   ├── expected/          # Expected outputs
+│   │   ├── processed.rds
+│   │   └── summary.txt
+│   ├── models/            # Trained models
+│   │   └── classifier.rds
+│   └── sql/               # Database schemas
+│       └── schema.sql
+├── helper-constructors.R  # Data constructors
+├── helper-expectations.R  # Custom expectations
+├── setup-options.R        # Test suite config
+└── test-*.R               # Test files
+```
+
+## Best Practices
+
+**Keep fixtures small:**
+- Store minimal data needed for tests
+- Use constructors for variations
+- Commit fixtures to version control
+
+**Document fixture origins:**
+```markdown
+# tests/testthat/fixtures/README.md
+# sample_data.rds
+Created from production data on 2024-01-15
+Contains 100 representative records with PII removed
+
+# malformed.csv
+Edge case discovered in issue #123
+Contains intentional formatting errors
+```
+
+**Use consistent paths:**
+```r
+# Always use test_path() for portability
+data <- readRDS(test_path("fixtures", "data.rds"))
+
+# Never use relative paths
+# data <- readRDS("fixtures/data.rds")  # Bad
+```
+
+**Prefer deterministic fixtures:**
+```r
+# Good: reproducible
+set.seed(123)
+data <- data.frame(x = rnorm(10))
+
+# Better: no randomness
+data <- data.frame(x = seq(-2, 2, length.out = 10))
+```
diff --git a/skills/testing-r-packages/references/mocking.md b/skills/testing-r-packages/references/mocking.md
new file mode 100644
index 0000000..51d55f4
--- /dev/null
+++ b/skills/testing-r-packages/references/mocking.md
@@ -0,0 +1,251 @@
+# Mocking in testthat
+
+Mocking temporarily replaces function implementations during testing, enabling tests when dependencies are unavailable or impractical (databases, APIs, file systems, expensive computations).
+
+## Core Mocking Functions
+
+### `local_mocked_bindings()`
+
+Replace function implementations within a test:
+
+```r
+test_that("function works with mocked dependency", {
+  local_mocked_bindings(
+    get_user_data = function(id) {
+      list(id = id, name = "Test User", role = "admin")
+    }
+  )
+
+  result <- process_user(123)
+  expect_equal(result$name, "Test User")
+})
+```
+
+### `with_mocked_bindings()`
+
+Replace functions for a specific code block:
+
+```r
+test_that("handles API failures gracefully", {
+  result <- with_mocked_bindings(
+    api_call = function(...) stop("Network error"),
+    {
+      tryCatch(
+        fetch_data(),
+        error = function(e) "fallback"
+      )
+    }
+  )
+
+  expect_equal(result, "fallback")
+})
+```
+
+## S3 Method Mocking
+
+Use `local_mocked_s3_method()` to mock S3 methods:
+
+```r
+test_that("custom print method is used", {
+  local_mocked_s3_method(
+    "print", "myclass",
+    function(x, ...) cat("Mocked output\n")
+  )
+
+  obj <- structure(list(), class = "myclass")
+  expect_output(print(obj), "Mocked output")
+})
+```
+
+## S4 Method Mocking
+
+Use `local_mocked_s4_method()` for S4 methods:
+
+```r
+test_that("S4 method override works", {
+  local_mocked_s4_method(
+    "show", "MyS4Class",
+    function(object) cat("Mocked S4 output\n")
+  )
+
+  # Test code using the mocked method
+})
+```
+
+## R6 Class Mocking
+
+Use `local_mocked_r6_class()` to mock R6 classes:
+
+```r
+test_that("R6 mock works", {
+  # Pass the class generator itself plus the members to override
+  local_mocked_r6_class(
+    Database,
+    public = list(
+      query = function(sql) data.frame(result = "mocked")
+    )
+  )
+
+  db <- Database$new()
+  expect_equal(db$query("SELECT *"), data.frame(result = "mocked"))
+})
+```
+
+## Common Mocking Patterns
+
+### Database Connections
+
+```r
+test_that("database queries work", {
+  local_mocked_bindings(
+    dbConnect = function(...) list(connected = TRUE),
+    dbGetQuery = function(conn, sql) {
+      data.frame(id = 1:3, value = c("a", "b", "c"))
+    }
+  )
+
+  result <- fetch_from_db("SELECT * FROM table")
+  expect_equal(nrow(result), 3)
+})
+```
+
+### API Calls
+
+```r
+test_that("API integration works", {
+  # Argument names must be bare function names; choose the namespace with .package
+  local_mocked_bindings(
+    request = function(url) list(url = url),
+    req_perform = function(req) list(status_code = 200, content = '{"success": true}'),
+    .package = "httr2"
+  )
+
+  result <- call_external_api()
+  expect_true(result$success)
+})
+```
+
+### File System Operations
+
+```r
+test_that("file processing works", {
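+  # Base functions need a binding in your package first: `file.exists <- NULL; readLines <- NULL` in R/
+  local_mocked_bindings(
+    file.exists = function(path) TRUE, readLines = function(path) c("line1", "line2", "line3")
+  )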
+
+  result <- process_file("dummy.txt")
+  expect_length(result, 3)
+})
+```
+
+### Random Number Generation
+
+```r
+test_that("randomized algorithm is deterministic", {
+  local_mocked_bindings(
+    runif = function(n, ...) rep(0.5, n),
+    rnorm = function(n, ...) rep(0, n)
+  )
+
+  result <- randomized_function()
+  expect_equal(result, expected_value)
+})
+```
+
+## Advanced Mocking Packages
+
+### webfakes
+
+Create fake web servers for HTTP testing:
+
+```r
+test_that("API client handles responses", {
+  app <- webfakes::new_app()
+  app$get("/users/:id", function(req, res) {
+    res$send_json(list(id = req$params$id, name = "Test"))
+  })
+
+  web <- webfakes::local_app_process(app)
+
+  result <- get_user(web$url("/users/123"))
+  expect_equal(result$name, "Test")
+})
+```
+
+### httptest2
+
+Record and replay HTTP interactions:
+
+```r
+test_that("API call returns expected data", {
+  httptest2::with_mock_dir("fixtures/api", {
+    result <- call_real_api()
+    expect_equal(result$status, "success")
+  })
+})
+```
+
+First run records real responses; subsequent runs replay them.
+
+## Mocking Best Practices
+
+**Mock at the right level:**
+- Mock external dependencies (APIs, databases)
+- Don't mock internal package functions excessively
+- Keep mocks simple and focused
+
+**Verify mock behavior:**
+```r
+test_that("mock is called correctly", {
+  calls <- list()
+  local_mocked_bindings(
+    external_func = function(...) {
+      calls <<- append(calls, list(list(...)))
+      "mocked"
+    }
+  )
+
+  my_function()
+
+  expect_length(calls, 1)
+  expect_equal(calls[[1]]$arg, "expected")
+})
+```
+
+**Prefer real fixtures when possible:**
+- Use test data files instead of mocking file reads
+- Use webfakes for full HTTP testing instead of mocking individual functions
+- Mock only when fixtures are impractical
+
+**Document what's being mocked:**
+```r
+test_that("handles unavailable service", {
+  # Mock the external authentication service
+  # which is unavailable in test environment
+  local_mocked_bindings(
+    auth_check = function(token) list(valid = TRUE)
+  )
+
+  # test code
+})
+```
+
+## Migration from Deprecated Functions
+
+**Old (deprecated):**
+```r
+with_mock(
+  pkg::func = function(...) "mocked"
+)
+```
+
+**New (recommended):**
+```r
+local_mocked_bindings(
+  func = function(...) "mocked",
+  .package = "pkg"
+)
+```
+
+The new functions work with byte-compiled code and are more reliable across platforms.
diff --git a/skills/testing-r-packages/references/snapshots.md b/skills/testing-r-packages/references/snapshots.md
new file mode 100644
index 0000000..9d4ce41
--- /dev/null
+++ b/skills/testing-r-packages/references/snapshots.md
@@ -0,0 +1,184 @@
+# Snapshot Testing
+
+Snapshot tests record expected output in human-readable files rather than inline code. They are ideal for:
+
+- Complex output that's difficult to verify programmatically
+- User-facing messages, warnings, and errors
+- Mixed output types (printed text + messages + warnings)
+- Binary formats like plots
+- Text with complex formatting
+
+## Basic Usage
+
+```r
+test_that("error messages are helpful", {
+  expect_snapshot(
+    my_function(bad_input)
+  )
+})
+```
+
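+The first run records the captured output in `tests/testthat/_snaps/{name}.md`, where `{name}` is the test file name without the `test-` prefix (`test-utils.R` → `_snaps/utils.md`); each test gets its own heading in that file.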
+
+## Snapshot Workflow
+
+**Initial creation:**
+```r
+devtools::test()  # Creates new snapshots
+```
+
+**Review changes:**
+```r
+testthat::snapshot_review('test-name')
+```
+
+**Accept changes:**
+```r
+testthat::snapshot_accept('test-name')
+```
+
+**Reject changes:**
+```r
+testthat::snapshot_reject('test-name')
+```
+
+**Download snapshots from GitHub CI:**
+```r
+testthat::snapshot_download_gh()
+```
+
+## Snapshot Types
+
+### Output Snapshots
+
+Capture printed output, messages, warnings, and errors:
+
+```r
+test_that("function produces expected output", {
+  expect_snapshot({
+    print(my_data)
+    message("Processing complete")
+    warning("Non-critical issue")
+  })
+})
+```
+
+### Value Snapshots
+
+Capture the printed structure of R objects (use `expect_snapshot_value()` to snapshot the object itself rather than its printed form):
+
+```r
+test_that("data structure is correct", {
+  expect_snapshot(str(complex_object))
+})
+```
+
+### Error Snapshots
+
+Capture error messages with call information:
+
+```r
+test_that("errors are informative", {
+  expect_snapshot(
+    error = TRUE,
+    my_function(invalid_input)
+  )
+})
+```
+
+## Transform Function
+
+Use `transform` to remove variable elements before comparison:
+
+```r
+test_that("output is stable", {
+  expect_snapshot(
+    my_api_call(),
+    transform = function(lines) {
+      # Remove timestamps
+      gsub("\\d{4}-\\d{2}-\\d{2}", "[DATE]", lines)
+    }
+  )
+})
+```
+
+Common uses:
+- Remove timestamps or session IDs
+- Normalize file paths
+- Strip API keys or tokens
+- Remove stochastic elements
+
+## Variants
+
+Use `variant` for platform-specific or R-version-specific snapshots:
+
+```r
+test_that("platform-specific behavior", {
+  expect_snapshot(
+    system_specific_function(),
+    variant = tolower(Sys.info()[["sysname"]])
+  )
+})
+```
+
+Variants save to `_snaps/{variant}/{test}.md` instead of `_snaps/{test}.md`.
+
+## Best Practices
+
+- **Commit snapshots to git** - They are part of your test suite
+- **Review snapshot diffs carefully** - Ensure changes are intentional
+- **Keep snapshots focused** - One concept per snapshot
+- **Use transform for stability** - Remove variable elements
+- **Update snapshots explicitly** - Never auto-accept in CI
+- **Fail on new snapshots in CI** - testthat 3.3.0+ does this automatically
+
+## Snapshot Files
+
+Snapshots are stored as markdown files in `tests/testthat/_snaps/`:
+
+```
+tests/testthat/
+├── test-utils.R
+└── _snaps/
+    ├── utils.md           # snapshots for test-utils.R
+    └── windows/           # variant snapshots
+        └── utils.md
+```
+
+Each snapshot includes:
+- Test name as heading
+- Code that generated the output
+- Captured output
+
+## Common Patterns
+
+**Testing error messages:**
+```r
+test_that("validation errors are clear", {
+  expect_snapshot(error = TRUE, {
+    validate_input(NULL)
+    validate_input("wrong type")
+    validate_input(numeric())
+  })
+})
+```
+
+**Testing side-by-side comparisons:**
+```r
+test_that("diff output is readable", {
+  withr::local_options(width = 80)
+  expect_snapshot(
+    waldo::compare(expected, actual)
+  )
+})
+```
+
+**Testing printed output with messages:**
+```r
+test_that("function provides feedback", {
+  expect_snapshot({
+    result <- process_data(sample_data)
+    print(result)
+  })
+})
+```