Initial commit

2025-11-29 18:16:46 +08:00
commit 3d2cb201f0
33 changed files with 2911 additions and 0 deletions
--- a/skills/strict-tool-implementer/workflow/phase-4-testing.md
+++ b/skills/strict-tool-implementer/workflow/phase-4-testing.md
@@ -0,0 +1,81 @@
+# Phase 4: Testing Agent Workflows
+
+**Objective**: Validate agent behavior with realistic scenarios
+
+## Test Strategy
+
+```python
+import pytest
+from unittest.mock import Mock, patch
+
+@pytest.fixture
+def mock_tool_functions():
+    """Mock external tool functions."""
+    return {
+        "search_flights": Mock(return_value={"flights": [{"id": "F1", "price": 500}]}),
+        "book_flight": Mock(return_value={"confirmation": "ABC123"}),
+        "search_hotels": Mock(return_value={"hotels": [{"id": "H1", "price": 150}]}),
+    }
+
+def test_simple_flight_search(mock_tool_functions):
+    """Test agent handles simple flight search."""
+    with patch.dict('agent.TOOL_FUNCTIONS', mock_tool_functions):
+        result = run_agent("Find flights from SF to LA on May 15 for 2 people")
+
+        # Verify search_flights was called
+        mock_tool_functions["search_flights"].assert_called_once()
+        call_args = mock_tool_functions["search_flights"].call_args[1]
+
+        # Strict mode guarantees these match schema
+        assert call_args["origin"] == "San Francisco, CA"  # or similar
+        assert call_args["destination"] == "Los Angeles, CA"
+        assert call_args["travelers"] == 2
+        assert "2024-05-15" in call_args["departure_date"]
+
+def test_multi_step_booking(mock_tool_functions):
+    """Test agent completes multi-step booking."""
+    with patch.dict('agent.TOOL_FUNCTIONS', mock_tool_functions):
+        result = run_agent(
+            "Book a round trip from SF to Paris for 2 people, "
+            "May 15-22, and find a hotel"
+        )
+
+        # Verify correct tool sequence
+        assert mock_tool_functions["search_flights"].called
+        assert mock_tool_functions["book_flight"].called
+        assert mock_tool_functions["search_hotels"].called
+
+def test_tool_failure_handling(mock_tool_functions):
+    """Test agent handles tool failures gracefully."""
+    mock_tool_functions["search_flights"].side_effect = Exception("API down")
+
+    with patch.dict('agent.TOOL_FUNCTIONS', mock_tool_functions):
+        result = run_agent("Find flights to Paris")
+
+        # Should handle error gracefully
+        assert "error" in result or "failed" in str(result).lower()
+
+def test_parameter_validation():
+    """Test that strict mode guarantees valid parameters."""
+    # With strict mode, parameters are guaranteed to match schema
+    # This test verifies the guarantee holds
+
+    response = client.beta.messages.create(
+        model="claude-sonnet-4-5",
+        betas=["structured-outputs-2025-11-13"],
+        messages=[{"role": "user", "content": "Search flights for 2 people"}],
+        tools=TOOLS,
+    )
+
+    for block in response.content:
+        if block.type == "tool_use":
+            # These assertions should NEVER fail with strict mode
+            assert isinstance(block.input, dict)
+            assert "travelers" in block.input
+            assert isinstance(block.input["travelers"], int)
+            assert block.input["travelers"] in [1, 2, 3, 4, 5, 6]
+```
+
+## Output
+
+Comprehensive test coverage for agent workflows.