Initial commit
This commit is contained in:
81
skills/strict-tool-implementer/workflow/phase-4-testing.md
Normal file
81
skills/strict-tool-implementer/workflow/phase-4-testing.md
Normal file
@@ -0,0 +1,81 @@
|
||||
# Phase 4: Testing Agent Workflows
|
||||
|
||||
**Objective**: Validate agent behavior with realistic scenarios
|
||||
|
||||
## Test Strategy
|
||||
|
||||
```python
|
||||
import pytest
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
@pytest.fixture
|
||||
def mock_tool_functions():
|
||||
"""Mock external tool functions."""
|
||||
return {
|
||||
"search_flights": Mock(return_value={"flights": [{"id": "F1", "price": 500}]}),
|
||||
"book_flight": Mock(return_value={"confirmation": "ABC123"}),
|
||||
"search_hotels": Mock(return_value={"hotels": [{"id": "H1", "price": 150}]}),
|
||||
}
|
||||
|
||||
def test_simple_flight_search(mock_tool_functions):
|
||||
"""Test agent handles simple flight search."""
|
||||
with patch.dict('agent.TOOL_FUNCTIONS', mock_tool_functions):
|
||||
result = run_agent("Find flights from SF to LA on May 15 for 2 people")
|
||||
|
||||
# Verify search_flights was called
|
||||
mock_tool_functions["search_flights"].assert_called_once()
|
||||
call_args = mock_tool_functions["search_flights"].call_args[1]
|
||||
|
||||
# Strict mode guarantees these match schema
|
||||
assert call_args["origin"] == "San Francisco, CA" # or similar
|
||||
assert call_args["destination"] == "Los Angeles, CA"
|
||||
assert call_args["travelers"] == 2
|
||||
assert "2024-05-15" in call_args["departure_date"]
|
||||
|
||||
def test_multi_step_booking(mock_tool_functions):
|
||||
"""Test agent completes multi-step booking."""
|
||||
with patch.dict('agent.TOOL_FUNCTIONS', mock_tool_functions):
|
||||
result = run_agent(
|
||||
"Book a round trip from SF to Paris for 2 people, "
|
||||
"May 15-22, and find a hotel"
|
||||
)
|
||||
|
||||
# Verify correct tool sequence
|
||||
assert mock_tool_functions["search_flights"].called
|
||||
assert mock_tool_functions["book_flight"].called
|
||||
assert mock_tool_functions["search_hotels"].called
|
||||
|
||||
def test_tool_failure_handling(mock_tool_functions):
|
||||
"""Test agent handles tool failures gracefully."""
|
||||
mock_tool_functions["search_flights"].side_effect = Exception("API down")
|
||||
|
||||
with patch.dict('agent.TOOL_FUNCTIONS', mock_tool_functions):
|
||||
result = run_agent("Find flights to Paris")
|
||||
|
||||
# Should handle error gracefully
|
||||
assert "error" in result or "failed" in str(result).lower()
|
||||
|
||||
def test_parameter_validation():
|
||||
"""Test that strict mode guarantees valid parameters."""
|
||||
# With strict mode, parameters are guaranteed to match schema
|
||||
# This test verifies the guarantee holds
|
||||
|
||||
response = client.beta.messages.create(
|
||||
model="claude-sonnet-4-5",
|
||||
betas=["structured-outputs-2025-11-13"],
|
||||
messages=[{"role": "user", "content": "Search flights for 2 people"}],
|
||||
tools=TOOLS,
|
||||
)
|
||||
|
||||
for block in response.content:
|
||||
if block.type == "tool_use":
|
||||
# These assertions should NEVER fail with strict mode
|
||||
assert isinstance(block.input, dict)
|
||||
assert "travelers" in block.input
|
||||
assert isinstance(block.input["travelers"], int)
|
||||
assert block.input["travelers"] in [1, 2, 3, 4, 5, 6]
|
||||
```
|
||||
|
||||
## Output
|
||||
|
||||
Comprehensive test coverage for agent workflows.
|
||||
Reference in New Issue
Block a user