82 lines
3.0 KiB
Markdown
82 lines
3.0 KiB
Markdown
# Phase 4: Testing Agent Workflows
|
|
|
|
**Objective**: Validate agent behavior with realistic scenarios
|
|
|
|
## Test Strategy
|
|
|
|
```python
|
|
import pytest
|
|
from unittest.mock import Mock, patch
|
|
|
|
@pytest.fixture
|
|
def mock_tool_functions():
|
|
"""Mock external tool functions."""
|
|
return {
|
|
"search_flights": Mock(return_value={"flights": [{"id": "F1", "price": 500}]}),
|
|
"book_flight": Mock(return_value={"confirmation": "ABC123"}),
|
|
"search_hotels": Mock(return_value={"hotels": [{"id": "H1", "price": 150}]}),
|
|
}
|
|
|
|
def test_simple_flight_search(mock_tool_functions):
|
|
"""Test agent handles simple flight search."""
|
|
with patch.dict('agent.TOOL_FUNCTIONS', mock_tool_functions):
|
|
result = run_agent("Find flights from SF to LA on May 15 for 2 people")
|
|
|
|
# Verify search_flights was called
|
|
mock_tool_functions["search_flights"].assert_called_once()
|
|
call_args = mock_tool_functions["search_flights"].call_args[1]
|
|
|
|
# Strict mode guarantees these match schema
|
|
assert call_args["origin"] == "San Francisco, CA" # or similar
|
|
assert call_args["destination"] == "Los Angeles, CA"
|
|
assert call_args["travelers"] == 2
|
|
assert "2024-05-15" in call_args["departure_date"]
|
|
|
|
def test_multi_step_booking(mock_tool_functions):
|
|
"""Test agent completes multi-step booking."""
|
|
with patch.dict('agent.TOOL_FUNCTIONS', mock_tool_functions):
|
|
result = run_agent(
|
|
"Book a round trip from SF to Paris for 2 people, "
|
|
"May 15-22, and find a hotel"
|
|
)
|
|
|
|
# Verify correct tool sequence
|
|
assert mock_tool_functions["search_flights"].called
|
|
assert mock_tool_functions["book_flight"].called
|
|
assert mock_tool_functions["search_hotels"].called
|
|
|
|
def test_tool_failure_handling(mock_tool_functions):
|
|
"""Test agent handles tool failures gracefully."""
|
|
mock_tool_functions["search_flights"].side_effect = Exception("API down")
|
|
|
|
with patch.dict('agent.TOOL_FUNCTIONS', mock_tool_functions):
|
|
result = run_agent("Find flights to Paris")
|
|
|
|
# Should handle error gracefully
|
|
assert "error" in result or "failed" in str(result).lower()
|
|
|
|
def test_parameter_validation():
|
|
"""Test that strict mode guarantees valid parameters."""
|
|
# With strict mode, parameters are guaranteed to match schema
|
|
# This test verifies the guarantee holds
|
|
|
|
response = client.beta.messages.create(
|
|
model="claude-sonnet-4-5",
|
|
betas=["structured-outputs-2025-11-13"],
|
|
messages=[{"role": "user", "content": "Search flights for 2 people"}],
|
|
tools=TOOLS,
|
|
)
|
|
|
|
for block in response.content:
|
|
if block.type == "tool_use":
|
|
# These assertions should NEVER fail with strict mode
|
|
assert isinstance(block.input, dict)
|
|
assert "travelers" in block.input
|
|
assert isinstance(block.input["travelers"], int)
|
|
assert block.input["travelers"] in [1, 2, 3, 4, 5, 6]
|
|
```
|
|
|
|
## Output
|
|
|
|
Comprehensive test coverage for agent workflows.
|