Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:16:40 +08:00
commit f125e90b9f
370 changed files with 67769 additions and 0 deletions

View File

@@ -0,0 +1,81 @@
# Phase 4: Testing Agent Workflows
**Objective**: Validate agent behavior with realistic scenarios
## Test Strategy
```python
import pytest
from unittest.mock import Mock, patch
@pytest.fixture
def mock_tool_functions():
"""Mock external tool functions."""
return {
"search_flights": Mock(return_value={"flights": [{"id": "F1", "price": 500}]}),
"book_flight": Mock(return_value={"confirmation": "ABC123"}),
"search_hotels": Mock(return_value={"hotels": [{"id": "H1", "price": 150}]}),
}
def test_simple_flight_search(mock_tool_functions):
"""Test agent handles simple flight search."""
with patch.dict('agent.TOOL_FUNCTIONS', mock_tool_functions):
result = run_agent("Find flights from SF to LA on May 15 for 2 people")
# Verify search_flights was called
mock_tool_functions["search_flights"].assert_called_once()
call_args = mock_tool_functions["search_flights"].call_args[1]
# Strict mode guarantees these match schema
assert call_args["origin"] == "San Francisco, CA" # or similar
assert call_args["destination"] == "Los Angeles, CA"
assert call_args["travelers"] == 2
assert "2024-05-15" in call_args["departure_date"]
def test_multi_step_booking(mock_tool_functions):
"""Test agent completes multi-step booking."""
with patch.dict('agent.TOOL_FUNCTIONS', mock_tool_functions):
result = run_agent(
"Book a round trip from SF to Paris for 2 people, "
"May 15-22, and find a hotel"
)
# Verify correct tool sequence
assert mock_tool_functions["search_flights"].called
assert mock_tool_functions["book_flight"].called
assert mock_tool_functions["search_hotels"].called
def test_tool_failure_handling(mock_tool_functions):
"""Test agent handles tool failures gracefully."""
mock_tool_functions["search_flights"].side_effect = Exception("API down")
with patch.dict('agent.TOOL_FUNCTIONS', mock_tool_functions):
result = run_agent("Find flights to Paris")
# Should handle error gracefully
assert "error" in result or "failed" in str(result).lower()
def test_parameter_validation():
"""Test that strict mode guarantees valid parameters."""
# With strict mode, parameters are guaranteed to match schema
# This test verifies the guarantee holds
response = client.beta.messages.create(
model="claude-sonnet-4-5",
betas=["structured-outputs-2025-11-13"],
messages=[{"role": "user", "content": "Search flights for 2 people"}],
tools=TOOLS,
)
for block in response.content:
if block.type == "tool_use":
# These assertions should NEVER fail with strict mode
assert isinstance(block.input, dict)
assert "travelers" in block.input
assert isinstance(block.input["travelers"], int)
assert block.input["travelers"] in [1, 2, 3, 4, 5, 6]
```
## Output
Comprehensive test coverage for agent workflows.