File size: 4,054 Bytes
310260a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 | """
End-to-End Integration Tests
Tests complete workflows across the entire system including:
- Complete task lifecycle (create, list, complete, delete)
- Multi-turn conversation with context retention
- Agent handling ambiguous input
- Agent tool selection based on intent
"""
import pytest
from tests.utils.agent_helpers import invoke_agent_with_message, assert_tool_called
from tests.utils.task_helpers import count_tasks, get_task_by_id
@pytest.mark.integration
@pytest.mark.asyncio
async def test_complete_task_lifecycle(test_user, test_session):
"""
Test: Complete task lifecycle (create, list, complete, delete)
Verifies end-to-end workflow of all task operations.
"""
# Step 1: Create task
response1 = await invoke_agent_with_message(
user_id=test_user.id,
message="Add a task to buy groceries"
)
assert_tool_called(response1, "create_task")
# Step 2: List tasks
response2 = await invoke_agent_with_message(
user_id=test_user.id,
message="Show my tasks"
)
assert_tool_called(response2, "list_tasks")
# Verify task exists
task_count = count_tasks(test_session, test_user.id)
assert task_count == 1
# Step 3: Mark complete (need task ID from list)
# For simplicity, get task from database
from sqlmodel import select
from src.models.task import Task
statement = select(Task).where(Task.user_id == test_user.id)
task = test_session.exec(statement).first()
response3 = await invoke_agent_with_message(
user_id=test_user.id,
message=f"Mark task {task.id} as complete"
)
assert_tool_called(response3, "mark_complete")
# Step 4: Delete task
response4 = await invoke_agent_with_message(
user_id=test_user.id,
message=f"Delete task {task.id}"
)
assert_tool_called(response4, "delete_task")
# Verify task deleted
final_count = count_tasks(test_session, test_user.id)
assert final_count == 0
@pytest.mark.integration
@pytest.mark.asyncio
async def test_multi_turn_conversation_with_context_retention(test_user, test_session):
"""
Test: Multi-turn conversation with context retention
Verifies that agent maintains context across multiple turns.
"""
# Turn 1: Create task
response1 = await invoke_agent_with_message(
user_id=test_user.id,
message="Add a task to call the dentist"
)
assert_tool_called(response1, "create_task")
# Turn 2: Reference previous context
response2 = await invoke_agent_with_message(
user_id=test_user.id,
message="Also add one to buy milk"
)
assert_tool_called(response2, "create_task")
# Verify both tasks created
task_count = count_tasks(test_session, test_user.id)
assert task_count == 2
@pytest.mark.integration
@pytest.mark.asyncio
async def test_agent_handles_ambiguous_user_input_gracefully(test_user):
"""
Test: Agent handles ambiguous user input gracefully
Verifies that agent responds appropriately to unclear requests.
"""
# Execute with ambiguous input
response = await invoke_agent_with_message(
user_id=test_user.id,
message="What can you do?"
)
# Assert agent responds (may not call tools)
assert response["content"] is not None
assert len(response["content"]) > 0
@pytest.mark.integration
@pytest.mark.asyncio
async def test_agent_selects_correct_tool_based_on_user_intent(test_user, test_session):
"""
Test: Agent selects correct tool based on user intent
Verifies that agent correctly interprets user intent and selects appropriate tools.
"""
# Test create intent
response1 = await invoke_agent_with_message(
user_id=test_user.id,
message="Remind me to call mom"
)
assert_tool_called(response1, "create_task")
# Test list intent
response2 = await invoke_agent_with_message(
user_id=test_user.id,
message="What do I need to do?"
)
assert_tool_called(response2, "list_tasks")
|