
Commit 17c2e73

vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals): Add local ADK agent multi-turn agent scraping via ADK user simulation library
PiperOrigin-RevId: 867307459
1 parent 6b5cc8f commit 17c2e73

4 files changed (491 additions & 54 deletions)

File tree

tests/unit/vertexai/genai/test_evals.py (242 additions & 1 deletion)
@@ -1886,9 +1886,133 @@ def test_run_agent_internal_error_response(self, mock_run_agent):
 
         assert "response" in result_df.columns
         response_content = result_df["response"][0]
-        assert "Unexpected response type from agent run" in response_content
+        assert "agent run failed" in response_content
         assert not result_df["intermediate_events"][0]
 
+    @mock.patch.object(_evals_common, "_run_agent")
+    def test_run_agent_internal_multi_turn_success(self, mock_run_agent):
+        mock_run_agent.return_value = [
+            [
+                {"turn_index": 0, "turn_id": "t1", "events": []},
+                {"turn_index": 1, "turn_id": "t2", "events": []},
+            ]
+        ]
+        prompt_dataset = pd.DataFrame({"prompt": ["p1"], "conversation_plan": ["plan"]})
+        mock_agent_engine = mock.Mock()
+        mock_api_client = mock.Mock()
+        result_df = _evals_common._run_agent_internal(
+            api_client=mock_api_client,
+            agent_engine=mock_agent_engine,
+            agent=None,
+            prompt_dataset=prompt_dataset,
+        )
+
+        assert "agent_data" in result_df.columns
+        agent_data = result_df["agent_data"][0]
+        assert agent_data["turns"] == [
+            {"turn_index": 0, "turn_id": "t1", "events": []},
+            {"turn_index": 1, "turn_id": "t2", "events": []},
+        ]
+
+    @mock.patch(
+        "vertexai._genai._evals_common.ADK_SessionInput"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.EvaluationGenerator"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.LlmBackedUserSimulator"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.ConversationScenario"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.LlmBackedUserSimulatorConfig"
+    )
+    @pytest.mark.asyncio
+    async def test_run_adk_user_simulation_with_intermediate_events(
+        self,
+        mock_config,
+        mock_scenario,
+        mock_simulator,
+        mock_generator,
+        mock_session_input,
+    ):
+        """Tests that intermediate invocation events (e.g. tool calls) are parsed successfully."""
+        row = pd.Series(
+            {
+                "starting_prompt": "I want a laptop.",
+                "conversation_plan": "Ask for a laptop",
+                "session_inputs": json.dumps({"user_id": "u1"}),
+            }
+        )
+        mock_agent = mock.Mock()
+
+        mock_invocation = mock.Mock()
+        mock_invocation.invocation_id = "turn_123"
+        mock_invocation.creation_timestamp = 1771811084.88
+        mock_invocation.user_content.model_dump.return_value = {
+            "parts": [{"text": "I want a laptop."}],
+            "role": "user",
+        }
+        mock_event_1 = mock.Mock()
+        mock_event_1.author = "ecommerce_agent"
+        mock_event_1.content.model_dump.return_value = {
+            "parts": [
+                {
+                    "function_call": {
+                        "name": "search_products",
+                        "args": {"query": "laptop"},
+                    }
+                }
+            ]
+        }
+        mock_event_2 = mock.Mock()
+        mock_event_2.author = "ecommerce_agent"
+        mock_event_2.content.model_dump.return_value = {
+            "parts": [
+                {
+                    "function_response": {
+                        "name": "search_products",
+                        "response": {"products": []},
+                    }
+                }
+            ]
+        }
+
+        mock_invocation.intermediate_data.invocation_events = [
+            mock_event_1,
+            mock_event_2,
+        ]
+        mock_invocation.final_response.model_dump.return_value = {
+            "parts": [{"text": "There are no laptops matching your search."}],
+            "role": "model",
+        }
+        mock_generator._generate_inferences_from_root_agent = mock.AsyncMock(
+            return_value=[mock_invocation]
+        )
+        turns = await _evals_common._run_adk_user_simulation(row, mock_agent)
+
+        assert len(turns) == 1
+        turn = turns[0]
+        assert turn["turn_index"] == 0
+        assert turn["turn_id"] == "turn_123"
+        assert len(turn["events"]) == 4
+        assert turn["events"][0]["author"] == "user"
+        assert turn["events"][0]["content"]["parts"][0]["text"] == "I want a laptop."
+        assert turn["events"][1]["author"] == "ecommerce_agent"
+        assert "function_call" in turn["events"][1]["content"]["parts"][0]
+        assert turn["events"][2]["author"] == "ecommerce_agent"
+        assert "function_response" in turn["events"][2]["content"]["parts"][0]
+        assert turn["events"][3]["author"] == "agent"
+        assert (
+            turn["events"][3]["content"]["parts"][0]["text"]
+            == "There are no laptops matching your search."
+        )
+        mock_invocation.user_content.model_dump.assert_called_with(mode="json")
+        mock_event_1.content.model_dump.assert_called_with(mode="json")
+        mock_invocation.final_response.model_dump.assert_called_with(mode="json")
+
     @mock.patch.object(_evals_common, "_run_agent")
     def test_run_agent_internal_malformed_event(self, mock_run_agent):
         mock_run_agent.return_value = [
@@ -1916,6 +2040,28 @@ def test_run_agent_internal_malformed_event(self, mock_run_agent):
         assert not result_df["intermediate_events"][0]
 
 
+class TestIsMultiTurnAgentRun:
+    """Unit tests for the _is_multi_turn_agent_run function."""
+
+    def test_is_multi_turn_agent_run_with_config(self):
+        config = vertexai_genai_types.evals.UserSimulatorConfig(model_name="gemini-pro")
+        assert _evals_common._is_multi_turn_agent_run(
+            user_simulator_config=config, prompt_dataset=pd.DataFrame()
+        )
+
+    def test_is_multi_turn_agent_run_with_conversation_plan(self):
+        prompt_dataset = pd.DataFrame({"conversation_plan": ["plan"]})
+        assert _evals_common._is_multi_turn_agent_run(
+            user_simulator_config=None, prompt_dataset=prompt_dataset
+        )
+
+    def test_is_multi_turn_agent_run_false(self):
+        prompt_dataset = pd.DataFrame({"prompt": ["prompt"]})
+        assert not _evals_common._is_multi_turn_agent_run(
+            user_simulator_config=None, prompt_dataset=prompt_dataset
+        )
+
+
 class TestMetricPromptBuilder:
     """Unit tests for the MetricPromptBuilder class."""
 
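Note: the TestIsMultiTurnAgentRun assertions above imply a simple detection rule: a run counts as multi-turn when a UserSimulatorConfig is supplied, or when the prompt dataset carries a conversation_plan column. A minimal re-implementation of that predicate, inferred only from these tests rather than taken from the SDK source, would look like this:

import pandas as pd

def is_multi_turn_agent_run(user_simulator_config, prompt_dataset: pd.DataFrame) -> bool:
    # Inferred from the tests above: an explicit user-simulator config or a
    # "conversation_plan" column marks the run as multi-turn.
    if user_simulator_config is not None:
        return True
    return "conversation_plan" in prompt_dataset.columns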
@@ -4229,6 +4375,101 @@ def test_tool_use_quality_metric_no_tool_call_logs_warning(
         )
 
 
+@pytest.mark.usefixtures("google_auth_mock")
+class TestRunAdkUserSimulation:
+    """Unit tests for the _run_adk_user_simulation function."""
+
+    @mock.patch(
+        "vertexai._genai._evals_common.ADK_SessionInput"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.EvaluationGenerator"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.LlmBackedUserSimulator"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.ConversationScenario"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.LlmBackedUserSimulatorConfig"
+    )
+    @pytest.mark.asyncio
+    async def test_run_adk_user_simulation_success(
+        self,
+        mock_config_cls,
+        mock_scenario_cls,
+        mock_simulator_cls,
+        mock_generator_cls,
+        mock_session_input_cls,
+    ):
+        row = pd.Series(
+            {
+                "starting_prompt": "start",
+                "conversation_plan": "plan",
+                "session_inputs": json.dumps({"user_id": "u1"}),
+            }
+        )
+        mock_agent = mock.Mock()
+        mock_invocation = mock.Mock()
+        mock_invocation.user_content.model_dump.return_value = {"text": "user msg"}
+        mock_invocation.final_response.model_dump.return_value = {"text": "agent msg"}
+        mock_invocation.intermediate_data = None
+        mock_invocation.creation_timestamp = 12345
+        mock_invocation.invocation_id = "turn1"
+
+        mock_generator_cls._generate_inferences_from_root_agent = mock.AsyncMock(
+            return_value=[mock_invocation]
+        )
+
+        turns = await _evals_common._run_adk_user_simulation(row, mock_agent)
+
+        assert len(turns) == 1
+        turn = turns[0]
+        assert turn["turn_index"] == 0
+        assert turn["turn_id"] == "turn1"
+        assert len(turn["events"]) == 2
+        assert turn["events"][0]["author"] == "user"
+        assert turn["events"][0]["content"] == {"text": "user msg"}
+        assert turn["events"][1]["author"] == "agent"
+        assert turn["events"][1]["content"] == {"text": "agent msg"}
+
+        mock_scenario_cls.assert_called_once_with(
+            starting_prompt="start", conversation_plan="plan"
+        )
+        mock_session_input_cls.assert_called_once()
+
+    @mock.patch(
+        "vertexai._genai._evals_common.ADK_SessionInput"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.EvaluationGenerator"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.LlmBackedUserSimulator"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.ConversationScenario"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.LlmBackedUserSimulatorConfig"
+    )
+    @pytest.mark.asyncio
+    async def test_run_adk_user_simulation_missing_columns(
+        self,
+        mock_config_cls,
+        mock_scenario_cls,
+        mock_simulator_cls,
+        mock_generator_cls,
+        mock_session_input_cls,
+    ):
+        row = pd.Series({"conversation_plan": "plan"})
+        mock_agent = mock.Mock()
+
+        with pytest.raises(ValueError, match="User simulation requires"):
+            await _evals_common._run_adk_user_simulation(row, mock_agent)
+
+
 @pytest.mark.usefixtures("google_auth_mock")
 class TestLLMMetricHandlerPayload:
     def setup_method(self):
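As a reading aid, the turn payload that the _run_adk_user_simulation tests assert on has a small, regular shape. The snippet below reconstructs it purely from the assertions in this diff; the field names come from the tests, while the values are illustrative rather than real SDK output:

# One simulated conversation turn, as asserted in the tests above.
turn = {
    "turn_index": 0,
    "turn_id": "turn_123",
    "events": [
        {"author": "user",
         "content": {"role": "user", "parts": [{"text": "I want a laptop."}]}},
        {"author": "ecommerce_agent",
         "content": {"parts": [{"function_call": {
             "name": "search_products", "args": {"query": "laptop"}}}]}},
        {"author": "ecommerce_agent",
         "content": {"parts": [{"function_response": {
             "name": "search_products", "response": {"products": []}}}]}},
        {"author": "agent",
         "content": {"role": "model",
                     "parts": [{"text": "There are no laptops matching your search."}]}},
    ],
}

# A multi-turn run surfaces a list of such turns under the "agent_data" column.
agent_data = {"turns": [turn]}
assert agent_data["turns"][0]["events"][0]["author"] == "user"
assert len(agent_data["turns"][0]["events"]) == 4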
