
Commit 2055881

vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals): Add local ADK agent multi-turn agent scraping via ADK user simulation library
PiperOrigin-RevId: 867307459
1 parent 204e5b4 commit 2055881

4 files changed

Lines changed: 484 additions & 46 deletions

File tree

tests/unit/vertexai/genai/test_evals.py

Lines changed: 242 additions & 1 deletion
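
For orientation before the diff: the new multi-turn tests below build prompt rows with a starting_prompt, a conversation_plan, and JSON-encoded session_inputs. A minimal sketch of such a dataset, using only column names taken from the test fixtures (treating this as the expected input shape for the feature is an assumption):

# Sketch of a multi-turn prompt dataset shaped like the test fixtures below.
# Column names come from the tests; the public input contract is assumed, not confirmed.
import json

import pandas as pd

prompt_dataset = pd.DataFrame(
    {
        "starting_prompt": ["I want a laptop."],
        "conversation_plan": ["Ask for a laptop"],
        "session_inputs": [json.dumps({"user_id": "u1"})],
    }
)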
@@ -1885,9 +1885,133 @@ def test_run_agent_internal_error_response(self, mock_run_agent):

         assert "response" in result_df.columns
         response_content = result_df["response"][0]
-        assert "Unexpected response type from agent run" in response_content
+        assert "agent run failed" in response_content
         assert not result_df["intermediate_events"][0]

+    @mock.patch.object(_evals_common, "_run_agent")
+    def test_run_agent_internal_multi_turn_success(self, mock_run_agent):
+        mock_run_agent.return_value = [
+            [
+                {"turn_index": 0, "turn_id": "t1", "events": []},
+                {"turn_index": 1, "turn_id": "t2", "events": []},
+            ]
+        ]
+        prompt_dataset = pd.DataFrame({"prompt": ["p1"], "conversation_plan": ["plan"]})
+        mock_agent_engine = mock.Mock()
+        mock_api_client = mock.Mock()
+        result_df = _evals_common._run_agent_internal(
+            api_client=mock_api_client,
+            agent_engine=mock_agent_engine,
+            agent=None,
+            prompt_dataset=prompt_dataset,
+        )
+
+        assert "agent_data" in result_df.columns
+        agent_data = result_df["agent_data"][0]
+        assert agent_data["turns"] == [
+            {"turn_index": 0, "turn_id": "t1", "events": []},
+            {"turn_index": 1, "turn_id": "t2", "events": []},
+        ]
+
+    @mock.patch(
+        "vertexai._genai._evals_common.ADK_SessionInput"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.EvaluationGenerator"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.LlmBackedUserSimulator"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.ConversationScenario"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.LlmBackedUserSimulatorConfig"
+    )
+    @pytest.mark.asyncio
+    async def test_run_adk_user_simulation_with_intermediate_events(
+        self,
+        mock_config,
+        mock_scenario,
+        mock_simulator,
+        mock_generator,
+        mock_session_input,
+    ):
+        """Tests that intermediate invocation events (e.g. tool calls) are parsed successfully."""
+        row = pd.Series(
+            {
+                "starting_prompt": "I want a laptop.",
+                "conversation_plan": "Ask for a laptop",
+                "session_inputs": json.dumps({"user_id": "u1"}),
+            }
+        )
+        mock_agent = mock.Mock()
+
+        mock_invocation = mock.Mock()
+        mock_invocation.invocation_id = "turn_123"
+        mock_invocation.creation_timestamp = 1771811084.88
+        mock_invocation.user_content.model_dump.return_value = {
+            "parts": [{"text": "I want a laptop."}],
+            "role": "user",
+        }
+        mock_event_1 = mock.Mock()
+        mock_event_1.author = "ecommerce_agent"
+        mock_event_1.content.model_dump.return_value = {
+            "parts": [
+                {
+                    "function_call": {
+                        "name": "search_products",
+                        "args": {"query": "laptop"},
+                    }
+                }
+            ]
+        }
+        mock_event_2 = mock.Mock()
+        mock_event_2.author = "ecommerce_agent"
+        mock_event_2.content.model_dump.return_value = {
+            "parts": [
+                {
+                    "function_response": {
+                        "name": "search_products",
+                        "response": {"products": []},
+                    }
+                }
+            ]
+        }
+
+        mock_invocation.intermediate_data.invocation_events = [
+            mock_event_1,
+            mock_event_2,
+        ]
+        mock_invocation.final_response.model_dump.return_value = {
+            "parts": [{"text": "There are no laptops matching your search."}],
+            "role": "model",
+        }
+        mock_generator._generate_inferences_from_root_agent = mock.AsyncMock(
+            return_value=[mock_invocation]
+        )
+        turns = await _evals_common._run_adk_user_simulation(row, mock_agent)
+
+        assert len(turns) == 1
+        turn = turns[0]
+        assert turn["turn_index"] == 0
+        assert turn["turn_id"] == "turn_123"
+        assert len(turn["events"]) == 4
+        assert turn["events"][0]["author"] == "user"
+        assert turn["events"][0]["content"]["parts"][0]["text"] == "I want a laptop."
+        assert turn["events"][1]["author"] == "ecommerce_agent"
+        assert "function_call" in turn["events"][1]["content"]["parts"][0]
+        assert turn["events"][2]["author"] == "ecommerce_agent"
+        assert "function_response" in turn["events"][2]["content"]["parts"][0]
+        assert turn["events"][3]["author"] == "agent"
+        assert (
+            turn["events"][3]["content"]["parts"][0]["text"]
+            == "There are no laptops matching your search."
+        )
+        mock_invocation.user_content.model_dump.assert_called_with(mode="json")
+        mock_event_1.content.model_dump.assert_called_with(mode="json")
+        mock_invocation.final_response.model_dump.assert_called_with(mode="json")
+
     @mock.patch.object(_evals_common, "_run_agent")
     def test_run_agent_internal_malformed_event(self, mock_run_agent):
         mock_run_agent.return_value = [
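
Read together, the assertions in the new test above pin down the per-turn record that _run_adk_user_simulation is expected to return: the event list opens with the user message, carries any intermediate tool-call and tool-response events, and closes with the agent's final response. A sketch of that shape, reconstructed purely from the assertions (not a documented schema):

# One simulated turn as asserted in test_run_adk_user_simulation_with_intermediate_events.
example_turn = {
    "turn_index": 0,
    "turn_id": "turn_123",
    "events": [
        # user message (from invocation.user_content)
        {"author": "user", "content": {"parts": [{"text": "I want a laptop."}], "role": "user"}},
        # intermediate tool call and tool response (from intermediate_data.invocation_events)
        {"author": "ecommerce_agent", "content": {"parts": [{"function_call": {"name": "search_products", "args": {"query": "laptop"}}}]}},
        {"author": "ecommerce_agent", "content": {"parts": [{"function_response": {"name": "search_products", "response": {"products": []}}}]}},
        # final agent response (from invocation.final_response)
        {"author": "agent", "content": {"parts": [{"text": "There are no laptops matching your search."}], "role": "model"}},
    ],
}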
@@ -1915,6 +2039,28 @@ def test_run_agent_internal_malformed_event(self, mock_run_agent):
         assert not result_df["intermediate_events"][0]


+class TestIsMultiTurnAgentRun:
+    """Unit tests for the _is_multi_turn_agent_run function."""
+
+    def test_is_multi_turn_agent_run_with_config(self):
+        config = vertexai_genai_types.UserSimulatorConfig(model_name="gemini-pro")
+        assert _evals_common._is_multi_turn_agent_run(
+            user_simulator_config=config, prompt_dataset=pd.DataFrame()
+        )
+
+    def test_is_multi_turn_agent_run_with_conversation_plan(self):
+        prompt_dataset = pd.DataFrame({"conversation_plan": ["plan"]})
+        assert _evals_common._is_multi_turn_agent_run(
+            user_simulator_config=None, prompt_dataset=prompt_dataset
+        )
+
+    def test_is_multi_turn_agent_run_false(self):
+        prompt_dataset = pd.DataFrame({"prompt": ["prompt"]})
+        assert not _evals_common._is_multi_turn_agent_run(
+            user_simulator_config=None, prompt_dataset=prompt_dataset
+        )
+
+
 class TestMetricPromptBuilder:
     """Unit tests for the MetricPromptBuilder class."""

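The three TestIsMultiTurnAgentRun cases above fully determine the decision rule: a run is treated as multi-turn when a UserSimulatorConfig is supplied, or when the prompt dataset carries a conversation_plan column. A predicate consistent with those tests (the actual _evals_common implementation may differ in detail):

from typing import Any, Optional

import pandas as pd


def is_multi_turn_agent_run_sketch(
    user_simulator_config: Optional[Any], prompt_dataset: pd.DataFrame
) -> bool:
    # Multi-turn if a user simulator config is given, or if the dataset
    # carries a "conversation_plan" column -- the behavior asserted above.
    return (
        user_simulator_config is not None
        or "conversation_plan" in prompt_dataset.columns
    )
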
@@ -4228,6 +4374,101 @@ def test_tool_use_quality_metric_no_tool_call_logs_warning(
         )


+@pytest.mark.usefixtures("google_auth_mock")
+class TestRunAdkUserSimulation:
+    """Unit tests for the _run_adk_user_simulation function."""
+
+    @mock.patch(
+        "vertexai._genai._evals_common.ADK_SessionInput"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.EvaluationGenerator"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.LlmBackedUserSimulator"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.ConversationScenario"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.LlmBackedUserSimulatorConfig"
+    )
+    @pytest.mark.asyncio
+    async def test_run_adk_user_simulation_success(
+        self,
+        mock_config_cls,
+        mock_scenario_cls,
+        mock_simulator_cls,
+        mock_generator_cls,
+        mock_session_input_cls,
+    ):
+        row = pd.Series(
+            {
+                "starting_prompt": "start",
+                "conversation_plan": "plan",
+                "session_inputs": json.dumps({"user_id": "u1"}),
+            }
+        )
+        mock_agent = mock.Mock()
+        mock_invocation = mock.Mock()
+        mock_invocation.user_content.model_dump.return_value = {"text": "user msg"}
+        mock_invocation.final_response.model_dump.return_value = {"text": "agent msg"}
+        mock_invocation.intermediate_data = None
+        mock_invocation.creation_timestamp = 12345
+        mock_invocation.invocation_id = "turn1"
+
+        mock_generator_cls._generate_inferences_from_root_agent = mock.AsyncMock(
+            return_value=[mock_invocation]
+        )
+
+        turns = await _evals_common._run_adk_user_simulation(row, mock_agent)
+
+        assert len(turns) == 1
+        turn = turns[0]
+        assert turn["turn_index"] == 0
+        assert turn["turn_id"] == "turn1"
+        assert len(turn["events"]) == 2
+        assert turn["events"][0]["author"] == "user"
+        assert turn["events"][0]["content"] == {"text": "user msg"}
+        assert turn["events"][1]["author"] == "agent"
+        assert turn["events"][1]["content"] == {"text": "agent msg"}
+
+        mock_scenario_cls.assert_called_once_with(
+            starting_prompt="start", conversation_plan="plan"
+        )
+        mock_session_input_cls.assert_called_once()
+
+    @mock.patch(
+        "vertexai._genai._evals_common.ADK_SessionInput"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.EvaluationGenerator"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.LlmBackedUserSimulator"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.ConversationScenario"
+    )
+    @mock.patch(
+        "vertexai._genai._evals_common.LlmBackedUserSimulatorConfig"
+    )
+    @pytest.mark.asyncio
+    async def test_run_adk_user_simulation_missing_columns(
+        self,
+        mock_config_cls,
+        mock_scenario_cls,
+        mock_simulator_cls,
+        mock_generator_cls,
+        mock_session_input_cls,
+    ):
+        row = pd.Series({"conversation_plan": "plan"})
+        mock_agent = mock.Mock()
+
+        with pytest.raises(ValueError, match="User simulation requires"):
+            await _evals_common._run_adk_user_simulation(row, mock_agent)
+
+
 @pytest.mark.usefixtures("google_auth_mock")
 class TestLLMMetricHandlerPayload:
     def setup_method(self):
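
Taken together, the success and intermediate-events tests describe how each ADK invocation is flattened into a turn: user_content opens the event list, any intermediate_data.invocation_events follow with their own authors, and final_response closes it, each serialized with model_dump(mode="json"). A sketch consistent with those assertions (an approximation, not the actual _run_adk_user_simulation code):

def invocation_to_turn_sketch(invocation, turn_index: int) -> dict:
    # Reconstructed from the test assertions; details of the real parser may differ.
    events = [
        {"author": "user", "content": invocation.user_content.model_dump(mode="json")}
    ]
    if invocation.intermediate_data:
        for event in invocation.intermediate_data.invocation_events:
            events.append(
                {"author": event.author, "content": event.content.model_dump(mode="json")}
            )
    events.append(
        {"author": "agent", "content": invocation.final_response.model_dump(mode="json")}
    )
    return {
        "turn_index": turn_index,
        "turn_id": invocation.invocation_id,
        "events": events,
    }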
