@@ -1886,9 +1886,133 @@ def test_run_agent_internal_error_response(self, mock_run_agent):
18861886
18871887 assert "response" in result_df .columns
18881888 response_content = result_df ["response" ][0 ]
1889- assert "Unexpected response type from agent run" in response_content
1889+ assert "agent run failed " in response_content
18901890 assert not result_df ["intermediate_events" ][0 ]
18911891
@mock.patch.object(_evals_common, "_run_agent")
def test_run_agent_internal_multi_turn_success(self, mock_run_agent):
    """Checks that turns returned by ``_run_agent`` land in ``agent_data``.

    ``_run_agent`` is mocked to return a single list holding two turn
    dicts; the first row of the resulting ``agent_data`` column must expose
    those same turns under the ``"turns"`` key.
    """

    def build_turns():
        # Built fresh on every call so the expectation never aliases the
        # objects handed to the mocked ``_run_agent``.
        return [
            {"turn_index": position, "turn_id": turn_id, "events": []}
            for position, turn_id in enumerate(("t1", "t2"))
        ]

    mock_run_agent.return_value = [build_turns()]
    dataset = pd.DataFrame({"prompt": ["p1"], "conversation_plan": ["plan"]})

    result_df = _evals_common._run_agent_internal(
        api_client=mock.Mock(),
        agent_engine=mock.Mock(),
        agent=None,
        prompt_dataset=dataset,
    )

    assert "agent_data" in result_df.columns
    first_row_agent_data = result_df["agent_data"][0]
    assert first_row_agent_data["turns"] == build_turns()
1916+
# NOTE: stacked ``mock.patch`` decorators inject their mocks bottom-up, so
# the lowest patch (LlmBackedUserSimulatorConfig) maps to ``mock_config``
# and the topmost (ADK_SessionInput) maps to ``mock_session_input``.
@mock.patch("vertexai._genai._evals_common.ADK_SessionInput")
@mock.patch("vertexai._genai._evals_common.EvaluationGenerator")
@mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulator")
@mock.patch("vertexai._genai._evals_common.ConversationScenario")
@mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig")
@pytest.mark.asyncio
async def test_run_adk_user_simulation_with_intermediate_events(
    self,
    mock_config,
    mock_scenario,
    mock_simulator,
    mock_generator,
    mock_session_input,
):
    """Tests that intermediate invocation events (e.g. tool calls) are parsed successfully.

    A single mocked invocation carries two intermediate events (a function
    call and its function response). The resulting turn must contain four
    events in order: the user message, both intermediate events, and the
    final agent response. Every piece of content must have been serialized
    with ``model_dump(mode="json")``.
    """
    row = pd.Series(
        {
            "starting_prompt": "I want a laptop.",
            "conversation_plan": "Ask for a laptop",
            "session_inputs": json.dumps({"user_id": "u1"}),
        }
    )
    mock_agent = mock.Mock()

    mock_invocation = mock.Mock()
    mock_invocation.invocation_id = "turn_123"
    mock_invocation.creation_timestamp = 1771811084.88
    mock_invocation.user_content.model_dump.return_value = {
        "parts": [{"text": "I want a laptop."}],
        "role": "user",
    }

    # First intermediate event: the agent issues a tool call.
    mock_event_1 = mock.Mock()
    mock_event_1.author = "ecommerce_agent"
    mock_event_1.content.model_dump.return_value = {
        "parts": [
            {
                "function_call": {
                    "name": "search_products",
                    "args": {"query": "laptop"},
                }
            }
        ]
    }

    # Second intermediate event: the tool's response.
    mock_event_2 = mock.Mock()
    mock_event_2.author = "ecommerce_agent"
    mock_event_2.content.model_dump.return_value = {
        "parts": [
            {
                "function_response": {
                    "name": "search_products",
                    "response": {"products": []},
                }
            }
        ]
    }

    mock_invocation.intermediate_data.invocation_events = [
        mock_event_1,
        mock_event_2,
    ]
    mock_invocation.final_response.model_dump.return_value = {
        "parts": [{"text": "There are no laptops matching your search."}],
        "role": "model",
    }
    mock_generator._generate_inferences_from_root_agent = mock.AsyncMock(
        return_value=[mock_invocation]
    )

    turns = await _evals_common._run_adk_user_simulation(row, mock_agent)

    assert len(turns) == 1
    turn = turns[0]
    assert turn["turn_index"] == 0
    assert turn["turn_id"] == "turn_123"
    assert len(turn["events"]) == 4
    assert turn["events"][0]["author"] == "user"
    assert turn["events"][0]["content"]["parts"][0]["text"] == "I want a laptop."
    assert turn["events"][1]["author"] == "ecommerce_agent"
    assert "function_call" in turn["events"][1]["content"]["parts"][0]
    assert turn["events"][2]["author"] == "ecommerce_agent"
    assert "function_response" in turn["events"][2]["content"]["parts"][0]
    assert turn["events"][3]["author"] == "agent"
    assert (
        turn["events"][3]["content"]["parts"][0]["text"]
        == "There are no laptops matching your search."
    )

    # Every serialized payload must have gone through JSON-mode dumping,
    # including BOTH intermediate events (the second was previously unchecked).
    mock_invocation.user_content.model_dump.assert_called_with(mode="json")
    mock_event_1.content.model_dump.assert_called_with(mode="json")
    mock_event_2.content.model_dump.assert_called_with(mode="json")
    mock_invocation.final_response.model_dump.assert_called_with(mode="json")
18922016 @mock .patch .object (_evals_common , "_run_agent" )
18932017 def test_run_agent_internal_malformed_event (self , mock_run_agent ):
18942018 mock_run_agent .return_value = [
@@ -1916,6 +2040,28 @@ def test_run_agent_internal_malformed_event(self, mock_run_agent):
19162040 assert not result_df ["intermediate_events" ][0 ]
19172041
19182042
class TestIsMultiTurnAgentRun:
    """Unit tests for the _is_multi_turn_agent_run function."""

    def test_is_multi_turn_agent_run_with_config(self):
        """A user simulator config with an empty dataset counts as multi-turn."""
        simulator_config = vertexai_genai_types.evals.UserSimulatorConfig(
            model_name="gemini-pro"
        )
        empty_dataset = pd.DataFrame()

        is_multi_turn = _evals_common._is_multi_turn_agent_run(
            user_simulator_config=simulator_config,
            prompt_dataset=empty_dataset,
        )

        assert is_multi_turn

    def test_is_multi_turn_agent_run_with_conversation_plan(self):
        """A ``conversation_plan`` column without a config counts as multi-turn."""
        dataset_with_plan = pd.DataFrame({"conversation_plan": ["plan"]})

        is_multi_turn = _evals_common._is_multi_turn_agent_run(
            user_simulator_config=None,
            prompt_dataset=dataset_with_plan,
        )

        assert is_multi_turn

    def test_is_multi_turn_agent_run_false(self):
        """A prompt-only dataset with no config is not a multi-turn run."""
        prompt_only_dataset = pd.DataFrame({"prompt": ["prompt"]})

        is_multi_turn = _evals_common._is_multi_turn_agent_run(
            user_simulator_config=None,
            prompt_dataset=prompt_only_dataset,
        )

        assert not is_multi_turn
2064+
19192065class TestMetricPromptBuilder :
19202066 """Unit tests for the MetricPromptBuilder class."""
19212067
@@ -4229,6 +4375,101 @@ def test_tool_use_quality_metric_no_tool_call_logs_warning(
42294375 )
42304376
42314377
@pytest.mark.usefixtures("google_auth_mock")
class TestRunAdkUserSimulation:
    """Unit tests for the _run_adk_user_simulation function."""

    # Stacked ``mock.patch`` decorators hand their mocks to the test
    # bottom-up: the lowest patch becomes the first mock parameter.
    @mock.patch("vertexai._genai._evals_common.ADK_SessionInput")
    @mock.patch("vertexai._genai._evals_common.EvaluationGenerator")
    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulator")
    @mock.patch("vertexai._genai._evals_common.ConversationScenario")
    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig")
    @pytest.mark.asyncio
    async def test_run_adk_user_simulation_success(
        self,
        mock_config_cls,
        mock_scenario_cls,
        mock_simulator_cls,
        mock_generator_cls,
        mock_session_input_cls,
    ):
        """One invocation without intermediate data yields a two-event turn."""
        simulation_row = pd.Series(
            {
                "starting_prompt": "start",
                "conversation_plan": "plan",
                "session_inputs": json.dumps({"user_id": "u1"}),
            }
        )
        agent = mock.Mock()

        user_payload = {"text": "user msg"}
        agent_payload = {"text": "agent msg"}
        invocation = mock.Mock(
            intermediate_data=None,
            creation_timestamp=12345,
            invocation_id="turn1",
        )
        invocation.user_content.model_dump.return_value = user_payload
        invocation.final_response.model_dump.return_value = agent_payload

        mock_generator_cls._generate_inferences_from_root_agent = mock.AsyncMock(
            return_value=[invocation]
        )

        turns = await _evals_common._run_adk_user_simulation(simulation_row, agent)

        assert len(turns) == 1
        only_turn = turns[0]
        assert only_turn["turn_index"] == 0
        assert only_turn["turn_id"] == "turn1"

        events = only_turn["events"]
        assert len(events) == 2
        user_event, agent_event = events
        assert user_event["author"] == "user"
        assert user_event["content"] == {"text": "user msg"}
        assert agent_event["author"] == "agent"
        assert agent_event["content"] == {"text": "agent msg"}

        mock_scenario_cls.assert_called_once_with(
            starting_prompt="start", conversation_plan="plan"
        )
        mock_session_input_cls.assert_called_once()

    @mock.patch("vertexai._genai._evals_common.ADK_SessionInput")
    @mock.patch("vertexai._genai._evals_common.EvaluationGenerator")
    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulator")
    @mock.patch("vertexai._genai._evals_common.ConversationScenario")
    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig")
    @pytest.mark.asyncio
    async def test_run_adk_user_simulation_missing_columns(
        self,
        mock_config_cls,
        mock_scenario_cls,
        mock_simulator_cls,
        mock_generator_cls,
        mock_session_input_cls,
    ):
        """A row holding only ``conversation_plan`` is rejected with ValueError."""
        incomplete_row = pd.Series({"conversation_plan": "plan"})
        agent = mock.Mock()

        with pytest.raises(ValueError, match="User simulation requires"):
            await _evals_common._run_adk_user_simulation(incomplete_row, agent)
4472+
42324473@pytest .mark .usefixtures ("google_auth_mock" )
42334474class TestLLMMetricHandlerPayload :
42344475 def setup_method (self ):
0 commit comments