6363 )
6464 ),
6565)
66-
# Fully-qualified resource name of the publisher model used for inference.
_GEMINI_FLASH_MODEL = "projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"

# Shared inference config passed as an `inference_configs` value by the tests
# in this module and compared against the config echoed back on the run.
INFERENCE_CONFIG = types.EvaluationRunInferenceConfig(model=_GEMINI_FLASH_MODEL)
6769
6870def test_create_eval_run_data_source_evaluation_set (client ):
6971 """Tests that create_evaluation_run() creates a correctly structured EvaluationRun."""
@@ -189,9 +191,6 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
189191def test_create_eval_run_with_inference_configs (client ):
190192 """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs."""
191193 client ._api_client ._http_options .api_version = "v1beta1"
192- inference_config = types .EvaluationRunInferenceConfig (
193- model = "projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
194- )
195194 evaluation_run = client .evals .create_evaluation_run (
196195 name = "test_inference_config" ,
197196 display_name = "test_inference_config" ,
@@ -200,7 +199,7 @@ def test_create_eval_run_with_inference_configs(client):
200199 ),
201200 dest = GCS_DEST ,
202201 metrics = [GENERAL_QUALITY_METRIC ],
203- inference_configs = {"model_1" : inference_config },
202+ inference_configs = {"model_1" : INFERENCE_CONFIG },
204203 labels = {"label1" : "value1" },
205204 )
206205 assert isinstance (evaluation_run , types .EvaluationRun )
@@ -216,7 +215,7 @@ def test_create_eval_run_with_inference_configs(client):
216215 ),
217216 metrics = [GENERAL_QUALITY_METRIC ],
218217 )
219- assert evaluation_run .inference_configs ["model_1" ] == inference_config
218+ assert evaluation_run .inference_configs ["model_1" ] == INFERENCE_CONFIG
220219 assert evaluation_run .labels == {
221220 "label1" : "value1" ,
222221 }
@@ -318,6 +317,45 @@ def test_create_eval_run_with_inference_configs(client):
318317# )
319318# assert evaluation_run.error is None
320319
320+ import pandas as pd
321+
def test_create_eval_run_data_source_evaluation_dataset_inference_config(client):
    """Checks create_evaluation_run() with an EvaluationDataset plus inference_configs.

    A two-row prompt/reference DataFrame is submitted as the dataset for
    "candidate_1" together with INFERENCE_CONFIG. The test then verifies the
    returned run, the evaluation set it references, and every evaluation item
    in that set.
    """
    # NOTE(review): the sibling inference_configs tests set
    # client._api_client._http_options.api_version to "v1beta1" before the
    # call; confirm whether this test needs the same.
    prompts_df = pd.DataFrame(
        {
            "prompt": ["prompt1", "prompt2"],
            "reference": ["reference1", "reference2"],
        }
    )
    dataset = types.EvaluationDataset(
        candidate_name="candidate_1",
        eval_dataset_df=prompts_df,
    )
    run = client.evals.create_evaluation_run(
        name="test9",
        display_name="test9",
        dataset=dataset,
        dest=GCS_DEST,
        metrics=[GENERAL_QUALITY_METRIC],
        inference_configs={"candidate_1": INFERENCE_CONFIG},
    )

    # Top-level shape of the returned run.
    assert isinstance(run, types.EvaluationRun)
    assert run.display_name == "test9"
    assert run.state == types.EvaluationRunState.PENDING
    data_source = run.data_source
    assert isinstance(data_source, types.EvaluationRunDataSource)

    # The run must reference an evaluation set with one item per DataFrame row.
    assert data_source.evaluation_set
    eval_set = client.evals.get_evaluation_set(name=data_source.evaluation_set)
    item_names = eval_set.evaluation_items
    assert len(item_names) == 2
    assert run.inference_configs["candidate_1"] == INFERENCE_CONFIG

    # Each item is a REQUEST carrying the matching prompt and an empty list of
    # candidate responses.
    for row_index, item_name in enumerate(item_names):
        item = client.evals.get_evaluation_item(name=item_name)
        assert item.evaluation_item_type == types.EvaluationItemType.REQUEST
        request = item.evaluation_request
        assert request.prompt.text == prompts_df.iloc[row_index]["prompt"]
        assert request.candidate_responses == []
    assert run.error is None
321359
322360pytest_plugins = ("pytest_asyncio" ,)
323361
@@ -370,9 +408,6 @@ async def test_create_eval_run_async(client):
370408async def test_create_eval_run_async_with_inference_configs (client ):
371409 """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs asynchronously."""
372410 client ._api_client ._http_options .api_version = "v1beta1"
373- inference_config = types .EvaluationRunInferenceConfig (
374- model = "projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
375- )
376411 evaluation_run = await client .aio .evals .create_evaluation_run (
377412 name = "test_inference_config_async" ,
378413 display_name = "test_inference_config_async" ,
@@ -381,7 +416,7 @@ async def test_create_eval_run_async_with_inference_configs(client):
381416 ),
382417 dest = GCS_DEST ,
383418 metrics = [GENERAL_QUALITY_METRIC ],
384- inference_configs = {"model_1" : inference_config },
419+ inference_configs = {"model_1" : INFERENCE_CONFIG },
385420 labels = {"label1" : "value1" },
386421 )
387422 assert isinstance (evaluation_run , types .EvaluationRun )
@@ -397,7 +432,7 @@ async def test_create_eval_run_async_with_inference_configs(client):
397432 ),
398433 metrics = [GENERAL_QUALITY_METRIC ],
399434 )
400- assert evaluation_run .inference_configs ["model_1" ] == inference_config
435+ assert evaluation_run .inference_configs ["model_1" ] == INFERENCE_CONFIG
401436 assert evaluation_run .labels == {
402437 "label1" : "value1" ,
403438 }
0 commit comments