
Commit 3bc784a

vertex-sdk-bot authored and copybara-github committed
fix: Only include CandidateResponse if a response is present
PiperOrigin-RevId: 876784701
1 parent 6b5cc8f commit 3bc784a

2 files changed

Lines changed: 56 additions & 22 deletions


tests/unit/vertexai/genai/replays/test_create_evaluation_run.py

Lines changed: 46 additions & 11 deletions
@@ -63,7 +63,9 @@
         )
     ),
 )
-
+INFERENCE_CONFIG = types.EvaluationRunInferenceConfig(
+    model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
+)
 
 def test_create_eval_run_data_source_evaluation_set(client):
     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun."""
@@ -189,9 +191,6 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
 def test_create_eval_run_with_inference_configs(client):
     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs."""
     client._api_client._http_options.api_version = "v1beta1"
-    inference_config = types.EvaluationRunInferenceConfig(
-        model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
-    )
     evaluation_run = client.evals.create_evaluation_run(
         name="test_inference_config",
         display_name="test_inference_config",
@@ -200,7 +199,7 @@ def test_create_eval_run_with_inference_configs(client):
         ),
         dest=GCS_DEST,
         metrics=[GENERAL_QUALITY_METRIC],
-        inference_configs={"model_1": inference_config},
+        inference_configs={"model_1": INFERENCE_CONFIG},
         labels={"label1": "value1"},
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
@@ -216,7 +215,7 @@ def test_create_eval_run_with_inference_configs(client):
         ),
         metrics=[GENERAL_QUALITY_METRIC],
     )
-    assert evaluation_run.inference_configs["model_1"] == inference_config
+    assert evaluation_run.inference_configs["model_1"] == INFERENCE_CONFIG
     assert evaluation_run.labels == {
         "label1": "value1",
     }
@@ -318,6 +317,45 @@ def test_create_eval_run_with_inference_configs(client):
 # )
 # assert evaluation_run.error is None
 
+import pandas as pd
+
+def test_create_eval_run_data_source_evaluation_dataset_inference_config(client):
+    """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with EvaluationDataset."""
+    input_df = pd.DataFrame(
+        {
+            "prompt": ["prompt1", "prompt2"],
+            "reference": ["reference1", "reference2"],
+        }
+    )
+    evaluation_run = client.evals.create_evaluation_run(
+        name="test9",
+        display_name="test9",
+        dataset=types.EvaluationDataset(
+            candidate_name="candidate_1",
+            eval_dataset_df=input_df,
+        ),
+        dest=GCS_DEST,
+        metrics=[GENERAL_QUALITY_METRIC],
+        inference_configs={"candidate_1": INFERENCE_CONFIG},
+    )
+    assert isinstance(evaluation_run, types.EvaluationRun)
+    assert evaluation_run.display_name == "test9"
+    assert evaluation_run.state == types.EvaluationRunState.PENDING
+    assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
+    # Check evaluation set
+    assert evaluation_run.data_source.evaluation_set
+    eval_set = client.evals.get_evaluation_set(
+        name=evaluation_run.data_source.evaluation_set
+    )
+    assert len(eval_set.evaluation_items) == 2
+    assert evaluation_run.inference_configs["candidate_1"] == INFERENCE_CONFIG
+    # Check evaluation items
+    for i, eval_item_name in enumerate(eval_set.evaluation_items):
+        eval_item = client.evals.get_evaluation_item(name=eval_item_name)
+        assert eval_item.evaluation_item_type == types.EvaluationItemType.REQUEST
+        assert eval_item.evaluation_request.prompt.text == input_df.iloc[i]["prompt"]
+        assert eval_item.evaluation_request.candidate_responses == []
+    assert evaluation_run.error is None
 
 pytest_plugins = ("pytest_asyncio",)
 
@@ -370,9 +408,6 @@ async def test_create_eval_run_async(client):
 async def test_create_eval_run_async_with_inference_configs(client):
     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs asynchronously."""
     client._api_client._http_options.api_version = "v1beta1"
-    inference_config = types.EvaluationRunInferenceConfig(
-        model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
-    )
     evaluation_run = await client.aio.evals.create_evaluation_run(
         name="test_inference_config_async",
         display_name="test_inference_config_async",
@@ -381,7 +416,7 @@ async def test_create_eval_run_async_with_inference_configs(client):
         ),
         dest=GCS_DEST,
         metrics=[GENERAL_QUALITY_METRIC],
-        inference_configs={"model_1": inference_config},
+        inference_configs={"model_1": INFERENCE_CONFIG},
         labels={"label1": "value1"},
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
@@ -397,7 +432,7 @@ async def test_create_eval_run_async_with_inference_configs(client):
         ),
         metrics=[GENERAL_QUALITY_METRIC],
     )
-    assert evaluation_run.inference_configs["model_1"] == inference_config
+    assert evaluation_run.inference_configs["model_1"] == INFERENCE_CONFIG
     assert evaluation_run.labels == {
         "label1": "value1",
     }
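For readers following along, the test changes above hoist the inference config into a module-level INFERENCE_CONFIG constant and add a test whose input DataFrame has no "response" column, asserting that the resulting evaluation items carry an empty candidate_responses list. A minimal sketch of that input/config pairing is below; the `types` import path is an assumption (it is not shown in this diff), and the destination and metric fixtures (GCS_DEST, GENERAL_QUALITY_METRIC) used by the tests are defined elsewhere in the file and are not repeated here.

import pandas as pd

from vertexai._genai import types  # import path assumed; the tests reference `types` directly

# Mirrors the constant hoisted to module scope in the test file.
INFERENCE_CONFIG = types.EvaluationRunInferenceConfig(
    model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
)

# No "response" column: per this commit, evaluation items built from these rows
# get candidate_responses == [] instead of a CandidateResponse with text=None.
input_df = pd.DataFrame(
    {
        "prompt": ["prompt1", "prompt2"],
        "reference": ["reference1", "reference2"],
    }
)

dataset = types.EvaluationDataset(
    candidate_name="candidate_1",  # the new test keys inference_configs with the same name
    eval_dataset_df=input_df,
)
inference_configs = {"candidate_1": INFERENCE_CONFIG}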

vertexai/_genai/_evals_common.py

Lines changed: 10 additions & 11 deletions
@@ -1962,6 +1962,15 @@ def _create_evaluation_set_from_dataframe(
             for event in row[_evals_constant.INTERMEDIATE_EVENTS]:
                 if CONTENT in event:
                     intermediate_events.append(event[CONTENT])
+        candidate_responses = []
+        if _evals_constant.RESPONSE in row:
+            candidate_responses.append(
+                types.CandidateResponse(
+                    candidate=candidate_name or "Candidate 1",
+                    text=row[_evals_constant.RESPONSE],
+                    events=intermediate_events or None,
+                )
+            )
         eval_item_requests.append(
             types.EvaluationItemRequest(
                 prompt=(
@@ -1974,17 +1983,7 @@
                     if _evals_constant.REFERENCE in row
                     else None
                 ),
-                candidate_responses=[
-                    types.CandidateResponse(
-                        candidate=candidate_name or "Candidate 1",
-                        text=row.get(_evals_constant.RESPONSE, None),
-                        events=(
-                            intermediate_events
-                            if len(intermediate_events) > 0
-                            else None
-                        ),
-                    )
-                ],
+                candidate_responses=candidate_responses,
             )
         )
     logger.info("Writing evaluation item requests to GCS.")
