Skip to content

Commit 37f72e5

Browse files
vertex-sdk-bot authored and copybara-github committed
fix: Fix column name inconsistency (history vs conversation_history) across evaluation flows
PiperOrigin-RevId: 900310250
1 parent 1f5af01 commit 37f72e5

4 files changed

Lines changed: 228 additions & 6 deletions

File tree

tests/unit/vertexai/genai/test_evals.py

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4691,6 +4691,34 @@ def test_convert_with_conversation_history(self):
46914691
eval_case.conversation_history[1].content.parts[0].text == "Old model msg"
46924692
)
46934693

4694+
def test_convert_with_conversation_history_column_name(self):
    """Tests that 'conversation_history' is accepted as a column name alias for 'history'."""
    # One-row dataset whose history column uses the long-form column name.
    history_rows = [
        {"role": "user", "parts": [{"text": "Old user msg"}]},
        {"role": "model", "parts": [{"text": "Old model msg"}]},
    ]
    frame = pd.DataFrame(
        {
            "prompt": ["Current prompt"],
            "response": ["A response"],
            "conversation_history": [history_rows],
        }
    )
    records = frame.to_dict(orient="records")

    converted = self.converter.convert(records)
    case = converted.eval_cases[0]

    expected_prompt = genai_types.Content(
        parts=[genai_types.Part(text="Current prompt")]
    )
    assert case.prompt == expected_prompt
    assert case.reference is None

    history = case.conversation_history
    assert len(history) == 2
    assert history[0].content.parts[0].text == "Old user msg"
    assert history[1].content.parts[0].text == "Old model msg"
4721+
46944722
def test_convert_missing_response_raises_value_error(self):
46954723
raw_data_df = pd.DataFrame({"prompt": ["Hello"]}) # Missing response
46964724
raw_data = raw_data_df.to_dict(orient="records")
@@ -8355,6 +8383,176 @@ def test_create_evaluation_set_with_agent_data(
83558383
assert candidate_response["candidate"] == "test-candidate"
83568384
assert candidate_response["agent_data"] == agent_data
83578385

8386+
@mock.patch.object(_evals_common, "evals")
@mock.patch.object(_evals_common, "_gcs_utils")
def test_create_evaluation_set_with_history_column(
    self, mock_gcs_utils, mock_evals_module
):
    """Tests that 'history' column is accepted and mapped to prompt_template_data."""
    frame = pd.DataFrame(
        [
            {
                "prompt": "test prompt",
                "response": "test response",
                "history": "previous conversation",
            }
        ]
    )

    # Stub out the GCS upload and the Evals service so no RPCs are made.
    gcs_stub = mock_gcs_utils.GcsUtils.return_value
    gcs_stub.upload_json_to_prefix.return_value = "gs://bucket/path/request.json"

    evals_stub = mock_evals_module.Evals.return_value
    item_stub = mock.Mock()
    item_stub.name = "eval_item_1"
    evals_stub.create_evaluation_item.return_value = item_stub
    set_stub = mock.Mock()
    evals_stub.create_evaluation_set.return_value = set_stub

    created = _evals_common._create_evaluation_set_from_dataframe(
        api_client=self.mock_api_client,
        gcs_dest_prefix="gs://bucket/prefix",
        eval_df=frame,
        candidate_name="test-candidate",
    )

    assert created == set_stub

    # The uploaded request must carry the history under the canonical
    # "conversation_history" key inside prompt_template_data.
    gcs_stub.upload_json_to_prefix.assert_called_once()
    payload = gcs_stub.upload_json_to_prefix.call_args.kwargs["data"]
    assert "prompt_template_data" in payload["prompt"]
    template_values = payload["prompt"]["prompt_template_data"]["values"]
    assert "conversation_history" in template_values
8431+
8432+
@mock.patch.object(_evals_common, "evals")
@mock.patch.object(_evals_common, "_gcs_utils")
def test_create_evaluation_set_with_conversation_history_column(
    self, mock_gcs_utils, mock_evals_module
):
    """Tests that 'conversation_history' column is accepted and mapped to prompt_template_data."""
    frame = pd.DataFrame(
        [
            {
                "prompt": "test prompt",
                "response": "test response",
                "conversation_history": "previous conversation",
            }
        ]
    )

    # Stub out the GCS upload and the Evals service so no RPCs are made.
    gcs_stub = mock_gcs_utils.GcsUtils.return_value
    gcs_stub.upload_json_to_prefix.return_value = "gs://bucket/path/request.json"

    evals_stub = mock_evals_module.Evals.return_value
    item_stub = mock.Mock()
    item_stub.name = "eval_item_1"
    evals_stub.create_evaluation_item.return_value = item_stub
    set_stub = mock.Mock()
    evals_stub.create_evaluation_set.return_value = set_stub

    created = _evals_common._create_evaluation_set_from_dataframe(
        api_client=self.mock_api_client,
        gcs_dest_prefix="gs://bucket/prefix",
        eval_df=frame,
        candidate_name="test-candidate",
    )

    assert created == set_stub

    # The long-form column name should land under the same canonical key.
    gcs_stub.upload_json_to_prefix.assert_called_once()
    payload = gcs_stub.upload_json_to_prefix.call_args.kwargs["data"]
    assert "prompt_template_data" in payload["prompt"]
    template_values = payload["prompt"]["prompt_template_data"]["values"]
    assert "conversation_history" in template_values
8477+
8478+
8479+
class TestResolveDataset:
    """Unit tests for the _resolve_dataset function."""

    def setup_method(self):
        # Fake API client with just the fields _resolve_dataset reads.
        self.mock_api_client = mock.Mock(spec=client.Client)
        self.mock_api_client.project = "test-project"
        self.mock_api_client.location = "us-central1"

    @mock.patch.object(_evals_common, "evals")
    @mock.patch.object(_evals_common, "_gcs_utils")
    def test_resolve_dataset_preserves_conversation_history(
        self, mock_gcs_utils, mock_evals_module
    ):
        """Tests that conversation_history from EvalCase is included in the DataFrame."""
        gcs_stub = mock_gcs_utils.GcsUtils.return_value
        gcs_stub.upload_json_to_prefix.return_value = "gs://bucket/path/request.json"

        evals_stub = mock_evals_module.Evals.return_value
        item_stub = mock.Mock()
        item_stub.name = "eval_item_1"
        evals_stub.create_evaluation_item.return_value = item_stub

        set_stub = mock.Mock()
        set_stub.name = "eval_set_1"
        evals_stub.create_evaluation_set.return_value = set_stub

        # Two prior turns that should survive the round-trip into the payload.
        prior_turns = [
            genai_types.Content(
                role="user", parts=[genai_types.Part(text="Old user msg")]
            ),
            genai_types.Content(
                role="model", parts=[genai_types.Part(text="Old model msg")]
            ),
        ]

        dataset = vertexai_genai_types.EvaluationDataset(
            eval_cases=[
                vertexai_genai_types.EvalCase(
                    prompt=genai_types.Content(
                        parts=[genai_types.Part(text="test prompt")]
                    ),
                    responses=[
                        vertexai_genai_types.ResponseCandidate(
                            response=genai_types.Content(
                                parts=[genai_types.Part(text="test response")]
                            )
                        )
                    ],
                    conversation_history=[
                        vertexai_genai_types.evals.Message(
                            turn_id=str(idx), content=turn
                        )
                        for idx, turn in enumerate(prior_turns)
                    ],
                )
            ]
        )

        resolved = _evals_common._resolve_dataset(
            api_client=self.mock_api_client,
            dataset=dataset,
            dest="gs://bucket/prefix",
        )

        assert resolved.evaluation_set == "eval_set_1"

        # Verify that conversation_history was passed through to the GCS upload
        gcs_stub.upload_json_to_prefix.assert_called_once()
        payload = gcs_stub.upload_json_to_prefix.call_args.kwargs["data"]
        assert "prompt_template_data" in payload["prompt"]
        template_values = payload["prompt"]["prompt_template_data"]["values"]
        assert "conversation_history" in template_values
8555+
83588556

83598557
class TestRateLimiter:
83608558
"""Tests for the RateLimiter class in _evals_utils."""

vertexai/_genai/_evals_common.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,18 @@ def _resolve_dataset(
326326
if event.content
327327
]
328328

329+
if case.conversation_history:
330+
history_parts = []
331+
for msg in case.conversation_history:
332+
if msg.content:
333+
role = msg.content.role or "user"
334+
text = _evals_data_converters._get_content_text(msg.content)
335+
history_parts.append(f"{role}: {text}")
336+
if history_parts:
337+
row[_evals_constant.CONVERSATION_HISTORY] = "\n".join(
338+
history_parts
339+
)
340+
329341
if case.user_scenario:
330342
if case.user_scenario.starting_prompt:
331343
row[_evals_constant.STARTING_PROMPT] = (
@@ -2586,6 +2598,14 @@ def _create_evaluation_set_from_dataframe(
25862598
)
25872599

25882600
prompt = None
2601+
# Determine which history column name is present, preferring
2602+
# "conversation_history" over "history" if both exist.
2603+
history_col = None
2604+
if _evals_constant.CONVERSATION_HISTORY in row:
2605+
history_col = _evals_constant.CONVERSATION_HISTORY
2606+
elif _evals_constant.HISTORY in row:
2607+
history_col = _evals_constant.HISTORY
2608+
25892609
if (
25902610
_evals_constant.STARTING_PROMPT in row
25912611
and _evals_constant.CONVERSATION_PLAN in row
@@ -2596,15 +2616,15 @@ def _create_evaluation_set_from_dataframe(
25962616
conversation_plan=row[_evals_constant.CONVERSATION_PLAN],
25972617
)
25982618
)
2599-
elif _evals_constant.CONTEXT in row or _evals_constant.HISTORY in row:
2619+
elif _evals_constant.CONTEXT in row or history_col:
26002620
values = {}
26012621
if _evals_constant.CONTEXT in row:
26022622
values[_evals_constant.CONTEXT] = _get_content(
26032623
row, _evals_constant.CONTEXT
26042624
)
2605-
if _evals_constant.HISTORY in row:
2606-
values[_evals_constant.HISTORY] = _get_content(
2607-
row, _evals_constant.HISTORY
2625+
if history_col:
2626+
values[_evals_constant.CONVERSATION_HISTORY] = _get_content(
2627+
row, history_col
26082628
)
26092629
if _evals_constant.PROMPT in row:
26102630
values[_evals_constant.PROMPT] = _get_content(

vertexai/_genai/_evals_constant.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@
5959
AGENT_DATA = "agent_data"
6060
STARTING_PROMPT = "starting_prompt"
6161
CONVERSATION_PLAN = "conversation_plan"
62-
HISTORY = "conversation_history"
62+
HISTORY = "history"
63+
CONVERSATION_HISTORY = "conversation_history"
6364

6465
COMMON_DATASET_COLUMNS = frozenset(
6566
{
@@ -69,6 +70,7 @@
6970
SESSION_INPUT,
7071
CONTEXT,
7172
HISTORY,
73+
CONVERSATION_HISTORY,
7274
STARTING_PROMPT,
7375
CONVERSATION_PLAN,
7476
AGENT_DATA,

vertexai/_genai/_evals_data_converters.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,9 @@ def convert(self, raw_data: list[dict[str, Any]]) -> types.EvaluationDataset:
196196
if not prompt_data:
197197
prompt_data = item.pop("source", None)
198198

199-
conversation_history_data = item.pop("history", None)
199+
conversation_history_data = item.pop("conversation_history", None)
200+
if conversation_history_data is None:
201+
conversation_history_data = item.pop("history", None)
200202
response_data = item.pop("response", None)
201203
reference_data = item.pop("reference", None)
202204
system_instruction_data = item.pop("instruction", None)

0 commit comments

Comments
 (0)