Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 19 additions & 19 deletions agentplatform/_genai/_evals_metric_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,27 +277,27 @@ def __getattr__(

@property
def GENERAL_QUALITY(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``GENERAL_QUALITY`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("GENERAL_QUALITY", version="v1")

@property
def TEXT_QUALITY(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``TEXT_QUALITY`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("TEXT_QUALITY", version="v1")

@property
def INSTRUCTION_FOLLOWING(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``INSTRUCTION_FOLLOWING`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("INSTRUCTION_FOLLOWING", version="v1")

@property
def SAFETY(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``SAFETY`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("SAFETY", version="v1")

@property
def MULTI_TURN_GENERAL_QUALITY(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``MULTI_TURN_GENERAL_QUALITY`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("MULTI_TURN_GENERAL_QUALITY", version="v1")

@property
def MULTI_TURN_TEXT_QUALITY(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``MULTI_TURN_TEXT_QUALITY`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("MULTI_TURN_TEXT_QUALITY", version="v1")

@property
def MULTI_TURN_TOOL_USE_QUALITY(self) -> LazyLoadedPrebuiltMetric:
Expand All @@ -317,43 +317,43 @@ def FINAL_RESPONSE_MATCH(self) -> LazyLoadedPrebuiltMetric:

@property
def FINAL_RESPONSE_REFERENCE_FREE(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``FINAL_RESPONSE_REFERENCE_FREE`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("FINAL_RESPONSE_REFERENCE_FREE", version="v1")

@property
def COHERENCE(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``COHERENCE`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("COHERENCE", version="v1")

@property
def FLUENCY(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``FLUENCY`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("FLUENCY", version="v1")

@property
def VERBOSITY(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``VERBOSITY`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("VERBOSITY", version="v1")

@property
def SUMMARIZATION_QUALITY(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``SUMMARIZATION_QUALITY`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("SUMMARIZATION_QUALITY", version="v1")

@property
def QUESTION_ANSWERING_QUALITY(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``QUESTION_ANSWERING_QUALITY`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("QUESTION_ANSWERING_QUALITY", version="v1")

@property
def MULTI_TURN_CHAT_QUALITY(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``MULTI_TURN_CHAT_QUALITY`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("MULTI_TURN_CHAT_QUALITY", version="v1")

@property
def MULTI_TURN_SAFETY(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``MULTI_TURN_SAFETY`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("MULTI_TURN_SAFETY", version="v1")

@property
def FINAL_RESPONSE_QUALITY(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``FINAL_RESPONSE_QUALITY`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("FINAL_RESPONSE_QUALITY", version="v1")

@property
def HALLUCINATION(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``HALLUCINATION`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("HALLUCINATION", version="v1")

@property
def GROUNDING(self) -> LazyLoadedPrebuiltMetric: # pylint: disable=invalid-name
Expand All @@ -374,15 +374,15 @@ def GROUNDEDNESS(self) -> LazyLoadedPrebuiltMetric: # pylint: disable=invalid-n

@property
def TOOL_USE_QUALITY(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``TOOL_USE_QUALITY`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("TOOL_USE_QUALITY", version="v1")

@property
def GECKO_TEXT2IMAGE(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``GECKO_TEXT2IMAGE`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("GECKO_TEXT2IMAGE", version="v1")

@property
def GECKO_TEXT2VIDEO(self) -> LazyLoadedPrebuiltMetric:
    """Return the ``GECKO_TEXT2VIDEO`` metric pinned to ``"v1"``.

    Returns:
        The value produced by ``__getattr__`` for this name and version.
    """
    # Only the version-pinned lookup is kept: an unversioned return placed
    # ahead of it would make this call unreachable.
    return self.__getattr__("GECKO_TEXT2VIDEO", version="v1")


# Module-level PrebuiltMetricLoader instance, exposed under the name PrebuiltMetric.
PrebuiltMetric = PrebuiltMetricLoader()
Expand Down
44 changes: 44 additions & 0 deletions tests/unit/agentplatform/genai/test_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def mock_api_client_fixture():

@pytest.fixture
def mock_eval_dependencies(mock_api_client_fixture):
_evals_metric_loaders.LazyLoadedPrebuiltMetric._cache.clear()
# fmt: off
with (
mock.patch("google.cloud.storage.Client") as mock_storage_client,
Expand Down Expand Up @@ -6386,6 +6387,49 @@ def test_groundedness_resolve_returns_grounding_v1_metric(self):
assert resolved.name == "grounding_v1"


class TestPrebuiltMetricLoaderVersionPinning:
    """Checks that the listed RubricMetric properties resolve to version v1."""

    # (property name, API metric spec name the property is expected to report).
    _SPEC_NAME_CASES = [
        ("GENERAL_QUALITY", "general_quality_v1"),
        ("TEXT_QUALITY", "text_quality_v1"),
        ("INSTRUCTION_FOLLOWING", "instruction_following_v1"),
        ("SAFETY", "safety_v1"),
        ("MULTI_TURN_GENERAL_QUALITY", "multi_turn_general_quality_v1"),
        ("MULTI_TURN_TEXT_QUALITY", "multi_turn_text_quality_v1"),
        ("FINAL_RESPONSE_REFERENCE_FREE", "final_response_reference_free_v1"),
        ("FINAL_RESPONSE_QUALITY", "final_response_quality_v1"),
        ("HALLUCINATION", "hallucination_v1"),
        ("TOOL_USE_QUALITY", "tool_use_quality_v1"),
        ("GECKO_TEXT2IMAGE", "gecko_text2image_v1"),
        ("GECKO_TEXT2VIDEO", "gecko_text2video_v1"),
    ]

    # Properties expected to report no API metric spec name at all.
    _NO_SPEC_NAME_CASES = [
        "COHERENCE",
        "FLUENCY",
        "VERBOSITY",
        "SUMMARIZATION_QUALITY",
        "QUESTION_ANSWERING_QUALITY",
        "MULTI_TURN_CHAT_QUALITY",
        "MULTI_TURN_SAFETY",
    ]

    @pytest.mark.parametrize("prop_name,expected_spec", _SPEC_NAME_CASES)
    def test_predefined_property_pins_to_v1(self, prop_name, expected_spec):
        """The property reports version v1 and the matching ``*_v1`` spec name."""
        metric = getattr(agentplatform_genai_types.RubricMetric, prop_name)
        assert metric.version == "v1"
        assert metric._get_api_metric_spec_name() == expected_spec

    @pytest.mark.parametrize("prop_name", _NO_SPEC_NAME_CASES)
    def test_gcs_backed_property_pins_to_v1(self, prop_name):
        """The property reports version v1 and has no API metric spec name."""
        metric = getattr(agentplatform_genai_types.RubricMetric, prop_name)
        assert metric.version == "v1"
        assert metric._get_api_metric_spec_name() is None


class TestMergeResponseDatasets:
"""Unit tests for the merge_response_datasets_into_canonical_format function."""

Expand Down
4 changes: 2 additions & 2 deletions tests/unit/vertexai/test_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1882,9 +1882,9 @@ def test_runnable_trajectory_eval_with_runnable_inference(self, api_transport):
"trajectory_exact_match/score",
]
)
assert list(
assert sorted(
test_result.metrics_table["trajectory_exact_match/score"].to_list()
) == [1.0, 0.0]
) == [0.0, 1.0]

@pytest.mark.parametrize("api_transport", ["grpc", "rest"])
def test_pointwise_autorater_request_config_enabled(self, api_transport):
Expand Down
Loading