Skip to content

Commit 065b347

Browse files
vertex-sdk-botcopybara-github
authored and committed
feat: Add EvaluationMetric Create, Get and List methods to Vertex SDK GenAI evals
PiperOrigin-RevId: 882635735
1 parent b7738a4 commit 065b347

5 files changed

Lines changed: 1042 additions & 163 deletions

File tree

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
# pylint: disable=protected-access,bad-continuation,missing-function-docstring
16+
17+
from tests.unit.vertexai.genai.replays import pytest_helper
18+
from vertexai._genai import types
19+
20+
_TEST_PROJECT = "977012026409"
21+
_TEST_LOCATION = "us-central1"
22+
23+
def test_create_evaluation_metric(client):
    """Creates a metric resource and verifies the returned object's identity."""
    http_options = client._api_client._http_options
    http_options.api_version = "v1beta1"
    http_options.base_url = (
        "https://us-central1-staging-aiplatform.sandbox.googleapis.com/"
    )
    created = client.evals.create_evaluation_metric(
        display_name="test_metric",
        description="test_description",
        metric=types.RubricMetric.GENERAL_QUALITY,
    )
    assert isinstance(created, types.EvaluationMetric)
    assert created.display_name == "test_metric"
35+
36+
37+
def test_get_evaluation_metric(client):
    """Fetches an existing metric resource by its full resource name."""
    http_options = client._api_client._http_options
    http_options.api_version = "v1beta1"
    http_options.base_url = (
        "https://us-central1-staging-aiplatform.sandbox.googleapis.com/"
    )
    resource_name = (
        "projects/977012026409/locations/us-central1"
        "/evaluationMetrics/6048334299558576128"
    )
    fetched = client.evals.get_evaluation_metric(
        metric_resource_name=resource_name
    )
    assert isinstance(fetched, types.EvaluationMetric)
    assert fetched.display_name == "tone-check-v1"
48+
49+
50+
def test_list_evaluation_metrics(client):
    """Lists metric resources and validates the response wrapper.

    The original assertion ``len(response.evaluation_metrics) >= 0`` was
    vacuous: it is always true whenever it does not raise.  Asserting that
    the field is an actual (possibly empty) list fails loudly if the SDK
    returns None or drops the field.
    """
    http_options = client._api_client._http_options
    http_options.api_version = "v1beta1"
    http_options.base_url = (
        "https://us-central1-staging-aiplatform.sandbox.googleapis.com/"
    )
    response = client.evals.list_evaluation_metrics()
    assert isinstance(response, types.ListEvaluationMetricsResponse)
    # Meaningful replacement for the vacuous `len(...) >= 0` check.
    assert isinstance(response.evaluation_metrics, list)
58+
59+
60+
# The setup function registers the module and method for the recorder.
# NOTE(review): only "evals.create_evaluation_metric" is named here even
# though this module also exercises get/list — presumably the replay
# helper resolves recordings per test function; confirm against
# pytest_helper.setup before relying on it.
pytestmark = pytest_helper.setup(
    file=__file__,
    globals_for_file=globals(),
    test_method="evals.create_evaluation_metric",
)

vertexai/_genai/_transformers.py

Lines changed: 64 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,63 @@
2222
from . import types
2323

2424

25+
def _transform_metric(
26+
metric: "types.MetricSubclass",
27+
set_default_aggregation_metrics: bool = False,
28+
) -> dict[str, Any]:
29+
"""Transforms a single metric to its payload representation."""
30+
metric_payload_item: dict[str, Any] = {}
31+
if hasattr(metric, "metric_resource_name") and metric.metric_resource_name:
32+
metric_payload_item["metric_resource_name"] = metric.metric_resource_name
33+
34+
metric_name = getv(metric, ["name"]).lower()
35+
36+
if set_default_aggregation_metrics:
37+
metric_payload_item["aggregation_metrics"] = [
38+
"AVERAGE",
39+
"STANDARD_DEVIATION",
40+
]
41+
42+
if metric_name == "exact_match":
43+
metric_payload_item["exact_match_spec"] = {}
44+
elif metric_name == "bleu":
45+
metric_payload_item["bleu_spec"] = {}
46+
elif metric_name.startswith("rouge"):
47+
rouge_type = metric_name.replace("_", "")
48+
metric_payload_item["rouge_spec"] = {"rouge_type": rouge_type}
49+
# API Pre-defined metrics
50+
elif metric_name in _evals_constant.SUPPORTED_PREDEFINED_METRICS:
51+
metric_payload_item["predefined_metric_spec"] = {
52+
"metric_spec_name": metric_name,
53+
"metric_spec_parameters": metric.metric_spec_parameters,
54+
}
55+
# Custom Code Execution Metric
56+
elif hasattr(metric, "remote_custom_function") and metric.remote_custom_function:
57+
metric_payload_item["custom_code_execution_spec"] = {
58+
"evaluation_function": metric.remote_custom_function
59+
}
60+
# Pointwise metrics
61+
elif hasattr(metric, "prompt_template") and metric.prompt_template:
62+
pointwise_spec = {"metric_prompt_template": metric.prompt_template}
63+
system_instruction = getv(metric, ["judge_model_system_instruction"])
64+
if system_instruction:
65+
pointwise_spec["system_instruction"] = system_instruction
66+
return_raw_output = getv(metric, ["return_raw_output"])
67+
if return_raw_output:
68+
pointwise_spec["custom_output_format_config"] = {
69+
"return_raw_output": return_raw_output
70+
}
71+
metric_payload_item["pointwise_metric_spec"] = pointwise_spec
72+
elif "metric_resource_name" in metric_payload_item:
73+
# Valid case: Metric is identified by resource name; no inline spec required.
74+
pass
75+
else:
76+
raise ValueError(
77+
f"Unsupported metric type or invalid metric name: {metric_name}"
78+
)
79+
return metric_payload_item
80+
81+
2582
def t_metrics(
2683
metrics: list["types.MetricSubclass"],
2784
set_default_aggregation_metrics: bool = False,
@@ -35,58 +92,13 @@ def t_metrics(
3592
A list of resolved metric payloads for the evaluation request.
3693
"""
3794
metrics_payload = []
38-
3995
for metric in metrics:
40-
metric_payload_item: dict[str, Any] = {}
41-
if hasattr(metric, "metric_resource_name") and metric.metric_resource_name:
42-
metric_payload_item["metric_resource_name"] = metric.metric_resource_name
43-
44-
metric_name = getv(metric, ["name"]).lower()
96+
metrics_payload.append(
97+
_transform_metric(metric, set_default_aggregation_metrics)
98+
)
99+
return metrics_payload
45100

46-
if set_default_aggregation_metrics:
47-
metric_payload_item["aggregation_metrics"] = [
48-
"AVERAGE",
49-
"STANDARD_DEVIATION",
50-
]
51101

52-
if metric_name == "exact_match":
53-
metric_payload_item["exact_match_spec"] = {}
54-
elif metric_name == "bleu":
55-
metric_payload_item["bleu_spec"] = {}
56-
elif metric_name.startswith("rouge"):
57-
rouge_type = metric_name.replace("_", "")
58-
metric_payload_item["rouge_spec"] = {"rouge_type": rouge_type}
59-
# API Pre-defined metrics
60-
elif metric_name in _evals_constant.SUPPORTED_PREDEFINED_METRICS:
61-
metric_payload_item["predefined_metric_spec"] = {
62-
"metric_spec_name": metric_name,
63-
"metric_spec_parameters": metric.metric_spec_parameters,
64-
}
65-
# Custom Code Execution Metric
66-
elif (
67-
hasattr(metric, "remote_custom_function") and metric.remote_custom_function
68-
):
69-
metric_payload_item["custom_code_execution_spec"] = {
70-
"evaluation_function": metric.remote_custom_function
71-
}
72-
# Pointwise metrics
73-
elif hasattr(metric, "prompt_template") and metric.prompt_template:
74-
pointwise_spec = {"metric_prompt_template": metric.prompt_template}
75-
system_instruction = getv(metric, ["judge_model_system_instruction"])
76-
if system_instruction:
77-
pointwise_spec["system_instruction"] = system_instruction
78-
return_raw_output = getv(metric, ["return_raw_output"])
79-
if return_raw_output:
80-
pointwise_spec["custom_output_format_config"] = {
81-
"return_raw_output": return_raw_output
82-
}
83-
metric_payload_item["pointwise_metric_spec"] = pointwise_spec
84-
elif "metric_resource_name" in metric_payload_item:
85-
# Valid case: Metric is identified by resource name; no inline spec required.
86-
pass
87-
else:
88-
raise ValueError(
89-
f"Unsupported metric type or invalid metric name: {metric_name}"
90-
)
91-
metrics_payload.append(metric_payload_item)
92-
return metrics_payload
102+
def t_metric(metric: "types.MetricOrDict") -> dict[str, Any]:
    """Prepares the metric payload for the evaluation metric resource."""
    payload = _transform_metric(metric)
    return payload

0 commit comments

Comments
 (0)