From c12aedcea7f778bc6cdfd1d871334ef504870c97 Mon Sep 17 00:00:00 2001
From: A Vertex SDK engineer
Date: Tue, 31 Mar 2026 17:22:39 -0700
Subject: [PATCH] feat: Limit metric registry support to only custom code execution metric and llm based metric in SDK

PiperOrigin-RevId: 892623204
---
 .../genai/replays/test_evaluation_metric.py | 10 +++-------
 vertexai/_genai/_transformers.py            | 18 ++----------------
 2 files changed, 5 insertions(+), 23 deletions(-)

diff --git a/tests/unit/vertexai/genai/replays/test_evaluation_metric.py b/tests/unit/vertexai/genai/replays/test_evaluation_metric.py
index 6b1e9dc1b7..1d364a5a31 100644
--- a/tests/unit/vertexai/genai/replays/test_evaluation_metric.py
+++ b/tests/unit/vertexai/genai/replays/test_evaluation_metric.py
@@ -24,13 +24,12 @@
 
 def test_create_and_get_evaluation_metric(client):
     client._api_client._http_options.api_version = "v1beta1"
-    client._api_client._http_options.base_url = (
-        "https://us-central1-staging-aiplatform.sandbox.googleapis.com/"
-    )
     result = client.evals.create_evaluation_metric(
         display_name="test_metric",
         description="test_description",
-        metric=types.RubricMetric.GENERAL_QUALITY,
+        metric=types.LLMMetric(
+            name="custom_llm_metric", prompt_template="test_prompt_template"
+        ),
     )
     assert isinstance(result, str)
     assert re.match(
@@ -44,9 +43,6 @@ def test_create_and_get_evaluation_metric(client):
 
 def test_list_evaluation_metrics(client):
     client._api_client._http_options.api_version = "v1beta1"
-    client._api_client._http_options.base_url = (
-        "https://us-central1-staging-aiplatform.sandbox.googleapis.com/"
-    )
     response = client.evals.list_evaluation_metrics()
     assert isinstance(response, types.ListEvaluationMetricsResponse)
     assert len(response.evaluation_metrics) >= 0
diff --git a/vertexai/_genai/_transformers.py b/vertexai/_genai/_transformers.py
index ffb70459ce..65ca401ae3 100644
--- a/vertexai/_genai/_transformers.py
+++ b/vertexai/_genai/_transformers.py
@@ -187,22 +187,8 @@ def t_metric_for_registry(
     if metric_name:
         metric_name = metric_name.lower()
 
-    # Handle standard computation metrics
-    if metric_name == "exact_match":
-        metric_payload_item["exact_match_spec"] = {}
-    elif metric_name == "bleu":
-        metric_payload_item["bleu_spec"] = {}
-    elif metric_name and metric_name.startswith("rouge"):
-        rouge_type = metric_name.replace("_", "")
-        metric_payload_item["rouge_spec"] = {"rouge_type": rouge_type}
-    # API Pre-defined metrics
-    elif metric_name and metric_name in _evals_constant.SUPPORTED_PREDEFINED_METRICS:
-        metric_payload_item["predefined_metric_spec"] = {
-            "metric_spec_name": metric_name,
-            "metric_spec_parameters": metric.metric_spec_parameters,
-        }
     # Custom Code Execution Metric
-    elif hasattr(metric, "remote_custom_function") and metric.remote_custom_function:
+    if hasattr(metric, "remote_custom_function") and metric.remote_custom_function:
         metric_payload_item["custom_code_execution_spec"] = {
             "evaluation_function": metric.remote_custom_function
         }
@@ -217,7 +203,7 @@ def t_metric_for_registry(
             "evaluation_function": metric.custom_function
         }
 
-    # Map LLM-based metrics to the new llm_based_metric_spec
+    # LLM-based metric
     elif (hasattr(metric, "prompt_template") and metric.prompt_template) or (
         hasattr(metric, "rubric_group_name") and metric.rubric_group_name
     ):