2222from . import types
2323
2424
def _transform_metric(
    metric: "types.MetricSubclass",
    set_default_aggregation_metrics: bool = False,
) -> dict[str, Any]:
    """Transforms a single metric to its payload representation."""
    payload: dict[str, Any] = {}

    # A metric may be addressed purely by its resource name; record it first
    # so the resource-name-only case can be recognized below.
    if hasattr(metric, "metric_resource_name") and metric.metric_resource_name:
        payload["metric_resource_name"] = metric.metric_resource_name

    lowered_name = getv(metric, ["name"]).lower()

    if set_default_aggregation_metrics:
        payload["aggregation_metrics"] = ["AVERAGE", "STANDARD_DEVIATION"]

    # Dispatch on the metric name / attributes to pick the inline spec.
    if lowered_name == "exact_match":
        payload["exact_match_spec"] = {}
    elif lowered_name == "bleu":
        payload["bleu_spec"] = {}
    elif lowered_name.startswith("rouge"):
        # e.g. "rouge_1" -> rouge_type "rouge1".
        payload["rouge_spec"] = {"rouge_type": lowered_name.replace("_", "")}
    # API Pre-defined metrics
    elif lowered_name in _evals_constant.SUPPORTED_PREDEFINED_METRICS:
        payload["predefined_metric_spec"] = {
            "metric_spec_name": lowered_name,
            "metric_spec_parameters": metric.metric_spec_parameters,
        }
    # Custom Code Execution Metric
    elif hasattr(metric, "remote_custom_function") and metric.remote_custom_function:
        payload["custom_code_execution_spec"] = {
            "evaluation_function": metric.remote_custom_function
        }
    # Pointwise metrics
    elif hasattr(metric, "prompt_template") and metric.prompt_template:
        spec: dict[str, Any] = {"metric_prompt_template": metric.prompt_template}
        instruction = getv(metric, ["judge_model_system_instruction"])
        if instruction:
            spec["system_instruction"] = instruction
        raw_output = getv(metric, ["return_raw_output"])
        if raw_output:
            spec["custom_output_format_config"] = {"return_raw_output": raw_output}
        payload["pointwise_metric_spec"] = spec
    elif "metric_resource_name" in payload:
        # Valid case: Metric is identified by resource name; no inline spec required.
        pass
    else:
        raise ValueError(
            f"Unsupported metric type or invalid metric name: {lowered_name}"
        )
    return payload
80+
81+
def t_metrics(
    metrics: list["types.MetricSubclass"],
    set_default_aggregation_metrics: bool = False,
) -> list[dict[str, Any]]:
    """Transforms a list of metrics to their payload representations.

    Args:
        metrics: The metrics to resolve into request payloads.
        set_default_aggregation_metrics: Whether to attach the default
            aggregation metrics ("AVERAGE", "STANDARD_DEVIATION") to each
            payload item.

    Returns:
        A list of resolved metric payloads for the evaluation request.
    """
    # Each metric resolves independently; delegate per-metric logic to the
    # shared helper.
    return [
        _transform_metric(metric, set_default_aggregation_metrics)
        for metric in metrics
    ]
def t_metric(metric: "types.MetricOrDict") -> dict[str, Any]:
    """Prepares the metric payload for the evaluation metric resource."""
    # Single-metric counterpart of t_metrics; default aggregation metrics
    # are not attached here.
    payload = _transform_metric(metric)
    return payload