@@ -591,6 +591,218 @@ def _resolve_loss_analysis_config(
591591 return resolved_config
592592
593593
594+ def _build_rubric_description_map (
595+ eval_result : types .EvaluationResult ,
596+ ) -> dict [str , str ]:
597+ """Builds a rubric_id -> description map from the EvaluationResult."""
598+ rubric_map : dict [str , str ] = {}
599+ for case_result in eval_result .eval_case_results or []:
600+ for resp_cand in case_result .response_candidate_results or []:
601+ for metric_res in (resp_cand .metric_results or {}).values ():
602+ for verdict in metric_res .rubric_verdicts or []:
603+ rubric = verdict .evaluated_rubric
604+ if rubric and rubric .rubric_id and rubric .content :
605+ if (
606+ rubric .content .property
607+ and rubric .content .property .description
608+ ):
609+ rubric_map [rubric .rubric_id ] = (
610+ rubric .content .property .description
611+ )
612+ return rubric_map
613+
614+
615+ def _extract_scenario_preview_from_dict (
616+ eval_result_dict : dict [str , Any ],
617+ ) -> Optional [str ]:
618+ """Extracts the first user message from an evaluation_result dict.
619+
620+ Handles both snake_case (SDK-side) and camelCase (API echo-back) keys.
621+ """
622+ request = eval_result_dict .get ("request" )
623+ if not request :
624+ return None
625+ prompt = request .get ("prompt" )
626+ if not prompt :
627+ return None
628+ # Try agent_data (snake_case or camelCase)
629+ agent_data = prompt .get ("agent_data" ) or prompt .get ("agentData" )
630+ if agent_data and isinstance (agent_data , dict ):
631+ turns = agent_data .get ("turns" , [])
632+ for turn in turns :
633+ events = turn .get ("events" , [])
634+ for event in events :
635+ author = event .get ("author" , "" )
636+ content = event .get ("content" )
637+ if (
638+ author .lower () == "user"
639+ and content
640+ and isinstance (content , dict )
641+ ):
642+ parts = content .get ("parts" , [])
643+ for part in parts :
644+ text = str (part .get ("text" , "" )).strip ()
645+ if text :
646+ if len (text ) > 150 :
647+ return text [:150 ] + "..."
648+ return text
649+ # Try simple prompt path
650+ parts = prompt .get ("parts" , [])
651+ for part in parts :
652+ text = str (part .get ("text" , "" )).strip ()
653+ if text :
654+ if len (text ) > 150 :
655+ return text [:150 ] + "..."
656+ return text
657+ return None
658+
659+
660+ def _extract_scenario_from_agent_data (agent_data : Any ) -> Optional [str ]:
661+ """Extracts the first user message from an AgentData object or dict."""
662+ if agent_data is None :
663+ return None
664+ if hasattr (agent_data , "model_dump" ):
665+ agent_data = agent_data .model_dump ()
666+ if isinstance (agent_data , str ):
667+ try :
668+ agent_data = json .loads (agent_data )
669+ except (json .JSONDecodeError , ValueError ):
670+ return None
671+ if not isinstance (agent_data , dict ):
672+ return None
673+ turns = agent_data .get ("turns" , [])
674+ if not isinstance (turns , list ):
675+ return None
676+ for turn in turns :
677+ if not isinstance (turn , dict ):
678+ continue
679+ events = turn .get ("events" , [])
680+ if not isinstance (events , list ):
681+ continue
682+ for event in events :
683+ if not isinstance (event , dict ):
684+ continue
685+ author = event .get ("author" , "" )
686+ if not isinstance (author , str ) or author .lower () != "user" :
687+ continue
688+ content = event .get ("content" )
689+ if not content or not isinstance (content , dict ):
690+ continue
691+ parts = content .get ("parts" , [])
692+ if not isinstance (parts , list ):
693+ continue
694+ for part in parts :
695+ if not isinstance (part , dict ):
696+ continue
697+ text = str (part .get ("text" , "" )).strip ()
698+ if text :
699+ if len (text ) > 150 :
700+ return text [:150 ] + "..."
701+ return text
702+ return None
703+
704+
def _build_scenario_preview_list(
    eval_result: types.EvaluationResult,
) -> list[Optional[str]]:
    """Builds an ordered list of scenario previews from the EvaluationResult.

    Produces one entry per eval_case_result, aligned with the order of
    eval_case_results. Previews come from the original SDK EvaluationResult
    (its eval_cases, or the underlying DataFrame as a fallback) instead of
    the API echo-back, which may not preserve the request data.
    """
    eval_dataset = eval_result.evaluation_dataset
    eval_cases: list[Any] = []
    if isinstance(eval_dataset, list) and eval_dataset:
        eval_cases = getv(eval_dataset[0], ["eval_cases"]) or []

    previews: list[Optional[str]] = []
    for case_result in eval_result.eval_case_results or []:
        case_idx = case_result.eval_case_index or 0
        preview: Optional[str] = None

        eval_case = (
            eval_cases[case_idx] if 0 <= case_idx < len(eval_cases) else None
        )
        if eval_case:
            agent_data = getv(eval_case, ["agent_data"])
            if agent_data:
                preview = _extract_scenario_from_agent_data(agent_data)
            else:
                prompt = getv(eval_case, ["prompt"])
                if prompt:
                    from . import _evals_data_converters

                    text = _evals_data_converters._get_content_text(prompt)
                    if text:
                        text = str(text).strip()
                        preview = (
                            text[:150] + "..." if len(text) > 150 else text
                        )

        # Fallback: pull the agent data straight out of the DataFrame.
        if preview is None:
            df_agent_data = _transformers._extract_agent_data_from_df(
                eval_dataset, case_idx
            )
            if df_agent_data is not None:
                preview = _extract_scenario_from_agent_data(df_agent_data)

        previews.append(preview)

    return previews
759+
760+
def _enrich_loss_response_with_rubric_descriptions(
    response: types.GenerateLossClustersResponse,
    eval_result: types.EvaluationResult,
) -> None:
    """Enriches loss response with rubric descriptions and scenario previews.

    Rubric descriptions and scenario previews are extracted from the original
    SDK EvaluationResult object, because the API echo-back in
    LossExample.evaluation_result may not preserve all request data (e.g.,
    agent_data turns with user messages).

    Mutates ``response`` in place; returns None.
    """
    rubric_map = _build_rubric_description_map(eval_result)
    scenario_list = _build_scenario_preview_list(eval_result)
    # First truthy preview, computed once and shared as the fallback. A
    # single eval_result typically maps to one scenario per eval case, and
    # loss examples share these.
    fallback_scenario = next((s for s in scenario_list if s), None)

    for result in response.results or []:
        for cluster in result.clusters or []:
            for example in cluster.examples or []:
                if example.evaluation_result is None:
                    example.evaluation_result = {}
                if rubric_map:
                    # Store a copy so mutating one example's descriptions
                    # cannot silently alter every other example's.
                    example.evaluation_result["rubric_descriptions"] = dict(
                        rubric_map
                    )
                if "scenario_preview" not in example.evaluation_result:
                    # Prefer the scenario embedded in the API echo-back.
                    scenario = _extract_scenario_preview_from_dict(
                        example.evaluation_result
                    )
                    if scenario:
                        example.evaluation_result["scenario_preview"] = (
                            scenario
                        )
                if (
                    "scenario_preview" not in example.evaluation_result
                    and fallback_scenario
                ):
                    # Fallback: reuse the scenario from the original
                    # eval_result when the echo-back yielded nothing.
                    example.evaluation_result["scenario_preview"] = (
                        fallback_scenario
                    )
804+
805+
594806def _poll_operation (
595807 api_client : BaseApiClient ,
596808 operation : types .GenerateLossClustersOperation ,
0 commit comments