feat: add WorkloadExecuting status to evaluation runs

mjnovice · claude · mjnovice · commit 1075f2d4a9d6 · 2026-02-27T17:13:36.000-08:00
Create eval runs with WorkloadExecuting status (4) instead of
IN_PROGRESS (1) to distinguish agent execution from evaluation/scoring.
After agent execution completes, transition to IN_PROGRESS before
running evaluators.

- Add WORKLOAD_EXECUTING=4 to EvaluationStatus enum
- Add AGENT_EXECUTION_COMPLETED event and AgentExecutionCompletedEvent model
- Create eval runs with WorkloadExecuting status in _create_eval_run_spec
- Publish AGENT_EXECUTION_COMPLETED after agent finishes in _execute_eval
- Handle event in StudioWebProgressReporter to update status to IN_PROGRESS

Flow: WorkloadExecuting (agent runs) → Running (evaluators run) → Completed/Failed

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/src/uipath/_cli/_evals/_progress_reporter.py b/src/uipath/_cli/_evals/_progress_reporter.py
@@ -30,6 +30,7 @@
 from uipath.eval.models import EvalItemResult, ScoreType
 from uipath.eval.models.evaluation_set import EvaluationItem
 from uipath.eval.runtime.events import (
+    AgentExecutionCompletedEvent,
     EvalRunCreatedEvent,
     EvalRunUpdatedEvent,
     EvalSetRunCreatedEvent,
@@ -47,6 +48,7 @@ class EvaluationStatus(IntEnum):
     IN_PROGRESS = 1
     COMPLETED = 2
     FAILED = 3
+    WORKLOAD_EXECUTING = 4
 
 
 class StudioWebProgressItem(BaseModel):
@@ -725,6 +727,54 @@ async def handle_create_eval_run(self, payload: EvalRunCreatedEvent) -> None:
         except Exception as e:
             self._format_error_message(e, "StudioWeb create eval run error")
 
+    async def handle_agent_execution_completed(
+        self, payload: AgentExecutionCompletedEvent
+    ) -> None:
+        """Handle agent execution completed — move eval run from WorkloadExecuting to Running."""
+        try:
+            eval_run_id = self.eval_run_ids.get(payload.execution_id)  # mapping lookup: execution_id -> eval_run_id
+            if not eval_run_id:  # missing (or empty) id: warn and skip the status update
+                logger.warning(
+                    f"Cannot move eval run to Running: eval_run_id not found for "
+                    f"execution_id={payload.execution_id}"
+                )
+                return
+
+            is_coded = self.is_coded_eval.get(
+                self.eval_set_execution_id or "", False  # key falls back to "" when the id is unset; default is False
+            )
+
+            endpoint_suffix = "coded/" if is_coded else ""  # coded evals get an extra "coded/" path segment
+            spec = RequestSpec(
+                method="PUT",
+                endpoint=Endpoint(
+                    f"{self._get_endpoint_prefix()}execution/agents/{self._project_id}/{endpoint_suffix}evalRun"
+                ),
+                json={
+                    "evalRunId": eval_run_id,
+                    "status": EvaluationStatus.IN_PROGRESS.value,  # integer enum value (IN_PROGRESS = 1)
+                },
+                headers=self._tenant_header(),
+            )
+
+            await self._client.request_async(
+                method=spec.method,
+                url=spec.endpoint,
+                params=spec.params,
+                json=spec.json,
+                headers=spec.headers,
+                scoped="org" if self._is_localhost() else "tenant",  # "org" scope when _is_localhost() is true, else "tenant"
+            )
+
+            logger.info(
+                f"Moved eval_run_id={eval_run_id} from WorkloadExecuting to Running (coded={is_coded})"
+            )
+
+        except Exception as e:  # failures are routed to _format_error_message (not visible here whether it re-raises)
+            self._format_error_message(
+                e, "StudioWeb agent execution completed error"
+            )
+
     async def handle_update_eval_run(self, payload: EvalRunUpdatedEvent) -> None:
         try:
             logger.info(
@@ -827,6 +877,10 @@ async def subscribe_to_eval_runtime_events(self, event_bus: EventBus) -> None:
         event_bus.subscribe(
             EvaluationEvents.CREATE_EVAL_RUN, self.handle_create_eval_run
         )
+        event_bus.subscribe(
+            EvaluationEvents.AGENT_EXECUTION_COMPLETED,
+            self.handle_agent_execution_completed,
+        )
         event_bus.subscribe(
             EvaluationEvents.UPDATE_EVAL_RUN, self.handle_update_eval_run
         )
@@ -1234,7 +1288,7 @@ def _create_eval_run_spec(
             "evalSetRunId": eval_set_run_id,
             "evalSnapshot": eval_snapshot,
             # Backend expects integer status
-            "status": EvaluationStatus.IN_PROGRESS.value,
+            "status": EvaluationStatus.WORKLOAD_EXECUTING.value,
         }
 
         # Legacy backend expects payload wrapped in "request" field
diff --git a/src/uipath/eval/runtime/events.py b/src/uipath/eval/runtime/events.py
@@ -19,6 +19,7 @@ class EvaluationEvents(str, Enum):
     CREATE_EVAL_RUN = "create_eval_run"
     UPDATE_EVAL_SET_RUN = "update_eval_set_run"
     UPDATE_EVAL_RUN = "update_eval_run"
+    AGENT_EXECUTION_COMPLETED = "agent_execution_completed"
 
 
 class EvalSetRunCreatedEvent(BaseModel):
@@ -80,9 +81,16 @@ class EvalSetRunUpdatedEvent(BaseModel):
     success: bool = True
 
 
+class AgentExecutionCompletedEvent(BaseModel):
+    """Event emitted when agent execution completes and evaluation/scoring is about to begin."""
+
+    execution_id: str  # execution id of the finished agent run; the progress reporter uses it to look up the eval run id
+
+
 ProgressEvent = Union[
     EvalSetRunCreatedEvent,
     EvalRunCreatedEvent,
     EvalRunUpdatedEvent,
     EvalSetRunUpdatedEvent,
+    AgentExecutionCompletedEvent,
 ]
diff --git a/src/uipath/eval/runtime/runtime.py b/src/uipath/eval/runtime/runtime.py
@@ -85,6 +85,7 @@
 from ._utils import apply_input_overrides
 from .context import UiPathEvalContext
 from .events import (
+    AgentExecutionCompletedEvent,
     EvalItemExceptionDetails,
     EvalRunCreatedEvent,
     EvalRunUpdatedEvent,
@@ -536,6 +537,14 @@ async def _execute_eval(
                     # The evaluation will be completed when resumed
                     return evaluation_run_results
 
+                # Agent execution completed — move eval run from WorkloadExecuting to Running
+                await self.event_bus.publish(
+                    EvaluationEvents.AGENT_EXECUTION_COMPLETED,
+                    AgentExecutionCompletedEvent(
+                        execution_id=execution_id,
+                    ),
+                )
+
                 if self.context.verbose:
                     evaluation_run_results.agent_execution_output = (
                         convert_eval_execution_output_to_serializable(