Skip to content

Commit 1075f2d

Browse files
mjnovice and claude committed
feat: add WorkloadExecuting status to evaluation runs
Create eval runs with WorkloadExecuting status (4) instead of IN_PROGRESS (1) to distinguish agent execution from evaluation/scoring. After agent execution completes, transition to IN_PROGRESS before running evaluators.

- Add WORKLOAD_EXECUTING=4 to EvaluationStatus enum
- Add AGENT_EXECUTION_COMPLETED event and AgentExecutionCompletedEvent model
- Create eval runs with WorkloadExecuting status in _create_eval_run_spec
- Publish AGENT_EXECUTION_COMPLETED after agent finishes in _execute_eval
- Handle event in StudioWebProgressReporter to update status to IN_PROGRESS

Flow: WorkloadExecuting (agent runs) → Running (evaluators run) → Completed/Failed

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 048dafc commit 1075f2d

3 files changed

Lines changed: 72 additions & 1 deletion

File tree

src/uipath/_cli/_evals/_progress_reporter.py

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from uipath.eval.models import EvalItemResult, ScoreType
3131
from uipath.eval.models.evaluation_set import EvaluationItem
3232
from uipath.eval.runtime.events import (
33+
AgentExecutionCompletedEvent,
3334
EvalRunCreatedEvent,
3435
EvalRunUpdatedEvent,
3536
EvalSetRunCreatedEvent,
@@ -47,6 +48,7 @@ class EvaluationStatus(IntEnum):
4748
IN_PROGRESS = 1
4849
COMPLETED = 2
4950
FAILED = 3
51+
WORKLOAD_EXECUTING = 4
5052

5153

5254
class StudioWebProgressItem(BaseModel):
@@ -725,6 +727,54 @@ async def handle_create_eval_run(self, payload: EvalRunCreatedEvent) -> None:
725727
except Exception as e:
726728
self._format_error_message(e, "StudioWeb create eval run error")
727729

730+
async def handle_agent_execution_completed(
    self, payload: AgentExecutionCompletedEvent
) -> None:
    """Switch an eval run from WorkloadExecuting to Running after the agent finishes.

    Looks up the eval run id stored under ``payload.execution_id`` and issues a
    PUT to the ``evalRun`` endpoint carrying ``EvaluationStatus.IN_PROGRESS``.
    If no eval run id is known for the execution, a warning is logged and no
    request is sent. Any exception raised along the way is passed to
    ``self._format_error_message``.

    Args:
        payload: Event carrying the ``execution_id`` of the finished agent run.
    """
    try:
        run_id = self.eval_run_ids.get(payload.execution_id)
        if run_id:
            # The coded flag is looked up by the eval *set* execution id,
            # not by the id of the individual run.
            coded = self.is_coded_eval.get(self.eval_set_execution_id or "", False)
            suffix = "coded/" if coded else ""

            target = Endpoint(
                f"{self._get_endpoint_prefix()}execution/agents/{self._project_id}/{suffix}evalRun"
            )
            body = {
                "evalRunId": run_id,
                "status": EvaluationStatus.IN_PROGRESS.value,
            }
            spec = RequestSpec(
                method="PUT",
                endpoint=target,
                json=body,
                headers=self._tenant_header(),
            )

            await self._client.request_async(
                method=spec.method,
                url=spec.endpoint,
                params=spec.params,
                json=spec.json,
                headers=spec.headers,
                scoped="org" if self._is_localhost() else "tenant",
            )

            logger.info(
                f"Moved eval_run_id={run_id} from WorkloadExecuting to Running (coded={coded})"
            )
        else:
            logger.warning(
                f"Cannot move eval run to Running: eval_run_id not found for "
                f"execution_id={payload.execution_id}"
            )
    except Exception as exc:
        self._format_error_message(exc, "StudioWeb agent execution completed error")
777+
728778
async def handle_update_eval_run(self, payload: EvalRunUpdatedEvent) -> None:
729779
try:
730780
logger.info(
@@ -827,6 +877,10 @@ async def subscribe_to_eval_runtime_events(self, event_bus: EventBus) -> None:
827877
event_bus.subscribe(
828878
EvaluationEvents.CREATE_EVAL_RUN, self.handle_create_eval_run
829879
)
880+
event_bus.subscribe(
881+
EvaluationEvents.AGENT_EXECUTION_COMPLETED,
882+
self.handle_agent_execution_completed,
883+
)
830884
event_bus.subscribe(
831885
EvaluationEvents.UPDATE_EVAL_RUN, self.handle_update_eval_run
832886
)
@@ -1234,7 +1288,7 @@ def _create_eval_run_spec(
12341288
"evalSetRunId": eval_set_run_id,
12351289
"evalSnapshot": eval_snapshot,
12361290
# Backend expects integer status
1237-
"status": EvaluationStatus.IN_PROGRESS.value,
1291+
"status": EvaluationStatus.WORKLOAD_EXECUTING.value,
12381292
}
12391293

12401294
# Legacy backend expects payload wrapped in "request" field

src/uipath/eval/runtime/events.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ class EvaluationEvents(str, Enum):
1919
CREATE_EVAL_RUN = "create_eval_run"
2020
UPDATE_EVAL_SET_RUN = "update_eval_set_run"
2121
UPDATE_EVAL_RUN = "update_eval_run"
22+
AGENT_EXECUTION_COMPLETED = "agent_execution_completed"
2223

2324

2425
class EvalSetRunCreatedEvent(BaseModel):
@@ -80,9 +81,16 @@ class EvalSetRunUpdatedEvent(BaseModel):
8081
success: bool = True
8182

8283

84+
class AgentExecutionCompletedEvent(BaseModel):
    """Payload published once the agent has finished running and evaluation/scoring is about to start."""

    # Identifier of the execution whose agent run has completed.
    execution_id: str
88+
89+
8390
# Union alias over the event payload models listed below.
ProgressEvent = Union[
    EvalSetRunCreatedEvent,
    EvalRunCreatedEvent,
    EvalRunUpdatedEvent,
    EvalSetRunUpdatedEvent,
    AgentExecutionCompletedEvent,
]

src/uipath/eval/runtime/runtime.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
from ._utils import apply_input_overrides
8686
from .context import UiPathEvalContext
8787
from .events import (
88+
AgentExecutionCompletedEvent,
8889
EvalItemExceptionDetails,
8990
EvalRunCreatedEvent,
9091
EvalRunUpdatedEvent,
@@ -536,6 +537,14 @@ async def _execute_eval(
536537
# The evaluation will be completed when resumed
537538
return evaluation_run_results
538539

540+
# Agent execution completed — move eval run from WorkloadExecuting to Running
541+
await self.event_bus.publish(
542+
EvaluationEvents.AGENT_EXECUTION_COMPLETED,
543+
AgentExecutionCompletedEvent(
544+
execution_id=execution_id,
545+
),
546+
)
547+
539548
if self.context.verbose:
540549
evaluation_run_results.agent_execution_output = (
541550
convert_eval_execution_output_to_serializable(

0 commit comments

Comments
 (0)