Skip to content

Commit 68e3659

Browse files
committed
feat: report eval details to SW
1 parent 1d86a25 commit 68e3659

19 files changed

Lines changed: 1879 additions & 1046 deletions

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "uipath"
3-
version = "2.1.60"
3+
version = "2.1.61"
44
description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools."
55
readme = { file = "README.md", content-type = "text/markdown" }
66
requires-python = ">=3.10"

src/uipath/_cli/_evals/_models/_output.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,19 @@ class EvaluationRunResultDto(BaseModel):
6262
class EvaluationRunResult(BaseModel):
    """Aggregated result for a single evaluation item across its run results."""

    # Serialize with camelCase aliases while still accepting snake_case names.
    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)

    # Mean of the contained run-result scores; stays 0.0 until computed.
    score: float = 0.0
    evaluation_name: str
    evaluation_run_results: List[EvaluationRunResultDto]
6768

69+
def compute_average_score(self) -> None:
    """Average the run-result scores for this eval item.

    Stores the mean of ``dto.result.score`` over ``evaluation_run_results``
    into ``self.score``; an item with no run results gets a score of 0.0.
    """
    run_results = self.evaluation_run_results
    if not run_results:
        self.score = 0.0
        return

    scores = [dto.result.score for dto in run_results]
    self.score = sum(scores) / len(scores)
77+
6878

6979
class UiPathEvalOutput(BaseModel):
7080
model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
@@ -74,12 +84,15 @@ class UiPathEvalOutput(BaseModel):
7484
evaluation_set_results: List[EvaluationRunResult]
7585

7686
def compute_average_score(self) -> None:
    """Recompute every eval item's average, then store the overall mean.

    Each entry in ``evaluation_set_results`` first refreshes its own
    ``score`` via ``compute_average_score()``; ``self.score`` becomes the
    unweighted mean of those item scores, or 0.0 when there are no items.
    """
    items = self.evaluation_set_results
    if not items:
        self.score = 0.0
        return

    running_total = 0.0
    for item in items:
        item.compute_average_score()
        running_total += item.score

    self.score = running_total / len(items)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from typing import Any
2+
3+
from pydantic import BaseModel, ConfigDict
4+
from pydantic.alias_generators import to_camel
5+
6+
from uipath.eval.models import EvalItemResult
7+
8+
9+
class StudioWebProgressItem(BaseModel):
    """Progress payload for a single eval run (reported to Studio Web, per the commit intent)."""

    # Identifier of the eval run this progress item belongs to.
    eval_run_id: str
    # Individual evaluator results produced for this eval run.
    eval_results: list[EvalItemResult]
    # Whether the agent execution for this eval run succeeded.
    success: bool
    agent_output: dict[str, Any]
    # Wall-clock execution time of the agent — units not shown here;
    # presumably seconds. TODO confirm against the caller.
    agent_execution_time: float
    # NOTE(review): unlike StudioWebAgentSnapshot below, this model has no
    # camelCase alias config — confirm snake_case serialization is intended.
16+
17+
class StudioWebAgentSnapshot(BaseModel):
    """Snapshot of an agent's input and output schema dictionaries."""

    # Serialize with camelCase aliases while still accepting snake_case names.
    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)

    input_schema: dict[str, Any]
    output_schema: dict[str, Any]

0 commit comments

Comments
 (0)