Skip to content

Commit b380f59

Browse files
committed
feat: add AgentEvaluator
1 parent 2641a2c commit b380f59

61 files changed

Lines changed: 3482 additions & 1184 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ dependencies = [
1010
"opentelemetry-sdk>=1.31.1",
1111
"pydantic>=2.11.1",
1212
"python-dotenv>=1.0.1",
13-
"tenacity>=9.0.0",
13+
"tenacity>=8.0.0",
1414
"tomli>=2.2.1",
1515
"pathlib>=1.0.1",
1616
"rich>=13.0.0",
@@ -101,7 +101,7 @@ line-ending = "auto"
101101

102102
[tool.mypy]
103103
plugins = ["pydantic.mypy"]
104-
exclude = ["samples/.*"]
104+
exclude = ["samples/.*", "tests/.*"]
105105

106106
follow_imports = "silent"
107107
warn_redundant_casts = true

src/uipath/_cli/_evals/evaluation_service.py renamed to src/uipath/_cli/_evals/_evaluation_service.py

Lines changed: 92 additions & 151 deletions
Large diffs are not rendered by default.

src/uipath/_cli/_evals/_evaluators/_evaluator_factory.py renamed to src/uipath/_cli/_evals/_evaluator_factory.py

Lines changed: 14 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,22 @@
11
from typing import Any, Dict
22

3-
from .._models import EvaluatorCategory, EvaluatorType
4-
from ._evaluator_base import EvaluatorBase, EvaluatorBaseParams
5-
from ._exact_match_evaluator import ExactMatchEvaluator
6-
from ._json_similarity_evaluator import JsonSimilarityEvaluator
7-
from ._llm_as_judge_evaluator import LlmAsAJudgeEvaluator
8-
from ._trajectory_evaluator import TrajectoryEvaluator
3+
from uipath.eval.evaluators import (
4+
BaseEvaluator,
5+
ExactMatchEvaluator,
6+
JsonSimilarityEvaluator,
7+
LlmAsAJudgeEvaluator,
8+
TrajectoryEvaluator,
9+
)
10+
from uipath.eval.models import EvaluatorCategory, EvaluatorType
11+
12+
from ._models import EvaluatorBaseParams
913

1014

1115
class EvaluatorFactory:
1216
"""Factory class for creating evaluator instances based on configuration."""
1317

1418
@staticmethod
15-
def create_evaluator(data: Dict[str, Any]) -> EvaluatorBase:
19+
def create_evaluator(data: Dict[str, Any]) -> BaseEvaluator:
1620
"""Create an evaluator instance from configuration data.
1721
1822
Args:
@@ -81,8 +85,7 @@ def _create_exact_match_evaluator(
8185
) -> ExactMatchEvaluator:
8286
"""Create a deterministic evaluator."""
8387
return ExactMatchEvaluator.from_params(
84-
base_params,
85-
target_output_key=data.get("targetOutputKey", "*"),
88+
**base_params.model_dump(),
8689
)
8790

8891
@staticmethod
@@ -91,8 +94,7 @@ def _create_json_similarity_evaluator(
9194
) -> JsonSimilarityEvaluator:
9295
"""Create a deterministic evaluator."""
9396
return JsonSimilarityEvaluator.from_params(
94-
base_params,
95-
target_output_key=data.get("targetOutputKey", "*"),
97+
**base_params.model_dump(),
9698
)
9799

98100
@staticmethod
@@ -113,10 +115,9 @@ def _create_llm_as_judge_evaluator(
113115
)
114116

115117
return LlmAsAJudgeEvaluator.from_params(
116-
base_params,
118+
**base_params.model_dump(),
117119
prompt=prompt,
118120
model=model,
119-
target_output_key=data.get("targetOutputKey", "*"),
120121
)
121122

122123
@staticmethod

src/uipath/_cli/_evals/_evaluators/__init__.py

Lines changed: 0 additions & 22 deletions
This file was deleted.

src/uipath/_cli/_evals/_evaluators/_deterministic_evaluator_base.py

Lines changed: 0 additions & 46 deletions
This file was deleted.

src/uipath/_cli/_evals/_evaluators/_evaluator_base.py

Lines changed: 0 additions & 124 deletions
This file was deleted.

src/uipath/_cli/_evals/_evaluators/_exact_match_evaluator.py

Lines changed: 0 additions & 40 deletions
This file was deleted.

src/uipath/_cli/_evals/_evaluators/_trajectory_evaluator.py

Lines changed: 0 additions & 48 deletions
This file was deleted.
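[File name missing from the page; judging by the imports and `__all__` list below, this is probably src/uipath/_cli/_evals/_models/__init__.py — confirm against the commit.]

Lines changed: 2 additions & 8 deletions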
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,12 @@
11
from uipath._cli._evals._models._evaluation_set import EvaluationItem, EvaluationSet
22
from uipath._cli._evals._models._evaluators import (
3-
EvaluationResult,
43
EvaluationSetResult,
5-
EvaluatorCategory,
6-
EvaluatorType,
7-
LLMResponse,
4+
EvaluatorBaseParams,
85
)
96

107
__all__ = [
11-
"LLMResponse",
12-
"EvaluatorCategory",
13-
"EvaluatorType",
14-
"EvaluationResult",
158
"EvaluationSetResult",
169
"EvaluationItem",
1710
"EvaluationSet",
11+
"EvaluatorBaseParams",
1812
]

src/uipath/_cli/_evals/_models/_evaluation_set.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ class EvaluationSet(BaseModel):
3636
createdAt: str
3737
updatedAt: str
3838

39-
def extract_selected_evals(self, eval_ids) -> None:
39+
def extract_selected_evals(self, eval_ids: list[str]) -> None:
4040
selected_evals: list[EvaluationItem] = []
4141
for evaluation in self.evaluations:
4242
if evaluation.id in eval_ids:

0 commit comments

Comments
 (0)