Skip to content

Commit f353b13

Browse files
committed
add evals
1 parent 9d36c26 commit f353b13

File tree

7 files changed

+1867
-0
lines changed

7 files changed

+1867
-0
lines changed

human-seeded-evals/evals/__init__.py

Whitespace-only changes.
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from dataclasses import dataclass
2+
from datetime import timedelta
3+
4+
from pydantic_evals.evaluators import Evaluator, EvaluatorContext, EvaluatorOutput
5+
6+
from app.models import TimeRangeBuilderSuccess, TimeRangeInputs, TimeRangeResponse
7+
8+
9+
@dataclass
class ValidateTimeRange(Evaluator[TimeRangeInputs, TimeRangeResponse]):
    """Sanity-check the time window of a successful time-range response.

    Only outputs that are ``TimeRangeBuilderSuccess`` instances are checked;
    any other output yields an empty result.
    """

    def evaluate(self, ctx: EvaluatorContext[TimeRangeInputs, TimeRangeResponse]) -> EvaluatorOutput:
        """Return named checks for the window described by ``ctx.output``.

        Args:
            ctx: Evaluation context; ``ctx.output`` is the response under test
                and ``ctx.inputs['now']`` is the reference point the end of
                the window is compared against.

        Returns:
            For a success output, a dict with two entries:
            ``'window_is_not_too_long'`` (end minus start is at most 30 days)
            and ``'window_is_not_in_the_future'`` (end is not later than
            ``ctx.inputs['now']``). For any other output, an empty dict.
        """
        result = ctx.output
        if not isinstance(result, TimeRangeBuilderSuccess):
            # No evaluation needed for errors
            return {}

        range_end = result.end_timestamp
        span = range_end - result.start_timestamp
        return {
            'window_is_not_too_long': span <= timedelta(days=30),
            'window_is_not_in_the_future': range_end <= ctx.inputs['now'],
        }
21+
22+
23+
@dataclass
class UserMessageIsConcise(Evaluator[TimeRangeInputs, TimeRangeResponse]):
    """Check that the message shown to the user stays short.

    The message is ``explanation`` for a ``TimeRangeBuilderSuccess`` output
    and ``error`` for any other output.
    """

    async def evaluate(
        self,
        ctx: EvaluatorContext[TimeRangeInputs, TimeRangeResponse],
    ) -> EvaluatorOutput:
        """Return whether the user-facing message has fewer than 50 words.

        Words are counted by splitting the message on whitespace.

        Args:
            ctx: Evaluation context; ``ctx.output`` is the response under test.

        Returns:
            ``True`` when the message has fewer than 50 whitespace-separated
            words, ``False`` otherwise. An empty dict when the message is
            ``None``, so nothing is recorded for that case.
        """
        response = ctx.output
        message = response.explanation if isinstance(response, TimeRangeBuilderSuccess) else response.error

        if message is None:
            return {}
        return len(message.split()) < 50
38+
39+
40+
# Evaluator classes defined in this module, gathered in one tuple.
# NOTE(review): presumably passed wherever custom evaluator types must be
# registered (e.g. when loading a dataset) — confirm against callers.
CUSTOM_EVALUATOR_TYPES = (
    ValidateTimeRange,
    UserMessageIsConcise,
)

0 commit comments

Comments
 (0)