@@ -198,8 +198,8 @@ def adapt(
198198 """
199199 original = self
200200
201- async def evaluate (object : OuterT, * args : t.Any, ** kwargs : t.Any) -> list[Metric]:
202- return await original.normalize_and_score(adapt(object ), * args, ** kwargs)
201+ async def evaluate (obj : OuterT, * args : t.Any, ** kwargs : t.Any) -> list[Metric]:
202+ return await original.normalize_and_score(adapt(obj ), * args, ** kwargs)
203203
204204 return Scorer(evaluate, name = name or self .name, wraps = original)
205205```
@@ -301,15 +301,15 @@ def fit_like(
301301### normalize\_ and\_ score
302302
303303``` python
304- normalize_and_score(object : T, * args: Any, ** kwargs: Any) -> list[Metric]
304+ normalize_and_score(obj : T, * args: Any, ** kwargs: Any) -> list[Metric]
305305```
306306
307307Executes the scorer and returns all generated metrics,
308308including from nested compositions.
309309
310310** Parameters:**
311311
312- * ** ` object ` **
312+ * ** ` obj ` **
313313 (` T ` )
314314 –The object to score.
315315
@@ -320,13 +320,13 @@ including from nested compositions.
320320
321321<Accordion title = " Source code in dreadnode/scorers/base.py" icon = " code" >
322322``` python
323- async def normalize_and_score (self , object : T, * args : t.Any, ** kwargs : t.Any) -> list[Metric]:
323+ async def normalize_and_score (self , obj : T, * args : t.Any, ** kwargs : t.Any) -> list[Metric]:
324324 """
325325 Executes the scorer and returns all generated metrics,
326326 including from nested compositions.
327327
328328 Args:
329- object : The object to score.
329+ obj : The object to score.
330330
331331 Returns:
332332 All metrics generated by the scorer.
@@ -339,7 +339,7 @@ async def normalize_and_score(self, object: T, *args: t.Any, **kwargs: t.Any) ->
339339 )
340340
341341 try :
342- bound_args = self ._bind_args(object , * args, ** kwargs)
342+ bound_args = self ._bind_args(obj , * args, ** kwargs)
343343 result = self .func(* bound_args.args, ** bound_args.kwargs)
344344 if inspect.isawaitable(result):
345345 result = await result
@@ -348,12 +348,12 @@ async def normalize_and_score(self, object: T, *args: t.Any, **kwargs: t.Any) ->
348348 raise
349349
350350 warn_at_user_stacklevel(
351- f " Error executing scorer { self .name!r } for object { object .__class__ .__name__ } : { e} " ,
351+ f " Error executing scorer { self .name!r } for object { obj .__class__ .__name__ } : { e} " ,
352352 ScorerWarning,
353353 )
354354 result = Metric(value = 0.0 , step = self .step, attributes = {" error" : str (e)})
355355
356- if not isinstance (result, ( list , tuple ) ):
356+ if not isinstance (result, list | tuple ):
357357 result = t.cast(" list[ScorerResult]" , [result])
358358
359359 metrics = [
@@ -424,7 +424,7 @@ def rename(self, new_name: str) -> "Scorer[T]":
424424### score
425425
426426``` python
427- score(object : T, * args: Any, ** kwargs: Any) -> Metric
427+ score(obj : T, * args: Any, ** kwargs: Any) -> Metric
428428```
429429
430430Execute the scorer and return the metric. If the scorer is a composition of other scorers,
@@ -434,7 +434,7 @@ Any output value will be converted to a Metric object if not already one.
434434
435435** Parameters:**
436436
437- * ** ` object ` **
437+ * ** ` obj ` **
438438 (` T ` )
439439 –The object to score.
440440
@@ -445,20 +445,20 @@ Any output value will be converted to a Metric object if not already one.
445445
446446<Accordion title = " Source code in dreadnode/scorers/base.py" icon = " code" >
447447``` python
448- async def score (self , object : T, * args : t.Any, ** kwargs : t.Any) -> Metric:
448+ async def score (self , obj : T, * args : t.Any, ** kwargs : t.Any) -> Metric:
449449 """
450450 Execute the scorer and return the metric. If the scorer is a composition of other scorers,
451451 it will return the "highest-priority" metric, typically the first in the list.
452452
453453 Any output value will be converted to a Metric object if not already one.
454454
455455 Args:
456- object : The object to score.
456+ obj : The object to score.
457457
458458 Returns:
459459 A Metric object.
460460 """
461- all_metrics = await self .normalize_and_score(object , * args, ** kwargs)
461+ all_metrics = await self .normalize_and_score(obj , * args, ** kwargs)
462462 return all_metrics[0 ]
463463```
464464
@@ -468,15 +468,15 @@ async def score(self, object: T, *args: t.Any, **kwargs: t.Any) -> Metric:
468468### score\_ composite
469469
470470``` python
471- score_composite(object : T, * args: Any, ** kwargs: Any) -> tuple[Metric, list[Metric]]
471+ score_composite(obj : T, * args: Any, ** kwargs: Any) -> tuple[Metric, list[Metric]]
472472```
473473
474474Executes the scorer and returns both the primary Metric and a list of any
475475additional metrics from nested compositions.
476476
477477** Parameters:**
478478
479- * ** ` object ` **
479+ * ** ` obj ` **
480480 (` T ` )
481481 –The object to score.
482482
@@ -488,19 +488,19 @@ additional metrics from nested compositions.
488488<Accordion title = " Source code in dreadnode/scorers/base.py" icon = " code" >
489489``` python
490490async def score_composite (
491- self , object : T, * args : t.Any, ** kwargs : t.Any
491+ self , obj : T, * args : t.Any, ** kwargs : t.Any
492492) -> tuple[Metric, list[Metric]]:
493493 """
494494 Executes the scorer and returns both the primary Metric and a list of any
495495 additional metrics from nested compositions.
496496
497497 Args:
498- object : The object to score.
498+ obj : The object to score.
499499
500500 Returns:
501501 A tuple of the primary Metric and a list of all metrics generated.
502502 """
503- metrics = await self .normalize_and_score(object , * args, ** kwargs)
503+ metrics = await self .normalize_and_score(obj , * args, ** kwargs)
504504 return metrics[0 ], metrics[1 :]
505505```
506506
@@ -2556,7 +2556,7 @@ llm\_judge
25562556----------
25572557
25582558``` python
2559- llm_judge(model: str | Generator, rubric: str , * , expected_output: str | None = None , model_params: GenerateParams | AnyDict | None = None , passing: Callable[[float ], bool ] | None = None , min_score: float | None = None , max_score: float | None = None , name: str = ' llm_judge' ) -> Scorer[t.Any]
2559+ llm_judge(model: str | Generator, rubric: str , * , input : Any | None = None , expected_output: Any | None = None , model_params: GenerateParams | AnyDict | None = None , passing: Callable[[float ], bool ] | None = None , min_score: float | None = None , max_score: float | None = None , name: str = ' llm_judge' ) -> Scorer[t.Any]
25602560```
25612561
25622562Score the output of a task using an LLM to judge it against a rubric.
@@ -2569,8 +2569,13 @@ Score the output of a task using an LLM to judge it against a rubric.
25692569* ** ` rubric ` **
25702570 (` str ` )
25712571 –The rubric to use for judging.
2572+ * ** ` input ` **
2573+ (` Any | None ` , default:
2574+ ` None `
2575+ )
2576+ –The input that produced the output, passed to the judge as context, if applicable.
25722577* ** ` expected_output ` **
2573- (` str | None` , default:
2578+ (` Any | None` , default:
25742579 ` None `
25752580 )
25762581 –The expected output to compare against, if applicable.
@@ -2606,7 +2611,8 @@ def llm_judge(
26062611 model : str | rg.Generator,
26072612 rubric : str ,
26082613 * ,
2609- expected_output : str | None = None ,
2614+ input : t.Any | None = None ,
2615+ expected_output : t.Any | None = None ,
26102616 model_params : rg.GenerateParams | AnyDict | None = None ,
26112617 passing : t.Callable[[float ], bool ] | None = None ,
26122618 min_score : float | None = None ,
@@ -2619,6 +2625,7 @@ def llm_judge(
26192625 Args:
26202626 model: The model to use for judging.
26212627 rubric: The rubric to use for judging.
2628+ input: The input that produced the output, passed to the judge as context, if applicable.
26222629 expected_output: The expected output to compare against, if applicable.
26232630 model_params: Optional parameters for the model.
26242631 passing: Optional callback to determine if the score is passing based on the score value - overrides any model-specified value.
@@ -2634,7 +2641,8 @@ def llm_judge(
26342641 model, help = " The model to use for judging." , expose_as = str
26352642 ),
26362643 rubric : str = rubric,
2637- expected_output : str | None = expected_output,
2644+ input : t.Any | None = input ,
2645+ expected_output : t.Any | None = expected_output,
26382646 model_params : rg.GenerateParams | AnyDict | None = model_params,
26392647 min_score : float | None = min_score,
26402648 max_score : float | None = max_score,
@@ -2655,8 +2663,8 @@ def llm_judge(
26552663 raise TypeError (" Model must be a string identifier or a Generator instance." )
26562664
26572665 input_data = JudgeInput(
2658- input = str (data) ,
2659- expected_output = expected_output,
2666+ input = str (input ) if input is not None else None ,
2667+ expected_output = str ( expected_output) if expected_output is not None else None ,
26602668 output = str (data),
26612669 rubric = rubric,
26622670 )
0 commit comments