Skip to content

Commit b1d45e3

Browse files
authored
Revert extraction setting for IndicesExtractionConfig (#998)
Revert `try_extract_without_anchor` to True in `IndicesExtractionConfig` to avoid issues in the `gpqa:diamond` eval.

* Change `try_extract_without_anchor` only for GPQA
* Update the GPQA test to reflect the extraction setting
1 parent c7a063a commit b1d45e3

File tree

2 files changed: 13 additions and 5 deletions.

2 files changed

+13
-5
lines changed

src/lighteval/metrics/metrics.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -526,8 +526,12 @@ class Metrics(Enum):
526526
metric_name="extractive_match",
527527
sample_level_fn=MultilingualExtractiveMatchMetric(
528528
language=Language.ENGLISH,
529-
gold_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters")],
530-
pred_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters")],
529+
gold_extraction_target=[
530+
IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True)
531+
],
532+
pred_extraction_target=[
533+
IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True)
534+
],
531535
precision=6,
532536
),
533537
category=SamplingMethod.GENERATIVE,
@@ -539,8 +543,12 @@ class Metrics(Enum):
539543
sample_level_fn=PassAtK(
540544
sample_scoring_function=MultilingualExtractiveMatchMetric(
541545
language=Language.ENGLISH,
542-
gold_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters")],
543-
pred_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters")],
546+
gold_extraction_target=[
547+
IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True)
548+
],
549+
pred_extraction_target=[
550+
IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True)
551+
],
544552
precision=6,
545553
),
546554
),

tests/unit/metrics/test_cases/gpqa_instruct_metric.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -249,7 +249,7 @@
249249
]
250250
},
251251
"expected_output": {
252-
"extractive_match": 0.0
252+
"extractive_match": 1.0
253253
},
254254
"tolerance": 0.01,
255255
"description": "Answer with quotes but still extractable"

0 commit comments

Comments
 (0)