Skip to content

Commit 6e01614

Browse files
Fix DPO dataset builder for helpsteer3 and ultrafeedback (#46)
1 parent 651bd0c commit 6e01614

File tree

2 files changed

+3
-6
lines changed

2 files changed

+3
-6
lines changed

tinker_cookbook/recipes/preference/datasets.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -174,7 +174,7 @@ def example_to_labeled_comparison(self, example: dict) -> LabeledComparison | No
174174
completion_B=[{"role": "assistant", "content": response2}],
175175
)
176176
return LabeledComparison(
177-
comparison=comparison, label="A" if overall_preference > 0 else "B"
177+
comparison=comparison, label="A" if overall_preference < 0 else "B"
178178
)
179179

180180

tinker_cookbook/recipes/preference/dpo/train.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,6 @@
1010
from tinker_cookbook.preference.dpo_datasets import (
1111
DPODatasetBuilderFromComparisons,
1212
)
13-
from tinker_cookbook.preference.preference_datasets import (
14-
ChatDatasetBuilderFromComparisons,
15-
)
1613
from tinker_cookbook.recipes.preference.datasets import (
1714
HelpSteer3ComparisonBuilder,
1815
HHHComparisonBuilder,
@@ -69,11 +66,11 @@ def get_dataset_builder(
6966
common_config=common_config, comparison_builder=HHHComparisonBuilder()
7067
)
7168
elif dataset == "helpsteer3":
72-
return ChatDatasetBuilderFromComparisons(
69+
return DPODatasetBuilderFromComparisons(
7370
common_config=common_config, comparison_builder=HelpSteer3ComparisonBuilder()
7471
)
7572
elif dataset == "ultrafeedback":
76-
return ChatDatasetBuilderFromComparisons(
73+
return DPODatasetBuilderFromComparisons(
7774
common_config=common_config, comparison_builder=UltraFeedbackComparisonBuilder()
7875
)
7976
else:

0 commit comments

Comments
 (0)