@@ -260,7 +260,7 @@ def _split_generators(
260260 split ['name' ]: self ._generate_examples (
261261 filters = {
262262 ** self ._filters ,
263- split_reference .reference_field .id : split ['name' ]. encode () ,
263+ split_reference .reference_field .id : split ['name' ],
264264 }
265265 )
266266 for split in split_reference .split_record_set .data
@@ -285,15 +285,10 @@ def _generate_examples(
285285 record_set = croissant_utils .get_record_set (
286286 self .builder_config .name , metadata = self .metadata
287287 )
288- records = self .dataset .records (record_set .id )
288+ records = self .dataset .records (record_set .id , filters = filters )
289289 for i , record in enumerate (records ):
290290 # Some samples might not be TFDS-compatible as-is, e.g. from croissant
291291 # describing HuggingFace datasets, so we convert them here. This shouldn't
292292 # impact datasets which are already TFDS-compatible.
293293 record = conversion_utils .to_tfds_value (record , self .info .features )
294- # After partition implementation, the filters will be applied from
295- # mlcroissant `dataset.records` directly.
296- # `records = records.filter(f == v for f, v in filters.items())``
297- # For now, we apply them in TFDS.
298- if all (record [filter ] == value for filter , value in filters .items ()):
299- yield i , record
294+ yield i , record
0 commit comments