1313# See the License for the specific language governing permissions and
1414# limitations under the License.
1515
16- """Tests for conll_dataset_builder."""
1716import textwrap
18- from unittest import mock
1917
2018from etils import epath
2119import pytest
2523
2624_FOLDER_PATH = "mock/path"
2725
28- _VALID_INPUT = textwrap .dedent (
29- """
26+ _VALID_INPUT = textwrap .dedent ("""
3027-DOCSTART- -X- -X- O
3128Winter NN B-NP O
3229is VBZ B-VP O
3330
3431Air NN I-NP O
3532. . O O
36- """
37- )
33+ """ )
3834
39- _INVALID_INPUT = textwrap .dedent (
40- """
35+ _INVALID_INPUT = textwrap .dedent ("""
4136Winter NN B-NP
4237is VBZ B-VP O
4338
4439Air NN I-NP O
4540. . O O
46- """
47- )
48-
49- _INPUT_PATH = epath .Path (_FOLDER_PATH , "input_path.txt" )
41+ """ )
5042
5143
5244class DummyConllDataset (conll_dataset_builder .ConllDatasetBuilder ):
@@ -63,60 +55,56 @@ def _info(self) -> tfds.core.DatasetInfo:
6355 def _split_generators (self , dl_manager : tfds .download .DownloadManager ):
6456 """Returns SplitGenerators."""
6557 del dl_manager
66- return {"train" : self ._generate_examples (_INPUT_PATH )}
67-
68-
69- def test_generate_example ():
70- tf_mock = mock .Mock ()
71- tf_mock .gfile .GFile .return_value = _VALID_INPUT
72- expected_examples = []
73-
74- dataset = DummyConllDataset ()
75-
76- with tfds .testing .MockFs () as fs :
77- fs .add_file (path = _INPUT_PATH , content = _VALID_INPUT )
78- examples = list (dataset ._generate_examples (_INPUT_PATH ))
79-
80- expected_examples = [
81- (
82- 0 ,
83- {
84- "tokens" : ["Winter" , "is" ],
85- "pos" : ["NN" , "VBZ" ],
86- "chunks" : ["B-NP" , "B-VP" ],
87- "ner" : ["O" , "O" ],
88- },
89- ),
90- (
91- 1 ,
92- {
93- "tokens" : ["Air" , "." ],
94- "pos" : ["NN" , "." ],
95- "chunks" : ["I-NP" , "O" ],
96- "ner" : ["O" , "O" ],
97- },
98- ),
99- ]
100-
101- assert examples == expected_examples
102-
103- for _ , example in examples :
104- assert len (example ) == len (conll_lib .CONLL_2003_ORDERED_FEATURES )
58+ return {"train" : self ._generate_examples ("/tmp/input.txt" )}
59+
60+
def test_generate_example(tmpdir):
  """Checks that a valid CoNLL file yields the expected keyed examples."""
  tmpdir = epath.Path(tmpdir)
  input_path = tmpdir / "input_path.txt"
  input_path.write_text(_VALID_INPUT)

  dataset = DummyConllDataset(data_dir=tmpdir)
  examples = list(dataset._generate_examples(input_path))

  # One example per sentence; the -DOCSTART- marker line is skipped.
  expected_examples = [
      (
          0,
          {
              "tokens": ["Winter", "is"],
              "pos": ["NN", "VBZ"],
              "chunks": ["B-NP", "B-VP"],
              "ner": ["O", "O"],
          },
      ),
      (
          1,
          {
              "tokens": ["Air", "."],
              "pos": ["NN", "."],
              "chunks": ["I-NP", "O"],
              "ner": ["O", "O"],
          },
      ),
  ]

  assert examples == expected_examples

  # Every example must expose exactly the CoNLL-2003 feature set.
  for _, example in examples:
    assert len(example) == len(conll_lib.CONLL_2003_ORDERED_FEATURES)

  assert len(examples) == 2

10897
def test_generate_corrupted_example(tmpdir):
  """Checks that a line with a missing column raises a descriptive error."""
  tmpdir = epath.Path(tmpdir)
  input_path = tmpdir / "input_path.txt"
  input_path.write_text(_INVALID_INPUT)
  dataset = DummyConllDataset(data_dir=tmpdir)

  error_line = "Winter NN B-NP"
  error_msg = (
      f"Mismatch in the number of features found in line: {error_line}\n\n"
      "Should be 4, but found 3"
  )
  with pytest.raises(ValueError, match=error_msg):
    list(dataset._generate_examples(input_path))