Skip to content

Commit 11629d4

Browse files
fineguy and The TensorFlow Datasets Authors
authored and committed
Fix c4_test.
PiperOrigin-RevId: 796108166
1 parent 52f552c commit 11629d4

File tree

3 files changed

+61
-28
lines changed

3 files changed

+61
-28
lines changed

.github/workflows/pytest-template.yml

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,7 @@ jobs:
5252

5353
# Run tests
5454
# Ignores:
55-
# * Nsynth is run in isolation due to dependency conflict (crepe).
56-
# * Lsun tests is disabled because the tensorflow_io used in open-source
57-
# is linked to static libraries compiled again specific TF version, which
58-
# makes test fails with linking error (libtensorflow_io_golang.so).
55+
# * c4, lsun, nsynth are run separately to avoid dependency conflicts.
5956
# * imagenet2012_corrupted requires imagemagick binary.
6057
# * import_without_tf_test.py, because the test relies on TensorFlow not being imported.
6158
# * github_api is run separately to not overuse API quota.
@@ -65,8 +62,9 @@ jobs:
6562
- name: Run core tests
6663
run: |
6764
pytest --durations=100 -vv -n auto --shard-id=$((${{ matrix.shard-id }} - 1)) --num-shards=${{ matrix.num-shards }} \
68-
--ignore="tensorflow_datasets/datasets/nsynth/nsynth_dataset_builder_test.py" \
65+
--ignore-glob="tensorflow_datasets/text/c4*_test.py" \
6966
--ignore="tensorflow_datasets/image/lsun_test.py" \
67+
--ignore="tensorflow_datasets/datasets/nsynth/nsynth_dataset_builder_test.py" \
7068
--ignore="tensorflow_datasets/datasets/imagenet2012_corrupted/imagenet2012_corrupted_dataset_builder_test.py" \
7169
--ignore="tensorflow_datasets/scripts/documentation/build_api_docs_test.py" \
7270
--ignore="tensorflow_datasets/import_without_tf_test.py" \
@@ -86,3 +84,27 @@ jobs:
8684
retry_on: timeout
8785
command: |
8886
pytest -vv -o faulthandler_timeout=10 tensorflow_datasets/translate/wmt19_test.py
87+
88+
# Run `c4` dataset tests separately.
89+
- name: Run c4 dataset tests
90+
if: ${{ matrix.shard-id == 1 }}
91+
run: |
92+
pip install .[c4]
93+
pytest -vv -n auto \
94+
tensorflow_datasets/text/c4*_test.py
95+
96+
# Run `lsun` dataset tests separately.
97+
- name: Run lsun dataset tests
98+
if: ${{ matrix.shard-id == 2 }}
99+
run: |
100+
pip install .[lsun]
101+
pytest -vv -n auto \
102+
tensorflow_datasets/image/lsun_test.py
103+
104+
# Run `nsynth` dataset tests separately.
105+
- name: Run nsynth dataset tests
106+
if: ${{ matrix.shard-id == 3 }}
107+
run: |
108+
pip install .[nsynth]
109+
pytest -vv -n auto \
110+
tensorflow_datasets/datasets/nsynth/nsynth_dataset_builder_test.py

setup.py

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,6 @@
8181
'pytest-shard',
8282
'pytest-xdist',
8383
# Lazy-deps required by core
84-
# TODO(b/418761065): Update to 2.65.0 once the bug is fixed.
8584
'apache-beam<2.65.0',
8685
'conllu',
8786
'mlcroissant>=1.0.9',
@@ -150,6 +149,9 @@
150149
# nltk==3.8.2 is broken: https://github.com/nltk/nltk/issues/3293
151150
'nltk==3.8.1',
152151
'tldextract',
152+
# tensorflow==2.20.0 is not compatible with gcld3 because of protobuf
153+
# version conflict.
154+
'tensorflow<2.20.0',
153155
],
154156
'c4_wsrs': ['apache-beam<2.65.0'],
155157
'cats_vs_dogs': ['matplotlib'],
@@ -167,11 +169,17 @@
167169
'scipy',
168170
],
169171
'librispeech': ['pydub'], # and ffmpeg installed
170-
'lsun': ['tensorflow-io[tensorflow]'],
171-
# sklearn version required to avoid conflict with librosa from
172-
# https://github.com/scikit-learn/scikit-learn/issues/14485
173-
# See https://github.com/librosa/librosa/issues/1160
174-
'nsynth': ['crepe>=0.0.11', 'librosa', 'scikit-learn==0.20.3'],
172+
'lsun': [
173+
# tensorflow-io is compiled against specific versions of TF.
174+
'tensorflow-io[tensorflow]',
175+
],
176+
'nsynth': [
177+
'crepe',
178+
'librosa',
179+
# tensorflow==2.20.0 is not compatible with librosa because of protobuf
180+
# version conflict.
181+
'tensorflow<2.20.0',
182+
],
175183
'ogbg_molpcba': ['pandas', 'networkx'],
176184
'pet_finder': ['pandas'],
177185
'qm9': ['pandas'],
@@ -196,7 +204,7 @@
196204

197205
# Those datasets have dependencies which conflict with the rest of TFDS, so
198206
# running them in an isolated environments.
199-
ISOLATED_DATASETS = ('nsynth', 'lsun')
207+
ISOLATED_DATASETS = ('c4', 'lsun', 'nsynth')
200208

201209
# Extra dataset deps are required for the tests
202210
all_dataset_dependencies = list(
@@ -238,18 +246,21 @@
238246
license='Apache 2.0',
239247
packages=setuptools.find_packages(),
240248
package_data={
241-
'tensorflow_datasets': DATASET_FILES + [
242-
# Bundle `datasets/` folder in PyPI releases
243-
'datasets/*/*',
244-
'core/utils/colormap.csv',
245-
'scripts/documentation/templates/*',
246-
'url_checksums/*',
247-
'checksums.tsv',
248-
'community-datasets.toml',
249-
'dataset_collections/*/*.md',
250-
'dataset_collections/*/*.bib',
251-
'core/valid_tags.txt',
252-
],
249+
'tensorflow_datasets': (
250+
DATASET_FILES
251+
+ [
252+
# Bundle `datasets/` folder in PyPI releases
253+
'datasets/*/*',
254+
'core/utils/colormap.csv',
255+
'scripts/documentation/templates/*',
256+
'url_checksums/*',
257+
'checksums.tsv',
258+
'community-datasets.toml',
259+
'dataset_collections/*/*.md',
260+
'dataset_collections/*/*.bib',
261+
'core/valid_tags.txt',
262+
]
263+
),
253264
},
254265
exclude_package_data={
255266
'tensorflow_datasets': [

tensorflow_datasets/core/lazy_imports_lib_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,18 @@ class LazyImportsTest(testing.TestCase, parameterized.TestCase):
2525
# The following deps are not in the test list because the datasets that
2626
# require them need to have their tests run in isolation:
2727
# * crepe (NSynth)
28+
# * gcld3 (c4)
29+
# * langdetect (c4)
2830
# * librosa (NSynth)
31+
# * nltk (c4)
32+
# * tldextract (c4)
2933
@parameterized.parameters(
3034
"bs4",
3135
"cv2",
32-
"gcld3",
3336
"gcsfs_store",
34-
"langdetect",
3537
"lxml",
3638
"matplotlib",
3739
"mwparserfromhell",
38-
"nltk",
3940
"os",
4041
"pandas",
4142
"pretty_midi",
@@ -44,7 +45,6 @@ class LazyImportsTest(testing.TestCase, parameterized.TestCase):
4445
"scipy",
4546
"skimage",
4647
"tifffile",
47-
"tldextract",
4848
"zarr",
4949
)
5050
def test_import(self, module_name):

0 commit comments

Comments (0)