Commit 8cd061b

fix: adds simple check on empty filename (#786)
1 parent ac9bac3 commit 8cd061b

File tree: 7 files changed (+1665 / -23 lines)

conftest.py

Lines changed: 19 additions & 10 deletions
@@ -25,9 +25,11 @@
 import tempfile
 import time
 import urllib
+from pathlib import Path

 import pytest
 import responses
+import yaml
 from click.testing import CliRunner


@@ -452,17 +454,24 @@ def renku_cli(*args):
     return renku_cli


-@pytest.fixture
-def doi_dataset():
-    """Return a yaml of dataset using DOI for its id."""
-    from pathlib import Path
-    dataset_path = Path(
-        __file__
-    ).parent / 'tests' / 'fixtures' / 'doi-dataset.yml'
-    with open(dataset_path.as_posix()) as f:
-        dataset_yaml = f.read()
+@pytest.fixture(
+    params=[{
+        'path':
+            Path(__file__).parent / 'tests' / 'fixtures' / 'doi-dataset.yml',
+    }, {
+        'path':
+            Path(__file__).parent / 'tests' / 'fixtures' /
+            'broken-dataset-v0.5.2.yml',
+    }]
+)
+def dataset_metadata(request):
+    """Return dataset metadata fixture."""
+    from renku.core.models.jsonld import NoDatesSafeLoader
+
+    file_path = request.param['path']

-    return dataset_yaml
+    data = yaml.load(file_path.read_text(), Loader=NoDatesSafeLoader)
+    yield data


 @pytest.fixture()
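For context, replacing the single-purpose doi_dataset fixture with a parametrized dataset_metadata fixture means pytest runs every test that requests it once per entry in params, so both the DOI fixture file and the broken v0.5.2 fixture file flow through the same tests. A minimal standalone sketch of that pytest mechanism (hypothetical fixture and test names, not part of this commit):

    import pytest

    @pytest.fixture(params=['first.yml', 'second.yml'])  # hypothetical parameter values
    def metadata_path(request):
        # pytest invokes this fixture once per value in params
        return request.param

    def test_metadata_path_is_string(metadata_path):
        # collected twice: test_metadata_path_is_string[first.yml] and [second.yml]
        assert isinstance(metadata_path, str)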

renku/core/commands/checks/migration.py

Lines changed: 11 additions & 0 deletions
@@ -26,6 +26,7 @@

 from renku.core.models.datasets import Dataset
 from renku.core.models.refs import LinkReference
+from renku.core.utils.urls import url_to_string

 from ..echo import WARNING

@@ -235,9 +236,19 @@ def fix_uncommitted_labels(client):
         dataset.to_yaml()


+def fix_dataset_files_urls(client):
+    """Ensure dataset files have correct url format."""
+    for dataset in client.datasets.values():
+        for file_ in dataset.files:
+            file_.url = url_to_string(file_.url)
+
+        dataset.to_yaml()
+
+
 STRUCTURE_MIGRATIONS = [
     ensure_clean_lock,
     migrate_datasets_pre_v0_3,
     migrate_broken_dataset_paths,
     fix_uncommitted_labels,
+    fix_dataset_files_urls,
 ]
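The new fix_dataset_files_urls step rewrites every dataset file URL through url_to_string before the dataset is written back to YAML, and it is registered in STRUCTURE_MIGRATIONS alongside the existing migration steps. A rough sketch of the per-file effect, assuming a broken record stored the URL as a list of parse components (FakeFile is a stand-in for the real DatasetFile model, not part of the commit):

    from renku.core.utils.urls import url_to_string

    class FakeFile:
        """Stand-in with only a url attribute, which is all the migration touches."""
        def __init__(self, url):
            self.url = url

    file_ = FakeFile(['https', 'example.com', '/data/file.csv'])
    file_.url = url_to_string(file_.url)  # the per-file step inside fix_dataset_files_urls
    assert file_.url == 'https://example.com/data/file.csv'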

renku/core/models/datasets.py

Lines changed: 7 additions & 1 deletion
@@ -128,6 +128,12 @@ def _convert_dataset_files_creators(value):
     return [Creator.from_jsonld(c) for c in coll]


+def convert_filename_path(p):
+    """Return name of the file."""
+    if p:
+        return Path(p).name
+
+
 @jsonld.s(
     type='schema:DigitalDocument',
     slots=True,
@@ -153,7 +159,7 @@ class DatasetFile(Entity, CreatorsMixin):

     dataset = jsonld.ib(context='schema:isPartOf', default=None, kw_only=True)

-    filename = attr.ib(kw_only=True, converter=lambda x: Path(x).name)
+    filename = attr.ib(kw_only=True, converter=convert_filename_path)

     name = jsonld.ib(context='schema:name', kw_only=True, default=None)

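This is the core of the fix for #786: the old converter, lambda x: Path(x).name, raises a TypeError when the stored filename is None, because Path(None) is not a valid path. convert_filename_path only converts truthy values, so missing or empty filenames now deserialize to None instead of failing. A small illustration, reusing the converter exactly as added above:

    from pathlib import Path

    def convert_filename_path(p):
        """Return name of the file."""
        if p:
            return Path(p).name

    assert convert_filename_path('data/raw/file.csv') == 'file.csv'
    assert convert_filename_path('') is None    # empty string now maps to None instead of ''
    assert convert_filename_path(None) is None  # the old lambda raised TypeError here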

renku/core/utils/urls.py

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2019 - Swiss Data Science Center (SDSC)
+# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
+# Eidgenössische Technische Hochschule Zürich (ETHZ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Helpers utils for handling URLs."""
+
+from urllib.parse import ParseResult
+
+
+def url_to_string(url):
+    """Convert url from ``list`` or ``ParseResult`` to string."""
+    if isinstance(url, list):
+        return ParseResult(
+            scheme=url[0],
+            netloc=url[1],
+            path=url[2],
+            params=None,
+            query=None,
+            fragment=None,
+        ).geturl()
+
+    if isinstance(url, ParseResult):
+        return url.geturl()
+
+    if isinstance(url, str):
+        return url
+
+    raise ValueError('url value not recognized')
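For reference, a usage sketch of the three input shapes url_to_string accepts (illustrative values, not taken from the commit):

    from urllib.parse import urlparse

    from renku.core.utils.urls import url_to_string

    url = 'https://example.com/data/file.csv'
    assert url_to_string(url) == url                        # plain strings pass through
    assert url_to_string(urlparse(url)) == url              # ParseResult is rebuilt with geturl()
    assert url_to_string(['https', 'example.com', '/data/file.csv']) == url  # scheme, netloc, path

    try:
        url_to_string(42)
    except ValueError:
        pass  # any other type raises ValueError('url value not recognized')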

tests/cli/test_migrate.py

Lines changed: 14 additions & 0 deletions
@@ -23,6 +23,8 @@
 from renku import LocalClient
 from renku.cli import cli
 from renku.core.management.config import RENKU_HOME
+from renku.core.models.datasets import Dataset
+from renku.core.utils.urls import url_to_string


 @pytest.mark.migration
@@ -161,3 +163,15 @@ def test_migrations_run_once(isolated_runner, old_project):

     result = isolated_runner.invoke(cli, ['migrate', 'datasets'])
     assert 1 == result.exit_code
+
+
+@pytest.mark.migration
+def test_migration_broken_urls(dataset_metadata):
+    """Check that migration of broken dataset file URLs is string."""
+    dataset = Dataset.from_jsonld(
+        dataset_metadata,
+        client=LocalClient('.'),
+    )
+
+    for file_ in dataset.files:
+        assert isinstance(url_to_string(file_.url), str)

tests/core/commands/test_serialization.py

Lines changed: 28 additions & 12 deletions
@@ -18,9 +18,13 @@
 """Serialization tests for renku models."""

 import datetime
+from urllib.parse import quote, urljoin

 import yaml

+from renku.core.management.client import LocalClient
+from renku.core.models.datasets import Dataset
+

 def test_dataset_serialization(client, dataset, data_file):
     """Test Dataset serialization."""
@@ -71,20 +75,32 @@ def test_dataset_deserialization(client, dataset):
         assert type(creator.get(attribute)) is type_


-def test_doi_migration(doi_dataset):
-    """Test migration of id with doi."""
-    import yaml
-    from urllib.parse import quote, urljoin
+def test_dataset_doi_metadata(dataset_metadata):
+    """Check dataset metadata for correct DOI."""
     from renku.core.utils.doi import is_doi
-    from renku.core.models.datasets import Dataset
-    from renku.core.models.jsonld import NoDatesSafeLoader
+    dataset = Dataset.from_jsonld(
+        dataset_metadata,
+        client=LocalClient('.'),
+    )
+
+    if is_doi(dataset.identifier):
+        assert urljoin(
+            'https://doi.org', dataset.identifier
+        ) == dataset.same_as

+    assert dataset._id.endswith(
+        'datasets/{}'.format(quote(dataset.identifier, safe=''))
+    )
+
+
+def test_dataset_files_empty_metadata(dataset_metadata):
+    """Check parsing metadata of dataset files with empty filename."""
     dataset = Dataset.from_jsonld(
-        yaml.load(doi_dataset, Loader=NoDatesSafeLoader)
+        dataset_metadata,
+        client=LocalClient('.'),
     )

-    assert is_doi(dataset.identifier)
-    assert urljoin(
-        'https://localhost', 'datasets/' + quote(dataset.identifier, safe='')
-    ) == dataset._id
-    assert dataset.same_as == urljoin('https://doi.org', dataset.identifier)
+    files = [file.filename for file in dataset.files if not file.filename]
+
+    if files:
+        assert None in files
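The DOI assertion in test_dataset_doi_metadata relies on standard library URL handling: the identifier is percent-encoded with quote(..., safe='') for the dataset _id, and joined onto https://doi.org for same_as. A worked example with a hypothetical DOI (not the one in the fixture files):

    from urllib.parse import quote, urljoin

    doi = '10.5281/zenodo.3363060'  # hypothetical DOI for illustration
    assert quote(doi, safe='') == '10.5281%2Fzenodo.3363060'
    assert urljoin('https://doi.org', doi) == 'https://doi.org/10.5281/zenodo.3363060'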

0 commit comments