Skip to content

Commit 6bfb8be

Browse files
import dataset from renku aware repo (#765)
* feat: import data from other renku projects * doc: update command * feat: print full traceback when re-raising
1 parent 2f85457 commit 6bfb8be

File tree

9 files changed

+346
-71
lines changed

9 files changed

+346
-71
lines changed

conftest.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,3 +474,30 @@ def sleep_after():
474474
import time
475475
yield
476476
time.sleep(0.5)
477+
478+
479+
@pytest.fixture
480+
def remote_project(data_repository, directory_tree):
481+
"""A second Renku project with a dataset."""
482+
from renku.cli import cli
483+
484+
runner = CliRunner()
485+
486+
with runner.isolated_filesystem() as project_path:
487+
runner.invoke(cli, ['-S', 'init'])
488+
result = runner.invoke(
489+
cli, ['-S', 'dataset', 'create', 'remote-dataset']
490+
)
491+
assert 0 == result.exit_code
492+
493+
result = runner.invoke(
494+
cli,
495+
[
496+
'-S', 'dataset', 'add', '-s', 'file', '-s', 'dir2',
497+
'remote-dataset', directory_tree.strpath
498+
],
499+
catch_exceptions=False,
500+
)
501+
assert 0 == result.exit_code
502+
503+
yield runner, project_path

renku/cli/dataset.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,27 @@
115115
branch, commit, or tag. The value passed to this option must be a valid
116116
reference in the remote Git repository.
117117
118+
Updating a dataset:
119+
120+
After adding files from a remote Git repository, you can check for updates in
121+
those files with the ``renku dataset update`` command. This command checks all
122+
remote files and copies over new content if there is any. It does not delete
123+
files from the local dataset if they are deleted from the remote Git
124+
repository; to force deletion, use the ``--delete`` flag. You can update to a
125+
specific branch, commit, or tag by passing the ``--ref`` option.
126+
127+
You can limit the scope of updated files by specifying dataset names, using
128+
``--include`` and ``--exclude`` to filter based on file names, or using
129+
``--creators`` to filter based on creators. For example, the following command
130+
updates only CSV files from ``my-dataset``:
131+
132+
.. code-block:: console
133+
134+
$ renku dataset update -I '*.csv' my-dataset
135+
136+
Note that putting glob patterns in quotes is needed to tell the Unix shell not
137+
to expand them.
138+
118139
Tagging a dataset:
119140
120141
A dataset can be tagged with an arbitrary tag to refer to the dataset at that
@@ -649,8 +670,21 @@ def _init(lock, id_queue):
649670
@click.option(
650671
'--ref', default=None, help='Update to a specific commit/tag/branch.'
651672
)
652-
def update(names, creators, include, exclude, ref):
673+
@click.option(
674+
'--delete',
675+
is_flag=True,
676+
help='Delete local files that are deleted from remote.'
677+
)
678+
def update(names, creators, include, exclude, ref, delete):
653679
"""Updates files in dataset from a remote Git repo."""
654680
progress_context = partial(progressbar, label='Updating files')
655-
update_datasets(names, creators, include, exclude, ref, progress_context)
681+
update_datasets(
682+
names=names,
683+
creators=creators,
684+
include=include,
685+
exclude=exclude,
686+
ref=ref,
687+
delete=delete,
688+
progress_context=progress_context
689+
)
656690
click.secho('OK', fg='green')

renku/cli/exception_handler.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ def main(self, *args, **kwargs):
9090
return super().main(*args, **kwargs)
9191
except RenkuException as e:
9292
click.echo('Error: {}'.format(e))
93+
if e.__cause__ is not None:
94+
click.echo('\n{}'.format(traceback.format_exc()))
9395
exit_code = 1
9496
if isinstance(e, (ParameterError, UsageError)):
9597
exit_code = 2

renku/core/commands/dataset.py

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
from renku.core.commands.providers import ProviderFactory
4040
from renku.core.compat import contextlib
4141
from renku.core.errors import DatasetNotFound, InvalidAccessToken, \
42-
MigrationRequired, ParameterError
42+
MigrationRequired, ParameterError, UsageError
4343
from renku.core.management.datasets import DATASET_METADATA_PATHS
4444
from renku.core.management.git import COMMIT_DIFF_STRATEGY
4545
from renku.core.models.creators import Creator
@@ -166,6 +166,14 @@ def add_to_dataset(
166166
urlscontext=contextlib.nullcontext
167167
):
168168
"""Add data to a dataset."""
169+
if sources or destination:
170+
if len(urls) == 0:
171+
raise UsageError('No URL is specified')
172+
elif len(urls) > 1:
173+
raise UsageError(
174+
'Cannot add multiple URLs with --source or --destination'
175+
)
176+
169177
# check for identifier before creating the dataset
170178
identifier = extract_doi(
171179
with_metadata.identifier
@@ -207,8 +215,10 @@ def add_to_dataset(
207215
'"renku dataset add {0}" command with "--create" option for '
208216
'automatic dataset creation.'.format(name)
209217
)
210-
except (FileNotFoundError, git.exc.NoSuchPathError):
211-
raise ParameterError('Could not process \n{0}'.format('\n'.join(urls)))
218+
except (FileNotFoundError, git.exc.NoSuchPathError) as e:
219+
raise ParameterError(
220+
'Could not find paths/URLs: \n{0}'.format('\n'.join(urls))
221+
) from e
212222

213223

214224
@pass_local_client(clean=False, commit=False)
@@ -507,14 +517,20 @@ def import_dataset(
507517
)
508518

509519

510-
@pass_local_client(clean=True, commit=True, commit_only=DATASET_METADATA_PATHS)
520+
@pass_local_client(
521+
clean=True,
522+
commit=True,
523+
commit_only=DATASET_METADATA_PATHS,
524+
commit_empty=False
525+
)
511526
def update_datasets(
512527
client,
513528
names,
514529
creators,
515530
include,
516531
exclude,
517532
ref,
533+
delete,
518534
progress_context=contextlib.nullcontext
519535
):
520536
"""Update files from a remote Git repo."""
@@ -556,7 +572,15 @@ def update_datasets(
556572
with progress_context(
557573
possible_updates, item_show_func=lambda x: x.path if x else None
558574
) as progressbar:
559-
client.update_dataset_files(progressbar, ref)
575+
deleted_files = client.update_dataset_files(
576+
files=progressbar, ref=ref, delete=delete
577+
)
578+
579+
if deleted_files and not delete:
580+
click.echo(
581+
'Some files are deleted from remote. To also delete them locally '
582+
'run update command with `--delete` flag.'
583+
)
560584

561585

562586
def _include_exclude(file_path, include=None, exclude=None):

renku/core/errors.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,11 @@ def __init__(self, message, param_hint=None):
8484
if param_hint:
8585
if isinstance(param_hint, (tuple, list)):
8686
param_hint = ' / '.join('"{}"'.format(x) for x in param_hint)
87-
message = 'Invalid value for {}: {}'.format(param_hint, message)
87+
message = 'Invalid parameter value for {}: {}'.format(
88+
param_hint, message
89+
)
8890
else:
89-
message = 'Invalid value: {}'.format(message)
91+
message = 'Invalid parameter value: {}'.format(message)
9092

9193
super().__init__(message)
9294

@@ -365,5 +367,9 @@ class GitError(RenkuException):
365367
"""Raised when a remote Git repo cannot be accessed."""
366368

367369

368-
class UrlSchemaNotSupported(RenkuException):
369-
"""Raised when adding data from unsupported URL schemas."""
370+
class UrlSchemeNotSupported(RenkuException):
371+
"""Raised when adding data from unsupported URL schemes."""
372+
373+
374+
class OperationError(RenkuException):
375+
"""Raised when an operation at runtime raises an error."""

0 commit comments

Comments
 (0)