3636from renku .core import errors
3737from renku .core .management .clone import clone
3838from renku .core .management .config import RENKU_HOME
39- from renku .core .models .datasets import Dataset , DatasetFile , DatasetTag
39+ from renku .core .models .datasets import Dataset , DatasetFile , DatasetTag , \
40+ generate_default_short_name , is_dataset_name_valid
4041from renku .core .models .git import GitURL
4142from renku .core .models .locals import with_reference
4243from renku .core .models .provenance .agents import Person
@@ -85,31 +86,35 @@ def datasets(self):
8586 result = {}
8687 paths = (self .path / self .renku_datasets_path ).rglob (self .METADATA )
8788 for path in paths :
88- result [path ] = self .get_dataset (path )
89+ result [path ] = self .load_dataset_from_path (path )
8990 return result
9091
def load_dataset_from_path(self, path, commit=None):
    """Load the dataset stored in the metadata file at ``path``.

    ``path`` is converted with ``Path(path)``; when the result is not
    absolute it is resolved against ``self.path``. ``commit`` is
    forwarded unchanged to ``Dataset.from_yaml``.
    """
    metadata_path = Path(path)
    if not metadata_path.is_absolute():
        metadata_path = self.path / metadata_path
    return Dataset.from_yaml(metadata_path, client=self, commit=commit)
9698
def get_dataset_path(self, name):
    """Return the metadata path of the dataset called ``name``.

    The direct location ``renku_datasets_path / name / METADATA`` is
    tried first. When nothing exists there, ``name`` is looked up as the
    link reference ``datasets/<name>``.

    Returns ``None`` when the reference lookup raises
    ``errors.ParameterError``. A path obtained through the reference is
    returned without checking that it exists.
    """
    direct_path = self.renku_datasets_path / name / self.METADATA
    if direct_path.exists():
        return direct_path

    try:
        link = LinkReference(client=self, name='datasets/' + name)
        return link.reference
    except errors.ParameterError:
        return None
106111
def load_dataset(self, name=None):
    """Return the dataset registered under ``name``, or ``None``.

    ``None`` is returned when no name is given, when no path can be
    resolved for the name, or when the resolved path does not exist.
    """
    if not name:
        return None

    metadata_path = self.get_dataset_path(name)
    if not metadata_path or not metadata_path.exists():
        return None

    return self.load_dataset_from_path(metadata_path)
113118
114119 @contextmanager
115120 def with_dataset (self , name = None , identifier = None , create = False ):
@@ -118,50 +123,25 @@ def with_dataset(self, name=None, identifier=None, create=False):
118123 clean_up_required = False
119124
120125 if dataset is None :
121- # Avoid nested datasets: name mustn't have '/' in it
122- if len (Path (name ).parts ) > 1 :
123- raise errors .ParameterError (
124- 'Dataset name {} is not valid.' .format (name )
125- )
126-
127126 if not create :
128127 raise errors .DatasetNotFound
129- clean_up_required = True
130- dataset_ref = None
131- identifier = str (uuid .uuid4 ())
132- path = (self .renku_datasets_path / identifier / self .METADATA )
133- try :
134- path .parent .mkdir (parents = True , exist_ok = False )
135- except FileExistsError :
136- raise errors .DatasetExistsError (
137- 'Dataset with reference {} exists' .format (path .parent )
138- )
139-
140- with with_reference (path ):
141- dataset = Dataset (
142- identifier = identifier , name = name , client = self
143- )
144-
145- if name :
146- dataset_ref = LinkReference .create (
147- client = self , name = 'datasets/' + name
148- )
149- dataset_ref .set_reference (path )
150128
129+ clean_up_required = True
130+ dataset , path , dataset_ref = self .create_dataset (name )
151131 elif create :
152132 raise errors .DatasetExistsError (
153133 'Dataset exists: "{}".' .format (name )
154134 )
155135
156- dataset_path = self .path / self .datadir / dataset .name
136+ dataset_path = self .path / self .datadir / dataset .short_name
157137 dataset_path .mkdir (parents = True , exist_ok = True )
158138
159139 try :
160140 yield dataset
161141 except Exception :
162142 # TODO use a general clean-up strategy
163143 # https://github.com/SwissDataScienceCenter/renku-python/issues/736
164- if clean_up_required and dataset_ref :
144+ if clean_up_required :
165145 dataset_ref .delete ()
166146 shutil .rmtree (path .parent , ignore_errors = True )
167147 raise
@@ -174,6 +154,54 @@ def with_dataset(self, name=None, identifier=None, create=False):
174154
175155 dataset .to_yaml ()
176156
def create_dataset(
    self, name, short_name=None, description='', creators=()
):
    """Create a new dataset together with its link reference.

    :param name: Name of the dataset; must be non-empty.
    :param short_name: Identifier used for the ``datasets/<short_name>``
        reference. When empty it is derived from ``name`` with
        ``generate_default_short_name``.
    :param description: Description passed to the ``Dataset``.
    :param creators: Passed to the ``Dataset`` as ``creator``.
    :returns: A ``(dataset, path, dataset_ref)`` tuple: the new dataset,
        the path of its metadata file and its link reference.
    :raises errors.ParameterError: If ``name`` is empty or the short name
        is rejected by ``is_dataset_name_valid``.
    :raises errors.DatasetExistsError: If a dataset with the short name
        can already be loaded, or the freshly generated identifier
        directory already exists.
    """
    if not name:
        raise errors.ParameterError('Dataset name must be provided.')

    if not short_name:
        short_name = generate_default_short_name(name, None)

    if not is_dataset_name_valid(short_name):
        raise errors.ParameterError(
            'Dataset name "{}" is not valid.'.format(short_name)
        )

    if self.load_dataset(name=short_name):
        raise errors.DatasetExistsError(
            'Dataset exists: "{}".'.format(short_name)
        )

    identifier = str(uuid.uuid4())
    path = (self.renku_datasets_path / identifier / self.METADATA)
    try:
        path.parent.mkdir(parents=True, exist_ok=False)
    except FileExistsError:
        raise errors.DatasetExistsError(
            'Dataset with reference {} exists'.format(path.parent)
        )

    # From here on the identifier directory exists on disk. Any failure
    # must remove it (and the reference, if it was created) so that a
    # half-created dataset is not left behind.
    dataset_ref = None
    try:
        with with_reference(path):
            dataset = Dataset(
                client=self,
                identifier=identifier,
                name=name,
                short_name=short_name,
                description=description,
                creator=creators
            )

        dataset_ref = LinkReference.create(
            client=self, name='datasets/' + short_name
        )
        dataset_ref.set_reference(path)

        dataset.to_yaml()
    except Exception:
        # Remove the directory first: this step cannot raise, so it is
        # done even if deleting the reference fails afterwards.
        shutil.rmtree(path.parent, ignore_errors=True)
        if dataset_ref is not None:
            dataset_ref.delete()
        raise

    return dataset, path, dataset_ref
204+
177205 def add_data_to_dataset (
178206 self ,
179207 dataset ,
@@ -186,7 +214,7 @@ def add_data_to_dataset(
186214 ):
187215 """Import the data into the data directory."""
188216 warning_message = ''
189- dataset_path = self .path / self .datadir / dataset .name
217+ dataset_path = self .path / self .datadir / dataset .short_name
190218
191219 destination = destination or Path ('.' )
192220 destination = self ._resolve_path (dataset_path , destination )
0 commit comments