Skip to content

Commit af7d2c5

Browse files
Merge pull request #10 from giannisdoukas/dumpables
add dumpables & documentation
2 parents 13edcd5 + fb7bfc4 commit af7d2c5

File tree

9 files changed

+392
-60
lines changed

9 files changed

+392
-60
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,3 +247,6 @@ tmp.py
247247
cwlbuild
248248
/tests/repo-like/result.yaml
249249
/tests/repo-like/messages.txt
250+
/tests/binary_message
251+
/tests/message
252+
/tests/message2

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ with open(result_file, 'w') as f:
2323
```
2424

2525
IPython2CWL is based on [repo2docker](https://github.com/jupyter/repo2docker), the same tool
26-
used by [mybinder](https://mybinder.org/). Now, by writing Jupyter Notebook and publish them, including repo2docker
26+
used by [mybinder](https://mybinder.org/). Now, by writing Jupyter Notebook and publishing them, including repo2docker
2727
configuration, the community can not only execute the notebooks remotely but can also use them as steps in scientific
2828
workflows.
2929

@@ -37,7 +37,7 @@ pip install ipython2cwl
3737
### Example
3838

3939
```
40-
jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o cwlbuild
40+
jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o .
4141
```
4242

4343
### Docs

docs/index.rst

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,53 @@ IPython2CWL is a tool for converting `IPython <https://ipython.org/>`_ Jupyter N
3030
------------------------------------------------------------------------------------------
3131

3232
IPython2CWL is based on `repo2docker <https://github.com/jupyter/repo2docker>`_, the same tool
33-
used by `mybinder <https://mybinder.org/>`_. Now, by writing Jupyter Notebook and publish them, including repo2docker
34-
configuration, the community can not only execute the notebooks remotely but also to use them as steps in scientific
33+
used by `mybinder <https://mybinder.org/>`_. Now, by writing Jupyter Notebook and publishing them, including repo2docker
34+
configuration, the community can not only execute the notebooks remotely but can also use them as steps in scientific
3535
workflows.
3636

37-
* Install ipython2cwl: :code:`pip install python2cwl`
37+
* `Install ipython2cwl <https://pypi.org/project/ipython2cwl/>`_: :code:`pip install ipython2cwl`
3838
* Ensure that you have docker running
3939
* Create a directory to store the generated cwl files, for example cwlbuild
4040
* Execute :code:`jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o cwlbuild`
4141

42-
Indices and tables
43-
==================
42+
HOW DOES IT WORK?
43+
------------------
44+
45+
IPython2CWL parses each IPython notebook and finds the variables with the typing annotations. For each input variable,
46+
the assignment of that variable will be generalised as a command line argument. Each output variable will be mapped
47+
in the cwl description as an output file.
48+
49+
SUPPORTED TYPES
50+
------------------
51+
52+
.. automodule:: ipython2cwl.iotypes
53+
:members:
54+
55+
56+
THAT'S COOL! WHAT ABOUT LIST & OPTIONAL ARGUMENTS?
57+
"""""""""""""""""""""""""""""""""""""""""""""""""""
58+
59+
The basic input data types can be combined with the List and Optional annotations. For example, write the following
60+
annotation:
61+
62+
.. code-block:: python
63+
64+
file_inputs: List[CWLFilePathInput] = ['data1.txt', 'data2.txt', 'data3.txt']
65+
example: Optional[CWLStringInput] = None
66+
67+
68+
SEEMS INTERESTING! WHAT ABOUT A DEMO?
69+
----------------------------------------
70+
71+
If you would like to see a demo before you start annotating your notebooks, check here:
72+
`github.com/giannisdoukas/ipython2cwl-demo <https://github.com/giannisdoukas/ipython2cwl-demo>`_
73+
74+
75+
WHAT IF I WANT TO VALIDATE THAT THE GENERATED SCRIPTS ARE CORRECT?
76+
------------------------------------------------------------------
77+
78+
All the generated scripts are stored in the docker image under the directory :code:`/app/cwl/bin`. You can see the list
79+
of the files by running :code:`docker run [IMAGE_ID] find /app/cwl/bin/ -type f`.
80+
81+
4482

45-
* :ref:`genindex`
46-
* :ref:`modindex`
47-
* :ref:`search`

ipython2cwl/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.0.3"
1+
__version__ = "0.0.4"

ipython2cwl/cwltoolextractor.py

Lines changed: 97 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,29 @@
55
import tarfile
66
import tempfile
77
from collections import namedtuple
8+
from copy import deepcopy
89
from pathlib import Path
9-
from typing import Dict, Any
10+
from typing import Dict, Any, List
1011

1112
import astor
1213
import nbconvert
1314
import yaml
1415
from nbformat.notebooknode import NotebookNode
1516

16-
from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput
17+
from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput, \
18+
CWLDumpableFile, CWLDumpableBinaryFile, CWLDumpable
1719
from .requirements_manager import RequirementsManager
1820

1921
with open(os.sep.join([os.path.abspath(os.path.dirname(__file__)), 'templates', 'template.dockerfile'])) as f:
2022
DOCKERFILE_TEMPLATE = f.read()
2123
with open(os.sep.join([os.path.abspath(os.path.dirname(__file__)), 'templates', 'template.setup'])) as f:
2224
SETUP_TEMPLATE = f.read()
2325

26+
# Record describing one annotated variable collected by the extractor.
# Fields: the variable name, its CWL type, its argparse type, whether it is
# required, the input/output flags, and `value`. In this file `value` is None
# for inputs and, for outputs, the string naming the produced file.
_VariableNameTypePair = namedtuple(
27+
'VariableNameTypePair',
28+
['name', 'cwl_typeof', 'argparse_typeof', 'required', 'is_input', 'is_output', 'value']
29+
)
2430

25-
# TODO: check if supports recursion if main function exists
2631

2732
class AnnotatedVariablesExtractor(ast.NodeTransformer):
2833
input_type_mapper = {
@@ -52,12 +57,19 @@ class AnnotatedVariablesExtractor(ast.NodeTransformer):
5257
}}
5358

5459
output_type_mapper = {
55-
CWLFilePathOutput.__name__
60+
(CWLFilePathOutput.__name__,)
61+
}
62+
63+
dumpable_mapper = {
64+
(CWLDumpableFile.__name__,): "with open('{var_name}', 'w') as f:\n\tf.write({var_name})",
65+
(CWLDumpableBinaryFile.__name__,): "with open('{var_name}', 'wb') as f:\n\tf.write({var_name})",
66+
(CWLDumpable.__name__, CWLDumpable.dump.__name__): None,
5667
}
5768

5869
def __init__(self, *args, **kwargs):
5970
super().__init__(*args, **kwargs)
60-
self.extracted_nodes = []
71+
self.extracted_variables: List = []
72+
self.to_dump: List = []
6173

6274
def __get_annotation__(self, type_annotation):
6375
annotation = None
@@ -70,30 +82,84 @@ def __get_annotation__(self, type_annotation):
7082
annotation = self.__get_annotation__(ann_expr.value)
7183
elif isinstance(type_annotation, ast.Subscript):
7284
annotation = (type_annotation.value.id, *self.__get_annotation__(type_annotation.slice.value))
85+
elif isinstance(type_annotation, ast.Call):
86+
annotation = (type_annotation.func.value.id, type_annotation.func.attr)
7387
return annotation
7488

89+
@classmethod
def conv_AnnAssign_to_Assign(cls, node):
    """Convert an annotated assignment into a plain assignment.

    The type annotation is dropped. The target and value nodes are reused
    as-is (they are not copied).

    :param node: the ``ast.AnnAssign`` node to convert.
    :return: a new ``ast.Assign`` whose single target is ``node.target``,
        whose value is ``node.value`` and which carries the source location
        of ``node``.
    """
    assign = ast.Assign(targets=[node.target], value=node.value)
    # copy_location transfers lineno/col_offset and, on interpreters that
    # track them, end_lineno/end_col_offset too, so no position info is lost.
    return ast.copy_location(assign, node)
97+
98+
def _visit_input_ann_assign(self, node, annotation):
    """Record an annotated input variable.

    :param node: the ``ast.AnnAssign`` node declaring the input variable.
    :param annotation: key into ``self.input_type_mapper``; the mapped entry
        supplies the CWL type (index 0) and the argparse type (index 1).
    :return: always ``None``.
    """
    type_info = self.input_type_mapper[annotation]
    variable_name = node.target.id
    cwl_type = type_info[0]
    argparse_type = type_info[1]
    # A CWL type ending in '?' marks the input as not required.
    is_required = not cwl_type.endswith('?')
    record = _VariableNameTypePair(
        name=variable_name,
        cwl_typeof=cwl_type,
        argparse_typeof=argparse_type,
        required=is_required,
        is_input=True,
        is_output=False,
        value=None,
    )
    self.extracted_variables.append(record)
    return None
104+
105+
def _visit_default_dumper(self, node, dumper):
    """Handle a dumpable variable that has a built-in dump template.

    :param node: the ``ast.AnnAssign`` node of the dumpable variable.
    :param dumper: source-code template containing ``{var_name}``
        placeholders; it is formatted with the variable name and parsed.
    :return: the assignment with its annotation removed.
    """
    variable_name = node.target.id
    dump_source = dumper.format(var_name=variable_name)
    # Queue the parsed dump statements; they are kept apart from the tree
    # being visited and collected in ``self.to_dump``.
    self.to_dump.append(ast.parse(dump_source).body)
    record = _VariableNameTypePair(
        name=variable_name,
        cwl_typeof=None,
        argparse_typeof=None,
        required=None,
        is_input=False,
        is_output=True,
        # The variable name is recorded as the output value.
        value=variable_name,
    )
    self.extracted_variables.append(record)
    return self.conv_AnnAssign_to_Assign(node)
112+
113+
def _visit_user_defined_dumper(self, node):
    """Handle a variable annotated with a user-defined dump call.

    The annotation is expected to be a call whose first argument is an
    attribute access (``obj.method``) and whose second argument is a string
    literal naming the output file. A statement ``obj.method(<remaining
    args>, <keywords>)`` is queued in ``self.to_dump`` and the variable is
    recorded as an output whose value is that file name.

    Every lookup that can fail on a malformed annotation is performed before
    any state is modified, so a failure leaves ``self.to_dump`` and
    ``self.extracted_variables`` untouched.

    :param node: the ``ast.AnnAssign`` node carrying the dump annotation.
    :return: the assignment with its annotation removed.
    :raises IndexError: if the annotation call has fewer than two arguments.
    :raises AttributeError: if the arguments do not have the expected shape.
    """
    annotation = node.annotation
    dump_method = annotation.args[0]

    # Resolve all fallible pieces up front (see docstring).
    receiver = deepcopy(dump_method.value)
    method_name = dump_method.attr
    output_file = annotation.args[1].s
    variable_name = node.target.id

    receiver.ctx = ast.Load()
    dump_call = ast.Expr(
        value=ast.Call(
            func=ast.Attribute(value=receiver, attr=method_name, ctx=ast.Load()),
            # The first annotation argument is the method itself; the rest
            # are forwarded to it unchanged.
            args=annotation.args[1:],
            keywords=annotation.keywords,
        )
    )
    # Line numbers are 1-based, so take the location from the originating
    # statement and let fix_missing_locations fill in the new child nodes.
    ast.copy_location(dump_call, node)
    ast.fix_missing_locations(dump_call)

    self.to_dump.append([dump_call])
    self.extracted_variables.append(_VariableNameTypePair(
        name=variable_name,
        cwl_typeof=None,
        argparse_typeof=None,
        required=None,
        is_input=False,
        is_output=True,
        value=output_file,
    ))
    # removing type annotation
    return self.conv_AnnAssign_to_Assign(node)
137+
138+
def _visit_output_type(self, node):
    """Record an annotated output variable and strip its annotation.

    The assigned value is expected to be a string literal; its text is
    stored as the output's value.

    :param node: the ``ast.AnnAssign`` node of the output variable.
    :return: the assignment with its annotation removed.
    """
    self.extracted_variables.append(_VariableNameTypePair(
        name=node.target.id,
        cwl_typeof=None,
        argparse_typeof=None,
        required=None,
        is_input=False,
        is_output=True,
        value=node.value.s,
    ))
    # removing type annotation (shared helper, same as the dumper visitors)
    return self.conv_AnnAssign_to_Assign(node)
149+
75150
def visit_AnnAssign(self, node):
76151
try:
77152
annotation = self.__get_annotation__(node.annotation)
78153
if annotation in self.input_type_mapper:
79-
mapper = self.input_type_mapper[annotation]
80-
self.extracted_nodes.append(
81-
(node, mapper[0], mapper[1], not mapper[0].endswith('?'), True, False)
82-
)
83-
return None
84-
85-
elif (isinstance(node.annotation, ast.Name) and node.annotation.id in self.output_type_mapper) or \
86-
(isinstance(node.annotation, ast.Str) and node.annotation.s in self.output_type_mapper):
87-
self.extracted_nodes.append(
88-
(node, None, None, None, False, True)
89-
)
90-
# removing type annotation
91-
return ast.Assign(
92-
col_offset=node.col_offset,
93-
lineno=node.lineno,
94-
targets=[node.target],
95-
value=node.value
96-
)
154+
return self._visit_input_ann_assign(node, annotation)
155+
elif annotation in self.dumpable_mapper:
156+
dumper = self.dumpable_mapper[annotation]
157+
if dumper is not None:
158+
return self._visit_default_dumper(node, dumper)
159+
else:
160+
return self._visit_user_defined_dumper(node)
161+
elif annotation in self.output_type_mapper:
162+
return self._visit_output_type(node)
97163
except Exception:
98164
pass
99165
return node
@@ -123,12 +189,6 @@ class AnnotatedIPython2CWLToolConverter:
123189
"""
124190

125191
_code: str
126-
127-
_VariableNameTypePair = namedtuple(
128-
'VariableNameTypePair',
129-
['name', 'cwl_typeof', 'argparse_typeof', 'required', 'is_input', 'is_output', 'value']
130-
)
131-
132192
"""The annotated python code to convert."""
133193

134194
def __init__(self, annotated_ipython_code: str):
@@ -137,19 +197,15 @@ def __init__(self, annotated_ipython_code: str):
137197

138198
self._code = annotated_ipython_code
139199
extractor = AnnotatedVariablesExtractor()
140-
self._tree = ast.fix_missing_locations(extractor.visit(ast.parse(self._code)))
200+
self._tree = extractor.visit(ast.parse(self._code))
201+
[self._tree.body.extend(d) for d in extractor.to_dump]
202+
self._tree = ast.fix_missing_locations(self._tree)
141203
self._variables = []
142-
for node, cwl_type, click_type, required, is_input, is_output in extractor.extracted_nodes:
143-
if is_input:
144-
self._variables.append(
145-
self._VariableNameTypePair(node.target.id, cwl_type, click_type, required, is_input, is_output,
146-
None)
147-
)
148-
if is_output:
149-
self._variables.append(
150-
self._VariableNameTypePair(node.target.id, cwl_type, click_type, required, is_input, is_output,
151-
node.value.s)
152-
)
204+
for variable in extractor.extracted_variables: # type: _VariableNameTypePair
205+
if variable.is_input:
206+
self._variables.append(variable)
207+
if variable.is_output:
208+
self._variables.append(variable)
153209

154210
@classmethod
155211
def from_jupyter_notebook_node(cls, node: NotebookNode) -> 'AnnotatedIPython2CWLToolConverter':

0 commit comments

Comments
 (0)