Skip to content

Commit 44cc26b

Browse files
committed
WIP: Skeleton for ImageJ tutorial ingestion
1 parent cff68c5 commit 44cc26b

File tree

1 file changed

+86
-0
lines changed

1 file changed

+86
-0
lines changed

_search/server/tutorials.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#!/bin/env python
2+
3+
# Parse ImageJ tutorials into documents for
4+
# use with their own searchable collection.
5+
6+
import json
import logging
import os
import traceback
from pathlib import Path

import yaml

from parseutil import first_sentence
9+
10+
11+
logger = logging.getLogger(__name__)
12+
13+
14+
def is_imagej_tutorials(root):
    """
    Tell whether root looks like a checkout of imagej/tutorials.

    The folder qualifies only when it contains both a 'java' and a
    'notebooks' subdirectory.
    """
    required_subdirs = ('java', 'notebooks')
    return all(
        os.path.isdir(os.path.join(root, name)) for name in required_subdirs
    )
18+
19+
20+
def parse_java_source(root, path):
    """
    Build a search document from a single Java source file.

    :param root: Root of the tutorials folder. Currently unused; kept so
                 this parser has the same (root, path) signature as
                 parse_notebook.
    :param path: Path of the Java source file to read.
    :return: A dict with a single 'content' key holding the file's text.
    :raises OSError: If the file cannot be opened or read.
    :raises UnicodeDecodeError: If the file is not valid UTF-8.
    """
    logger.debug(f'Parsing Java source file {path}...')

    # Java source is plain text, so read it directly rather than going
    # through a JSON parser.
    with open(path, encoding='utf-8') as f:
        content = f.read()

    # This is dumb -- do we want to do better?
    # For now the whole file is indexed verbatim as the document content.
    return {'content': content}
31+
32+
33+
def _multiline_text(value):
    """Return a notebook text field (a str or a list of str) as one str."""
    if value is None:
        return ''
    if isinstance(value, str):
        return value
    return ''.join(value)


def _process_cell(cell):
    """
    Extract the searchable text of one notebook cell.

    Collects the cell's 'source' followed by any textual outputs: the
    'text' of an output when present, otherwise its 'data' entry for
    'text/plain'. Each non-empty piece is newline-terminated so adjacent
    cells do not run together.

    NOTE(review): key names follow the nbformat v4 layout -- confirm that
    is the only notebook format that needs to be supported.
    """
    pieces = [_multiline_text(cell.get('source'))]
    for output in cell.get('outputs') or []:
        if 'text' in output:
            pieces.append(_multiline_text(output['text']))
        else:
            data = output.get('data') or {}
            pieces.append(_multiline_text(data.get('text/plain')))

    return ''.join(
        piece if piece.endswith('\n') else piece + '\n'
        for piece in pieces
        if piece
    )


def parse_notebook(root, path):
    """
    Build a search document from a single Jupyter notebook.

    :param root: Root of the tutorials folder. Currently unused; kept so
                 this parser has the same (root, path) signature as
                 parse_java_source.
    :param path: Path of the notebook (.ipynb) file to read.
    :return: A dict with a single 'content' key holding the concatenated
             text of every cell in the notebook.
    :raises OSError: If the file cannot be opened or read.
    :raises ValueError: If the file is not valid JSON.
    :raises KeyError: If the notebook has no top-level 'cells' entry.
    """
    logger.debug(f'Parsing notebook {path}...')

    # Notebooks are JSON documents; json.load parses straight from the file.
    with open(path, encoding='utf-8') as f:
        data = json.load(f)

    # Join once instead of growing the string with += for every cell.
    return {'content': ''.join(_process_cell(cell) for cell in data['cells'])}
46+
47+
48+
def find_resources(root, suffix):
    """
    Find every file under root whose name ends with suffix.

    The search is recursive. Directories are never returned, even when
    their name ends with the suffix.

    :param root: Directory to search.
    :param suffix: File name ending to match, e.g. '.java' or '.ipynb'.
    :return: A sorted list of matching paths as strings; empty when
             nothing matches.
    """
    matches = (
        str(candidate)
        for candidate in Path(root).rglob('*')
        if candidate.name.endswith(suffix) and candidate.is_file()
    )
    # Sort so callers process files in a stable order between runs.
    return sorted(matches)
51+
52+
53+
def _parse_resources(root, paths, parse):
    """Parse each path with parse(root, path), logging and skipping failures."""
    documents = []
    # Treat a finder that returns None as "no matches" rather than crashing.
    for path in paths or ():
        try:
            doc = parse(root, path)
        except Exception:
            # Best effort: one unreadable file must not abort the whole load.
            # logger.exception records the traceback alongside the message.
            logger.exception(f'Failed to parse {path}:')
            continue
        if doc:
            documents.append(doc)
    return documents


def load_imagej_tutorials(root):
    """
    Loads the content from the given imagej/tutorials folder.
    See: https://github.com/imagej/tutorials

    Java source files under 'java' are parsed first, then Jupyter
    notebooks under 'notebooks'. Files that fail to parse are logged and
    skipped.

    :param root: Path to a folder containing 'java' and 'notebooks'
                 subdirectories.
    :return: A list of parsed documents, Java sources first.
    :raises ValueError: If root lacks a 'java' or 'notebooks' directory.
    """
    if not is_imagej_tutorials(root):
        raise ValueError(
            f'The path {root} does not appear to be an imagej/tutorials folder.'
        )

    java = os.path.join(root, 'java')
    notebooks = os.path.join(root, 'notebooks')

    logger.info('Loading content...')

    java_docs = _parse_resources(
        root, find_resources(java, '.java'), parse_java_source
    )
    logger.info(f'Loaded {len(java_docs)} documents from Java source files')

    # Count notebooks separately so the log line reports notebooks only,
    # not the running total.
    notebook_docs = _parse_resources(
        root, find_resources(notebooks, '.ipynb'), parse_notebook
    )
    logger.info(f'Loaded {len(notebook_docs)} documents from Jupyter notebooks')

    return java_docs + notebook_docs

0 commit comments

Comments
 (0)