Skip to content

Commit 4a4bfad

Browse files
committed
Cosmetic changes
1 parent 66c7b89 commit 4a4bfad

File tree

1 file changed

+22
-19
lines changed

1 file changed

+22
-19
lines changed

extruct/rdfa.py

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@
3131

3232

3333
class RDFaExtractor(object):
34-
35-
# expands namespace to match with returned json (ex: og -> 'http://ogp.me/ns#')
36-
def replaceNS(self, prop, html_element, head_element):
34+
35+
def _replaceNS(self, prop, html_element, head_element):
36+
"""Expand namespace to match with returned json (e.g.: og -> 'http://ogp.me/ns#')"""
3737

3838
# context namespaces taken from pyrdfa3
3939
# https://github.com/RDFLib/PyRDFa/blob/master/pyRdfa/initialcontext.py
@@ -94,26 +94,29 @@ def replaceNS(self, prop, html_element, head_element):
9494
return context[prefix] + prop.split(':')[1]
9595

9696
return prop
97-
98-
# sorts the rdfa tags in jsonld string
99-
def sort(self, unordered, ordered):
97+
98+
def _sort(self, unordered, ordered):
99+
"""Sort the rdfa tags in jsonld string"""
100100
idx_for_value = dict(reversed([(value, idx) for idx, value in enumerate(ordered)]))
101101
unordered.sort(key=lambda props: idx_for_value.get(props.get('@value'), len(ordered)))
102102

103-
104-
# fixes order of rdfa tags in jsonld string
105-
# by comparing with order in document object
106-
def fix_order(self, jsonld_string, document):
103+
104+
def _fix_order(self, jsonld_string, document):
105+
"""
106+
Fix order of rdfa tags in jsonld string
107+
by checking the appearance order in the HTML
108+
"""
107109
html_element = document.xpath('/html')[0]
108110
head_element = document.xpath('//head')[0]
109-
110-
meta_tags = defaultdict(list)
111+
112+
# Stores the values of each property in appearance order
113+
values_for_property = defaultdict(list)
111114

112115
for meta_tag in head_element.xpath("meta[@property]"):
113-
expanded_property = self.replaceNS(meta_tag.attrib['property'],
114-
html_element,
115-
head_element)
116-
meta_tags[expanded_property].append(meta_tag.get('content'))
116+
expanded_property = self._replaceNS(meta_tag.attrib['property'],
117+
html_element,
118+
head_element)
119+
values_for_property[expanded_property].append(meta_tag.get('content'))
117120

118121
json_objects = json.loads(jsonld_string)
119122

@@ -122,7 +125,7 @@ def fix_order(self, jsonld_string, document):
122125

123126
for key in keys:
124127
if type(json_object[key]) is list and len(json_object[key]) > 1:
125-
self.sort(json_object[key], meta_tags[key])
128+
self._sort(json_object[key], values_for_property[key])
126129

127130
return json_objects
128131

@@ -145,8 +148,8 @@ def extract_items(self, document, base_url=None, expanded=True):
145148
jsonld_string = g.serialize(format='json-ld', auto_compact=not expanded).decode('utf-8')
146149

147150
try:
148-
# hack to fix the ordering of duplicate properties (see issue 116)
151+
# hack to fix the ordering of multi-value properties (see issue 116)
149152
# it should be disabled once PyRDFA fixes itself
150-
return self.fix_order(jsonld_string, document)
153+
return self._fix_order(jsonld_string, document)
151154
except:
152155
return json.loads(jsonld_string)

0 commit comments

Comments
 (0)