Skip to content

Commit 00fa8bf

Browse files
committed
* Added a faster implementation
* Removed list suffix
1 parent e138b11 commit 00fa8bf

File tree

3 files changed

+19
-20
lines changed

3 files changed

+19
-20
lines changed

extruct/_extruct.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def extract(htmlstring,
135135
for syntax, uniform, raw, schema_context in uniform_processors:
136136
try:
137137
if syntax == 'opengraph':
138-
output[syntax] = uniform(raw, with_og_array)
138+
output[syntax] = uniform(raw, with_og_array=with_og_array)
139139
else:
140140
output[syntax] = uniform(raw, schema_context)
141141
except Exception as e:

extruct/uniform.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,27 @@
11
from six.moves.urllib.parse import urlparse, urljoin
22

33

4-
def _uopengraph(extracted, with_og_arr=False):
4+
def _uopengraph(extracted, with_og_array=False):
55
out = []
66
for obj in extracted:
7+
# In order of appearance in the page
78
properties = list(reversed(obj['properties']))
89
# Set of non empty properties
910
non_empty_props = {k for k, v in properties if v and v.strip()}
10-
# Set of repeated properties with at least 2 non empty values
11-
repeated_props = {}
12-
if with_og_arr:
13-
repeated_props = {k for k in non_empty_props
14-
if len([i for i, v in properties if i == k and (v and v.strip())]) > 1}
15-
# Add properties that either have only empty values or are duplicated and
16-
# have only 1 non empty value
17-
flattened = {k: v for k, v in properties
18-
if k not in repeated_props and (k not in non_empty_props or (v and v.strip()))}
19-
if with_og_arr:
20-
# Add list suffix for those with duplicated and non empty values
21-
for k in repeated_props:
22-
flattened[k+"_list"] = []
23-
for k, v in properties:
24-
if k in repeated_props:
25-
flattened[k+"_list"].append(v)
11+
flattened = {}
12+
for k, v in properties:
13+
if k not in non_empty_props:
14+
flattened[k] = v
15+
elif v and v.strip():
16+
# If og_array isn't required or key isn't in flattened already
17+
if not with_og_array or k not in flattened:
18+
flattened[k] = v
19+
else:
20+
if isinstance(flattened[k], list):
21+
flattened[k].append(v)
22+
else:
23+
flattened[k] = [flattened[k], v]
24+
2625
t = flattened.pop('og:type', None)
2726
if t:
2827
flattened['@type'] = t

tests/test_uniform.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ def test_uopengraph_with_og_array(self):
3939
"og:title": "Elysian Fields",
4040
"og:description": "Buy tickets for an upcoming Elysian Fields concert near you. List of all Elysian Fields tickets and tour dates for 2017.",
4141
"og:url": "http://www.songkick.com/artists/236156-elysian-fields",
42-
"og:image_list": [ "http://images.sk-static.com/SECONDARY_IMAGE.jpg",
43-
"http://images.sk-static.com/images/media/img/col4/20100330-103600-169450.jpg"],
42+
"og:image": ["http://images.sk-static.com/SECONDARY_IMAGE.jpg",
43+
"http://images.sk-static.com/images/media/img/col4/20100330-103600-169450.jpg"],
4444
}]
4545
body = get_testdata('songkick', 'elysianfields.html')
4646
data = extruct.extract(body, syntaxes=['opengraph'], uniform=True, with_og_array=True)

0 commit comments

Comments
 (0)