|
1 | 1 | from six.moves.urllib.parse import urlparse, urljoin |
2 | 2 |
|
3 | 3 |
|
4 | | -def _uopengraph(extracted, with_og_arr=False): |
| 4 | +def _uopengraph(extracted, with_og_array=False): |
5 | 5 | out = [] |
6 | 6 | for obj in extracted: |
| 7 | + # In order of appearance in the page |
7 | 8 | properties = list(reversed(obj['properties'])) |
8 | 9 | # Set of non empty properties |
9 | 10 | non_empty_props = {k for k, v in properties if v and v.strip()} |
10 | | - # Set of repeated properties with at least 2 non empty values |
11 | | - repeated_props = {} |
12 | | - if with_og_arr: |
13 | | - repeated_props = {k for k in non_empty_props |
14 | | - if len([i for i, v in properties if i == k and (v and v.strip())]) > 1} |
15 | | - # Add properties that either have only empty values or are duplicated and |
16 | | - # have only 1 non empty value |
17 | | - flattened = {k: v for k, v in properties |
18 | | - if k not in repeated_props and (k not in non_empty_props or (v and v.strip()))} |
19 | | - if with_og_arr: |
20 | | - # Add list suffix for those with duplicated and non empty values |
21 | | - for k in repeated_props: |
22 | | - flattened[k+"_list"] = [] |
23 | | - for k, v in properties: |
24 | | - if k in repeated_props: |
25 | | - flattened[k+"_list"].append(v) |
| 11 | + flattened = {} |
| 12 | + for k, v in properties: |
| 13 | + if k not in non_empty_props: |
| 14 | + flattened[k] = v |
| 15 | + elif v and v.strip(): |
| 16 | + # If og_array isn't required or key isn't in flattened already |
| 17 | + if not with_og_array or k not in flattened: |
| 18 | + flattened[k] = v |
| 19 | + else: |
| 20 | + if isinstance(flattened[k], list): |
| 21 | + flattened[k].append(v) |
| 22 | + else: |
| 23 | + flattened[k] = [flattened[k], v] |
| 24 | + |
26 | 25 | t = flattened.pop('og:type', None) |
27 | 26 | if t: |
28 | 27 | flattened['@type'] = t |
|
0 commit comments