|
4 | 4 | class NestedParser: |
5 | 5 | _valid = None |
6 | 6 | errors = None |
7 | | - _reg = re.compile(r"\[|\]") |
8 | 7 |
|
def __init__(self, data, options=None):
    """Keep the raw input and resolve the parser options.

    Args:
        data: Input to parse; stored unchanged as ``self.data``.
        options: Optional mapping of option overrides handed to
            ``_merge_options``. ``None`` means "no overrides".
    """
    self.data = data
    # A literal ``{}`` default would be a single dict shared by every
    # call, so a fresh empty dict is built per instance instead.
    self._merge_options({} if options is None else options)
| 11 | + |
def _merge_options(self, options):
    """Merge *options* over the defaults, validate them and store them.

    Sets ``self._options`` and ``self._is_dot``. In bracket mode it also
    compiles ``self._reg``, the pattern that splits a key on ``[`` and
    ``]``; in dot mode ``self._reg`` is not set.

    Args:
        options: Mapping of option names to values; its entries override
            the defaults (``separator="bracket"``,
            ``raise_duplicate=True``, ``assign_duplicate=False``).

    Raises:
        AssertionError: If ``separator`` is neither ``"dot"`` nor
            ``"bracket"``, or if ``raise_duplicate`` or
            ``assign_duplicate`` is not a ``bool``.
    """
    defaults = {
        "separator": "bracket",
        "raise_duplicate": True,
        "assign_duplicate": False,
    }
    merged = {**defaults, **options}
    self._options = merged

    # Raised explicitly instead of using ``assert`` so the checks still
    # run under ``python -O``; the exception type stays AssertionError so
    # existing handlers keep working.
    if merged["separator"] not in ("dot", "bracket"):
        raise AssertionError(
            f"separator must be 'dot' or 'bracket', got {merged['separator']!r}")
    for flag in ("raise_duplicate", "assign_duplicate"):
        if not isinstance(merged[flag], bool):
            raise AssertionError(
                f"{flag} must be a bool, got {merged[flag]!r}")

    self._is_dot = merged["separator"] == "dot"
    if not self.is_dot:
        # Only bracket keys need a regex; dot keys use ``str.split``.
        self._reg = re.compile(r"\[|\]")
| 29 | + |
@property
def is_dot(self) -> bool:
    """Read-only view of ``self._is_dot`` (the dot-separator flag)."""
    return self._is_dot
11 | 33 |
|
def split_key(self, key):
    """Split a flat key into its non-empty path segments.

    Spaces are removed first. In dot mode the key is split on ``.``;
    otherwise it is split with ``self._reg`` (on ``[`` and ``]``).

    Args:
        key: The flat key, e.g. ``"a[b][0]"`` or ``"a.b.0"``.

    Returns:
        list[str]: The segments in order, empty pieces dropped.

    Raises:
        ValueError: If the key's length does not match what its segments
            and separators account for (e.g. ``"a[b"``, ``"a..b"``, or
            an empty key).
    """
    compact = key.replace(" ", "")

    if self.is_dot:
        # A one-character separator needs no regex.
        pieces = compact.split(".")
        sep_len = 1
    else:
        pieces = self._reg.split(compact)
        # Every segment after the first is wrapped in "[" and "]".
        sep_len = 2

    segments = [piece for piece in pieces if piece]

    # Length a well-formed key would have: all segment characters plus
    # one separator group for every segment except the first.
    expected = sum(len(part) for part in segments) \
        + sep_len * (len(segments) - 1)
    if len(compact) != expected:
        raise ValueError(f"invalid format from key {key}")
    return segments
28 | 60 |
|
def set_type(self, dtc, key, value, full_keys, prev=None, last=False):
    """Store *value* under *key* in *dtc* and return the key actually used.

    Args:
        dtc: The target. Normally a list or a dict; anything else means
            the key path collides with a value stored earlier.
        key: Path segment. Converted with ``int()`` when *dtc* is a list.
        value: Object to store.
        full_keys: The complete original key; used in error messages only.
        prev: Dict with entries ``'dtc'``, ``'key'`` and ``'type'``:
            the parent container, the slot in it that holds *dtc*, and
            the empty container that replaces *dtc* on a collision.
            Only read when *dtc* is neither list nor dict and
            ``assign_duplicate`` is enabled.
        last: ``True`` when *key* is the final segment of the path.

    Returns:
        The key used for storage (an ``int`` for lists).

    Raises:
        ValueError: If a list index is larger than the list length, if
            *key* cannot be converted to ``int`` for a list, if *dtc* is
            not a container and ``raise_duplicate`` is enabled, or if
            reassignment is required but *prev* was not supplied.
    """
    if isinstance(dtc, list):
        key = int(key)
        size = len(dtc)
        if size < key:
            raise ValueError(
                f'key "{full_keys}" is upper than actual list')
        if size == key:
            dtc.append(value)
        # size > key: the slot already exists and is left untouched.
        return key

    if isinstance(dtc, dict):
        # An existing entry is only overwritten for the final segment,
        # and only when ``assign_duplicate`` is enabled.
        if key not in dtc or (self._options["assign_duplicate"] and last):
            dtc[key] = value
        return key

    # ``dtc`` is neither list nor dict: the path runs into a stored value.
    if self._options["raise_duplicate"]:
        raise ValueError(
            f'invalid rewrite key from "{full_keys}" to "{dtc}"')
    if self._options["assign_duplicate"]:
        if prev is None:
            raise ValueError(
                f'cannot reassign key "{full_keys}": '
                'no parent container was provided')
        # Swap the colliding value for an empty container in its parent,
        # then retry the store inside that new container.
        parent = prev['dtc']
        parent[prev['key']] = prev['type']
        return self.set_type(
            parent[prev['key']], key, value, full_keys, prev, last)
    # Both options disabled: nothing is stored.
    return key
45 | 81 |
|
def get_next_type(self, keys):
    """Return the empty container that the segment *keys* indexes into.

    Args:
        keys: A single path segment (string).

    Returns:
        A new ``list`` when the segment is a decimal number, otherwise a
        new ``dict``.
    """
    # ``isdecimal`` is True only for characters ``int()`` can parse.
    # ``isdigit`` also accepts e.g. superscript digits ("²"), which would
    # pick a list here and then fail in ``set_type`` on ``int(key)``.
    return [] if keys.isdecimal() else {}
| 84 | + |
def construct(self, data):
    """Build a nested structure from a mapping of flat keys.

    Each key is split with ``split_key``; every segment but the last
    creates (or reuses) a container chosen by ``get_next_type`` for the
    segment that follows, and the final segment receives ``data[key]``.
    All stores go through ``set_type``.

    Args:
        data: Iterable of flat keys that also supports ``data[key]``.

    Returns:
        dict: The nested result.
    """
    dictionary = {}

    for key in data:
        keys = self.split_key(key)
        node = dictionary
        # Where ``node`` lives in its parent, and which empty container
        # may replace it; ``set_type`` reads this on a collision.
        prev = {
            'key': keys[0],
            'dtc': node,
            'type': None
        }

        for pos in range(len(keys) - 1):
            container = self.get_next_type(keys[pos + 1])
            index = self.set_type(
                node, keys[pos], container, key, prev=prev)
            if not isinstance(node, (list, dict)):
                # ``set_type`` may have replaced this colliding value
                # with a container inside the parent; re-read it so the
                # descent continues in the object that is really stored.
                node = prev['dtc'][prev['key']]
            prev['dtc'] = node
            prev['key'] = index
            prev['type'] = container
            node = node[index]

        self.set_type(node, keys[-1], data[key], key, prev=prev, last=True)
    return dictionary
63 | 112 |
|
64 | 113 | def is_valid(self): |
|
0 commit comments