|
12 | 12 | most things by construction instead of by filtering. That's the difference |
13 | 13 | between "I'd like it to be faster" and "doesn't finish at all". |
14 | 14 | """ |
15 | | -import functools |
16 | 15 | import itertools |
17 | 16 | import json |
18 | 17 | import math |
19 | 18 | import re |
20 | 19 | from copy import deepcopy |
21 | | -from json.encoder import _make_iterencode, encode_basestring_ascii # type: ignore |
22 | | -from typing import Any, Callable, Dict, List, NoReturn, Optional, Tuple, Type, Union |
| 20 | +from typing import Any, Dict, List, NoReturn, Optional, Tuple, Union |
23 | 21 |
|
24 | 22 | import jsonschema |
25 | 23 | from hypothesis.errors import InvalidArgument |
26 | 24 | from hypothesis.internal.floats import next_down as ieee_next_down, next_up |
27 | 25 |
|
28 | | -# Mypy does not (yet!) support recursive type definitions. |
29 | | -# (and writing a few steps by hand is a DoS attack on the AST walker in Pytest) |
30 | | -JSONType = Union[None, bool, float, str, list, Dict[str, Any]] |
| 26 | +from ._encode import JSONType, encode_canonical_json, sort_key |
| 27 | + |
31 | 28 | Schema = Dict[str, JSONType] |
32 | 29 | JSONSchemaValidator = Union[ |
33 | 30 | jsonschema.validators.Draft4Validator, |
@@ -86,110 +83,10 @@ def make_validator(schema: Schema) -> JSONSchemaValidator: |
86 | 83 | return validator(schema) |
87 | 84 |
|
88 | 85 |
|
89 | | -class CanonicalisingJsonEncoder(json.JSONEncoder): |
90 | | - def iterencode(self, o: Any, _one_shot: bool = False) -> Any: |
91 | | - """Replace a stdlib method, so we encode integer-valued floats as ints.""" |
92 | | - |
93 | | - def floatstr(o: float) -> str: |
94 | | - # This is the bit we're overriding - integer-valued floats are |
95 | | - # encoded as integers, to support JSONschemas's uniqueness. |
96 | | - assert math.isfinite(o) |
97 | | - if o == int(o): |
98 | | - return repr(int(o)) |
99 | | - return repr(o) |
100 | | - |
101 | | - return _make_iterencode( |
102 | | - {}, |
103 | | - self.default, |
104 | | - encode_basestring_ascii, |
105 | | - self.indent, |
106 | | - floatstr, |
107 | | - self.key_separator, |
108 | | - self.item_separator, |
109 | | - self.sort_keys, |
110 | | - self.skipkeys, |
111 | | - _one_shot, |
112 | | - )(o, 0) |
113 | | - |
114 | | - |
115 | 86 | class HypothesisRefResolutionError(jsonschema.exceptions.RefResolutionError): |
116 | 87 | pass |
117 | 88 |
|
118 | 89 |
|
119 | | -def _make_cache_key( |
120 | | - value: JSONType, |
121 | | -) -> Tuple[Type, Union[Tuple, None, bool, float, str]]: |
122 | | - """Make a hashable object from any JSON value. |
123 | | -
|
124 | | - The idea is to recursively convert all mutable values to immutable and adding values types as a discriminant. |
125 | | - """ |
126 | | - if isinstance(value, dict): |
127 | | - return (dict, tuple((k, _make_cache_key(v)) for k, v in value.items())) |
128 | | - if isinstance(value, list): |
129 | | - return (list, tuple(map(_make_cache_key, value))) |
130 | | - # Primitive types are hashable |
131 | | - # `type` is needed to distinguish false-ish values - 0, "", False have the same hash (0) |
132 | | - return (type(value), value) |
133 | | - |
134 | | - |
135 | | -class HashedJSON: |
136 | | - """A proxy that holds a JSON value. |
137 | | -
|
138 | | - Adds a capability for the inner value to be cached, loosely based on `functools._HashedSeq`. |
139 | | - """ |
140 | | - |
141 | | - __slots__ = ("value", "hashedvalue") |
142 | | - |
143 | | - def __init__(self, value: JSONType): |
144 | | - self.value = value |
145 | | - # `hash` is called multiple times on cache miss, therefore it is evaluated only once |
146 | | - self.hashedvalue = hash(_make_cache_key(value)) |
147 | | - |
148 | | - def __hash__(self) -> int: |
149 | | - return self.hashedvalue |
150 | | - |
151 | | - def __eq__(self, other: "HashedJSON") -> bool: # type: ignore |
152 | | - # TYPES: This class should be used only for caching purposes and there should be |
153 | | - # no values of other types to compare |
154 | | - return self.hashedvalue == other.hashedvalue |
155 | | - |
156 | | - |
157 | | -def cached_json(func: Callable[[HashedJSON], str]) -> Callable[[JSONType], str]: |
158 | | - """Cache calls to `encode_canonical_json`. |
159 | | -
|
160 | | - The same schemas are encoded multiple times during canonicalisation and caching gives visible performance impact. |
161 | | - """ |
162 | | - cached_func = functools.lru_cache(maxsize=1024)(func) |
163 | | - |
164 | | - @functools.wraps(cached_func) |
165 | | - def wrapped(value: JSONType) -> str: |
166 | | - return cached_func(HashedJSON(value)) |
167 | | - |
168 | | - return wrapped |
169 | | - |
170 | | - |
171 | | -@cached_json |
172 | | -def encode_canonical_json(value: HashedJSON) -> str: |
173 | | - """Canonical form serialiser, for uniqueness testing.""" |
174 | | - return json.dumps(value.value, sort_keys=True, cls=CanonicalisingJsonEncoder) |
175 | | - |
176 | | - |
177 | | -def sort_key(value: JSONType) -> Tuple[int, float, Union[float, str]]: |
178 | | - """Return a sort key (type, guess, tiebreak) that can compare any JSON value. |
179 | | -
|
180 | | - Sorts scalar types before collections, and within each type tries for a |
181 | | - sensible ordering similar to Hypothesis' idea of simplicity. |
182 | | - """ |
183 | | - if value is None: |
184 | | - return (0, 0, 0) |
185 | | - if isinstance(value, bool): |
186 | | - return (1, int(value), 0) |
187 | | - if isinstance(value, (int, float)): |
188 | | - return (2 if int(value) == value else 3, abs(value), value >= 0) |
189 | | - type_key = {str: 4, list: 5, dict: 6}[type(value)] |
190 | | - return (type_key, len(value), encode_canonical_json(value)) |
191 | | - |
192 | | - |
193 | 90 | def get_type(schema: Schema) -> List[str]: |
194 | 91 | """Return a canonical value for the "type" key. |
195 | 92 |
|
|
0 commit comments