Skip to content

Commit be76e78

Browse files
committed
More efficient pickling
Because pickling preserves object identity, the previous behavior was still correct; it was just less efficient. The new version is fully backwards compatible and can still deserialize data from v0.1.4. A test verifies this. Fixes #5
1 parent 7a33931 commit be76e78

File tree

6 files changed

+65
-1
lines changed

6 files changed

+65
-1
lines changed

mypy.ini

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
[mypy]
22
strict = true
33
mypy_path = src
4-

src/techcable/orderedset/_orderedset.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,35 @@ def __get_pydantic_core_schema__(
259259
),
260260
)
261261

262+
def __getstate__(self) -> Any:
    """Return the state to pickle: a shallow copy of the element list.

    This is the v0.1.5 format. Only `_elements` is stored; `_unique` is
    not written out.

    NOTE(review): an empty set produces an empty (falsy) list. Under pickle
    protocols 0 and 1 a falsy state may cause `__setstate__` to be skipped;
    confirm that empty sets round-trip on those protocols.
    """
    stored = self._elements
    # Sanity check: the list and the set must hold the same number of items.
    assert len(stored) == len(self._unique)
    # Hand pickle a copy rather than the live internal list.
    return stored.copy()
267+
268+
def __setstate__(self, state: Any) -> None:
    """Restore the elements of an OrderedSet from its pickled representation.

    Two state layouts are accepted:

    * v0.1.5: a plain ``list`` of the elements.
    * v0.1.4: a 2-tuple ``(None, slots)`` where ``slots`` maps
      ``"_elements"`` and ``"_unique"`` to the stored values.

    Raises:
        TypeError: if ``state`` matches neither layout, or if the first item
            of a v0.1.4 tuple is not ``None``.
        ValueError: if the v0.1.4 ``_elements`` and ``_unique`` fields do
            not contain the same items.
    """
    # Start from an empty container; the elements are added via `update` below.
    self._unique = set()
    self._elements = []
    # Work out which format `state` is in (a poor man's `match`).
    elements: list[T]
    if isinstance(state, list):
        # format for v0.1.5 - list of elements
        elements = state
    elif isinstance(state, tuple) and len(state) == 2:
        # format for v0.1.4 - (None, dict(_elements=..., _unique=...))
        state_dict, state_slots = state
        if state_dict is not None:
            # Say what was wrong instead of raising a bare TypeError.
            raise TypeError(
                "Cannot unpickle: expected `None` as the first state item, "
                f"got {type(state_dict)}"
            )
        elements = state_slots["_elements"]
        if set(elements) != state_slots["_unique"]:
            raise ValueError("Fields `_elements` and `_unique` must match")
    else:
        raise TypeError(f"Cannot unpickle from {type(state)}")
    # Populate both `_elements` and `_unique` through the regular update path.
    self.update(elements)
290+
262291
@classmethod
263292
def dedup(self, source: Iterable[T], /) -> Generator[T]:
264293
"""A utility method to deduplicate the specified iterable,

tests/data/pickle/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# tests.data.pickle
2+
Contains data pickled using different versions of `techcable.orderedset`.
3+
4+
This ensures that data pickled by older versions can still be unpickled by the current version.
115 Bytes
Binary file not shown.
76 Bytes
Binary file not shown.

tests/test_pickle.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import pickle
2+
from pathlib import Path
3+
4+
import pytest
5+
6+
from techcable.orderedset import OrderedSet
7+
8+
9+
def test_pickle_roundtrip():
    """An OrderedSet compares equal to itself after a dumps/loads cycle."""
    before = OrderedSet([1, 2, 3, 4, 7, 1])
    after = pickle.loads(pickle.dumps(before))
    assert before == after
14+
15+
16+
# The current version of the data being stored.
_LATEST_DATA_VERSION = "v0.1.5"
18+
19+
20+
def _load_data(name: str, /, *, version: str) -> bytes:
    """Return the raw bytes of the pickle fixture `name` for the given `version`.

    The file is looked up relative to this module's directory, at
    ``data/pickle/<name>-<version>.dat``.
    """
    here = Path(__file__).parent
    fixture = here / f"data/pickle/{name}-{version}.dat"
    return fixture.read_bytes()
22+
23+
24+
@pytest.mark.parametrize("version", ["v0.1.4"])
@pytest.mark.parametrize("data", ["example1"])
def test_unpickle_backwards_compat(version: str, data: str):
    """Data pickled by an older version must load equal to the latest format."""
    # Read both fixtures first, then unpickle them.
    latest_blob = _load_data(data, version=_LATEST_DATA_VERSION)
    legacy_blob = _load_data(data, version=version)
    from_latest = pickle.loads(latest_blob)
    from_legacy = pickle.loads(legacy_blob)
    assert from_legacy == from_latest

0 commit comments

Comments
 (0)