Skip to content

Commit 0722933

Browse files
committed
Add OrderedSet.dedup to deduplicate iterators
Since this is a generator, it yields items without waiting for the whole input. Also add OrderedSet.dedup_async, which does the same thing for an AsyncIterable.
1 parent c21fbe9 commit 0722933

File tree

3 files changed

+85
-2
lines changed

3 files changed

+85
-2
lines changed

pyproject.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,12 @@ pydantic = [
2424

2525
[dependency-groups]
2626
mypy = ["mypy~=1.0", {include-group = "typing"}]
27-
test = ["pytest~=8.3", "pytest-sugar~=1.0", "techcable.orderedset[pydantic]"]
27+
test = [
28+
"pytest~=8.3",
29+
"pytest-asyncio>=0.26",
30+
"pytest-sugar~=1.0",
31+
"techcable.orderedset[pydantic]"
32+
]
2833
typing = ["typing-extensions~=4.12", "techcable.orderedset[pydantic]"]
2934
dev = [{include-group = "mypy"}, {include-group = "test"}]
3035

src/techcable/orderedset/_orderedset.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,17 @@
11
from __future__ import annotations
22

33
import operator
4-
from collections.abc import Callable, Iterable, Iterator, MutableSet, Sequence, Set
4+
from collections.abc import (
5+
AsyncGenerator,
6+
AsyncIterable,
7+
Callable,
8+
Generator,
9+
Iterable,
10+
Iterator,
11+
MutableSet,
12+
Sequence,
13+
Set,
14+
)
515
from typing import TYPE_CHECKING, Any, Optional, TypeVar, overload
616

717
if TYPE_CHECKING:
@@ -249,5 +259,40 @@ def __get_pydantic_core_schema__(
249259
),
250260
)
251261

262+
@classmethod
def dedup(cls, source: Iterable[T], /) -> Generator[T]:
    """Lazily yield the items of `source`, skipping duplicates.

    The first occurrence of each value is yielded in its original order;
    later occurrences are dropped.

    This is a generator, so it does not need to wait for the entire input
    and yields each item as soon as it is available.

    Since: v0.1.4
    """
    # Tracks every value encountered so far.
    seen: OrderedSet[T] = OrderedSet()
    for item in source:
        # A truthy result from `append` marks a value not seen before.
        if seen.append(item):
            yield item
277+
278+
@classmethod
async def dedup_async(cls, source: AsyncIterable[T], /) -> AsyncGenerator[T]:
    """Lazily yield the items of the async iterable `source`, skipping duplicates.

    The first occurrence of each value is yielded in its original order;
    later occurrences are dropped.

    This is an asynchronous generator, so it does not need to wait for the
    entire input. Because it is asynchronous, it does not block the thread
    while waiting for the next item.

    This is an asynchronous version of `OrderedSet.dedup`.

    Since: v0.1.4
    """
    # Asynchronous generators (`yield` inside `async def`) are defined in PEP 525.
    # Tracks every value encountered so far.
    seen: OrderedSet[T] = OrderedSet()
    async for item in source:
        # A truthy result from `append` marks a value not seen before.
        if seen.append(item):
            yield item
296+
252297

253298
__all__ = ("OrderedSet",)

tests/test_simple.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
1+
from __future__ import annotations
2+
13
import random
4+
from collections.abc import AsyncGenerator
25
from typing import Any, TypeVar
36

7+
import pytest
8+
49
from techcable.orderedset import OrderedSet
510

611
T = TypeVar("T")
@@ -20,6 +25,7 @@ def _remove_duplicates(l: list[T]) -> list[T]:
2025
["foo", "bar", "baz", "foo"],
2126
[1, 2, 7, 13, 9, 12, 2, 8, 7],
2227
[float("NaN"), 2.8, float("NaN"), 7.9],
28+
[2.7, 3, 2.7, 9, 8.2, 3, 4.1],
2329
]
2430

2531

@@ -43,3 +49,30 @@ def test_remove():
4349
assert orig_oset == OrderedSet(orig_data), "Copy didn't work"
4450
assert oset == (orig_oset - {target})
4551
assert oset == OrderedSet(data)
52+
53+
54+
def test_dedup():
    """Check `OrderedSet.dedup` against `_remove_duplicates` for every example."""
    for sample in EXAMPLE_DATA:
        actual = list(OrderedSet.dedup(sample))
        expected = _remove_duplicates(sample)
        assert actual == expected
57+
58+
59+
@pytest.mark.asyncio
async def test_async_dedup():
    """Check `OrderedSet.dedup_async` against `_remove_duplicates` for every example.

    Each example is fed through an async generator that awaits a counter
    increment before yielding every item, so the test also confirms that
    the whole source was consumed.
    """
    for sample in EXAMPLE_DATA:
        awaited = 0

        async def bump() -> None:
            # Gives the source generator a real `await` before each item.
            nonlocal awaited
            awaited += 1

        async def feed(values: list[T], /) -> AsyncGenerator[T]:
            for value in values:
                await bump()
                yield value

        seen = [value async for value in OrderedSet.dedup_async(feed(sample))]

        assert awaited == len(sample)
        assert seen == _remove_duplicates(sample)

0 commit comments

Comments
 (0)