Skip to content

Commit 7fdfb6e

Browse files
committed
Improve documentation
Enable `pydocstyle` lints
1 parent cf4b367 commit 7fdfb6e

File tree

4 files changed

+155
-34
lines changed

4 files changed

+155
-34
lines changed

README.md

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,33 @@ techcable.orderedset
55
[![pypi](https://img.shields.io/pypi/v/techcable.orderedset)](https://pypi.org/project/techcable.orderedset/)
66
![types](https://img.shields.io/pypi/types/techcable.orderedset)
77

8-
A simple and efficient pure-python ordered set.
8+
A simple and efficient `OrderedSet` in pure python. Implements both [`MutableSet`] and [`Sequence`].
9+
10+
[`MutableSet`]: https://docs.python.org/3/library/collections.abc.html#collections.abc.MutableSet
11+
[`Sequence`]: https://docs.python.org/3/library/collections.abc.html#collections.abc.Sequence
12+
913

1014
## Example Usage
1115
```python
1216
from techcable.orderedset import OrderedSet
1317

1418
# prints {1, 2, 7, 3}
15-
print(OrderedSet.of(1, 2, 7, 2, 3))
19+
print(OrderedSet([1, 2, 7, 2, 3]))
20+
# Implements all standard set methods, still preserves order
21+
print(OrderedSet([1,2]) | OrderedSet([3,2,4])) # {1,2,3,4}
22+
# OrderedSet.of(1, 2) is shorthand for OrderedSet([1, 2]),
23+
# avoiding an extra pair of brackets
24+
print(OrderedSet.of(1, 2)) # {1, 2}
25+
26+
27+
# Implements `append` method, returning True on success
28+
# and False if the item was a duplicate
29+
oset = OrderedSet()
30+
oset.append(1) # True
31+
oset.append(2) # True
32+
oset.append(1) # False - already in set, did nothing
33+
oset.extend([2,3]) # True - at least one success
34+
oset.extend([2,3]) # False - all duplicates
1635
```
1736

1837
Supports [pydantic](pydantic.org) validation & serialization:
@@ -27,9 +46,9 @@ assert model.dump_python(OrderedSet.of(1,2,7,8)) == [1,2,7,8]
2746
```
2847

2948
## Potential Future Features
30-
- Add [acceleration module] using C/Rust/Cython
31-
- Implemented `OrderedFrozenSet`
32-
- Publish HTML documentation using Sphinx or [pdoc](https://pdoc.dev/)
49+
- Implement `OrderedFrozenSet`
50+
- Consider [acceleration module] using C/Rust/Cython
51+
- Probably unnecessary since this library has very little overhead compared to the builtin `set`/`list`
3352

3453
[acceleration module]: https://peps.python.org/pep-0399/
3554

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ extend-select = [
103103
"T20", # flake8-print - prevent use of `print` statement
104104
"TC", # flake8-type-checking - guard imports needed only for type-checking
105105
"TID252", # forbid relative imports
106-
# "D", # pydocstyle - require docstrings to be well-written
106+
"D", # pydocstyle - lint docstrings and require for all public methods
107107
"PLW", # pylint[warning]
108108
"PLC", # pylint[convention]
109109
"F", # pyflakes

src/techcable/orderedset/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
"""
2+
Implements `OrderedSet`, a [`MutableSet`] that preserves insertion order and is also a [`Sequence`].
3+
4+
[`MutableSet`]: https://docs.python.org/3/library/collections.abc.html#collections.abc.MutableSet
5+
[`Sequence`]: https://docs.python.org/3/library/collections.abc.html#collections.abc.Sequence
6+
7+
The implementation is pure-python and does not require any native code.
8+
"""
9+
110
from ._orderedset import OrderedSet
211
from ._version import __version__
312

src/techcable/orderedset/_orderedset.py

Lines changed: 121 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,28 @@ def override(v):
5454

5555

5656
class OrderedSet(MutableSet[T], Sequence[T]):
57-
"""A Set of elements, which preserves insertion order.
57+
"""
58+
A [`MutableSet`] that preserves insertion order and is also a [`Sequence`].
5859
59-
This type cannot implement `MutableSequence` because OrderedSet.append
60-
ignores duplicate elements and returns a `bool` instead of `None`."""
60+
## Conveniences
61+
Calling [`OrderedSet.append`] returns `True` if the element was successfully added,
62+
and `False` if the element is a duplicate.
63+
64+
Calling [`OrderedSet.__str__`] on a set containing `x` and `y` is equivalent to `f"{{{x!r}, {y!r}}}"`.
65+
This is much prettier than [`OrderedSet.__repr__`],
66+
which is expected to roundtrip through `eval`.
67+
68+
## Gotchas
69+
This type does not implement [`MutableSequence`]
70+
because [`OrderedSet.append`] ignores duplicate elements
71+
and returns `bool` instead of `None`.
72+
73+
### Thread Safety
74+
This type is *NOT* safe to mutate from multiple threads.
75+
76+
Concurrent reads are fully supported,
77+
as long as no modifications are being made.
78+
"""
6179

6280
__slots__ = ("_elements", "_unique")
6381

@@ -66,7 +84,11 @@ class OrderedSet(MutableSet[T], Sequence[T]):
6684

6785
@override
6886
def __init__(self, source: Iterable[T] | None = None, /) -> None:
69-
"""Create an ordered set containing the specified elements"""
87+
"""
88+
Create an `OrderedSet` containing the specified elements.
89+
90+
This preserves the order of the original input and implicitly ignores duplicates.
91+
"""
7092
self._unique = set()
7193
self._elements = []
7294
if source is None:
@@ -100,10 +122,15 @@ def of(cls, /, *args: T) -> OrderedSet[T]:
100122
return cls(args)
101123

102124
def append(self, value: T, /) -> bool:
103-
"""Append a value to the set if it doesn't already exist.
125+
"""
126+
Append a value to the set, returning `True` if successfully added.
104127
105-
Returns `True` if successfully added, or `False` if already exists.
106-
Note that the return value doesn't match list.append, which always returns `None`"""
128+
Returns `False` if the element already exists.
129+
130+
There are two important differences between this method and [`list.append`]:
131+
1. This method does nothing if the value is a duplicate
132+
2. This method returns a `bool` instead of `None`
133+
"""
107134
is_new = value not in self._unique
108135
if is_new:
109136
self._unique.add(value)
@@ -115,10 +142,10 @@ def extend(self, values: Iterable[T], /) -> bool:
115142
"""
116143
Add all the specified values to the set.
117144
118-
Returns True if at least one element was added,
145+
Returns `True` if at least one element was added,
119146
or `False` if every element is a duplicate.
120147
121-
Roughly equivalent to `any(oset.append(v) for v in values)`.
148+
Equivalent to `any([oset.append(v) for v in values])` (every value is processed; there is no short-circuiting).
122149
"""
123150
changed = False
124151
for val in values:
@@ -127,26 +154,47 @@ def extend(self, values: Iterable[T], /) -> bool:
127154

128155
@override
129156
def add(self, value: T, /) -> None:
130-
"""Add a value to the set if it doesn't already exist.
157+
"""
158+
Add a value to the set if it doesn't already exist.
131159
132-
Return value is `None` for consistency with `set.add`.
133-
Use `OrderedSet.append` if you want to know if the element already existed."""
160+
Return value is `None` for consistency with [`set.add`].
161+
Use [`OrderedSet.append`] if you want to know if the element already existed.
162+
"""
134163
self.append(value)
135164

136165
@override
137166
def discard(self, value: T, /) -> None:
138-
"""Remove the element from the set if it exists."""
167+
"""
168+
Remove an element from the set if it exists.
169+
170+
Unlike [`OrderedSet.remove`], this method does not raise
171+
an exception if this element is missing.
172+
"""
139173
if value in self._unique:
140174
self._elements.remove(value)
141175
self._unique.remove(value)
142176

143177
def update(self, values: Iterable[T], /) -> None:
144-
"""Add all the"""
178+
"""
179+
Add all the specified values to this set.
180+
181+
Equivalent to running
182+
```
183+
for val in values:
184+
oset.add(val)
185+
```
186+
"""
145187
self.extend(values)
146188

147189
@override
148190
def pop(self, index: int = -1) -> T:
149-
"""Pop an item from the end of the list (or at `index`)"""
191+
"""
192+
Remove and return an item from the end of the list (or from `self[index]`).
193+
194+
Raises `IndexError` if the list is empty or `index` is out of bounds.
195+
196+
Equivalent to [`list.pop`].
197+
"""
150198
item = self._elements.pop(index)
151199
self._unique.remove(item)
152200
return item
@@ -199,7 +247,7 @@ def __eq__(self, other: object) -> bool:
199247
return NotImplemented
200248

201249
__hash__ = None # type: ignore
202-
"""Since an OrderedSet is mutable, it cannot be hashed"""
250+
"""Since an OrderedSet is mutable, it is not hashable."""
203251

204252
def _impl_cmp_op(self, other: object, op: Callable[[Any, Any], bool]) -> bool:
205253
if isinstance(other, OrderedSet):
@@ -227,25 +275,56 @@ def __ge__(self, other: object) -> bool:
227275
return self._impl_cmp_op(other, operator.ge)
228276

229277
def sort(self, *, key: Callable[[T], U] | None = None, reverse: bool = False) -> None:
230-
"""Sort the elements in the set, as if calling list.sort"""
278+
"""Sort the elements of the set in-place, as if calling [`list.sort`]."""
231279
self._elements.sort(key=key, reverse=reverse)
232280

233281
def reverse(self) -> None:
234-
"""Reverse the elements in the set, as if calling list.reverse"""
282+
"""Reverse the elements of the set in-place, as if calling [`list.reverse`]."""
235283
self._elements.reverse()
236284

237285
def copy(self) -> OrderedSet[T]:
238-
"""Create a copy of the set"""
286+
"""
287+
Create a shallow copy of the set.
288+
289+
Equivalent to `OrderedSet(self)`.
290+
"""
239291
return OrderedSet(self)
240292

241293
@override
242294
def __repr__(self) -> str:
295+
"""
296+
Represent this set in a form that will round-trip through [`eval`].
297+
298+
Examples:
299+
- `repr(OrderedSet([1, 2, 3]))` returns `"OrderedSet.of(1, 2, 3)"`
300+
- `repr(OrderedSet(["a", "b"]))` returns `"OrderedSet.of('a', 'b')"`
301+
302+
The representation used by [`OrderedSet.__str__`] is much prettier.
303+
It still calls `repr` on each element and not `str`,
304+
so `str(OrderedSet([' ']))` gives `"{' '}"` instead of `"{ }"`.
305+
It is really just a prettier `repr` which isn't constrained
306+
by the need to round-trip through [`eval`].
307+
308+
The format changed in v0.1.6 to take advantage of [`OrderedSet.of`].
309+
It now uses `"OrderedSet.of(1,2,3)"` instead of `"OrderedSet([1,2,3])"`.
310+
This may break users relying on the format,
311+
but I consider this acceptable during the beta.
312+
"""
243313
# by convention, this should roundtrip through eval
244-
# in v0.1.6, this changed to use OrderedSet.of(1,2,3) instead of OrderedSet([1,2,3])
245314
return f"OrderedSet.of({', '.join(map(repr, self))})"
246315

247316
@override
248317
def __str__(self) -> str:
318+
"""
319+
Represent the elements in this set by calling `repr` on each element, surrounding it with braces.
320+
321+
Examples:
322+
- `str(OrderedSet([1, 2, 3]))` returns `"{1, 2, 3}"`
323+
- `str(OrderedSet(["a", "b", "c"]))` returns `"{'a', 'b', 'c'}"`
324+
325+
This would make a very good implementation of [`OrderedSet.__repr__`],
326+
except for the fact it will not round-trip through [`eval`].
327+
"""
249328
return f"{{{', '.join(map(repr, self))}}}"
250329

251330
@classmethod
@@ -305,13 +384,19 @@ def __setstate__(self, state: Any) -> None:
305384

306385
@classmethod
307386
def dedup(cls, source: Iterable[T], /) -> Generator[T]:
308-
"""A utility method to deduplicate the specified iterable,
309-
while preserving the original order.
387+
"""
388+
Yield unique elements, preserving order.
310389
311-
This is a generator, so does not need to wait for the entire input,
390+
This is an iterator combinator (generator) similar to those in [`itertools`].
391+
It does not need to wait for the entire input,
312392
and will return items as soon as they are available.
313393
394+
This is similar to [`more_itertools.unique_everseen`],
395+
although it uses an `OrderedSet` internally and does not support the `key` argument.
396+
314397
Since: v0.1.4
398+
399+
[`more_itertools.unique_everseen`]: https://more-itertools.readthedocs.io/en/v10.7.0/api.html#more_itertools.unique_everseen
315400
"""
316401
oset: OrderedSet[T] = OrderedSet()
317402
for item in source:
@@ -321,17 +406,25 @@ def dedup(cls, source: Iterable[T], /) -> Generator[T]:
321406

322407
@classmethod
323408
async def dedup_async(cls, source: AsyncIterable[T], /) -> AsyncGenerator[T]:
324-
"""A utility method to deduplicate the specified iterable,
325-
while preserving the original order.
409+
"""
410+
Yield unique elements, preserving order.
326411
327-
This is a generator, so does not need to wait for the entire input.
412+
This is an iterator combinator (generator) similar to those in [`itertools`].
413+
It does not need to wait for the entire input,
414+
and will return items as soon as they are available.
328415
Because it is asynchronous, it does not block the thread while waiting.
329416
330-
This is an asynchronous version of `OrderedSet.dedup`.
417+
This is an asynchronous version of [`OrderedSet.dedup`].
418+
419+
It is similar to [`more_itertools.unique_everseen`],
420+
but is asynchronous, uses an `OrderedSet` internally,
421+
and does not support the `key` argument.
331422
332423
Since: v0.1.4
424+
425+
[`more_itertools.unique_everseen`]: https://more-itertools.readthedocs.io/en/v10.7.0/api.html#more_itertools.unique_everseen
333426
"""
334-
# Defined in PEP 525
427+
# async for defined in PEP 525
335428
oset: OrderedSet[T] = OrderedSet()
336429
async for item in source:
337430
if oset.append(item):

0 commit comments

Comments
 (0)