Skip to content

Commit 20d702d

Browse files
perf(chunking): Return a generator instead of a list
1 parent 9217bbc commit 20d702d

File tree

2 files changed

+15
-10
lines changed

2 files changed

+15
-10
lines changed

src/thread/utils/algorithm.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,12 @@
88
|_ b.py
99
"""
1010

11-
from typing import List, Tuple
11+
from typing import Tuple, Generator
1212

1313

14-
def chunk_split(dataset_length: int, number_of_chunks: int) -> List[Tuple[int, int]]:
14+
def chunk_split(
15+
dataset_length: int, number_of_chunks: int
16+
) -> Generator[Tuple[int, int], None, None]:
1517
"""
1618
Splits a dataset into balanced chunks
1719
@@ -41,13 +43,10 @@ def chunk_split(dataset_length: int, number_of_chunks: int) -> List[Tuple[int, i
4143
overflow = dataset_length % number_of_chunks
4244

4345
i = 0
44-
split = []
4546
while i < dataset_length:
4647
chunk_length = chunk_count + int(overflow > 0)
4748
b = i + chunk_length
4849

49-
split.append((i, b))
50+
yield (i, b)
5051
overflow -= 1
5152
i = b
52-
53-
return split

tests/test_algorithm.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,23 @@
11
import random
2+
from typing import Generator
3+
24
from src.thread.utils import algorithm
35

46

7+
def test_type():
8+
assert isinstance(algorithm.chunk_split(5, 1), Generator)
9+
10+
511
def test_chunking_1():
6-
assert algorithm.chunk_split(5, 1) == [(0, 5)]
12+
assert list(algorithm.chunk_split(5, 1)) == [(0, 5)]
713

814

915
def test_chunking_2():
10-
assert algorithm.chunk_split(5, 2) == [(0, 3), (3, 5)]
16+
assert list(algorithm.chunk_split(5, 2)) == [(0, 3), (3, 5)]
1117

1218

1319
def test_chunking_3():
14-
assert algorithm.chunk_split(100, 8) == [
20+
assert list(algorithm.chunk_split(100, 8)) == [
1521
(0, 13),
1622
(13, 26),
1723
(26, 39),
@@ -41,5 +47,5 @@ def test_chunking_dynamic():
4147
i = b
4248

4349
assert (
44-
algorithm.chunk_split(dataset_length, thread_count) == heap
50+
list(algorithm.chunk_split(dataset_length, thread_count)) == heap
4551
), f'\nLength: {dataset_length}\nThreads: {thread_count}\nExpected: {heap}\nActual: {algorithm.chunk_split(dataset_length, thread_count)}'

0 commit comments

Comments
 (0)