perf(chunking): Return a generator instead of a list

caffeine-addictt · caffeine-addictt · commit 20d702d3bf9c · 2024-04-28T13:54:01.000+08:00
diff --git a/src/thread/utils/algorithm.py b/src/thread/utils/algorithm.py
@@ -8,10 +8,12 @@
   |_ b.py
 """
 
-from typing import List, Tuple
+from typing import Tuple, Generator
 
 
-def chunk_split(dataset_length: int, number_of_chunks: int) -> List[Tuple[int, int]]:
+def chunk_split(
+    dataset_length: int, number_of_chunks: int
+) -> Generator[Tuple[int, int], None, None]:
     """
     Splits a dataset into balanced chunks
 
@@ -41,13 +43,10 @@ def chunk_split(dataset_length: int, number_of_chunks: int) -> List[Tuple[int, i
     overflow = dataset_length % number_of_chunks
 
     i = 0
-    split = []
     while i < dataset_length:
         chunk_length = chunk_count + int(overflow > 0)
         b = i + chunk_length
 
-        split.append((i, b))
+        yield (i, b)
         overflow -= 1
         i = b
-
-    return split
diff --git a/tests/test_algorithm.py b/tests/test_algorithm.py
@@ -1,17 +1,23 @@
 import random
+from typing import Generator
+
 from src.thread.utils import algorithm
 
 
+def test_type():
+    assert isinstance(algorithm.chunk_split(5, 1), Generator)
+
+
 def test_chunking_1():
-    assert algorithm.chunk_split(5, 1) == [(0, 5)]
+    assert list(algorithm.chunk_split(5, 1)) == [(0, 5)]
 
 
 def test_chunking_2():
-    assert algorithm.chunk_split(5, 2) == [(0, 3), (3, 5)]
+    assert list(algorithm.chunk_split(5, 2)) == [(0, 3), (3, 5)]
 
 
 def test_chunking_3():
-    assert algorithm.chunk_split(100, 8) == [
+    assert list(algorithm.chunk_split(100, 8)) == [
         (0, 13),
         (13, 26),
         (26, 39),
@@ -41,5 +47,5 @@ def test_chunking_dynamic():
         i = b
 
     assert (
-        algorithm.chunk_split(dataset_length, thread_count) == heap
-    ), f'\nLength: {dataset_length}\nThreads: {thread_count}\nExpected: {heap}\nActual: {algorithm.chunk_split(dataset_length, thread_count)}'
+        list(algorithm.chunk_split(dataset_length, thread_count)) == heap
+    ), f'\nLength: {dataset_length}\nThreads: {thread_count}\nExpected: {heap}\nActual: {list(algorithm.chunk_split(dataset_length, thread_count))}'