Merge pull request #92 from stealthrocket/poll-min-results

chriso · web-flow · commit 2a4c856d78b8 · 2024-02-26T09:46:54.000+10:00
Use min_results when polling
diff --git a/src/dispatch/proto.py b/src/dispatch/proto.py
@@ -202,7 +202,8 @@ def poll(
         cls,
         state: Any,
         calls: None | list[Call] = None,
-        max_results: int = 1,
+        min_results: int = 1,
+        max_results: int = 10,
         max_wait_seconds: int | None = None,
     ) -> Output:
         """Suspend the function with a set of Calls, instructing the
@@ -216,6 +217,7 @@ def poll(
         )
         poll = poll_pb.Poll(
             coroutine_state=state_bytes,
+            min_results=min_results,
             max_results=max_results,
             max_wait=max_wait,
         )
diff --git a/src/dispatch/scheduler.py b/src/dispatch/scheduler.py
@@ -12,7 +12,6 @@
 
 logger = logging.getLogger(__name__)
 
-
 CallID: TypeAlias = int
 CoroutineID: TypeAlias = int
 CorrelationID: TypeAlias = int
@@ -38,9 +37,13 @@ class CallResult:
 
 class Future(Protocol):
     def add_result(self, result: CallResult | CoroutineResult): ...
+
     def add_error(self, error: Exception): ...
+
     def ready(self) -> bool: ...
+
     def error(self) -> Exception | None: ...
+
     def value(self) -> Any: ...
 
 
@@ -147,7 +150,9 @@ class State:
     next_coroutine_id: int
     next_call_id: int
 
-    prev_calls: list[Coroutine]
+    prev_callers: list[Coroutine]
+
+    outstanding_calls: int
 
 
 class OneShotScheduler:
@@ -158,13 +163,46 @@ class OneShotScheduler:
     take over scheduling asynchronous calls.
     """
 
-    __slots__ = ("entry_point", "version", "poll_max_wait_seconds")
+    __slots__ = (
+        "entry_point",
+        "version",
+        "poll_min_results",
+        "poll_max_results",
+        "poll_max_wait_seconds",
+    )
 
     def __init__(
-        self, entry_point: Callable, version=sys.version, poll_max_wait_seconds=5
+        self,
+        entry_point: Callable,
+        version: str = sys.version,
+        poll_min_results: int = 1,
+        poll_max_results: int = 10,
+        poll_max_wait_seconds: int | None = None,
     ):
+        """Initialize the scheduler.
+
+        Args:
+            entry_point: Entry point for the main coroutine.
+
+            version: Version string to attach to scheduler/coroutine state.
+                If the scheduler sees a version mismatch, it will respond to
+                Dispatch with an INCOMPATIBLE_STATE status code.
+
+            poll_min_results: Minimum number of call results to wait for before
+                coroutine execution should continue. Dispatch waits until this
+                many results are available, or the poll_max_wait_seconds
+                timeout is reached, whichever comes first.
+
+            poll_max_results: Maximum number of call results to receive from
+                Dispatch per request.
+
+            poll_max_wait_seconds: Maximum amount of time to suspend coroutines
+                while waiting for call results. Optional.
+        """
         self.entry_point = entry_point
         self.version = version
+        self.poll_min_results = poll_min_results
+        self.poll_max_results = poll_max_results
         self.poll_max_wait_seconds = poll_max_wait_seconds
         logger.debug(
             "booting coroutine scheduler with entry point '%s' version '%s'",
@@ -198,7 +236,8 @@ def _init_state(self, input: Input) -> State:
             ready=[Coroutine(id=0, parent_id=None, coroutine=main)],
             next_coroutine_id=1,
             next_call_id=1,
-            prev_calls=[],
+            prev_callers=[],
+            outstanding_calls=0,
         )
 
     def _rebuild_state(self, input: Input):
@@ -229,16 +268,17 @@ def _run(self, input: Input) -> Output:
             if poll_error is not None:
                 error = poll_error.to_exception()
                 logger.debug("dispatching poll error: %s", error)
-                for coroutine in state.prev_calls:
+                for coroutine in state.prev_callers:
                     future = coroutine.result
                     assert future is not None
                     future.add_error(error)
                     if future.ready() and coroutine.id in state.suspended:
                         state.ready.append(coroutine)
                         del state.suspended[coroutine.id]
                         logger.debug("coroutine %s is now ready", coroutine)
+                    state.outstanding_calls -= 1
 
-            state.prev_calls = []
+            state.prev_callers = []
 
             logger.debug("dispatching %d call result(s)", len(input.call_results))
             for cr in input.call_results:
@@ -265,6 +305,7 @@ def _run(self, input: Input) -> Output:
                     state.ready.append(owner)
                     del state.suspended[owner.id]
                     logger.debug("owner %s is now ready", owner)
+                state.outstanding_calls -= 1
 
         logger.debug(
             "%d/%d coroutines are ready",
@@ -342,7 +383,8 @@ def _run(self, input: Input) -> Output:
                     pending_calls.append(call)
                     coroutine.result = CallFuture()
                     state.suspended[coroutine.id] = coroutine
-                    state.prev_calls.append(coroutine)
+                    state.prev_callers.append(coroutine)
+                    state.outstanding_calls += 1
 
                 case Gather():
                     gather = coroutine_yield
@@ -398,9 +440,8 @@ def _run(self, input: Input) -> Output:
         return Output.poll(
             state=serialized_state,
             calls=pending_calls,
-            max_results=1,
-            # FIXME: use min_results + max_results + max_wait to balance latency/throughput
-            # max_results=len(max_results),
+            min_results=max(1, self.poll_min_results),
+            max_results=max(1, min(state.outstanding_calls, self.poll_max_results)),
             max_wait_seconds=self.poll_max_wait_seconds,
         )
 
diff --git a/src/dispatch/sdk/v1/poll_pb2.py b/src/dispatch/sdk/v1/poll_pb2.py
diff --git a/src/dispatch/sdk/v1/poll_pb2.pyi b/src/dispatch/sdk/v1/poll_pb2.pyi
@@ -16,21 +16,24 @@ from dispatch.sdk.v1 import error_pb2 as _error_pb2
 DESCRIPTOR: _descriptor.FileDescriptor
 
 class Poll(_message.Message):
-    __slots__ = ("coroutine_state", "calls", "max_wait", "max_results")
+    __slots__ = ("coroutine_state", "calls", "max_wait", "max_results", "min_results")
     COROUTINE_STATE_FIELD_NUMBER: _ClassVar[int]
     CALLS_FIELD_NUMBER: _ClassVar[int]
     MAX_WAIT_FIELD_NUMBER: _ClassVar[int]
     MAX_RESULTS_FIELD_NUMBER: _ClassVar[int]
+    MIN_RESULTS_FIELD_NUMBER: _ClassVar[int]
     coroutine_state: bytes
     calls: _containers.RepeatedCompositeFieldContainer[_call_pb2.Call]
     max_wait: _duration_pb2.Duration
     max_results: int
+    min_results: int
     def __init__(
         self,
         coroutine_state: _Optional[bytes] = ...,
         calls: _Optional[_Iterable[_Union[_call_pb2.Call, _Mapping]]] = ...,
         max_wait: _Optional[_Union[_duration_pb2.Duration, _Mapping]] = ...,
         max_results: _Optional[int] = ...,
+        min_results: _Optional[int] = ...,
     ) -> None: ...
 
 class PollResult(_message.Message):
diff --git a/src/dispatch/test/server.py b/src/dispatch/test/server.py
@@ -19,8 +19,8 @@ class DispatchServer:
     def __init__(
         self,
         service: dispatch_grpc.DispatchServiceServicer,
-        hostname="127.0.0.1",
-        port=0,
+        hostname: str = "127.0.0.1",
+        port: int = 0,
     ):
         self._thread_pool = concurrent.futures.thread.ThreadPoolExecutor()
         self._server = grpc.server(self._thread_pool)
diff --git a/src/dispatch/test/service.py b/src/dispatch/test/service.py
@@ -275,7 +275,7 @@ class Poller:
     function: str
 
     coroutine_state: bytes
-    # TODO: support max_wait/max_results
+    # TODO: support max_wait/min_results/max_results
 
     waiting: dict[DispatchID, call_pb.Call]
     results: dict[DispatchID, call_pb.CallResult]
diff --git a/tests/dispatch/test_scheduler.py b/tests/dispatch/test_scheduler.py
@@ -102,7 +102,10 @@ async def main():
         output = self.start(main)
 
         self.assert_poll_call_functions(
-            output, ["a", "b", "c", "d", "e", "f", "g", "h"]
+            output,
+            ["a", "b", "c", "d", "e", "f", "g", "h"],
+            min_results=1,
+            max_results=8,
         )
 
     def test_resume_after_call(self):
@@ -175,31 +178,39 @@ async def main():
 
         output = self.start(main)
         # a, b, c, d are called first. e is not because it depends on a.
-        calls = self.assert_poll_call_functions(output, ["a", "b", "c", "d"])
+        calls = self.assert_poll_call_functions(
+            output, ["a", "b", "c", "d"], min_results=1, max_results=4
+        )
         correlation_ids.update(call.correlation_id for call in calls)
         results = [
             CallResult.from_value(i, correlation_id=call.correlation_id)
             for i, call in enumerate(calls)
         ]
         output = self.resume(main, output, results)
         # e is called next
-        calls = self.assert_poll_call_functions(output, ["e"])
+        calls = self.assert_poll_call_functions(
+            output, ["e"], min_results=1, max_results=1
+        )
         correlation_ids.update(call.correlation_id for call in calls)
         output = self.resume(
             main,
             output,
             [CallResult.from_value(4, correlation_id=calls[0].correlation_id)],
         )
         # f is called next
-        calls = self.assert_poll_call_functions(output, ["f"])
+        calls = self.assert_poll_call_functions(
+            output, ["f"], min_results=1, max_results=1
+        )
         correlation_ids.update(call.correlation_id for call in calls)
         output = self.resume(
             main,
             output,
             [CallResult.from_value(5, correlation_id=calls[0].correlation_id)],
         )
         # g, h are called next
-        calls = self.assert_poll_call_functions(output, ["g", "h"])
+        calls = self.assert_poll_call_functions(
+            output, ["g", "h"], min_results=1, max_results=2
+        )
         correlation_ids.update(call.correlation_id for call in calls)
         output = self.resume(
             main,
@@ -244,7 +255,9 @@ async def main(c_then_d):
             )
 
         output = self.start(main, c_then_d)
-        calls = self.assert_poll_call_functions(output, ["a", "b", "c"])
+        calls = self.assert_poll_call_functions(
+            output, ["a", "b", "c"], min_results=1, max_results=3
+        )
 
         call_a, call_b, call_c = calls
         a_result, b_result, c_result = 10, 20, 30
@@ -253,7 +266,7 @@ async def main(c_then_d):
             output,
             [CallResult.from_value(c_result, correlation_id=call_c.correlation_id)],
         )
-        self.assert_poll_call_functions(output, ["d"])
+        self.assert_poll_call_functions(output, ["d"], min_results=1, max_results=3)
 
         output = self.resume(
             main, output, [], poll_error=RuntimeError("too many calls")
@@ -343,7 +356,9 @@ def assert_empty_poll(self, output: Output):
         poll = self.assert_poll(output)
         self.assertEqual(len(poll.calls), 0)
 
-    def assert_poll_call_functions(self, output: Output, expect: list[str]):
+    def assert_poll_call_functions(
+        self, output: Output, expect: list[str], min_results=None, max_results=None
+    ):
         poll = self.assert_poll(output)
         # Note: we're not testing endpoint/input here.
         # Check function names match:
@@ -355,4 +370,8 @@ def assert_poll_call_functions(self, output: Output, expect: list[str]):
             len(set(correlation_ids)),
             "correlation IDs were not unique",
         )
+        if min_results is not None:
+            self.assertEqual(min_results, poll.min_results)
+        if max_results is not None:
+            self.assertEqual(max_results, poll.max_results)
         return poll.calls