Disallow repeat header tasks during sync

carver · carver · commit 644dcd5408c2 · 2018-08-28T06:38:09.000-07:00
diff --git a/eth/chains/base.py b/eth/chains/base.py
@@ -295,7 +295,10 @@ def validate_uncles(self, block: BaseBlock) -> None:
 
     @abstractmethod
     def validate_chain(
-            self, chain: Tuple[BlockHeader, ...], seal_check_random_sample_rate: int = 1) -> None:
+            self,
+            parent: BlockHeader,
+            chain: Tuple[BlockHeader, ...],
+            seal_check_random_sample_rate: int = 1) -> None:
         raise NotImplementedError("Chain classes must implement this method")
 
 
@@ -868,5 +871,8 @@ async def coro_import_block(self,
         raise NotImplementedError()
 
     async def coro_validate_chain(
-            self, chain: Tuple[BlockHeader, ...], seal_check_random_sample_rate: int = 1) -> None:
+            self,
+            parent: BlockHeader,
+            chain: Tuple[BlockHeader, ...],
+            seal_check_random_sample_rate: int = 1) -> None:
         raise NotImplementedError()
diff --git a/trinity/chains/light.py b/trinity/chains/light.py
@@ -225,7 +225,11 @@ def validate_seal(self, header: BlockHeader) -> None:
     def validate_uncles(self, block: BaseBlock) -> None:
         raise NotImplementedError("Chain classes must implement " + inspect.stack()[0][3])
 
-    def validate_chain(self, chain: Tuple[BlockHeader, ...], seal_check_frequency: int = 1) -> None:
+    def validate_chain(
+            self,
+            parent: BlockHeader,
+            chain: Tuple[BlockHeader, ...],
+            seal_check_random_sample_rate: int = 1) -> None:
         raise NotImplementedError("Chain classes must implement " + inspect.stack()[0][3])
 
     #
diff --git a/trinity/sync/common/chain.py b/trinity/sync/common/chain.py
@@ -1,8 +1,8 @@
 import asyncio
 from abc import abstractmethod
+from operator import attrgetter
 from typing import (
     AsyncGenerator,
-    Set,
     Tuple,
     Union,
     cast,
@@ -81,7 +81,7 @@ def __init__(self,
         # pending queue size should be big enough to avoid starving the processing consumers, but
         # small enough to avoid wasteful over-requests before post-processing can happen
         max_pending_headers = ETHPeer.max_headers_fetch * 8
-        self.header_queue = TaskQueue(max_pending_headers, lambda header: header.block_number)
+        self.header_queue = TaskQueue(max_pending_headers, attrgetter('block_number'))
 
     @property
     def msg_queue_maxsize(self) -> int:
@@ -168,7 +168,7 @@ async def _sync(self, peer: HeaderRequestingPeer) -> None:
             return
 
         self.logger.info("Starting sync with %s", peer)
-        last_received_header = None
+        last_received_header: BlockHeader = None
         # When we start the sync with a peer, we always request up to MAX_REORG_DEPTH extra
         # headers before our current head's number, in case there were chain reorgs since the last
         # time _sync() was called. All of the extra headers that are already present in our DB
@@ -238,10 +238,21 @@ async def _sync(self, peer: HeaderRequestingPeer) -> None:
             # Setting the latest header hash for the peer, before queuing header processing tasks
             self._target_header_hash = peer.head_hash
 
-            await self.header_queue.add(headers)
+            unrequested_headers = tuple(h for h in headers if h not in self.header_queue)
+            await self.header_queue.add(unrequested_headers)
             last_received_header = headers[-1]
             start_at = last_received_header.block_number + 1
 
+        # erase any pending tasks, to restart on next _sync() run
+        try:
+            batch_id, pending_tasks = self.header_queue.get_nowait()
+        except asyncio.QueueFull:
+            # no tasks pending (get_nowait() signals that with QueueFull), continue
+            pass
+        else:
+            # fully remove pending tasks from queue
+            self.header_queue.complete(batch_id, pending_tasks)
+
     async def _fetch_missing_headers(
             self, peer: HeaderRequestingPeer, start_at: int) -> Tuple[BlockHeader, ...]:
         """Fetch a batch of headers starting at start_at and return the ones we're missing."""
diff --git a/trinity/sync/full/chain.py b/trinity/sync/full/chain.py
@@ -1,4 +1,5 @@
 import asyncio
+from concurrent.futures import CancelledError
 import math
 import operator
 from typing import (
@@ -90,9 +91,9 @@ async def _load_and_process_headers(self) -> None:
             # in independent loops
             # TODO implement the maximum task size at each step instead of this magic number
             max_headers = min((MAX_BODIES_FETCH, MAX_RECEIPTS_FETCH)) * 4
-            batch, headers = await self.header_queue.get(max_headers)
+            batch_id, headers = await self.header_queue.get(max_headers)
             await self._process_headers(headers)
-            self.header_queue.complete(batch, headers)
+            self.header_queue.complete(batch_id, headers)
 
     async def _calculate_td(self, headers: Tuple[BlockHeader, ...]) -> int:
         """Return the score (total difficulty) of the last header in the given list.
@@ -215,8 +216,11 @@ async def _get_block_bodies(self,
                 "Timed out requesting block bodies for %d headers from %s", len(batch), peer,
             )
             return tuple(), batch
+        except CancelledError:
+            self.logger.debug("Pending block bodies call to %r future cancelled", peer)
+            return tuple(), batch
         except OperationCancelled:
-            self.logger.trace("Pending block bodies call to %r cancelled", peer)
+            self.logger.trace("Pending block bodies call to %r operation cancelled", peer)
             return tuple(), batch
         except PeerConnectionLost:
             self.logger.debug("Peer went away, cancelling the block body request and moving on...")
@@ -317,8 +321,11 @@ async def _get_receipts(self,
                 "Timed out requesting receipts for %d headers from %s", len(batch), peer,
             )
             return tuple(), batch
+        except CancelledError:
+            self.logger.debug("Pending receipts call to %r future cancelled", peer)
+            return tuple(), batch
         except OperationCancelled:
-            self.logger.trace("Pending receipts call to %r cancelled", peer)
+            self.logger.trace("Pending receipts call to %r operation cancelled", peer)
             return tuple(), batch
         except PeerConnectionLost:
             self.logger.debug("Peer went away, cancelling the receipts request and moving on...")
diff --git a/trinity/sync/light/chain.py b/trinity/sync/light/chain.py
@@ -63,7 +63,7 @@ async def _handle_get_block_headers(self, peer: LESPeer, msg: Dict[str, Any]) ->
 
     async def _persist_headers(self) -> None:
         while self.is_operational:
-            batch, headers = await self.wait(self.header_queue.get())
+            batch_id, headers = await self.wait(self.header_queue.get())
 
             timer = Timer()
             for header in headers:
@@ -74,4 +74,4 @@ async def _persist_headers(self) -> None:
                 "Imported %d headers in %0.2f seconds, new head: #%d",
                 len(headers), timer.elapsed, head.block_number)
 
-            self.header_queue.complete(batch, headers)
+            self.header_queue.complete(batch_id, headers)