From f731e9dc697cd988e68edc2eb9ab69376c49f99b Mon Sep 17 00:00:00 2001 From: Jonny Wylie Date: Wed, 18 Aug 2021 07:33:48 +0100 Subject: [PATCH 1/4] Don't log so many exceptions. Some errors are fatal, so we should log the exception. Other errors we will try to handle and try again, in such a case just log a warning unless it is the final attempt. This means successful retries will not result in an exception being logged. --- rediscluster/client.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/rediscluster/client.py b/rediscluster/client.py index 844cb4c9..9b3ddb0d 100644 --- a/rediscluster/client.py +++ b/rediscluster/client.py @@ -589,6 +589,14 @@ def _execute_command(self, *args, **kwargs): ttl = int(self.RedisClusterRequestTTL) connection_error_retry_counter = 0 + def log_exception(message, exception): + if ttl == 1: + # This is the last attempt before we run out of TTL, so log the full exception. + log.exception(message) + else: + # We are going to retry, and therefore may yet succeed, so just log a warning. + log.warning(message + str(exception)) + while ttl > 0: ttl -= 1 connection = None @@ -630,7 +638,7 @@ def _execute_command(self, *args, **kwargs): connection.send_command(*args) return self.parse_response(connection, command, **kwargs) except SlotNotCoveredError as e: - log.exception("SlotNotCoveredError") + log_exception("SlotNotCoveredError", e) # In some cases during failover to a replica is happening # a slot sometimes is not covered by the cluster layout and @@ -644,8 +652,8 @@ def _execute_command(self, *args, **kwargs): except (RedisClusterException, BusyLoadingError): log.exception("RedisClusterException || BusyLoadingError") raise - except ConnectionError: - log.exception("ConnectionError") + except ConnectionError as e: + log_exception("ConnectionError", e) # ConnectionError can also be raised if we couldn't get a connection # from the pool before timing out, so check that this is an actual @@ -670,8 +678,8 @@ def _execute_command(self, *args, **kwargs): self.connection_pool.nodes.increment_reinitialize_counter( count=self.connection_pool.nodes.reinitialize_steps, ) - except TimeoutError: - log.exception("TimeoutError") + except TimeoutError as e: + log_exception("TimeoutError", e) connection.disconnect() if ttl < self.RedisClusterRequestTTL / 2: @@ -692,20 +700,20 @@ def _execute_command(self, *args, **kwargs): # This counter will increase faster when the same client object # is shared between multiple threads. To reduce the frequency you # can set the variable 'reinitialize_steps' in the constructor. - log.exception("MovedError") + log_exception("MovedError", e) self.refresh_table_asap = True self.connection_pool.nodes.increment_reinitialize_counter() node = self.connection_pool.nodes.set_node(e.host, e.port, server_type='master') self.connection_pool.nodes.slots[e.slot_id][0] = node - except TryAgainError: - log.exception("TryAgainError") + except TryAgainError as e: + log_exception("TryAgainError", e) if ttl < self.RedisClusterRequestTTL / 2: time.sleep(0.05) except AskError as e: - log.exception("AskError") + log_exception("AskError", e) redirect_addr, asking = "{0}:{1}".format(e.host, e.port), True except BaseException as e: From 6b4dbf3e180c1c7bcb3fe1eeec495912b14200d8 Mon Sep 17 00:00:00 2001 From: Jonny Wylie Date: Wed, 18 Aug 2021 07:38:36 +0100 Subject: [PATCH 2/4] Fix detecting when it is the last attempt - SlotNotCoveredError, was re-raising the exception one attempt too soon. - Similarly log_exception --- rediscluster/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rediscluster/client.py b/rediscluster/client.py index 9b3ddb0d..34229178 100644 --- a/rediscluster/client.py +++ b/rediscluster/client.py @@ -590,7 +590,7 @@ def _execute_command(self, *args, **kwargs): connection_error_retry_counter = 0 def log_exception(message, exception): - if ttl == 1: + if ttl == 0: # This is the last attempt before we run out of TTL, so log the full exception. log.exception(message) else: @@ -647,7 +647,7 @@ def log_exception(message, exception): time.sleep(0.1) # This is the last attempt before we run out of TTL, raise the exception - if ttl == 1: + if ttl == 0: raise e except (RedisClusterException, BusyLoadingError): log.exception("RedisClusterException || BusyLoadingError") From 7c87352f82d681603739bf2bb454bd778cddb1ac Mon Sep 17 00:00:00 2001 From: Jonny Wylie Date: Fri, 3 Sep 2021 03:23:57 +0100 Subject: [PATCH 3/4] Initialise the node manager correctly populate_startup_nodes is meant to put all the known nodes in the startup nodes list, but when it was called, the nodes list it was using (self.nodes) is not populated yet. So populate startup nodes after initialising self.nodes, otherwise when we choose a random connection (which is chosen from startup nodes), the options are too limited. Often a user will specify a single startup node at the beginning. --- rediscluster/nodemanager.py | 4 +++- tests/test_cluster_node_manager.py | 14 ++++++++++++-- tests/test_multiprocessing_cluster.py | 1 - 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/rediscluster/nodemanager.py b/rediscluster/nodemanager.py index 9fe2b13a..c7231d94 100644 --- a/rediscluster/nodemanager.py +++ b/rediscluster/nodemanager.py @@ -306,6 +306,7 @@ def initialize(self): # Set the tmp variables to the real variables self.slots = tmp_slots self.nodes = nodes_cache + self.populate_startup_nodes() self.reinitialize_counter = 0 log.debug("NodeManager initialize done : Nodes") @@ -413,7 +414,8 @@ def set_node(self, host, port, server_type=None): def populate_startup_nodes(self): """ - Do something with all startup nodes and filters out any duplicates + Use nodes to populate startup_nodes, so that we have more chances + if a subset of the cluster fails. """ for item in self.startup_nodes: self.set_node_name(item) diff --git a/tests/test_cluster_node_manager.py b/tests/test_cluster_node_manager.py index 3b25392f..5885ee5b 100644 --- a/tests/test_cluster_node_manager.py +++ b/tests/test_cluster_node_manager.py @@ -169,7 +169,7 @@ def test_empty_startup_nodes(): def test_wrong_startup_nodes_type(): """ - If something other then a list type itteratable is provided it should fail + If something other then a list type iterable is provided it should fail """ with pytest.raises(RedisClusterException): NodeManager({}) @@ -263,9 +263,19 @@ def test_all_nodes(): assert node in nodes +def test_startup_nodes_are_populated(): + """ + Set a list of nodes and it should be possible to iterate over all + """ + n = NodeManager(startup_nodes=[{"host": "127.0.0.1", "port": 7000}]) + n.initialize() + + assert sorted([node['port'] for node in n.startup_nodes]) == [7000, 7000, 7001, 7002, 7003, 7004, 7005] + + def test_all_nodes_masters(): """ - Set a list of nodes with random masters/slaves config and it shold be possible + Set a list of nodes with random masters/slaves config and it should be possible to iterate over all of them. """ n = NodeManager( diff --git a/tests/test_multiprocessing_cluster.py b/tests/test_multiprocessing_cluster.py index 3354a3b6..b72f7951 100644 --- a/tests/test_multiprocessing_cluster.py +++ b/tests/test_multiprocessing_cluster.py @@ -117,7 +117,6 @@ def target(pool): # Check that connection is still alive after fork process has exited # and disconnected the connections in its pool - conn = pool.get_random_connection() with exit_callback(pool.release, conn): assert conn.send_command('ping') is None assert conn.read_response() == b'PONG' From dffbd5274176d44f3ef3920d3d997bbe474f508c Mon Sep 17 00:00:00 2001 From: Jonny Wylie Date: Fri, 3 Sep 2021 03:35:25 +0100 Subject: [PATCH 4/4] Make it easier to run the tests. Add a docker-compose.yml file configuring a redis cluster instance for running the tests against. --- docs/cluster-setup.rst | 2 +- tests/docker-compose.yml | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 tests/docker-compose.yml diff --git a/docs/cluster-setup.rst b/docs/cluster-setup.rst index 300be95e..0dfb2987 100644 --- a/docs/cluster-setup.rst +++ b/docs/cluster-setup.rst @@ -19,7 +19,7 @@ A fully functional docker image can be found at https://github.com/Grokzen/docke See repo `README` for detailed instructions how to setup and run. - +A docker_compose.yml file is included in the test suite, which can be used to run a redis cluster configured appropriately to run the tests against. Vagrant ------- diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml new file mode 100644 index 00000000..919edc73 --- /dev/null +++ b/tests/docker-compose.yml @@ -0,0 +1,20 @@ +version: "3.7" + +services: + + redis: + image: grokzen/redis-cluster:latest + environment: + REDIS_PORT: 7000 + BIND_ADDRESS: 0.0.0.0 + ports: + - "7000:7000" + - "7001:7001" + - "7002:7002" + - "7003:7003" + - "7004:7004" + - "7005:7005" + - "7006:7006" + - "7007:7007" + # Catch signals (in particular, termination) + init: true