diff --git a/docs/cluster-setup.rst b/docs/cluster-setup.rst index 300be95e..0dfb2987 100644 --- a/docs/cluster-setup.rst +++ b/docs/cluster-setup.rst @@ -19,7 +19,7 @@ A fully functional docker image can be found at https://github.com/Grokzen/docke See repo `README` for detailed instructions how to setup and run. - +A docker_compose.yml file is included in the test suite, which can be used to run a redis cluster configured appropriately to run the tests against. Vagrant ------- diff --git a/rediscluster/client.py b/rediscluster/client.py index 844cb4c9..34229178 100644 --- a/rediscluster/client.py +++ b/rediscluster/client.py @@ -589,6 +589,14 @@ def _execute_command(self, *args, **kwargs): ttl = int(self.RedisClusterRequestTTL) connection_error_retry_counter = 0 + def log_exception(message, exception): + if ttl == 0: + # This is the last attempt before we run out of TTL, so log the full exception. + log.exception(message) + else: + # We are going to retry, and therefore may yet succeed, so just log a warning. + log.warning(message + str(exception)) + while ttl > 0: ttl -= 1 connection = None @@ -630,7 +638,7 @@ def _execute_command(self, *args, **kwargs): connection.send_command(*args) return self.parse_response(connection, command, **kwargs) except SlotNotCoveredError as e: - log.exception("SlotNotCoveredError") + log_exception("SlotNotCoveredError", e) # In some cases during failover to a replica is happening # a slot sometimes is not covered by the cluster layout and @@ -639,13 +647,13 @@ def _execute_command(self, *args, **kwargs): time.sleep(0.1) # This is the last attempt before we run out of TTL, raise the exception - if ttl == 1: + if ttl == 0: raise e except (RedisClusterException, BusyLoadingError): log.exception("RedisClusterException || BusyLoadingError") raise - except ConnectionError: - log.exception("ConnectionError") + except ConnectionError as e: + log_exception("ConnectionError", e) # ConnectionError can also be raised if we couldn't get a connection # from the pool before timing out, so check that this is an actual @@ -670,8 +678,8 @@ def _execute_command(self, *args, **kwargs): self.connection_pool.nodes.increment_reinitialize_counter( count=self.connection_pool.nodes.reinitialize_steps, ) - except TimeoutError: - log.exception("TimeoutError") + except TimeoutError as e: + log_exception("TimeoutError", e) connection.disconnect() if ttl < self.RedisClusterRequestTTL / 2: @@ -692,20 +700,20 @@ def _execute_command(self, *args, **kwargs): # This counter will increase faster when the same client object # is shared between multiple threads. To reduce the frequency you # can set the variable 'reinitialize_steps' in the constructor. - log.exception("MovedError") + log_exception("MovedError", e) self.refresh_table_asap = True self.connection_pool.nodes.increment_reinitialize_counter() node = self.connection_pool.nodes.set_node(e.host, e.port, server_type='master') self.connection_pool.nodes.slots[e.slot_id][0] = node - except TryAgainError: - log.exception("TryAgainError") + except TryAgainError as e: + log_exception("TryAgainError", e) if ttl < self.RedisClusterRequestTTL / 2: time.sleep(0.05) except AskError as e: - log.exception("AskError") + log_exception("AskError", e) redirect_addr, asking = "{0}:{1}".format(e.host, e.port), True except BaseException as e: diff --git a/rediscluster/nodemanager.py b/rediscluster/nodemanager.py index 9fe2b13a..c7231d94 100644 --- a/rediscluster/nodemanager.py +++ b/rediscluster/nodemanager.py @@ -306,6 +306,7 @@ def initialize(self): # Set the tmp variables to the real variables self.slots = tmp_slots self.nodes = nodes_cache + self.populate_startup_nodes() self.reinitialize_counter = 0 log.debug("NodeManager initialize done : Nodes") @@ -413,7 +414,8 @@ def set_node(self, host, port, server_type=None): def populate_startup_nodes(self): """ - Do something with all startup nodes and filters out any duplicates + Use nodes to populate startup_nodes, so that we have more chances + if a subset of the cluster fails. """ for item in self.startup_nodes: self.set_node_name(item) diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml new file mode 100644 index 00000000..919edc73 --- /dev/null +++ b/tests/docker-compose.yml @@ -0,0 +1,20 @@ +version: "3.7" + +services: + + redis: + image: grokzen/redis-cluster:latest + environment: + REDIS_PORT: 7000 + BIND_ADDRESS: 0.0.0.0 + ports: + - "7000:7000" + - "7001:7001" + - "7002:7002" + - "7003:7003" + - "7004:7004" + - "7005:7005" + - "7006:7006" + - "7007:7007" + # Catch signals (in particular, termination) + init: true diff --git a/tests/test_cluster_node_manager.py b/tests/test_cluster_node_manager.py index 3b25392f..5885ee5b 100644 --- a/tests/test_cluster_node_manager.py +++ b/tests/test_cluster_node_manager.py @@ -169,7 +169,7 @@ def test_empty_startup_nodes(): def test_wrong_startup_nodes_type(): """ - If something other then a list type itteratable is provided it should fail + If something other then a list type iterable is provided it should fail """ with pytest.raises(RedisClusterException): NodeManager({}) @@ -263,9 +263,19 @@ def test_all_nodes(): assert node in nodes +def test_startup_nodes_are_populated(): + """ + Set a list of nodes and it should be possible to iterate over all + """ + n = NodeManager(startup_nodes=[{"host": "127.0.0.1", "port": 7000}]) + n.initialize() + + assert sorted([node['port'] for node in n.startup_nodes]) == [7000, 7000, 7001, 7002, 7003, 7004, 7005] + + def test_all_nodes_masters(): """ - Set a list of nodes with random masters/slaves config and it shold be possible + Set a list of nodes with random masters/slaves config and it should be possible to iterate over all of them. """ n = NodeManager( diff --git a/tests/test_multiprocessing_cluster.py b/tests/test_multiprocessing_cluster.py index 3354a3b6..b72f7951 100644 --- a/tests/test_multiprocessing_cluster.py +++ b/tests/test_multiprocessing_cluster.py @@ -117,7 +117,6 @@ def target(pool): # Check that connection is still alive after fork process has exited # and disconnected the connections in its pool - conn = pool.get_random_connection() with exit_callback(pool.release, conn): assert conn.send_command('ping') is None assert conn.read_response() == b'PONG'