Remove latency checks for realtime load test & increase workload timeouts (#2228)

RobertLucian · web-flow · commit 212d52ca2765 · 2021-06-06T09:37:42.000-07:00
diff --git a/test/e2e/e2e/tests.py b/test/e2e/e2e/tests.py
@@ -522,10 +522,6 @@ def test_load_realtime(
     total_requests = load_config["total_requests"]
     desired_replicas = load_config["desired_replicas"]
     concurrency = load_config["concurrency"]
-    min_rtt = load_config["min_rtt"]
-    max_rtt = load_config["max_rtt"]
-    avg_rtt = load_config["avg_rtt"]
-    avg_rtt_tolerance = load_config["avg_rtt_tolerance"]
     status_code_timeout = load_config["status_code_timeout"]
 
     api_dir = TEST_APIS_DIR / api
@@ -541,7 +537,6 @@ def test_load_realtime(
 
     # controls the flow of requests
     request_stopper = td.Event()
-    latencies: List[float] = []
     failed = False
     try:
         printer(f"getting {desired_replicas} replicas ready")
@@ -573,27 +568,11 @@ def test_load_realtime(
             api_name,
             concurrency,
             request_stopper,
-            latencies=latencies,
             max_total_requests=total_requests,
             payload=payload,
         )
 
         while not request_stopper.is_set():
-            current_min_rtt = min(latencies) if len(latencies) > 0 else min_rtt
-            assert (
-                current_min_rtt >= min_rtt
-            ), f"min latency threshold hit; got {current_min_rtt}s, but the lowest accepted latency is {min_rtt}s"
-
-            current_max_rtt = max(latencies) if len(latencies) > 0 else max_rtt
-            assert (
-                current_max_rtt <= max_rtt
-            ), f"max latency threshold hit; got {current_max_rtt}s, but the highest accepted latency is {max_rtt}s"
-
-            current_avg_rtt = sum(latencies) / len(latencies) if len(latencies) > 0 else avg_rtt
-            assert (
-                avg_rtt - avg_rtt_tolerance < current_avg_rtt < avg_rtt + avg_rtt_tolerance
-            ), f"avg latency ({current_avg_rtt}s) falls outside the expected range ({avg_rtt - avg_rtt_tolerance}s - {avg_rtt + avg_rtt_tolerance})"
-
             api_info = client.get_api(api_name)
             network_stats = api_info["metrics"]["network_stats"]
 
@@ -604,10 +583,6 @@ def test_load_realtime(
                 network_stats["code_5xx"] - offset_5xx == 0
             ), f"detected 5xx response codes ({network_stats['code_5xx'] - offset_5xx}) in cortex get"
 
-            printer(
-                f"min RTT: {current_min_rtt} | max RTT: {current_max_rtt} | avg RTT: {current_avg_rtt} | requests: {network_stats['code_2xx']-offset_2xx} (out of {total_requests})"
-            )
-
             # check if the requesting threads are still healthy
             # if not, they'll raise an exception
             check_futures_healthy(threads_futures)
diff --git a/test/e2e/tests/conftest.py b/test/e2e/tests/conftest.py
@@ -84,11 +84,11 @@ def pytest_configure(config):
         "global": {
             "local_operator": config.getoption("--local-operator"),
             "realtime_deploy_timeout": int(
-                os.environ.get("CORTEX_TEST_REALTIME_DEPLOY_TIMEOUT", 200)
+                os.environ.get("CORTEX_TEST_REALTIME_DEPLOY_TIMEOUT", 320)
             ),
             "batch_deploy_timeout": int(os.environ.get("CORTEX_TEST_BATCH_DEPLOY_TIMEOUT", 150)),
             "batch_job_timeout": int(os.environ.get("CORTEX_TEST_BATCH_JOB_TIMEOUT", 200)),
-            "async_deploy_timeout": int(os.environ.get("CORTEX_TEST_ASYNC_DEPLOY_TIMEOUT", 150)),
+            "async_deploy_timeout": int(os.environ.get("CORTEX_TEST_ASYNC_DEPLOY_TIMEOUT", 320)),
             "async_workload_timeout": int(
                 os.environ.get("CORTEX_TEST_ASYNC_WORKLOAD_TIMEOUT", 200)
             ),
@@ -107,10 +107,6 @@ def pytest_configure(config):
                     "total_requests": 10 ** 5,
                     "desired_replicas": 50,
                     "concurrency": 50,
-                    "min_rtt": 0.004,  # measured in seconds
-                    "max_rtt": 1.200,  # measured in seconds
-                    "avg_rtt": 0.07,  # measured in seconds
-                    "avg_rtt_tolerance": 0.06,  # measured in seconds
                     "status_code_timeout": 60,  # measured in seconds
                 },
                 "async": {
@@ -125,7 +121,7 @@ def pytest_configure(config):
                     "workers_per_job": 10,
                     "items_per_job": 10 ** 5,
                     "batch_size": 10 * 2,
-                    "workload_timeout": 200,  # measured in seconds
+                    "workload_timeout": 300,  # measured in seconds
                 },
                 "task": {
                     "jobs": 10 ** 2,