Test progress bar output

FlorentinD · FlorentinD · commit 37e83bb6dee7 · 2025-01-21T15:05:36.000+01:00
Also fix some inconsistencies along the way,
such as only updating the progress bar when needed and capitalizing the status.
diff --git a/graphdatascience/query_runner/progress/query_progress_logger.py b/graphdatascience/query_runner/progress/query_progress_logger.py
@@ -6,25 +6,27 @@
 from tqdm.auto import tqdm
 
 from ...server_version.server_version import ServerVersion
-from .progress_provider import ProgressProvider
+from .progress_provider import ProgressProvider, TaskWithProgress
 from .query_progress_provider import CypherQueryFunction, QueryProgressProvider, ServerVersionFunction
 from .static_progress_provider import StaticProgressProvider, StaticProgressStore
 
 DataFrameProducer = Callable[[], DataFrame]
 
 
 class QueryProgressLogger:
-    _LOG_POLLING_INTERVAL = 0.5
-
     def __init__(
         self,
         run_cypher_func: CypherQueryFunction,
         server_version_func: ServerVersionFunction,
+        polling_interval: float = 0.5,  # timeout, in seconds, of each wait() on the running future
+        progress_bar_options: Optional[dict[str, Any]] = None,  # extra keyword arguments forwarded to tqdm
     ):
         self._run_cypher_func = run_cypher_func
         self._server_version_func = server_version_func
         self._static_progress_provider = StaticProgressProvider()
         self._query_progress_provider = QueryProgressProvider(run_cypher_func, server_version_func)
+        self._polling_interval = polling_interval
+        self._progress_bar_options = {} if progress_bar_options is None else progress_bar_options  # no shared default
 
     def run_with_progress_logging(
         self, runnable: DataFrameProducer, job_id: str, database: Optional[str] = None
@@ -54,39 +56,18 @@ def _select_progress_provider(self, job_id: str) -> ProgressProvider:
         )
 
     def _log(
-        self, future: "Future[Any]", job_id: str, progress_provider: ProgressProvider, database: Optional[str] = None
+        self, future: Future[Any], job_id: str, progress_provider: ProgressProvider, database: Optional[str] = None
     ) -> None:
         pbar: Optional[tqdm[NoReturn]] = None
         warn_if_failure = True
 
-        while wait([future], timeout=self._LOG_POLLING_INTERVAL).not_done:
+        while wait([future], timeout=self._polling_interval).not_done:
             try:
                 task_with_progress = progress_provider.root_task_with_progress(job_id, database)
-                root_task_name = task_with_progress.task_name
-                progress_percent = task_with_progress.progress_percent
-
-                has_relative_progress = progress_percent != "n/a"
                 if pbar is None:
-                    if has_relative_progress:
-                        pbar = tqdm(total=100, unit="%", desc=root_task_name, maxinterval=self._LOG_POLLING_INTERVAL)
-                    else:
-                        pbar = tqdm(
-                            total=None,
-                            unit="",
-                            desc=root_task_name,
-                            maxinterval=self._LOG_POLLING_INTERVAL,
-                            bar_format="{desc} [elapsed: {elapsed} {postfix}]",
-                        )
-
-                pbar.set_postfix_str(
-                    f"status: {task_with_progress.status}, task: {task_with_progress.sub_tasks_description}"
-                )
-                if has_relative_progress:
-                    parsed_progress = float(progress_percent[:-1])
-                    new_progress = parsed_progress - pbar.n
-                    pbar.update(new_progress)
-                else:
-                    pbar.refresh()  # show latest elapsed time + postfix
+                    pbar = self._init_pbar(task_with_progress)
+
+                self._update_pbar(pbar, task_with_progress)
             except Exception as e:
                 # Do nothing if the procedure either:
                 # * has not started yet,
@@ -100,7 +81,51 @@ def _log(
                     continue
 
         if pbar is not None:
-            if pbar.total is not None:
-                pbar.update(pbar.total - pbar.n)
-            pbar.set_postfix_str("status: finished")
+            self._finish_pbar(pbar)
+
+    def _init_pbar(self, task_with_progress: TaskWithProgress) -> tqdm:  # type: ignore
+        root_task_name = task_with_progress.task_name  # used as the bar's description
+        parsed_progress = QueryProgressLogger._relative_progress(task_with_progress)  # None if not numeric
+        if parsed_progress is None:  # Qualitative progress report
+            return tqdm(
+                total=None,  # total is unknown
+                unit="",
+                desc=root_task_name,
+                maxinterval=self._polling_interval,
+                bar_format="{desc} [elapsed: {elapsed} {postfix}]",  # text-only layout without a bar
+                **self._progress_bar_options,  # extra tqdm kwargs; must not repeat the keywords above
+            )
+        else:  # Quantitative progress report, in percent
+            return tqdm(
+                total=100,
+                unit="%",
+                desc=root_task_name,
+                maxinterval=self._polling_interval,
+                **self._progress_bar_options,  # extra tqdm kwargs; must not repeat the keywords above
+            )
+
+    def _update_pbar(self, pbar: tqdm, task_with_progress: TaskWithProgress) -> None:  # type: ignore
+        parsed_progress = QueryProgressLogger._relative_progress(task_with_progress)  # None if not numeric
+        postfix = (
+            f"status: {task_with_progress.status}, task: {task_with_progress.sub_tasks_description}"
+            if task_with_progress.sub_tasks_description  # leave out the task part when it is empty
+            else f"status: {task_with_progress.status}"
+        )
+        pbar.set_postfix_str(postfix, refresh=False)  # no redraw here; update()/refresh() below handle drawing
+        if parsed_progress is not None:
+            new_progress = parsed_progress - pbar.n  # tqdm.update() expects an increment, so pass the delta
+            pbar.update(new_progress)
+        else:  # no numeric progress to apply; just redraw
             pbar.refresh()
+
+    def _finish_pbar(self, pbar: tqdm) -> None:  # type: ignore
+        if pbar.total is not None:  # bars created without a total have nothing to fill
+            pbar.update(pbar.total - pbar.n)  # advance by whatever is left up to the total
+        pbar.set_postfix_str("status: FINISHED", refresh=True)  # redraw immediately with the final status
+
+    @staticmethod
+    def _relative_progress(task: TaskWithProgress) -> Optional[float]:
+        try:
+            return float(task.progress_percent.removesuffix("%"))  # e.g. "42%" -> 42.0
+        except ValueError:  # not a number, e.g. "n/a" or ""
+            return None
diff --git a/graphdatascience/tests/unit/query_runner/progress/test_query_progress_logger.py b/graphdatascience/tests/unit/query_runner/progress/test_query_progress_logger.py
@@ -1,9 +1,12 @@
+import re
 import time
+from io import StringIO
 from typing import Optional
 
 from pandas import DataFrame
 
 from graphdatascience import ServerVersion
+from graphdatascience.query_runner.progress.progress_provider import TaskWithProgress
 from graphdatascience.query_runner.progress.query_progress_logger import QueryProgressLogger
 from graphdatascience.query_runner.progress.query_progress_provider import QueryProgressProvider
 from graphdatascience.query_runner.progress.static_progress_provider import StaticProgressProvider, StaticProgressStore
@@ -97,6 +100,66 @@ def simple_run_cypher(query: str, database: Optional[str] = None) -> DataFrame:
     assert progress.task_name == "Test task"
 
 
+def test_progress_bar_quantitive_output() -> None:
+    def simple_run_cypher(query: str, database: Optional[str] = None) -> DataFrame:
+        raise NotImplementedError("Should not be called!")  # only the pbar helpers are exercised here
+
+    with StringIO() as pbarOutputStream:  # in-memory sink for the progress bar output
+        qpl = QueryProgressLogger(
+            simple_run_cypher,
+            lambda: ServerVersion(3, 0, 0),
+            progress_bar_options={"file": pbarOutputStream, "mininterval": 0},
+        )
+
+        pbar = qpl._init_pbar(TaskWithProgress("test task", "0%", "PENDING", ""))  # bar is drawn on creation
+        assert pbarOutputStream.getvalue().split("\r")[-1] == "test task:   0%|          | 0/100 [00:00<?, ?%/s]"
+
+        qpl._update_pbar(pbar, TaskWithProgress("test task", "0%", "PENDING", ""))
+        assert (
+            pbarOutputStream.getvalue().split("\r")[-1]
+            == "test task:   0%|          | 0.0/100 [00:00<?, ?%/s, status: PENDING]"
+        )
+        qpl._update_pbar(pbar, TaskWithProgress("test task", "42%", "RUNNING", "root::1/1::leaf"))
+
+        running_output = pbarOutputStream.getvalue().split("\r")[-1]  # text after the last "\r": latest redraw
+        assert re.match(
+            r"test task:  42%\|####2     \| 42.0/100 \[00:00<00:00, \d+.\d*%/s, status: RUNNING, task: root::1/1::leaf\]",
+            running_output,
+        ), running_output
+
+        qpl._finish_pbar(pbar)
+        finished_output = pbarOutputStream.getvalue().split("\r")[-1]  # text after the last "\r": latest redraw
+        assert re.match(
+            r"test task: 100%\|##########\| 100.0/100 \[00:00<00:00, \d+.\d+%/s, status: FINISHED\]", finished_output
+        ), finished_output
+
+
+def test_progress_bar_qualitative_output() -> None:
+    def simple_run_cypher(query: str, database: Optional[str] = None) -> DataFrame:
+        raise NotImplementedError("Should not be called!")  # only the pbar helpers are exercised here
+
+    with StringIO() as pbarOutputStream:  # in-memory sink for the progress bar output
+        qpl = QueryProgressLogger(
+            simple_run_cypher,
+            lambda: ServerVersion(3, 0, 0),
+            progress_bar_options={"file": pbarOutputStream, "mininterval": 100},
+        )
+
+        pbar = qpl._init_pbar(TaskWithProgress("test task", "n/a", "PENDING", ""))
+
+        qpl._update_pbar(pbar, TaskWithProgress("test task", "n/a", "PENDING", ""))
+        qpl._update_pbar(pbar, TaskWithProgress("test task", "", "RUNNING", "root 1/1::leaf"))  # "" is not numeric
+        qpl._finish_pbar(pbar)
+
+        assert pbarOutputStream.getvalue().rstrip() == "".join(  # every redraw starts with "\r"
+            [
+                "\rtest task [elapsed: 00:00 ]\rtest task [elapsed: 00:00 , status: PENDING]",
+                "\rtest task [elapsed: 00:00 , status: RUNNING, task: root 1/1::leaf]",
+                "\rtest task [elapsed: 00:00 , status: FINISHED]",
+            ]
+        )
+
+
 def test_uses_static_store() -> None:
     def fake_run_cypher(query: str, database: Optional[str] = None) -> DataFrame:
         return DataFrame([{"progress": "n/a", "taskName": "Test task", "status": "RUNNING"}])