[IMP] util.explode_query{,_range}: only format the {parallel_filter} placeholder

KangOl · KangOl · commit 58851b2fd74b · 2025-10-13T13:13:23.000Z
The usage of `str.format` to inject the parallel filter used to explode queries is not robust to the presence of other curly braces. Examples: 1. `JSON` strings (typically to leverage their mapping capabilities): See 79f3d71, where a query had to be modified to accommodate that. 2. Hardcoded sets of curly braces: ```python >>> "UPDATE t SET c = '{usage as literal characters}' WHERE {parallel_filter}".format(parallel_filter="…") Traceback (most recent call last): File "<stdin>", line 1, in <module> KeyError: 'usage as literal characters' ``` Which can be (inelegantly) solved by adding even more braces, leveraging a side effect of `str.format`: ```python >>> "UPDATE t SET c = '{{usage as literal characters}}' WHERE {parallel_filter}".format(parallel_filter="…") "UPDATE t SET c = '{usage as literal characters}' WHERE …" ``` 3. Hardcoded single unpaired curly braces (AFAICT there is no way to solve this): ```python >>> "UPDATE t SET c = 'this is an open curly brace = {' WHERE {parallel_filter}".format(parallel_filter="…") Traceback (most recent call last): File "<stdin>", line 1, in <module> ValueError: unexpected '{' in field name ``` ```python >>> "UPDATE t SET c = 'this is a close brace = }' WHERE {parallel_filter}".format(parallel_filter="…") Traceback (most recent call last): File "<stdin>", line 1, in <module> ValueError: Single '}' encountered in format string ``` To circumvent this, we now use a dedicated Formatter that only handles the `{parallel_filter}` placeholder. This has the advantage of still deduplicating the doubled curly braces (see point 2 above) and thus of being retro-compatible. This doesn't solve the single unpaired curly brace case, but that case is rare enough to be handled by other means. closes #339 Signed-off-by: Christophe Simonis (chs) <chs@odoo.com>
diff --git a/src/base/tests/test_util.py b/src/base/tests/test_util.py
@@ -768,6 +768,42 @@ def test_pg_text2html(self, value, expected):
         result = cr.fetchone()[0]
         self.assertEqual(result, expected)
 
+    @parametrize(
+        [
+            ("{parallel_filter}", "…"),
+            ("{{parallel_filter}}", "{parallel_filter}"),
+            ("{}", "{}"),
+            ("{0}", "{0}"),
+            ("{{0}}", "{0}"),
+            ("{x}", "{x}"),
+            ("{{x}}", "{x}"),
+            ("{{}}", "{}"),
+            ("{{", "{"),
+            ("test", "test"),
+            ("", ""),
+            ("WHERE {parallel_filter} AND true", "WHERE … AND true"),
+            ("WHERE {parallel_filter} AND {other}", "WHERE … AND {other}"),
+            ("WHERE {parallel_filter} AND {other!r}", "WHERE … AND {other!r}"),
+            ("WHERE {parallel_filter} AND {{other}}", "WHERE … AND {other}"),
+            ("WHERE {parallel_filter} AND {}", "WHERE … AND {}"),
+            ("WHERE {parallel_filter} AND {{}}", "WHERE … AND {}"),
+            ("WHERE {parallel_filter} AND {parallel_filter}", "WHERE … AND …"),
+            ("using { with other things inside } and {parallel_filter}", "using { with other things inside } and …"),
+        ]
+    )
+    def test_ExplodeFormatter(self, value, expected):
+        formatted = util.pg._ExplodeFormatter().format(value, parallel_filter="…")
+        self.assertEqual(formatted, expected)
+        # retro-compatibility test
+        try:
+            std_formatted = value.format(parallel_filter="…")
+        except (IndexError, KeyError):
+            # ignore string that weren't valid
+            pass
+        else:
+            # assert that the new formatted output match the old one.
+            self.assertEqual(formatted, std_formatted)
+
     def _get_cr(self):
         cr = self.registry.cursor()
         self.addCleanup(cr.close)
diff --git a/src/util/pg.py b/src/util/pg.py
@@ -5,6 +5,7 @@
 import logging
 import os
 import re
+import string
 import threading
 import time
 import uuid
@@ -220,6 +221,43 @@ def wrap(arg):
     return SQLStr(sql.SQL(query).format(*args, **kwargs).as_string(cr._cnx))
 
 
+class _ExplodeFormatter(string.Formatter):
+    """
+    Retro-compatible parallel filter formatter.
+
+    Any input that didn't fail before satisfies:
+    1. There is no replacement in the string other than `{parallel_filter}`.
+    2. Any literal brace was escaped --by doubling them.
+
+    For any input that didn't fail before this new implementation returns the same output
+    as `str.format`.
+
+    The main change here, and the goal of this class, is to now make former invalid input
+    valid. Thus this formatter will _only_ replace `{parallel_filter}` while keeping any
+    other `{str}` or `{int}` elements. Double braces will still be formatted into
+    single ones.
+
+    :meta private: exclude from online docs
+    """
+
+    def parse(self, format_string):
+        for literal_text, field_name, format_spec, conversion in super(_ExplodeFormatter, self).parse(format_string):
+            if field_name is not None and field_name != "parallel_filter":
+                yield literal_text + "{", None, None, None
+                composed = (
+                    field_name
+                    + (("!" + conversion) if conversion else "")
+                    + ((":" + format_spec) if format_spec else "")
+                    + "}"
+                )
+                yield composed, None, None, None
+            else:
+                yield literal_text, field_name, format_spec, conversion
+
+
+_explode_format = _ExplodeFormatter().format
+
+
 def explode_query(cr, query, alias=None, num_buckets=8, prefix=None):
     """
     Explode a query to multiple queries that can be executed in parallel.
@@ -256,7 +294,7 @@ def explode_query(cr, query, alias=None, num_buckets=8, prefix=None):
     if num_buckets < 1:
         raise ValueError("num_buckets should be greater than zero")
     parallel_filter = "mod(abs({prefix}id), %s) = %s".format(prefix=prefix)
-    query = query.replace("%", "%%").format(parallel_filter=parallel_filter)
+    query = _explode_format(query.replace("%", "%%"), parallel_filter=parallel_filter)
     return [cr.mogrify(query, [num_buckets, index]).decode() for index in range(num_buckets)]
 
 
@@ -294,7 +332,7 @@ def explode_query_range(cr, query, table, alias=None, bucket_size=DEFAULT_BUCKET
             # Even if there are any records, return one query to be executed to validate its correctness and avoid
             # scripts that pass the CI but fail in production.
             parallel_filter = "{alias}.id IS NOT NULL".format(alias=alias)
-            return [query.format(parallel_filter=parallel_filter)]
+            return [_explode_format(query, parallel_filter=parallel_filter)]
         else:
             return []
 
@@ -335,10 +373,11 @@ def explode_query_range(cr, query, table, alias=None, bucket_size=DEFAULT_BUCKET
         # Still, since the query may only be valid if there is no split, we force the usage of `prefix` in the query to
         # validate its correctness and avoid scripts that pass the CI but fail in production.
         parallel_filter = "{alias}.id IS NOT NULL".format(alias=alias)
-        return [query.format(parallel_filter=parallel_filter)]
+        return [_explode_format(query, parallel_filter=parallel_filter)]
 
     parallel_filter = "{alias}.id BETWEEN %(lower-bound)s AND %(upper-bound)s".format(alias=alias)
-    query = query.replace("%", "%%").format(parallel_filter=parallel_filter)
+    query = _explode_format(query.replace("%", "%%"), parallel_filter=parallel_filter)
+
     return [
         cr.mogrify(query, {"lower-bound": ids[i], "upper-bound": ids[i + 1] - 1}).decode() for i in range(len(ids) - 1)
     ]