Skip to content

Commit 58851b2

Browse files
committed
[IMP] util.explode_query{,_range}: only format the {parallel_filter} placeholder
The usage of `str.format` to inject the parallel filter used to explode queries is not robust to the presence of other curly braces. Examples: 1. `JSON` strings (typically to leverage their mapping capabilities): See 79f3d71, where a query had to be modified to accommodate that. 2. Hardcoded sets of curly braces: ```python >>> "UPDATE t SET c = '{usage as literal characters}' WHERE {parallel_filter}".format(parallel_filter="…") Traceback (most recent call last): File "<stdin>", line 1, in <module> KeyError: 'usage as literal characters' ``` Which can be (unelegantly) solved adding even more braces, leveraging one side effect of `str.format`: ```python >>> "UPDATE t SET c = '{{usage as literal characters}}' WHERE {parallel_filter}".format(parallel_filter="…") "UPDATE t SET c = '{usage as literal characters}' WHERE …" ``` 3. Hardcoded single unpaired curly braces (AFAICT no way to solve this): ```python >>> "UPDATE t SET c = 'this is an open curly brace = {' WHERE {parallel_filter}".format(parallel_filter="…") Traceback (most recent call last): File "<stdin>", line 1, in <module> ValueError: unexpected '{' in field name ``` ```python >>> "UPDATE t SET c = 'this is a close brace = }' WHERE {parallel_filter}".format(parallel_filter="…") Traceback (most recent call last): File "<stdin>", line 1, in <module> ValueError: Single '}' encountered in format string ``` To circumvent this, we now use a dedicated Formatter that only handle the `{parallel_filter}` placeholder. This has the advantage of still deduplicate the doubled curly braces (see point 2 above) and thus being retro-compatible. This doesn't solve the single unpaired curly braces case, but this is rare enough to be handled by other means. closes #339 Signed-off-by: Christophe Simonis (chs) <chs@odoo.com>
1 parent 3e22c9a commit 58851b2

File tree

2 files changed

+79
-4
lines changed

2 files changed

+79
-4
lines changed

src/base/tests/test_util.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -768,6 +768,42 @@ def test_pg_text2html(self, value, expected):
768768
result = cr.fetchone()[0]
769769
self.assertEqual(result, expected)
770770

771+
@parametrize(
772+
[
773+
("{parallel_filter}", "…"),
774+
("{{parallel_filter}}", "{parallel_filter}"),
775+
("{}", "{}"),
776+
("{0}", "{0}"),
777+
("{{0}}", "{0}"),
778+
("{x}", "{x}"),
779+
("{{x}}", "{x}"),
780+
("{{}}", "{}"),
781+
("{{", "{"),
782+
("test", "test"),
783+
("", ""),
784+
("WHERE {parallel_filter} AND true", "WHERE … AND true"),
785+
("WHERE {parallel_filter} AND {other}", "WHERE … AND {other}"),
786+
("WHERE {parallel_filter} AND {other!r}", "WHERE … AND {other!r}"),
787+
("WHERE {parallel_filter} AND {{other}}", "WHERE … AND {other}"),
788+
("WHERE {parallel_filter} AND {}", "WHERE … AND {}"),
789+
("WHERE {parallel_filter} AND {{}}", "WHERE … AND {}"),
790+
("WHERE {parallel_filter} AND {parallel_filter}", "WHERE … AND …"),
791+
("using { with other things inside } and {parallel_filter}", "using { with other things inside } and …"),
792+
]
793+
)
794+
def test_ExplodeFormatter(self, value, expected):
795+
formatted = util.pg._ExplodeFormatter().format(value, parallel_filter="…")
796+
self.assertEqual(formatted, expected)
797+
# retro-compatibility test
798+
try:
799+
std_formatted = value.format(parallel_filter="…")
800+
except (IndexError, KeyError):
801+
# ignore string that weren't valid
802+
pass
803+
else:
804+
# assert that the new formatted output match the old one.
805+
self.assertEqual(formatted, std_formatted)
806+
771807
def _get_cr(self):
772808
cr = self.registry.cursor()
773809
self.addCleanup(cr.close)

src/util/pg.py

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import logging
66
import os
77
import re
8+
import string
89
import threading
910
import time
1011
import uuid
@@ -220,6 +221,43 @@ def wrap(arg):
220221
return SQLStr(sql.SQL(query).format(*args, **kwargs).as_string(cr._cnx))
221222

222223

224+
class _ExplodeFormatter(string.Formatter):
225+
"""
226+
Retro-compatible parallel filter formatter.
227+
228+
Any input that didn't fail before satisfies:
229+
1. There is no replacement in the string other than `{parallel_filter}`.
230+
2. Any literal brace was escaped --by doubling them.
231+
232+
For any input that didn't fail before this new implementation returns the same output
233+
as `str.format`.
234+
235+
The main change here, and the goal of this class, is to now make former invalid input
236+
valid. Thus this formatter will _only_ replace `{parallel_filter}` while keeping any
237+
other `{str}` or `{int}` elements. Double braces will still be formatted into
238+
single ones.
239+
240+
:meta private: exclude from online docs
241+
"""
242+
243+
def parse(self, format_string):
244+
for literal_text, field_name, format_spec, conversion in super(_ExplodeFormatter, self).parse(format_string):
245+
if field_name is not None and field_name != "parallel_filter":
246+
yield literal_text + "{", None, None, None
247+
composed = (
248+
field_name
249+
+ (("!" + conversion) if conversion else "")
250+
+ ((":" + format_spec) if format_spec else "")
251+
+ "}"
252+
)
253+
yield composed, None, None, None
254+
else:
255+
yield literal_text, field_name, format_spec, conversion
256+
257+
258+
_explode_format = _ExplodeFormatter().format
259+
260+
223261
def explode_query(cr, query, alias=None, num_buckets=8, prefix=None):
224262
"""
225263
Explode a query to multiple queries that can be executed in parallel.
@@ -256,7 +294,7 @@ def explode_query(cr, query, alias=None, num_buckets=8, prefix=None):
256294
if num_buckets < 1:
257295
raise ValueError("num_buckets should be greater than zero")
258296
parallel_filter = "mod(abs({prefix}id), %s) = %s".format(prefix=prefix)
259-
query = query.replace("%", "%%").format(parallel_filter=parallel_filter)
297+
query = _explode_format(query.replace("%", "%%"), parallel_filter=parallel_filter)
260298
return [cr.mogrify(query, [num_buckets, index]).decode() for index in range(num_buckets)]
261299

262300

@@ -294,7 +332,7 @@ def explode_query_range(cr, query, table, alias=None, bucket_size=DEFAULT_BUCKET
294332
# Even if there are any records, return one query to be executed to validate its correctness and avoid
295333
# scripts that pass the CI but fail in production.
296334
parallel_filter = "{alias}.id IS NOT NULL".format(alias=alias)
297-
return [query.format(parallel_filter=parallel_filter)]
335+
return [_explode_format(query, parallel_filter=parallel_filter)]
298336
else:
299337
return []
300338

@@ -335,10 +373,11 @@ def explode_query_range(cr, query, table, alias=None, bucket_size=DEFAULT_BUCKET
335373
# Still, since the query may only be valid if there is no split, we force the usage of `prefix` in the query to
336374
# validate its correctness and avoid scripts that pass the CI but fail in production.
337375
parallel_filter = "{alias}.id IS NOT NULL".format(alias=alias)
338-
return [query.format(parallel_filter=parallel_filter)]
376+
return [_explode_format(query, parallel_filter=parallel_filter)]
339377

340378
parallel_filter = "{alias}.id BETWEEN %(lower-bound)s AND %(upper-bound)s".format(alias=alias)
341-
query = query.replace("%", "%%").format(parallel_filter=parallel_filter)
379+
query = _explode_format(query.replace("%", "%%"), parallel_filter=parallel_filter)
380+
342381
return [
343382
cr.mogrify(query, {"lower-bound": ids[i], "upper-bound": ids[i + 1] - 1}).decode() for i in range(len(ids) - 1)
344383
]

0 commit comments

Comments
 (0)