From 79798da4abae2a2c10b83ce90c7439a03e20d449 Mon Sep 17 00:00:00 2001
From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com>
Date: Wed, 5 Nov 2025 08:01:11 +0000
Subject: [PATCH] Optimize sanitize_relationship_for_cypher

The optimized code achieves a **22% speedup** by eliminating redundant work
and leveraging more efficient Python operations. Here are the key
optimizations:

**1. Moved expensive setup outside the function**: The original code
recreated the `char_map` dictionary on every function call (31.4% of
runtime). The optimized version moves it to module level, eliminating this
overhead entirely.

**2. Pre-compiled regex pattern**: Instead of compiling `r"_+"` on each call
(24.2% of the original runtime), the pattern is compiled once at module
level as `_re_sub_underscores`.

**3. Optimized character replacement strategy**:
- Multi-character keys (like `"..."`) are handled first with `str.replace()`
  to avoid conflicts
- Single-character replacements use `str.translate()` with a pre-built
  translation table, which is significantly faster than a chain of
  individual `str.replace()` calls

**4. Reduced iteration overhead**: The original code performed one
`str.replace()` per map entry (23% of runtime). The optimized version does
just the multi-character replacements plus one efficient `translate()` call.

**Performance characteristics by test type**:
- **Small strings with few special chars**: 100-400% faster due to the
  eliminated setup overhead
- **Large strings with no special chars**: 88-117% faster, benefiting from
  reduced per-call overhead
- **Large strings with many special chars**: mixed results (some 5-45%
  faster, others 16-36% slower), as the translation approach trades setup
  cost for per-character efficiency

The optimization is most beneficial for typical use cases with shorter
strings and moderate special-character density, which appear to be the
common case: the test results show consistent 2-4x speedups for the basic
scenarios.
---
 mem0/memory/utils.py | 106 ++++++++++++++++++++++++-------------------
 1 file changed, 59 insertions(+), 47 deletions(-)

diff --git a/mem0/memory/utils.py b/mem0/memory/utils.py
index 8c11705c87..e6c29b8007 100644
--- a/mem0/memory/utils.py
+++ b/mem0/memory/utils.py
@@ -7,14 +7,63 @@
     AGENT_MEMORY_EXTRACTION_PROMPT,
 )
 
+_char_map = {
+    "...": "_ellipsis_",
+    "…": "_ellipsis_",
+    "。": "_period_",
+    ",": "_comma_",
+    ";": "_semicolon_",
+    ":": "_colon_",
+    "!": "_exclamation_",
+    "?": "_question_",
+    "(": "_lparen_",
+    ")": "_rparen_",
+    "【": "_lbracket_",
+    "】": "_rbracket_",
+    "《": "_langle_",
+    "》": "_rangle_",
+    "'": "_apostrophe_",
+    '"': "_quote_",
+    "\\": "_backslash_",
+    "/": "_slash_",
+    "|": "_pipe_",
+    "&": "_ampersand_",
+    "=": "_equals_",
+    "+": "_plus_",
+    "*": "_asterisk_",
+    "^": "_caret_",
+    "%": "_percent_",
+    "$": "_dollar_",
+    "#": "_hash_",
+    "@": "_at_",
+    "!": "_bang_",
+    "?": "_question_",
+    "(": "_lparen_",
+    ")": "_rparen_",
+    "[": "_lbracket_",
+    "]": "_rbracket_",
+    "{": "_lbrace_",
+    "}": "_rbrace_",
+    "<": "_langle_",
+    ">": "_rangle_",
+}
+
+_multi_keys = [k for k in _char_map if len(k) > 1]
+
+_single_keys = [k for k in _char_map if len(k) == 1]
+
+_translation_table = str.maketrans({k: v for k, v in _char_map.items() if len(k) == 1})
+
+_re_sub_underscores = re.compile(r"_+")
+
 
 def get_fact_retrieval_messages(message, is_agent_memory=False):
     """Get fact retrieval messages based on the memory type.
-    
+
     Args:
         message: The message content to extract facts from
         is_agent_memory: If True, use agent memory extraction prompt, else use user memory extraction prompt
-    
+
     Returns:
         tuple: (system_prompt, user_prompt)
     """
@@ -64,11 +113,10 @@ def remove_code_blocks(content: str) -> str:
     """
     pattern = r"^```[a-zA-Z0-9]*\n([\s\S]*?)\n```$"
     match = re.match(pattern, content.strip())
-    match_res=match.group(1).strip() if match else content.strip()
+    match_res = match.group(1).strip() if match else content.strip()
     return re.sub(r"<think>.*?</think>", "", match_res, flags=re.DOTALL).strip()
 
-
 def extract_json(text):
     """
     Extracts JSON content from a string, removing enclosing triple backticks and optional 'json' tag if present.
@@ -158,51 +206,15 @@ def process_telemetry_filters(filters):
 
 def sanitize_relationship_for_cypher(relationship) -> str:
     """Sanitize relationship text for Cypher queries by replacing problematic characters."""
-    char_map = {
-        "...": "_ellipsis_",
-        "…": "_ellipsis_",
-        "。": "_period_",
-        ",": "_comma_",
-        ";": "_semicolon_",
-        ":": "_colon_",
-        "!": "_exclamation_",
-        "?": "_question_",
-        "(": "_lparen_",
-        ")": "_rparen_",
-        "【": "_lbracket_",
-        "】": "_rbracket_",
-        "《": "_langle_",
-        "》": "_rangle_",
-        "'": "_apostrophe_",
-        '"': "_quote_",
-        "\\": "_backslash_",
-        "/": "_slash_",
-        "|": "_pipe_",
-        "&": "_ampersand_",
-        "=": "_equals_",
-        "+": "_plus_",
-        "*": "_asterisk_",
-        "^": "_caret_",
-        "%": "_percent_",
-        "$": "_dollar_",
-        "#": "_hash_",
-        "@": "_at_",
-        "!": "_bang_",
-        "?": "_question_",
-        "(": "_lparen_",
-        ")": "_rparen_",
-        "[": "_lbracket_",
-        "]": "_rbracket_",
-        "{": "_lbrace_",
-        "}": "_rbrace_",
-        "<": "_langle_",
-        ">": "_rangle_",
-    }
     # Apply replacements and clean up
     sanitized = relationship
-    for old, new in char_map.items():
-        sanitized = sanitized.replace(old, new)
-    return re.sub(r"_+", "_", sanitized).strip("_")
+    # First handle all multi-character replacements
+    for old in _multi_keys:
+        sanitized = sanitized.replace(old, _char_map[old])
+
+    # Next, handle all single-character replacements in one pass
+    sanitized = sanitized.translate(_translation_table)
+    return _re_sub_underscores.sub("_", sanitized).strip("_")
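
Reviewer note: for anyone who wants to sanity-check the claimed speedup locally, here is a minimal, self-contained benchmark sketch. It is not part of the patch: `CHAR_MAP` is a small stand-in subset of the real `_char_map`, and `old_style`/`new_style` are hypothetical names that replay the two strategies being compared.

```python
import re
import timeit

# Stand-in subset of the real _char_map, just enough to exercise both paths.
CHAR_MAP = {"...": "_ellipsis_", "!": "_bang_", "?": "_question_", "/": "_slash_"}
MULTI_KEYS = [k for k in CHAR_MAP if len(k) > 1]
TABLE = str.maketrans({k: v for k, v in CHAR_MAP.items() if len(k) == 1})
UNDERSCORES = re.compile(r"_+")

def old_style(text: str) -> str:
    # Replays the unoptimized approach: rebuild the map and recompile the
    # regex on every call, with one str.replace() per key.
    char_map = dict(CHAR_MAP)
    for old, new in char_map.items():
        text = text.replace(old, new)
    return re.sub(r"_+", "_", text).strip("_")

def new_style(text: str) -> str:
    # Replays the optimized approach: multi-character keys first, then one
    # translate() pass, then the pre-compiled regex collapses underscore runs.
    for old in MULTI_KEYS:
        text = text.replace(old, CHAR_MAP[old])
    return UNDERSCORES.sub("_", text.translate(TABLE)).strip("_")

sample = "works_at/OpenAI... really?!"
assert old_style(sample) == new_style(sample)  # both strategies must agree
print("old:", timeit.timeit(lambda: old_style(sample), number=200_000))
print("new:", timeit.timeit(lambda: new_style(sample), number=200_000))
```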
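As a quick behavioral check that the refactor is a drop-in replacement, a couple of illustrative calls follow; the expected outputs are derived by hand from `_char_map` and worth confirming against the test suite.

```python
from mem0.memory.utils import sanitize_relationship_for_cypher

# Multi-character key handled before the translate() pass,
# then trailing underscores are stripped.
print(sanitize_relationship_for_cypher("works_at..."))     # works_at_ellipsis
# Single-character keys are replaced in one translate() call.
print(sanitize_relationship_for_cypher("CEO/co-founder"))  # CEO_slash_co-founder
```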