From f7ee2e19da34c1dfac001a853843f2ab00adedec Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Wed, 5 Nov 2025 07:44:00 +0000 Subject: [PATCH] Optimize format_entities The optimized code replaces an explicit for-loop with a list comprehension, achieving a **15% speedup** by eliminating redundant operations. **Key optimization**: The original code creates an empty list and then repeatedly calls `.append()` in a loop, which involves multiple function calls and intermediate variable assignments. The optimized version uses a single list comprehension that builds the entire list in one operation. **Performance benefits**: - **Eliminates intermediate variable**: No need for the `simplified` variable that temporarily stores each formatted string - **Reduces function call overhead**: `.append()` is called thousands of times in the original version (5,036 hits in profiler), but the list comprehension builds the list directly - **Cheaper per-item append**: The comprehension does not know the final size in advance, but CPython appends each item with the dedicated `LIST_APPEND` bytecode instead of performing an attribute lookup and method call for `.append()` on every iteration **Profiler evidence**: The original version shows significant time spent on string formatting (44.6%) and list appending (30.3%), totaling ~75% of execution time. The optimized version consolidates these operations into a single list comprehension line that accounts for 96.5% of the time but completes faster overall. **Test case performance**: The optimization particularly excels with larger datasets - the 1000-entity test case shows **19-20% speedup**, while smaller test cases (1-3 entities) show slight slowdowns due to list comprehension setup overhead. This suggests the optimization is most beneficial for batch processing scenarios with many entities, which is likely the primary use case for a utility function formatting relationship data. 
--- mem0/memory/utils.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/mem0/memory/utils.py b/mem0/memory/utils.py index 8c11705c87..2644379e34 100644 --- a/mem0/memory/utils.py +++ b/mem0/memory/utils.py @@ -10,11 +10,11 @@ def get_fact_retrieval_messages(message, is_agent_memory=False): """Get fact retrieval messages based on the memory type. - + Args: message: The message content to extract facts from is_agent_memory: If True, use agent memory extraction prompt, else use user memory extraction prompt - + Returns: tuple: (system_prompt, user_prompt) """ @@ -45,11 +45,9 @@ def format_entities(entities): if not entities: return "" - formatted_lines = [] - for entity in entities: - simplified = f"{entity['source']} -- {entity['relationship']} -- {entity['destination']}" - formatted_lines.append(simplified) - + formatted_lines = [ + f"{entity['source']} -- {entity['relationship']} -- {entity['destination']}" for entity in entities + ] return "\n".join(formatted_lines) @@ -64,11 +62,10 @@ def remove_code_blocks(content: str) -> str: """ pattern = r"^```[a-zA-Z0-9]*\n([\s\S]*?)\n```$" match = re.match(pattern, content.strip()) - match_res=match.group(1).strip() if match else content.strip() + match_res = match.group(1).strip() if match else content.strip() return re.sub(r".*?", "", match_res, flags=re.DOTALL).strip() - def extract_json(text): """ Extracts JSON content from a string, removing enclosing triple backticks and optional 'json' tag if present. @@ -205,4 +202,3 @@ def sanitize_relationship_for_cypher(relationship) -> str: sanitized = sanitized.replace(old, new) return re.sub(r"_+", "_", sanitized).strip("_") -