diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py
index 8b80737..96c66f6 100644
--- a/numexpr/necompiler.py
+++ b/numexpr/necompiler.py
@@ -920,6 +920,7 @@ def evaluate(ex: str,
              casting: str = 'same_kind',
              sanitize: Optional[bool] = None,
              _frame_depth: int = 3,
+             disable_cache: bool = False,
              **kwargs) -> numpy.ndarray:
     r"""
     Evaluate a simple array expression element-wise using the virtual machine.
@@ -978,10 +979,41 @@ def evaluate(ex: str,
         The calling frame depth. Unless you are a NumExpr developer you should
         not set this value.
 
+    disable_cache: bool
+        If set to be `True`, disables the uses of internal expression cache.
+
+        By default, NumExpr caches compiled expressions and associated metadata
+        (via the internal `_numexpr_last`, `_numexpr_cache`, and `_names_cache`
+        structures). This allows repeated evaluations of the same expression
+        to skip recompilation, improving performance in workloads where the same
+        expression is executed multiple times.
+
+        However, caching retains references to input and output arrays in order
+        to support re-evaluation. As a result, this can increase their reference
+        counts and may prevent them from being garbage-collected immediately.
+        In situations where precise control over object lifetimes or memory
+        management is required, set `disable_cache=True` to avoid this behavior.
+
+        Default is `False`.
+
     """
     # We could avoid code duplication if we called validate and then re_evaluate
     # here, but we have difficulties with the `sys.getframe(2)` call in
     # `getArguments`
+
+    # If dissable_cache set to be True, we evaluate the expression here
+    # Otherwise we validate and then re_evaluate
+    if disable_cache:
+        context = getContext(kwargs)
+        names, ex_uses_vml = getExprNames(ex, context, sanitize=sanitize)
+        arguments = getArguments(names, local_dict, global_dict, _frame_depth=_frame_depth - 1)
+        signature = [(name, getType(arg)) for (name, arg) in
+                     zip(names, arguments)]
+        compiled_ex = NumExpr(ex, signature, sanitize=sanitize, **context)
+        kwargs = {'out': out, 'order': order, 'casting': casting,
+                  'ex_uses_vml': ex_uses_vml}
+        return compiled_ex(*arguments, **kwargs)
+
     e = validate(ex, local_dict=local_dict, global_dict=global_dict,
                  out=out, order=order, casting=casting,
                  _frame_depth=_frame_depth, sanitize=sanitize, **kwargs)
diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py
index 9e98ff1..46fad29 100644
--- a/numexpr/tests/test_numexpr.py
+++ b/numexpr/tests/test_numexpr.py
@@ -337,6 +337,16 @@ def _test_refcount(self):
         evaluate('1')
         assert sys.getrefcount(a) == 2
 
+    # Test if `disable_cache` works correctly with refcount, see issue #521
+    # Comment out as modern Python optimizes handling refcounts.
+    @unittest.skipIf(hasattr(sys, "pypy_version_info"),
+                     "PyPy does not have sys.getrefcount()")
+    def _test_refcount_disable_cache(self):
+        a = array([1])
+        b = array([1])
+        evaluate('a', out=b, disable_cache=True)
+        assert sys.getrefcount(b) == 2
+
     @pytest.mark.thread_unsafe
     def test_locals_clears_globals(self):
         # Check for issue #313, whereby clearing f_locals also clear f_globals