Commit 790882b

Add rate limit handling to embedders (#425)
* Improve error handling for embedders including rate limit
* Update unit tests
* Update changelog and docs
* Ruff
* Fix mypy
* Fix more mypy issues
* Improve error handling for Mistral and Sentence Transformers
* Improve unit tests
* Move rate limit handler decorator to base class
* Move rate limit module to utils and generate deprecation warnings
* Refactor modules using rate limit handling
* Update docs and examples
* Fix refactoring
* Ruff
1 parent 4541de2 commit 790882b

31 files changed (+880, -309 lines)

CHANGELOG.md

Lines changed: 4 additions & 0 deletions

@@ -2,6 +2,10 @@

 ## Next

+### Added
+
+- Added automatic rate limiting with retry logic and exponential backoff for all Embedding providers using tenacity. The `RateLimitHandler` interface allows for custom rate limiting strategies, including the ability to disable rate limiting entirely.
+
 ## 1.10.0

 ### Added

docs/source/api.rst

Lines changed: 3 additions & 3 deletions

@@ -359,19 +359,19 @@ Rate Limiting
 RateLimitHandler
 ----------------

-.. autoclass:: neo4j_graphrag.llm.rate_limit.RateLimitHandler
+.. autoclass:: neo4j_graphrag.utils.rate_limit.RateLimitHandler
     :members:

 RetryRateLimitHandler
 ---------------------

-.. autoclass:: neo4j_graphrag.llm.rate_limit.RetryRateLimitHandler
+.. autoclass:: neo4j_graphrag.utils.rate_limit.RetryRateLimitHandler
     :members:

 NoOpRateLimitHandler
 --------------------

-.. autoclass:: neo4j_graphrag.llm.rate_limit.NoOpRateLimitHandler
+.. autoclass:: neo4j_graphrag.utils.rate_limit.NoOpRateLimitHandler
     :members:


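The three autoclass targets above only change module path: the rate limit handlers now live in neo4j_graphrag.utils.rate_limit. The commit message ("Move rate limit module to utils and generate deprecation warnings") suggests the old neo4j_graphrag.llm.rate_limit path remains importable during a transition period; a minimal sketch of what imports are expected to look like (the legacy-path behavior is inferred from the commit message, not shown in this diff):

    # Preferred import path after this commit
    from neo4j_graphrag.utils.rate_limit import RetryRateLimitHandler

    # Legacy path; expected to keep working but emit a deprecation warning
    from neo4j_graphrag.llm.rate_limit import RetryRateLimitHandler as LegacyRetryRateLimitHandler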
docs/source/user_guide_rag.rst

Lines changed: 33 additions & 2 deletions

@@ -327,7 +327,7 @@ Rate limiting is enabled by default for all LLM instances with the following con
 .. code:: python

     from neo4j_graphrag.llm import OpenAILLM
-    from neo4j_graphrag.llm.rate_limit import RetryRateLimitHandler
+    from neo4j_graphrag.utils.rate_limit import RetryRateLimitHandler

     # Customize rate limiting parameters
     llm = OpenAILLM(
@@ -348,7 +348,7 @@ You can customize the rate limiting behavior by creating your own rate limit han
 .. code:: python

     from neo4j_graphrag.llm import AnthropicLLM
-    from neo4j_graphrag.llm.rate_limit import RateLimitHandler
+    from neo4j_graphrag.utils.rate_limit import RateLimitHandler

     class CustomRateLimitHandler(RateLimitHandler):
         """Implement your custom rate limiting strategy."""
@@ -528,6 +528,37 @@ The `OpenAIEmbeddings` was illustrated previously. Here is how to use the `Sente

 If another embedder is desired, a custom embedder can be created, using the `Embedder` interface.

+Embedder Rate Limiting
+----------------------
+
+All embedder implementations include automatic rate limiting that uses retry logic with exponential backoff by default, similar to LLM implementations. This feature helps handle API rate limits from embedding providers gracefully.
+
+.. code:: python
+
+    from neo4j_graphrag.embeddings import OpenAIEmbeddings
+    from neo4j_graphrag.utils.rate_limit import RetryRateLimitHandler, NoOpRateLimitHandler
+
+    # Default rate limiting (automatically enabled)
+    embedder = OpenAIEmbeddings(model="text-embedding-3-large")
+
+    # Custom rate limiting configuration
+    embedder = OpenAIEmbeddings(
+        model="text-embedding-3-large",
+        rate_limit_handler=RetryRateLimitHandler(
+            max_attempts=5,
+            min_wait=2.0,
+            max_wait=120.0
+        )
+    )
+
+    # Disable rate limiting
+    embedder = OpenAIEmbeddings(
+        model="text-embedding-3-large",
+        rate_limit_handler=NoOpRateLimitHandler()
+    )
+
+The rate limiting configuration works the same way as for LLMs. See the :ref:`Rate Limit Handling <Rate Limit Handling>` section above for more details on customization options.
+

 Other Vector Retriever Configuration
 ----------------------------------------
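Because both LLM objects and embedders now accept the same `rate_limit_handler` argument, one handler instance can define a single retry policy for a whole pipeline. A minimal sketch, assuming `OpenAILLM` accepts the same keyword as documented for LLMs above (its `model_name` parameter and the model names are illustrative, not shown in this hunk):

    from neo4j_graphrag.embeddings import OpenAIEmbeddings
    from neo4j_graphrag.llm import OpenAILLM
    from neo4j_graphrag.utils.rate_limit import RetryRateLimitHandler

    # One retry policy (5 attempts, exponential backoff between 2s and 120s)
    # shared by the LLM and the embedder.
    shared_handler = RetryRateLimitHandler(max_attempts=5, min_wait=2.0, max_wait=120.0)

    llm = OpenAILLM(model_name="gpt-4o", rate_limit_handler=shared_handler)
    embedder = OpenAIEmbeddings(
        model="text-embedding-3-large",
        rate_limit_handler=shared_handler,
    )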

examples/customize/embeddings/custom_embeddings.py

Lines changed: 1 addition & 1 deletion

@@ -8,7 +8,7 @@ class CustomEmbeddings(Embedder):
     def __init__(self, dimension: int = 10, **kwargs: Any):
         self.dimension = dimension

-    def embed_query(self, input: str) -> list[float]:
+    def _embed_query(self, input: str) -> list[float]:
         return [random.random() for _ in range(self.dimension)]

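Only the method name changes in this example: the public `embed_query` entry point is now inherited from the `Embedder` base class (see `src/neo4j_graphrag/embeddings/base.py` below), which applies rate limit handling and then delegates to `_embed_query`. A minimal, self-contained sketch of the resulting pattern; the class name and dimension are illustrative, not part of this commit:

    import random
    from typing import Optional

    from neo4j_graphrag.embeddings.base import Embedder
    from neo4j_graphrag.utils.rate_limit import RateLimitHandler


    class RandomEmbeddings(Embedder):
        """Toy embedder returning random vectors; only _embed_query is overridden."""

        def __init__(
            self,
            dimension: int = 10,
            rate_limit_handler: Optional[RateLimitHandler] = None,
        ) -> None:
            super().__init__(rate_limit_handler)
            self.dimension = dimension

        def _embed_query(self, text: str) -> list[float]:
            return [random.random() for _ in range(self.dimension)]


    embedder = RandomEmbeddings(dimension=5)
    # Callers keep using the public method; rate limiting happens in the base class.
    vector = embedder.embed_query("graph retrieval augmented generation")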

examples/customize/llms/custom_llm.py

Lines changed: 1 addition & 1 deletion

@@ -3,7 +3,7 @@
 from typing import Any, Awaitable, Callable, List, Optional, TypeVar, Union

 from neo4j_graphrag.llm import LLMInterface, LLMResponse
-from neo4j_graphrag.llm.rate_limit import (
+from neo4j_graphrag.utils.rate_limit import (
     RateLimitHandler,
     # rate_limit_handler,
     # async_rate_limit_handler,

examples/customize/retrievers/hybrid_retrievers/hybrid_cypher_search.py

Lines changed: 1 addition & 1 deletion

@@ -20,7 +20,7 @@

 # Create Embedder object
 class CustomEmbedder(Embedder):
-    def embed_query(self, text: str) -> list[float]:
+    def _embed_query(self, text: str) -> list[float]:
         return [random() for _ in range(DIMENSION)]


examples/customize/retrievers/hybrid_retrievers/hybrid_search.py

Lines changed: 1 addition & 1 deletion

@@ -20,7 +20,7 @@

 # Create Embedder object
 class CustomEmbedder(Embedder):
-    def embed_query(self, text: str) -> list[float]:
+    def _embed_query(self, text: str) -> list[float]:
         return [random() for _ in range(DIMENSION)]


src/neo4j_graphrag/embeddings/base.py

Lines changed: 29 additions & 1 deletion

@@ -15,15 +15,31 @@
 from __future__ import annotations

 from abc import ABC, abstractmethod
+from typing import Optional
+
+from neo4j_graphrag.utils.rate_limit import (
+    DEFAULT_RATE_LIMIT_HANDLER,
+    RateLimitHandler,
+    rate_limit_handler,
+)


 class Embedder(ABC):
     """
     Interface for embedding models.
     An embedder passed into a retriever must implement this interface.
+
+    Args:
+        rate_limit_handler (Optional[RateLimitHandler]): Handler for rate limiting. Defaults to retry with exponential backoff.
     """

-    @abstractmethod
+    def __init__(self, rate_limit_handler: Optional[RateLimitHandler] = None):
+        if rate_limit_handler is not None:
+            self._rate_limit_handler = rate_limit_handler
+        else:
+            self._rate_limit_handler = DEFAULT_RATE_LIMIT_HANDLER
+
+    @rate_limit_handler
     def embed_query(self, text: str) -> list[float]:
         """Embed query text.

@@ -33,3 +49,15 @@ def embed_query(self, text: str) -> list[float]:
         Returns:
             list[float]: A vector embedding.
         """
+        return self._embed_query(text)
+
+    @abstractmethod
+    def _embed_query(self, text: str) -> list[float]:
+        """Embed query text.
+
+        Args:
+            text (str): Text to convert to vector embedding
+
+        Returns:
+            list[float]: A vector embedding.
+        """

src/neo4j_graphrag/embeddings/cohere.py

Lines changed: 22 additions & 9 deletions

@@ -14,9 +14,11 @@
 # limitations under the License.
 from __future__ import annotations

-from typing import Any
+from typing import Any, Optional

 from neo4j_graphrag.embeddings.base import Embedder
+from neo4j_graphrag.exceptions import EmbeddingsGenerationError
+from neo4j_graphrag.utils.rate_limit import RateLimitHandler

 try:
     import cohere
@@ -25,19 +27,30 @@


 class CohereEmbeddings(Embedder):
-    def __init__(self, model: str = "", **kwargs: Any) -> None:
+    def __init__(
+        self,
+        model: str = "",
+        rate_limit_handler: Optional[RateLimitHandler] = None,
+        **kwargs: Any,
+    ) -> None:
         if cohere is None:
             raise ImportError(
                 """Could not import cohere python client.
                 Please install it with `pip install "neo4j-graphrag[cohere]"`."""
             )
+        super().__init__(rate_limit_handler)
         self.model = model
         self.client = cohere.Client(**kwargs)

-    def embed_query(self, text: str, **kwargs: Any) -> list[float]:
-        response = self.client.embed(
-            texts=[text],
-            model=self.model,
-            **kwargs,
-        )
-        return response.embeddings[0]  # type: ignore
+    def _embed_query(self, text: str, **kwargs: Any) -> list[float]:
+        try:
+            response = self.client.embed(
+                texts=[text],
+                model=self.model,
+                **kwargs,
+            )
+            return response.embeddings[0]  # type: ignore
+        except Exception as e:
+            raise EmbeddingsGenerationError(
+                f"Failed to generate embedding with Cohere: {e}"
+            ) from e
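Provider failures are now wrapped in `EmbeddingsGenerationError` (raised `from` the original exception), so callers can handle embedding failures uniformly across providers. A minimal sketch, assuming Cohere credentials are available in the environment; the model name is illustrative:

    from neo4j_graphrag.embeddings.cohere import CohereEmbeddings
    from neo4j_graphrag.exceptions import EmbeddingsGenerationError

    embedder = CohereEmbeddings(model="embed-english-v3.0")

    try:
        vector = embedder.embed_query("What is a knowledge graph?")
    except EmbeddingsGenerationError as e:
        # The original Cohere exception is preserved as e.__cause__.
        print(f"Embedding failed: {e}")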

src/neo4j_graphrag/embeddings/mistral.py

Lines changed: 19 additions & 6 deletions

@@ -16,10 +16,11 @@
 from __future__ import annotations

 import os
-from typing import Any
+from typing import Any, Optional

 from neo4j_graphrag.embeddings.base import Embedder
 from neo4j_graphrag.exceptions import EmbeddingsGenerationError
+from neo4j_graphrag.utils.rate_limit import RateLimitHandler

 try:
     from mistralai import Mistral
@@ -36,29 +37,41 @@ class MistralAIEmbeddings(Embedder):
         model (str): The name of the Mistral AI text embedding model to use. Defaults to "mistral-embed".
     """

-    def __init__(self, model: str = "mistral-embed", **kwargs: Any) -> None:
+    def __init__(
+        self,
+        model: str = "mistral-embed",
+        rate_limit_handler: Optional[RateLimitHandler] = None,
+        **kwargs: Any,
+    ) -> None:
         if Mistral is None:
             raise ImportError(
                 """Could not import mistralai.
                 Please install it with `pip install "neo4j-graphrag[mistralai]"`."""
             )
+        super().__init__(rate_limit_handler)
         api_key = kwargs.pop("api_key", None)
         if api_key is None:
             api_key = os.getenv("MISTRAL_API_KEY", "")
         self.model = model
         self.mistral_client = Mistral(api_key=api_key, **kwargs)

-    def embed_query(self, text: str, **kwargs: Any) -> list[float]:
+    def _embed_query(self, text: str, **kwargs: Any) -> list[float]:
         """
         Generate embeddings for a given query using a Mistral AI text embedding model.

         Args:
             text (str): The text to generate an embedding for.
             **kwargs (Any): Additional keyword arguments to pass to the Mistral AI client.
         """
-        embeddings_batch_response = self.mistral_client.embeddings.create(
-            model=self.model, inputs=[text], **kwargs
-        )
+        try:
+            embeddings_batch_response = self.mistral_client.embeddings.create(
+                model=self.model, inputs=[text], **kwargs
+            )
+        except Exception as e:
+            raise EmbeddingsGenerationError(
+                f"Failed to generate embedding with MistralAI: {e}"
+            ) from e
+
         if embeddings_batch_response is None or not embeddings_batch_response.data:
             raise EmbeddingsGenerationError("Failed to retrieve embeddings.")
6477
