Skip to content

Commit 5db3e34

Browse files
Merge pull request #911 from no-dice-io/li-bridge
Added support for LlamaIndex Retrievers as a DSPy Retriever Module
2 parents 2d12b0b + 9ab749e commit 5db3e34

File tree

4 files changed

+711
-28
lines changed

4 files changed

+711
-28
lines changed

dspy/retrieve/llama_index_rm.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import logging
2+
from typing import Optional
3+
4+
import dspy
5+
6+
try:
7+
from llama_index.core.base.base_retriever import BaseRetriever
8+
except ImportError:
9+
err = "The 'llama_index' package is required to use LlamaIndexRM. Install it with 'pip install llama_index'."
10+
raise ImportError(err) from None
11+
12+
# Logged as a warning (never raised) when the wrapped retriever has no
# `similarity_top_k` attribute, so a k value can be neither read nor applied.
NO_TOP_K_WARNING = "The underlying LlamaIndex retriever does not support top k retrieval. Ignoring k value."
13+
14+
15+
class LlamaIndexRM(dspy.Retrieve):
    """Retriever module that wraps a LlamaIndex retriever for use in DSPy.

    The wrapper bridges LlamaIndex and DSPy so that the retrieval
    abstractions available in LlamaIndex can be used as a DSPy retriever.

    The top-k setting is not stored on this object: the ``k`` property reads
    and writes the wrapped retriever's ``similarity_top_k`` attribute. If the
    wrapped retriever has no such attribute, k is ignored and a warning is
    logged instead.

    To-do (maybe):
        - Async support.
        - Retrieval of non-text modalities (only ``text`` and ``score`` of
          each result are mapped today).

    Args:
        retriever (BaseRetriever): A LlamaIndex retriever object - text based only
        k (int): Optional; the number of examples to retrieve (similarity_top_k).
            When omitted, the retriever's own setting is left untouched.
    """

    retriever: BaseRetriever

    def __init__(
        self,
        retriever: BaseRetriever,
        k: Optional[int] = None,
    ):
        # NOTE(review): dspy.Retrieve.__init__ is not called here (unchanged
        # from the original); confirm the base class needs no setup of its own.
        # The retriever must be assigned first: the `k` setter below uses it.
        self.retriever = retriever

        # Compare against None rather than truthiness so an explicit value is
        # never silently dropped.
        if k is not None:
            self.k = k

    @property
    def k(self) -> Optional[int]:
        """Get similarity top k of retriever.

        Returns:
            The wrapped retriever's ``similarity_top_k``, or None (after
            logging a warning) when the retriever has no such attribute.
        """
        if not hasattr(self.retriever, "similarity_top_k"):
            logging.warning(NO_TOP_K_WARNING)
            return None

        return self.retriever.similarity_top_k

    @k.setter
    def k(self, k: int) -> None:
        """Set similarity top k of retriever.

        The value is written to the wrapped retriever's ``similarity_top_k``.
        If the retriever has no such attribute, nothing is changed and a
        warning is logged.
        """
        if hasattr(self.retriever, "similarity_top_k"):
            self.retriever.similarity_top_k = k
        else:
            logging.warning(NO_TOP_K_WARNING)

    def forward(self, query: str, k: Optional[int] = None) -> list[dspy.Example]:
        """Retrieve examples for a query from the wrapped LlamaIndex retriever.

        Args:
            query (str): The query to retrieve examples for
            k (int): Optional; the number of examples to retrieve for THIS
                call only. The retriever's previous ``similarity_top_k`` is
                restored afterwards, so a per-call override does not leak
                into later calls. Use the ``k`` property (or the constructor)
                to change the setting persistently.

                If the underlying LI retriever does not have the property
                similarity_top_k, k will be ignored and a warning is logged.

        Returns:
            List[dspy.Example]: One example per retrieved result, carrying the
            result's ``text`` and ``score``.
        """
        supports_top_k = hasattr(self.retriever, "similarity_top_k")
        override = k is not None and supports_top_k

        if k is not None and not supports_top_k:
            logging.warning(NO_TOP_K_WARNING)

        if override:
            # Remember the current setting so it can be put back afterwards.
            previous_k = self.retriever.similarity_top_k
            self.retriever.similarity_top_k = k

        try:
            raw = self.retriever.retrieve(query)
        finally:
            # Restore even when retrieval raises, so a failed call cannot
            # leave the temporary override behind.
            if override:
                self.retriever.similarity_top_k = previous_k

        return [
            dspy.Example(
                text=result.text,
                score=result.score,
            )
            for result in raw
        ]

0 commit comments

Comments
 (0)