Skip to content

Commit 17d76b7

Browse files
Merge pull request stanfordnlp#1026 from ofermend/vectara_retriever_update
support multiple corpora in vectara retriever
2 parents 05b3557 + c74b45e commit 17d76b7

File tree

1 file changed

+12
-8
lines changed

1 file changed

+12
-8
lines changed

dspy/retrieve/vectara_rm.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class VectaraRM(dspy.Retrieve):
1818
"""
1919
A retrieval module that uses Vectara to return the top passages for a given query.
2020
21-
Assumes that a Vectara corpus has been created and populated with the following payload:
21+
Assumes that one or more Vectara corpora have been created and populated with the following payload:
2222
- document: The text of the passage
2323
2424
Args:
@@ -67,17 +67,21 @@ def __init__(
6767
def _vectara_query(
6868
self,
6969
query: str,
70-
limit: int = 3,
70+
limit: int = 5,
7171
) -> List[str]:
7272
"""Query Vectara index to get the top k matching passages.
7373
Args:
7474
query: query string
7575
"""
76-
corpus_key = {
77-
"customerId": self._vectara_customer_id,
78-
"corpusId": self._vectara_corpus_id,
79-
"lexicalInterpolationConfig": {"lambda": 0.025 },
80-
}
76+
# If multiple corpus ids are provided (comma-separated), create a list of corpus keys
77+
# otherwise `split(',')` yields a one-element list, so the single corpus id is retained
78+
corpus_key = [
79+
{
80+
"customerId": self._vectara_customer_id,
81+
"corpusId": corpus_id,
82+
"lexicalInterpolationConfig": {"lambda": 0.025 },
83+
} for corpus_id in self._vectara_corpus_id.split(',')
84+
]
8185

8286
data = {
8387
"query": [
@@ -91,7 +95,7 @@ def _vectara_query(
9195
"startTag": START_SNIPPET,
9296
"endTag": END_SNIPPET,
9397
},
94-
"corpusKey": [corpus_key],
98+
"corpusKey": corpus_key,
9599
},
96100
],
97101
}

0 commit comments

Comments
 (0)