|
13 | 13 | combine_pipelines, # noqa: F401 |
14 | 14 | final_hybrid_stage, # noqa: F401 |
15 | 15 | reciprocal_rank_stage, # noqa: F401 |
| 16 | + vector_search_stage, # noqa: F401 |
16 | 17 | ) |
17 | 18 |
|
18 | 19 |
|
@@ -54,44 +55,3 @@ def text_search_stage( |
54 | 55 | pipeline.append({"$limit": limit}) # type: ignore |
55 | 56 |
|
56 | 57 | return pipeline # type: ignore |
57 | | - |
58 | | - |
59 | | -def vector_search_stage( |
60 | | - query_vector: List[float], |
61 | | - search_field: str, |
62 | | - index_name: str, |
63 | | - top_k: int = 4, |
64 | | - filter: Optional[Dict[str, Any]] = None, |
65 | | - oversampling_factor: int = 10, |
66 | | - **kwargs: Any, |
67 | | -) -> Dict[str, Any]: # noqa: E501 |
68 | | - """Vector Search Stage without Scores. |
69 | | -
|
70 | | - Scoring is applied later depending on strategy. |
71 | | - vector search includes a vectorSearchScore that is typically used. |
72 | | - hybrid uses Reciprocal Rank Fusion. |
73 | | -
|
74 | | - Args: |
75 | | - query_vector: Embedding vector of the query, as a list of floats |
76 | | - search_field: Field in Collection containing embedding vectors |
77 | | - index_name: Name of Atlas Vector Search Index tied to Collection |
78 | | - top_k: Number of documents to return |
79 | | - oversampling_factor: Multiplier applied to top_k to set numCandidates |
80 | | - filter: MQL match expression comparing an indexed field. |
81 | | - Some operators are not supported. |
82 | | - See `vectorSearch filter docs <https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/#atlas-vector-search-pre-filter>`_ |
83 | | -
|
84 | | -
|
85 | | - Returns: |
86 | | - Dictionary defining the $vectorSearch |
87 | | - """ |
88 | | - stage = { |
89 | | - "index": index_name, |
90 | | - "path": search_field, |
91 | | - "queryVector": query_vector, |
92 | | - "numCandidates": top_k * oversampling_factor, |
93 | | - "limit": top_k, |
94 | | - } |
95 | | - if filter: |
96 | | - stage["filter"] = filter |
97 | | - return {"$vectorSearch": stage} |
0 commit comments