Skip to content

Commit 14dd1b9

Browse files
Pouyanpi authored and tgasser-nv committed
test: add mock embedding provider tests (#1446)
Add comprehensive mock-based unit tests for the Cohere and OpenAI embedding providers that run without requiring API credentials. The tests cover:
- Provider initialization with known/unknown models
- Sync and async encoding methods
- Custom parameters (input_type, api_key)
- ImportError handling
- All predefined model configurations

These tests complement the existing live integration tests and enable consistent CI/CD testing without external API dependencies.

* Skip the Cohere tests until #1305 is rebased onto develop after this PR is merged.
1 parent 9b9ce79 commit 14dd1b9

File tree

1 file changed

+350
-0
lines changed

1 file changed

+350
-0
lines changed
Lines changed: 350 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,350 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import sys
17+
from unittest.mock import MagicMock, Mock, patch
18+
19+
import pytest
20+
21+
# Probe for the Cohere provider module, which may be missing on some branches.
# The import is executed only for its success/failure; the Cohere test class
# is skipped when COHERE_AVAILABLE is False.
try:
    import nemoguardrails.embeddings.providers.cohere  # noqa: F401
except ImportError:
    # ModuleNotFoundError is a subclass of ImportError, so it is covered here.
    COHERE_AVAILABLE = False
else:
    COHERE_AVAILABLE = True
27+
28+
29+
@pytest.mark.skipif(
    not COHERE_AVAILABLE, reason="Cohere provider not available in this branch"
)
class TestCohereEmbeddingModelMocked:
    """Tests for ``CohereEmbeddingModel`` that run against a mocked ``cohere`` SDK.

    Each test installs a ``MagicMock`` as ``sys.modules["cohere"]`` through
    ``patch.dict`` before importing and constructing the model, so the real
    SDK and API credentials are never needed.
    """

    _PROVIDER_MODULE = "nemoguardrails.embeddings.providers.cohere"

    @staticmethod
    def _make_cohere_mocks(embeddings=None):
        """Build a fake ``cohere`` module and the client its ``Client()`` returns.

        Args:
            embeddings: Optional list of vectors. When given, the client's
                ``embed`` call returns an object whose ``embeddings``
                attribute is this list.

        Returns:
            A ``(mock_cohere, mock_client)`` tuple.
        """
        mock_cohere = MagicMock()
        mock_client = Mock()
        mock_cohere.Client.return_value = mock_client
        if embeddings is not None:
            mock_client.embed.return_value = Mock(embeddings=embeddings)
        return mock_cohere, mock_client

    def test_init_with_known_model(self):
        """A known model exposes its size, default input type and the SDK client."""
        mock_cohere, mock_client = self._make_cohere_mocks()

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel("embed-multilingual-v3.0")

            assert model.model == "embed-multilingual-v3.0"
            assert model.embedding_size == 1024
            assert model.input_type == "search_document"
            assert model.client == mock_client
            mock_cohere.Client.assert_called_once()

    def test_init_with_custom_input_type(self):
        """An explicit ``input_type`` is stored on the model."""
        mock_cohere, _ = self._make_cohere_mocks()

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel(
                "embed-english-v3.0", input_type="search_query"
            )

            assert model.model == "embed-english-v3.0"
            assert model.embedding_size == 1024
            assert model.input_type == "search_query"

    def test_init_with_unknown_model(self):
        """An unknown model takes its size from a probe ``embed`` call."""
        mock_cohere, mock_client = self._make_cohere_mocks(
            embeddings=[[0.1] * 512]
        )

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel("custom-unknown-model")

            assert model.model == "custom-unknown-model"
            assert model.embedding_size == 512
            mock_client.embed.assert_called_once_with(
                texts=["test"],
                model="custom-unknown-model",
                input_type="search_document",
            )

    def test_import_error_when_cohere_not_installed(self):
        """A missing ``cohere`` package surfaces as a descriptive ImportError."""
        # A ``None`` entry in sys.modules makes ``import cohere`` fail.
        with patch.dict("sys.modules", {"cohere": None}):
            # Drop any cached provider module so the import below runs afresh.
            # patch.dict restores sys.modules when the context exits.
            sys.modules.pop(self._PROVIDER_MODULE, None)

            # Both the import and the constructor stay inside the raises
            # block: either one is accepted as the source of the error.
            with pytest.raises(ImportError, match="Could not import cohere"):
                from nemoguardrails.embeddings.providers.cohere import (
                    CohereEmbeddingModel,
                )

                CohereEmbeddingModel("embed-v4.0")

    def test_encode_success(self):
        """``encode`` returns the SDK embeddings and forwards the documents."""
        expected_embeddings = [
            [0.1, 0.2, 0.3],
            [0.4, 0.5, 0.6],
        ]
        mock_cohere, mock_client = self._make_cohere_mocks(
            embeddings=expected_embeddings
        )

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel("embed-english-light-v3.0")
            documents = ["hello world", "test document"]
            result = model.encode(documents)

            assert result == expected_embeddings
            mock_client.embed.assert_called_with(
                texts=documents,
                model="embed-english-light-v3.0",
                input_type="search_document",
            )

    def test_encode_with_custom_input_type(self):
        """``encode`` passes a custom ``input_type`` through to the SDK."""
        expected_embeddings = [[0.1, 0.2]]
        mock_cohere, mock_client = self._make_cohere_mocks(
            embeddings=expected_embeddings
        )

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel("embed-v4.0", input_type="classification")
            documents = ["classify this"]
            result = model.encode(documents)

            assert result == expected_embeddings
            mock_client.embed.assert_called_with(
                texts=documents, model="embed-v4.0", input_type="classification"
            )

    @pytest.mark.asyncio
    async def test_encode_async_success(self):
        """``encode_async`` returns the SDK embeddings from one ``embed`` call."""
        expected_embeddings = [[0.1, 0.2, 0.3]]
        mock_cohere, mock_client = self._make_cohere_mocks(
            embeddings=expected_embeddings
        )

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel("embed-multilingual-v3.0")
            documents = ["async test"]
            result = await model.encode_async(documents)

            assert result == expected_embeddings
            mock_client.embed.assert_called_once()

    def test_init_with_api_key_kwarg(self):
        """An ``api_key`` keyword argument is forwarded to ``cohere.Client``."""
        mock_cohere, _ = self._make_cohere_mocks()

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            # Only the constructor's call into the SDK is under test here.
            CohereEmbeddingModel("embed-v4.0", api_key="test-key-123")

            mock_cohere.Client.assert_called_once_with(api_key="test-key-123")

    @pytest.mark.parametrize(
        ("model_name", "expected_size"),
        [
            ("embed-v4.0", 1536),
            ("embed-english-v3.0", 1024),
            ("embed-english-light-v3.0", 384),
            ("embed-multilingual-v3.0", 1024),
            ("embed-multilingual-light-v3.0", 384),
        ],
    )
    def test_all_predefined_models(self, model_name, expected_size):
        """Every predefined model reports its expected embedding size."""
        mock_cohere, _ = self._make_cohere_mocks()

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel(model_name)

            assert model.embedding_size == expected_size
            assert model.model == model_name
201+
202+
203+
class TestOpenAIEmbeddingModelMocked:
    """Tests for ``OpenAIEmbeddingModel`` that run against a mocked ``openai`` SDK.

    Each test installs a ``MagicMock`` as ``sys.modules["openai"]`` through
    ``patch.dict`` before importing and constructing the model, so the real
    SDK and API credentials are never needed.
    """

    _PROVIDER_MODULE = "nemoguardrails.embeddings.providers.openai"

    @staticmethod
    def _make_openai_mocks(version="1.0.0", embeddings=None):
        """Build a fake ``openai`` module and the client its ``OpenAI()`` returns.

        Args:
            version: Value exposed as the fake module's ``__version__``.
            embeddings: Optional list of vectors. When given, the client's
                ``embeddings.create`` call returns an object whose ``data``
                holds one record per vector, each with an ``embedding``
                attribute.

        Returns:
            A ``(mock_openai, mock_client)`` tuple.
        """
        mock_openai = MagicMock()
        mock_openai.__version__ = version
        mock_client = Mock()
        mock_openai.OpenAI.return_value = mock_client
        if embeddings is not None:
            records = [Mock(embedding=vector) for vector in embeddings]
            mock_client.embeddings.create.return_value = Mock(data=records)
        return mock_openai, mock_client

    def test_init_with_known_model(self):
        """A known model exposes its predefined size and the SDK client."""
        mock_openai, mock_client = self._make_openai_mocks()

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            model = OpenAIEmbeddingModel("text-embedding-3-small")

            assert model.model == "text-embedding-3-small"
            assert model.embedding_size == 1536
            assert model.client == mock_client
            mock_openai.OpenAI.assert_called_once()

    def test_init_with_unknown_model(self):
        """An unknown model takes its size from a probe ``embeddings.create`` call."""
        mock_openai, mock_client = self._make_openai_mocks(
            embeddings=[[0.1] * 2048]
        )

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            model = OpenAIEmbeddingModel("custom-unknown-model")

            assert model.model == "custom-unknown-model"
            assert model.embedding_size == 2048
            mock_client.embeddings.create.assert_called_once_with(
                input=["test"], model="custom-unknown-model"
            )

    def test_import_error_when_openai_not_installed(self):
        """A missing ``openai`` package surfaces as a descriptive ImportError."""
        # A ``None`` entry in sys.modules makes ``import openai`` fail.
        with patch.dict("sys.modules", {"openai": None}):
            # Drop any cached provider module so the import below runs afresh.
            # patch.dict restores sys.modules when the context exits.
            sys.modules.pop(self._PROVIDER_MODULE, None)

            # Both the import and the constructor stay inside the raises
            # block: either one is accepted as the source of the error.
            with pytest.raises(ImportError, match="Could not import openai"):
                from nemoguardrails.embeddings.providers.openai import (
                    OpenAIEmbeddingModel,
                )

                OpenAIEmbeddingModel("text-embedding-3-small")

    def test_old_version_error(self):
        """A fake SDK reporting version 0.28.0 is rejected with a RuntimeError."""
        mock_openai, _ = self._make_openai_mocks(version="0.28.0")

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            with pytest.raises(RuntimeError, match="openai<1.0.0"):
                OpenAIEmbeddingModel("text-embedding-3-small")

    def test_encode_success(self):
        """``encode`` returns one vector per record and forwards the documents."""
        expected_embeddings = [
            [0.1, 0.2, 0.3],
            [0.4, 0.5, 0.6],
        ]
        mock_openai, mock_client = self._make_openai_mocks(
            embeddings=expected_embeddings
        )

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            model = OpenAIEmbeddingModel("text-embedding-ada-002")
            documents = ["hello world", "test document"]
            result = model.encode(documents)

            assert result == expected_embeddings
            mock_client.embeddings.create.assert_called_with(
                input=documents, model="text-embedding-ada-002"
            )

    @pytest.mark.asyncio
    async def test_encode_async_success(self):
        """``encode_async`` returns the vectors from one ``embeddings.create`` call."""
        expected_embeddings = [[0.1, 0.2, 0.3]]
        mock_openai, mock_client = self._make_openai_mocks(
            embeddings=expected_embeddings
        )

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            model = OpenAIEmbeddingModel("text-embedding-3-small")
            documents = ["async test"]
            result = await model.encode_async(documents)

            assert result == expected_embeddings
            mock_client.embeddings.create.assert_called_once()

    def test_init_with_api_key_kwarg(self):
        """An ``api_key`` keyword argument is forwarded to ``openai.OpenAI``."""
        mock_openai, _ = self._make_openai_mocks()

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            # Only the constructor's call into the SDK is under test here.
            OpenAIEmbeddingModel("text-embedding-3-small", api_key="test-key-123")

            mock_openai.OpenAI.assert_called_once_with(api_key="test-key-123")

    @pytest.mark.parametrize(
        ("model_name", "expected_size"),
        [
            ("text-embedding-ada-002", 1536),
            ("text-embedding-3-small", 1536),
            ("text-embedding-3-large", 3072),
        ],
    )
    def test_all_predefined_models(self, model_name, expected_size):
        """Every predefined model reports its expected embedding size."""
        mock_openai, _ = self._make_openai_mocks()

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            model = OpenAIEmbeddingModel(model_name)

            assert model.embedding_size == expected_size
            assert model.model == model_name

0 commit comments

Comments
 (0)