Skip to content

Commit 2058348

Browse files
OpenAI API client upgrade and code migration (#126)
Upgrade OpenAI Python lib to >1.0.0 and migrates the code following openai/openai-python#742 (comment) Also, conditional skipping logic for vectorizers has been fixed (it was always skipping) --------- Co-authored-by: Tyler Hutcherson <tyler.hutcherson@redis.com>
1 parent 9c61922 commit 2058348

File tree

4 files changed

+59
-40
lines changed

4 files changed

+59
-40
lines changed

docs/user_guide/llmcache_03.ipynb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
"outputs": [],
2626
"source": [
2727
"import os\n",
28-
"import openai\n",
28+
"from openai import OpenAI\n",
2929
"import getpass\n",
3030
"import time\n",
3131
"\n",
@@ -35,11 +35,11 @@
3535
"\n",
3636
"api_key = os.getenv(\"OPENAI_API_KEY\") or getpass.getpass(\"Enter your OpenAI API key: \")\n",
3737
"\n",
38-
"openai.api_key = api_key\n",
38+
"client = OpenAI(api_key=api_key)\n",
3939
"\n",
4040
"def ask_openai(question: str) -> str:\n",
41-
" response = openai.Completion.create(\n",
42-
" engine=\"gpt-3.5-turbo-instruct\",\n",
41+
" response = client.completions.create(\n",
42+
" model=\"gpt-3.5-turbo-instruct\",\n",
4343
" prompt=question,\n",
4444
" max_tokens=200\n",
4545
" )\n",

redisvl/utils/vectorize/text/openai.py

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import os
2-
from typing import Callable, Dict, List, Optional
2+
from typing import Any, Callable, Dict, List, Optional
33

44
from tenacity import retry, stop_after_attempt, wait_random_exponential
55
from tenacity.retry import retry_if_not_exception_type
@@ -19,7 +19,7 @@ class OpenAITextVectorizer(BaseVectorizer):
1919
in the `api_config` dictionary or through the `OPENAI_API_KEY` environment
2020
variable. Users must obtain an API key from OpenAI's website
2121
(https://api.openai.com/). Additionally, the `openai` python client must be
22-
installed with `pip install openai==0.28.1`.
22+
installed with `pip install openai>=1.13.0`.
2323
2424
The vectorizer supports both synchronous and asynchronous operations,
2525
allowing for batch processing of texts and flexibility in handling
@@ -42,6 +42,8 @@ class OpenAITextVectorizer(BaseVectorizer):
4242
4343
"""
4444

45+
aclient: Any # Since the OpenAI module is loaded dynamically
46+
4547
def __init__(
4648
self, model: str = "text-embedding-ada-002", api_config: Optional[Dict] = None
4749
):
@@ -59,7 +61,7 @@ def __init__(
5961
"""
6062
# Dynamic import of the openai module
6163
try:
62-
import openai
64+
from openai import AsyncOpenAI, OpenAI
6365
except ImportError:
6466
raise ImportError(
6567
"OpenAI vectorizer requires the openai library. \
@@ -77,17 +79,19 @@ def __init__(
7779
environment variable."
7880
)
7981

80-
openai.api_key = api_key
81-
client = openai.Embedding
82+
client = OpenAI(api_key=api_key)
8283
dims = self._set_model_dims(client, model)
8384
super().__init__(model=model, dims=dims, client=client)
85+
self.aclient = AsyncOpenAI(api_key=api_key)
8486

8587
@staticmethod
8688
def _set_model_dims(client, model) -> int:
8789
try:
88-
embedding = client.create(input=["dimension test"], engine=model)["data"][
89-
0
90-
]["embedding"]
90+
embedding = (
91+
client.embeddings.create(input=["dimension test"], model=model)
92+
.data[0]
93+
.embedding
94+
)
9195
except (KeyError, IndexError) as ke:
9296
raise ValueError(f"Unexpected response from the OpenAI API: {str(ke)}")
9397
except Exception as e: # pylint: disable=broad-except
@@ -132,10 +136,9 @@ def embed_many(
132136

133137
embeddings: List = []
134138
for batch in self.batchify(texts, batch_size, preprocess):
135-
response = self.client.create(input=batch, engine=self.model)
139+
response = self.client.embeddings.create(input=batch, model=self.model)
136140
embeddings += [
137-
self._process_embedding(r["embedding"], as_buffer)
138-
for r in response["data"]
141+
self._process_embedding(r.embedding, as_buffer) for r in response.data
139142
]
140143
return embeddings
141144

@@ -171,8 +174,8 @@ def embed(
171174

172175
if preprocess:
173176
text = preprocess(text)
174-
result = self.client.create(input=[text], engine=self.model)
175-
return self._process_embedding(result["data"][0]["embedding"], as_buffer)
177+
result = self.client.embeddings.create(input=[text], model=self.model)
178+
return self._process_embedding(result.data[0].embedding, as_buffer)
176179

177180
@retry(
178181
wait=wait_random_exponential(min=1, max=60),
@@ -211,10 +214,11 @@ async def aembed_many(
211214

212215
embeddings: List = []
213216
for batch in self.batchify(texts, batch_size, preprocess):
214-
response = await self.client.acreate(input=batch, engine=self.model)
217+
response = await self.aclient.embeddings.create(
218+
input=batch, model=self.model
219+
)
215220
embeddings += [
216-
self._process_embedding(r["embedding"], as_buffer)
217-
for r in response["data"]
221+
self._process_embedding(r.embedding, as_buffer) for r in response.data
218222
]
219223
return embeddings
220224

@@ -250,5 +254,5 @@ async def aembed(
250254

251255
if preprocess:
252256
text = preprocess(text)
253-
result = await self.client.acreate(input=[text], engine=self.model)
254-
return self._process_embedding(result["data"][0]["embedding"], as_buffer)
257+
result = await self.aclient.embeddings.create(input=[text], model=self.model)
258+
return self._process_embedding(result.data[0].embedding, as_buffer)

requirements-all.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
openai<=0.28.1
1+
openai>=1.13.0
22
sentence-transformers>=2.2.2
33
google-cloud-aiplatform>=1.26
44
cohere>=4.44

tests/integration/test_vectorizers.py

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,6 @@ def skip_vectorizer() -> bool:
1818
return v
1919

2020

21-
skip_vectorizer_test = lambda: pytest.config.getfixturevalue("skip_vectorizer")
22-
23-
2421
@pytest.fixture(
2522
params=[
2623
HFTextVectorizer,
@@ -29,7 +26,10 @@ def skip_vectorizer() -> bool:
2926
CohereTextVectorizer,
3027
]
3128
)
32-
def vectorizer(request):
29+
def vectorizer(request, skip_vectorizer):
30+
if skip_vectorizer:
31+
pytest.skip("Skipping vectorizer instantiation...")
32+
3333
if request.param == HFTextVectorizer:
3434
return request.param()
3535
elif request.param == OpenAITextVectorizer:
@@ -40,8 +40,10 @@ def vectorizer(request):
4040
return request.param()
4141

4242

43-
@pytest.mark.skipif(skip_vectorizer_test, reason="Skipping vectorizer tests")
44-
def test_vectorizer_embed(vectorizer):
43+
def test_vectorizer_embed(vectorizer, skip_vectorizer):
44+
if skip_vectorizer:
45+
pytest.skip("Skipping vectorizer tests")
46+
4547
text = "This is a test sentence."
4648
if isinstance(vectorizer, CohereTextVectorizer):
4749
embedding = vectorizer.embed(text, input_type="search_document")
@@ -52,8 +54,10 @@ def test_vectorizer_embed(vectorizer):
5254
assert len(embedding) == vectorizer.dims
5355

5456

55-
@pytest.mark.skipif(skip_vectorizer_test, reason="Skipping vectorizer tests")
56-
def test_vectorizer_embed_many(vectorizer):
57+
def test_vectorizer_embed_many(vectorizer, skip_vectorizer):
58+
if skip_vectorizer:
59+
pytest.skip("Skipping vectorizer tests")
60+
5761
texts = ["This is the first test sentence.", "This is the second test sentence."]
5862
if isinstance(vectorizer, CohereTextVectorizer):
5963
embeddings = vectorizer.embed_many(texts, input_type="search_document")
@@ -67,8 +71,10 @@ def test_vectorizer_embed_many(vectorizer):
6771
)
6872

6973

70-
@pytest.mark.skipif(skip_vectorizer_test, reason="Skipping vectorizer tests")
71-
def test_vectorizer_bad_input(vectorizer):
74+
def test_vectorizer_bad_input(vectorizer, skip_vectorizer):
75+
if skip_vectorizer:
76+
pytest.skip("Skipping vectorizer tests")
77+
7278
with pytest.raises(TypeError):
7379
vectorizer.embed(1)
7480

@@ -80,25 +86,32 @@ def test_vectorizer_bad_input(vectorizer):
8086

8187

8288
@pytest.fixture(params=[OpenAITextVectorizer])
83-
def avectorizer(request):
89+
def avectorizer(request, skip_vectorizer):
90+
if skip_vectorizer:
91+
pytest.skip("Skipping vectorizer instantiation...")
92+
8493
# Here we use actual models for integration test
8594
if request.param == OpenAITextVectorizer:
8695
return request.param()
8796

8897

89-
@pytest.mark.skipif(skip_vectorizer_test, reason="Skipping vectorizer tests")
9098
@pytest.mark.asyncio
91-
async def test_vectorizer_aembed(avectorizer):
99+
async def test_vectorizer_aembed(avectorizer, skip_vectorizer):
100+
if skip_vectorizer:
101+
pytest.skip("Skipping vectorizer tests")
102+
92103
text = "This is a test sentence."
93104
embedding = await avectorizer.aembed(text)
94105

95106
assert isinstance(embedding, list)
96107
assert len(embedding) == avectorizer.dims
97108

98109

99-
@pytest.mark.skipif(skip_vectorizer_test, reason="Skipping vectorizer tests")
100110
@pytest.mark.asyncio
101-
async def test_vectorizer_aembed_many(avectorizer):
111+
async def test_vectorizer_aembed_many(avectorizer, skip_vectorizer):
112+
if skip_vectorizer:
113+
pytest.skip("Skipping vectorizer tests")
114+
102115
texts = ["This is the first test sentence.", "This is the second test sentence."]
103116
embeddings = await avectorizer.aembed_many(texts)
104117

@@ -109,9 +122,11 @@ async def test_vectorizer_aembed_many(avectorizer):
109122
)
110123

111124

112-
@pytest.mark.skipif(skip_vectorizer_test, reason="Skipping vectorizer tests")
113125
@pytest.mark.asyncio
114-
async def test_avectorizer_bad_input(avectorizer):
126+
async def test_avectorizer_bad_input(avectorizer, skip_vectorizer):
127+
if skip_vectorizer:
128+
pytest.skip("Skipping vectorizer tests")
129+
115130
with pytest.raises(TypeError):
116131
avectorizer.embed(1)
117132

0 commit comments

Comments (0)