Skip to content

Commit 2599758

Browse files
committed
support for openai compatible models
1 parent 35ca0bc commit 2599758

File tree

2 files changed

+63
-0
lines changed

2 files changed

+63
-0
lines changed

app/core/model_handlers.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,8 @@ def generate_response(
180180
return self._handle_caii_request(prompt)
181181
if self.inference_type == "openai":
182182
return self._handle_openai_request(prompt)
183+
if self.inference_type == "openai_compatible":
184+
return self._handle_openai_compatible_request(prompt)
183185
if self.inference_type == "gemini":
184186
return self._handle_gemini_request(prompt)
185187
raise ModelHandlerError(f"Unsupported inference_type={self.inference_type}", 400)
@@ -342,6 +344,66 @@ def _handle_openai_request(self, prompt: str):
342344
except Exception as e:
343345
raise ModelHandlerError(f"OpenAI request failed: {e}", 500)
344346

347+
# ---------- OpenAI Compatible -------------------------------------------------------
348+
def _handle_openai_compatible_request(self, prompt: str):
    """Send ``prompt`` to an OpenAI-compatible chat-completions endpoint.

    The API key is read from the ``OpenAI_Endpoint_Compatible_Key``
    environment variable and the base URL from ``self.caii_endpoint``.
    A trailing ``/chat/completions`` (with or without a final slash) is
    stripped from the base URL because the OpenAI client appends that
    path itself.

    Args:
        prompt: User message sent as the single chat message.

    Returns:
        The raw response text when ``self.custom_p`` is truthy, otherwise
        the result of ``self._extract_json_from_text`` on that text.

    Raises:
        ModelHandlerError: With status 500 when the API key or endpoint is
            missing (raised as-is, not re-wrapped), or when the request
            itself fails for any other reason.
    """
    try:
        import httpx
        from openai import OpenAI

        # Get API key from environment variable (only credential needed)
        api_key = os.getenv('OpenAI_Endpoint_Compatible_Key')
        if not api_key:
            raise ModelHandlerError("OpenAI_Endpoint_Compatible_Key environment variable not set", 500)

        # Base URL comes from caii_endpoint (passed during initialization).
        # NOTE(review): SynthesisRequest also declares an
        # `openai_compatible_endpoint` field; confirm the caller maps it
        # onto caii_endpoint, otherwise that field is never consumed here.
        openai_compatible_endpoint = self.caii_endpoint
        if not openai_compatible_endpoint:
            raise ModelHandlerError("OpenAI compatible endpoint not provided", 500)

        # Drop a trailing slash first so '.../chat/completions/' is handled
        # the same as '.../chat/completions'.
        openai_compatible_endpoint = (
            openai_compatible_endpoint.rstrip('/').removesuffix('/chat/completions')
        )

        # Timeouts reuse the class-level OPENAI_* connect/read constants.
        timeout_config = httpx.Timeout(
            connect=self.OPENAI_CONNECT_TIMEOUT,
            read=self.OPENAI_READ_TIMEOUT,
            write=10.0,
            pool=5.0,
        )

        # Use the system CA bundle when present (private cloud certs);
        # otherwise fall back to httpx's default verification (verify=True).
        ca_bundle = "/etc/ssl/certs/ca-certificates.crt"
        verify = ca_bundle if os.path.exists(ca_bundle) else True

        # The context manager closes the connection pool after the request
        # instead of leaking one httpx.Client per call.
        with httpx.Client(verify=verify, timeout=timeout_config) as http_client:
            client = OpenAI(
                api_key=api_key,
                base_url=openai_compatible_endpoint,
                http_client=http_client,
            )

            completion = client.chat.completions.create(
                model=self.model_id,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=self.model_params.max_tokens,
                temperature=self.model_params.temperature,
                top_p=self.model_params.top_p,
                stream=False,
            )

        print("generated via OpenAI Compatible endpoint")
        response_text = completion.choices[0].message.content

        return self._extract_json_from_text(response_text) if not self.custom_p else response_text

    except ModelHandlerError:
        # Configuration errors raised above already carry the right
        # message and status; do not re-wrap them as "request failed".
        raise
    except Exception as e:
        raise ModelHandlerError(f"OpenAI Compatible request failed: {str(e)}", status_code=500) from e
406+
345407
# ---------- Gemini -------------------------------------------------------
346408
def _handle_gemini_request(self, prompt: str):
347409
if genai is None:

app/models/request_models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ class SynthesisRequest(BaseModel):
123123
# Optional fields that can override defaults
124124
inference_type: Optional[str] = "aws_bedrock"
125125
caii_endpoint: Optional[str] = None
126+
openai_compatible_endpoint: Optional[str] = None
126127
topics: Optional[List[str]] = None
127128
doc_paths: Optional[List[str]] = None
128129
input_path: Optional[List[str]] = None

0 commit comments

Comments
 (0)