Commit 5e3c8e9

Add OpenAI docs and integration tests

1 parent 3e1d04a
File tree: 5 files changed (+255, -67 lines)
Lines changed: 16 additions & 0 deletions

```diff
@@ -0,0 +1,16 @@
+## OpenAI API Compatibility for NeMo Guardrails
+
+NeMo Guardrails provides server-side compatibility with OpenAI API endpoints, enabling applications that use OpenAI clients to seamlessly integrate with NeMo Guardrails for adding guardrails to LLM interactions. Point your OpenAI client to `http://localhost:8000` (or your server URL) and use the standard `/v1/chat/completions` endpoint.
+
+## Feature Support Matrix
+
+The following table outlines which OpenAI API features are currently supported when using NeMo Guardrails:
+
+| Feature | Status | Notes |
+| :------ | :----: | :---- |
+| **Basic Chat Completion** | ✔ Supported | Full support for standard chat completions with guardrails applied |
+| **Streaming Responses** | ✔ Supported | Server-Sent Events (SSE) streaming with `stream=true` |
+| **Multimodal Input** | ✖ Unsupported | Guardrails support text and image inputs (vision models), but this is not yet exposed through the OpenAI-compatible endpoint |
+| **Function Calling** | ✖ Unsupported | Not yet implemented; guardrails need structured output support |
+| **Tools** | ✖ Unsupported | Related to function calling; requires action flow integration |
+| **Response Format (JSON Mode)** | ✖ Unsupported | Structured output with guardrails requires additional validation logic |
```
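The new doc tells clients to point at the server and use `/v1/chat/completions`. A minimal sketch of what that looks like with the official `openai` Python client, assuming the endpoint is mounted under `/v1` and using a hypothetical guardrails config named `demo` (not shipped with the repo):

```python
from openai import OpenAI

# Point the standard OpenAI client at the guardrails server instead of
# api.openai.com. The api_key is a placeholder; "demo" is a hypothetical
# guardrails config id standing in for the model name.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")

response = client.chat.completions.create(
    model="demo",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)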

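Per the support matrix, streaming is delivered as SSE when `stream=true`. A sketch continuing the snippet above (same hypothetical `demo` config, same `client`):

```python
# With stream=True the server sends data-only server-sent events
# terminated by "data: [DONE]"; the client yields them as chunks.
stream = client.chat.completions.create(
    model="demo",
    messages=[{"role": "user", "content": "Tell me a short story."}],
    stream=True,
)
for chunk in stream:
    # Some chunks may carry no content delta; guard before printing.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
```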
nemoguardrails/server/api.py

Lines changed: 60 additions & 62 deletions
```diff
@@ -28,7 +28,8 @@
 
 from fastapi import FastAPI, Request
 from fastapi.middleware.cors import CORSMiddleware
-from openai.types.chat.chat_completion import ChatCompletion, Choice
+from openai.types.chat.chat_completion import Choice
+from openai.types.chat.chat_completion_message import ChatCompletionMessage
 from openai.types.model import Model
 from pydantic import BaseModel, Field, root_validator, validator
 from starlette.responses import StreamingResponse
```
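The import swap matters because responses are now built from typed OpenAI objects rather than raw dicts. A minimal sketch of how these types compose, assuming the `openai` 1.x Python package; the id and config name below are illustrative, not from the commit:

```python
import time
import uuid

from openai.types.chat.chat_completion import ChatCompletion, Choice
from openai.types.chat.chat_completion_message import ChatCompletionMessage

completion = ChatCompletion(
    id=f"chatcmpl-{uuid.uuid4()}",
    object="chat.completion",
    created=int(time.time()),
    model="my-config",  # hypothetical config id standing in for a model name
    choices=[
        Choice(
            index=0,
            message=ChatCompletionMessage(role="assistant", content="Hello!"),
            # "error" is not a valid finish_reason literal, which is why the
            # diff below normalizes error paths to "stop".
            finish_reason="stop",
            logprobs=None,
        )
    ],
)
print(completion.model_dump_json(indent=2))
```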
```diff
@@ -191,88 +192,82 @@ async def root_handler():
 app.single_config_id = None
 
 
-class RequestBody(ChatCompletion):
+class RequestBody(BaseModel):
     config_id: Optional[str] = Field(
         default=os.getenv("DEFAULT_CONFIG_ID", None),
         description="The id of the configuration to be used. If not set, the default configuration will be used.",
     )
     config_ids: Optional[List[str]] = Field(
         default=None,
-        description="The list of configuration ids to be used. "
-        "If set, the configurations will be combined.",
-        # alias="guardrails",
-        validate_default=True,
+        description="The ids of the configurations to be used. If not set, the default configuration will be used.",
     )
     thread_id: Optional[str] = Field(
         default=None,
         min_length=16,
         max_length=255,
         description="The id of an existing thread to which the messages should be added.",
     )
-    model: Optional[str] = Field(
-        default=None,
-        description="The model used for the chat completion.",
+    messages: Optional[List[dict]] = Field(
+        default=None, description="The list of messages in the current conversation."
     )
-    id: Optional[str] = Field(
+    context: Optional[dict] = Field(
         default=None,
-        description="The id of the chat completion.",
+        description="Additional context data to be added to the conversation.",
+    )
+    stream: Optional[bool] = Field(
+        default=False,
+        description="If set, partial message deltas will be sent, like in ChatGPT. "
+        "Tokens will be sent as data-only server-sent events as they become "
+        "available, with the stream terminated by a data: [DONE] message.",
     )
-    object: Optional[str] = Field(
-        default="chat.completion",
-        description="The object type, which is always chat.completion",
+    options: GenerationOptions = Field(
+        default_factory=GenerationOptions,
+        description="Additional options for controlling the generation.",
     )
-    created: Optional[int] = Field(
+    state: Optional[dict] = Field(
         default=None,
-        description="The Unix timestamp (in seconds) of when the chat completion was created.",
+        description="A state object that should be used to continue the interaction.",
     )
-    choices: Optional[List[Choice]] = Field(
+    # Standard OpenAI completion parameters
+    model: Optional[str] = Field(
         default=None,
-        description="The list of choices for the chat completion.",
+        description="The model to use for chat completion. Maps to config_id for backward compatibility.",
     )
     max_tokens: Optional[int] = Field(
         default=None,
         description="The maximum number of tokens to generate.",
     )
     temperature: Optional[float] = Field(
         default=None,
-        description="The temperature to use for the chat completion.",
+        description="Sampling temperature to use.",
     )
     top_p: Optional[float] = Field(
         default=None,
-        description="The top p to use for the chat completion.",
+        description="Top-p sampling parameter.",
     )
-    stop: Optional[Union[str, List[str]]] = Field(
+    stop: Optional[str] = Field(
         default=None,
-        description="The stop sequences to use for the chat completion.",
+        description="Stop sequences.",
     )
     presence_penalty: Optional[float] = Field(
         default=None,
-        description="The presence penalty to use for the chat completion.",
+        description="Presence penalty parameter.",
     )
     frequency_penalty: Optional[float] = Field(
         default=None,
-        description="The frequency penalty to use for the chat completion.",
+        description="Frequency penalty parameter.",
     )
-    messages: Optional[List[dict]] = Field(
-        default=None, description="The list of messages in the current conversation."
-    )
-    context: Optional[dict] = Field(
+    function_call: Optional[dict] = Field(
         default=None,
-        description="Additional context data to be added to the conversation.",
+        description="Function call parameter.",
     )
-    stream: Optional[bool] = Field(
-        default=False,
-        description="If set, partial message deltas will be sent, like in ChatGPT. "
-        "Tokens will be sent as data-only server-sent events as they become "
-        "available, with the stream terminated by a data: [DONE] message.",
-    )
-    options: GenerationOptions = Field(
-        default_factory=GenerationOptions,
-        description="Additional options for controlling the generation.",
+    logit_bias: Optional[dict] = Field(
+        default=None,
+        description="Logit bias parameter.",
     )
-    state: Optional[dict] = Field(
+    log_probs: Optional[bool] = Field(
         default=None,
-        description="A state object that should be used to continue the interaction.",
+        description="Log probabilities parameter.",
     )
 
     @root_validator(pre=True)
```
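The new `model` field's description says it maps to `config_id` for backward compatibility, but the validator that does this is not shown in the hunk. The following is therefore only an assumed sketch of how such a `pre` root validator could behave, not the commit's actual implementation:

```python
from typing import Optional

from pydantic import BaseModel, root_validator


class RequestBodySketch(BaseModel):
    # Reduced to the two fields relevant to the mapping; hypothetical class.
    config_id: Optional[str] = None
    model: Optional[str] = None

    @root_validator(pre=True)
    def map_model_to_config_id(cls, values):
        # If the caller sent an OpenAI-style `model` but no explicit
        # config_id, treat the model name as the guardrails config id.
        if values.get("model") and not values.get("config_id"):
            values["config_id"] = values["model"]
        return values
```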
```diff
@@ -537,16 +532,16 @@ async def chat_completion(body: RequestBody, request: Request):
             id=f"chatcmpl-{uuid.uuid4()}",
             object="chat.completion",
             created=int(time.time()),
-            model=config_ids[0] if config_ids else None,
+            model=config_ids[0] if config_ids else "unknown",
             choices=[
                 Choice(
                     index=0,
-                    message={
-                        "content": f"Could not load the {config_ids} guardrails configuration. "
+                    message=ChatCompletionMessage(
+                        content=f"Could not load the {config_ids} guardrails configuration. "
                         f"An internal error has occurred.",
-                        "role": "assistant",
-                    },
-                    finish_reason="error",
+                        role="assistant",
+                    ),
+                    finish_reason="stop",
                     logprobs=None,
                 )
             ],
@@ -570,15 +565,15 @@ async def chat_completion(body: RequestBody, request: Request):
             id=f"chatcmpl-{uuid.uuid4()}",
             object="chat.completion",
             created=int(time.time()),
-            model=None,
+            model=config_ids[0] if config_ids else "unknown",
             choices=[
                 Choice(
                     index=0,
-                    message={
-                        "content": "The `thread_id` must have a minimum length of 16 characters.",
-                        "role": "assistant",
-                    },
-                    finish_reason="error",
+                    message=ChatCompletionMessage(
+                        content="The `thread_id` must have a minimum length of 16 characters.",
+                        role="assistant",
+                    ),
+                    finish_reason="stop",
                     logprobs=None,
                 )
             ],
@@ -625,7 +620,7 @@ async def chat_completion(body: RequestBody, request: Request):
             llm_rails.generate_async(
                 messages=messages,
                 streaming_handler=streaming_handler,
-                options=body.options,
+                options=generation_options,
                 state=body.state,
             )
         )
@@ -638,7 +633,7 @@ async def chat_completion(body: RequestBody, request: Request):
         )
     else:
         res = await llm_rails.generate_async(
-            messages=messages, options=body.options, state=body.state
+            messages=messages, options=generation_options, state=body.state
         )
 
     if isinstance(res, GenerationResponse):
@@ -662,11 +657,14 @@ async def chat_completion(body: RequestBody, request: Request):
             "id": f"chatcmpl-{uuid.uuid4()}",
             "object": "chat.completion",
             "created": int(time.time()),
-            "model": config_ids[0] if config_ids else None,
+            "model": config_ids[0] if config_ids else "unknown",
             "choices": [
                 Choice(
                     index=0,
-                    message=bot_message,
+                    message=ChatCompletionMessage(
+                        role="assistant",
+                        content=bot_message["content"],
+                    ),
                     finish_reason="stop",
                     logprobs=None,
                 )
@@ -688,15 +686,15 @@ async def chat_completion(body: RequestBody, request: Request):
             id=f"chatcmpl-{uuid.uuid4()}",
             object="chat.completion",
             created=int(time.time()),
-            model=None,
+            model="unknown",
             choices=[
                 Choice(
                     index=0,
-                    message={
-                        "content": "Internal server error",
-                        "role": "assistant",
-                    },
-                    finish_reason="error",
+                    message=ChatCompletionMessage(
+                        content="Internal server error",
+                        role="assistant",
+                    ),
+                    finish_reason="stop",
                     logprobs=None,
                 )
             ],
```
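The commit message mentions integration tests. A minimal sketch of what such a test could look like against the OpenAI-compatible endpoint, assuming FastAPI's `TestClient`, the server `app` from `nemoguardrails.server.api`, and a hypothetical `demo` config available to the server; the actual tests in this commit are not shown here:

```python
from fastapi.testclient import TestClient

from nemoguardrails.server.api import app

client = TestClient(app)


def test_chat_completion_shape():
    response = client.post(
        "/v1/chat/completions",
        json={
            "model": "demo",  # hypothetical guardrails config id
            "messages": [{"role": "user", "content": "Hello!"}],
        },
    )
    assert response.status_code == 200
    body = response.json()
    # The response should follow the OpenAI chat completion schema.
    assert body["object"] == "chat.completion"
    assert body["choices"][0]["message"]["role"] == "assistant"
```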

nemoguardrails/server/schemas/openai.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -17,7 +17,7 @@
 
 from typing import List, Optional
 
-from openai.types.chat.chat_completion import ChatCompletion, Choice
+from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.model import Model
 from pydantic import BaseModel, Field
 
```
poetry.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default.
