diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 190b2e96f8..a50d73dd8e 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -20698,7 +20698,7 @@ "inference" ], "summary": "Create an inference endpoint", - "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, 'rerank', `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", + "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", "operationId": "inference-put", "parameters": [ { @@ -20821,7 +20821,7 @@ "inference" ], "summary": "Create an inference endpoint", - "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, 'rerank', `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", + "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", "operationId": "inference-put-1", "parameters": [ { @@ -23508,6 +23508,147 @@ ] } }, + "/_inference/{task_type}/{openshiftai_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an OpenShift AI inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `openshift_ai` service.\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", + "operationId": "inference-put-openshift-ai", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference._types.OpenShiftAiTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "openshiftai_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Id" + }, + "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "externalDocs": { + "url": "https://www.elastic.co/docs/explore-analyze/elastic-inference/inference-api#infer-chunking-config" + }, + "description": "The chunking configuration object.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.InferenceChunkingSettings" + } + ] + }, + "service": { + "description": "The type of service supported for the specified task type. 
In this case, `openshift_ai`.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.OpenShiftAiServiceType" + } + ] + }, + "service_settings": { + "description": "Settings used to install the inference model. These settings are specific to the `openshift_ai` service.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.OpenShiftAiServiceSettings" + } + ] + }, + "task_settings": { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.OpenShiftAiTaskSettings" + } + ] + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutOpenShiftAiRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/openshift-ai-text-embedding` to create an inference endpoint that performs a `text_embedding` task.", + "value": "{\n \"service\": \"openshift_ai\",\n \"service_settings\": {\n \"url\": \"openshift-ai-embeddings-url\",\n \"api_key\": \"openshift-ai-embeddings-token\",\n \"model_id\": \"gritlm-7b\"\n }\n}" + }, + "PutOpenShiftAiRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/openshift-ai-completion` to create an inference endpoint that performs a `completion` task.", + "value": "{\n \"service\": \"openshift_ai\",\n \"service_settings\": {\n \"url\": \"openshift-ai-completion-url\",\n \"api_key\": \"openshift-ai-completion-token\",\n \"model_id\": \"llama-31-8b-instruct\"\n }\n}" + }, + "PutOpenShiftAiRequestExample3": { + "summary": "A chat completion task", + "description": "Run `PUT _inference/chat_completion/openshift-ai-chat-completion` to create an inference endpoint that performs a `chat_completion` task.", + "value": "{\n \"service\": \"openshift_ai\",\n \"service_settings\": {\n \"url\": \"openshift-ai-chat-completion-url\",\n \"api_key\": \"openshift-ai-chat-completion-token\",\n \"model_id\": \"llama-31-8b-instruct\"\n }\n}" + }, + "PutOpenShiftAiRequestExample4": { + "summary": "A rerank task", + "description": "Run `PUT _inference/rerank/openshift-ai-rerank` to create an inference endpoint that performs a `rerank` task.", + "value": "{\n \"service\": \"openshift_ai\",\n \"service_settings\": {\n \"url\": \"openshift-ai-rerank-url\",\n \"api_key\": \"openshift-ai-rerank-token\",\n \"model_id\": \"bge-reranker-v2-m3\"\n }\n}" + }, + "PutOpenShiftAiRequestExample5": { + "summary": "A rerank task with custom `task_settings` and omitted `model_id`", + "description": "Run `PUT _inference/rerank/openshift-ai-rerank` to create an inference endpoint that performs a `rerank` task, specifying custom `task_settings` and omitting the `model_id` if the deployed model doesn't require it.", + "value": "{\n \"service\": \"openshift_ai\",\n \"service_settings\": {\n \"url\": \"openshift-ai-rerank-url\",\n \"api_key\": \"openshift-ai-rerank-token\"\n },\n \"task_settings\": {\n \"return_documents\": true,\n \"top_n\": 2\n }\n}" + } + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types.InferenceEndpointInfoOpenShiftAi" + } + } + } + } + }, + "x-state": "Generally available; Added in 9.3.0", + "x-metaTags": [ + { + "content": "Elasticsearch, Machine Learning", + "name": "product_name" + } + ] + } + }, "/_inference/{task_type}/{voyageai_inference_id}": { "put": { "tags": [ @@ -101692,7 
+101833,7 @@ "type": "object", "properties": { "requests_per_minute": { - "description": "The number of requests allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task type `completion`: `120`\n* `cohere` service: `10000`\n* `contextualai` service: `1000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`", + "description": "The number of requests allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task type `completion`: `120`\n* `cohere` service: `10000`\n* `contextualai` service: `1000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `openshift_ai` service: `3000`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`", "type": "number" } } @@ -103847,6 +103988,120 @@ "completion" ] }, + "inference._types.OpenShiftAiTaskType": { + "type": "string", + "enum": [ + "text_embedding", + "completion", + "chat_completion", + "rerank" + ] + }, + "inference._types.OpenShiftAiServiceType": { + "type": "string", + "enum": [ + "openshift_ai" + ] + }, + "inference._types.OpenShiftAiServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "description": "A valid API key for your OpenShift AI endpoint.\nIt can be found in the `Token authentication` section of the model information.", + "type": "string" + }, + "url": { + "description": "The URL of the OpenShift AI hosted model endpoint.", + "type": "string" + }, + "model_id": { + "description": "The name of the model to use for the inference task.\nRefer to the hosted model's documentation for the name if needed.\nThe service has been tested and confirmed to work with the following models:\n* For `text_embedding` task - `gritlm-7b`.\n* For `completion` and `chat_completion` tasks - `llama-31-8b-instruct`.\n* For `rerank` task - `bge-reranker-v2-m3`.", + "type": "string" + }, + "max_input_tokens": { + "description": "For a `text_embedding` task, the maximum number of tokens per input before chunking occurs.", + "type": "number" + }, + "similarity": { + "description": "For a `text_embedding` task, the similarity measure. 
One of `cosine`, `dot_product`, or `l2_norm`.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.OpenShiftAiSimilarityType" + } + ] + }, + "rate_limit": { + "description": "This setting helps to minimize the number of rate limit errors returned from the OpenShift AI API.\nBy default, the `openshift_ai` service sets the number of requests allowed per minute to 3000.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.RateLimitSetting" + } + ] + } + }, + "required": [ + "api_key", + "url" + ] + }, + "inference._types.OpenShiftAiSimilarityType": { + "type": "string", + "enum": [ + "cosine", + "dot_product", + "l2_norm" + ] + }, + "inference._types.OpenShiftAiTaskSettings": { + "type": "object", + "properties": { + "return_documents": { + "description": "For a `rerank` task, whether to return the source documents in the response.", + "type": "boolean" + }, + "top_n": { + "description": "For a `rerank` task, the number of most relevant documents to return.", + "type": "number" + } + } + }, + "inference._types.InferenceEndpointInfoOpenShiftAi": { + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.InferenceEndpoint" + }, + { + "type": "object", + "properties": { + "inference_id": { + "description": "The inference Id", + "type": "string" + }, + "task_type": { + "description": "The task type", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.TaskTypeOpenShiftAi" + } + ] + } + }, + "required": [ + "inference_id", + "task_type" + ] + } + ] + }, + "inference._types.TaskTypeOpenShiftAi": { + "type": "string", + "enum": [ + "text_embedding", + "chat_completion", + "completion", + "rerank" + ] + }, "inference._types.VoyageAITaskType": { "type": "string", "enum": [ diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index c9b00850f0..e070bd22c6 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -11775,7 +11775,7 @@ "inference" ], "summary": "Create an inference endpoint", - "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, 'rerank', `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", + "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", "operationId": "inference-put", "parameters": [ { @@ -11898,7 +11898,7 @@ "inference" ], "summary": "Create an inference endpoint", - "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, 'rerank', `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", + "description": "IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", "operationId": "inference-put-1", "parameters": [ { @@ -14585,6 +14585,147 @@ ] } }, + "/_inference/{task_type}/{openshiftai_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an OpenShift AI inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `openshift_ai` service.\n\n## Required authorization\n\n* Cluster privileges: `manage_inference`\n", + "operationId": "inference-put-openshift-ai", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference._types.OpenShiftAiTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "openshiftai_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Id" + }, + "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types.Duration" + }, + "style": "form" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "externalDocs": { + "url": "https://www.elastic.co/docs/explore-analyze/elastic-inference/inference-api#infer-chunking-config" + }, + "description": "The chunking configuration object.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.InferenceChunkingSettings" + } + ] + }, + "service": { + "description": "The type of service supported for the specified task type. 
In this case, `openshift_ai`.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.OpenShiftAiServiceType" + } + ] + }, + "service_settings": { + "description": "Settings used to install the inference model. These settings are specific to the `openshift_ai` service.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.OpenShiftAiServiceSettings" + } + ] + }, + "task_settings": { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.OpenShiftAiTaskSettings" + } + ] + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutOpenShiftAiRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/openshift-ai-text-embedding` to create an inference endpoint that performs a `text_embedding` task.", + "value": "{\n \"service\": \"openshift_ai\",\n \"service_settings\": {\n \"url\": \"openshift-ai-embeddings-url\",\n \"api_key\": \"openshift-ai-embeddings-token\",\n \"model_id\": \"gritlm-7b\"\n }\n}" + }, + "PutOpenShiftAiRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/openshift-ai-completion` to create an inference endpoint that performs a `completion` task.", + "value": "{\n \"service\": \"openshift_ai\",\n \"service_settings\": {\n \"url\": \"openshift-ai-completion-url\",\n \"api_key\": \"openshift-ai-completion-token\",\n \"model_id\": \"llama-31-8b-instruct\"\n }\n}" + }, + "PutOpenShiftAiRequestExample3": { + "summary": "A chat completion task", + "description": "Run `PUT _inference/chat_completion/openshift-ai-chat-completion` to create an inference endpoint that performs a `chat_completion` task.", + "value": "{\n \"service\": \"openshift_ai\",\n \"service_settings\": {\n \"url\": \"openshift-ai-chat-completion-url\",\n \"api_key\": \"openshift-ai-chat-completion-token\",\n \"model_id\": \"llama-31-8b-instruct\"\n }\n}" + }, + "PutOpenShiftAiRequestExample4": { + "summary": "A rerank task", + "description": "Run `PUT _inference/rerank/openshift-ai-rerank` to create an inference endpoint that performs a `rerank` task.", + "value": "{\n \"service\": \"openshift_ai\",\n \"service_settings\": {\n \"url\": \"openshift-ai-rerank-url\",\n \"api_key\": \"openshift-ai-rerank-token\",\n \"model_id\": \"bge-reranker-v2-m3\"\n }\n}" + }, + "PutOpenShiftAiRequestExample5": { + "summary": "A rerank task with custom `task_settings` and omitted `model_id`", + "description": "Run `PUT _inference/rerank/openshift-ai-rerank` to create an inference endpoint that performs a `rerank` task, specifying custom `task_settings` and omitting the `model_id` if the deployed model doesn't require it.", + "value": "{\n \"service\": \"openshift_ai\",\n \"service_settings\": {\n \"url\": \"openshift-ai-rerank-url\",\n \"api_key\": \"openshift-ai-rerank-token\"\n },\n \"task_settings\": {\n \"return_documents\": true,\n \"top_n\": 2\n }\n}" + } + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types.InferenceEndpointInfoOpenShiftAi" + } + } + } + } + }, + "x-state": "Generally available", + "x-metaTags": [ + { + "content": "Elasticsearch, Machine Learning", + "name": "product_name" + } + ] + } + }, "/_inference/{task_type}/{voyageai_inference_id}": { "put": { "tags": [ @@ -65808,7 +65949,7 @@ "type": 
"object", "properties": { "requests_per_minute": { - "description": "The number of requests allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task type `completion`: `120`\n* `cohere` service: `10000`\n* `contextualai` service: `1000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`", + "description": "The number of requests allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task type `completion`: `120`\n* `cohere` service: `10000`\n* `contextualai` service: `1000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `openshift_ai` service: `3000`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`", "type": "number" } } @@ -67963,6 +68104,120 @@ "completion" ] }, + "inference._types.OpenShiftAiTaskType": { + "type": "string", + "enum": [ + "text_embedding", + "completion", + "chat_completion", + "rerank" + ] + }, + "inference._types.OpenShiftAiServiceType": { + "type": "string", + "enum": [ + "openshift_ai" + ] + }, + "inference._types.OpenShiftAiServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "description": "A valid API key for your OpenShift AI endpoint.\nCan be found in `Token authentication` section of model related information.", + "type": "string" + }, + "url": { + "description": "The URL of the OpenShift AI hosted model endpoint.", + "type": "string" + }, + "model_id": { + "description": "The name of the model to use for the inference task.\nRefer to the hosted model's documentation for the name if needed.\nService has been tested and confirmed to be working with the following models:\n* For `text_embedding` task - `gritlm-7b`.\n* For `completion` and `chat_completion` tasks - `llama-31-8b-instruct`.\n* For `rerank` task - `bge-reranker-v2-m3`.", + "type": "string" + }, + "max_input_tokens": { + "description": "For a `text_embedding` task, the maximum number of tokens per input before chunking occurs.", + "type": "number" + }, + "similarity": { + "description": "For a `text_embedding` task, the similarity measure. 
One of `cosine`, `dot_product`, or `l2_norm`.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.OpenShiftAiSimilarityType" + } + ] + }, + "rate_limit": { + "description": "This setting helps to minimize the number of rate limit errors returned from the OpenShift AI API.\nBy default, the `openshift_ai` service sets the number of requests allowed per minute to 3000.", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.RateLimitSetting" + } + ] + } + }, + "required": [ + "api_key", + "url" + ] + }, + "inference._types.OpenShiftAiSimilarityType": { + "type": "string", + "enum": [ + "cosine", + "dot_product", + "l2_norm" + ] + }, + "inference._types.OpenShiftAiTaskSettings": { + "type": "object", + "properties": { + "return_documents": { + "description": "For a `rerank` task, whether to return the source documents in the response.", + "type": "boolean" + }, + "top_n": { + "description": "For a `rerank` task, the number of most relevant documents to return.", + "type": "number" + } + } + }, + "inference._types.InferenceEndpointInfoOpenShiftAi": { + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.InferenceEndpoint" + }, + { + "type": "object", + "properties": { + "inference_id": { + "description": "The inference Id", + "type": "string" + }, + "task_type": { + "description": "The task type", + "allOf": [ + { + "$ref": "#/components/schemas/inference._types.TaskTypeOpenShiftAi" + } + ] + } + }, + "required": [ + "inference_id", + "task_type" + ] + } + ] + }, + "inference._types.TaskTypeOpenShiftAi": { + "type": "string", + "enum": [ + "text_embedding", + "chat_completion", + "completion", + "rerank" + ] + }, "inference._types.VoyageAITaskType": { "type": "string", "enum": [ diff --git a/output/schema/schema.json b/output/schema/schema.json index aa9b609d6b..322f55eea5 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -10183,7 +10183,7 @@ "visibility": "public" } }, - "description": "Create an inference endpoint.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, 'rerank', `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)", + "description": "Create an inference endpoint.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)", "docId": "inference-api-put", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put", "extPreviousVersionDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/8.18/put-inference-api.html", @@ -11143,6 +11143,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "9.3.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an OpenShift AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `openshift_ai` service.", + "docId": "inference-api-put-openshift-ai", + "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openshift-ai", + "name": "inference.put_openshift_ai", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_openshift_ai" + }, + "requestBodyRequired": true, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_openshift_ai" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{openshiftai_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -176313,7 +176358,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L327-L386" + "specLocation": "inference/_types/Services.ts#L339-L398" }, { "kind": "interface", @@ -176372,7 +176417,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L48-L70" + "specLocation": "inference/_types/Services.ts#L49-L71" }, { "kind": "interface", @@ -176413,7 +176458,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L72-L84" + "specLocation": "inference/_types/Services.ts#L73-L85" }, { "kind": "interface", @@ -176453,7 +176498,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L86-L95" + "specLocation": "inference/_types/Services.ts#L87-L96" }, { "kind": "interface", @@ -176493,7 +176538,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L97-L106" + "specLocation": "inference/_types/Services.ts#L98-L107" 
}, { "kind": "interface", @@ -176533,7 +176578,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L108-L117" + "specLocation": "inference/_types/Services.ts#L109-L118" }, { "kind": "interface", @@ -176573,7 +176618,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L119-L128" + "specLocation": "inference/_types/Services.ts#L120-L129" }, { "kind": "interface", @@ -176613,7 +176658,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L130-L139" + "specLocation": "inference/_types/Services.ts#L131-L140" }, { "kind": "interface", @@ -176653,7 +176698,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L141-L150" + "specLocation": "inference/_types/Services.ts#L142-L151" }, { "kind": "interface", @@ -176693,7 +176738,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L152-L161" + "specLocation": "inference/_types/Services.ts#L153-L162" }, { "kind": "interface", @@ -176733,7 +176778,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L163-L172" + "specLocation": "inference/_types/Services.ts#L164-L173" }, { "kind": "interface", @@ -176773,7 +176818,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L174-L183" + "specLocation": "inference/_types/Services.ts#L175-L184" }, { "kind": "interface", @@ -176813,7 +176858,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L185-L194" + "specLocation": "inference/_types/Services.ts#L186-L195" }, { "kind": "interface", @@ -176853,7 +176898,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L195-L204" + "specLocation": "inference/_types/Services.ts#L196-L205" }, { "kind": "interface", @@ -176893,7 +176938,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L217-L226" + "specLocation": "inference/_types/Services.ts#L218-L227" }, { "kind": "interface", @@ -176933,7 +176978,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L206-L215" + "specLocation": "inference/_types/Services.ts#L207-L216" }, { "kind": "interface", @@ -176973,7 +177018,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L228-L237" + "specLocation": "inference/_types/Services.ts#L229-L238" }, { "kind": "interface", @@ -177013,7 +177058,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L239-L248" + "specLocation": "inference/_types/Services.ts#L240-L249" }, { "kind": "interface", @@ -177053,7 +177098,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L250-L259" + "specLocation": "inference/_types/Services.ts#L251-L260" }, { "kind": "interface", @@ -177093,7 +177138,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L261-L270" + "specLocation": "inference/_types/Services.ts#L262-L271" }, { "kind": "interface", @@ -177133,7 +177178,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L272-L281" + "specLocation": "inference/_types/Services.ts#L273-L282" }, { "kind": "interface", @@ -177173,7 +177218,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L283-L292" + "specLocation": "inference/_types/Services.ts#L284-L293" }, { "kind": "interface", @@ -177213,7 +177258,47 @@ } } ], - "specLocation": "inference/_types/Services.ts#L294-L303" + "specLocation": "inference/_types/Services.ts#L295-L304" + }, + { + "kind": "interface", + "inherits": { + "type": { + "name": "InferenceEndpoint", + "namespace": "inference._types" + } + }, + "name": { + "name": "InferenceEndpointInfoOpenShiftAi", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": 
"instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The task type", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskTypeOpenShiftAi", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L306-L315" }, { "kind": "interface", @@ -177253,7 +177338,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L305-L314" + "specLocation": "inference/_types/Services.ts#L317-L326" }, { "kind": "interface", @@ -177293,7 +177378,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L316-L325" + "specLocation": "inference/_types/Services.ts#L328-L337" }, { "kind": "interface", @@ -178033,6 +178118,176 @@ }, "specLocation": "inference/_types/CommonTypes.ts#L1837-L1841" }, + { + "kind": "interface", + "name": { + "name": "OpenShiftAiServiceSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "A valid API key for your OpenShift AI endpoint.\nCan be found in `Token authentication` section of model related information.", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The URL of the OpenShift AI hosted model endpoint.", + "name": "url", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the hosted model's documentation for the name if needed.\nService has been tested and confirmed to be working with the following models:\n* For `text_embedding` task - `gritlm-7b`.\n* For `completion` and `chat_completion` tasks - `llama-31-8b-instruct`.\n* For `rerank` task - `bge-reranker-v2-m3`.", + "name": "model_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "For a `text_embedding` task, the maximum number of tokens per input before chunking occurs.", + "name": "max_input_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `text_embedding` task, the similarity measure. 
One of `cosine`, `dot_product`, or `l2_norm`.", + "name": "similarity", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "OpenShiftAiSimilarityType", + "namespace": "inference._types" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from the OpenShift AI API.\nBy default, the `openshift_ai` service sets the number of requests allowed per minute to 3000.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L1847-L1879" + }, + { + "kind": "enum", + "members": [ + { + "name": "openshift_ai" + } + ], + "name": { + "name": "OpenShiftAiServiceType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L1888-L1890" + }, + { + "kind": "enum", + "members": [ + { + "name": "cosine" + }, + { + "name": "dot_product" + }, + { + "name": "l2_norm" + } + ], + "name": { + "name": "OpenShiftAiSimilarityType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L1892-L1896" + }, + { + "kind": "interface", + "name": { + "name": "OpenShiftAiTaskSettings", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "For a `rerank` task, whether to return the source documents in the response.", + "name": "return_documents", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "description": "For a `rerank` task, the number of most relevant documents to return.", + "name": "top_n", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L1898-L1907" + }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + }, + { + "name": "completion" + }, + { + "name": "chat_completion" + }, + { + "name": "rerank" + } + ], + "name": { + "name": "OpenShiftAiTaskType", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/CommonTypes.ts#L1881-L1886" + }, { "kind": "interface", "description": "The rerank result object representing a single ranked document\nid: the original index of the document in the request\nrelevance_score: the relevance_score of the document relative to the query\ntext: Optional, the text of the document, if requested", @@ -178086,7 +178341,7 @@ }, "properties": [ { - "description": "The number of requests allowed per minute.\nBy default, the number of requests allowed per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task type `completion`: `120`\n* `cohere` service: `10000`\n* `contextualai` service: `1000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`", + "description": "The number of requests allowed 
per minute is set by each service as follows:\n\n* `alibabacloud-ai-search` service: `1000`\n* `anthropic` service: `50`\n* `azureaistudio` service: `240`\n* `azureopenai` service and task type `text_embedding`: `1440`\n* `azureopenai` service and task type `completion`: `120`\n* `cohere` service: `10000`\n* `contextualai` service: `1000`\n* `elastic` service and task type `chat_completion`: `240`\n* `googleaistudio` service: `360`\n* `googlevertexai` service: `30000`\n* `hugging_face` service: `3000`\n* `jinaai` service: `2000`\n* `llama` service: `3000`\n* `mistral` service: `240`\n* `openai` service and task type `text_embedding`: `3000`\n* `openai` service and task type `completion`: `500`\n* `openshift_ai` service: `3000`\n* `voyageai` service: `2000`\n* `watsonxai` service: `120`", "name": "requests_per_minute", "required": false, "type": { @@ -178098,7 +178353,7 @@ } } ], - "specLocation": "inference/_types/Services.ts#L392-L420" + "specLocation": "inference/_types/Services.ts#L404-L433" }, { "kind": "interface", @@ -178246,7 +178501,7 @@ "name": "ServiceSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L388-L388", + "specLocation": "inference/_types/Services.ts#L400-L400", "type": { "kind": "user_defined_value" } @@ -178330,7 +178585,7 @@ "name": "TaskSettings", "namespace": "inference._types" }, - "specLocation": "inference/_types/Services.ts#L390-L390", + "specLocation": "inference/_types/Services.ts#L402-L402", "type": { "kind": "user_defined_value" } @@ -178722,6 +178977,28 @@ }, "specLocation": "inference/_types/TaskType.ts#L139-L143" }, + { + "kind": "enum", + "members": [ + { + "name": "text_embedding" + }, + { + "name": "chat_completion" + }, + { + "name": "completion" + }, + { + "name": "rerank" + } + ], + "name": { + "name": "TaskTypeOpenShiftAi", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/TaskType.ts#L145-L150" + }, { "kind": "enum", "members": [ @@ -178736,7 +179013,7 @@ "name": "TaskTypeVoyageAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L145-L148" + "specLocation": "inference/_types/TaskType.ts#L152-L155" }, { "kind": "enum", @@ -178755,7 +179032,7 @@ "name": "TaskTypeWatsonx", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L150-L154" + "specLocation": "inference/_types/TaskType.ts#L157-L161" }, { "kind": "interface", @@ -179023,7 +179300,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1847-L1878" + "specLocation": "inference/_types/CommonTypes.ts#L1909-L1940" }, { "kind": "enum", @@ -179036,7 +179313,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1911-L1913" + "specLocation": "inference/_types/CommonTypes.ts#L1973-L1975" }, { "kind": "interface", @@ -179096,7 +179373,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1880-L1904" + "specLocation": "inference/_types/CommonTypes.ts#L1942-L1966" }, { "kind": "enum", @@ -179112,7 +179389,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1906-L1909" + "specLocation": "inference/_types/CommonTypes.ts#L1968-L1971" }, { "kind": "interface", @@ -179200,7 +179477,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1915-L1953" + "specLocation": "inference/_types/CommonTypes.ts#L1977-L2015" }, { "kind": "enum", @@ -179213,7 +179490,7 @@ "name": "WatsonxServiceType", "namespace": 
"inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1961-L1963" + "specLocation": "inference/_types/CommonTypes.ts#L2023-L2025" }, { "kind": "enum", @@ -179232,7 +179509,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1955-L1959" + "specLocation": "inference/_types/CommonTypes.ts#L2017-L2021" }, { "kind": "request", @@ -179959,7 +180236,7 @@ } } }, - "description": "Create an inference endpoint.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, 'rerank', `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)", + "description": "Create an inference endpoint.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThe following integrations are available through the inference API. 
You can find the available task types next to the integration name:\n* AI21 (`chat_completion`, `completion`)\n* AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Amazon Bedrock (`completion`, `text_embedding`)\n* Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)\n* Anthropic (`completion`)\n* Azure AI Studio (`completion`, `rerank`, `text_embedding`)\n* Azure OpenAI (`completion`, `text_embedding`)\n* Cohere (`completion`, `rerank`, `text_embedding`)\n* DeepSeek (`chat_completion`, `completion`)\n* Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)\n* ELSER (`sparse_embedding`)\n* Google AI Studio (`completion`, `text_embedding`)\n* Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* JinaAI (`rerank`, `text_embedding`)\n* Llama (`chat_completion`, `completion`, `text_embedding`)\n* Mistral (`chat_completion`, `completion`, `text_embedding`)\n* OpenAI (`chat_completion`, `completion`, `text_embedding`)\n* OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)\n* VoyageAI (`rerank`, `text_embedding`)\n* Watsonx inference integration (`text_embedding`)", "examples": { "InferencePutExample1": { "alternatives": [ @@ -180044,7 +180321,7 @@ } } ], - "specLocation": "inference/put/PutRequest.ts#L26-L90" + "specLocation": "inference/put/PutRequest.ts#L26-L91" }, { "kind": "response", @@ -184362,6 +184639,171 @@ }, "specLocation": "inference/put_openai/PutOpenAiResponse.ts#L22-L25" }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/docs/explore-analyze/elastic-inference/inference-api#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `openshift_ai`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "OpenShiftAiServiceType", + "namespace": "inference._types" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `openshift_ai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "OpenShiftAiServiceSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "OpenShiftAiTaskSettings", + "namespace": "inference._types" + } + } + } + ] + }, + "description": "Create an OpenShift AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `openshift_ai` service.", + "examples": { + "PutOpenShiftAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/openshift-ai-text-embedding` to create an inference endpoint that performs a `text_embedding` task.", + "method_request": "PUT _inference/text_embedding/openshift-ai-text-embedding", + "summary": "A text embedding task", + "value": "{\n \"service\": \"openshift_ai\",\n \"service_settings\": {\n \"url\": \"openshift-ai-embeddings-url\",\n \"api_key\": \"openshift-ai-embeddings-token\",\n \"model_id\": \"gritlm-7b\"\n }\n}" + }, + "PutOpenShiftAiRequestExample2": { + "description": "Run `PUT _inference/completion/openshift-ai-completion` to create an inference endpoint that performs a `completion` task.", + "method_request": "PUT _inference/completion/openshift-ai-completion", + "summary": "A completion task", + "value": "{\n \"service\": \"openshift_ai\",\n \"service_settings\": {\n \"url\": \"openshift-ai-completion-url\",\n \"api_key\": \"openshift-ai-completion-token\",\n \"model_id\": \"llama-31-8b-instruct\"\n }\n}" + }, + "PutOpenShiftAiRequestExample3": { + "description": "Run `PUT _inference/chat_completion/openshift-ai-chat-completion` to create an inference endpoint that performs a `chat_completion` task.", + "method_request": "PUT _inference/chat_completion/openshift-ai-chat-completion", + "summary": "A chat completion task", + "value": "{\n \"service\": \"openshift_ai\",\n \"service_settings\": {\n \"url\": \"openshift-ai-chat-completion-url\",\n \"api_key\": \"openshift-ai-chat-completion-token\",\n \"model_id\": \"llama-31-8b-instruct\"\n }\n}" + }, + "PutOpenShiftAiRequestExample4": { + "description": "Run `PUT _inference/rerank/openshift-ai-rerank` to create an inference endpoint that performs a `rerank` task.", + "method_request": "PUT _inference/rerank/openshift-ai-rerank", + "summary": "A rerank task", + "value": "{\n \"service\": \"openshift_ai\",\n \"service_settings\": {\n \"url\": \"openshift-ai-rerank-url\",\n \"api_key\": \"openshift-ai-rerank-token\",\n \"model_id\": \"bge-reranker-v2-m3\"\n }\n}" + }, + "PutOpenShiftAiRequestExample5": { + "description": "Run `PUT _inference/rerank/openshift-ai-rerank` to create an inference endpoint that performs a `rerank` task, specifying custom `task_settings` and omitting the `model_id` if the deployed model doesn't require it.", + "method_request": "PUT _inference/rerank/openshift-ai-rerank", + "summary": "A rerank task with custom `task_settings` and omitted `model_id`", + "value": "{\n \"service\": \"openshift_ai\",\n \"service_settings\": {\n \"url\": \"openshift-ai-rerank-url\",\n \"api_key\": \"openshift-ai-rerank-token\"\n },\n \"task_settings\": {\n \"return_documents\": true,\n \"top_n\": 2\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": {
"name": "Request", + "namespace": "inference.put_openshift_ai" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "OpenShiftAiTaskType", + "namespace": "inference._types" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "openshiftai_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_openshift_ai/PutOpenShiftAiRequest.ts#L31-L86" + }, + { + "kind": "response", + "body": { + "kind": "value", + "codegenName": "endpoint_info", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfoOpenShiftAi", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_openshift_ai" + }, + "specLocation": "inference/put_openshift_ai/PutOpenShiftAiResponse.ts#L22-L25" + }, { "kind": "request", "attachedBehaviors": [ diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 1635f967a6..f0fdb69ebd 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -14506,6 +14506,11 @@ export interface InferenceInferenceEndpointInfoOpenAI extends InferenceInference task_type: InferenceTaskTypeOpenAI } +export interface InferenceInferenceEndpointInfoOpenShiftAi extends InferenceInferenceEndpoint { + inference_id: string + task_type: InferenceTaskTypeOpenShiftAi +} + export interface InferenceInferenceEndpointInfoVoyageAI extends InferenceInferenceEndpoint { inference_id: string task_type: InferenceTaskTypeVoyageAI @@ -14598,6 +14603,26 @@ export interface InferenceOpenAITaskSettings { export type InferenceOpenAITaskType = 'chat_completion' | 'completion' | 'text_embedding' +export interface InferenceOpenShiftAiServiceSettings { + api_key: string + url: string + model_id?: string + max_input_tokens?: integer + similarity?: InferenceOpenShiftAiSimilarityType + rate_limit?: InferenceRateLimitSetting +} + +export type InferenceOpenShiftAiServiceType = 'openshift_ai' + +export type InferenceOpenShiftAiSimilarityType = 'cosine' | 'dot_product' | 'l2_norm' + +export interface InferenceOpenShiftAiTaskSettings { + return_documents?: boolean + top_n?: integer +} + +export type InferenceOpenShiftAiTaskType = 'text_embedding' | 'completion' | 'chat_completion' | 'rerank' + export interface InferenceRankedDocument { index: integer relevance_score: float @@ -14679,6 +14704,8 @@ export type InferenceTaskTypeMistral = 'text_embedding' | 'chat_completion' | 'c export type InferenceTaskTypeOpenAI = 'text_embedding' | 'chat_completion' | 'completion' +export type InferenceTaskTypeOpenShiftAi = 'text_embedding' | 'chat_completion' | 'completion' | 'rerank' + export type InferenceTaskTypeVoyageAI = 'text_embedding' | 'rerank' export type InferenceTaskTypeWatsonx = 'text_embedding' | 'chat_completion' | 'completion' @@ -15072,6 +15099,20 @@ export interface InferencePutOpenaiRequest extends RequestBase { export type 
InferencePutOpenaiResponse = InferenceInferenceEndpointInfoOpenAI +export interface InferencePutOpenshiftAiRequest extends RequestBase { + task_type: InferenceOpenShiftAiTaskType + openshiftai_inference_id: Id + timeout?: Duration + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferenceOpenShiftAiServiceType + service_settings: InferenceOpenShiftAiServiceSettings + task_settings?: InferenceOpenShiftAiTaskSettings + } +} + +export type InferencePutOpenshiftAiResponse = InferenceInferenceEndpointInfoOpenShiftAi + export interface InferencePutVoyageaiRequest extends RequestBase { task_type: InferenceVoyageAITaskType voyageai_inference_id: Id diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index ecc06c3d61..1e82deb4b0 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -387,6 +387,7 @@ inference-api-put-jinaai,https://www.elastic.co/docs/api/doc/elasticsearch/opera inference-api-put-llama,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-llama,, inference-api-put-mistral,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-mistral,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-mistral.html, inference-api-put-openai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-openai.html, +inference-api-put-openshift-ai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openshift-ai,, inference-api-put-voyageai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-voyageai,, inference-api-put-watsonx,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-watsonx,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-watsonx-ai.html, inference-api-stream,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-stream-inference,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/stream-inference-api.html, diff --git a/specification/_json_spec/inference.put_openshift_ai.json b/specification/_json_spec/inference.put_openshift_ai.json new file mode 100644 index 0000000000..78047e915c --- /dev/null +++ b/specification/_json_spec/inference.put_openshift_ai.json @@ -0,0 +1,43 @@ +{ + "inference.put_openshift_ai": { + "documentation": { + "url": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openshift-ai", + "description": "Create an OpenShift AI inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{openshiftai_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "openshiftai_inference_id": { + "type": "string", + "description": "The inference ID" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings", + "required": true + }, + "params": { + "timeout": { + "type": "time", + "description": "Specifies the amount of time to wait for the inference endpoint to be created.", + "default": "30s" + } + } + } +} diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 1bace974d0..a5cb1cab24 
100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -1844,6 +1844,68 @@ export enum OpenAIServiceType { openai } +export class OpenShiftAiServiceSettings { + /** + * A valid API key for your OpenShift AI endpoint. + * You can find it in the `Token authentication` section of the model-related information. + */ + api_key: string + /** + * The URL of the OpenShift AI hosted model endpoint. + */ + url: string + /** + * The name of the model to use for the inference task. + * Refer to the hosted model's documentation for the name if needed. + * The service has been tested and confirmed to work with the following models: + * * For the `text_embedding` task: `gritlm-7b`. + * * For the `completion` and `chat_completion` tasks: `llama-31-8b-instruct`. + * * For the `rerank` task: `bge-reranker-v2-m3`. + */ + model_id?: string + /** + * For a `text_embedding` task, the maximum number of tokens per input before chunking occurs. + */ + max_input_tokens?: integer + /** + * For a `text_embedding` task, the similarity measure. One of cosine, dot_product, l2_norm. + */ + similarity?: OpenShiftAiSimilarityType + /** + * This setting helps to minimize the number of rate limit errors returned from the OpenShift AI API. + * By default, the `openshift_ai` service sets the number of requests allowed per minute to 3000. + */ + rate_limit?: RateLimitSetting +} + +export enum OpenShiftAiTaskType { + text_embedding, + completion, + chat_completion, + rerank +} + +export enum OpenShiftAiServiceType { + openshift_ai +} + +export enum OpenShiftAiSimilarityType { + cosine, + dot_product, + l2_norm +} + +export class OpenShiftAiTaskSettings { + /** + * For a `rerank` task, whether to return the source documents in the response. + */ + return_documents?: boolean + /** + * For a `rerank` task, the number of most relevant documents to return. + */ + top_n?: integer +} + export class VoyageAIServiceSettings { /** * The number of dimensions for resulting output embeddings. 
diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts index b9d5fb0972..151482f5b1 100644 --- a/specification/inference/_types/Services.ts +++ b/specification/inference/_types/Services.ts @@ -41,6 +41,7 @@ import { TaskTypeLlama, TaskTypeMistral, TaskTypeOpenAI, + TaskTypeOpenShiftAi, TaskTypeVoyageAI, TaskTypeWatsonx } from '../_types/TaskType' @@ -302,6 +303,17 @@ export class InferenceEndpointInfoOpenAI extends InferenceEndpoint { task_type: TaskTypeOpenAI } +export class InferenceEndpointInfoOpenShiftAi extends InferenceEndpoint { + /** + * The inference Id + */ + inference_id: string + /** + * The task type + */ + task_type: TaskTypeOpenShiftAi +} + export class InferenceEndpointInfoVoyageAI extends InferenceEndpoint { /** * The inference Id @@ -413,6 +425,7 @@ export class RateLimitSetting { * * `mistral` service: `240` * * `openai` service and task type `text_embedding`: `3000` * * `openai` service and task type `completion`: `500` + * * `openshift_ai` service: `3000` * * `voyageai` service: `2000` * * `watsonxai` service: `120` */ diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts index c5d56e439f..ebc554ed9b 100644 --- a/specification/inference/_types/TaskType.ts +++ b/specification/inference/_types/TaskType.ts @@ -142,6 +142,13 @@ export enum TaskTypeOpenAI { completion } +export enum TaskTypeOpenShiftAi { + text_embedding, + chat_completion, + completion, + rerank +} + export enum TaskTypeVoyageAI { text_embedding, rerank diff --git a/specification/inference/put/PutRequest.ts b/specification/inference/put/PutRequest.ts index 0ee0bf0689..1331fbeed6 100644 --- a/specification/inference/put/PutRequest.ts +++ b/specification/inference/put/PutRequest.ts @@ -49,6 +49,7 @@ import { TaskType } from '@inference/_types/TaskType' * * Llama (`chat_completion`, `completion`, `text_embedding`) * * Mistral (`chat_completion`, `completion`, `text_embedding`) * * OpenAI (`chat_completion`, `completion`, `text_embedding`) + * * OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`) * * VoyageAI (`rerank`, `text_embedding`) * * Watsonx inference integration (`text_embedding`) * @rest_spec_name inference.put diff --git a/specification/inference/put_openshift_ai/PutOpenShiftAiRequest.ts b/specification/inference/put_openshift_ai/PutOpenShiftAiRequest.ts new file mode 100644 index 0000000000..e86403c9d0 --- /dev/null +++ b/specification/inference/put_openshift_ai/PutOpenShiftAiRequest.ts @@ -0,0 +1,86 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { Duration } from '@_types/Time' +import { + OpenShiftAiServiceSettings, + OpenShiftAiServiceType, + OpenShiftAiTaskSettings, + OpenShiftAiTaskType +} from '@inference/_types/CommonTypes' +import { InferenceChunkingSettings } from '@inference/_types/Services' + +/** + * Create an OpenShift AI inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `openshift_ai` service. + * @rest_spec_name inference.put_openshift_ai + * @availability stack since=9.3.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-openshift-ai + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{openshiftai_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + * NOTE: The `chat_completion` task type only supports streaming and only through the _stream API. + */ + task_type: OpenShiftAiTaskType + /** + * The unique identifier of the inference endpoint. + */ + openshiftai_inference_id: Id + } + query_parameters: { + /** + * Specifies the amount of time to wait for the inference endpoint to be created. + * @server_default 30s + */ + timeout?: Duration + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `openshift_ai`. + */ + service: OpenShiftAiServiceType + /** + * Settings used to install the inference model. These settings are specific to the `openshift_ai` service. + */ + service_settings: OpenShiftAiServiceSettings + /** + * Settings to configure the inference task. + * These settings are specific to the task type you specified. + */ + task_settings?: OpenShiftAiTaskSettings + } +} diff --git a/specification/inference/put_openshift_ai/PutOpenShiftAiResponse.ts b/specification/inference/put_openshift_ai/PutOpenShiftAiResponse.ts new file mode 100644 index 0000000000..c7c9d3e17d --- /dev/null +++ b/specification/inference/put_openshift_ai/PutOpenShiftAiResponse.ts @@ -0,0 +1,25 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import { InferenceEndpointInfoOpenShiftAi } from '@inference/_types/Services' + +export class Response { + /** @codegen_name endpoint_info */ + body: InferenceEndpointInfoOpenShiftAi +} diff --git a/specification/inference/put_openshift_ai/examples/request/PutOpenShiftAiRequestExample1.yaml b/specification/inference/put_openshift_ai/examples/request/PutOpenShiftAiRequestExample1.yaml new file mode 100644 index 0000000000..0fc0a91cd9 --- /dev/null +++ b/specification/inference/put_openshift_ai/examples/request/PutOpenShiftAiRequestExample1.yaml @@ -0,0 +1,15 @@ +summary: A text embedding task +description: + Run `PUT _inference/text_embedding/openshift-ai-text-embedding` to create an inference endpoint + that performs a `text_embedding` task. +method_request: 'PUT _inference/text_embedding/openshift-ai-text-embedding' +# type: "request" +value: |- + { + "service": "openshift_ai", + "service_settings": { + "url": "openshift-ai-embeddings-url", + "api_key": "openshift-ai-embeddings-token", + "model_id": "gritlm-7b" + } + } diff --git a/specification/inference/put_openshift_ai/examples/request/PutOpenShiftAiRequestExample2.yaml b/specification/inference/put_openshift_ai/examples/request/PutOpenShiftAiRequestExample2.yaml new file mode 100644 index 0000000000..3e4fc1aa6d --- /dev/null +++ b/specification/inference/put_openshift_ai/examples/request/PutOpenShiftAiRequestExample2.yaml @@ -0,0 +1,15 @@ +summary: A completion task +description: + Run `PUT _inference/completion/openshift-ai-completion` to create an inference endpoint + that performs a `completion` task. +method_request: 'PUT _inference/completion/openshift-ai-completion' +# type: "request" +value: |- + { + "service": "openshift_ai", + "service_settings": { + "url": "openshift-ai-completion-url", + "api_key": "openshift-ai-completion-token", + "model_id": "llama-31-8b-instruct" + } + } diff --git a/specification/inference/put_openshift_ai/examples/request/PutOpenShiftAiRequestExample3.yaml b/specification/inference/put_openshift_ai/examples/request/PutOpenShiftAiRequestExample3.yaml new file mode 100644 index 0000000000..94840dc5ce --- /dev/null +++ b/specification/inference/put_openshift_ai/examples/request/PutOpenShiftAiRequestExample3.yaml @@ -0,0 +1,15 @@ +summary: A chat completion task +description: + Run `PUT _inference/chat_completion/openshift-ai-chat-completion` to create an inference endpoint + that performs a `chat_completion` task. +method_request: 'PUT _inference/chat_completion/openshift-ai-chat-completion' +# type: "request" +value: |- + { + "service": "openshift_ai", + "service_settings": { + "url": "openshift-ai-chat-completion-url", + "api_key": "openshift-ai-chat-completion-token", + "model_id": "llama-31-8b-instruct" + } + } diff --git a/specification/inference/put_openshift_ai/examples/request/PutOpenShiftAiRequestExample4.yaml b/specification/inference/put_openshift_ai/examples/request/PutOpenShiftAiRequestExample4.yaml new file mode 100644 index 0000000000..0fe9e37d22 --- /dev/null +++ b/specification/inference/put_openshift_ai/examples/request/PutOpenShiftAiRequestExample4.yaml @@ -0,0 +1,15 @@ +summary: A rerank task +description: + Run `PUT _inference/rerank/openshift-ai-rerank` to create an inference endpoint + that performs a `rerank` task. 
+method_request: 'PUT _inference/rerank/openshift-ai-rerank' +# type: "request" +value: |- + { + "service": "openshift_ai", + "service_settings": { + "url": "openshift-ai-rerank-url", + "api_key": "openshift-ai-rerank-token", + "model_id": "bge-reranker-v2-m3" + } + } diff --git a/specification/inference/put_openshift_ai/examples/request/PutOpenShiftAiRequestExample5.yaml b/specification/inference/put_openshift_ai/examples/request/PutOpenShiftAiRequestExample5.yaml new file mode 100644 index 0000000000..6cf48c3ffa --- /dev/null +++ b/specification/inference/put_openshift_ai/examples/request/PutOpenShiftAiRequestExample5.yaml @@ -0,0 +1,19 @@ +summary: A rerank task with custom `task_settings` and omitted `model_id` +description: + Run `PUT _inference/rerank/openshift-ai-rerank` to create an inference endpoint + that performs a `rerank` task, specifying custom `task_settings` and omitting the `model_id` + if the deployed model doesn't require it. +method_request: 'PUT _inference/rerank/openshift-ai-rerank' +# type: "request" +value: |- + { + "service": "openshift_ai", + "service_settings": { + "url": "openshift-ai-rerank-url", + "api_key": "openshift-ai-rerank-token" + }, + "task_settings": { + "return_documents": true, + "top_n": 2 + } + }
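Once created, the endpoints defined by the examples above are queried through the generic inference API. A minimal sketch, assuming the standard `POST _inference/{task_type}/{inference_id}` request shape and reusing the endpoint IDs from the examples; the input strings are illustrative placeholders:

POST _inference/text_embedding/openshift-ai-text-embedding
{
  "input": "The quick brown fox jumped over the lazy dog"
}

POST _inference/rerank/openshift-ai-rerank
{
  "query": "What is Elasticsearch?",
  "input": [
    "Elasticsearch is a distributed search and analytics engine.",
    "OpenShift AI serves models on a Kubernetes-based platform."
  ]
}

For the `rerank` call, the `return_documents` and `top_n` values set in `task_settings` at endpoint creation apply by default. Note that, per the request spec above, the `chat_completion` task type only supports streaming and must be invoked through the _stream API rather than this plain form.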