From 61a4738a124dcfb812e5133492f06fe05e12f82a Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Sun, 16 Nov 2025 23:56:59 -0500 Subject: [PATCH 1/7] feat: Actualize query rewrite in search API Signed-off-by: Francisco Javier Arceo adding query expansion model to vector store config Signed-off-by: Francisco Javier Arceo --- src/llama_stack/core/datatypes.py | 8 + src/llama_stack/core/routers/vector_io.py | 6 + .../ci-tests/run-with-postgres-store.yaml | 8 + .../distributions/ci-tests/run.yaml | 8 + .../starter-gpu/run-with-postgres-store.yaml | 8 + .../distributions/starter-gpu/run.yaml | 8 + .../starter/run-with-postgres-store.yaml | 8 + .../distributions/starter/run.yaml | 8 + .../utils/memory/openai_vector_store_mixin.py | 1 + .../providers/utils/memory/vector_store.py | 81 ++ ...c18360a07bb3dda397579e25c27b-a882f554.json | 647 ++++++++++++ ...93298528e5349dfb4438d3d7324f-17b6020a.json | 989 ++++++++++++++++++ ...0ec104af88f1a482b6a936be14cc-17b6020a.json | 989 ++++++++++++++++++ ...2392139e2024601a849af31b9253-a882f554.json | 647 ++++++++++++ ...47d12cbec8bbcc581dc38df5fdbb-a882f554.json | 647 ++++++++++++ ...853da455d1f7c9316fb7e9d1419b-a882f554.json | 647 ++++++++++++ ...0ecf6a0334d311302e72afd87d25-17b6020a.json | 989 ++++++++++++++++++ ...0aaeee60e0a6866183450427d162-a882f554.json | 647 ++++++++++++ ...ee1f4ab308b1c12e971c13988bf0-17b6020a.json | 989 ++++++++++++++++++ .../vector_io/test_openai_vector_stores.py | 46 + 20 files changed, 7381 insertions(+) create mode 100644 tests/integration/common/recordings/models-d98e7566147f9d534bc0461f2efe61e3f525c18360a07bb3dda397579e25c27b-a882f554.json create mode 100644 tests/integration/vector_io/recordings/models-3347a38d1dce9c7428f2fae6f6bb25085a6b93298528e5349dfb4438d3d7324f-17b6020a.json create mode 100644 tests/integration/vector_io/recordings/models-3daddf1063512da260bce92df25fe2b01ac70ec104af88f1a482b6a936be14cc-17b6020a.json create mode 100644 tests/integration/vector_io/recordings/models-52b8e996a4f2c944c6bae30ac5274b714ed42392139e2024601a849af31b9253-a882f554.json create mode 100644 tests/integration/vector_io/recordings/models-cd7b914c23d754f03d8c0a789ce0df17329547d12cbec8bbcc581dc38df5fdbb-a882f554.json create mode 100644 tests/integration/vector_io/recordings/models-e3257f8a2366634242cfecf5298c0bc7af95853da455d1f7c9316fb7e9d1419b-a882f554.json create mode 100644 tests/integration/vector_io/recordings/models-e9e12107519911efec26749b69f37d360c970ecf6a0334d311302e72afd87d25-17b6020a.json create mode 100644 tests/integration/vector_io/recordings/models-eddc1eb84009b784ae43f6ef48daf1d685230aaeee60e0a6866183450427d162-a882f554.json create mode 100644 tests/integration/vector_io/recordings/models-fdf8866dba534adc96a22dccfcb8ddb92f33ee1f4ab308b1c12e971c13988bf0-17b6020a.json diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 1e29690ffd..49747d4770 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -376,6 +376,14 @@ class VectorStoresConfig(BaseModel): default=None, description="Default embedding model configuration for vector stores.", ) + default_query_expansion_model: QualifiedModel | None = Field( + default=None, + description="Default LLM model for query expansion/rewriting in vector search.", + ) + query_expansion_prompt: str = Field( + default="Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\n{query}\n\nImproved query:", + description="Prompt template for query expansion. 
Use {query} as placeholder for the original query.", + ) class SafetyConfig(BaseModel): diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index 5256dda449..a7b30642ce 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -99,6 +99,12 @@ async def query_chunks( ) -> QueryChunksResponse: logger.debug(f"VectorIORouter.query_chunks: {vector_store_id}") provider = await self.routing_table.get_provider_impl(vector_store_id) + + # Ensure params dict exists and add vector_stores_config for query rewriting + if params is None: + params = {} + params["vector_stores_config"] = self.vector_stores_config + return await provider.query_chunks(vector_store_id, query, params) # OpenAI Vector Stores API endpoints diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml index 7721138c7f..8110dbdf6d 100644 --- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -288,5 +288,13 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 + query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. + Return only the improved query, no explanations: + + + {query} + + + Improved query:' safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml index b791e14882..809b0ef1c9 100644 --- a/src/llama_stack/distributions/ci-tests/run.yaml +++ b/src/llama_stack/distributions/ci-tests/run.yaml @@ -279,5 +279,13 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 + query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. + Return only the improved query, no explanations: + + + {query} + + + Improved query:' safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml index 9c250c05a6..ca47d7f4c4 100644 --- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -291,5 +291,13 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 + query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. + Return only the improved query, no explanations: + + + {query} + + + Improved query:' safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml index 65f9ae326f..15555c2622 100644 --- a/src/llama_stack/distributions/starter-gpu/run.yaml +++ b/src/llama_stack/distributions/starter-gpu/run.yaml @@ -282,5 +282,13 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 + query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. 
+    Return only the improved query, no explanations:
+
+
+    {query}
+
+
+    Improved query:'
 safety:
   default_shield_id: llama-guard
diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
index 3314bb9e96..423b304528 100644
--- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
@@ -288,5 +288,13 @@ vector_stores:
   default_embedding_model:
     provider_id: sentence-transformers
     model_id: nomic-ai/nomic-embed-text-v1.5
+  query_expansion_prompt: 'Expand this query with relevant synonyms and related terms.
+    Return only the improved query, no explanations:
+
+
+    {query}
+
+
+    Improved query:'
 safety:
   default_shield_id: llama-guard
diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml
index e88539e6a7..a0f56fc420 100644
--- a/src/llama_stack/distributions/starter/run.yaml
+++ b/src/llama_stack/distributions/starter/run.yaml
@@ -279,5 +279,13 @@ vector_stores:
   default_embedding_model:
     provider_id: sentence-transformers
     model_id: nomic-ai/nomic-embed-text-v1.5
+  query_expansion_prompt: 'Expand this query with relevant synonyms and related terms.
+    Return only the improved query, no explanations:
+
+
+    {query}
+
+
+    Improved query:'
 safety:
   default_shield_id: llama-guard
diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index bbfd60e253..f33bb29c8c 100644
--- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -611,6 +611,7 @@ async def openai_search_vector_store(
             "max_chunks": max_num_results * CHUNK_MULTIPLIER,
             "score_threshold": score_threshold,
             "mode": search_mode,
+            "rewrite_query": rewrite_query,
         }
         # TODO: Add support for ranking_options.ranker
diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py
index b6a671ddb2..6fbf4a4245 100644
--- a/src/llama_stack/providers/utils/memory/vector_store.py
+++ b/src/llama_stack/providers/utils/memory/vector_store.py
@@ -17,6 +17,7 @@
 from numpy.typing import NDArray
 from pydantic import BaseModel

+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
 from llama_stack.providers.utils.inference.prompt_adapter import (
@@ -34,6 +35,11 @@
     RAGDocument,
     VectorStore,
 )
+from llama_stack_api.inference import (
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAIUserMessageParam,
+)
+from llama_stack_api.models import ModelType

 log = get_logger(name=__name__, category="providers::utils")

@@ -262,6 +268,7 @@ class VectorStoreWithIndex:
     vector_store: VectorStore
     index: EmbeddingIndex
     inference_api: Api.inference
+    vector_stores_config: VectorStoresConfig | None = None

     async def insert_chunks(
         self,
@@ -296,6 +303,11 @@ async def query_chunks(
     ) -> QueryChunksResponse:
         if params is None:
             params = {}
+
+        # Extract configuration if provided by router
+        if "vector_stores_config" in params:
+            self.vector_stores_config = params["vector_stores_config"]
+
         k = params.get("max_chunks", 3)
         mode = params.get("mode")
         score_threshold = params.get("score_threshold", 0.0)
@@ -318,6 +330,11 @@ async def query_chunks(
             reranker_params = {"impact_factor": k_value}

         query_string = interleaved_content_as_str(query)
+
+        # Apply query rewriting if enabled
+        if params.get("rewrite_query", False):
+            query_string = await self._rewrite_query_for_search(query_string)
+
         if mode == "keyword":
             return await self.index.query_keyword(query_string, k, score_threshold)

@@ -333,3 +350,67 @@ async def query_chunks(
             )
         else:
             return await self.index.query_vector(query_vector, k, score_threshold)
+
+    async def _rewrite_query_for_search(self, query: str) -> str:
+        """Rewrite the user query to improve vector search performance.
+
+        :param query: The original user query
+        :returns: The rewritten query optimized for vector search
+        """
+        # Check if query expansion model is configured
+        if not self.vector_stores_config or not self.vector_stores_config.default_query_expansion_model:
+            raise ValueError("No default_query_expansion_model configured for query rewriting")
+
+        # Use the configured model
+        expansion_model = self.vector_stores_config.default_query_expansion_model
+        chat_model = f"{expansion_model.provider_id}/{expansion_model.model_id}"
+
+        # Validate that the model is available and is an LLM
+        try:
+            models_response = await self.inference_api.routing_table.list_models()
+        except Exception as e:
+            raise RuntimeError(f"Failed to list available models for validation: {e}") from e
+
+        model_found = False
+        for model in models_response.data:
+            if model.identifier == chat_model:
+                if model.model_type != ModelType.llm:
+                    raise ValueError(
+                        f"Configured query expansion model '{chat_model}' is not an LLM model "
+                        f"(found type: {model.model_type}). Query rewriting requires an LLM model."
+                    )
+                model_found = True
+                break
+
+        if not model_found:
+            available_llm_models = [m.identifier for m in models_response.data if m.model_type == ModelType.llm]
+            raise ValueError(
+                f"Configured query expansion model '{chat_model}' is not available. "
+                f"Available LLM models: {available_llm_models}"
+            )
+
+        # Use the configured prompt (has a default value)
+        rewrite_prompt = self.vector_stores_config.query_expansion_prompt.format(query=query)

+        chat_request = OpenAIChatCompletionRequestWithExtraBody(
+            model=chat_model,
+            messages=[
+                OpenAIUserMessageParam(
+                    role="user",
+                    content=rewrite_prompt,
+                )
+            ],
+            max_tokens=100,
+        )
+
+        try:
+            response = await self.inference_api.openai_chat_completion(chat_request)
+        except Exception as e:
+            raise RuntimeError(f"Failed to generate rewritten query: {e}") from e
+
+        if response.choices and response.choices[0].message.content:
+            rewritten_query = response.choices[0].message.content.strip()
+            log.info(f"Query rewritten: '{query}' → '{rewritten_query}'")
+            return rewritten_query
+        else:
+            raise RuntimeError("No response received from LLM model for query rewriting")
diff --git a/tests/integration/common/recordings/models-d98e7566147f9d534bc0461f2efe61e3f525c18360a07bb3dda397579e25c27b-a882f554.json b/tests/integration/common/recordings/models-d98e7566147f9d534bc0461f2efe61e3f525c18360a07bb3dda397579e25c27b-a882f554.json
new file mode 100644
index 0000000000..c38561dbb2
--- /dev/null
+++ b/tests/integration/common/recordings/models-d98e7566147f9d534bc0461f2efe61e3f525c18360a07bb3dda397579e25c27b-a882f554.json
@@ -0,0 +1,647 @@
+{
+  "test_id": null,
+  "request": {
+    "method": "POST",
+    "url": "https://generativelanguage.googleapis.com/v1beta/openai/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/embedding-gecko-001",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Embedding Gecko"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/gemini-2.5-pro-preview-03-25",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Gemini 2.5 Pro Preview 03-25"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/gemini-2.5-flash-preview-05-20",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Gemini 2.5 Flash Preview 05-20"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/gemini-2.5-flash",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Gemini 2.5 Flash"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/gemini-2.5-flash-lite-preview-06-17",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Gemini 2.5 Flash-Lite Preview 06-17"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/gemini-2.5-pro-preview-05-06",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Gemini 2.5 Pro Preview 05-06"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/gemini-2.5-pro-preview-06-05",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Gemini 2.5 Pro Preview"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/gemini-2.5-pro",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Gemini 2.5 Pro"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/gemini-2.0-flash-exp",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Gemini 
2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp-image-generation", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash (Image Generation) Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-exp-1206", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Experimental 1206" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-01-21", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-1219", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"models/learnlm-2.0-flash-experimental", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "LearnLM 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-1b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 1B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-12b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 12B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-27b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 27B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e2b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E2B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-lite-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash-Lite Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-pro-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Pro Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-robotics-er-1.5-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Robotics-ER 1.5 Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-computer-use-preview-10-2025", + "created": null, + "object": "model", + "owned_by": 
"google", + "display_name": "Gemini 2.5 Computer Use Preview 10-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/text-embedding-004", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Text Embedding 004" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp-03-07", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental 03-07" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/aqa", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Model that performs Attributed Question Answering." + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-2.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 2" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-fast-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1 fast" + } + }, + { + "__type__": 
"openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-live-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-live-2.5-flash-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Live 2.5 Flash Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-live-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Live Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Preview 09-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/lyria-realtime-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Lyria Realtime Experimental" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/recordings/models-3347a38d1dce9c7428f2fae6f6bb25085a6b93298528e5349dfb4438d3d7324f-17b6020a.json b/tests/integration/vector_io/recordings/models-3347a38d1dce9c7428f2fae6f6bb25085a6b93298528e5349dfb4438d3d7324f-17b6020a.json new file mode 100644 index 0000000000..53f86af996 --- /dev/null +++ b/tests/integration/vector_io/recordings/models-3347a38d1dce9c7428f2fae6f6bb25085a6b93298528e5349dfb4438d3d7324f-17b6020a.json @@ -0,0 +1,989 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_search_with_rewrite_query[client_with_models-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-faiss]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-search-api", + "created": 1759514629, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-search-api-2025-10-14", + "created": 1760043960, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1", + "created": 1681940951, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-2", + "created": 1698798177, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-1106", + "created": 1699053241, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-2024-04-09", + "created": 1712601677, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio", + "created": 1756339249, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": 
"openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-tts", + "created": 1742403959, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo", + "created": 1712361441, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime", + "created": 1756271701, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini", + "created": 1744318173, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-2025-08-28", + "created": 1756271773, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini-2025-04-14", + "created": 1744317547, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-1106", + "created": 1698959748, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0125-preview", + "created": 1706037612, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "davinci-002", + "created": 1692634301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-preview", + "created": 1706037777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0613", + "created": 1686588896, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "computer-use-preview-2025-03-11", + "created": 1741377021, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4", + "created": 1687882411, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1", + "created": 1744316542, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-2025-04-14", + "created": 1744315746, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-3", + "created": 1698785189, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "computer-use-preview", + "created": 1734655677, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano", + "created": 1744321707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct-0914", + "created": 1694122472, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "whisper-1", + "created": 1677532384, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-2024-12-17", + "created": 1734326976, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + 
"__data__": { + "id": "gpt-4.1-nano-2025-04-14", + "created": 1744321025, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-16k", + "created": 1683758102, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-2025-08-28", + "created": 1756256146, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct", + "created": 1692901427, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-11-20", + "created": 1739331543, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-05-13", + "created": 1715368132, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview", + "created": 1741391161, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview-2025-03-11", + "created": 1741390858, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-chat-latest", + "created": 1762547951, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview", + "created": 1741388720, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-latest", + "created": 1731689265, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro", + "created": 1742251791, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro-2025-03-19", + "created": 1742251504, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-codex-mini", + "created": 1763007109, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview-2025-03-11", + "created": 1741388170, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-small", + "created": 1705948997, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research-2025-06-26", + "created": 1750866121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research", + "created": 1749685485, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "codex-mini-latest", + "created": 1746673257, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-08-06", + "created": 1722814719, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1", + "created": 1734375816, + "object": "model", + 
"owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-2024-07-18", + "created": 1721172717, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini", + "created": 1721172741, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview", + "created": 1734387424, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1-mini", + "created": 1758845821, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini", + "created": 1754425928, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1", + "created": 1745517030, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini-2025-08-07", + "created": 1754425867, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-2024-09-26", + "created": 1732734466, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5", + "created": 1754425777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-pro", + "created": 1748475349, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-ada-002", + "created": 1671217299, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano-2025-08-07", + "created": 1754426303, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-12-17", + "created": 1734034239, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano", + "created": 1754426384, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-2025-11-13", + "created": 1762800353, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd-1106", + "created": 1699053533, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd", + "created": 1699046015, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-0125", + "created": 1706048358, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-mini", + "created": 1759512027, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-mini-2025-10-06", + "created": 1759512137, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1", + "created": 1762800673, + "object": "model", + "owned_by": 
"system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview-2024-12-17", + "created": 1734115920, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini-2025-01-31", + "created": 1738010200, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini", + "created": 1737146383, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-12-17", + "created": 1733945430, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-pro-2025-06-10", + "created": 1749166761, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-1106-preview", + "created": 1698957206, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "babbage-002", + "created": 1692634615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-chat-latest", + "created": 1754073306, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo", + "created": 1677610602, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-2025-08-07", + "created": 1754075360, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "chatgpt-4o-latest", + "created": 1723515131, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-transcribe", + "created": 1742068463, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sora-2", + "created": 1759708615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sora-2-pro", + "created": 1759708663, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-pro-2025-10-06", + "created": 1759469707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o", + "created": 1715367049, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview", + "created": 1727659998, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-mini", + "created": 1759517133, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-mini-2025-10-06", + "created": 1759517175, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini", + "created": 1744225351, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-2025-04-16", + "created": 1744133506, + "object": "model", + "owned_by": 
"system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2025-06-03", + "created": 1748907838, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview", + "created": 1727460443, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2025-06-03", + "created": 1748908498, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-pro", + "created": 1759469822, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-large", + "created": 1705953180, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-deep-research", + "created": 1749840121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-deep-research-2025-06-26", + "created": 1750865219, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-transcribe", + "created": 1742068596, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview", + "created": 1734387380, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview-2024-12-17", + "created": 1734112601, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3", + "created": 1744225308, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-2025-04-16", + "created": 1744133301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-transcribe-diarize", + "created": 1750798887, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-codex", + "created": 1762988221, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-10-01", + "created": 1727389042, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-10-01", + "created": 1727131766, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-codex", + "created": 1757527818, + "object": "model", + "owned_by": "system" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/recordings/models-3daddf1063512da260bce92df25fe2b01ac70ec104af88f1a482b6a936be14cc-17b6020a.json b/tests/integration/vector_io/recordings/models-3daddf1063512da260bce92df25fe2b01ac70ec104af88f1a482b6a936be14cc-17b6020a.json new file mode 100644 index 0000000000..2d29aebfb2 --- /dev/null +++ b/tests/integration/vector_io/recordings/models-3daddf1063512da260bce92df25fe2b01ac70ec104af88f1a482b6a936be14cc-17b6020a.json @@ -0,0 +1,989 @@ +{ + "test_id": 
"tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_search_with_rewrite_query[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-sqlite-vec]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-search-api", + "created": 1759514629, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-search-api-2025-10-14", + "created": 1760043960, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1", + "created": 1681940951, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-2", + "created": 1698798177, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-1106", + "created": 1699053241, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-2024-04-09", + "created": 1712601677, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio", + "created": 1756339249, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-tts", + "created": 1742403959, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo", + "created": 1712361441, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini", + "created": 1744318173, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime", + "created": 1756271701, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini-2025-04-14", + "created": 1744317547, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-1106", + "created": 1698959748, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0125-preview", + "created": 1706037612, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "davinci-002", + "created": 1692634301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-preview", + "created": 1706037777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0613", + "created": 1686588896, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "computer-use-preview-2025-03-11", + "created": 1741377021, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4", + "created": 1687882411, + "object": "model", + "owned_by": 
"openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1", + "created": 1744316542, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-2025-04-14", + "created": 1744315746, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-3", + "created": 1698785189, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "computer-use-preview", + "created": 1734655677, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano", + "created": 1744321707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct-0914", + "created": 1694122472, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "whisper-1", + "created": 1677532384, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-2024-12-17", + "created": 1734326976, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano-2025-04-14", + "created": 1744321025, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-16k", + "created": 1683758102, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-2025-08-28", + "created": 1756256146, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct", + "created": 1692901427, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-11-20", + "created": 1739331543, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-05-13", + "created": 1715368132, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview", + "created": 1741391161, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview-2025-03-11", + "created": 1741390858, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-chat-latest", + "created": 1762547951, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview", + "created": 1741388720, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-latest", + "created": 1731689265, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro", + "created": 1742251791, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro-2025-03-19", + "created": 1742251504, + 
"object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-codex-mini", + "created": 1763007109, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview-2025-03-11", + "created": 1741388170, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-small", + "created": 1705948997, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research-2025-06-26", + "created": 1750866121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research", + "created": 1749685485, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "codex-mini-latest", + "created": 1746673257, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-08-06", + "created": 1722814719, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1", + "created": 1734375816, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-2024-07-18", + "created": 1721172717, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini", + "created": 1721172741, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview", + "created": 1734387424, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1-mini", + "created": 1758845821, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini", + "created": 1754425928, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1", + "created": 1745517030, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini-2025-08-07", + "created": 1754425867, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-2024-09-26", + "created": 1732734466, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5", + "created": 1754425777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-pro", + "created": 1748475349, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-ada-002", + "created": 1671217299, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano-2025-08-07", + "created": 1754426303, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"gpt-4o-audio-preview-2024-12-17", + "created": 1734034239, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano", + "created": 1754426384, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-2025-11-13", + "created": 1762800353, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd-1106", + "created": 1699053533, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd", + "created": 1699046015, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-0125", + "created": 1706048358, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-mini", + "created": 1759512027, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-mini-2025-10-06", + "created": 1759512137, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1", + "created": 1762800673, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview-2024-12-17", + "created": 1734115920, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini-2025-01-31", + "created": 1738010200, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini", + "created": 1737146383, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-12-17", + "created": 1733945430, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-pro-2025-06-10", + "created": 1749166761, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-1106-preview", + "created": 1698957206, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-codex", + "created": 1757527818, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "babbage-002", + "created": 1692634615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-chat-latest", + "created": 1754073306, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo", + "created": 1677610602, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-2025-08-07", + "created": 1754075360, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "chatgpt-4o-latest", + "created": 1723515131, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"gpt-4o-transcribe", + "created": 1742068463, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sora-2", + "created": 1759708615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sora-2-pro", + "created": 1759708663, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-pro-2025-10-06", + "created": 1759469707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o", + "created": 1715367049, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview", + "created": 1727659998, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-mini", + "created": 1759517133, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-mini-2025-10-06", + "created": 1759517175, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini", + "created": 1744225351, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-2025-04-16", + "created": 1744133506, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2025-06-03", + "created": 1748907838, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview", + "created": 1727460443, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2025-06-03", + "created": 1748908498, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-pro", + "created": 1759469822, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-large", + "created": 1705953180, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-deep-research", + "created": 1749840121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-deep-research-2025-06-26", + "created": 1750865219, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-transcribe", + "created": 1742068596, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview", + "created": 1734387380, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview-2024-12-17", + "created": 1734112601, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3", + "created": 1744225308, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": 
"openai.types.model.Model", + "__data__": { + "id": "o3-2025-04-16", + "created": 1744133301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-transcribe-diarize", + "created": 1750798887, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-codex", + "created": 1762988221, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-10-01", + "created": 1727389042, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-10-01", + "created": 1727131766, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-2025-08-28", + "created": 1756271773, + "object": "model", + "owned_by": "system" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/recordings/models-52b8e996a4f2c944c6bae30ac5274b714ed42392139e2024601a849af31b9253-a882f554.json b/tests/integration/vector_io/recordings/models-52b8e996a4f2c944c6bae30ac5274b714ed42392139e2024601a849af31b9253-a882f554.json new file mode 100644 index 0000000000..be9e401ecf --- /dev/null +++ b/tests/integration/vector_io/recordings/models-52b8e996a4f2c944c6bae30ac5274b714ed42392139e2024601a849af31b9253-a882f554.json @@ -0,0 +1,647 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_search_with_rewrite_query[client_with_models-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-sqlite-vec]", + "request": { + "method": "POST", + "url": "https://generativelanguage.googleapis.com/v1beta/openai/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-gecko-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding Gecko" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-03-25", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview 03-25" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-05-20", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-06-17", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite Preview 06-17" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-05-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview 05-06" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-06-05", + "created": null, + "object": "model", + "owned_by": "google", 
+ "display_name": "Gemini 2.5 Pro Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp-image-generation", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash (Image Generation) Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-exp-1206", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Experimental 1206" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-01-21", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-1219", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"models/gemini-2.5-flash-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/learnlm-2.0-flash-experimental", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "LearnLM 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-1b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 1B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-12b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 12B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-27b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 27B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e2b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E2B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-lite-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash-Lite Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-pro-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Pro Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": 
"Gemini 2.5 Flash-Lite Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-robotics-er-1.5-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Robotics-ER 1.5 Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-computer-use-preview-10-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Computer Use Preview 10-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/text-embedding-004", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Text Embedding 004" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp-03-07", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental 03-07" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/aqa", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Model that performs Attributed Question Answering." 
+ } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-2.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 2" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-fast-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-live-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-live-2.5-flash-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Live 2.5 Flash Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-live-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Live Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Preview 09-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/lyria-realtime-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Lyria Realtime Experimental" 
+ } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/recordings/models-cd7b914c23d754f03d8c0a789ce0df17329547d12cbec8bbcc581dc38df5fdbb-a882f554.json b/tests/integration/vector_io/recordings/models-cd7b914c23d754f03d8c0a789ce0df17329547d12cbec8bbcc581dc38df5fdbb-a882f554.json new file mode 100644 index 0000000000..1ec8ba14bc --- /dev/null +++ b/tests/integration/vector_io/recordings/models-cd7b914c23d754f03d8c0a789ce0df17329547d12cbec8bbcc581dc38df5fdbb-a882f554.json @@ -0,0 +1,647 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_search_with_rewrite_query[client_with_models-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-faiss]", + "request": { + "method": "POST", + "url": "https://generativelanguage.googleapis.com/v1beta/openai/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-gecko-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding Gecko" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-03-25", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview 03-25" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-05-20", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-06-17", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite Preview 06-17" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-05-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview 05-06" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-06-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp-image-generation", + 
"created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash (Image Generation) Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-exp-1206", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Experimental 1206" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-01-21", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-1219", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/learnlm-2.0-flash-experimental", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "LearnLM 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-1b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 1B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 4B" + } + }, + { + "__type__": 
"openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-12b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 12B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-27b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 27B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e2b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E2B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-lite-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash-Lite Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-pro-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Pro Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-robotics-er-1.5-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Robotics-ER 1.5 Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-computer-use-preview-10-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Computer Use Preview 10-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/text-embedding-004", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Text Embedding 004" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"models/gemini-embedding-exp-03-07", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental 03-07" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/aqa", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Model that performs Attributed Question Answering." + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-2.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 2" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-fast-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-live-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-live-2.5-flash-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Live 2.5 Flash Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-live-preview", + "created": null, + 
"object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Live Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Preview 09-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/lyria-realtime-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Lyria Realtime Experimental" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/recordings/models-e3257f8a2366634242cfecf5298c0bc7af95853da455d1f7c9316fb7e9d1419b-a882f554.json b/tests/integration/vector_io/recordings/models-e3257f8a2366634242cfecf5298c0bc7af95853da455d1f7c9316fb7e9d1419b-a882f554.json new file mode 100644 index 0000000000..20fae07fe5 --- /dev/null +++ b/tests/integration/vector_io/recordings/models-e3257f8a2366634242cfecf5298c0bc7af95853da455d1f7c9316fb7e9d1419b-a882f554.json @@ -0,0 +1,647 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_search_with_rewrite_query[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-faiss]", + "request": { + "method": "POST", + "url": "https://generativelanguage.googleapis.com/v1beta/openai/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-gecko-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding Gecko" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-03-25", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview 03-25" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-05-20", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-06-17", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite Preview 06-17" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-05-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview 05-06" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-06-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro", + "created": null, + "object": "model", + "owned_by": 
"google", + "display_name": "Gemini 2.5 Pro" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp-image-generation", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash (Image Generation) Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-exp-1206", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Experimental 1206" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-01-21", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-1219", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + 
"id": "models/gemini-2.5-pro-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/learnlm-2.0-flash-experimental", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "LearnLM 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-1b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 1B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-12b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 12B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-27b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 27B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e2b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E2B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-lite-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash-Lite Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-pro-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Pro Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-robotics-er-1.5-preview", + "created": null, + "object": "model", + "owned_by": "google", + 
"display_name": "Gemini Robotics-ER 1.5 Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-computer-use-preview-10-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Computer Use Preview 10-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/text-embedding-004", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Text Embedding 004" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp-03-07", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental 03-07" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/aqa", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Model that performs Attributed Question Answering." + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-2.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 2" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1" + } + }, + { + 
"__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-fast-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-live-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-live-2.5-flash-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Live 2.5 Flash Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-live-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Live Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Preview 09-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/lyria-realtime-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Lyria Realtime Experimental" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/recordings/models-e9e12107519911efec26749b69f37d360c970ecf6a0334d311302e72afd87d25-17b6020a.json b/tests/integration/vector_io/recordings/models-e9e12107519911efec26749b69f37d360c970ecf6a0334d311302e72afd87d25-17b6020a.json new file mode 100644 index 0000000000..5d44f397b9 --- /dev/null +++ b/tests/integration/vector_io/recordings/models-e9e12107519911efec26749b69f37d360c970ecf6a0334d311302e72afd87d25-17b6020a.json @@ -0,0 +1,989 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_search_with_rewrite_query[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-faiss]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-search-api", + "created": 1759514629, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-search-api-2025-10-14", + "created": 1760043960, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1", + "created": 1681940951, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-2", + "created": 1698798177, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-1106", + "created": 1699053241, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-2024-04-09", + "created": 1712601677, + "object": "model", 
+ "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio", + "created": 1756339249, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-tts", + "created": 1742403959, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo", + "created": 1712361441, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime", + "created": 1756271701, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini", + "created": 1744318173, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-2025-08-28", + "created": 1756271773, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini-2025-04-14", + "created": 1744317547, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-1106", + "created": 1698959748, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0125-preview", + "created": 1706037612, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "davinci-002", + "created": 1692634301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-preview", + "created": 1706037777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0613", + "created": 1686588896, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "computer-use-preview-2025-03-11", + "created": 1741377021, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4", + "created": 1687882411, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1", + "created": 1744316542, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-2025-04-14", + "created": 1744315746, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-3", + "created": 1698785189, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "computer-use-preview", + "created": 1734655677, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano", + "created": 1744321707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct-0914", + "created": 1694122472, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "whisper-1", + "created": 1677532384, + "object": "model", + "owned_by": "openai-internal" + } + }, + { 
+ "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-2024-12-17", + "created": 1734326976, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano-2025-04-14", + "created": 1744321025, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-16k", + "created": 1683758102, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-2025-08-28", + "created": 1756256146, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct", + "created": 1692901427, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-11-20", + "created": 1739331543, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-05-13", + "created": 1715368132, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview", + "created": 1741391161, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview-2025-03-11", + "created": 1741390858, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-chat-latest", + "created": 1762547951, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview", + "created": 1741388720, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-latest", + "created": 1731689265, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro", + "created": 1742251791, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro-2025-03-19", + "created": 1742251504, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-codex-mini", + "created": 1763007109, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview-2025-03-11", + "created": 1741388170, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-small", + "created": 1705948997, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research-2025-06-26", + "created": 1750866121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research", + "created": 1749685485, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "codex-mini-latest", + "created": 1746673257, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-08-06", 
+ "created": 1722814719, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1", + "created": 1734375816, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-2024-07-18", + "created": 1721172717, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini", + "created": 1721172741, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview", + "created": 1734387424, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1-mini", + "created": 1758845821, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini", + "created": 1754425928, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1", + "created": 1745517030, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini-2025-08-07", + "created": 1754425867, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-2024-09-26", + "created": 1732734466, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5", + "created": 1754425777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-pro", + "created": 1748475349, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-ada-002", + "created": 1671217299, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano-2025-08-07", + "created": 1754426303, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-12-17", + "created": 1734034239, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano", + "created": 1754426384, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-2025-11-13", + "created": 1762800353, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd-1106", + "created": 1699053533, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd", + "created": 1699046015, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-0125", + "created": 1706048358, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-mini", + "created": 1759512027, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-mini-2025-10-06", + "created": 
1759512137, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1", + "created": 1762800673, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview-2024-12-17", + "created": 1734115920, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini-2025-01-31", + "created": 1738010200, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini", + "created": 1737146383, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-12-17", + "created": 1733945430, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-pro-2025-06-10", + "created": 1749166761, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-1106-preview", + "created": 1698957206, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-codex", + "created": 1757527818, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "babbage-002", + "created": 1692634615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-chat-latest", + "created": 1754073306, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo", + "created": 1677610602, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-2025-08-07", + "created": 1754075360, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "chatgpt-4o-latest", + "created": 1723515131, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-transcribe", + "created": 1742068463, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sora-2", + "created": 1759708615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sora-2-pro", + "created": 1759708663, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-pro-2025-10-06", + "created": 1759469707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o", + "created": 1715367049, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview", + "created": 1727659998, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-mini", + "created": 1759517133, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-mini-2025-10-06", + "created": 1759517175, + 
"object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini", + "created": 1744225351, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-2025-04-16", + "created": 1744133506, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2025-06-03", + "created": 1748907838, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview", + "created": 1727460443, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2025-06-03", + "created": 1748908498, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-pro", + "created": 1759469822, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-large", + "created": 1705953180, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-deep-research", + "created": 1749840121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-deep-research-2025-06-26", + "created": 1750865219, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-transcribe", + "created": 1742068596, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview", + "created": 1734387380, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview-2024-12-17", + "created": 1734112601, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3", + "created": 1744225308, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-2025-04-16", + "created": 1744133301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-transcribe-diarize", + "created": 1750798887, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-codex", + "created": 1762988221, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-10-01", + "created": 1727389042, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-10-01", + "created": 1727131766, + "object": "model", + "owned_by": "system" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/recordings/models-eddc1eb84009b784ae43f6ef48daf1d685230aaeee60e0a6866183450427d162-a882f554.json b/tests/integration/vector_io/recordings/models-eddc1eb84009b784ae43f6ef48daf1d685230aaeee60e0a6866183450427d162-a882f554.json new file mode 100644 index 
0000000000..ab89e5a65d --- /dev/null +++ b/tests/integration/vector_io/recordings/models-eddc1eb84009b784ae43f6ef48daf1d685230aaeee60e0a6866183450427d162-a882f554.json @@ -0,0 +1,647 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_search_with_rewrite_query[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-sqlite-vec]", + "request": { + "method": "POST", + "url": "https://generativelanguage.googleapis.com/v1beta/openai/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-gecko-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding Gecko" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-03-25", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview 03-25" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-05-20", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-06-17", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite Preview 06-17" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-05-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview 05-06" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-06-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp-image-generation", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash (Image Generation) Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite 001" + } + }, + { + 
"__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-exp-1206", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Experimental 1206" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-01-21", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-1219", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/learnlm-2.0-flash-experimental", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "LearnLM 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-1b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 1B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-12b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 12B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-27b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 27B" + } + 
}, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e2b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E2B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-lite-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash-Lite Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-pro-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Pro Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-robotics-er-1.5-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Robotics-ER 1.5 Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-computer-use-preview-10-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Computer Use Preview 10-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/text-embedding-004", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Text Embedding 004" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp-03-07", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental 03-07" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental" + } + }, + { + 
"__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/aqa", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Model that performs Attributed Question Answering." + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-2.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 2" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-fast-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-live-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-live-2.5-flash-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Live 2.5 Flash Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-live-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Live Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Latest" + } + }, + { + "__type__": "openai.types.model.Model", + 
"__data__": { + "id": "models/gemini-2.5-flash-native-audio-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Preview 09-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/lyria-realtime-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Lyria Realtime Experimental" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/recordings/models-fdf8866dba534adc96a22dccfcb8ddb92f33ee1f4ab308b1c12e971c13988bf0-17b6020a.json b/tests/integration/vector_io/recordings/models-fdf8866dba534adc96a22dccfcb8ddb92f33ee1f4ab308b1c12e971c13988bf0-17b6020a.json new file mode 100644 index 0000000000..3040751dbe --- /dev/null +++ b/tests/integration/vector_io/recordings/models-fdf8866dba534adc96a22dccfcb8ddb92f33ee1f4ab308b1c12e971c13988bf0-17b6020a.json @@ -0,0 +1,989 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_search_with_rewrite_query[client_with_models-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-sqlite-vec]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-search-api", + "created": 1759514629, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-search-api-2025-10-14", + "created": 1760043960, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1", + "created": 1681940951, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-2", + "created": 1698798177, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-1106", + "created": 1699053241, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-2024-04-09", + "created": 1712601677, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio", + "created": 1756339249, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-tts", + "created": 1742403959, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo", + "created": 1712361441, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime", + "created": 1756271701, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini", + "created": 1744318173, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-2025-08-28", + "created": 1756271773, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini-2025-04-14", + "created": 1744317547, + "object": "model", + 
"owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-1106", + "created": 1698959748, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0125-preview", + "created": 1706037612, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "davinci-002", + "created": 1692634301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-preview", + "created": 1706037777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0613", + "created": 1686588896, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "computer-use-preview-2025-03-11", + "created": 1741377021, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4", + "created": 1687882411, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1", + "created": 1744316542, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-2025-04-14", + "created": 1744315746, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-3", + "created": 1698785189, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "computer-use-preview", + "created": 1734655677, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano", + "created": 1744321707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct-0914", + "created": 1694122472, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "whisper-1", + "created": 1677532384, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-2024-12-17", + "created": 1734326976, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano-2025-04-14", + "created": 1744321025, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-16k", + "created": 1683758102, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-2025-08-28", + "created": 1756256146, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct", + "created": 1692901427, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-11-20", + "created": 1739331543, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-05-13", + "created": 1715368132, + "object": "model", + 
"owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview", + "created": 1741391161, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview-2025-03-11", + "created": 1741390858, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-chat-latest", + "created": 1762547951, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview", + "created": 1741388720, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-latest", + "created": 1731689265, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro", + "created": 1742251791, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro-2025-03-19", + "created": 1742251504, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-codex-mini", + "created": 1763007109, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview-2025-03-11", + "created": 1741388170, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-small", + "created": 1705948997, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research-2025-06-26", + "created": 1750866121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research", + "created": 1749685485, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "codex-mini-latest", + "created": 1746673257, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-08-06", + "created": 1722814719, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1", + "created": 1734375816, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-2024-07-18", + "created": 1721172717, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini", + "created": 1721172741, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview", + "created": 1734387424, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1-mini", + "created": 1758845821, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini", + "created": 1754425928, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1", + 
"created": 1745517030, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini-2025-08-07", + "created": 1754425867, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-2024-09-26", + "created": 1732734466, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5", + "created": 1754425777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-pro", + "created": 1748475349, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-ada-002", + "created": 1671217299, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano-2025-08-07", + "created": 1754426303, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-12-17", + "created": 1734034239, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano", + "created": 1754426384, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-2025-11-13", + "created": 1762800353, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd-1106", + "created": 1699053533, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd", + "created": 1699046015, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-0125", + "created": 1706048358, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-mini", + "created": 1759512027, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-mini-2025-10-06", + "created": 1759512137, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1", + "created": 1762800673, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview-2024-12-17", + "created": 1734115920, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini-2025-01-31", + "created": 1738010200, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini", + "created": 1737146383, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-12-17", + "created": 1733945430, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-pro-2025-06-10", + "created": 1749166761, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"gpt-4-1106-preview", + "created": 1698957206, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "babbage-002", + "created": 1692634615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-chat-latest", + "created": 1754073306, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo", + "created": 1677610602, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-2025-08-07", + "created": 1754075360, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "chatgpt-4o-latest", + "created": 1723515131, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-transcribe", + "created": 1742068463, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sora-2", + "created": 1759708615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sora-2-pro", + "created": 1759708663, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-pro-2025-10-06", + "created": 1759469707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o", + "created": 1715367049, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview", + "created": 1727659998, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-mini", + "created": 1759517133, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-mini-2025-10-06", + "created": 1759517175, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini", + "created": 1744225351, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-2025-04-16", + "created": 1744133506, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2025-06-03", + "created": 1748907838, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview", + "created": 1727460443, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2025-06-03", + "created": 1748908498, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-pro", + "created": 1759469822, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-large", + "created": 1705953180, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"o3-deep-research", + "created": 1749840121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-deep-research-2025-06-26", + "created": 1750865219, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-transcribe", + "created": 1742068596, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview", + "created": 1734387380, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview-2024-12-17", + "created": 1734112601, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3", + "created": 1744225308, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-2025-04-16", + "created": 1744133301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-transcribe-diarize", + "created": 1750798887, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-codex", + "created": 1762988221, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-10-01", + "created": 1727389042, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-10-01", + "created": 1727131766, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-codex", + "created": 1757527818, + "object": "model", + "owned_by": "system" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 102f3f00ce..083c839819 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -1698,3 +1698,49 @@ def get_field(obj, field): assert with_flags_embedding is not None, "Embeddings should be included when include_embeddings=True" assert len(with_flags_embedding) > 0, "Embedding should be a non-empty list" assert without_flags_embedding is None, "Embeddings should not be included when include_embeddings=False" + + +@vector_provider_wrapper +def test_openai_vector_store_search_with_rewrite_query( + compat_client_with_empty_stores, + client_with_models, + sample_chunks, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, +): + """Test that rewrite_query parameter is properly passed through and handled.""" + skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) + + compat_client = compat_client_with_empty_stores + llama_client = client_with_models + + # Create vector store and insert chunks + vector_store = compat_client.vector_stores.create( + name="rewrite_test", + extra_body={"embedding_model": embedding_model_id, "provider_id": vector_io_provider_id}, + ) + llama_client.vector_io.insert(vector_store_id=vector_store.id, chunks=sample_chunks) + + # Test rewrite_query=False (default behavior) + 
    response_no_rewrite = compat_client.vector_stores.search(
+        vector_store_id=vector_store.id,
+        query="programming",
+        max_num_results=2,
+        rewrite_query=False,
+    )
+
+    # Test rewrite_query=True (uses an LLM when one is available; falls back gracefully when not)
+    response_with_rewrite = compat_client.vector_stores.search(
+        vector_store_id=vector_store.id,
+        query="programming",
+        max_num_results=2,
+        rewrite_query=True,
+    )
+
+    # Both requests should succeed (rewrite_query=True falls back gracefully when no LLM model is configured)
+    assert response_no_rewrite is not None
+    assert response_with_rewrite is not None
+
+    # Without an LLM rewrite model the fallback path is used, so both responses carry the same data
+    assert len(response_no_rewrite.data) > 0

From ac7cb1ba5a9ae22a12467a227b04afa2ee31582f Mon Sep 17 00:00:00 2001
From: Francisco Javier Arceo
Date: Wed, 19 Nov 2025 10:23:17 -0500
Subject: [PATCH 2/7] Add vector stores config to providers so it can be used
 properly

Signed-off-by: Francisco Javier Arceo
---
 src/llama_stack/core/resolver.py              | 12 +++++++++
 src/llama_stack/core/routers/vector_io.py     |  7 +++++
 .../inline/vector_io/faiss/__init__.py        |  7 +++--
 .../providers/inline/vector_io/faiss/faiss.py | 13 +++++++++-
 .../inline/vector_io/sqlite_vec/__init__.py   |  7 +++--
 .../inline/vector_io/sqlite_vec/sqlite_vec.py | 19 +++++++++++---
 .../remote/vector_io/chroma/__init__.py       |  7 +++--
 .../remote/vector_io/chroma/chroma.py         |  9 +++++--
 .../remote/vector_io/milvus/__init__.py       |  7 +++--
 .../remote/vector_io/milvus/milvus.py         |  6 +++++
 .../remote/vector_io/pgvector/__init__.py     |  9 +++++--
 .../remote/vector_io/pgvector/pgvector.py     | 26 ++++++++++++++++---
 .../remote/vector_io/qdrant/__init__.py       |  7 +++--
 .../remote/vector_io/qdrant/qdrant.py         | 10 ++++++-
 .../remote/vector_io/weaviate/__init__.py     |  9 +++++--
 .../remote/vector_io/weaviate/weaviate.py     | 21 ++++++++++++---
 .../utils/memory/openai_vector_store_mixin.py |  3 +++
 .../providers/utils/memory/vector_store.py    | 19 ++++++++++++--
 18 files changed, 168 insertions(+), 30 deletions(-)

diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py
index 6bc32c2d03..ebdbb0b180 100644
--- a/src/llama_stack/core/resolver.py
+++ b/src/llama_stack/core/resolver.py
@@ -374,6 +374,13 @@ async def instantiate_provider(
         method = "get_adapter_impl"
         args = [config, deps]
 
+        # Add vector_stores_config for vector_io providers
+        if (
+            "vector_stores_config" in inspect.signature(getattr(module, method)).parameters
+            and provider_spec.api == Api.vector_io
+        ):
+            args.append(run_config.vector_stores)
+
     elif isinstance(provider_spec, AutoRoutedProviderSpec):
         method = "get_auto_router_impl"
 
@@ -394,6 +401,11 @@ async def instantiate_provider(
             args.append(policy)
         if "telemetry_enabled" in inspect.signature(getattr(module, method)).parameters and run_config.telemetry:
             args.append(run_config.telemetry.enabled)
+        if (
+            "vector_stores_config" in inspect.signature(getattr(module, method)).parameters
+            and provider_spec.api == Api.vector_io
+        ):
+            args.append(run_config.vector_stores)
 
     fn = getattr(module, method)
     impl = await fn(*args)
diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py
index a7b30642ce..a865a37936 100644
--- a/src/llama_stack/core/routers/vector_io.py
+++ b/src/llama_stack/core/routers/vector_io.py
@@ -103,6 +103,13 @@ async def query_chunks(
         # Ensure params dict exists and add vector_stores_config for query rewriting
         if params is None:
             params = {}
+
+        logger.debug(f"Router vector_stores_config: {self.vector_stores_config}")
+        if self.vector_stores_config and hasattr(self.vector_stores_config, "default_query_expansion_model"):
+            logger.debug(
+                f"Router default_query_expansion_model: {self.vector_stores_config.default_query_expansion_model}"
+            )
+
         params["vector_stores_config"] = self.vector_stores_config
 
         return await provider.query_chunks(vector_store_id, query, params)
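The router hunk above only threads the configuration through params; the rewrite itself is implemented in providers/utils/memory/vector_store.py (changed in this series, not shown in this hunk). A minimal sketch of the intended fallback behavior: run_inference is a hypothetical stand-in for whatever completion callable the provider holds, and the model_id attribute on the configured model is an assumption, not a confirmed field.

# Sketch only, not the actual vector_store.py implementation from this series.
# `run_inference` is a hypothetical async completion callable.
async def maybe_rewrite_query(query: str, config, run_inference) -> str:
    if config is None or config.default_query_expansion_model is None:
        return query  # no rewrite model configured: search with the raw query
    prompt = config.query_expansion_prompt.format(query=query)
    try:
        rewritten = await run_inference(
            model=config.default_query_expansion_model.model_id,  # assumed field name
            prompt=prompt,
        )
        return rewritten.strip() or query
    except Exception:
        return query  # any failure falls back to the original query

This fallback is why the integration test above can assert success for both rewrite_query=False and rewrite_query=True even when no LLM model is configured.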
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
index b834589e38..1b9dcda769 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
@@ -6,16 +6,19 @@
 
 from typing import Any
 
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack_api import Api
 
 from .config import FaissVectorIOConfig
 
 
-async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]):
+async def get_provider_impl(
+    config: FaissVectorIOConfig, deps: dict[Api, Any], vector_stores_config: VectorStoresConfig | None = None
+):
     from .faiss import FaissVectorIOAdapter
 
     assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}"
 
-    impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
+    impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config)
     await impl.initialize()
     return impl
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
index 91a17058bf..ec8afd3884 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -14,6 +14,7 @@
 import numpy as np
 from numpy.typing import NDArray
 
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
@@ -184,10 +185,17 @@ async def query_hybrid(
 
 
 class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
-    def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
+    def __init__(
+        self,
+        config: FaissVectorIOConfig,
+        inference_api: Inference,
+        files_api: Files | None,
+        vector_stores_config: VectorStoresConfig | None = None,
+    ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
+        self.vector_stores_config = vector_stores_config
         self.cache: dict[str, VectorStoreWithIndex] = {}
 
     async def initialize(self) -> None:
@@ -203,6 +211,7 @@ async def initialize(self) -> None:
                 vector_store,
                 await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
                 self.inference_api,
+                self.vector_stores_config,
             )
             self.cache[vector_store.identifier] = index
 
@@ -241,6 +250,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None:
             vector_store=vector_store,
             index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
             inference_api=self.inference_api,
+            vector_stores_config=self.vector_stores_config,
         )
 
     async def list_vector_stores(self) -> list[VectorStore]:
@@ -274,6 +284,7 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto
             vector_store=vector_store,
             index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
             inference_api=self.inference_api,
+            vector_stores_config=self.vector_stores_config,
         )
         self.cache[vector_store_id] = index
         return index
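Every provider entry point takes the new vector_stores_config argument with a default of None, and the resolver (resolver.py above) only passes it when the signature declares it, so out-of-tree providers that never opt in keep loading unchanged. Reduced to a sketch, that dispatch pattern is:

import inspect

# Simplified form of the resolver logic shown earlier: append the optional
# argument only when the entry point actually declares it.
async def call_entry_point(fn, config, deps, vector_stores):
    args = [config, deps]
    if "vector_stores_config" in inspect.signature(fn).parameters:
        args.append(vector_stores)
    return await fn(*args)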
diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
index e84c299dc3..53e2ad135c 100644
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
@@ -6,15 +6,18 @@
 
 from typing import Any
 
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack_api import Api
 
 from .config import SQLiteVectorIOConfig
 
 
-async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]):
+async def get_provider_impl(
+    config: SQLiteVectorIOConfig, deps: dict[Api, Any], vector_stores_config: VectorStoresConfig | None = None
+):
     from .sqlite_vec import SQLiteVecVectorIOAdapter
 
     assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}"
-    impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
+    impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config)
     await impl.initialize()
     return impl
diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index a384a33dc5..b38ce205e6 100644
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -14,6 +14,7 @@
 import sqlite_vec  # type: ignore[import-untyped]
 from numpy.typing import NDArray
 
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
@@ -385,10 +386,17 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
     and creates a cache of VectorStoreWithIndex instances (each wrapping a SQLiteVecIndex).
     """
 
-    def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
+    def __init__(
+        self,
+        config,
+        inference_api: Inference,
+        files_api: Files | None,
+        vector_stores_config: VectorStoresConfig | None = None,
+    ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
+        self.vector_stores_config = vector_stores_config
         self.cache: dict[str, VectorStoreWithIndex] = {}
         self.vector_store_table = None
 
@@ -403,7 +411,9 @@ async def initialize(self) -> None:
             index = await SQLiteVecIndex.create(
                 vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
             )
-            self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)
+            self.cache[vector_store.identifier] = VectorStoreWithIndex(
+                vector_store, index, self.inference_api, self.vector_stores_config
+            )
 
         # Load existing OpenAI vector stores into the in-memory cache
         await self.initialize_openai_vector_stores()
@@ -427,7 +437,9 @@ async def register_vector_store(self, vector_store: VectorStore) -> None:
         index = await SQLiteVecIndex.create(
             vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
         )
-        self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)
+        self.cache[vector_store.identifier] = VectorStoreWithIndex(
+            vector_store, index, self.inference_api, self.vector_stores_config
+        )
 
     async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
         if vector_store_id in self.cache:
@@ -452,6 +464,7 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto
                 kvstore=self.kvstore,
             ),
             inference_api=self.inference_api,
+            vector_stores_config=self.vector_stores_config,
         )
         self.cache[vector_store_id] = index
         return index
diff --git a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py
index d774ea643f..3bce41c366 100644
--- a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py
+++ b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py
@@ -4,14 +4,17 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import ChromaVectorIOConfig -async def get_adapter_impl(config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec]): +async def get_adapter_impl( + config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec], vector_stores_config: VectorStoresConfig | None = None +): from .chroma import ChromaVectorIOAdapter - impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py index 491db6d4de..d214dff3a2 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -11,6 +11,7 @@ import chromadb from numpy.typing import NDArray +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig @@ -125,11 +126,13 @@ def __init__( config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig, inference_api: Inference, files_api: Files | None, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) log.info(f"Initializing ChromaVectorIOAdapter with url: {config}") self.config = config self.inference_api = inference_api + self.vector_stores_config = vector_stores_config self.client = None self.cache = {} self.vector_store_table = None @@ -162,7 +165,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: ) ) self.cache[vector_store.identifier] = VectorStoreWithIndex( - vector_store, ChromaIndex(self.client, collection), self.inference_api + vector_store, ChromaIndex(self.client, collection), self.inference_api, self.vector_stores_config ) async def unregister_vector_store(self, vector_store_id: str) -> None: @@ -207,7 +210,9 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto collection = await maybe_await(self.client.get_collection(vector_store_id)) if not collection: raise ValueError(f"Vector DB {vector_store_id} not found in Chroma") - index = VectorStoreWithIndex(vector_store, ChromaIndex(self.client, collection), self.inference_api) + index = VectorStoreWithIndex( + vector_store, ChromaIndex(self.client, collection), self.inference_api, self.vector_stores_config + ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py index 1b703d486c..b73cf9b3ed 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py @@ -4,15 +4,18 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
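On the request path, the router and the mixin share one convention: search options travel in a plain params dict, and the config rides along under the "vector_stores_config" key when the adapter has one. A condensed sketch of that handoff, assuming an illustrative helper name and CHUNK_MULTIPLIER value (the real constant lives in the mixin module):

CHUNK_MULTIPLIER = 5  # illustrative value, not the patch's constant

def build_search_params(adapter, max_num_results: int, score_threshold: float,
                        search_mode: str, rewrite_query: bool) -> dict:
    params = {
        "max_chunks": max_num_results * CHUNK_MULTIPLIER,
        "score_threshold": score_threshold,
        "mode": search_mode,
        "rewrite_query": rewrite_query,
    }
    # attach the config only when the adapter was constructed with one
    if hasattr(adapter, "vector_stores_config"):
        params["vector_stores_config"] = adapter.vector_stores_config
    return params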
+from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import MilvusVectorIOConfig -async def get_adapter_impl(config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec]): +async def get_adapter_impl( + config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec], vector_stores_config: VectorStoresConfig | None = None +): from .milvus import MilvusVectorIOAdapter assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py index 044d678fa0..3b21f3278b 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -11,6 +11,7 @@ from numpy.typing import NDArray from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig @@ -272,12 +273,14 @@ def __init__( config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig, inference_api: Inference, files_api: Files | None, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.cache = {} self.client = None self.inference_api = inference_api + self.vector_stores_config = vector_stores_config self.vector_store_table = None self.metadata_collection_name = "openai_vector_stores_metadata" @@ -298,6 +301,7 @@ async def initialize(self) -> None: kvstore=self.kvstore, ), inference_api=self.inference_api, + vector_stores_config=self.vector_stores_config, ) self.cache[vector_store.identifier] = index if isinstance(self.config, RemoteMilvusVectorIOConfig): @@ -325,6 +329,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store=vector_store, index=MilvusIndex(self.client, vector_store.identifier, consistency_level=consistency_level), inference_api=self.inference_api, + vector_stores_config=self.vector_stores_config, ) self.cache[vector_store.identifier] = index @@ -347,6 +352,7 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store=vector_store, index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore), inference_api=self.inference_api, + vector_stores_config=self.vector_stores_config, ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py index 36018fd954..002caf4b60 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -4,14 +4,19 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
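At query time the rewrite is strictly opt-in: VectorStoreWithIndex.query_chunks only calls _rewrite_query_for_search when the params carry rewrite_query=True, and the rewriter raises when no expansion model is configured. A runnable stub of that contract, with a canned expansion standing in for the real LLM call and a hypothetical model id:

import asyncio

class _RewriteStub:
    """Illustrative stand-in for VectorStoreWithIndex's rewrite hook."""

    def __init__(self, expansion_model: str | None):
        self.expansion_model = expansion_model

    async def _rewrite_query_for_search(self, query: str) -> str:
        if self.expansion_model is None:
            # mirrors the patch's failure mode when nothing is configured
            raise ValueError("No default_query_expansion_model configured for query rewriting")
        return f"{query} (expanded by {self.expansion_model})"  # canned expansion

    async def query_chunks(self, query: str, params: dict | None = None) -> str:
        params = params or {}
        query_string = query
        if params.get("rewrite_query", False):  # opt-in, defaults to off
            query_string = await self._rewrite_query_for_search(query_string)
        return query_string  # the real method goes on to embed and search

async def _demo() -> None:
    store = _RewriteStub(expansion_model="ollama/llama3.2:3b")  # hypothetical model id
    print(await store.query_chunks("python error handling"))                            # unchanged
    print(await store.query_chunks("python error handling", {"rewrite_query": True}))   # rewritten

asyncio.run(_demo())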
+from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import PGVectorVectorIOConfig -async def get_adapter_impl(config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec]): +async def get_adapter_impl( + config: PGVectorVectorIOConfig, + deps: dict[Api, ProviderSpec], + vector_stores_config: VectorStoresConfig | None = None, +): from .pgvector import PGVectorVectorIOAdapter - impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 5c86fb08da..45a38e52a7 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -13,6 +13,7 @@ from psycopg2.extras import Json, execute_values from pydantic import BaseModel, TypeAdapter +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str @@ -330,11 +331,16 @@ def check_distance_metric_availability(self, distance_metric: str) -> None: class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( - self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None + self, + config: PGVectorVectorIOConfig, + inference_api: Inference, + files_api: Files | None = None, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api + self.vector_stores_config = vector_stores_config self.conn = None self.cache = {} self.vector_store_table = None @@ -386,7 +392,12 @@ async def initialize(self) -> None: kvstore=self.kvstore, ) await pgvector_index.initialize() - index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api) + index = VectorStoreWithIndex( + vector_store, + index=pgvector_index, + inference_api=self.inference_api, + vector_stores_config=self.vector_stores_config, + ) self.cache[vector_store.identifier] = index async def shutdown(self) -> None: @@ -413,7 +424,12 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store=vector_store, dimension=vector_store.embedding_dimension, conn=self.conn, kvstore=self.kvstore ) await pgvector_index.initialize() - index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api) + index = VectorStoreWithIndex( + vector_store, + index=pgvector_index, + inference_api=self.inference_api, + vector_stores_config=self.vector_stores_config, + ) self.cache[vector_store.identifier] = index async def unregister_vector_store(self, vector_store_id: str) -> None: @@ -453,7 +469,9 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store = VectorStore.model_validate_json(vector_store_data) index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn) await index.initialize() - self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api) + self.cache[vector_store_id] = VectorStoreWithIndex( + vector_store, 
index, self.inference_api, self.vector_stores_config + ) return self.cache[vector_store_id] async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py index b5b02fe598..76e167b75e 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py @@ -4,14 +4,17 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import QdrantVectorIOConfig -async def get_adapter_impl(config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec]): +async def get_adapter_impl( + config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec], vector_stores_config: VectorStoresConfig | None = None +): from .qdrant import QdrantVectorIOAdapter - impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 4dd78d8343..2de71f7cc3 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -13,6 +13,7 @@ from qdrant_client import AsyncQdrantClient, models from qdrant_client.models import PointStruct +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig @@ -152,12 +153,14 @@ def __init__( config: RemoteQdrantVectorIOConfig | InlineQdrantVectorIOConfig, inference_api: Inference, files_api: Files | None = None, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.client: AsyncQdrantClient = None self.cache = {} self.inference_api = inference_api + self.vector_stores_config = vector_stores_config self.vector_store_table = None self._qdrant_lock = asyncio.Lock() @@ -173,7 +176,10 @@ async def initialize(self) -> None: for vector_store_data in stored_vector_stores: vector_store = VectorStore.model_validate_json(vector_store_data) index = VectorStoreWithIndex( - vector_store, QdrantIndex(self.client, vector_store.identifier), self.inference_api + vector_store, + QdrantIndex(self.client, vector_store.identifier), + self.inference_api, + self.vector_stores_config, ) self.cache[vector_store.identifier] = index self.openai_vector_stores = await self._load_openai_vector_stores() @@ -193,6 +199,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store=vector_store, index=QdrantIndex(self.client, vector_store.identifier), inference_api=self.inference_api, + vector_stores_config=self.vector_stores_config, ) self.cache[vector_store.identifier] = index @@ -224,6 +231,7 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store=vector_store, index=QdrantIndex(client=self.client, collection_name=vector_store.identifier), inference_api=self.inference_api, + 
vector_stores_config=self.vector_stores_config, ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py index 47546d4598..77bf357f4b 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py @@ -4,14 +4,19 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import WeaviateVectorIOConfig -async def get_adapter_impl(config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec]): +async def get_adapter_impl( + config: WeaviateVectorIOConfig, + deps: dict[Api, ProviderSpec], + vector_stores_config: VectorStoresConfig | None = None, +): from .weaviate import WeaviateVectorIOAdapter - impl = WeaviateVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + impl = WeaviateVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index c15d5f4682..1c52fa84c5 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -12,6 +12,7 @@ from weaviate.classes.init import Auth from weaviate.classes.query import Filter, HybridFusion +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger @@ -262,10 +263,17 @@ async def query_hybrid( class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorStoresProtocolPrivate): - def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: + def __init__( + self, + config: WeaviateVectorIOConfig, + inference_api: Inference, + files_api: Files | None, + vector_stores_config: VectorStoresConfig | None = None, + ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api + self.vector_stores_config = vector_stores_config self.client_cache = {} self.cache = {} self.vector_store_table = None @@ -308,7 +316,10 @@ async def initialize(self) -> None: client = self._get_client() idx = WeaviateIndex(client=client, collection_name=vector_store.identifier, kvstore=self.kvstore) self.cache[vector_store.identifier] = VectorStoreWithIndex( - vector_store=vector_store, index=idx, inference_api=self.inference_api + vector_store=vector_store, + index=idx, + inference_api=self.inference_api, + vector_stores_config=self.vector_stores_config, ) # Load OpenAI vector stores metadata into cache @@ -334,7 +345,10 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: ) self.cache[vector_store.identifier] = VectorStoreWithIndex( - vector_store, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api + vector_store, + WeaviateIndex(client=client, collection_name=sanitized_collection_name), + self.inference_api, + self.vector_stores_config, ) async def unregister_vector_store(self, vector_store_id: str) -> None: @@ 
-369,6 +383,7 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store=vector_store, index=WeaviateIndex(client=client, collection_name=vector_store.identifier), inference_api=self.inference_api, + vector_stores_config=self.vector_stores_config, ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index f33bb29c8c..d83aa6dc1a 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -613,6 +613,9 @@ async def openai_search_vector_store( "mode": search_mode, "rewrite_query": rewrite_query, } + # Add vector_stores_config if available (for query rewriting) + if hasattr(self, "vector_stores_config"): + params["vector_stores_config"] = self.vector_stores_config # TODO: Add support for ranking_options.ranker response = await self.query_chunks( diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index 6fbf4a4245..2a7b94292a 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -333,6 +333,15 @@ async def query_chunks( # Apply query rewriting if enabled if params.get("rewrite_query", False): + if self.vector_stores_config: + log.debug(f"VectorStoreWithIndex received config: {self.vector_stores_config}") + if hasattr(self.vector_stores_config, "default_query_expansion_model"): + log.debug( + f"Config has default_query_expansion_model: {self.vector_stores_config.default_query_expansion_model}" + ) + else: + log.debug("No vector_stores_config found - cannot perform query rewriting") + query_string = await self._rewrite_query_for_search(query_string) if mode == "keyword": @@ -358,8 +367,14 @@ async def _rewrite_query_for_search(self, query: str) -> str: :returns: The rewritten query optimized for vector search """ # Check if query expansion model is configured - if not self.vector_stores_config or not self.vector_stores_config.default_query_expansion_model: - raise ValueError("No default_query_expansion_model configured for query rewriting") + if not self.vector_stores_config: + raise ValueError( + f"No vector_stores_config found! self.vector_stores_config is: {self.vector_stores_config}" + ) + if not self.vector_stores_config.default_query_expansion_model: + raise ValueError( + f"No default_query_expansion_model configured! 
vector_stores_config: {self.vector_stores_config}, default_query_expansion_model: {self.vector_stores_config.default_query_expansion_model}" + ) # Use the configured model expansion_model = self.vector_stores_config.default_query_expansion_model From 2cc7943fd61f69783dff1d810fa4e4fcc03a4b41 Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Wed, 19 Nov 2025 22:41:19 -0500 Subject: [PATCH 3/7] added query expansion model to extra_body Signed-off-by: Francisco Javier Arceo --- .../utils/memory/openai_vector_store_mixin.py | 8 ++ .../providers/utils/memory/vector_store.py | 39 +++++--- src/llama_stack_api/vector_stores.py | 1 + .../test_vector_io_openai_vector_stores.py | 94 +++++++++++++++++++ 4 files changed, 130 insertions(+), 12 deletions(-) diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index d83aa6dc1a..4e67cf24b4 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -379,6 +379,11 @@ async def openai_create_vector_store( f"Using embedding config from extra_body: model='{embedding_model}', dimension={embedding_dimension}" ) + # Extract query expansion model from extra_body if provided + query_expansion_model = extra_body.get("query_expansion_model") + if query_expansion_model: + logger.debug(f"Using per-store query expansion model: {query_expansion_model}") + # use provider_id set by router; fallback to provider's own ID when used directly via --stack-config provider_id = extra_body.get("provider_id") or getattr(self, "__provider_id__", None) # Derive the canonical vector_store_id (allow override, else generate) @@ -402,6 +407,7 @@ async def openai_create_vector_store( provider_id=provider_id, provider_resource_id=vector_store_id, vector_store_name=params.name, + query_expansion_model=query_expansion_model, ) await self.register_vector_store(vector_store) @@ -607,12 +613,14 @@ async def openai_search_vector_store( if ranking_options and ranking_options.score_threshold is not None else 0.0 ) + params = { "max_chunks": max_num_results * CHUNK_MULTIPLIER, "score_threshold": score_threshold, "mode": search_mode, "rewrite_query": rewrite_query, } + # Add vector_stores_config if available (for query rewriting) if hasattr(self, "vector_stores_config"): params["vector_stores_config"] = self.vector_stores_config diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index 2a7b94292a..71d61787af 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -17,7 +17,7 @@ from numpy.typing import NDArray from pydantic import BaseModel -from llama_stack.core.datatypes import VectorStoresConfig +from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig from llama_stack.log import get_logger from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.providers.utils.inference.prompt_adapter import ( @@ -366,18 +366,33 @@ async def _rewrite_query_for_search(self, query: str) -> str: :param query: The original user query :returns: The rewritten query optimized for vector search """ - # Check if query expansion model is configured - if not self.vector_stores_config: - raise ValueError( - f"No vector_stores_config found!
self.vector_stores_config is: {self.vector_stores_config}" - ) - if not self.vector_stores_config.default_query_expansion_model: - raise ValueError( - f"No default_query_expansion_model configured! vector_stores_config: {self.vector_stores_config}, default_query_expansion_model: {self.vector_stores_config.default_query_expansion_model}" - ) + expansion_model = None + + # Check for per-store query expansion model first + if self.vector_store.query_expansion_model: + # Parse the model string into provider_id and model_id + model_parts = self.vector_store.query_expansion_model.split("/", 1) + if len(model_parts) == 2: + expansion_model = QualifiedModel(provider_id=model_parts[0], model_id=model_parts[1]) + log.debug(f"Using per-store query expansion model: {expansion_model}") + else: + log.warning( + f"Invalid query_expansion_model format: {self.vector_store.query_expansion_model}. Expected 'provider_id/model_id'" + ) + + # Fall back to global default if no per-store model + if not expansion_model: + if not self.vector_stores_config: + raise ValueError( + f"No vector_stores_config found and no per-store query_expansion_model! self.vector_stores_config is: {self.vector_stores_config}" + ) + if not self.vector_stores_config.default_query_expansion_model: + raise ValueError( + f"No default_query_expansion_model configured and no per-store query_expansion_model! vector_stores_config: {self.vector_stores_config}, default_query_expansion_model: {self.vector_stores_config.default_query_expansion_model}" + ) + expansion_model = self.vector_stores_config.default_query_expansion_model + log.debug(f"Using global default query expansion model: {expansion_model}") - # Use the configured model - expansion_model = self.vector_stores_config.default_query_expansion_model chat_model = f"{expansion_model.provider_id}/{expansion_model.model_id}" # Validate that the model is available and is an LLM diff --git a/src/llama_stack_api/vector_stores.py b/src/llama_stack_api/vector_stores.py index 0a1e6c53c5..4c0d1ced25 100644 --- a/src/llama_stack_api/vector_stores.py +++ b/src/llama_stack_api/vector_stores.py @@ -25,6 +25,7 @@ class VectorStore(Resource): embedding_model: str embedding_dimension: int vector_store_name: str | None = None + query_expansion_model: str | None = None @property def vector_store_id(self) -> str: diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 3797abb2c4..cfda7aa5ee 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -1230,3 +1230,97 @@ async def test_embedding_config_required_model_missing(vector_io_adapter): with pytest.raises(ValueError, match="embedding_model is required"): await vector_io_adapter.openai_create_vector_store(params) + + +async def test_query_expansion_functionality(vector_io_adapter): + """Test query expansion with per-store models, global defaults, and error validation.""" + from unittest.mock import MagicMock + + from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig + from llama_stack.providers.utils.memory.vector_store import VectorStoreWithIndex + from llama_stack_api.models import Model, ModelType + + vector_io_adapter.register_vector_store = AsyncMock() + vector_io_adapter.__provider_id__ = "test_provider" + + # Test 1: Per-store model usage + params = OpenAICreateVectorStoreRequestWithExtraBody( + name="test_store", + 
metadata={}, + **{"embedding_model": "test/embedding", "query_expansion_model": "test/llama-model"}, + ) + await vector_io_adapter.openai_create_vector_store(params) + call_args = vector_io_adapter.register_vector_store.call_args[0][0] + assert call_args.query_expansion_model == "test/llama-model" + + # Test 2: Global default fallback + vector_io_adapter.register_vector_store.reset_mock() + params_no_model = OpenAICreateVectorStoreRequestWithExtraBody( + name="test_store2", metadata={}, **{"embedding_model": "test/embedding"} + ) + await vector_io_adapter.openai_create_vector_store(params_no_model) + call_args2 = vector_io_adapter.register_vector_store.call_args[0][0] + assert call_args2.query_expansion_model is None + + # Test query rewriting scenarios + mock_inference_api = MagicMock() + + # Per-store model scenario + mock_vector_store = MagicMock() + mock_vector_store.query_expansion_model = "test/llama-model" + mock_inference_api.routing_table.list_models = AsyncMock( + return_value=MagicMock( + data=[Model(identifier="test/llama-model", provider_id="test", model_type=ModelType.llm)] + ) + ) + mock_inference_api.openai_chat_completion = AsyncMock( + return_value=MagicMock(choices=[MagicMock(message=MagicMock(content="per-store expanded"))]) + ) + + vector_store_with_index = VectorStoreWithIndex( + vector_store=mock_vector_store, + index=MagicMock(), + inference_api=mock_inference_api, + vector_stores_config=VectorStoresConfig( + default_query_expansion_model=QualifiedModel(provider_id="global", model_id="default") + ), + ) + + result = await vector_store_with_index._rewrite_query_for_search("test") + assert mock_inference_api.openai_chat_completion.call_args[0][0].model == "test/llama-model" + assert result == "per-store expanded" + + # Global default fallback scenario + mock_inference_api.reset_mock() + mock_vector_store.query_expansion_model = None + mock_inference_api.routing_table.list_models = AsyncMock( + return_value=MagicMock( + data=[Model(identifier="global/default", provider_id="global", model_type=ModelType.llm)] + ) + ) + mock_inference_api.openai_chat_completion = AsyncMock( + return_value=MagicMock(choices=[MagicMock(message=MagicMock(content="global expanded"))]) + ) + + result = await vector_store_with_index._rewrite_query_for_search("test") + assert mock_inference_api.openai_chat_completion.call_args[0][0].model == "global/default" + assert result == "global expanded" + + # Test 3: Error cases + # Model not found + mock_vector_store.query_expansion_model = "missing/model" + mock_inference_api.routing_table.list_models = AsyncMock(return_value=MagicMock(data=[])) + + with pytest.raises(ValueError, match="Configured query expansion model .* is not available"): + await vector_store_with_index._rewrite_query_for_search("test") + + # Non-LLM model + mock_vector_store.query_expansion_model = "test/embedding-model" + mock_inference_api.routing_table.list_models = AsyncMock( + return_value=MagicMock( + data=[Model(identifier="test/embedding-model", provider_id="test", model_type=ModelType.embedding)] + ) + ) + + with pytest.raises(ValueError, match="is not an LLM model.*Query rewriting requires an LLM model"): + await vector_store_with_index._rewrite_query_for_search("test") From d887f1f8bb8d31d3af599fb510edc0b1c6befe7d Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Fri, 21 Nov 2025 11:27:25 -0500 Subject: [PATCH 4/7] refactor to only configure model at build time Signed-off-by: Francisco Javier Arceo --- src/llama_stack/core/datatypes.py | 11 +-
src/llama_stack/core/resolver.py | 12 -- src/llama_stack/core/routers/vector_io.py | 13 -- src/llama_stack/core/stack.py | 75 +++++--- .../ci-tests/run-with-postgres-store.yaml | 2 + .../distributions/ci-tests/run.yaml | 2 + .../starter-gpu/run-with-postgres-store.yaml | 2 + .../distributions/starter-gpu/run.yaml | 2 + .../starter/run-with-postgres-store.yaml | 2 + .../distributions/starter/run.yaml | 2 + .../inline/vector_io/faiss/__init__.py | 7 +- .../providers/inline/vector_io/faiss/faiss.py | 6 - .../inline/vector_io/sqlite_vec/__init__.py | 7 +- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 12 +- .../remote/vector_io/chroma/__init__.py | 7 +- .../remote/vector_io/chroma/chroma.py | 9 +- .../remote/vector_io/milvus/__init__.py | 7 +- .../remote/vector_io/milvus/milvus.py | 6 - .../remote/vector_io/pgvector/__init__.py | 4 +- .../remote/vector_io/pgvector/pgvector.py | 9 +- .../remote/vector_io/qdrant/__init__.py | 7 +- .../remote/vector_io/qdrant/qdrant.py | 6 - .../remote/vector_io/weaviate/__init__.py | 4 +- .../remote/vector_io/weaviate/weaviate.py | 6 - .../providers/utils/memory/__init__.py | 4 + .../providers/utils/memory/constants.py | 8 + .../utils/memory/openai_vector_store_mixin.py | 9 - .../utils/memory/query_expansion_config.py | 37 ++++ .../providers/utils/memory/vector_store.py | 146 ++++----------- src/llama_stack_api/vector_stores.py | 1 - .../test_vector_io_openai_vector_stores.py | 168 ++++++++++-------- 31 files changed, 279 insertions(+), 314 deletions(-) create mode 100644 src/llama_stack/providers/utils/memory/constants.py create mode 100644 src/llama_stack/providers/utils/memory/query_expansion_config.py diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 49747d4770..a32e1d8a26 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -18,6 +18,7 @@ StorageConfig, ) from llama_stack.log import LoggingConfig +from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT from llama_stack_api import ( Api, Benchmark, @@ -381,9 +382,17 @@ class VectorStoresConfig(BaseModel): description="Default LLM model for query expansion/rewriting in vector search.", ) query_expansion_prompt: str = Field( - default="Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\n{query}\n\nImproved query:", + default=DEFAULT_QUERY_EXPANSION_PROMPT, description="Prompt template for query expansion. 
Use {query} as placeholder for the original query.", ) + query_expansion_max_tokens: int = Field( + default=100, + description="Maximum number of tokens for query expansion responses.", + ) + query_expansion_temperature: float = Field( + default=0.3, + description="Temperature for query expansion model (0.0 = deterministic, 1.0 = creative).", + ) class SafetyConfig(BaseModel): diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py index ebdbb0b180..6bc32c2d03 100644 --- a/src/llama_stack/core/resolver.py +++ b/src/llama_stack/core/resolver.py @@ -374,13 +374,6 @@ async def instantiate_provider( method = "get_adapter_impl" args = [config, deps] - # Add vector_stores_config for vector_io providers - if ( - "vector_stores_config" in inspect.signature(getattr(module, method)).parameters - and provider_spec.api == Api.vector_io - ): - args.append(run_config.vector_stores) - elif isinstance(provider_spec, AutoRoutedProviderSpec): method = "get_auto_router_impl" @@ -401,11 +394,6 @@ async def instantiate_provider( args.append(policy) if "telemetry_enabled" in inspect.signature(getattr(module, method)).parameters and run_config.telemetry: args.append(run_config.telemetry.enabled) - if ( - "vector_stores_config" in inspect.signature(getattr(module, method)).parameters - and provider_spec.api == Api.vector_io - ): - args.append(run_config.vector_stores) fn = getattr(module, method) impl = await fn(*args) diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index a865a37936..5256dda449 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -99,19 +99,6 @@ async def query_chunks( ) -> QueryChunksResponse: logger.debug(f"VectorIORouter.query_chunks: {vector_store_id}") provider = await self.routing_table.get_provider_impl(vector_store_id) - - # Ensure params dict exists and add vector_stores_config for query rewriting - if params is None: - params = {} - - logger.debug(f"Router vector_stores_config: {self.vector_stores_config}") - if self.vector_stores_config and hasattr(self.vector_stores_config, "default_query_expansion_model"): - logger.debug( - f"Router default_query_expansion_model: {self.vector_stores_config.default_query_expansion_model}" - ) - - params["vector_stores_config"] = self.vector_stores_config - return await provider.query_chunks(vector_store_id, query, params) # OpenAI Vector Stores API endpoints diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index 8ba1f2afdf..dae6e8ec9b 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -144,35 +144,62 @@ async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig if vector_stores_config is None: return + # Validate default embedding model default_embedding_model = vector_stores_config.default_embedding_model - if default_embedding_model is None: - return + if default_embedding_model is not None: + provider_id = default_embedding_model.provider_id + model_id = default_embedding_model.model_id + default_model_id = f"{provider_id}/{model_id}" - provider_id = default_embedding_model.provider_id - model_id = default_embedding_model.model_id - default_model_id = f"{provider_id}/{model_id}" + if Api.models not in impls: + raise ValueError( + f"Models API is not available but vector_stores config requires model '{default_model_id}'" + ) - if Api.models not in impls: - raise ValueError(f"Models API is not available but vector_stores config requires model 
'{default_model_id}'") + models_impl = impls[Api.models] + response = await models_impl.list_models() + models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"} - models_impl = impls[Api.models] - response = await models_impl.list_models() - models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"} + default_model = models_list.get(default_model_id) + if default_model is None: + raise ValueError( + f"Embedding model '{default_model_id}' not found. Available embedding models: {models_list}" + ) - default_model = models_list.get(default_model_id) - if default_model is None: - raise ValueError(f"Embedding model '{default_model_id}' not found. Available embedding models: {models_list}") + embedding_dimension = default_model.metadata.get("embedding_dimension") + if embedding_dimension is None: + raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata") - embedding_dimension = default_model.metadata.get("embedding_dimension") - if embedding_dimension is None: - raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata") + try: + int(embedding_dimension) + except ValueError as err: + raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err - try: - int(embedding_dimension) - except ValueError as err: - raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err + logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})") + + # Validate default query expansion model + default_query_expansion_model = vector_stores_config.default_query_expansion_model + if default_query_expansion_model is not None: + provider_id = default_query_expansion_model.provider_id + model_id = default_query_expansion_model.model_id + query_model_id = f"{provider_id}/{model_id}" + + if Api.models not in impls: + raise ValueError( + f"Models API is not available but vector_stores config requires query expansion model '{query_model_id}'" + ) + + models_impl = impls[Api.models] + response = await models_impl.list_models() + llm_models_list = {m.identifier: m for m in response.data if m.model_type == "llm"} - logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})") + query_expansion_model = llm_models_list.get(query_model_id) + if query_expansion_model is None: + raise ValueError( + f"Query expansion model '{query_model_id}' not found. 
Available LLM models: {list(llm_models_list.keys())}" + ) + + logger.debug(f"Validated default query expansion model: {query_model_id}") async def validate_safety_config(safety_config: SafetyConfig | None, impls: dict[Api, Any]): @@ -437,6 +464,12 @@ async def initialize(self): await refresh_registry_once(impls) await validate_vector_stores_config(self.run_config.vector_stores, impls) await validate_safety_config(self.run_config.safety, impls) + + # Set global query expansion configuration from stack config + from llama_stack.providers.utils.memory.query_expansion_config import set_default_query_expansion_config + + set_default_query_expansion_config(self.run_config.vector_stores) + self.impls = impls def create_registry_refresh_task(self): diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml index 8110dbdf6d..219ffdce3b 100644 --- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -296,5 +296,7 @@ vector_stores: Improved query:' + query_expansion_max_tokens: 100 + query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml index 809b0ef1c9..e352e92688 100644 --- a/src/llama_stack/distributions/ci-tests/run.yaml +++ b/src/llama_stack/distributions/ci-tests/run.yaml @@ -287,5 +287,7 @@ vector_stores: Improved query:' + query_expansion_max_tokens: 100 + query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml index ca47d7f4c4..e81febb0ed 100644 --- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -299,5 +299,7 @@ vector_stores: Improved query:' + query_expansion_max_tokens: 100 + query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml index 15555c2622..edae6f66db 100644 --- a/src/llama_stack/distributions/starter-gpu/run.yaml +++ b/src/llama_stack/distributions/starter-gpu/run.yaml @@ -290,5 +290,7 @@ vector_stores: Improved query:' + query_expansion_max_tokens: 100 + query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index 423b304528..9ed74d96d1 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -296,5 +296,7 @@ vector_stores: Improved query:' + query_expansion_max_tokens: 100 + query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml index a0f56fc420..73679a1520 100644 --- a/src/llama_stack/distributions/starter/run.yaml +++ b/src/llama_stack/distributions/starter/run.yaml @@ -287,5 +287,7 @@ vector_stores: Improved query:' + query_expansion_max_tokens: 100 + query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git 
a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py index 1b9dcda769..b834589e38 100644 --- a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py +++ b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py @@ -6,19 +6,16 @@ from typing import Any -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api from .config import FaissVectorIOConfig -async def get_provider_impl( - config: FaissVectorIOConfig, deps: dict[Api, Any], vector_stores_config: VectorStoresConfig | None = None -): +async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]): from .faiss import FaissVectorIOAdapter assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) + impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py index ec8afd3884..e2aab1a250 100644 --- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -14,7 +14,6 @@ import numpy as np from numpy.typing import NDArray -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin @@ -190,12 +189,10 @@ def __init__( config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None, - vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.vector_stores_config = vector_stores_config self.cache: dict[str, VectorStoreWithIndex] = {} async def initialize(self) -> None: @@ -211,7 +208,6 @@ async def initialize(self) -> None: vector_store, await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier), self.inference_api, - self.vector_stores_config, ) self.cache[vector_store.identifier] = index @@ -250,7 +246,6 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store=vector_store, index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) async def list_vector_stores(self) -> list[VectorStore]: @@ -284,7 +279,6 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store=vector_store, index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py index 53e2ad135c..e84c299dc3 100644 --- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py @@ -6,18 +6,15 @@ from typing import Any -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api from .config 
import SQLiteVectorIOConfig -async def get_provider_impl( - config: SQLiteVectorIOConfig, deps: dict[Api, Any], vector_stores_config: VectorStoresConfig | None = None -): +async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]): from .sqlite_vec import SQLiteVecVectorIOAdapter assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) + impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index b38ce205e6..bc6226c845 100644 --- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -14,7 +14,6 @@ import sqlite_vec # type: ignore[import-untyped] from numpy.typing import NDArray -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin @@ -391,12 +390,10 @@ def __init__( config, inference_api: Inference, files_api: Files | None, - vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.vector_stores_config = vector_stores_config self.cache: dict[str, VectorStoreWithIndex] = {} self.vector_store_table = None @@ -411,9 +408,7 @@ async def initialize(self) -> None: index = await SQLiteVecIndex.create( vector_store.embedding_dimension, self.config.db_path, vector_store.identifier ) - self.cache[vector_store.identifier] = VectorStoreWithIndex( - vector_store, index, self.inference_api, self.vector_stores_config - ) + self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api) # Load existing OpenAI vector stores into the in-memory cache await self.initialize_openai_vector_stores() @@ -437,9 +432,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: index = await SQLiteVecIndex.create( vector_store.embedding_dimension, self.config.db_path, vector_store.identifier ) - self.cache[vector_store.identifier] = VectorStoreWithIndex( - vector_store, index, self.inference_api, self.vector_stores_config - ) + self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api) async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: if vector_store_id in self.cache: @@ -464,7 +457,6 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto kvstore=self.kvstore, ), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py index 3bce41c366..d774ea643f 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py @@ -4,17 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
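After this refactor the adapters no longer receive the config at all; the stack publishes it once at startup through set_default_query_expansion_config in the new query_expansion_config.py. That file's body is not shown in this series, so the following is only a plausible sketch of a module-level holder, and the getter name is an assumption:

# hypothetical sketch of providers/utils/memory/query_expansion_config.py;
# only set_default_query_expansion_config is named in the patch, the getter
# below is assumed for illustration
from llama_stack.core.datatypes import VectorStoresConfig

_default_config: VectorStoresConfig | None = None

def set_default_query_expansion_config(config: VectorStoresConfig | None) -> None:
    global _default_config
    _default_config = config

def get_default_query_expansion_config() -> VectorStoresConfig | None:
    return _default_config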
-from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import ChromaVectorIOConfig -async def get_adapter_impl( - config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec], vector_stores_config: VectorStoresConfig | None = None -): +async def get_adapter_impl(config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec]): from .chroma import ChromaVectorIOAdapter - impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) + impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py index d214dff3a2..491db6d4de 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -11,7 +11,6 @@ import chromadb from numpy.typing import NDArray -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig @@ -126,13 +125,11 @@ def __init__( config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig, inference_api: Inference, files_api: Files | None, - vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) log.info(f"Initializing ChromaVectorIOAdapter with url: {config}") self.config = config self.inference_api = inference_api - self.vector_stores_config = vector_stores_config self.client = None self.cache = {} self.vector_store_table = None @@ -165,7 +162,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: ) ) self.cache[vector_store.identifier] = VectorStoreWithIndex( - vector_store, ChromaIndex(self.client, collection), self.inference_api, self.vector_stores_config + vector_store, ChromaIndex(self.client, collection), self.inference_api ) async def unregister_vector_store(self, vector_store_id: str) -> None: @@ -210,9 +207,7 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto collection = await maybe_await(self.client.get_collection(vector_store_id)) if not collection: raise ValueError(f"Vector DB {vector_store_id} not found in Chroma") - index = VectorStoreWithIndex( - vector_store, ChromaIndex(self.client, collection), self.inference_api, self.vector_stores_config - ) + index = VectorStoreWithIndex(vector_store, ChromaIndex(self.client, collection), self.inference_api) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py index b73cf9b3ed..1b703d486c 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py @@ -4,18 +4,15 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
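For reference, the two new generation knobs land next to the existing model and prompt fields, so a fully specified config now looks roughly like this (the model id is a placeholder; the numeric values are the shipped defaults):

# sketch: VectorStoresConfig with the fields added in this series
from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig

config = VectorStoresConfig(
    default_query_expansion_model=QualifiedModel(provider_id="ollama", model_id="llama3.2:3b"),  # placeholder
    query_expansion_max_tokens=100,     # default cap on the rewritten query
    query_expansion_temperature=0.3,    # low temperature keeps rewrites focused
)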
-from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import MilvusVectorIOConfig -async def get_adapter_impl( - config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec], vector_stores_config: VectorStoresConfig | None = None -): +async def get_adapter_impl(config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec]): from .milvus import MilvusVectorIOAdapter assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) + impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py index 3b21f3278b..044d678fa0 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -11,7 +11,6 @@ from numpy.typing import NDArray from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig @@ -273,14 +272,12 @@ def __init__( config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig, inference_api: Inference, files_api: Files | None, - vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.cache = {} self.client = None self.inference_api = inference_api - self.vector_stores_config = vector_stores_config self.vector_store_table = None self.metadata_collection_name = "openai_vector_stores_metadata" @@ -301,7 +298,6 @@ async def initialize(self) -> None: kvstore=self.kvstore, ), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store.identifier] = index if isinstance(self.config, RemoteMilvusVectorIOConfig): @@ -329,7 +325,6 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store=vector_store, index=MilvusIndex(self.client, vector_store.identifier, consistency_level=consistency_level), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store.identifier] = index @@ -352,7 +347,6 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store=vector_store, index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py index 002caf4b60..ea0139815a 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
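The startup validation added in stack.py follows the same shape for both configured models: resolve the qualified id to "provider_id/model_id", list registered models, and fail fast on a miss. Condensed here for the query expansion half (the helper name is illustrative; the body mirrors the validate_vector_stores_config additions above):

async def _check_query_expansion_model(models_impl, qualified_model) -> None:
    query_model_id = f"{qualified_model.provider_id}/{qualified_model.model_id}"
    response = await models_impl.list_models()
    # query rewriting requires an LLM, so embedding models are filtered out
    llm_models = {m.identifier: m for m in response.data if m.model_type == "llm"}
    if query_model_id not in llm_models:
        raise ValueError(
            f"Query expansion model '{query_model_id}' not found. "
            f"Available LLM models: {list(llm_models.keys())}"
        )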
-from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import PGVectorVectorIOConfig @@ -13,10 +12,9 @@ async def get_adapter_impl( config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec], - vector_stores_config: VectorStoresConfig | None = None, ): from .pgvector import PGVectorVectorIOAdapter - impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) + impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 45a38e52a7..fe1b8ce35e 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -13,7 +13,6 @@ from psycopg2.extras import Json, execute_values from pydantic import BaseModel, TypeAdapter -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str @@ -335,12 +334,10 @@ def __init__( config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None, - vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.vector_stores_config = vector_stores_config self.conn = None self.cache = {} self.vector_store_table = None @@ -396,7 +393,6 @@ async def initialize(self) -> None: vector_store, index=pgvector_index, inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store.identifier] = index @@ -428,7 +424,6 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store, index=pgvector_index, inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store.identifier] = index @@ -469,9 +464,7 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store = VectorStore.model_validate_json(vector_store_data) index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn) await index.initialize() - self.cache[vector_store_id] = VectorStoreWithIndex( - vector_store, index, self.inference_api, self.vector_stores_config - ) + self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api) return self.cache[vector_store_id] async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py index 76e167b75e..b5b02fe598 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py @@ -4,17 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import QdrantVectorIOConfig -async def get_adapter_impl( - config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec], vector_stores_config: VectorStoresConfig | None = None -): +async def get_adapter_impl(config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec]): from .qdrant import QdrantVectorIOAdapter - impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) + impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 2de71f7cc3..dc65466460 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -13,7 +13,6 @@ from qdrant_client import AsyncQdrantClient, models from qdrant_client.models import PointStruct -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig @@ -153,14 +152,12 @@ def __init__( config: RemoteQdrantVectorIOConfig | InlineQdrantVectorIOConfig, inference_api: Inference, files_api: Files | None = None, - vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.client: AsyncQdrantClient = None self.cache = {} self.inference_api = inference_api - self.vector_stores_config = vector_stores_config self.vector_store_table = None self._qdrant_lock = asyncio.Lock() @@ -179,7 +176,6 @@ async def initialize(self) -> None: vector_store, QdrantIndex(self.client, vector_store.identifier), self.inference_api, - self.vector_stores_config, ) self.cache[vector_store.identifier] = index self.openai_vector_stores = await self._load_openai_vector_stores() @@ -199,7 +195,6 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store=vector_store, index=QdrantIndex(self.client, vector_store.identifier), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store.identifier] = index @@ -231,7 +226,6 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store=vector_store, index=QdrantIndex(client=self.client, collection_name=vector_store.identifier), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py index 77bf357f4b..a13cca8a16 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import WeaviateVectorIOConfig @@ -13,10 +12,9 @@ async def get_adapter_impl( config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec], - vector_stores_config: VectorStoresConfig | None = None, ): from .weaviate import WeaviateVectorIOAdapter - impl = WeaviateVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) + impl = WeaviateVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index 1c52fa84c5..67ec523d7e 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -12,7 +12,6 @@ from weaviate.classes.init import Auth from weaviate.classes.query import Filter, HybridFusion -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger @@ -268,12 +267,10 @@ def __init__( config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None, - vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.vector_stores_config = vector_stores_config self.client_cache = {} self.cache = {} self.vector_store_table = None @@ -319,7 +316,6 @@ async def initialize(self) -> None: vector_store=vector_store, index=idx, inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) # Load OpenAI vector stores metadata into cache @@ -348,7 +344,6 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api, - self.vector_stores_config, ) async def unregister_vector_store(self, vector_store_id: str) -> None: @@ -383,7 +378,6 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store=vector_store, index=WeaviateIndex(client=client, collection_name=vector_store.identifier), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/utils/memory/__init__.py b/src/llama_stack/providers/utils/memory/__init__.py index 756f351d88..5e0942402e 100644 --- a/src/llama_stack/providers/utils/memory/__init__.py +++ b/src/llama_stack/providers/utils/memory/__init__.py @@ -3,3 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. + +from .constants import DEFAULT_QUERY_EXPANSION_PROMPT + +__all__ = ["DEFAULT_QUERY_EXPANSION_PROMPT"] diff --git a/src/llama_stack/providers/utils/memory/constants.py b/src/llama_stack/providers/utils/memory/constants.py new file mode 100644 index 0000000000..d8703bbceb --- /dev/null +++ b/src/llama_stack/providers/utils/memory/constants.py @@ -0,0 +1,8 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +# Default prompt template for query expansion in vector search +DEFAULT_QUERY_EXPANSION_PROMPT = "Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\n{query}\n\nImproved query:" diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 4e67cf24b4..e0293507da 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -379,11 +379,6 @@ async def openai_create_vector_store( f"Using embedding config from extra_body: model='{embedding_model}', dimension={embedding_dimension}" ) - # Extract query expansion model from extra_body if provided - query_expansion_model = extra_body.get("query_expansion_model") - if query_expansion_model: - logger.debug(f"Using per-store query expansion model: {query_expansion_model}") - # use provider_id set by router; fallback to provider's own ID when used directly via --stack-config provider_id = extra_body.get("provider_id") or getattr(self, "__provider_id__", None) # Derive the canonical vector_store_id (allow override, else generate) @@ -407,7 +402,6 @@ async def openai_create_vector_store( provider_id=provider_id, provider_resource_id=vector_store_id, vector_store_name=params.name, - query_expansion_model=query_expansion_model, ) await self.register_vector_store(vector_store) @@ -621,9 +615,6 @@ async def openai_search_vector_store( "rewrite_query": rewrite_query, } - # Add vector_stores_config if available (for query rewriting) - if hasattr(self, "vector_stores_config"): - params["vector_stores_config"] = self.vector_stores_config # TODO: Add support for ranking_options.ranker response = await self.query_chunks( diff --git a/src/llama_stack/providers/utils/memory/query_expansion_config.py b/src/llama_stack/providers/utils/memory/query_expansion_config.py new file mode 100644 index 0000000000..0b51c1a9ac --- /dev/null +++ b/src/llama_stack/providers/utils/memory/query_expansion_config.py @@ -0,0 +1,37 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig +from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT + +# Global configuration for query expansion - set during stack startup +_DEFAULT_QUERY_EXPANSION_MODEL: QualifiedModel | None = None +_DEFAULT_QUERY_EXPANSION_MAX_TOKENS: int = 100 +_DEFAULT_QUERY_EXPANSION_TEMPERATURE: float = 0.3 +_QUERY_EXPANSION_PROMPT_OVERRIDE: str | None = None + + +def set_default_query_expansion_config(vector_stores_config: VectorStoresConfig | None): + """Set the global default query expansion configuration from stack config.""" + global \ + _DEFAULT_QUERY_EXPANSION_MODEL, \ + _QUERY_EXPANSION_PROMPT_OVERRIDE, \ + _DEFAULT_QUERY_EXPANSION_MAX_TOKENS, \ + _DEFAULT_QUERY_EXPANSION_TEMPERATURE + if vector_stores_config: + _DEFAULT_QUERY_EXPANSION_MODEL = vector_stores_config.default_query_expansion_model + # Only set override if user provided a custom prompt different from default + if vector_stores_config.query_expansion_prompt != DEFAULT_QUERY_EXPANSION_PROMPT: + _QUERY_EXPANSION_PROMPT_OVERRIDE = vector_stores_config.query_expansion_prompt + else: + _QUERY_EXPANSION_PROMPT_OVERRIDE = None + _DEFAULT_QUERY_EXPANSION_MAX_TOKENS = vector_stores_config.query_expansion_max_tokens + _DEFAULT_QUERY_EXPANSION_TEMPERATURE = vector_stores_config.query_expansion_temperature + else: + _DEFAULT_QUERY_EXPANSION_MODEL = None + _QUERY_EXPANSION_PROMPT_OVERRIDE = None + _DEFAULT_QUERY_EXPANSION_MAX_TOKENS = 100 + _DEFAULT_QUERY_EXPANSION_TEMPERATURE = 0.3 diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index 71d61787af..61fa996e4e 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -17,7 +17,6 @@ from numpy.typing import NDArray from pydantic import BaseModel -from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig from llama_stack.log import get_logger from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.providers.utils.inference.prompt_adapter import ( @@ -30,19 +29,18 @@ Chunk, ChunkMetadata, InterleavedContent, + OpenAIChatCompletionRequestWithExtraBody, OpenAIEmbeddingsRequestWithExtraBody, QueryChunksResponse, RAGDocument, VectorStore, ) -from llama_stack_api.inference import ( - OpenAIChatCompletionRequestWithExtraBody, - OpenAIUserMessageParam, -) -from llama_stack_api.models import ModelType log = get_logger(name=__name__, category="providers::utils") +from llama_stack.providers.utils.memory import query_expansion_config +from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT + class ChunkForDeletion(BaseModel): """Information needed to delete a chunk from a vector store. 
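A minimal usage sketch for the module above (illustrative only; the provider and model IDs are assumptions, and the snippet presumes this series is applied): the stack publishes the defaults once at startup via set_default_query_expansion_config(), and the vector-store query path later reads the module-level globals instead of having the config threaded through every adapter constructor.

# Illustrative sketch only -- the provider/model IDs below are made-up assumptions.
from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig
from llama_stack.providers.utils.memory import query_expansion_config
from llama_stack.providers.utils.memory.query_expansion_config import set_default_query_expansion_config

# At stack startup: publish the defaults from the run.yaml vector_stores section.
set_default_query_expansion_config(
    VectorStoresConfig(
        default_query_expansion_model=QualifiedModel(provider_id="ollama", model_id="llama3.2:3b"),
        query_expansion_max_tokens=80,
        query_expansion_temperature=0.0,
    )
)

# At query time the rewrite path consults the globals; None disables rewriting.
model = query_expansion_config._DEFAULT_QUERY_EXPANSION_MODEL
if model is not None:
    print(f"queries will be rewritten with {model.provider_id}/{model.model_id}")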
@@ -268,7 +266,6 @@ class VectorStoreWithIndex: vector_store: VectorStore index: EmbeddingIndex inference_api: Api.inference - vector_stores_config: VectorStoresConfig | None = None async def insert_chunks( self, @@ -296,6 +293,39 @@ async def insert_chunks( embeddings = np.array([c.embedding for c in chunks], dtype=np.float32) await self.index.add_chunks(chunks, embeddings) + async def _rewrite_query_for_file_search(self, query: str) -> str: + """Rewrite a search query using the globally configured LLM model for better retrieval results.""" + if not query_expansion_config._DEFAULT_QUERY_EXPANSION_MODEL: + log.debug("No default query expansion model configured, using original query") + return query + + model_id = f"{query_expansion_config._DEFAULT_QUERY_EXPANSION_MODEL.provider_id}/{query_expansion_config._DEFAULT_QUERY_EXPANSION_MODEL.model_id}" + + # Use custom prompt from config if provided, otherwise use built-in default + # Users only need to configure the model - prompt is automatic with optional override + if query_expansion_config._QUERY_EXPANSION_PROMPT_OVERRIDE: + # Custom prompt from config - format if it contains {query} placeholder + prompt = ( + query_expansion_config._QUERY_EXPANSION_PROMPT_OVERRIDE.format(query=query) + if "{query}" in query_expansion_config._QUERY_EXPANSION_PROMPT_OVERRIDE + else query_expansion_config._QUERY_EXPANSION_PROMPT_OVERRIDE + ) + else: + # Use built-in default prompt and format with query + prompt = DEFAULT_QUERY_EXPANSION_PROMPT.format(query=query) + + request = OpenAIChatCompletionRequestWithExtraBody( + model=model_id, + messages=[{"role": "user", "content": prompt}], + max_tokens=query_expansion_config._DEFAULT_QUERY_EXPANSION_MAX_TOKENS, + temperature=query_expansion_config._DEFAULT_QUERY_EXPANSION_TEMPERATURE, + ) + + response = await self.inference_api.openai_chat_completion(request) + rewritten_query = response.choices[0].message.content.strip() + log.debug(f"Query rewritten: '{query}' → '{rewritten_query}'") + return rewritten_query + async def query_chunks( self, query: InterleavedContent, @@ -304,10 +334,6 @@ async def query_chunks( if params is None: params = {} - # Extract configuration if provided by router - if "vector_stores_config" in params: - self.vector_stores_config = params["vector_stores_config"] - k = params.get("max_chunks", 3) mode = params.get("mode") score_threshold = params.get("score_threshold", 0.0) @@ -331,18 +357,9 @@ async def query_chunks( query_string = interleaved_content_as_str(query) - # Apply query rewriting if enabled + # Apply query rewriting if enabled and model is configured if params.get("rewrite_query", False): - if self.vector_stores_config: - log.debug(f"VectorStoreWithIndex received config: {self.vector_stores_config}") - if hasattr(self.vector_stores_config, "default_query_expansion_model"): - log.debug( - f"Config has default_query_expansion_model: {self.vector_stores_config.default_query_expansion_model}" - ) - else: - log.debug("No vector_stores_config found - cannot perform query rewriting") - - query_string = await self._rewrite_query_for_search(query_string) + query_string = await self._rewrite_query_for_file_search(query_string) if mode == "keyword": return await self.index.query_keyword(query_string, k, score_threshold) @@ -359,88 +376,3 @@ async def query_chunks( ) else: return await self.index.query_vector(query_vector, k, score_threshold) - - async def _rewrite_query_for_search(self, query: str) -> str: - """Rewrite the user query to improve vector search performance. 
- - :param query: The original user query - :returns: The rewritten query optimized for vector search - """ - expansion_model = None - - # Check for per-store query expansion model first - if self.vector_store.query_expansion_model: - # Parse the model string into provider_id and model_id - model_parts = self.vector_store.query_expansion_model.split("/", 1) - if len(model_parts) == 2: - expansion_model = QualifiedModel(provider_id=model_parts[0], model_id=model_parts[1]) - log.debug(f"Using per-store query expansion model: {expansion_model}") - else: - log.warning( - f"Invalid query_expansion_model format: {self.vector_store.query_expansion_model}. Expected 'provider_id/model_id'" - ) - - # Fall back to global default if no per-store model - if not expansion_model: - if not self.vector_stores_config: - raise ValueError( - f"No vector_stores_config found and no per-store query_expansion_model! self.vector_stores_config is: {self.vector_stores_config}" - ) - if not self.vector_stores_config.default_query_expansion_model: - raise ValueError( - f"No default_query_expansion_model configured and no per-store query_expansion_model! vector_stores_config: {self.vector_stores_config}, default_query_expansion_model: {self.vector_stores_config.default_query_expansion_model}" - ) - expansion_model = self.vector_stores_config.default_query_expansion_model - log.debug(f"Using global default query expansion model: {expansion_model}") - - chat_model = f"{expansion_model.provider_id}/{expansion_model.model_id}" - - # Validate that the model is available and is an LLM - try: - models_response = await self.inference_api.routing_table.list_models() - except Exception as e: - raise RuntimeError(f"Failed to list available models for validation: {e}") from e - - model_found = False - for model in models_response.data: - if model.identifier == chat_model: - if model.model_type != ModelType.llm: - raise ValueError( - f"Configured query expansion model '{chat_model}' is not an LLM model " - f"(found type: {model.model_type}). Query rewriting requires an LLM model." - ) - model_found = True - break - - if not model_found: - available_llm_models = [m.identifier for m in models_response.data if m.model_type == ModelType.llm] - raise ValueError( - f"Configured query expansion model '{chat_model}' is not available. 
" - f"Available LLM models: {available_llm_models}" - ) - - # Use the configured prompt (has a default value) - rewrite_prompt = self.vector_stores_config.query_expansion_prompt.format(query=query) - - chat_request = OpenAIChatCompletionRequestWithExtraBody( - model=chat_model, - messages=[ - OpenAIUserMessageParam( - role="user", - content=rewrite_prompt, - ) - ], - max_tokens=100, - ) - - try: - response = await self.inference_api.openai_chat_completion(chat_request) - except Exception as e: - raise RuntimeError(f"Failed to generate rewritten query: {e}") from e - - if response.choices and len(response.choices) > 0: - rewritten_query = response.choices[0].message.content.strip() - log.info(f"Query rewritten: '{query}' → '{rewritten_query}'") - return rewritten_query - else: - raise RuntimeError("No response received from LLM model for query rewriting") diff --git a/src/llama_stack_api/vector_stores.py b/src/llama_stack_api/vector_stores.py index 4c0d1ced25..0a1e6c53c5 100644 --- a/src/llama_stack_api/vector_stores.py +++ b/src/llama_stack_api/vector_stores.py @@ -25,7 +25,6 @@ class VectorStore(Resource): embedding_model: str embedding_dimension: int vector_store_name: str | None = None - query_expansion_model: str | None = None @property def vector_store_id(self) -> str: diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index cfda7aa5ee..83bf22f346 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -1233,94 +1233,122 @@ async def test_embedding_config_required_model_missing(vector_io_adapter): async def test_query_expansion_functionality(vector_io_adapter): - """Test query expansion with per-store models, global defaults, and error validation.""" + """Test query expansion with simplified global configuration approach.""" from unittest.mock import MagicMock - from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig + from llama_stack.core.datatypes import QualifiedModel + from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT + from llama_stack.providers.utils.memory.query_expansion_config import set_default_query_expansion_config from llama_stack.providers.utils.memory.vector_store import VectorStoreWithIndex - from llama_stack_api.models import Model, ModelType + from llama_stack_api import QueryChunksResponse - vector_io_adapter.register_vector_store = AsyncMock() - vector_io_adapter.__provider_id__ = "test_provider" + # Mock a simple vector store and index + mock_vector_store = MagicMock() + mock_vector_store.embedding_model = "test/embedding" + mock_inference_api = MagicMock() + mock_index = MagicMock() - # Test 1: Per-store model usage - params = OpenAICreateVectorStoreRequestWithExtraBody( - name="test_store", - metadata={}, - **{"embedding_model": "test/embedding", "query_expansion_model": "test/llama-model"}, + # Create VectorStoreWithIndex with simplified constructor + vector_store_with_index = VectorStoreWithIndex( + vector_store=mock_vector_store, + index=mock_index, + inference_api=mock_inference_api, ) - await vector_io_adapter.openai_create_vector_store(params) - call_args = vector_io_adapter.register_vector_store.call_args[0][0] - assert call_args.query_expansion_model == "test/llama-model" - # Test 2: Global default fallback - vector_io_adapter.register_vector_store.reset_mock() - params_no_model = 
OpenAICreateVectorStoreRequestWithExtraBody( - name="test_store2", metadata={}, **{"embedding_model": "test/embedding"} + # Mock the query_vector method to return a simple response + mock_response = QueryChunksResponse(chunks=[], scores=[]) + mock_index.query_vector = AsyncMock(return_value=mock_response) + + # Mock embeddings generation + mock_inference_api.openai_embeddings = AsyncMock( + return_value=MagicMock(data=[MagicMock(embedding=[0.1, 0.2, 0.3])]) ) - await vector_io_adapter.openai_create_vector_store(params_no_model) - call_args2 = vector_io_adapter.register_vector_store.call_args[0][0] - assert call_args2.query_expansion_model is None - # Test query rewriting scenarios - mock_inference_api = MagicMock() + # Test 1: Query expansion with default prompt (no custom prompt configured) + mock_vector_stores_config = MagicMock() + mock_vector_stores_config.default_query_expansion_model = QualifiedModel(provider_id="test", model_id="llama") + mock_vector_stores_config.query_expansion_prompt = None # Use built-in default prompt + mock_vector_stores_config.query_expansion_max_tokens = 100 # Default value + mock_vector_stores_config.query_expansion_temperature = 0.3 # Default value - # Per-store model scenario - mock_vector_store = MagicMock() - mock_vector_store.query_expansion_model = "test/llama-model" - mock_inference_api.routing_table.list_models = AsyncMock( - return_value=MagicMock( - data=[Model(identifier="test/llama-model", provider_id="test", model_type=ModelType.llm)] - ) - ) + # Set global config + set_default_query_expansion_config(mock_vector_stores_config) + + # Mock chat completion for query rewriting mock_inference_api.openai_chat_completion = AsyncMock( - return_value=MagicMock(choices=[MagicMock(message=MagicMock(content="per-store expanded"))]) + return_value=MagicMock(choices=[MagicMock(message=MagicMock(content="expanded test query"))]) ) - vector_store_with_index = VectorStoreWithIndex( - vector_store=mock_vector_store, - index=MagicMock(), - inference_api=mock_inference_api, - vector_stores_config=VectorStoresConfig( - default_query_expansion_model=QualifiedModel(provider_id="global", model_id="default") - ), - ) + params = {"rewrite_query": True, "max_chunks": 5} + result = await vector_store_with_index.query_chunks("test query", params) + + # Verify chat completion was called for query rewriting + mock_inference_api.openai_chat_completion.assert_called_once() + chat_call_args = mock_inference_api.openai_chat_completion.call_args[0][0] + assert chat_call_args.model == "test/llama" + + # Verify default prompt is used (contains our built-in prompt text) + prompt_text = chat_call_args.messages[0].content + expected_prompt = DEFAULT_QUERY_EXPANSION_PROMPT.format(query="test query") + assert prompt_text == expected_prompt + + # Verify default inference parameters are used + assert chat_call_args.max_tokens == 100 # Default value + assert chat_call_args.temperature == 0.3 # Default value - result = await vector_store_with_index._rewrite_query_for_search("test") - assert mock_inference_api.openai_chat_completion.call_args[0][0].model == "test/llama-model" - assert result == "per-store expanded" + # Verify the rest of the flow proceeded normally + mock_inference_api.openai_embeddings.assert_called_once() + mock_index.query_vector.assert_called_once() + assert result == mock_response - # Global default fallback scenario + # Test 1b: Query expansion with custom prompt override and inference parameters mock_inference_api.reset_mock() - mock_vector_store.query_expansion_model 
= None - mock_inference_api.routing_table.list_models = AsyncMock( - return_value=MagicMock( - data=[Model(identifier="global/default", provider_id="global", model_type=ModelType.llm)] - ) - ) - mock_inference_api.openai_chat_completion = AsyncMock( - return_value=MagicMock(choices=[MagicMock(message=MagicMock(content="global expanded"))]) - ) + mock_index.reset_mock() - result = await vector_store_with_index._rewrite_query_for_search("test") - assert mock_inference_api.openai_chat_completion.call_args[0][0].model == "global/default" - assert result == "global expanded" + mock_vector_stores_config.query_expansion_prompt = "Custom prompt for rewriting: {query}" + mock_vector_stores_config.query_expansion_max_tokens = 150 + mock_vector_stores_config.query_expansion_temperature = 0.7 + set_default_query_expansion_config(mock_vector_stores_config) - # Test 3: Error cases - # Model not found - mock_vector_store.query_expansion_model = "missing/model" - mock_inference_api.routing_table.list_models = AsyncMock(return_value=MagicMock(data=[])) + result = await vector_store_with_index.query_chunks("test query", params) - with pytest.raises(ValueError, match="Configured query expansion model .* is not available"): - await vector_store_with_index._rewrite_query_for_search("test") + # Verify custom prompt and parameters are used + mock_inference_api.openai_chat_completion.assert_called_once() + chat_call_args = mock_inference_api.openai_chat_completion.call_args[0][0] + prompt_text = chat_call_args.messages[0].content + assert prompt_text == "Custom prompt for rewriting: test query" + assert "Expand this query with relevant synonyms" not in prompt_text # Default not used - # Non-LLM model - mock_vector_store.query_expansion_model = "test/embedding-model" - mock_inference_api.routing_table.list_models = AsyncMock( - return_value=MagicMock( - data=[Model(identifier="test/embedding-model", provider_id="test", model_type=ModelType.embedding)] - ) - ) + # Verify custom inference parameters + assert chat_call_args.max_tokens == 150 + assert chat_call_args.temperature == 0.7 + + # Test 2: No query expansion when no global model is configured + mock_inference_api.reset_mock() + mock_index.reset_mock() + + # Clear global config + set_default_query_expansion_config(None) + + params = {"rewrite_query": True, "max_chunks": 5} + result2 = await vector_store_with_index.query_chunks("test query", params) + + # Verify chat completion was NOT called + mock_inference_api.openai_chat_completion.assert_not_called() + # But normal flow should still work + mock_inference_api.openai_embeddings.assert_called_once() + mock_index.query_vector.assert_called_once() + assert result2 == mock_response + + # Test 3: Normal behavior without rewrite_query parameter + mock_inference_api.reset_mock() + mock_index.reset_mock() + + params_no_rewrite = {"max_chunks": 5} + result3 = await vector_store_with_index.query_chunks("test query", params_no_rewrite) - with pytest.raises(ValueError, match="is not an LLM model.*Query rewriting requires an LLM model"): - await vector_store_with_index._rewrite_query_for_search("test") + # Neither chat completion nor query rewriting should be called + mock_inference_api.openai_chat_completion.assert_not_called() + mock_inference_api.openai_embeddings.assert_called_once() + mock_index.query_vector.assert_called_once() + assert result3 == mock_response From 31e28b6d1733b8fecc00cc6aba03465fe3980342 Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Fri, 21 Nov 2025 23:38:13 -0500 Subject: [PATCH 
5/7] renaming to query_rewrite, consolidating, and cleaning up validation Signed-off-by: Francisco Javier Arceo --- src/llama_stack/core/datatypes.py | 37 ++++--- src/llama_stack/core/stack.py | 100 ++++++++++-------- .../ci-tests/run-with-postgres-store.yaml | 10 -- .../distributions/ci-tests/run.yaml | 10 -- .../starter-gpu/run-with-postgres-store.yaml | 10 -- .../distributions/starter-gpu/run.yaml | 10 -- .../starter/run-with-postgres-store.yaml | 10 -- .../distributions/starter/run.yaml | 10 -- .../utils/memory/query_expansion_config.py | 37 ------- .../utils/memory/rewrite_query_config.py | 38 +++++++ .../providers/utils/memory/vector_store.py | 20 ++-- .../test_vector_io_openai_vector_stores.py | 26 ++--- 12 files changed, 138 insertions(+), 180 deletions(-) delete mode 100644 src/llama_stack/providers/utils/memory/query_expansion_config.py create mode 100644 src/llama_stack/providers/utils/memory/rewrite_query_config.py diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index a32e1d8a26..8fab715f2b 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -366,6 +366,27 @@ class QualifiedModel(BaseModel): model_id: str +class RewriteQueryParams(BaseModel): + """Parameters for query rewriting/expansion.""" + + model: QualifiedModel | None = Field( + default=None, + description="LLM model for query rewriting/expansion in vector search.", + ) + prompt: str = Field( + default=DEFAULT_QUERY_EXPANSION_PROMPT, + description="Prompt template for query rewriting. Use {query} as placeholder for the original query.", + ) + max_tokens: int = Field( + default=100, + description="Maximum number of tokens for query expansion responses.", + ) + temperature: float = Field( + default=0.3, + description="Temperature for query expansion model (0.0 = deterministic, 1.0 = creative).", + ) + + class VectorStoresConfig(BaseModel): """Configuration for vector stores in the stack.""" @@ -377,21 +398,9 @@ class VectorStoresConfig(BaseModel): default=None, description="Default embedding model configuration for vector stores.", ) - default_query_expansion_model: QualifiedModel | None = Field( + rewrite_query_params: RewriteQueryParams | None = Field( default=None, - description="Default LLM model for query expansion/rewriting in vector search.", - ) - query_expansion_prompt: str = Field( - default=DEFAULT_QUERY_EXPANSION_PROMPT, - description="Prompt template for query expansion. Use {query} as placeholder for the original query.", - ) - query_expansion_max_tokens: int = Field( - default=100, - description="Maximum number of tokens for query expansion responses.", - ) - query_expansion_temperature: float = Field( - default=0.3, - description="Temperature for query expansion model (0.0 = deterministic, 1.0 = creative).", + description="Parameters for query rewriting/expansion. 
None disables query rewriting.", ) diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index dae6e8ec9b..0bebf800d1 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -14,7 +14,7 @@ import yaml from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl -from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig +from llama_stack.core.datatypes import Provider, QualifiedModel, SafetyConfig, StackRunConfig, VectorStoresConfig from llama_stack.core.distribution import get_provider_registry from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl @@ -145,61 +145,67 @@ async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig return # Validate default embedding model - default_embedding_model = vector_stores_config.default_embedding_model - if default_embedding_model is not None: - provider_id = default_embedding_model.provider_id - model_id = default_embedding_model.model_id - default_model_id = f"{provider_id}/{model_id}" + if vector_stores_config.default_embedding_model is not None: + await _validate_embedding_model(vector_stores_config.default_embedding_model, impls) - if Api.models not in impls: - raise ValueError( - f"Models API is not available but vector_stores config requires model '{default_model_id}'" - ) + # Validate default rewrite query model + if vector_stores_config.rewrite_query_params and vector_stores_config.rewrite_query_params.model: + await _validate_rewrite_query_model(vector_stores_config.rewrite_query_params.model, impls) - models_impl = impls[Api.models] - response = await models_impl.list_models() - models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"} - default_model = models_list.get(default_model_id) - if default_model is None: - raise ValueError( - f"Embedding model '{default_model_id}' not found. Available embedding models: {models_list}" - ) +async def _validate_embedding_model(embedding_model: QualifiedModel, impls: dict[Api, Any]) -> None: + """Validate that an embedding model exists and has required metadata.""" + provider_id = embedding_model.provider_id + model_id = embedding_model.model_id + model_identifier = f"{provider_id}/{model_id}" - embedding_dimension = default_model.metadata.get("embedding_dimension") - if embedding_dimension is None: - raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata") + if Api.models not in impls: + raise ValueError(f"Models API is not available but vector_stores config requires model '{model_identifier}'") - try: - int(embedding_dimension) - except ValueError as err: - raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err + models_impl = impls[Api.models] + response = await models_impl.list_models() + models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"} - logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})") + model = models_list.get(model_identifier) + if model is None: + raise ValueError( + f"Embedding model '{model_identifier}' not found. 
Available embedding models: {list(models_list.keys())}" + ) - # Validate default query expansion model - default_query_expansion_model = vector_stores_config.default_query_expansion_model - if default_query_expansion_model is not None: - provider_id = default_query_expansion_model.provider_id - model_id = default_query_expansion_model.model_id - query_model_id = f"{provider_id}/{model_id}" + embedding_dimension = model.metadata.get("embedding_dimension") + if embedding_dimension is None: + raise ValueError(f"Embedding model '{model_identifier}' is missing 'embedding_dimension' in metadata") - if Api.models not in impls: - raise ValueError( - f"Models API is not available but vector_stores config requires query expansion model '{query_model_id}'" - ) + try: + int(embedding_dimension) + except ValueError as err: + raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err - models_impl = impls[Api.models] - response = await models_impl.list_models() - llm_models_list = {m.identifier: m for m in response.data if m.model_type == "llm"} + logger.debug(f"Validated embedding model: {model_identifier} (dimension: {embedding_dimension})") - query_expansion_model = llm_models_list.get(query_model_id) - if query_expansion_model is None: - raise ValueError( - f"Query expansion model '{query_model_id}' not found. Available LLM models: {list(llm_models_list.keys())}" - ) - logger.debug(f"Validated default query expansion model: {query_model_id}") +async def _validate_rewrite_query_model(rewrite_query_model: QualifiedModel, impls: dict[Api, Any]) -> None: + """Validate that a rewrite query model exists and is accessible.""" + provider_id = rewrite_query_model.provider_id + model_id = rewrite_query_model.model_id + model_identifier = f"{provider_id}/{model_id}" + + if Api.models not in impls: + raise ValueError( + f"Models API is not available but vector_stores config requires rewrite query model '{model_identifier}'" + ) + + models_impl = impls[Api.models] + response = await models_impl.list_models() + llm_models_list = {m.identifier: m for m in response.data if m.model_type == "llm"} + + model = llm_models_list.get(model_identifier) + if model is None: + raise ValueError( + f"Rewrite query model '{model_identifier}' not found. 
Available LLM models: {list(llm_models_list.keys())}" + ) + + logger.debug(f"Validated rewrite query model: {model_identifier}") async def validate_safety_config(safety_config: SafetyConfig | None, impls: dict[Api, Any]): @@ -466,9 +472,9 @@ async def initialize(self): await validate_safety_config(self.run_config.safety, impls) # Set global query expansion configuration from stack config - from llama_stack.providers.utils.memory.query_expansion_config import set_default_query_expansion_config + from llama_stack.providers.utils.memory.rewrite_query_config import set_default_rewrite_query_config - set_default_query_expansion_config(self.run_config.vector_stores) + set_default_rewrite_query_config(self.run_config.vector_stores) self.impls = impls diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml index 219ffdce3b..7721138c7f 100644 --- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -288,15 +288,5 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 - query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. - Return only the improved query, no explanations: - - - {query} - - - Improved query:' - query_expansion_max_tokens: 100 - query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml index e352e92688..b791e14882 100644 --- a/src/llama_stack/distributions/ci-tests/run.yaml +++ b/src/llama_stack/distributions/ci-tests/run.yaml @@ -279,15 +279,5 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 - query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. - Return only the improved query, no explanations: - - - {query} - - - Improved query:' - query_expansion_max_tokens: 100 - query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml index e81febb0ed..9c250c05a6 100644 --- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -291,15 +291,5 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 - query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. - Return only the improved query, no explanations: - - - {query} - - - Improved query:' - query_expansion_max_tokens: 100 - query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml index edae6f66db..65f9ae326f 100644 --- a/src/llama_stack/distributions/starter-gpu/run.yaml +++ b/src/llama_stack/distributions/starter-gpu/run.yaml @@ -282,15 +282,5 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 - query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. 
- Return only the improved query, no explanations: - - - {query} - - - Improved query:' - query_expansion_max_tokens: 100 - query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index 9ed74d96d1..3314bb9e96 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -288,15 +288,5 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 - query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. - Return only the improved query, no explanations: - - - {query} - - - Improved query:' - query_expansion_max_tokens: 100 - query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml index 73679a1520..e88539e6a7 100644 --- a/src/llama_stack/distributions/starter/run.yaml +++ b/src/llama_stack/distributions/starter/run.yaml @@ -279,15 +279,5 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 - query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. - Return only the improved query, no explanations: - - - {query} - - - Improved query:' - query_expansion_max_tokens: 100 - query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/providers/utils/memory/query_expansion_config.py b/src/llama_stack/providers/utils/memory/query_expansion_config.py deleted file mode 100644 index 0b51c1a9ac..0000000000 --- a/src/llama_stack/providers/utils/memory/query_expansion_config.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig -from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT - -# Global configuration for query expansion - set during stack startup -_DEFAULT_QUERY_EXPANSION_MODEL: QualifiedModel | None = None -_DEFAULT_QUERY_EXPANSION_MAX_TOKENS: int = 100 -_DEFAULT_QUERY_EXPANSION_TEMPERATURE: float = 0.3 -_QUERY_EXPANSION_PROMPT_OVERRIDE: str | None = None - - -def set_default_query_expansion_config(vector_stores_config: VectorStoresConfig | None): - """Set the global default query expansion configuration from stack config.""" - global \ - _DEFAULT_QUERY_EXPANSION_MODEL, \ - _QUERY_EXPANSION_PROMPT_OVERRIDE, \ - _DEFAULT_QUERY_EXPANSION_MAX_TOKENS, \ - _DEFAULT_QUERY_EXPANSION_TEMPERATURE - if vector_stores_config: - _DEFAULT_QUERY_EXPANSION_MODEL = vector_stores_config.default_query_expansion_model - # Only set override if user provided a custom prompt different from default - if vector_stores_config.query_expansion_prompt != DEFAULT_QUERY_EXPANSION_PROMPT: - _QUERY_EXPANSION_PROMPT_OVERRIDE = vector_stores_config.query_expansion_prompt - else: - _QUERY_EXPANSION_PROMPT_OVERRIDE = None - _DEFAULT_QUERY_EXPANSION_MAX_TOKENS = vector_stores_config.query_expansion_max_tokens - _DEFAULT_QUERY_EXPANSION_TEMPERATURE = vector_stores_config.query_expansion_temperature - else: - _DEFAULT_QUERY_EXPANSION_MODEL = None - _QUERY_EXPANSION_PROMPT_OVERRIDE = None - _DEFAULT_QUERY_EXPANSION_MAX_TOKENS = 100 - _DEFAULT_QUERY_EXPANSION_TEMPERATURE = 0.3 diff --git a/src/llama_stack/providers/utils/memory/rewrite_query_config.py b/src/llama_stack/providers/utils/memory/rewrite_query_config.py new file mode 100644 index 0000000000..9c53638b87 --- /dev/null +++ b/src/llama_stack/providers/utils/memory/rewrite_query_config.py @@ -0,0 +1,38 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig +from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT + +# Global configuration for query rewriting - set during stack startup +_DEFAULT_REWRITE_QUERY_MODEL: QualifiedModel | None = None +_DEFAULT_REWRITE_QUERY_MAX_TOKENS: int = 100 +_DEFAULT_REWRITE_QUERY_TEMPERATURE: float = 0.3 +_REWRITE_QUERY_PROMPT_OVERRIDE: str | None = None + + +def set_default_rewrite_query_config(vector_stores_config: VectorStoresConfig | None): + """Set the global default query rewriting configuration from stack config.""" + global \ + _DEFAULT_REWRITE_QUERY_MODEL, \ + _REWRITE_QUERY_PROMPT_OVERRIDE, \ + _DEFAULT_REWRITE_QUERY_MAX_TOKENS, \ + _DEFAULT_REWRITE_QUERY_TEMPERATURE + if vector_stores_config and vector_stores_config.rewrite_query_params: + params = vector_stores_config.rewrite_query_params + _DEFAULT_REWRITE_QUERY_MODEL = params.model + # Only set override if user provided a custom prompt different from default + if params.prompt != DEFAULT_QUERY_EXPANSION_PROMPT: + _REWRITE_QUERY_PROMPT_OVERRIDE = params.prompt + else: + _REWRITE_QUERY_PROMPT_OVERRIDE = None + _DEFAULT_REWRITE_QUERY_MAX_TOKENS = params.max_tokens + _DEFAULT_REWRITE_QUERY_TEMPERATURE = params.temperature + else: + _DEFAULT_REWRITE_QUERY_MODEL = None + _REWRITE_QUERY_PROMPT_OVERRIDE = None + _DEFAULT_REWRITE_QUERY_MAX_TOKENS = 100 + _DEFAULT_REWRITE_QUERY_TEMPERATURE = 0.3 diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index 61fa996e4e..11754bae29 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -38,7 +38,7 @@ log = get_logger(name=__name__, category="providers::utils") -from llama_stack.providers.utils.memory import query_expansion_config +from llama_stack.providers.utils.memory import rewrite_query_config from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT @@ -295,20 +295,20 @@ async def insert_chunks( async def _rewrite_query_for_file_search(self, query: str) -> str: """Rewrite a search query using the globally configured LLM model for better retrieval results.""" - if not query_expansion_config._DEFAULT_QUERY_EXPANSION_MODEL: - log.debug("No default query expansion model configured, using original query") + if not rewrite_query_config._DEFAULT_REWRITE_QUERY_MODEL: + log.debug("No default query rewriting model configured, using original query") return query - model_id = f"{query_expansion_config._DEFAULT_QUERY_EXPANSION_MODEL.provider_id}/{query_expansion_config._DEFAULT_QUERY_EXPANSION_MODEL.model_id}" + model_id = f"{rewrite_query_config._DEFAULT_REWRITE_QUERY_MODEL.provider_id}/{rewrite_query_config._DEFAULT_REWRITE_QUERY_MODEL.model_id}" # Use custom prompt from config if provided, otherwise use built-in default # Users only need to configure the model - prompt is automatic with optional override - if query_expansion_config._QUERY_EXPANSION_PROMPT_OVERRIDE: + if rewrite_query_config._REWRITE_QUERY_PROMPT_OVERRIDE: # Custom prompt from config - format if it contains {query} placeholder prompt = ( - query_expansion_config._QUERY_EXPANSION_PROMPT_OVERRIDE.format(query=query) - if "{query}" in query_expansion_config._QUERY_EXPANSION_PROMPT_OVERRIDE - else query_expansion_config._QUERY_EXPANSION_PROMPT_OVERRIDE + rewrite_query_config._REWRITE_QUERY_PROMPT_OVERRIDE.format(query=query) + if "{query}" in 
rewrite_query_config._REWRITE_QUERY_PROMPT_OVERRIDE + else rewrite_query_config._REWRITE_QUERY_PROMPT_OVERRIDE ) else: # Use built-in default prompt and format with query @@ -317,8 +317,8 @@ async def _rewrite_query_for_file_search(self, query: str) -> str: request = OpenAIChatCompletionRequestWithExtraBody( model=model_id, messages=[{"role": "user", "content": prompt}], - max_tokens=query_expansion_config._DEFAULT_QUERY_EXPANSION_MAX_TOKENS, - temperature=query_expansion_config._DEFAULT_QUERY_EXPANSION_TEMPERATURE, + max_tokens=rewrite_query_config._DEFAULT_REWRITE_QUERY_MAX_TOKENS, + temperature=rewrite_query_config._DEFAULT_REWRITE_QUERY_TEMPERATURE, ) response = await self.inference_api.openai_chat_completion(request) diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 83bf22f346..07ec41bec1 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -1236,9 +1236,9 @@ async def test_query_expansion_functionality(vector_io_adapter): """Test query expansion with simplified global configuration approach.""" from unittest.mock import MagicMock - from llama_stack.core.datatypes import QualifiedModel + from llama_stack.core.datatypes import QualifiedModel, RewriteQueryParams from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT - from llama_stack.providers.utils.memory.query_expansion_config import set_default_query_expansion_config + from llama_stack.providers.utils.memory.rewrite_query_config import set_default_rewrite_query_config from llama_stack.providers.utils.memory.vector_store import VectorStoreWithIndex from llama_stack_api import QueryChunksResponse @@ -1266,13 +1266,12 @@ async def test_query_expansion_functionality(vector_io_adapter): # Test 1: Query expansion with default prompt (no custom prompt configured) mock_vector_stores_config = MagicMock() - mock_vector_stores_config.default_query_expansion_model = QualifiedModel(provider_id="test", model_id="llama") - mock_vector_stores_config.query_expansion_prompt = None # Use built-in default prompt - mock_vector_stores_config.query_expansion_max_tokens = 100 # Default value - mock_vector_stores_config.query_expansion_temperature = 0.3 # Default value + mock_vector_stores_config.rewrite_query_params = RewriteQueryParams( + model=QualifiedModel(provider_id="test", model_id="llama"), max_tokens=100, temperature=0.3 + ) # Set global config - set_default_query_expansion_config(mock_vector_stores_config) + set_default_rewrite_query_config(mock_vector_stores_config) # Mock chat completion for query rewriting mock_inference_api.openai_chat_completion = AsyncMock( @@ -1305,10 +1304,13 @@ async def test_query_expansion_functionality(vector_io_adapter): mock_inference_api.reset_mock() mock_index.reset_mock() - mock_vector_stores_config.query_expansion_prompt = "Custom prompt for rewriting: {query}" - mock_vector_stores_config.query_expansion_max_tokens = 150 - mock_vector_stores_config.query_expansion_temperature = 0.7 - set_default_query_expansion_config(mock_vector_stores_config) + mock_vector_stores_config.rewrite_query_params = RewriteQueryParams( + model=QualifiedModel(provider_id="test", model_id="llama"), + prompt="Custom prompt for rewriting: {query}", + max_tokens=150, + temperature=0.7, + ) + set_default_rewrite_query_config(mock_vector_stores_config) result = await 
vector_store_with_index.query_chunks("test query", params) @@ -1328,7 +1330,7 @@ async def test_query_expansion_functionality(vector_io_adapter): mock_index.reset_mock() # Clear global config - set_default_query_expansion_config(None) + set_default_rewrite_query_config(None) params = {"rewrite_query": True, "max_chunks": 5} result2 = await vector_store_with_index.query_chunks("test query", params) From 2ebc56c3d9ef7664ec5b005fe837180060411541 Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Tue, 25 Nov 2025 00:06:11 -0500 Subject: [PATCH 6/7] undoing formatting and updating missed expansion parameters Signed-off-by: Francisco Javier Arceo --- src/llama_stack/core/datatypes.py | 4 ++-- .../providers/inline/vector_io/faiss/faiss.py | 7 +------ .../inline/vector_io/sqlite_vec/sqlite_vec.py | 7 +------ .../remote/vector_io/pgvector/__init__.py | 5 +---- .../remote/vector_io/pgvector/pgvector.py | 17 +++-------------- .../providers/remote/vector_io/qdrant/qdrant.py | 4 +--- .../remote/vector_io/weaviate/__init__.py | 5 +---- .../remote/vector_io/weaviate/weaviate.py | 15 +++------------ .../providers/utils/memory/__init__.py | 4 ++-- .../providers/utils/memory/constants.py | 4 ++-- .../utils/memory/rewrite_query_config.py | 4 ++-- .../providers/utils/memory/vector_store.py | 4 ++-- .../test_vector_io_openai_vector_stores.py | 4 ++-- 13 files changed, 23 insertions(+), 61 deletions(-) diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 8fab715f2b..00527a1bd0 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -18,7 +18,7 @@ StorageConfig, ) from llama_stack.log import LoggingConfig -from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT +from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_REWRITE_PROMPT from llama_stack_api import ( Api, Benchmark, @@ -374,7 +374,7 @@ class RewriteQueryParams(BaseModel): description="LLM model for query rewriting/expansion in vector search.", ) prompt: str = Field( - default=DEFAULT_QUERY_EXPANSION_PROMPT, + default=DEFAULT_QUERY_REWRITE_PROMPT, description="Prompt template for query rewriting. 
Use {query} as placeholder for the original query.", ) max_tokens: int = Field( diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py index e2aab1a250..91a17058bf 100644 --- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -184,12 +184,7 @@ async def query_hybrid( class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): - def __init__( - self, - config: FaissVectorIOConfig, - inference_api: Inference, - files_api: Files | None, - ) -> None: + def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index bc6226c845..a384a33dc5 100644 --- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -385,12 +385,7 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro and creates a cache of VectorStoreWithIndex instances (each wrapping a SQLiteVecIndex). """ - def __init__( - self, - config, - inference_api: Inference, - files_api: Files | None, - ) -> None: + def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py index ea0139815a..36018fd954 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -9,10 +9,7 @@ from .config import PGVectorVectorIOConfig -async def get_adapter_impl( - config: PGVectorVectorIOConfig, - deps: dict[Api, ProviderSpec], -): +async def get_adapter_impl(config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec]): from .pgvector import PGVectorVectorIOAdapter impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index fe1b8ce35e..5c86fb08da 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -330,10 +330,7 @@ def check_distance_metric_availability(self, distance_metric: str) -> None: class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( - self, - config: PGVectorVectorIOConfig, - inference_api: Inference, - files_api: Files | None = None, + self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config @@ -389,11 +386,7 @@ async def initialize(self) -> None: kvstore=self.kvstore, ) await pgvector_index.initialize() - index = VectorStoreWithIndex( - vector_store, - index=pgvector_index, - inference_api=self.inference_api, - ) + index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api) self.cache[vector_store.identifier] = 
index async def shutdown(self) -> None: @@ -420,11 +413,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store=vector_store, dimension=vector_store.embedding_dimension, conn=self.conn, kvstore=self.kvstore ) await pgvector_index.initialize() - index = VectorStoreWithIndex( - vector_store, - index=pgvector_index, - inference_api=self.inference_api, - ) + index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api) self.cache[vector_store.identifier] = index async def unregister_vector_store(self, vector_store_id: str) -> None: diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index dc65466460..4dd78d8343 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -173,9 +173,7 @@ async def initialize(self) -> None: for vector_store_data in stored_vector_stores: vector_store = VectorStore.model_validate_json(vector_store_data) index = VectorStoreWithIndex( - vector_store, - QdrantIndex(self.client, vector_store.identifier), - self.inference_api, + vector_store, QdrantIndex(self.client, vector_store.identifier), self.inference_api ) self.cache[vector_store.identifier] = index self.openai_vector_stores = await self._load_openai_vector_stores() diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py index a13cca8a16..47546d4598 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py @@ -9,10 +9,7 @@ from .config import WeaviateVectorIOConfig -async def get_adapter_impl( - config: WeaviateVectorIOConfig, - deps: dict[Api, ProviderSpec], -): +async def get_adapter_impl(config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec]): from .weaviate import WeaviateVectorIOAdapter impl = WeaviateVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index 67ec523d7e..c15d5f4682 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -262,12 +262,7 @@ async def query_hybrid( class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorStoresProtocolPrivate): - def __init__( - self, - config: WeaviateVectorIOConfig, - inference_api: Inference, - files_api: Files | None, - ) -> None: + def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api @@ -313,9 +308,7 @@ async def initialize(self) -> None: client = self._get_client() idx = WeaviateIndex(client=client, collection_name=vector_store.identifier, kvstore=self.kvstore) self.cache[vector_store.identifier] = VectorStoreWithIndex( - vector_store=vector_store, - index=idx, - inference_api=self.inference_api, + vector_store=vector_store, index=idx, inference_api=self.inference_api ) # Load OpenAI vector stores metadata into cache @@ -341,9 +334,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: ) self.cache[vector_store.identifier] = VectorStoreWithIndex( - vector_store, - 
WeaviateIndex(client=client, collection_name=sanitized_collection_name), - self.inference_api, + vector_store, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api ) async def unregister_vector_store(self, vector_store_id: str) -> None: diff --git a/src/llama_stack/providers/utils/memory/__init__.py b/src/llama_stack/providers/utils/memory/__init__.py index 5e0942402e..05a832b6f8 100644 --- a/src/llama_stack/providers/utils/memory/__init__.py +++ b/src/llama_stack/providers/utils/memory/__init__.py @@ -4,6 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .constants import DEFAULT_QUERY_EXPANSION_PROMPT +from .constants import DEFAULT_QUERY_REWRITE_PROMPT -__all__ = ["DEFAULT_QUERY_EXPANSION_PROMPT"] +__all__ = ["DEFAULT_QUERY_REWRITE_PROMPT"] diff --git a/src/llama_stack/providers/utils/memory/constants.py b/src/llama_stack/providers/utils/memory/constants.py index d8703bbceb..1f6e2cef6f 100644 --- a/src/llama_stack/providers/utils/memory/constants.py +++ b/src/llama_stack/providers/utils/memory/constants.py @@ -4,5 +4,5 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -# Default prompt template for query expansion in vector search -DEFAULT_QUERY_EXPANSION_PROMPT = "Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\n{query}\n\nImproved query:" +# Default prompt template for query rewriting in vector search +DEFAULT_QUERY_REWRITE_PROMPT = "Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\n{query}\n\nImproved query:" diff --git a/src/llama_stack/providers/utils/memory/rewrite_query_config.py b/src/llama_stack/providers/utils/memory/rewrite_query_config.py index 9c53638b87..7128116dde 100644 --- a/src/llama_stack/providers/utils/memory/rewrite_query_config.py +++ b/src/llama_stack/providers/utils/memory/rewrite_query_config.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig -from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT +from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_REWRITE_PROMPT # Global configuration for query rewriting - set during stack startup _DEFAULT_REWRITE_QUERY_MODEL: QualifiedModel | None = None @@ -25,7 +25,7 @@ def set_default_rewrite_query_config(vector_stores_config: VectorStoresConfig | params = vector_stores_config.rewrite_query_params _DEFAULT_REWRITE_QUERY_MODEL = params.model # Only set override if user provided a custom prompt different from default - if params.prompt != DEFAULT_QUERY_EXPANSION_PROMPT: + if params.prompt != DEFAULT_QUERY_REWRITE_PROMPT: _REWRITE_QUERY_PROMPT_OVERRIDE = params.prompt else: _REWRITE_QUERY_PROMPT_OVERRIDE = None diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index 11754bae29..e22075a5f8 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -39,7 +39,7 @@ log = get_logger(name=__name__, category="providers::utils") from llama_stack.providers.utils.memory import rewrite_query_config -from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT +from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_REWRITE_PROMPT class ChunkForDeletion(BaseModel): @@ -312,7 +312,7 @@ async def _rewrite_query_for_file_search(self, query: str) -> str: ) else: # Use built-in default prompt and format with query - prompt = DEFAULT_QUERY_EXPANSION_PROMPT.format(query=query) + prompt = DEFAULT_QUERY_REWRITE_PROMPT.format(query=query) request = OpenAIChatCompletionRequestWithExtraBody( model=model_id, diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 07ec41bec1..4588fe7e55 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -1237,7 +1237,7 @@ async def test_query_expansion_functionality(vector_io_adapter): from unittest.mock import MagicMock from llama_stack.core.datatypes import QualifiedModel, RewriteQueryParams - from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT + from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_REWRITE_PROMPT from llama_stack.providers.utils.memory.rewrite_query_config import set_default_rewrite_query_config from llama_stack.providers.utils.memory.vector_store import VectorStoreWithIndex from llama_stack_api import QueryChunksResponse @@ -1288,7 +1288,7 @@ async def test_query_expansion_functionality(vector_io_adapter): # Verify default prompt is used (contains our built-in prompt text) prompt_text = chat_call_args.messages[0].content - expected_prompt = DEFAULT_QUERY_EXPANSION_PROMPT.format(query="test query") + expected_prompt = DEFAULT_QUERY_REWRITE_PROMPT.format(query="test query") assert prompt_text == expected_prompt # Verify default inference parameters are used From 5ec6f5dcff5dded95bc33c3c4b0b86b31fb06126 Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Tue, 25 Nov 2025 23:24:07 -0500 Subject: [PATCH 7/7] raise when querying without config Signed-off-by: Francisco Javier Arceo --- .../providers/utils/memory/vector_store.py | 5 +++-- 
 .../vector_io/test_vector_io_openai_vector_stores.py | 12 +++---------
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py
index e22075a5f8..e00537c856 100644
--- a/src/llama_stack/providers/utils/memory/vector_store.py
+++ b/src/llama_stack/providers/utils/memory/vector_store.py
@@ -296,8 +296,9 @@ async def insert_chunks(
     async def _rewrite_query_for_file_search(self, query: str) -> str:
         """Rewrite a search query using the globally configured LLM model for better retrieval results."""
         if not rewrite_query_config._DEFAULT_REWRITE_QUERY_MODEL:
-            log.debug("No default query rewriting model configured, using original query")
-            return query
+            raise ValueError(
+                "Query rewriting requested but not configured. Please configure rewrite_query_params.model in vector_stores config."
+            )

         model_id = f"{rewrite_query_config._DEFAULT_REWRITE_QUERY_MODEL.provider_id}/{rewrite_query_config._DEFAULT_REWRITE_QUERY_MODEL.model_id}"

diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
index 4588fe7e55..e0ae568c58 100644
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@@ -1325,7 +1325,7 @@ async def test_query_expansion_functionality(vector_io_adapter):
     assert chat_call_args.max_tokens == 150
     assert chat_call_args.temperature == 0.7

-    # Test 2: No query expansion when no global model is configured
+    # Test 2: Error when query rewriting is requested but no global model is configured
     mock_inference_api.reset_mock()
     mock_index.reset_mock()

     # Clear global config
     set_default_rewrite_query_config(None)

     params = {"rewrite_query": True, "max_chunks": 5}
-    result2 = await vector_store_with_index.query_chunks("test query", params)
-
-    # Verify chat completion was NOT called
-    mock_inference_api.openai_chat_completion.assert_not_called()
-    # But normal flow should still work
-    mock_inference_api.openai_embeddings.assert_called_once()
-    mock_index.query_vector.assert_called_once()
-    assert result2 == mock_response
+    with pytest.raises(ValueError, match="Query rewriting requested but not configured"):
+        await vector_store_with_index.query_chunks("test query", params)

     # Test 3: Normal behavior without rewrite_query parameter
     mock_inference_api.reset_mock()
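
For reference, the end state of this series in one self-contained sketch: query rewriting is opt-in per request (`params = {"rewrite_query": True}`) and, after PATCH 7/7, fails fast when no model is configured under `vector_stores.rewrite_query_params`. The snippet below mirrors that control flow in isolation; the string-valued model id and the `fake_chat_completion()` helper are stand-ins for `QualifiedModel` and the stack's inference API, so treat it as an illustration of the semantics rather than the shipped implementation.

    import asyncio

    # Default template from constants.py; {query} is the only placeholder.
    DEFAULT_QUERY_REWRITE_PROMPT = (
        "Expand this query with relevant synonyms and related terms. "
        "Return only the improved query, no explanations:\n\n{query}\n\nImproved query:"
    )

    # Module-level config, set once at stack startup (mirrors rewrite_query_config).
    _REWRITE_QUERY_MODEL: str | None = None
    _REWRITE_QUERY_PROMPT_OVERRIDE: str | None = None


    def set_default_rewrite_query_config(model: str | None, prompt: str | None = None) -> None:
        """Record the rewrite model; keep a prompt override only if it differs from the default."""
        global _REWRITE_QUERY_MODEL, _REWRITE_QUERY_PROMPT_OVERRIDE
        _REWRITE_QUERY_MODEL = model
        _REWRITE_QUERY_PROMPT_OVERRIDE = prompt if prompt and prompt != DEFAULT_QUERY_REWRITE_PROMPT else None


    async def fake_chat_completion(model: str, prompt: str, max_tokens: int, temperature: float) -> str:
        # Stand-in for inference_api.openai_chat_completion(); a real deployment calls an LLM here.
        return "machine learning ML statistical models neural networks"


    async def rewrite_query(query: str) -> str:
        # PATCH 7/7 semantics: an explicit rewrite request without configuration is an error,
        # not a silent fallback to the original query.
        if not _REWRITE_QUERY_MODEL:
            raise ValueError("Query rewriting requested but not configured.")
        template = _REWRITE_QUERY_PROMPT_OVERRIDE or DEFAULT_QUERY_REWRITE_PROMPT
        rewritten = await fake_chat_completion(_REWRITE_QUERY_MODEL, template.format(query=query), 100, 0.3)
        return rewritten.strip() or query


    if __name__ == "__main__":
        set_default_rewrite_query_config("test/llama")
        print(asyncio.run(rewrite_query("machine learning")))  # expanded query

        set_default_rewrite_query_config(None)
        try:
            asyncio.run(rewrite_query("machine learning"))
        except ValueError as err:
            print(f"expected failure: {err}")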