From 61a4738a124dcfb812e5133492f06fe05e12f82a Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Sun, 16 Nov 2025 23:56:59 -0500 Subject: [PATCH 1/7] feat: Actualize query rewrite in search API Signed-off-by: Francisco Javier Arceo adding query expansion model to vector store config Signed-off-by: Francisco Javier Arceo --- src/llama_stack/core/datatypes.py | 8 + src/llama_stack/core/routers/vector_io.py | 6 + .../ci-tests/run-with-postgres-store.yaml | 8 + .../distributions/ci-tests/run.yaml | 8 + .../starter-gpu/run-with-postgres-store.yaml | 8 + .../distributions/starter-gpu/run.yaml | 8 + .../starter/run-with-postgres-store.yaml | 8 + .../distributions/starter/run.yaml | 8 + .../utils/memory/openai_vector_store_mixin.py | 1 + .../providers/utils/memory/vector_store.py | 81 ++ ...c18360a07bb3dda397579e25c27b-a882f554.json | 647 ++++++++++++ ...93298528e5349dfb4438d3d7324f-17b6020a.json | 989 ++++++++++++++++++ ...0ec104af88f1a482b6a936be14cc-17b6020a.json | 989 ++++++++++++++++++ ...2392139e2024601a849af31b9253-a882f554.json | 647 ++++++++++++ ...47d12cbec8bbcc581dc38df5fdbb-a882f554.json | 647 ++++++++++++ ...853da455d1f7c9316fb7e9d1419b-a882f554.json | 647 ++++++++++++ ...0ecf6a0334d311302e72afd87d25-17b6020a.json | 989 ++++++++++++++++++ ...0aaeee60e0a6866183450427d162-a882f554.json | 647 ++++++++++++ ...ee1f4ab308b1c12e971c13988bf0-17b6020a.json | 989 ++++++++++++++++++ .../vector_io/test_openai_vector_stores.py | 46 + 20 files changed, 7381 insertions(+) create mode 100644 tests/integration/common/recordings/models-d98e7566147f9d534bc0461f2efe61e3f525c18360a07bb3dda397579e25c27b-a882f554.json create mode 100644 tests/integration/vector_io/recordings/models-3347a38d1dce9c7428f2fae6f6bb25085a6b93298528e5349dfb4438d3d7324f-17b6020a.json create mode 100644 tests/integration/vector_io/recordings/models-3daddf1063512da260bce92df25fe2b01ac70ec104af88f1a482b6a936be14cc-17b6020a.json create mode 100644 tests/integration/vector_io/recordings/models-52b8e996a4f2c944c6bae30ac5274b714ed42392139e2024601a849af31b9253-a882f554.json create mode 100644 tests/integration/vector_io/recordings/models-cd7b914c23d754f03d8c0a789ce0df17329547d12cbec8bbcc581dc38df5fdbb-a882f554.json create mode 100644 tests/integration/vector_io/recordings/models-e3257f8a2366634242cfecf5298c0bc7af95853da455d1f7c9316fb7e9d1419b-a882f554.json create mode 100644 tests/integration/vector_io/recordings/models-e9e12107519911efec26749b69f37d360c970ecf6a0334d311302e72afd87d25-17b6020a.json create mode 100644 tests/integration/vector_io/recordings/models-eddc1eb84009b784ae43f6ef48daf1d685230aaeee60e0a6866183450427d162-a882f554.json create mode 100644 tests/integration/vector_io/recordings/models-fdf8866dba534adc96a22dccfcb8ddb92f33ee1f4ab308b1c12e971c13988bf0-17b6020a.json diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 1e29690ffd..49747d4770 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -376,6 +376,14 @@ class VectorStoresConfig(BaseModel): default=None, description="Default embedding model configuration for vector stores.", ) + default_query_expansion_model: QualifiedModel | None = Field( + default=None, + description="Default LLM model for query expansion/rewriting in vector search.", + ) + query_expansion_prompt: str = Field( + default="Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\n{query}\n\nImproved query:", + description="Prompt template for query expansion. 
Use {query} as placeholder for the original query.", + ) class SafetyConfig(BaseModel): diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index 5256dda449..a7b30642ce 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -99,6 +99,12 @@ async def query_chunks( ) -> QueryChunksResponse: logger.debug(f"VectorIORouter.query_chunks: {vector_store_id}") provider = await self.routing_table.get_provider_impl(vector_store_id) + + # Ensure params dict exists and add vector_stores_config for query rewriting + if params is None: + params = {} + params["vector_stores_config"] = self.vector_stores_config + return await provider.query_chunks(vector_store_id, query, params) # OpenAI Vector Stores API endpoints diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml index 7721138c7f..8110dbdf6d 100644 --- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -288,5 +288,13 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 + query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. + Return only the improved query, no explanations: + + + {query} + + + Improved query:' safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml index b791e14882..809b0ef1c9 100644 --- a/src/llama_stack/distributions/ci-tests/run.yaml +++ b/src/llama_stack/distributions/ci-tests/run.yaml @@ -279,5 +279,13 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 + query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. + Return only the improved query, no explanations: + + + {query} + + + Improved query:' safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml index 9c250c05a6..ca47d7f4c4 100644 --- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -291,5 +291,13 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 + query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. + Return only the improved query, no explanations: + + + {query} + + + Improved query:' safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml index 65f9ae326f..15555c2622 100644 --- a/src/llama_stack/distributions/starter-gpu/run.yaml +++ b/src/llama_stack/distributions/starter-gpu/run.yaml @@ -282,5 +282,13 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 + query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. 
+    Return only the improved query, no explanations:
+
+
+    {query}
+
+
+    Improved query:'
 safety:
   default_shield_id: llama-guard
diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
index 3314bb9e96..423b304528 100644
--- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
@@ -288,5 +288,13 @@ vector_stores:
   default_embedding_model:
     provider_id: sentence-transformers
     model_id: nomic-ai/nomic-embed-text-v1.5
+  query_expansion_prompt: 'Expand this query with relevant synonyms and related terms.
+    Return only the improved query, no explanations:
+
+
+    {query}
+
+
+    Improved query:'
 safety:
   default_shield_id: llama-guard
diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml
index e88539e6a7..a0f56fc420 100644
--- a/src/llama_stack/distributions/starter/run.yaml
+++ b/src/llama_stack/distributions/starter/run.yaml
@@ -279,5 +279,13 @@ vector_stores:
   default_embedding_model:
     provider_id: sentence-transformers
     model_id: nomic-ai/nomic-embed-text-v1.5
+  query_expansion_prompt: 'Expand this query with relevant synonyms and related terms.
+    Return only the improved query, no explanations:
+
+
+    {query}
+
+
+    Improved query:'
 safety:
   default_shield_id: llama-guard
diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index bbfd60e253..f33bb29c8c 100644
--- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -611,6 +611,7 @@ async def openai_search_vector_store(
             "max_chunks": max_num_results * CHUNK_MULTIPLIER,
             "score_threshold": score_threshold,
             "mode": search_mode,
+            "rewrite_query": rewrite_query,
         }
         # TODO: Add support for ranking_options.ranker
diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py
index b6a671ddb2..6fbf4a4245 100644
--- a/src/llama_stack/providers/utils/memory/vector_store.py
+++ b/src/llama_stack/providers/utils/memory/vector_store.py
@@ -17,6 +17,7 @@
 from numpy.typing import NDArray
 from pydantic import BaseModel

+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
 from llama_stack.providers.utils.inference.prompt_adapter import (
@@ -34,6 +35,11 @@
     RAGDocument,
     VectorStore,
 )
+from llama_stack_api.inference import (
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAIUserMessageParam,
+)
+from llama_stack_api.models import ModelType

 log = get_logger(name=__name__, category="providers::utils")

@@ -262,6 +268,7 @@ class VectorStoreWithIndex:
     vector_store: VectorStore
     index: EmbeddingIndex
     inference_api: Api.inference
+    vector_stores_config: VectorStoresConfig | None = None

     async def insert_chunks(
         self,
@@ -296,6 +303,11 @@ async def query_chunks(
     ) -> QueryChunksResponse:
         if params is None:
             params = {}
+
+        # Extract configuration if provided by router
+        if "vector_stores_config" in params:
+            self.vector_stores_config = params["vector_stores_config"]
+
         k = params.get("max_chunks", 3)
         mode = params.get("mode")
         score_threshold = params.get("score_threshold", 0.0)
@@ -318,6 +330,11 @@ async def query_chunks(
             reranker_params = {"impact_factor": k_value}

         query_string = interleaved_content_as_str(query)
+
+        # Apply query rewriting if enabled
+        if params.get("rewrite_query", False):
+            query_string = await self._rewrite_query_for_search(query_string)
+
         if mode == "keyword":
             return await self.index.query_keyword(query_string, k, score_threshold)

@@ -333,3 +350,67 @@ async def query_chunks(
             )
         else:
             return await self.index.query_vector(query_vector, k, score_threshold)
+
+    async def _rewrite_query_for_search(self, query: str) -> str:
+        """Rewrite the user query to improve vector search performance.
+
+        :param query: The original user query
+        :returns: The rewritten query optimized for vector search
+        """
+        # Check if query expansion model is configured
+        if not self.vector_stores_config or not self.vector_stores_config.default_query_expansion_model:
+            raise ValueError("No default_query_expansion_model configured for query rewriting")
+
+        # Use the configured model
+        expansion_model = self.vector_stores_config.default_query_expansion_model
+        chat_model = f"{expansion_model.provider_id}/{expansion_model.model_id}"
+
+        # Validate that the model is available and is an LLM
+        try:
+            models_response = await self.inference_api.routing_table.list_models()
+        except Exception as e:
+            raise RuntimeError(f"Failed to list available models for validation: {e}") from e
+
+        model_found = False
+        for model in models_response.data:
+            if model.identifier == chat_model:
+                if model.model_type != ModelType.llm:
+                    raise ValueError(
+                        f"Configured query expansion model '{chat_model}' is not an LLM model "
+                        f"(found type: {model.model_type}). Query rewriting requires an LLM model."
+                    )
+                model_found = True
+                break
+
+        if not model_found:
+            available_llm_models = [m.identifier for m in models_response.data if m.model_type == ModelType.llm]
+            raise ValueError(
+                f"Configured query expansion model '{chat_model}' is not available. "
+                f"Available LLM models: {available_llm_models}"
+            )
+
+        # Use the configured prompt (has a default value)
+        rewrite_prompt = self.vector_stores_config.query_expansion_prompt.format(query=query)

+        chat_request = OpenAIChatCompletionRequestWithExtraBody(
+            model=chat_model,
+            messages=[
+                OpenAIUserMessageParam(
+                    role="user",
+                    content=rewrite_prompt,
+                )
+            ],
+            max_tokens=100,
+        )
+
+        try:
+            response = await self.inference_api.openai_chat_completion(chat_request)
+        except Exception as e:
+            raise RuntimeError(f"Failed to generate rewritten query: {e}") from e
+
+        if response.choices and response.choices[0].message.content:
+            rewritten_query = response.choices[0].message.content.strip()
+            log.info(f"Query rewritten: '{query}' → '{rewritten_query}'")
+            return rewritten_query
+        else:
+            raise RuntimeError("No response received from LLM model for query rewriting")
diff --git a/tests/integration/common/recordings/models-d98e7566147f9d534bc0461f2efe61e3f525c18360a07bb3dda397579e25c27b-a882f554.json b/tests/integration/common/recordings/models-d98e7566147f9d534bc0461f2efe61e3f525c18360a07bb3dda397579e25c27b-a882f554.json
new file mode 100644
index 0000000000..c38561dbb2
--- /dev/null
+++ b/tests/integration/common/recordings/models-d98e7566147f9d534bc0461f2efe61e3f525c18360a07bb3dda397579e25c27b-a882f554.json
@@ -0,0 +1,647 @@
+{
+  "test_id": null,
+  "request": {
+    "method": "POST",
+    "url": "https://generativelanguage.googleapis.com/v1beta/openai/v1/models",
+    "headers": {},
+    "body": {},
+    "endpoint": "/v1/models",
+    "model": ""
+  },
+  "response": {
+    "body": [
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/embedding-gecko-001",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Embedding Gecko"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/gemini-2.5-pro-preview-03-25",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Gemini 2.5 Pro Preview 03-25"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/gemini-2.5-flash-preview-05-20",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Gemini 2.5 Flash Preview 05-20"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/gemini-2.5-flash",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Gemini 2.5 Flash"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/gemini-2.5-flash-lite-preview-06-17",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Gemini 2.5 Flash-Lite Preview 06-17"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/gemini-2.5-pro-preview-05-06",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Gemini 2.5 Pro Preview 05-06"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/gemini-2.5-pro-preview-06-05",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Gemini 2.5 Pro Preview"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/gemini-2.5-pro",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Gemini 2.5 Pro"
+        }
+      },
+      {
+        "__type__": "openai.types.model.Model",
+        "__data__": {
+          "id": "models/gemini-2.0-flash-exp",
+          "created": null,
+          "object": "model",
+          "owned_by": "google",
+          "display_name": "Gemini 
2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp-image-generation", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash (Image Generation) Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-exp-1206", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Experimental 1206" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-01-21", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-1219", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"models/learnlm-2.0-flash-experimental", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "LearnLM 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-1b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 1B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-12b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 12B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-27b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 27B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e2b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E2B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-lite-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash-Lite Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-pro-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Pro Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-robotics-er-1.5-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Robotics-ER 1.5 Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-computer-use-preview-10-2025", + "created": null, + "object": "model", + "owned_by": 
"google", + "display_name": "Gemini 2.5 Computer Use Preview 10-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/text-embedding-004", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Text Embedding 004" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp-03-07", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental 03-07" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/aqa", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Model that performs Attributed Question Answering." + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-2.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 2" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-fast-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1 fast" + } + }, + { + "__type__": 
"openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-live-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-live-2.5-flash-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Live 2.5 Flash Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-live-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Live Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Preview 09-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/lyria-realtime-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Lyria Realtime Experimental" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/recordings/models-3347a38d1dce9c7428f2fae6f6bb25085a6b93298528e5349dfb4438d3d7324f-17b6020a.json b/tests/integration/vector_io/recordings/models-3347a38d1dce9c7428f2fae6f6bb25085a6b93298528e5349dfb4438d3d7324f-17b6020a.json new file mode 100644 index 0000000000..53f86af996 --- /dev/null +++ b/tests/integration/vector_io/recordings/models-3347a38d1dce9c7428f2fae6f6bb25085a6b93298528e5349dfb4438d3d7324f-17b6020a.json @@ -0,0 +1,989 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_search_with_rewrite_query[client_with_models-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-faiss]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-search-api", + "created": 1759514629, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-search-api-2025-10-14", + "created": 1760043960, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1", + "created": 1681940951, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-2", + "created": 1698798177, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-1106", + "created": 1699053241, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-2024-04-09", + "created": 1712601677, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio", + "created": 1756339249, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": 
"openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-tts", + "created": 1742403959, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo", + "created": 1712361441, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime", + "created": 1756271701, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini", + "created": 1744318173, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-2025-08-28", + "created": 1756271773, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini-2025-04-14", + "created": 1744317547, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-1106", + "created": 1698959748, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0125-preview", + "created": 1706037612, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "davinci-002", + "created": 1692634301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-preview", + "created": 1706037777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0613", + "created": 1686588896, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "computer-use-preview-2025-03-11", + "created": 1741377021, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4", + "created": 1687882411, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1", + "created": 1744316542, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-2025-04-14", + "created": 1744315746, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-3", + "created": 1698785189, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "computer-use-preview", + "created": 1734655677, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano", + "created": 1744321707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct-0914", + "created": 1694122472, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "whisper-1", + "created": 1677532384, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-2024-12-17", + "created": 1734326976, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + 
"__data__": { + "id": "gpt-4.1-nano-2025-04-14", + "created": 1744321025, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-16k", + "created": 1683758102, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-2025-08-28", + "created": 1756256146, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct", + "created": 1692901427, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-11-20", + "created": 1739331543, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-05-13", + "created": 1715368132, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview", + "created": 1741391161, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview-2025-03-11", + "created": 1741390858, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-chat-latest", + "created": 1762547951, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview", + "created": 1741388720, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-latest", + "created": 1731689265, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro", + "created": 1742251791, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro-2025-03-19", + "created": 1742251504, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-codex-mini", + "created": 1763007109, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview-2025-03-11", + "created": 1741388170, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-small", + "created": 1705948997, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research-2025-06-26", + "created": 1750866121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research", + "created": 1749685485, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "codex-mini-latest", + "created": 1746673257, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-08-06", + "created": 1722814719, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1", + "created": 1734375816, + "object": "model", + 
"owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-2024-07-18", + "created": 1721172717, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini", + "created": 1721172741, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview", + "created": 1734387424, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1-mini", + "created": 1758845821, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini", + "created": 1754425928, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1", + "created": 1745517030, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini-2025-08-07", + "created": 1754425867, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-2024-09-26", + "created": 1732734466, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5", + "created": 1754425777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-pro", + "created": 1748475349, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-ada-002", + "created": 1671217299, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano-2025-08-07", + "created": 1754426303, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-12-17", + "created": 1734034239, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano", + "created": 1754426384, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-2025-11-13", + "created": 1762800353, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd-1106", + "created": 1699053533, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd", + "created": 1699046015, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-0125", + "created": 1706048358, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-mini", + "created": 1759512027, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-mini-2025-10-06", + "created": 1759512137, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1", + "created": 1762800673, + "object": "model", + "owned_by": 
"system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview-2024-12-17", + "created": 1734115920, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini-2025-01-31", + "created": 1738010200, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini", + "created": 1737146383, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-12-17", + "created": 1733945430, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-pro-2025-06-10", + "created": 1749166761, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-1106-preview", + "created": 1698957206, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "babbage-002", + "created": 1692634615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-chat-latest", + "created": 1754073306, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo", + "created": 1677610602, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-2025-08-07", + "created": 1754075360, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "chatgpt-4o-latest", + "created": 1723515131, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-transcribe", + "created": 1742068463, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sora-2", + "created": 1759708615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sora-2-pro", + "created": 1759708663, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-pro-2025-10-06", + "created": 1759469707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o", + "created": 1715367049, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview", + "created": 1727659998, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-mini", + "created": 1759517133, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-mini-2025-10-06", + "created": 1759517175, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini", + "created": 1744225351, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-2025-04-16", + "created": 1744133506, + "object": "model", + "owned_by": 
"system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2025-06-03", + "created": 1748907838, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview", + "created": 1727460443, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2025-06-03", + "created": 1748908498, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-pro", + "created": 1759469822, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-large", + "created": 1705953180, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-deep-research", + "created": 1749840121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-deep-research-2025-06-26", + "created": 1750865219, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-transcribe", + "created": 1742068596, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview", + "created": 1734387380, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview-2024-12-17", + "created": 1734112601, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3", + "created": 1744225308, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-2025-04-16", + "created": 1744133301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-transcribe-diarize", + "created": 1750798887, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-codex", + "created": 1762988221, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-10-01", + "created": 1727389042, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-10-01", + "created": 1727131766, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-codex", + "created": 1757527818, + "object": "model", + "owned_by": "system" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/recordings/models-3daddf1063512da260bce92df25fe2b01ac70ec104af88f1a482b6a936be14cc-17b6020a.json b/tests/integration/vector_io/recordings/models-3daddf1063512da260bce92df25fe2b01ac70ec104af88f1a482b6a936be14cc-17b6020a.json new file mode 100644 index 0000000000..2d29aebfb2 --- /dev/null +++ b/tests/integration/vector_io/recordings/models-3daddf1063512da260bce92df25fe2b01ac70ec104af88f1a482b6a936be14cc-17b6020a.json @@ -0,0 +1,989 @@ +{ + "test_id": 
"tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_search_with_rewrite_query[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-sqlite-vec]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-search-api", + "created": 1759514629, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-search-api-2025-10-14", + "created": 1760043960, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1", + "created": 1681940951, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-2", + "created": 1698798177, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-1106", + "created": 1699053241, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-2024-04-09", + "created": 1712601677, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio", + "created": 1756339249, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-tts", + "created": 1742403959, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo", + "created": 1712361441, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini", + "created": 1744318173, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime", + "created": 1756271701, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini-2025-04-14", + "created": 1744317547, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-1106", + "created": 1698959748, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0125-preview", + "created": 1706037612, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "davinci-002", + "created": 1692634301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-preview", + "created": 1706037777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0613", + "created": 1686588896, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "computer-use-preview-2025-03-11", + "created": 1741377021, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4", + "created": 1687882411, + "object": "model", + "owned_by": 
"openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1", + "created": 1744316542, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-2025-04-14", + "created": 1744315746, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-3", + "created": 1698785189, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "computer-use-preview", + "created": 1734655677, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano", + "created": 1744321707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct-0914", + "created": 1694122472, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "whisper-1", + "created": 1677532384, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-2024-12-17", + "created": 1734326976, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano-2025-04-14", + "created": 1744321025, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-16k", + "created": 1683758102, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-2025-08-28", + "created": 1756256146, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct", + "created": 1692901427, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-11-20", + "created": 1739331543, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-05-13", + "created": 1715368132, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview", + "created": 1741391161, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview-2025-03-11", + "created": 1741390858, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-chat-latest", + "created": 1762547951, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview", + "created": 1741388720, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-latest", + "created": 1731689265, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro", + "created": 1742251791, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro-2025-03-19", + "created": 1742251504, + 
"object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-codex-mini", + "created": 1763007109, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview-2025-03-11", + "created": 1741388170, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-small", + "created": 1705948997, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research-2025-06-26", + "created": 1750866121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research", + "created": 1749685485, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "codex-mini-latest", + "created": 1746673257, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-08-06", + "created": 1722814719, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1", + "created": 1734375816, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-2024-07-18", + "created": 1721172717, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini", + "created": 1721172741, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview", + "created": 1734387424, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1-mini", + "created": 1758845821, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini", + "created": 1754425928, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1", + "created": 1745517030, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini-2025-08-07", + "created": 1754425867, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-2024-09-26", + "created": 1732734466, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5", + "created": 1754425777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-pro", + "created": 1748475349, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-ada-002", + "created": 1671217299, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano-2025-08-07", + "created": 1754426303, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"gpt-4o-audio-preview-2024-12-17", + "created": 1734034239, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano", + "created": 1754426384, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-2025-11-13", + "created": 1762800353, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd-1106", + "created": 1699053533, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd", + "created": 1699046015, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-0125", + "created": 1706048358, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-mini", + "created": 1759512027, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-mini-2025-10-06", + "created": 1759512137, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1", + "created": 1762800673, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview-2024-12-17", + "created": 1734115920, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini-2025-01-31", + "created": 1738010200, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini", + "created": 1737146383, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-12-17", + "created": 1733945430, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-pro-2025-06-10", + "created": 1749166761, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-1106-preview", + "created": 1698957206, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-codex", + "created": 1757527818, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "babbage-002", + "created": 1692634615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-chat-latest", + "created": 1754073306, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo", + "created": 1677610602, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-2025-08-07", + "created": 1754075360, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "chatgpt-4o-latest", + "created": 1723515131, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"gpt-4o-transcribe", + "created": 1742068463, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sora-2", + "created": 1759708615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sora-2-pro", + "created": 1759708663, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-pro-2025-10-06", + "created": 1759469707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o", + "created": 1715367049, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview", + "created": 1727659998, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-mini", + "created": 1759517133, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-mini-2025-10-06", + "created": 1759517175, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini", + "created": 1744225351, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-2025-04-16", + "created": 1744133506, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2025-06-03", + "created": 1748907838, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview", + "created": 1727460443, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2025-06-03", + "created": 1748908498, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-pro", + "created": 1759469822, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-large", + "created": 1705953180, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-deep-research", + "created": 1749840121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-deep-research-2025-06-26", + "created": 1750865219, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-transcribe", + "created": 1742068596, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview", + "created": 1734387380, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview-2024-12-17", + "created": 1734112601, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3", + "created": 1744225308, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": 
"openai.types.model.Model", + "__data__": { + "id": "o3-2025-04-16", + "created": 1744133301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-transcribe-diarize", + "created": 1750798887, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-codex", + "created": 1762988221, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-10-01", + "created": 1727389042, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-10-01", + "created": 1727131766, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-2025-08-28", + "created": 1756271773, + "object": "model", + "owned_by": "system" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/recordings/models-52b8e996a4f2c944c6bae30ac5274b714ed42392139e2024601a849af31b9253-a882f554.json b/tests/integration/vector_io/recordings/models-52b8e996a4f2c944c6bae30ac5274b714ed42392139e2024601a849af31b9253-a882f554.json new file mode 100644 index 0000000000..be9e401ecf --- /dev/null +++ b/tests/integration/vector_io/recordings/models-52b8e996a4f2c944c6bae30ac5274b714ed42392139e2024601a849af31b9253-a882f554.json @@ -0,0 +1,647 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_search_with_rewrite_query[client_with_models-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-sqlite-vec]", + "request": { + "method": "POST", + "url": "https://generativelanguage.googleapis.com/v1beta/openai/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-gecko-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding Gecko" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-03-25", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview 03-25" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-05-20", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-06-17", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite Preview 06-17" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-05-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview 05-06" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-06-05", + "created": null, + "object": "model", + "owned_by": "google", 
+ "display_name": "Gemini 2.5 Pro Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp-image-generation", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash (Image Generation) Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-exp-1206", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Experimental 1206" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-01-21", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-1219", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"models/gemini-2.5-flash-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/learnlm-2.0-flash-experimental", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "LearnLM 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-1b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 1B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-12b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 12B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-27b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 27B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e2b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E2B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-lite-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash-Lite Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-pro-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Pro Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": 
"Gemini 2.5 Flash-Lite Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-robotics-er-1.5-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Robotics-ER 1.5 Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-computer-use-preview-10-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Computer Use Preview 10-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/text-embedding-004", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Text Embedding 004" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp-03-07", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental 03-07" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/aqa", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Model that performs Attributed Question Answering." 
+ } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-2.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 2" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-fast-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-live-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-live-2.5-flash-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Live 2.5 Flash Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-live-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Live Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Preview 09-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/lyria-realtime-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Lyria Realtime Experimental" 
+ } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/recordings/models-cd7b914c23d754f03d8c0a789ce0df17329547d12cbec8bbcc581dc38df5fdbb-a882f554.json b/tests/integration/vector_io/recordings/models-cd7b914c23d754f03d8c0a789ce0df17329547d12cbec8bbcc581dc38df5fdbb-a882f554.json new file mode 100644 index 0000000000..1ec8ba14bc --- /dev/null +++ b/tests/integration/vector_io/recordings/models-cd7b914c23d754f03d8c0a789ce0df17329547d12cbec8bbcc581dc38df5fdbb-a882f554.json @@ -0,0 +1,647 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_search_with_rewrite_query[client_with_models-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-faiss]", + "request": { + "method": "POST", + "url": "https://generativelanguage.googleapis.com/v1beta/openai/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-gecko-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding Gecko" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-03-25", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview 03-25" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-05-20", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-06-17", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite Preview 06-17" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-05-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview 05-06" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-06-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp-image-generation", + 
"created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash (Image Generation) Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-exp-1206", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Experimental 1206" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-01-21", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-1219", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/learnlm-2.0-flash-experimental", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "LearnLM 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-1b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 1B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 4B" + } + }, + { + "__type__": 
"openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-12b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 12B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-27b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 27B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e2b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E2B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-lite-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash-Lite Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-pro-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Pro Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-robotics-er-1.5-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Robotics-ER 1.5 Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-computer-use-preview-10-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Computer Use Preview 10-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/text-embedding-004", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Text Embedding 004" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"models/gemini-embedding-exp-03-07", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental 03-07" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/aqa", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Model that performs Attributed Question Answering." + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-2.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 2" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-fast-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-live-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-live-2.5-flash-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Live 2.5 Flash Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-live-preview", + "created": null, + 
"object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Live Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Preview 09-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/lyria-realtime-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Lyria Realtime Experimental" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/recordings/models-e3257f8a2366634242cfecf5298c0bc7af95853da455d1f7c9316fb7e9d1419b-a882f554.json b/tests/integration/vector_io/recordings/models-e3257f8a2366634242cfecf5298c0bc7af95853da455d1f7c9316fb7e9d1419b-a882f554.json new file mode 100644 index 0000000000..20fae07fe5 --- /dev/null +++ b/tests/integration/vector_io/recordings/models-e3257f8a2366634242cfecf5298c0bc7af95853da455d1f7c9316fb7e9d1419b-a882f554.json @@ -0,0 +1,647 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_search_with_rewrite_query[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-faiss]", + "request": { + "method": "POST", + "url": "https://generativelanguage.googleapis.com/v1beta/openai/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-gecko-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding Gecko" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-03-25", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview 03-25" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-05-20", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-06-17", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite Preview 06-17" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-05-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview 05-06" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-06-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro", + "created": null, + "object": "model", + "owned_by": 
"google", + "display_name": "Gemini 2.5 Pro" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp-image-generation", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash (Image Generation) Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-exp-1206", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Experimental 1206" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-01-21", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-1219", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + 
"id": "models/gemini-2.5-pro-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/learnlm-2.0-flash-experimental", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "LearnLM 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-1b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 1B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-12b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 12B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-27b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 27B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e2b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E2B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-lite-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash-Lite Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-pro-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Pro Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-robotics-er-1.5-preview", + "created": null, + "object": "model", + "owned_by": "google", + 
"display_name": "Gemini Robotics-ER 1.5 Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-computer-use-preview-10-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Computer Use Preview 10-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/text-embedding-004", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Text Embedding 004" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp-03-07", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental 03-07" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/aqa", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Model that performs Attributed Question Answering." + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-2.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 2" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1" + } + }, + { + 
"__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-fast-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-live-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-live-2.5-flash-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Live 2.5 Flash Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-live-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Live Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Preview 09-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/lyria-realtime-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Lyria Realtime Experimental" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/recordings/models-e9e12107519911efec26749b69f37d360c970ecf6a0334d311302e72afd87d25-17b6020a.json b/tests/integration/vector_io/recordings/models-e9e12107519911efec26749b69f37d360c970ecf6a0334d311302e72afd87d25-17b6020a.json new file mode 100644 index 0000000000..5d44f397b9 --- /dev/null +++ b/tests/integration/vector_io/recordings/models-e9e12107519911efec26749b69f37d360c970ecf6a0334d311302e72afd87d25-17b6020a.json @@ -0,0 +1,989 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_search_with_rewrite_query[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-faiss]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-search-api", + "created": 1759514629, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-search-api-2025-10-14", + "created": 1760043960, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1", + "created": 1681940951, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-2", + "created": 1698798177, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-1106", + "created": 1699053241, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-2024-04-09", + "created": 1712601677, + "object": "model", 
+ "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio", + "created": 1756339249, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-tts", + "created": 1742403959, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo", + "created": 1712361441, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime", + "created": 1756271701, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini", + "created": 1744318173, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-2025-08-28", + "created": 1756271773, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini-2025-04-14", + "created": 1744317547, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-1106", + "created": 1698959748, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0125-preview", + "created": 1706037612, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "davinci-002", + "created": 1692634301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-preview", + "created": 1706037777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0613", + "created": 1686588896, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "computer-use-preview-2025-03-11", + "created": 1741377021, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4", + "created": 1687882411, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1", + "created": 1744316542, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-2025-04-14", + "created": 1744315746, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-3", + "created": 1698785189, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "computer-use-preview", + "created": 1734655677, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano", + "created": 1744321707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct-0914", + "created": 1694122472, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "whisper-1", + "created": 1677532384, + "object": "model", + "owned_by": "openai-internal" + } + }, + { 
+ "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-2024-12-17", + "created": 1734326976, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano-2025-04-14", + "created": 1744321025, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-16k", + "created": 1683758102, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-2025-08-28", + "created": 1756256146, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct", + "created": 1692901427, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-11-20", + "created": 1739331543, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-05-13", + "created": 1715368132, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview", + "created": 1741391161, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview-2025-03-11", + "created": 1741390858, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-chat-latest", + "created": 1762547951, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview", + "created": 1741388720, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-latest", + "created": 1731689265, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro", + "created": 1742251791, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro-2025-03-19", + "created": 1742251504, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-codex-mini", + "created": 1763007109, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview-2025-03-11", + "created": 1741388170, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-small", + "created": 1705948997, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research-2025-06-26", + "created": 1750866121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research", + "created": 1749685485, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "codex-mini-latest", + "created": 1746673257, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-08-06", 
+ "created": 1722814719, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1", + "created": 1734375816, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-2024-07-18", + "created": 1721172717, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini", + "created": 1721172741, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview", + "created": 1734387424, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1-mini", + "created": 1758845821, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini", + "created": 1754425928, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1", + "created": 1745517030, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini-2025-08-07", + "created": 1754425867, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-2024-09-26", + "created": 1732734466, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5", + "created": 1754425777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-pro", + "created": 1748475349, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-ada-002", + "created": 1671217299, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano-2025-08-07", + "created": 1754426303, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-12-17", + "created": 1734034239, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano", + "created": 1754426384, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-2025-11-13", + "created": 1762800353, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd-1106", + "created": 1699053533, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd", + "created": 1699046015, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-0125", + "created": 1706048358, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-mini", + "created": 1759512027, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-mini-2025-10-06", + "created": 
1759512137, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1", + "created": 1762800673, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview-2024-12-17", + "created": 1734115920, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini-2025-01-31", + "created": 1738010200, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini", + "created": 1737146383, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-12-17", + "created": 1733945430, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-pro-2025-06-10", + "created": 1749166761, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-1106-preview", + "created": 1698957206, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-codex", + "created": 1757527818, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "babbage-002", + "created": 1692634615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-chat-latest", + "created": 1754073306, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo", + "created": 1677610602, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-2025-08-07", + "created": 1754075360, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "chatgpt-4o-latest", + "created": 1723515131, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-transcribe", + "created": 1742068463, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sora-2", + "created": 1759708615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sora-2-pro", + "created": 1759708663, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-pro-2025-10-06", + "created": 1759469707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o", + "created": 1715367049, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview", + "created": 1727659998, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-mini", + "created": 1759517133, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-mini-2025-10-06", + "created": 1759517175, + 
"object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini", + "created": 1744225351, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-2025-04-16", + "created": 1744133506, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2025-06-03", + "created": 1748907838, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview", + "created": 1727460443, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2025-06-03", + "created": 1748908498, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-pro", + "created": 1759469822, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-large", + "created": 1705953180, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-deep-research", + "created": 1749840121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-deep-research-2025-06-26", + "created": 1750865219, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-transcribe", + "created": 1742068596, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview", + "created": 1734387380, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview-2024-12-17", + "created": 1734112601, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3", + "created": 1744225308, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-2025-04-16", + "created": 1744133301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-transcribe-diarize", + "created": 1750798887, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-codex", + "created": 1762988221, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-10-01", + "created": 1727389042, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-10-01", + "created": 1727131766, + "object": "model", + "owned_by": "system" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/recordings/models-eddc1eb84009b784ae43f6ef48daf1d685230aaeee60e0a6866183450427d162-a882f554.json b/tests/integration/vector_io/recordings/models-eddc1eb84009b784ae43f6ef48daf1d685230aaeee60e0a6866183450427d162-a882f554.json new file mode 100644 index 
0000000000..ab89e5a65d --- /dev/null +++ b/tests/integration/vector_io/recordings/models-eddc1eb84009b784ae43f6ef48daf1d685230aaeee60e0a6866183450427d162-a882f554.json @@ -0,0 +1,647 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_search_with_rewrite_query[openai_client-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-sqlite-vec]", + "request": { + "method": "POST", + "url": "https://generativelanguage.googleapis.com/v1beta/openai/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-gecko-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding Gecko" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-03-25", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview 03-25" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-05-20", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-06-17", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite Preview 06-17" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-05-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview 05-06" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-06-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-exp-image-generation", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash (Image Generation) Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite 001" + } + }, + { + 
"__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-lite-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash-Lite Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-pro-exp-02-05", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Pro Experimental 02-05" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-exp-1206", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Experimental 1206" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-01-21", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-thinking-exp-1219", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview 05-20" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-pro-preview-tts", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Pro Preview TTS" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/learnlm-2.0-flash-experimental", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "LearnLM 2.0 Flash Experimental" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-1b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 1B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-12b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 12B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3-27b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3 27B" + } + 
}, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e4b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E4B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemma-3n-e2b-it", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemma 3n E2B" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-flash-lite-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Flash-Lite Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-pro-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Pro Latest" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-image", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Nano Banana" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-lite-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash-Lite Preview Sep 2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-robotics-er-1.5-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Robotics-ER 1.5 Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-computer-use-preview-10-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Computer Use Preview 10-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/text-embedding-004", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Text Embedding 004" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp-03-07", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental 03-07" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding Experimental" + } + }, + { + 
"__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-embedding-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Embedding 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/aqa", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Model that performs Attributed Question Answering." + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-preview-06-06", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra (Preview)" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-ultra-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Ultra" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/imagen-4.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Imagen 4 Fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-2.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 2" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.0-fast-generate-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/veo-3.1-fast-generate-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Veo 3.1 fast" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.0-flash-live-001", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.0 Flash 001" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-live-2.5-flash-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini Live 2.5 Flash Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-live-preview", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Live Preview" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/gemini-2.5-flash-native-audio-latest", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Latest" + } + }, + { + "__type__": "openai.types.model.Model", + 
"__data__": { + "id": "models/gemini-2.5-flash-native-audio-preview-09-2025", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Gemini 2.5 Flash Native Audio Preview 09-2025" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "models/lyria-realtime-exp", + "created": null, + "object": "model", + "owned_by": "google", + "display_name": "Lyria Realtime Experimental" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/recordings/models-fdf8866dba534adc96a22dccfcb8ddb92f33ee1f4ab308b1c12e971c13988bf0-17b6020a.json b/tests/integration/vector_io/recordings/models-fdf8866dba534adc96a22dccfcb8ddb92f33ee1f4ab308b1c12e971c13988bf0-17b6020a.json new file mode 100644 index 0000000000..3040751dbe --- /dev/null +++ b/tests/integration/vector_io/recordings/models-fdf8866dba534adc96a22dccfcb8ddb92f33ee1f4ab308b1c12e971c13988bf0-17b6020a.json @@ -0,0 +1,989 @@ +{ + "test_id": "tests/integration/vector_io/test_openai_vector_stores.py::test_openai_vector_store_search_with_rewrite_query[client_with_models-emb=sentence-transformers/nomic-ai/nomic-embed-text-v1.5:dim=768-sqlite-vec]", + "request": { + "method": "POST", + "url": "https://api.openai.com/v1/v1/models", + "headers": {}, + "body": {}, + "endpoint": "/v1/models", + "model": "" + }, + "response": { + "body": [ + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-search-api", + "created": 1759514629, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-search-api-2025-10-14", + "created": 1760043960, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1", + "created": 1681940951, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-2", + "created": 1698798177, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-1106", + "created": 1699053241, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-2024-04-09", + "created": 1712601677, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio", + "created": 1756339249, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-tts", + "created": 1742403959, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo", + "created": 1712361441, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime", + "created": 1756271701, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini", + "created": 1744318173, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-2025-08-28", + "created": 1756271773, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-mini-2025-04-14", + "created": 1744317547, + "object": "model", + 
"owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-1106", + "created": 1698959748, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0125-preview", + "created": 1706037612, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "davinci-002", + "created": 1692634301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-turbo-preview", + "created": 1706037777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4-0613", + "created": 1686588896, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "computer-use-preview-2025-03-11", + "created": 1741377021, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4", + "created": 1687882411, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1", + "created": 1744316542, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-2025-04-14", + "created": 1744315746, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "dall-e-3", + "created": 1698785189, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "computer-use-preview", + "created": 1734655677, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano", + "created": 1744321707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct-0914", + "created": 1694122472, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "whisper-1", + "created": 1677532384, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-2024-12-17", + "created": 1734326976, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4.1-nano-2025-04-14", + "created": 1744321025, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-16k", + "created": 1683758102, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-2025-08-28", + "created": 1756256146, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-instruct", + "created": 1692901427, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-11-20", + "created": 1739331543, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-05-13", + "created": 1715368132, + "object": "model", + 
"owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview", + "created": 1741391161, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-search-preview-2025-03-11", + "created": 1741390858, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-chat-latest", + "created": 1762547951, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview", + "created": 1741388720, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-latest", + "created": 1731689265, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro", + "created": 1742251791, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1-pro-2025-03-19", + "created": 1742251504, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-codex-mini", + "created": 1763007109, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-search-preview-2025-03-11", + "created": 1741388170, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-small", + "created": 1705948997, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research-2025-06-26", + "created": 1750866121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-deep-research", + "created": 1749685485, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "codex-mini-latest", + "created": 1746673257, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-2024-08-06", + "created": 1722814719, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o1", + "created": 1734375816, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-2024-07-18", + "created": 1721172717, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini", + "created": 1721172741, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview", + "created": 1734387424, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1-mini", + "created": 1758845821, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini", + "created": 1754425928, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-image-1", + 
"created": 1745517030, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-mini-2025-08-07", + "created": 1754425867, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "omni-moderation-2024-09-26", + "created": 1732734466, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5", + "created": 1754425777, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-pro", + "created": 1748475349, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-ada-002", + "created": 1671217299, + "object": "model", + "owned_by": "openai-internal" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano-2025-08-07", + "created": 1754426303, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-12-17", + "created": 1734034239, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-nano", + "created": 1754426384, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-2025-11-13", + "created": 1762800353, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd-1106", + "created": 1699053533, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "tts-1-hd", + "created": 1699046015, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo-0125", + "created": 1706048358, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-mini", + "created": 1759512027, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-audio-mini-2025-10-06", + "created": 1759512137, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1", + "created": 1762800673, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-audio-preview-2024-12-17", + "created": 1734115920, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini-2025-01-31", + "created": 1738010200, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-mini", + "created": 1737146383, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-12-17", + "created": 1733945430, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-pro-2025-06-10", + "created": 1749166761, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"gpt-4-1106-preview", + "created": 1698957206, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "babbage-002", + "created": 1692634615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-chat-latest", + "created": 1754073306, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-3.5-turbo", + "created": 1677610602, + "object": "model", + "owned_by": "openai" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-2025-08-07", + "created": 1754075360, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "chatgpt-4o-latest", + "created": 1723515131, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-transcribe", + "created": 1742068463, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sora-2", + "created": 1759708615, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "sora-2-pro", + "created": 1759708663, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-pro-2025-10-06", + "created": 1759469707, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o", + "created": 1715367049, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview", + "created": 1727659998, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-mini", + "created": 1759517133, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-realtime-mini-2025-10-06", + "created": 1759517175, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini", + "created": 1744225351, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o4-mini-2025-04-16", + "created": 1744133506, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2025-06-03", + "created": 1748907838, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview", + "created": 1727460443, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2025-06-03", + "created": 1748908498, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-pro", + "created": 1759469822, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "text-embedding-3-large", + "created": 1705953180, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": 
"o3-deep-research", + "created": 1749840121, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-deep-research-2025-06-26", + "created": 1750865219, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-transcribe", + "created": 1742068596, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview", + "created": 1734387380, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-mini-realtime-preview-2024-12-17", + "created": 1734112601, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3", + "created": 1744225308, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "o3-2025-04-16", + "created": 1744133301, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-transcribe-diarize", + "created": 1750798887, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5.1-codex", + "created": 1762988221, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-audio-preview-2024-10-01", + "created": 1727389042, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-4o-realtime-preview-2024-10-01", + "created": 1727131766, + "object": "model", + "owned_by": "system" + } + }, + { + "__type__": "openai.types.model.Model", + "__data__": { + "id": "gpt-5-codex", + "created": 1757527818, + "object": "model", + "owned_by": "system" + } + } + ], + "is_streaming": false + }, + "id_normalization_mapping": {} +} diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 102f3f00ce..083c839819 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -1698,3 +1698,49 @@ def get_field(obj, field): assert with_flags_embedding is not None, "Embeddings should be included when include_embeddings=True" assert len(with_flags_embedding) > 0, "Embedding should be a non-empty list" assert without_flags_embedding is None, "Embeddings should not be included when include_embeddings=False" + + +@vector_provider_wrapper +def test_openai_vector_store_search_with_rewrite_query( + compat_client_with_empty_stores, + client_with_models, + sample_chunks, + embedding_model_id, + embedding_dimension, + vector_io_provider_id, +): + """Test that rewrite_query parameter is properly passed through and handled.""" + skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) + + compat_client = compat_client_with_empty_stores + llama_client = client_with_models + + # Create vector store and insert chunks + vector_store = compat_client.vector_stores.create( + name="rewrite_test", + extra_body={"embedding_model": embedding_model_id, "provider_id": vector_io_provider_id}, + ) + llama_client.vector_io.insert(vector_store_id=vector_store.id, chunks=sample_chunks) + + # Test rewrite_query=False (default behavior) + 
    response_no_rewrite = compat_client.vector_stores.search(
+        vector_store_id=vector_store.id,
+        query="programming",
+        max_num_results=2,
+        rewrite_query=False,
+    )
+
+    # Test rewrite_query=True (uses an LLM when one is available; falls back gracefully when not)
+    response_with_rewrite = compat_client.vector_stores.search(
+        vector_store_id=vector_store.id,
+        query="programming",
+        max_num_results=2,
+        rewrite_query=True,
+    )
+
+    # Both requests should succeed (rewrite_query=True falls back gracefully when no LLM model is configured)
+    assert response_no_rewrite is not None
+    assert response_with_rewrite is not None
+
+    # Without an LLM rewrite model the fallback path is used, so both responses carry the same data
+    assert len(response_no_rewrite.data) > 0

From ac7cb1ba5a9ae22a12467a227b04afa2ee31582f Mon Sep 17 00:00:00 2001
From: Francisco Javier Arceo
Date: Wed, 19 Nov 2025 10:23:17 -0500
Subject: [PATCH 2/7] Add vector stores config to providers so it can be used
 properly

Signed-off-by: Francisco Javier Arceo
---
 src/llama_stack/core/resolver.py              | 12 +++++++++
 src/llama_stack/core/routers/vector_io.py     |  7 +++++
 .../inline/vector_io/faiss/__init__.py        |  7 +++--
 .../providers/inline/vector_io/faiss/faiss.py | 13 +++++++++-
 .../inline/vector_io/sqlite_vec/__init__.py   |  7 +++--
 .../inline/vector_io/sqlite_vec/sqlite_vec.py | 19 +++++++++++---
 .../remote/vector_io/chroma/__init__.py       |  7 +++--
 .../remote/vector_io/chroma/chroma.py         |  9 +++++--
 .../remote/vector_io/milvus/__init__.py       |  7 +++--
 .../remote/vector_io/milvus/milvus.py         |  6 +++++
 .../remote/vector_io/pgvector/__init__.py     |  9 +++++--
 .../remote/vector_io/pgvector/pgvector.py     | 26 ++++++++++++++++---
 .../remote/vector_io/qdrant/__init__.py       |  7 +++--
 .../remote/vector_io/qdrant/qdrant.py         | 10 ++++++-
 .../remote/vector_io/weaviate/__init__.py     |  9 +++++--
 .../remote/vector_io/weaviate/weaviate.py     | 21 ++++++++++++---
 .../utils/memory/openai_vector_store_mixin.py |  3 +++
 .../providers/utils/memory/vector_store.py    | 19 ++++++++++++--
 18 files changed, 168 insertions(+), 30 deletions(-)

diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py
index 6bc32c2d03..ebdbb0b180 100644
--- a/src/llama_stack/core/resolver.py
+++ b/src/llama_stack/core/resolver.py
@@ -374,6 +374,13 @@ async def instantiate_provider(
         method = "get_adapter_impl"
         args = [config, deps]
 
+        # Add vector_stores_config for vector_io providers
+        if (
+            "vector_stores_config" in inspect.signature(getattr(module, method)).parameters
+            and provider_spec.api == Api.vector_io
+        ):
+            args.append(run_config.vector_stores)
+
     elif isinstance(provider_spec, AutoRoutedProviderSpec):
         method = "get_auto_router_impl"
 
@@ -394,6 +401,11 @@ async def instantiate_provider(
             args.append(policy)
         if "telemetry_enabled" in inspect.signature(getattr(module, method)).parameters and run_config.telemetry:
             args.append(run_config.telemetry.enabled)
+        if (
+            "vector_stores_config" in inspect.signature(getattr(module, method)).parameters
+            and provider_spec.api == Api.vector_io
+        ):
+            args.append(run_config.vector_stores)
 
     fn = getattr(module, method)
     impl = await fn(*args)
diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py
index a7b30642ce..a865a37936 100644
--- a/src/llama_stack/core/routers/vector_io.py
+++ b/src/llama_stack/core/routers/vector_io.py
@@ -103,6 +103,13 @@ async def query_chunks(
         # Ensure params dict exists and add vector_stores_config for query rewriting
         if params is None:
             params = {}
+
+        logger.debug(f"Router vector_stores_config: {self.vector_stores_config}")
+        if self.vector_stores_config and hasattr(self.vector_stores_config, "default_query_expansion_model"):
+            logger.debug(
+                f"Router default_query_expansion_model: {self.vector_stores_config.default_query_expansion_model}"
+            )
+
         params["vector_stores_config"] = self.vector_stores_config
 
         return await provider.query_chunks(vector_store_id, query, params)
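The router hunk above only threads the configuration through params; the rewrite itself is implemented in providers/utils/memory/vector_store.py (changed in this series, not shown in this hunk). A minimal sketch of the intended fallback behavior: run_inference is a hypothetical stand-in for whatever completion callable the provider holds, and the model_id attribute on the configured model is an assumption, not a confirmed field.

# Sketch only, not the actual vector_store.py implementation from this series.
# `run_inference` is a hypothetical async completion callable.
async def maybe_rewrite_query(query: str, config, run_inference) -> str:
    if config is None or config.default_query_expansion_model is None:
        return query  # no rewrite model configured: search with the raw query
    prompt = config.query_expansion_prompt.format(query=query)
    try:
        rewritten = await run_inference(
            model=config.default_query_expansion_model.model_id,  # assumed field name
            prompt=prompt,
        )
        return rewritten.strip() or query
    except Exception:
        return query  # any failure falls back to the original query

This fallback is why the integration test above can assert success for both rewrite_query=False and rewrite_query=True even when no LLM model is configured.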
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
index b834589e38..1b9dcda769 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
@@ -6,16 +6,19 @@
 
 from typing import Any
 
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack_api import Api
 
 from .config import FaissVectorIOConfig
 
 
-async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]):
+async def get_provider_impl(
+    config: FaissVectorIOConfig, deps: dict[Api, Any], vector_stores_config: VectorStoresConfig | None = None
+):
     from .faiss import FaissVectorIOAdapter
 
     assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}"
 
-    impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
+    impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config)
     await impl.initialize()
     return impl
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
index 91a17058bf..ec8afd3884 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -14,6 +14,7 @@
 import numpy as np
 from numpy.typing import NDArray
 
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
@@ -184,10 +185,17 @@ async def query_hybrid(
 
 
 class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
-    def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
+    def __init__(
+        self,
+        config: FaissVectorIOConfig,
+        inference_api: Inference,
+        files_api: Files | None,
+        vector_stores_config: VectorStoresConfig | None = None,
+    ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
+        self.vector_stores_config = vector_stores_config
         self.cache: dict[str, VectorStoreWithIndex] = {}
 
     async def initialize(self) -> None:
@@ -203,6 +211,7 @@ async def initialize(self) -> None:
                 vector_store,
                 await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
                 self.inference_api,
+                self.vector_stores_config,
             )
             self.cache[vector_store.identifier] = index
 
@@ -241,6 +250,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None:
             vector_store=vector_store,
             index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
             inference_api=self.inference_api,
+            vector_stores_config=self.vector_stores_config,
         )
 
     async def list_vector_stores(self) -> list[VectorStore]:
@@ -274,6 +284,7 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto
             vector_store=vector_store,
             index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
             inference_api=self.inference_api,
+            vector_stores_config=self.vector_stores_config,
         )
         self.cache[vector_store_id] = index
         return index
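Every provider entry point takes the new vector_stores_config argument with a default of None, and the resolver (resolver.py above) only passes it when the signature declares it, so out-of-tree providers that never opt in keep loading unchanged. Reduced to a sketch, that dispatch pattern is:

import inspect

# Simplified form of the resolver logic shown earlier: append the optional
# argument only when the entry point actually declares it.
async def call_entry_point(fn, config, deps, vector_stores):
    args = [config, deps]
    if "vector_stores_config" in inspect.signature(fn).parameters:
        args.append(vector_stores)
    return await fn(*args)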
diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
index e84c299dc3..53e2ad135c 100644
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
@@ -6,15 +6,18 @@
 
 from typing import Any
 
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack_api import Api
 
 from .config import SQLiteVectorIOConfig
 
 
-async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]):
+async def get_provider_impl(
+    config: SQLiteVectorIOConfig, deps: dict[Api, Any], vector_stores_config: VectorStoresConfig | None = None
+):
     from .sqlite_vec import SQLiteVecVectorIOAdapter
 
     assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}"
-    impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
+    impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config)
     await impl.initialize()
     return impl
diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index a384a33dc5..b38ce205e6 100644
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -14,6 +14,7 @@
 import sqlite_vec  # type: ignore[import-untyped]
 from numpy.typing import NDArray
 
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.core.storage.kvstore import kvstore_impl
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
@@ -385,10 +386,17 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
     and creates a cache of VectorStoreWithIndex instances (each wrapping a SQLiteVecIndex).
     """
 
-    def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
+    def __init__(
+        self,
+        config,
+        inference_api: Inference,
+        files_api: Files | None,
+        vector_stores_config: VectorStoresConfig | None = None,
+    ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
+        self.vector_stores_config = vector_stores_config
         self.cache: dict[str, VectorStoreWithIndex] = {}
         self.vector_store_table = None
 
@@ -403,7 +411,9 @@ async def initialize(self) -> None:
             index = await SQLiteVecIndex.create(
                 vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
             )
-            self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)
+            self.cache[vector_store.identifier] = VectorStoreWithIndex(
+                vector_store, index, self.inference_api, self.vector_stores_config
+            )
 
         # Load existing OpenAI vector stores into the in-memory cache
         await self.initialize_openai_vector_stores()
@@ -427,7 +437,9 @@ async def register_vector_store(self, vector_store: VectorStore) -> None:
         index = await SQLiteVecIndex.create(
             vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
         )
-        self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)
+        self.cache[vector_store.identifier] = VectorStoreWithIndex(
+            vector_store, index, self.inference_api, self.vector_stores_config
+        )
 
     async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
         if vector_store_id in self.cache:
@@ -452,6 +464,7 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto
                 kvstore=self.kvstore,
             ),
             inference_api=self.inference_api,
+            vector_stores_config=self.vector_stores_config,
         )
         self.cache[vector_store_id] = index
         return index
diff --git a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py
index d774ea643f..3bce41c366 100644
--- a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py
+++ b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py
@@ -4,14 +4,17 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import ChromaVectorIOConfig -async def get_adapter_impl(config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec]): +async def get_adapter_impl( + config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec], vector_stores_config: VectorStoresConfig | None = None +): from .chroma import ChromaVectorIOAdapter - impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py index 491db6d4de..d214dff3a2 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -11,6 +11,7 @@ import chromadb from numpy.typing import NDArray +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig @@ -125,11 +126,13 @@ def __init__( config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig, inference_api: Inference, files_api: Files | None, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) log.info(f"Initializing ChromaVectorIOAdapter with url: {config}") self.config = config self.inference_api = inference_api + self.vector_stores_config = vector_stores_config self.client = None self.cache = {} self.vector_store_table = None @@ -162,7 +165,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: ) ) self.cache[vector_store.identifier] = VectorStoreWithIndex( - vector_store, ChromaIndex(self.client, collection), self.inference_api + vector_store, ChromaIndex(self.client, collection), self.inference_api, self.vector_stores_config ) async def unregister_vector_store(self, vector_store_id: str) -> None: @@ -207,7 +210,9 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto collection = await maybe_await(self.client.get_collection(vector_store_id)) if not collection: raise ValueError(f"Vector DB {vector_store_id} not found in Chroma") - index = VectorStoreWithIndex(vector_store, ChromaIndex(self.client, collection), self.inference_api) + index = VectorStoreWithIndex( + vector_store, ChromaIndex(self.client, collection), self.inference_api, self.vector_stores_config + ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py index 1b703d486c..b73cf9b3ed 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py @@ -4,15 +4,18 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
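On the request path, the router and the mixin share one convention: search options travel in a plain params dict, and the config rides along under the "vector_stores_config" key when the adapter has one. A condensed sketch of that handoff, assuming an illustrative helper name and CHUNK_MULTIPLIER value (the real constant lives in the mixin module):

CHUNK_MULTIPLIER = 5  # illustrative value, not the patch's constant

def build_search_params(adapter, max_num_results: int, score_threshold: float,
                        search_mode: str, rewrite_query: bool) -> dict:
    params = {
        "max_chunks": max_num_results * CHUNK_MULTIPLIER,
        "score_threshold": score_threshold,
        "mode": search_mode,
        "rewrite_query": rewrite_query,
    }
    # attach the config only when the adapter was constructed with one
    if hasattr(adapter, "vector_stores_config"):
        params["vector_stores_config"] = adapter.vector_stores_config
    return params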
+from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import MilvusVectorIOConfig -async def get_adapter_impl(config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec]): +async def get_adapter_impl( + config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec], vector_stores_config: VectorStoresConfig | None = None +): from .milvus import MilvusVectorIOAdapter assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py index 044d678fa0..3b21f3278b 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -11,6 +11,7 @@ from numpy.typing import NDArray from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig @@ -272,12 +273,14 @@ def __init__( config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig, inference_api: Inference, files_api: Files | None, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.cache = {} self.client = None self.inference_api = inference_api + self.vector_stores_config = vector_stores_config self.vector_store_table = None self.metadata_collection_name = "openai_vector_stores_metadata" @@ -298,6 +301,7 @@ async def initialize(self) -> None: kvstore=self.kvstore, ), inference_api=self.inference_api, + vector_stores_config=self.vector_stores_config, ) self.cache[vector_store.identifier] = index if isinstance(self.config, RemoteMilvusVectorIOConfig): @@ -325,6 +329,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store=vector_store, index=MilvusIndex(self.client, vector_store.identifier, consistency_level=consistency_level), inference_api=self.inference_api, + vector_stores_config=self.vector_stores_config, ) self.cache[vector_store.identifier] = index @@ -347,6 +352,7 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store=vector_store, index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore), inference_api=self.inference_api, + vector_stores_config=self.vector_stores_config, ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py index 36018fd954..002caf4b60 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -4,14 +4,19 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
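At query time the rewrite is strictly opt-in: VectorStoreWithIndex.query_chunks only calls _rewrite_query_for_search when the params carry rewrite_query=True, and the rewriter raises when no expansion model is configured. A runnable stub of that contract, with a canned expansion standing in for the real LLM call and a hypothetical model id:

import asyncio

class _RewriteStub:
    """Illustrative stand-in for VectorStoreWithIndex's rewrite hook."""

    def __init__(self, expansion_model: str | None):
        self.expansion_model = expansion_model

    async def _rewrite_query_for_search(self, query: str) -> str:
        if self.expansion_model is None:
            # mirrors the patch's failure mode when nothing is configured
            raise ValueError("No default_query_expansion_model configured for query rewriting")
        return f"{query} (expanded by {self.expansion_model})"  # canned expansion

    async def query_chunks(self, query: str, params: dict | None = None) -> str:
        params = params or {}
        query_string = query
        if params.get("rewrite_query", False):  # opt-in, defaults to off
            query_string = await self._rewrite_query_for_search(query_string)
        return query_string  # the real method goes on to embed and search

async def _demo() -> None:
    store = _RewriteStub(expansion_model="ollama/llama3.2:3b")  # hypothetical model id
    print(await store.query_chunks("python error handling"))                            # unchanged
    print(await store.query_chunks("python error handling", {"rewrite_query": True}))   # rewritten

asyncio.run(_demo())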
+from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import PGVectorVectorIOConfig -async def get_adapter_impl(config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec]): +async def get_adapter_impl( + config: PGVectorVectorIOConfig, + deps: dict[Api, ProviderSpec], + vector_stores_config: VectorStoresConfig | None = None, +): from .pgvector import PGVectorVectorIOAdapter - impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 5c86fb08da..45a38e52a7 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -13,6 +13,7 @@ from psycopg2.extras import Json, execute_values from pydantic import BaseModel, TypeAdapter +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str @@ -330,11 +331,16 @@ def check_distance_metric_availability(self, distance_metric: str) -> None: class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( - self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None + self, + config: PGVectorVectorIOConfig, + inference_api: Inference, + files_api: Files | None = None, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api + self.vector_stores_config = vector_stores_config self.conn = None self.cache = {} self.vector_store_table = None @@ -386,7 +392,12 @@ async def initialize(self) -> None: kvstore=self.kvstore, ) await pgvector_index.initialize() - index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api) + index = VectorStoreWithIndex( + vector_store, + index=pgvector_index, + inference_api=self.inference_api, + vector_stores_config=self.vector_stores_config, + ) self.cache[vector_store.identifier] = index async def shutdown(self) -> None: @@ -413,7 +424,12 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store=vector_store, dimension=vector_store.embedding_dimension, conn=self.conn, kvstore=self.kvstore ) await pgvector_index.initialize() - index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api) + index = VectorStoreWithIndex( + vector_store, + index=pgvector_index, + inference_api=self.inference_api, + vector_stores_config=self.vector_stores_config, + ) self.cache[vector_store.identifier] = index async def unregister_vector_store(self, vector_store_id: str) -> None: @@ -453,7 +469,9 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store = VectorStore.model_validate_json(vector_store_data) index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn) await index.initialize() - self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api) + self.cache[vector_store_id] = VectorStoreWithIndex( + vector_store, 
index, self.inference_api, self.vector_stores_config + ) return self.cache[vector_store_id] async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py index b5b02fe598..76e167b75e 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py @@ -4,14 +4,17 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import QdrantVectorIOConfig -async def get_adapter_impl(config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec]): +async def get_adapter_impl( + config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec], vector_stores_config: VectorStoresConfig | None = None +): from .qdrant import QdrantVectorIOAdapter - impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 4dd78d8343..2de71f7cc3 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -13,6 +13,7 @@ from qdrant_client import AsyncQdrantClient, models from qdrant_client.models import PointStruct +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig @@ -152,12 +153,14 @@ def __init__( config: RemoteQdrantVectorIOConfig | InlineQdrantVectorIOConfig, inference_api: Inference, files_api: Files | None = None, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.client: AsyncQdrantClient = None self.cache = {} self.inference_api = inference_api + self.vector_stores_config = vector_stores_config self.vector_store_table = None self._qdrant_lock = asyncio.Lock() @@ -173,7 +176,10 @@ async def initialize(self) -> None: for vector_store_data in stored_vector_stores: vector_store = VectorStore.model_validate_json(vector_store_data) index = VectorStoreWithIndex( - vector_store, QdrantIndex(self.client, vector_store.identifier), self.inference_api + vector_store, + QdrantIndex(self.client, vector_store.identifier), + self.inference_api, + self.vector_stores_config, ) self.cache[vector_store.identifier] = index self.openai_vector_stores = await self._load_openai_vector_stores() @@ -193,6 +199,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store=vector_store, index=QdrantIndex(self.client, vector_store.identifier), inference_api=self.inference_api, + vector_stores_config=self.vector_stores_config, ) self.cache[vector_store.identifier] = index @@ -224,6 +231,7 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store=vector_store, index=QdrantIndex(client=self.client, collection_name=vector_store.identifier), inference_api=self.inference_api, + 
vector_stores_config=self.vector_stores_config, ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py index 47546d4598..77bf357f4b 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py @@ -4,14 +4,19 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import WeaviateVectorIOConfig -async def get_adapter_impl(config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec]): +async def get_adapter_impl( + config: WeaviateVectorIOConfig, + deps: dict[Api, ProviderSpec], + vector_stores_config: VectorStoresConfig | None = None, +): from .weaviate import WeaviateVectorIOAdapter - impl = WeaviateVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) + impl = WeaviateVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index c15d5f4682..1c52fa84c5 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -12,6 +12,7 @@ from weaviate.classes.init import Auth from weaviate.classes.query import Filter, HybridFusion +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger @@ -262,10 +263,17 @@ async def query_hybrid( class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorStoresProtocolPrivate): - def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: + def __init__( + self, + config: WeaviateVectorIOConfig, + inference_api: Inference, + files_api: Files | None, + vector_stores_config: VectorStoresConfig | None = None, + ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api + self.vector_stores_config = vector_stores_config self.client_cache = {} self.cache = {} self.vector_store_table = None @@ -308,7 +316,10 @@ async def initialize(self) -> None: client = self._get_client() idx = WeaviateIndex(client=client, collection_name=vector_store.identifier, kvstore=self.kvstore) self.cache[vector_store.identifier] = VectorStoreWithIndex( - vector_store=vector_store, index=idx, inference_api=self.inference_api + vector_store=vector_store, + index=idx, + inference_api=self.inference_api, + vector_stores_config=self.vector_stores_config, ) # Load OpenAI vector stores metadata into cache @@ -334,7 +345,10 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: ) self.cache[vector_store.identifier] = VectorStoreWithIndex( - vector_store, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api + vector_store, + WeaviateIndex(client=client, collection_name=sanitized_collection_name), + self.inference_api, + self.vector_stores_config, ) async def unregister_vector_store(self, vector_store_id: str) -> None: @@ 
-369,6 +383,7 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store=vector_store, index=WeaviateIndex(client=client, collection_name=vector_store.identifier), inference_api=self.inference_api, + vector_stores_config=self.vector_stores_config, ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index f33bb29c8c..d83aa6dc1a 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -613,6 +613,9 @@ async def openai_search_vector_store( "mode": search_mode, "rewrite_query": rewrite_query, } + # Add vector_stores_config if available (for query rewriting) + if hasattr(self, "vector_stores_config"): + params["vector_stores_config"] = self.vector_stores_config # TODO: Add support for ranking_options.ranker response = await self.query_chunks( diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index 6fbf4a4245..2a7b94292a 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -333,6 +333,15 @@ async def query_chunks( # Apply query rewriting if enabled if params.get("rewrite_query", False): + if self.vector_stores_config: + log.debug(f"VectorStoreWithIndex received config: {self.vector_stores_config}") + if hasattr(self.vector_stores_config, "default_query_expansion_model"): + log.debug( + f"Config has default_query_expansion_model: {self.vector_stores_config.default_query_expansion_model}" + ) + else: + log.debug("No vector_stores_config found - cannot perform query rewriting") + query_string = await self._rewrite_query_for_search(query_string) if mode == "keyword": @@ -358,8 +367,14 @@ async def _rewrite_query_for_search(self, query: str) -> str: :returns: The rewritten query optimized for vector search """ # Check if query expansion model is configured - if not self.vector_stores_config or not self.vector_stores_config.default_query_expansion_model: - raise ValueError("No default_query_expansion_model configured for query rewriting") + if not self.vector_stores_config: + raise ValueError( + f"No vector_stores_config found! self.vector_stores_config is: {self.vector_stores_config}" + ) + if not self.vector_stores_config.default_query_expansion_model: + raise ValueError( + f"No default_query_expansion_model configured! 
vector_stores_config: {self.vector_stores_config}, default_query_expansion_model: {self.vector_stores_config.default_query_expansion_model}" + ) # Use the configured model expansion_model = self.vector_stores_config.default_query_expansion_model From 2cc7943fd61f69783dff1d810fa4e4fcc03a4b41 Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Wed, 19 Nov 2025 22:41:19 -0500 Subject: [PATCH 3/7] added query expansion model to extra_body Signed-off-by: Francisco Javier Arceo --- .../utils/memory/openai_vector_store_mixin.py | 8 ++ .../providers/utils/memory/vector_store.py | 39 +++++--- src/llama_stack_api/vector_stores.py | 1 + .../test_vector_io_openai_vector_stores.py | 94 +++++++++++++++++++ 4 files changed, 130 insertions(+), 12 deletions(-) diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index d83aa6dc1a..4e67cf24b4 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -379,6 +379,11 @@ async def openai_create_vector_store( f"Using embedding config from extra_body: model='{embedding_model}', dimension={embedding_dimension}" ) + # Extract query expansion model from extra_body if provided + query_expansion_model = extra_body.get("query_expansion_model") + if query_expansion_model: + logger.debug(f"Using per-store query expansion model: {query_expansion_model}") + # use provider_id set by router; fallback to provider's own ID when used directly via --stack-config provider_id = extra_body.get("provider_id") or getattr(self, "__provider_id__", None) # Derive the canonical vector_store_id (allow override, else generate) @@ -402,6 +407,7 @@ async def openai_create_vector_store( provider_id=provider_id, provider_resource_id=vector_store_id, vector_store_name=params.name, + query_expansion_model=query_expansion_model, ) await self.register_vector_store(vector_store) @@ -607,12 +613,14 @@ async def openai_search_vector_store( if ranking_options and ranking_options.score_threshold is not None else 0.0 ) + params = { "max_chunks": max_num_results * CHUNK_MULTIPLIER, "score_threshold": score_threshold, "mode": search_mode, "rewrite_query": rewrite_query, } + # Add vector_stores_config if available (for query rewriting) if hasattr(self, "vector_stores_config"): params["vector_stores_config"] = self.vector_stores_config diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index 2a7b94292a..71d61787af 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -17,7 +17,7 @@ from numpy.typing import NDArray from pydantic import BaseModel -from llama_stack.core.datatypes import VectorStoresConfig +from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig from llama_stack.log import get_logger from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.providers.utils.inference.prompt_adapter import ( @@ -366,18 +366,33 @@ async def _rewrite_query_for_search(self, query: str) -> str: :param query: The original user query :returns: The rewritten query optimized for vector search """ - # Check if query expansion model is configured - if not self.vector_stores_config: - raise ValueError( - f"No vector_stores_config found!
self.vector_stores_config is: {self.vector_stores_config}" - ) - if not self.vector_stores_config.default_query_expansion_model: - raise ValueError( - f"No default_query_expansion_model configured! vector_stores_config: {self.vector_stores_config}, default_query_expansion_model: {self.vector_stores_config.default_query_expansion_model}" - ) + expansion_model = None + + # Check for per-store query expansion model first + if self.vector_store.query_expansion_model: + # Parse the model string into provider_id and model_id + model_parts = self.vector_store.query_expansion_model.split("/", 1) + if len(model_parts) == 2: + expansion_model = QualifiedModel(provider_id=model_parts[0], model_id=model_parts[1]) + log.debug(f"Using per-store query expansion model: {expansion_model}") + else: + log.warning( + f"Invalid query_expansion_model format: {self.vector_store.query_expansion_model}. Expected 'provider_id/model_id'" + ) + + # Fall back to global default if no per-store model + if not expansion_model: + if not self.vector_stores_config: + raise ValueError( + f"No vector_stores_config found and no per-store query_expansion_model! self.vector_stores_config is: {self.vector_stores_config}" + ) + if not self.vector_stores_config.default_query_expansion_model: + raise ValueError( + f"No default_query_expansion_model configured and no per-store query_expansion_model! vector_stores_config: {self.vector_stores_config}, default_query_expansion_model: {self.vector_stores_config.default_query_expansion_model}" + ) + expansion_model = self.vector_stores_config.default_query_expansion_model + log.debug(f"Using global default query expansion model: {expansion_model}") - # Use the configured model - expansion_model = self.vector_stores_config.default_query_expansion_model chat_model = f"{expansion_model.provider_id}/{expansion_model.model_id}" # Validate that the model is available and is an LLM diff --git a/src/llama_stack_api/vector_stores.py b/src/llama_stack_api/vector_stores.py index 0a1e6c53c5..4c0d1ced25 100644 --- a/src/llama_stack_api/vector_stores.py +++ b/src/llama_stack_api/vector_stores.py @@ -25,6 +25,7 @@ class VectorStore(Resource): embedding_model: str embedding_dimension: int vector_store_name: str | None = None + query_expansion_model: str | None = None @property def vector_store_id(self) -> str: diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 3797abb2c4..cfda7aa5ee 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -1230,3 +1230,97 @@ async def test_embedding_config_required_model_missing(vector_io_adapter): with pytest.raises(ValueError, match="embedding_model is required"): await vector_io_adapter.openai_create_vector_store(params) + + +async def test_query_expansion_functionality(vector_io_adapter): + """Test query expansion with per-store models, global defaults, and error validation.""" + from unittest.mock import MagicMock + + from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig + from llama_stack.providers.utils.memory.vector_store import VectorStoreWithIndex + from llama_stack_api.models import Model, ModelType + + vector_io_adapter.register_vector_store = AsyncMock() + vector_io_adapter.__provider_id__ = "test_provider" + + # Test 1: Per-store model usage + params = OpenAICreateVectorStoreRequestWithExtraBody( + name="test_store", + 
metadata={}, + **{"embedding_model": "test/embedding", "query_expansion_model": "test/llama-model"}, + ) + await vector_io_adapter.openai_create_vector_store(params) + call_args = vector_io_adapter.register_vector_store.call_args[0][0] + assert call_args.query_expansion_model == "test/llama-model" + + # Test 2: Global default fallback + vector_io_adapter.register_vector_store.reset_mock() + params_no_model = OpenAICreateVectorStoreRequestWithExtraBody( + name="test_store2", metadata={}, **{"embedding_model": "test/embedding"} + ) + await vector_io_adapter.openai_create_vector_store(params_no_model) + call_args2 = vector_io_adapter.register_vector_store.call_args[0][0] + assert call_args2.query_expansion_model is None + + # Test query rewriting scenarios + mock_inference_api = MagicMock() + + # Per-store model scenario + mock_vector_store = MagicMock() + mock_vector_store.query_expansion_model = "test/llama-model" + mock_inference_api.routing_table.list_models = AsyncMock( + return_value=MagicMock( + data=[Model(identifier="test/llama-model", provider_id="test", model_type=ModelType.llm)] + ) + ) + mock_inference_api.openai_chat_completion = AsyncMock( + return_value=MagicMock(choices=[MagicMock(message=MagicMock(content="per-store expanded"))]) + ) + + vector_store_with_index = VectorStoreWithIndex( + vector_store=mock_vector_store, + index=MagicMock(), + inference_api=mock_inference_api, + vector_stores_config=VectorStoresConfig( + default_query_expansion_model=QualifiedModel(provider_id="global", model_id="default") + ), + ) + + result = await vector_store_with_index._rewrite_query_for_search("test") + assert mock_inference_api.openai_chat_completion.call_args[0][0].model == "test/llama-model" + assert result == "per-store expanded" + + # Global default fallback scenario + mock_inference_api.reset_mock() + mock_vector_store.query_expansion_model = None + mock_inference_api.routing_table.list_models = AsyncMock( + return_value=MagicMock( + data=[Model(identifier="global/default", provider_id="global", model_type=ModelType.llm)] + ) + ) + mock_inference_api.openai_chat_completion = AsyncMock( + return_value=MagicMock(choices=[MagicMock(message=MagicMock(content="global expanded"))]) + ) + + result = await vector_store_with_index._rewrite_query_for_search("test") + assert mock_inference_api.openai_chat_completion.call_args[0][0].model == "global/default" + assert result == "global expanded" + + # Test 3: Error cases + # Model not found + mock_vector_store.query_expansion_model = "missing/model" + mock_inference_api.routing_table.list_models = AsyncMock(return_value=MagicMock(data=[])) + + with pytest.raises(ValueError, match="Configured query expansion model .* is not available"): + await vector_store_with_index._rewrite_query_for_search("test") + + # Non-LLM model + mock_vector_store.query_expansion_model = "test/embedding-model" + mock_inference_api.routing_table.list_models = AsyncMock( + return_value=MagicMock( + data=[Model(identifier="test/embedding-model", provider_id="test", model_type=ModelType.embedding)] + ) + ) + + with pytest.raises(ValueError, match="is not an LLM model.*Query rewriting requires an LLM model"): + await vector_store_with_index._rewrite_query_for_search("test") From d887f1f8bb8d31d3af599fb510edc0b1c6befe7d Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Fri, 21 Nov 2025 11:27:25 -0500 Subject: [PATCH 4/7] refactor to only configure model at build time Signed-off-by: Francisco Javier Arceo --- src/llama_stack/core/datatypes.py | 11 +-
src/llama_stack/core/resolver.py | 12 -- src/llama_stack/core/routers/vector_io.py | 13 -- src/llama_stack/core/stack.py | 75 +++++--- .../ci-tests/run-with-postgres-store.yaml | 2 + .../distributions/ci-tests/run.yaml | 2 + .../starter-gpu/run-with-postgres-store.yaml | 2 + .../distributions/starter-gpu/run.yaml | 2 + .../starter/run-with-postgres-store.yaml | 2 + .../distributions/starter/run.yaml | 2 + .../inline/vector_io/faiss/__init__.py | 7 +- .../providers/inline/vector_io/faiss/faiss.py | 6 - .../inline/vector_io/sqlite_vec/__init__.py | 7 +- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 12 +- .../remote/vector_io/chroma/__init__.py | 7 +- .../remote/vector_io/chroma/chroma.py | 9 +- .../remote/vector_io/milvus/__init__.py | 7 +- .../remote/vector_io/milvus/milvus.py | 6 - .../remote/vector_io/pgvector/__init__.py | 4 +- .../remote/vector_io/pgvector/pgvector.py | 9 +- .../remote/vector_io/qdrant/__init__.py | 7 +- .../remote/vector_io/qdrant/qdrant.py | 6 - .../remote/vector_io/weaviate/__init__.py | 4 +- .../remote/vector_io/weaviate/weaviate.py | 6 - .../providers/utils/memory/__init__.py | 4 + .../providers/utils/memory/constants.py | 8 + .../utils/memory/openai_vector_store_mixin.py | 9 - .../utils/memory/query_expansion_config.py | 37 ++++ .../providers/utils/memory/vector_store.py | 146 ++++----------- src/llama_stack_api/vector_stores.py | 1 - .../test_vector_io_openai_vector_stores.py | 168 ++++++++++-------- 31 files changed, 279 insertions(+), 314 deletions(-) create mode 100644 src/llama_stack/providers/utils/memory/constants.py create mode 100644 src/llama_stack/providers/utils/memory/query_expansion_config.py diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 49747d4770..a32e1d8a26 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -18,6 +18,7 @@ StorageConfig, ) from llama_stack.log import LoggingConfig +from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT from llama_stack_api import ( Api, Benchmark, @@ -381,9 +382,17 @@ class VectorStoresConfig(BaseModel): description="Default LLM model for query expansion/rewriting in vector search.", ) query_expansion_prompt: str = Field( - default="Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\n{query}\n\nImproved query:", + default=DEFAULT_QUERY_EXPANSION_PROMPT, description="Prompt template for query expansion. 
Use {query} as placeholder for the original query.", ) + query_expansion_max_tokens: int = Field( + default=100, + description="Maximum number of tokens for query expansion responses.", + ) + query_expansion_temperature: float = Field( + default=0.3, + description="Temperature for query expansion model (0.0 = deterministic, 1.0 = creative).", + ) class SafetyConfig(BaseModel): diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py index ebdbb0b180..6bc32c2d03 100644 --- a/src/llama_stack/core/resolver.py +++ b/src/llama_stack/core/resolver.py @@ -374,13 +374,6 @@ async def instantiate_provider( method = "get_adapter_impl" args = [config, deps] - # Add vector_stores_config for vector_io providers - if ( - "vector_stores_config" in inspect.signature(getattr(module, method)).parameters - and provider_spec.api == Api.vector_io - ): - args.append(run_config.vector_stores) - elif isinstance(provider_spec, AutoRoutedProviderSpec): method = "get_auto_router_impl" @@ -401,11 +394,6 @@ async def instantiate_provider( args.append(policy) if "telemetry_enabled" in inspect.signature(getattr(module, method)).parameters and run_config.telemetry: args.append(run_config.telemetry.enabled) - if ( - "vector_stores_config" in inspect.signature(getattr(module, method)).parameters - and provider_spec.api == Api.vector_io - ): - args.append(run_config.vector_stores) fn = getattr(module, method) impl = await fn(*args) diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index a865a37936..5256dda449 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -99,19 +99,6 @@ async def query_chunks( ) -> QueryChunksResponse: logger.debug(f"VectorIORouter.query_chunks: {vector_store_id}") provider = await self.routing_table.get_provider_impl(vector_store_id) - - # Ensure params dict exists and add vector_stores_config for query rewriting - if params is None: - params = {} - - logger.debug(f"Router vector_stores_config: {self.vector_stores_config}") - if self.vector_stores_config and hasattr(self.vector_stores_config, "default_query_expansion_model"): - logger.debug( - f"Router default_query_expansion_model: {self.vector_stores_config.default_query_expansion_model}" - ) - - params["vector_stores_config"] = self.vector_stores_config - return await provider.query_chunks(vector_store_id, query, params) # OpenAI Vector Stores API endpoints diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index 8ba1f2afdf..dae6e8ec9b 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -144,35 +144,62 @@ async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig if vector_stores_config is None: return + # Validate default embedding model default_embedding_model = vector_stores_config.default_embedding_model - if default_embedding_model is None: - return + if default_embedding_model is not None: + provider_id = default_embedding_model.provider_id + model_id = default_embedding_model.model_id + default_model_id = f"{provider_id}/{model_id}" - provider_id = default_embedding_model.provider_id - model_id = default_embedding_model.model_id - default_model_id = f"{provider_id}/{model_id}" + if Api.models not in impls: + raise ValueError( + f"Models API is not available but vector_stores config requires model '{default_model_id}'" + ) - if Api.models not in impls: - raise ValueError(f"Models API is not available but vector_stores config requires model 
'{default_model_id}'") + models_impl = impls[Api.models] + response = await models_impl.list_models() + models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"} - models_impl = impls[Api.models] - response = await models_impl.list_models() - models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"} + default_model = models_list.get(default_model_id) + if default_model is None: + raise ValueError( + f"Embedding model '{default_model_id}' not found. Available embedding models: {models_list}" + ) - default_model = models_list.get(default_model_id) - if default_model is None: - raise ValueError(f"Embedding model '{default_model_id}' not found. Available embedding models: {models_list}") + embedding_dimension = default_model.metadata.get("embedding_dimension") + if embedding_dimension is None: + raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata") - embedding_dimension = default_model.metadata.get("embedding_dimension") - if embedding_dimension is None: - raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata") + try: + int(embedding_dimension) + except ValueError as err: + raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err - try: - int(embedding_dimension) - except ValueError as err: - raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err + logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})") + + # Validate default query expansion model + default_query_expansion_model = vector_stores_config.default_query_expansion_model + if default_query_expansion_model is not None: + provider_id = default_query_expansion_model.provider_id + model_id = default_query_expansion_model.model_id + query_model_id = f"{provider_id}/{model_id}" + + if Api.models not in impls: + raise ValueError( + f"Models API is not available but vector_stores config requires query expansion model '{query_model_id}'" + ) + + models_impl = impls[Api.models] + response = await models_impl.list_models() + llm_models_list = {m.identifier: m for m in response.data if m.model_type == "llm"} - logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})") + query_expansion_model = llm_models_list.get(query_model_id) + if query_expansion_model is None: + raise ValueError( + f"Query expansion model '{query_model_id}' not found. 
Available LLM models: {list(llm_models_list.keys())}" + ) + + logger.debug(f"Validated default query expansion model: {query_model_id}") async def validate_safety_config(safety_config: SafetyConfig | None, impls: dict[Api, Any]): @@ -437,6 +464,12 @@ async def initialize(self): await refresh_registry_once(impls) await validate_vector_stores_config(self.run_config.vector_stores, impls) await validate_safety_config(self.run_config.safety, impls) + + # Set global query expansion configuration from stack config + from llama_stack.providers.utils.memory.query_expansion_config import set_default_query_expansion_config + + set_default_query_expansion_config(self.run_config.vector_stores) + self.impls = impls def create_registry_refresh_task(self): diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml index 8110dbdf6d..219ffdce3b 100644 --- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -296,5 +296,7 @@ vector_stores: Improved query:' + query_expansion_max_tokens: 100 + query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml index 809b0ef1c9..e352e92688 100644 --- a/src/llama_stack/distributions/ci-tests/run.yaml +++ b/src/llama_stack/distributions/ci-tests/run.yaml @@ -287,5 +287,7 @@ vector_stores: Improved query:' + query_expansion_max_tokens: 100 + query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml index ca47d7f4c4..e81febb0ed 100644 --- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -299,5 +299,7 @@ vector_stores: Improved query:' + query_expansion_max_tokens: 100 + query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml index 15555c2622..edae6f66db 100644 --- a/src/llama_stack/distributions/starter-gpu/run.yaml +++ b/src/llama_stack/distributions/starter-gpu/run.yaml @@ -290,5 +290,7 @@ vector_stores: Improved query:' + query_expansion_max_tokens: 100 + query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index 423b304528..9ed74d96d1 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -296,5 +296,7 @@ vector_stores: Improved query:' + query_expansion_max_tokens: 100 + query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml index a0f56fc420..73679a1520 100644 --- a/src/llama_stack/distributions/starter/run.yaml +++ b/src/llama_stack/distributions/starter/run.yaml @@ -287,5 +287,7 @@ vector_stores: Improved query:' + query_expansion_max_tokens: 100 + query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git 
a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py index 1b9dcda769..b834589e38 100644 --- a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py +++ b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py @@ -6,19 +6,16 @@ from typing import Any -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api from .config import FaissVectorIOConfig -async def get_provider_impl( - config: FaissVectorIOConfig, deps: dict[Api, Any], vector_stores_config: VectorStoresConfig | None = None -): +async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]): from .faiss import FaissVectorIOAdapter assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) + impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py index ec8afd3884..e2aab1a250 100644 --- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -14,7 +14,6 @@ import numpy as np from numpy.typing import NDArray -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin @@ -190,12 +189,10 @@ def __init__( config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None, - vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.vector_stores_config = vector_stores_config self.cache: dict[str, VectorStoreWithIndex] = {} async def initialize(self) -> None: @@ -211,7 +208,6 @@ async def initialize(self) -> None: vector_store, await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier), self.inference_api, - self.vector_stores_config, ) self.cache[vector_store.identifier] = index @@ -250,7 +246,6 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store=vector_store, index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) async def list_vector_stores(self) -> list[VectorStore]: @@ -284,7 +279,6 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store=vector_store, index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py index 53e2ad135c..e84c299dc3 100644 --- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py @@ -6,18 +6,15 @@ from typing import Any -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api from .config 
import SQLiteVectorIOConfig -async def get_provider_impl( - config: SQLiteVectorIOConfig, deps: dict[Api, Any], vector_stores_config: VectorStoresConfig | None = None -): +async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]): from .sqlite_vec import SQLiteVecVectorIOAdapter assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) + impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index b38ce205e6..bc6226c845 100644 --- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -14,7 +14,6 @@ import sqlite_vec # type: ignore[import-untyped] from numpy.typing import NDArray -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin @@ -391,12 +390,10 @@ def __init__( config, inference_api: Inference, files_api: Files | None, - vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.vector_stores_config = vector_stores_config self.cache: dict[str, VectorStoreWithIndex] = {} self.vector_store_table = None @@ -411,9 +408,7 @@ async def initialize(self) -> None: index = await SQLiteVecIndex.create( vector_store.embedding_dimension, self.config.db_path, vector_store.identifier ) - self.cache[vector_store.identifier] = VectorStoreWithIndex( - vector_store, index, self.inference_api, self.vector_stores_config - ) + self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api) # Load existing OpenAI vector stores into the in-memory cache await self.initialize_openai_vector_stores() @@ -437,9 +432,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: index = await SQLiteVecIndex.create( vector_store.embedding_dimension, self.config.db_path, vector_store.identifier ) - self.cache[vector_store.identifier] = VectorStoreWithIndex( - vector_store, index, self.inference_api, self.vector_stores_config - ) + self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api) async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: if vector_store_id in self.cache: @@ -464,7 +457,6 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto kvstore=self.kvstore, ), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py index 3bce41c366..d774ea643f 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py @@ -4,17 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
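After this refactor the adapters no longer receive the config at all; the stack publishes it once at startup through set_default_query_expansion_config in the new query_expansion_config.py. That file's body is not shown in this series, so the following is only a plausible sketch of a module-level holder, and the getter name is an assumption:

# hypothetical sketch of providers/utils/memory/query_expansion_config.py;
# only set_default_query_expansion_config is named in the patch, the getter
# below is assumed for illustration
from llama_stack.core.datatypes import VectorStoresConfig

_default_config: VectorStoresConfig | None = None

def set_default_query_expansion_config(config: VectorStoresConfig | None) -> None:
    global _default_config
    _default_config = config

def get_default_query_expansion_config() -> VectorStoresConfig | None:
    return _default_config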
-from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import ChromaVectorIOConfig -async def get_adapter_impl( - config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec], vector_stores_config: VectorStoresConfig | None = None -): +async def get_adapter_impl(config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec]): from .chroma import ChromaVectorIOAdapter - impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) + impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py index d214dff3a2..491db6d4de 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -11,7 +11,6 @@ import chromadb from numpy.typing import NDArray -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig @@ -126,13 +125,11 @@ def __init__( config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig, inference_api: Inference, files_api: Files | None, - vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) log.info(f"Initializing ChromaVectorIOAdapter with url: {config}") self.config = config self.inference_api = inference_api - self.vector_stores_config = vector_stores_config self.client = None self.cache = {} self.vector_store_table = None @@ -165,7 +162,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: ) ) self.cache[vector_store.identifier] = VectorStoreWithIndex( - vector_store, ChromaIndex(self.client, collection), self.inference_api, self.vector_stores_config + vector_store, ChromaIndex(self.client, collection), self.inference_api ) async def unregister_vector_store(self, vector_store_id: str) -> None: @@ -210,9 +207,7 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto collection = await maybe_await(self.client.get_collection(vector_store_id)) if not collection: raise ValueError(f"Vector DB {vector_store_id} not found in Chroma") - index = VectorStoreWithIndex( - vector_store, ChromaIndex(self.client, collection), self.inference_api, self.vector_stores_config - ) + index = VectorStoreWithIndex(vector_store, ChromaIndex(self.client, collection), self.inference_api) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py index b73cf9b3ed..1b703d486c 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py @@ -4,18 +4,15 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
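For reference, the two new generation knobs land next to the existing model and prompt fields, so a fully specified config now looks roughly like this (the model id is a placeholder; the numeric values are the shipped defaults):

# sketch: VectorStoresConfig with the fields added in this series
from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig

config = VectorStoresConfig(
    default_query_expansion_model=QualifiedModel(provider_id="ollama", model_id="llama3.2:3b"),  # placeholder
    query_expansion_max_tokens=100,     # default cap on the rewritten query
    query_expansion_temperature=0.3,    # low temperature keeps rewrites focused
)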
-from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import MilvusVectorIOConfig -async def get_adapter_impl( - config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec], vector_stores_config: VectorStoresConfig | None = None -): +async def get_adapter_impl(config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec]): from .milvus import MilvusVectorIOAdapter assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}" - impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) + impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py index 3b21f3278b..044d678fa0 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -11,7 +11,6 @@ from numpy.typing import NDArray from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig @@ -273,14 +272,12 @@ def __init__( config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig, inference_api: Inference, files_api: Files | None, - vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.cache = {} self.client = None self.inference_api = inference_api - self.vector_stores_config = vector_stores_config self.vector_store_table = None self.metadata_collection_name = "openai_vector_stores_metadata" @@ -301,7 +298,6 @@ async def initialize(self) -> None: kvstore=self.kvstore, ), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store.identifier] = index if isinstance(self.config, RemoteMilvusVectorIOConfig): @@ -329,7 +325,6 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store=vector_store, index=MilvusIndex(self.client, vector_store.identifier, consistency_level=consistency_level), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store.identifier] = index @@ -352,7 +347,6 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store=vector_store, index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py index 002caf4b60..ea0139815a 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
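The startup validation added in stack.py follows the same shape for both configured models: resolve the qualified id to "provider_id/model_id", list registered models, and fail fast on a miss. Condensed here for the query expansion half (the helper name is illustrative; the body mirrors the validate_vector_stores_config additions above):

async def _check_query_expansion_model(models_impl, qualified_model) -> None:
    query_model_id = f"{qualified_model.provider_id}/{qualified_model.model_id}"
    response = await models_impl.list_models()
    # query rewriting requires an LLM, so embedding models are filtered out
    llm_models = {m.identifier: m for m in response.data if m.model_type == "llm"}
    if query_model_id not in llm_models:
        raise ValueError(
            f"Query expansion model '{query_model_id}' not found. "
            f"Available LLM models: {list(llm_models.keys())}"
        )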
-from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import PGVectorVectorIOConfig @@ -13,10 +12,9 @@ async def get_adapter_impl( config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec], - vector_stores_config: VectorStoresConfig | None = None, ): from .pgvector import PGVectorVectorIOAdapter - impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) + impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 45a38e52a7..fe1b8ce35e 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -13,7 +13,6 @@ from psycopg2.extras import Json, execute_values from pydantic import BaseModel, TypeAdapter -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str @@ -335,12 +334,10 @@ def __init__( config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None, - vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.vector_stores_config = vector_stores_config self.conn = None self.cache = {} self.vector_store_table = None @@ -396,7 +393,6 @@ async def initialize(self) -> None: vector_store, index=pgvector_index, inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store.identifier] = index @@ -428,7 +424,6 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store, index=pgvector_index, inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store.identifier] = index @@ -469,9 +464,7 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store = VectorStore.model_validate_json(vector_store_data) index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn) await index.initialize() - self.cache[vector_store_id] = VectorStoreWithIndex( - vector_store, index, self.inference_api, self.vector_stores_config - ) + self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api) return self.cache[vector_store_id] async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py index 76e167b75e..b5b02fe598 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py @@ -4,17 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import QdrantVectorIOConfig -async def get_adapter_impl( - config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec], vector_stores_config: VectorStoresConfig | None = None -): +async def get_adapter_impl(config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec]): from .qdrant import QdrantVectorIOAdapter - impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) + impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 2de71f7cc3..dc65466460 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -13,7 +13,6 @@ from qdrant_client import AsyncQdrantClient, models from qdrant_client.models import PointStruct -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig @@ -153,14 +152,12 @@ def __init__( config: RemoteQdrantVectorIOConfig | InlineQdrantVectorIOConfig, inference_api: Inference, files_api: Files | None = None, - vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.client: AsyncQdrantClient = None self.cache = {} self.inference_api = inference_api - self.vector_stores_config = vector_stores_config self.vector_store_table = None self._qdrant_lock = asyncio.Lock() @@ -179,7 +176,6 @@ async def initialize(self) -> None: vector_store, QdrantIndex(self.client, vector_store.identifier), self.inference_api, - self.vector_stores_config, ) self.cache[vector_store.identifier] = index self.openai_vector_stores = await self._load_openai_vector_stores() @@ -199,7 +195,6 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store=vector_store, index=QdrantIndex(self.client, vector_store.identifier), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store.identifier] = index @@ -231,7 +226,6 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store=vector_store, index=QdrantIndex(client=self.client, collection_name=vector_store.identifier), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py index 77bf357f4b..a13cca8a16 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.core.datatypes import VectorStoresConfig from llama_stack_api import Api, ProviderSpec from .config import WeaviateVectorIOConfig @@ -13,10 +12,9 @@ async def get_adapter_impl( config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec], - vector_stores_config: VectorStoresConfig | None = None, ): from .weaviate import WeaviateVectorIOAdapter - impl = WeaviateVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files), vector_stores_config) + impl = WeaviateVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) await impl.initialize() return impl diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index 1c52fa84c5..67ec523d7e 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -12,7 +12,6 @@ from weaviate.classes.init import Auth from weaviate.classes.query import Filter, HybridFusion -from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.core.storage.kvstore import kvstore_impl from llama_stack.log import get_logger @@ -268,12 +267,10 @@ def __init__( config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None, - vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.vector_stores_config = vector_stores_config self.client_cache = {} self.cache = {} self.vector_store_table = None @@ -319,7 +316,6 @@ async def initialize(self) -> None: vector_store=vector_store, index=idx, inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) # Load OpenAI vector stores metadata into cache @@ -348,7 +344,6 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api, - self.vector_stores_config, ) async def unregister_vector_store(self, vector_store_id: str) -> None: @@ -383,7 +378,6 @@ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> Vecto vector_store=vector_store, index=WeaviateIndex(client=client, collection_name=vector_store.identifier), inference_api=self.inference_api, - vector_stores_config=self.vector_stores_config, ) self.cache[vector_store_id] = index return index diff --git a/src/llama_stack/providers/utils/memory/__init__.py b/src/llama_stack/providers/utils/memory/__init__.py index 756f351d88..5e0942402e 100644 --- a/src/llama_stack/providers/utils/memory/__init__.py +++ b/src/llama_stack/providers/utils/memory/__init__.py @@ -3,3 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. + +from .constants import DEFAULT_QUERY_EXPANSION_PROMPT + +__all__ = ["DEFAULT_QUERY_EXPANSION_PROMPT"] diff --git a/src/llama_stack/providers/utils/memory/constants.py b/src/llama_stack/providers/utils/memory/constants.py new file mode 100644 index 0000000000..d8703bbceb --- /dev/null +++ b/src/llama_stack/providers/utils/memory/constants.py @@ -0,0 +1,8 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +# Default prompt template for query expansion in vector search +DEFAULT_QUERY_EXPANSION_PROMPT = "Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\n{query}\n\nImproved query:" diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 4e67cf24b4..e0293507da 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -379,11 +379,6 @@ async def openai_create_vector_store( f"Using embedding config from extra_body: model='{embedding_model}', dimension={embedding_dimension}" ) - # Extract query expansion model from extra_body if provided - query_expansion_model = extra_body.get("query_expansion_model") - if query_expansion_model: - logger.debug(f"Using per-store query expansion model: {query_expansion_model}") - # use provider_id set by router; fallback to provider's own ID when used directly via --stack-config provider_id = extra_body.get("provider_id") or getattr(self, "__provider_id__", None) # Derive the canonical vector_store_id (allow override, else generate) @@ -407,7 +402,6 @@ async def openai_create_vector_store( provider_id=provider_id, provider_resource_id=vector_store_id, vector_store_name=params.name, - query_expansion_model=query_expansion_model, ) await self.register_vector_store(vector_store) @@ -621,9 +615,6 @@ async def openai_search_vector_store( "rewrite_query": rewrite_query, } - # Add vector_stores_config if available (for query rewriting) - if hasattr(self, "vector_stores_config"): - params["vector_stores_config"] = self.vector_stores_config # TODO: Add support for ranking_options.ranker response = await self.query_chunks( diff --git a/src/llama_stack/providers/utils/memory/query_expansion_config.py b/src/llama_stack/providers/utils/memory/query_expansion_config.py new file mode 100644 index 0000000000..0b51c1a9ac --- /dev/null +++ b/src/llama_stack/providers/utils/memory/query_expansion_config.py @@ -0,0 +1,37 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig +from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT + +# Global configuration for query expansion - set during stack startup +_DEFAULT_QUERY_EXPANSION_MODEL: QualifiedModel | None = None +_DEFAULT_QUERY_EXPANSION_MAX_TOKENS: int = 100 +_DEFAULT_QUERY_EXPANSION_TEMPERATURE: float = 0.3 +_QUERY_EXPANSION_PROMPT_OVERRIDE: str | None = None + + +def set_default_query_expansion_config(vector_stores_config: VectorStoresConfig | None): + """Set the global default query expansion configuration from stack config.""" + global \ + _DEFAULT_QUERY_EXPANSION_MODEL, \ + _QUERY_EXPANSION_PROMPT_OVERRIDE, \ + _DEFAULT_QUERY_EXPANSION_MAX_TOKENS, \ + _DEFAULT_QUERY_EXPANSION_TEMPERATURE + if vector_stores_config: + _DEFAULT_QUERY_EXPANSION_MODEL = vector_stores_config.default_query_expansion_model + # Only set override if user provided a custom prompt different from default + if vector_stores_config.query_expansion_prompt != DEFAULT_QUERY_EXPANSION_PROMPT: + _QUERY_EXPANSION_PROMPT_OVERRIDE = vector_stores_config.query_expansion_prompt + else: + _QUERY_EXPANSION_PROMPT_OVERRIDE = None + _DEFAULT_QUERY_EXPANSION_MAX_TOKENS = vector_stores_config.query_expansion_max_tokens + _DEFAULT_QUERY_EXPANSION_TEMPERATURE = vector_stores_config.query_expansion_temperature + else: + _DEFAULT_QUERY_EXPANSION_MODEL = None + _QUERY_EXPANSION_PROMPT_OVERRIDE = None + _DEFAULT_QUERY_EXPANSION_MAX_TOKENS = 100 + _DEFAULT_QUERY_EXPANSION_TEMPERATURE = 0.3 diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index 71d61787af..61fa996e4e 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -17,7 +17,6 @@ from numpy.typing import NDArray from pydantic import BaseModel -from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig from llama_stack.log import get_logger from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.providers.utils.inference.prompt_adapter import ( @@ -30,19 +29,18 @@ Chunk, ChunkMetadata, InterleavedContent, + OpenAIChatCompletionRequestWithExtraBody, OpenAIEmbeddingsRequestWithExtraBody, QueryChunksResponse, RAGDocument, VectorStore, ) -from llama_stack_api.inference import ( - OpenAIChatCompletionRequestWithExtraBody, - OpenAIUserMessageParam, -) -from llama_stack_api.models import ModelType log = get_logger(name=__name__, category="providers::utils") +from llama_stack.providers.utils.memory import query_expansion_config +from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT + class ChunkForDeletion(BaseModel): """Information needed to delete a chunk from a vector store. 
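A minimal usage sketch for the module above (illustrative only; the provider and model IDs are assumptions, and the snippet presumes this series is applied): the stack publishes the defaults once at startup via set_default_query_expansion_config(), and the vector-store query path later reads the module-level globals instead of having the config threaded through every adapter constructor.

# Illustrative sketch only -- the provider/model IDs below are made-up assumptions.
from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig
from llama_stack.providers.utils.memory import query_expansion_config
from llama_stack.providers.utils.memory.query_expansion_config import set_default_query_expansion_config

# At stack startup: publish the defaults from the run.yaml vector_stores section.
set_default_query_expansion_config(
    VectorStoresConfig(
        default_query_expansion_model=QualifiedModel(provider_id="ollama", model_id="llama3.2:3b"),
        query_expansion_max_tokens=80,
        query_expansion_temperature=0.0,
    )
)

# At query time the rewrite path consults the globals; None disables rewriting.
model = query_expansion_config._DEFAULT_QUERY_EXPANSION_MODEL
if model is not None:
    print(f"queries will be rewritten with {model.provider_id}/{model.model_id}")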
@@ -268,7 +266,6 @@ class VectorStoreWithIndex: vector_store: VectorStore index: EmbeddingIndex inference_api: Api.inference - vector_stores_config: VectorStoresConfig | None = None async def insert_chunks( self, @@ -296,6 +293,39 @@ async def insert_chunks( embeddings = np.array([c.embedding for c in chunks], dtype=np.float32) await self.index.add_chunks(chunks, embeddings) + async def _rewrite_query_for_file_search(self, query: str) -> str: + """Rewrite a search query using the globally configured LLM model for better retrieval results.""" + if not query_expansion_config._DEFAULT_QUERY_EXPANSION_MODEL: + log.debug("No default query expansion model configured, using original query") + return query + + model_id = f"{query_expansion_config._DEFAULT_QUERY_EXPANSION_MODEL.provider_id}/{query_expansion_config._DEFAULT_QUERY_EXPANSION_MODEL.model_id}" + + # Use custom prompt from config if provided, otherwise use built-in default + # Users only need to configure the model - prompt is automatic with optional override + if query_expansion_config._QUERY_EXPANSION_PROMPT_OVERRIDE: + # Custom prompt from config - format if it contains {query} placeholder + prompt = ( + query_expansion_config._QUERY_EXPANSION_PROMPT_OVERRIDE.format(query=query) + if "{query}" in query_expansion_config._QUERY_EXPANSION_PROMPT_OVERRIDE + else query_expansion_config._QUERY_EXPANSION_PROMPT_OVERRIDE + ) + else: + # Use built-in default prompt and format with query + prompt = DEFAULT_QUERY_EXPANSION_PROMPT.format(query=query) + + request = OpenAIChatCompletionRequestWithExtraBody( + model=model_id, + messages=[{"role": "user", "content": prompt}], + max_tokens=query_expansion_config._DEFAULT_QUERY_EXPANSION_MAX_TOKENS, + temperature=query_expansion_config._DEFAULT_QUERY_EXPANSION_TEMPERATURE, + ) + + response = await self.inference_api.openai_chat_completion(request) + rewritten_query = response.choices[0].message.content.strip() + log.debug(f"Query rewritten: '{query}' → '{rewritten_query}'") + return rewritten_query + async def query_chunks( self, query: InterleavedContent, @@ -304,10 +334,6 @@ async def query_chunks( if params is None: params = {} - # Extract configuration if provided by router - if "vector_stores_config" in params: - self.vector_stores_config = params["vector_stores_config"] - k = params.get("max_chunks", 3) mode = params.get("mode") score_threshold = params.get("score_threshold", 0.0) @@ -331,18 +357,9 @@ async def query_chunks( query_string = interleaved_content_as_str(query) - # Apply query rewriting if enabled + # Apply query rewriting if enabled and model is configured if params.get("rewrite_query", False): - if self.vector_stores_config: - log.debug(f"VectorStoreWithIndex received config: {self.vector_stores_config}") - if hasattr(self.vector_stores_config, "default_query_expansion_model"): - log.debug( - f"Config has default_query_expansion_model: {self.vector_stores_config.default_query_expansion_model}" - ) - else: - log.debug("No vector_stores_config found - cannot perform query rewriting") - - query_string = await self._rewrite_query_for_search(query_string) + query_string = await self._rewrite_query_for_file_search(query_string) if mode == "keyword": return await self.index.query_keyword(query_string, k, score_threshold) @@ -359,88 +376,3 @@ async def query_chunks( ) else: return await self.index.query_vector(query_vector, k, score_threshold) - - async def _rewrite_query_for_search(self, query: str) -> str: - """Rewrite the user query to improve vector search performance. 
- - :param query: The original user query - :returns: The rewritten query optimized for vector search - """ - expansion_model = None - - # Check for per-store query expansion model first - if self.vector_store.query_expansion_model: - # Parse the model string into provider_id and model_id - model_parts = self.vector_store.query_expansion_model.split("/", 1) - if len(model_parts) == 2: - expansion_model = QualifiedModel(provider_id=model_parts[0], model_id=model_parts[1]) - log.debug(f"Using per-store query expansion model: {expansion_model}") - else: - log.warning( - f"Invalid query_expansion_model format: {self.vector_store.query_expansion_model}. Expected 'provider_id/model_id'" - ) - - # Fall back to global default if no per-store model - if not expansion_model: - if not self.vector_stores_config: - raise ValueError( - f"No vector_stores_config found and no per-store query_expansion_model! self.vector_stores_config is: {self.vector_stores_config}" - ) - if not self.vector_stores_config.default_query_expansion_model: - raise ValueError( - f"No default_query_expansion_model configured and no per-store query_expansion_model! vector_stores_config: {self.vector_stores_config}, default_query_expansion_model: {self.vector_stores_config.default_query_expansion_model}" - ) - expansion_model = self.vector_stores_config.default_query_expansion_model - log.debug(f"Using global default query expansion model: {expansion_model}") - - chat_model = f"{expansion_model.provider_id}/{expansion_model.model_id}" - - # Validate that the model is available and is an LLM - try: - models_response = await self.inference_api.routing_table.list_models() - except Exception as e: - raise RuntimeError(f"Failed to list available models for validation: {e}") from e - - model_found = False - for model in models_response.data: - if model.identifier == chat_model: - if model.model_type != ModelType.llm: - raise ValueError( - f"Configured query expansion model '{chat_model}' is not an LLM model " - f"(found type: {model.model_type}). Query rewriting requires an LLM model." - ) - model_found = True - break - - if not model_found: - available_llm_models = [m.identifier for m in models_response.data if m.model_type == ModelType.llm] - raise ValueError( - f"Configured query expansion model '{chat_model}' is not available. 
" - f"Available LLM models: {available_llm_models}" - ) - - # Use the configured prompt (has a default value) - rewrite_prompt = self.vector_stores_config.query_expansion_prompt.format(query=query) - - chat_request = OpenAIChatCompletionRequestWithExtraBody( - model=chat_model, - messages=[ - OpenAIUserMessageParam( - role="user", - content=rewrite_prompt, - ) - ], - max_tokens=100, - ) - - try: - response = await self.inference_api.openai_chat_completion(chat_request) - except Exception as e: - raise RuntimeError(f"Failed to generate rewritten query: {e}") from e - - if response.choices and len(response.choices) > 0: - rewritten_query = response.choices[0].message.content.strip() - log.info(f"Query rewritten: '{query}' → '{rewritten_query}'") - return rewritten_query - else: - raise RuntimeError("No response received from LLM model for query rewriting") diff --git a/src/llama_stack_api/vector_stores.py b/src/llama_stack_api/vector_stores.py index 4c0d1ced25..0a1e6c53c5 100644 --- a/src/llama_stack_api/vector_stores.py +++ b/src/llama_stack_api/vector_stores.py @@ -25,7 +25,6 @@ class VectorStore(Resource): embedding_model: str embedding_dimension: int vector_store_name: str | None = None - query_expansion_model: str | None = None @property def vector_store_id(self) -> str: diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index cfda7aa5ee..83bf22f346 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -1233,94 +1233,122 @@ async def test_embedding_config_required_model_missing(vector_io_adapter): async def test_query_expansion_functionality(vector_io_adapter): - """Test query expansion with per-store models, global defaults, and error validation.""" + """Test query expansion with simplified global configuration approach.""" from unittest.mock import MagicMock - from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig + from llama_stack.core.datatypes import QualifiedModel + from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT + from llama_stack.providers.utils.memory.query_expansion_config import set_default_query_expansion_config from llama_stack.providers.utils.memory.vector_store import VectorStoreWithIndex - from llama_stack_api.models import Model, ModelType + from llama_stack_api import QueryChunksResponse - vector_io_adapter.register_vector_store = AsyncMock() - vector_io_adapter.__provider_id__ = "test_provider" + # Mock a simple vector store and index + mock_vector_store = MagicMock() + mock_vector_store.embedding_model = "test/embedding" + mock_inference_api = MagicMock() + mock_index = MagicMock() - # Test 1: Per-store model usage - params = OpenAICreateVectorStoreRequestWithExtraBody( - name="test_store", - metadata={}, - **{"embedding_model": "test/embedding", "query_expansion_model": "test/llama-model"}, + # Create VectorStoreWithIndex with simplified constructor + vector_store_with_index = VectorStoreWithIndex( + vector_store=mock_vector_store, + index=mock_index, + inference_api=mock_inference_api, ) - await vector_io_adapter.openai_create_vector_store(params) - call_args = vector_io_adapter.register_vector_store.call_args[0][0] - assert call_args.query_expansion_model == "test/llama-model" - # Test 2: Global default fallback - vector_io_adapter.register_vector_store.reset_mock() - params_no_model = 
OpenAICreateVectorStoreRequestWithExtraBody( - name="test_store2", metadata={}, **{"embedding_model": "test/embedding"} + # Mock the query_vector method to return a simple response + mock_response = QueryChunksResponse(chunks=[], scores=[]) + mock_index.query_vector = AsyncMock(return_value=mock_response) + + # Mock embeddings generation + mock_inference_api.openai_embeddings = AsyncMock( + return_value=MagicMock(data=[MagicMock(embedding=[0.1, 0.2, 0.3])]) ) - await vector_io_adapter.openai_create_vector_store(params_no_model) - call_args2 = vector_io_adapter.register_vector_store.call_args[0][0] - assert call_args2.query_expansion_model is None - # Test query rewriting scenarios - mock_inference_api = MagicMock() + # Test 1: Query expansion with default prompt (no custom prompt configured) + mock_vector_stores_config = MagicMock() + mock_vector_stores_config.default_query_expansion_model = QualifiedModel(provider_id="test", model_id="llama") + mock_vector_stores_config.query_expansion_prompt = None # Use built-in default prompt + mock_vector_stores_config.query_expansion_max_tokens = 100 # Default value + mock_vector_stores_config.query_expansion_temperature = 0.3 # Default value - # Per-store model scenario - mock_vector_store = MagicMock() - mock_vector_store.query_expansion_model = "test/llama-model" - mock_inference_api.routing_table.list_models = AsyncMock( - return_value=MagicMock( - data=[Model(identifier="test/llama-model", provider_id="test", model_type=ModelType.llm)] - ) - ) + # Set global config + set_default_query_expansion_config(mock_vector_stores_config) + + # Mock chat completion for query rewriting mock_inference_api.openai_chat_completion = AsyncMock( - return_value=MagicMock(choices=[MagicMock(message=MagicMock(content="per-store expanded"))]) + return_value=MagicMock(choices=[MagicMock(message=MagicMock(content="expanded test query"))]) ) - vector_store_with_index = VectorStoreWithIndex( - vector_store=mock_vector_store, - index=MagicMock(), - inference_api=mock_inference_api, - vector_stores_config=VectorStoresConfig( - default_query_expansion_model=QualifiedModel(provider_id="global", model_id="default") - ), - ) + params = {"rewrite_query": True, "max_chunks": 5} + result = await vector_store_with_index.query_chunks("test query", params) + + # Verify chat completion was called for query rewriting + mock_inference_api.openai_chat_completion.assert_called_once() + chat_call_args = mock_inference_api.openai_chat_completion.call_args[0][0] + assert chat_call_args.model == "test/llama" + + # Verify default prompt is used (contains our built-in prompt text) + prompt_text = chat_call_args.messages[0].content + expected_prompt = DEFAULT_QUERY_EXPANSION_PROMPT.format(query="test query") + assert prompt_text == expected_prompt + + # Verify default inference parameters are used + assert chat_call_args.max_tokens == 100 # Default value + assert chat_call_args.temperature == 0.3 # Default value - result = await vector_store_with_index._rewrite_query_for_search("test") - assert mock_inference_api.openai_chat_completion.call_args[0][0].model == "test/llama-model" - assert result == "per-store expanded" + # Verify the rest of the flow proceeded normally + mock_inference_api.openai_embeddings.assert_called_once() + mock_index.query_vector.assert_called_once() + assert result == mock_response - # Global default fallback scenario + # Test 1b: Query expansion with custom prompt override and inference parameters mock_inference_api.reset_mock() - mock_vector_store.query_expansion_model 
= None - mock_inference_api.routing_table.list_models = AsyncMock( - return_value=MagicMock( - data=[Model(identifier="global/default", provider_id="global", model_type=ModelType.llm)] - ) - ) - mock_inference_api.openai_chat_completion = AsyncMock( - return_value=MagicMock(choices=[MagicMock(message=MagicMock(content="global expanded"))]) - ) + mock_index.reset_mock() - result = await vector_store_with_index._rewrite_query_for_search("test") - assert mock_inference_api.openai_chat_completion.call_args[0][0].model == "global/default" - assert result == "global expanded" + mock_vector_stores_config.query_expansion_prompt = "Custom prompt for rewriting: {query}" + mock_vector_stores_config.query_expansion_max_tokens = 150 + mock_vector_stores_config.query_expansion_temperature = 0.7 + set_default_query_expansion_config(mock_vector_stores_config) - # Test 3: Error cases - # Model not found - mock_vector_store.query_expansion_model = "missing/model" - mock_inference_api.routing_table.list_models = AsyncMock(return_value=MagicMock(data=[])) + result = await vector_store_with_index.query_chunks("test query", params) - with pytest.raises(ValueError, match="Configured query expansion model .* is not available"): - await vector_store_with_index._rewrite_query_for_search("test") + # Verify custom prompt and parameters are used + mock_inference_api.openai_chat_completion.assert_called_once() + chat_call_args = mock_inference_api.openai_chat_completion.call_args[0][0] + prompt_text = chat_call_args.messages[0].content + assert prompt_text == "Custom prompt for rewriting: test query" + assert "Expand this query with relevant synonyms" not in prompt_text # Default not used - # Non-LLM model - mock_vector_store.query_expansion_model = "test/embedding-model" - mock_inference_api.routing_table.list_models = AsyncMock( - return_value=MagicMock( - data=[Model(identifier="test/embedding-model", provider_id="test", model_type=ModelType.embedding)] - ) - ) + # Verify custom inference parameters + assert chat_call_args.max_tokens == 150 + assert chat_call_args.temperature == 0.7 + + # Test 2: No query expansion when no global model is configured + mock_inference_api.reset_mock() + mock_index.reset_mock() + + # Clear global config + set_default_query_expansion_config(None) + + params = {"rewrite_query": True, "max_chunks": 5} + result2 = await vector_store_with_index.query_chunks("test query", params) + + # Verify chat completion was NOT called + mock_inference_api.openai_chat_completion.assert_not_called() + # But normal flow should still work + mock_inference_api.openai_embeddings.assert_called_once() + mock_index.query_vector.assert_called_once() + assert result2 == mock_response + + # Test 3: Normal behavior without rewrite_query parameter + mock_inference_api.reset_mock() + mock_index.reset_mock() + + params_no_rewrite = {"max_chunks": 5} + result3 = await vector_store_with_index.query_chunks("test query", params_no_rewrite) - with pytest.raises(ValueError, match="is not an LLM model.*Query rewriting requires an LLM model"): - await vector_store_with_index._rewrite_query_for_search("test") + # Neither chat completion nor query rewriting should be called + mock_inference_api.openai_chat_completion.assert_not_called() + mock_inference_api.openai_embeddings.assert_called_once() + mock_index.query_vector.assert_called_once() + assert result3 == mock_response From 31e28b6d1733b8fecc00cc6aba03465fe3980342 Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Fri, 21 Nov 2025 23:38:13 -0500 Subject: [PATCH 
5/7] renaming to query_rewrite, consolidating, and cleaning up validation Signed-off-by: Francisco Javier Arceo --- src/llama_stack/core/datatypes.py | 37 ++++--- src/llama_stack/core/stack.py | 100 ++++++++++-------- .../ci-tests/run-with-postgres-store.yaml | 10 -- .../distributions/ci-tests/run.yaml | 10 -- .../starter-gpu/run-with-postgres-store.yaml | 10 -- .../distributions/starter-gpu/run.yaml | 10 -- .../starter/run-with-postgres-store.yaml | 10 -- .../distributions/starter/run.yaml | 10 -- .../utils/memory/query_expansion_config.py | 37 ------- .../utils/memory/rewrite_query_config.py | 38 +++++++ .../providers/utils/memory/vector_store.py | 20 ++-- .../test_vector_io_openai_vector_stores.py | 26 ++--- 12 files changed, 138 insertions(+), 180 deletions(-) delete mode 100644 src/llama_stack/providers/utils/memory/query_expansion_config.py create mode 100644 src/llama_stack/providers/utils/memory/rewrite_query_config.py diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index a32e1d8a26..8fab715f2b 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -366,6 +366,27 @@ class QualifiedModel(BaseModel): model_id: str +class RewriteQueryParams(BaseModel): + """Parameters for query rewriting/expansion.""" + + model: QualifiedModel | None = Field( + default=None, + description="LLM model for query rewriting/expansion in vector search.", + ) + prompt: str = Field( + default=DEFAULT_QUERY_EXPANSION_PROMPT, + description="Prompt template for query rewriting. Use {query} as placeholder for the original query.", + ) + max_tokens: int = Field( + default=100, + description="Maximum number of tokens for query expansion responses.", + ) + temperature: float = Field( + default=0.3, + description="Temperature for query expansion model (0.0 = deterministic, 1.0 = creative).", + ) + + class VectorStoresConfig(BaseModel): """Configuration for vector stores in the stack.""" @@ -377,21 +398,9 @@ class VectorStoresConfig(BaseModel): default=None, description="Default embedding model configuration for vector stores.", ) - default_query_expansion_model: QualifiedModel | None = Field( + rewrite_query_params: RewriteQueryParams | None = Field( default=None, - description="Default LLM model for query expansion/rewriting in vector search.", - ) - query_expansion_prompt: str = Field( - default=DEFAULT_QUERY_EXPANSION_PROMPT, - description="Prompt template for query expansion. Use {query} as placeholder for the original query.", - ) - query_expansion_max_tokens: int = Field( - default=100, - description="Maximum number of tokens for query expansion responses.", - ) - query_expansion_temperature: float = Field( - default=0.3, - description="Temperature for query expansion model (0.0 = deterministic, 1.0 = creative).", + description="Parameters for query rewriting/expansion. 
None disables query rewriting.", ) diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index dae6e8ec9b..0bebf800d1 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -14,7 +14,7 @@ import yaml from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl -from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig +from llama_stack.core.datatypes import Provider, QualifiedModel, SafetyConfig, StackRunConfig, VectorStoresConfig from llama_stack.core.distribution import get_provider_registry from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl @@ -145,61 +145,67 @@ async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig return # Validate default embedding model - default_embedding_model = vector_stores_config.default_embedding_model - if default_embedding_model is not None: - provider_id = default_embedding_model.provider_id - model_id = default_embedding_model.model_id - default_model_id = f"{provider_id}/{model_id}" + if vector_stores_config.default_embedding_model is not None: + await _validate_embedding_model(vector_stores_config.default_embedding_model, impls) - if Api.models not in impls: - raise ValueError( - f"Models API is not available but vector_stores config requires model '{default_model_id}'" - ) + # Validate default rewrite query model + if vector_stores_config.rewrite_query_params and vector_stores_config.rewrite_query_params.model: + await _validate_rewrite_query_model(vector_stores_config.rewrite_query_params.model, impls) - models_impl = impls[Api.models] - response = await models_impl.list_models() - models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"} - default_model = models_list.get(default_model_id) - if default_model is None: - raise ValueError( - f"Embedding model '{default_model_id}' not found. Available embedding models: {models_list}" - ) +async def _validate_embedding_model(embedding_model: QualifiedModel, impls: dict[Api, Any]) -> None: + """Validate that an embedding model exists and has required metadata.""" + provider_id = embedding_model.provider_id + model_id = embedding_model.model_id + model_identifier = f"{provider_id}/{model_id}" - embedding_dimension = default_model.metadata.get("embedding_dimension") - if embedding_dimension is None: - raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata") + if Api.models not in impls: + raise ValueError(f"Models API is not available but vector_stores config requires model '{model_identifier}'") - try: - int(embedding_dimension) - except ValueError as err: - raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err + models_impl = impls[Api.models] + response = await models_impl.list_models() + models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"} - logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})") + model = models_list.get(model_identifier) + if model is None: + raise ValueError( + f"Embedding model '{model_identifier}' not found. 
Available embedding models: {list(models_list.keys())}" + ) - # Validate default query expansion model - default_query_expansion_model = vector_stores_config.default_query_expansion_model - if default_query_expansion_model is not None: - provider_id = default_query_expansion_model.provider_id - model_id = default_query_expansion_model.model_id - query_model_id = f"{provider_id}/{model_id}" + embedding_dimension = model.metadata.get("embedding_dimension") + if embedding_dimension is None: + raise ValueError(f"Embedding model '{model_identifier}' is missing 'embedding_dimension' in metadata") - if Api.models not in impls: - raise ValueError( - f"Models API is not available but vector_stores config requires query expansion model '{query_model_id}'" - ) + try: + int(embedding_dimension) + except ValueError as err: + raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err - models_impl = impls[Api.models] - response = await models_impl.list_models() - llm_models_list = {m.identifier: m for m in response.data if m.model_type == "llm"} + logger.debug(f"Validated embedding model: {model_identifier} (dimension: {embedding_dimension})") - query_expansion_model = llm_models_list.get(query_model_id) - if query_expansion_model is None: - raise ValueError( - f"Query expansion model '{query_model_id}' not found. Available LLM models: {list(llm_models_list.keys())}" - ) - logger.debug(f"Validated default query expansion model: {query_model_id}") +async def _validate_rewrite_query_model(rewrite_query_model: QualifiedModel, impls: dict[Api, Any]) -> None: + """Validate that a rewrite query model exists and is accessible.""" + provider_id = rewrite_query_model.provider_id + model_id = rewrite_query_model.model_id + model_identifier = f"{provider_id}/{model_id}" + + if Api.models not in impls: + raise ValueError( + f"Models API is not available but vector_stores config requires rewrite query model '{model_identifier}'" + ) + + models_impl = impls[Api.models] + response = await models_impl.list_models() + llm_models_list = {m.identifier: m for m in response.data if m.model_type == "llm"} + + model = llm_models_list.get(model_identifier) + if model is None: + raise ValueError( + f"Rewrite query model '{model_identifier}' not found. 
Available LLM models: {list(llm_models_list.keys())}" + ) + + logger.debug(f"Validated rewrite query model: {model_identifier}") async def validate_safety_config(safety_config: SafetyConfig | None, impls: dict[Api, Any]): @@ -466,9 +472,9 @@ async def initialize(self): await validate_safety_config(self.run_config.safety, impls) # Set global query expansion configuration from stack config - from llama_stack.providers.utils.memory.query_expansion_config import set_default_query_expansion_config + from llama_stack.providers.utils.memory.rewrite_query_config import set_default_rewrite_query_config - set_default_query_expansion_config(self.run_config.vector_stores) + set_default_rewrite_query_config(self.run_config.vector_stores) self.impls = impls diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml index 219ffdce3b..7721138c7f 100644 --- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -288,15 +288,5 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 - query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. - Return only the improved query, no explanations: - - - {query} - - - Improved query:' - query_expansion_max_tokens: 100 - query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml index e352e92688..b791e14882 100644 --- a/src/llama_stack/distributions/ci-tests/run.yaml +++ b/src/llama_stack/distributions/ci-tests/run.yaml @@ -279,15 +279,5 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 - query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. - Return only the improved query, no explanations: - - - {query} - - - Improved query:' - query_expansion_max_tokens: 100 - query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml index e81febb0ed..9c250c05a6 100644 --- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -291,15 +291,5 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 - query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. - Return only the improved query, no explanations: - - - {query} - - - Improved query:' - query_expansion_max_tokens: 100 - query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml index edae6f66db..65f9ae326f 100644 --- a/src/llama_stack/distributions/starter-gpu/run.yaml +++ b/src/llama_stack/distributions/starter-gpu/run.yaml @@ -282,15 +282,5 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 - query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. 
- Return only the improved query, no explanations: - - - {query} - - - Improved query:' - query_expansion_max_tokens: 100 - query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index 9ed74d96d1..3314bb9e96 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -288,15 +288,5 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 - query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. - Return only the improved query, no explanations: - - - {query} - - - Improved query:' - query_expansion_max_tokens: 100 - query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml index 73679a1520..e88539e6a7 100644 --- a/src/llama_stack/distributions/starter/run.yaml +++ b/src/llama_stack/distributions/starter/run.yaml @@ -279,15 +279,5 @@ vector_stores: default_embedding_model: provider_id: sentence-transformers model_id: nomic-ai/nomic-embed-text-v1.5 - query_expansion_prompt: 'Expand this query with relevant synonyms and related terms. - Return only the improved query, no explanations: - - - {query} - - - Improved query:' - query_expansion_max_tokens: 100 - query_expansion_temperature: 0.3 safety: default_shield_id: llama-guard diff --git a/src/llama_stack/providers/utils/memory/query_expansion_config.py b/src/llama_stack/providers/utils/memory/query_expansion_config.py deleted file mode 100644 index 0b51c1a9ac..0000000000 --- a/src/llama_stack/providers/utils/memory/query_expansion_config.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig -from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT - -# Global configuration for query expansion - set during stack startup -_DEFAULT_QUERY_EXPANSION_MODEL: QualifiedModel | None = None -_DEFAULT_QUERY_EXPANSION_MAX_TOKENS: int = 100 -_DEFAULT_QUERY_EXPANSION_TEMPERATURE: float = 0.3 -_QUERY_EXPANSION_PROMPT_OVERRIDE: str | None = None - - -def set_default_query_expansion_config(vector_stores_config: VectorStoresConfig | None): - """Set the global default query expansion configuration from stack config.""" - global \ - _DEFAULT_QUERY_EXPANSION_MODEL, \ - _QUERY_EXPANSION_PROMPT_OVERRIDE, \ - _DEFAULT_QUERY_EXPANSION_MAX_TOKENS, \ - _DEFAULT_QUERY_EXPANSION_TEMPERATURE - if vector_stores_config: - _DEFAULT_QUERY_EXPANSION_MODEL = vector_stores_config.default_query_expansion_model - # Only set override if user provided a custom prompt different from default - if vector_stores_config.query_expansion_prompt != DEFAULT_QUERY_EXPANSION_PROMPT: - _QUERY_EXPANSION_PROMPT_OVERRIDE = vector_stores_config.query_expansion_prompt - else: - _QUERY_EXPANSION_PROMPT_OVERRIDE = None - _DEFAULT_QUERY_EXPANSION_MAX_TOKENS = vector_stores_config.query_expansion_max_tokens - _DEFAULT_QUERY_EXPANSION_TEMPERATURE = vector_stores_config.query_expansion_temperature - else: - _DEFAULT_QUERY_EXPANSION_MODEL = None - _QUERY_EXPANSION_PROMPT_OVERRIDE = None - _DEFAULT_QUERY_EXPANSION_MAX_TOKENS = 100 - _DEFAULT_QUERY_EXPANSION_TEMPERATURE = 0.3 diff --git a/src/llama_stack/providers/utils/memory/rewrite_query_config.py b/src/llama_stack/providers/utils/memory/rewrite_query_config.py new file mode 100644 index 0000000000..9c53638b87 --- /dev/null +++ b/src/llama_stack/providers/utils/memory/rewrite_query_config.py @@ -0,0 +1,38 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig +from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT + +# Global configuration for query rewriting - set during stack startup +_DEFAULT_REWRITE_QUERY_MODEL: QualifiedModel | None = None +_DEFAULT_REWRITE_QUERY_MAX_TOKENS: int = 100 +_DEFAULT_REWRITE_QUERY_TEMPERATURE: float = 0.3 +_REWRITE_QUERY_PROMPT_OVERRIDE: str | None = None + + +def set_default_rewrite_query_config(vector_stores_config: VectorStoresConfig | None): + """Set the global default query rewriting configuration from stack config.""" + global \ + _DEFAULT_REWRITE_QUERY_MODEL, \ + _REWRITE_QUERY_PROMPT_OVERRIDE, \ + _DEFAULT_REWRITE_QUERY_MAX_TOKENS, \ + _DEFAULT_REWRITE_QUERY_TEMPERATURE + if vector_stores_config and vector_stores_config.rewrite_query_params: + params = vector_stores_config.rewrite_query_params + _DEFAULT_REWRITE_QUERY_MODEL = params.model + # Only set override if user provided a custom prompt different from default + if params.prompt != DEFAULT_QUERY_EXPANSION_PROMPT: + _REWRITE_QUERY_PROMPT_OVERRIDE = params.prompt + else: + _REWRITE_QUERY_PROMPT_OVERRIDE = None + _DEFAULT_REWRITE_QUERY_MAX_TOKENS = params.max_tokens + _DEFAULT_REWRITE_QUERY_TEMPERATURE = params.temperature + else: + _DEFAULT_REWRITE_QUERY_MODEL = None + _REWRITE_QUERY_PROMPT_OVERRIDE = None + _DEFAULT_REWRITE_QUERY_MAX_TOKENS = 100 + _DEFAULT_REWRITE_QUERY_TEMPERATURE = 0.3 diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index 61fa996e4e..11754bae29 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -38,7 +38,7 @@ log = get_logger(name=__name__, category="providers::utils") -from llama_stack.providers.utils.memory import query_expansion_config +from llama_stack.providers.utils.memory import rewrite_query_config from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT @@ -295,20 +295,20 @@ async def insert_chunks( async def _rewrite_query_for_file_search(self, query: str) -> str: """Rewrite a search query using the globally configured LLM model for better retrieval results.""" - if not query_expansion_config._DEFAULT_QUERY_EXPANSION_MODEL: - log.debug("No default query expansion model configured, using original query") + if not rewrite_query_config._DEFAULT_REWRITE_QUERY_MODEL: + log.debug("No default query rewriting model configured, using original query") return query - model_id = f"{query_expansion_config._DEFAULT_QUERY_EXPANSION_MODEL.provider_id}/{query_expansion_config._DEFAULT_QUERY_EXPANSION_MODEL.model_id}" + model_id = f"{rewrite_query_config._DEFAULT_REWRITE_QUERY_MODEL.provider_id}/{rewrite_query_config._DEFAULT_REWRITE_QUERY_MODEL.model_id}" # Use custom prompt from config if provided, otherwise use built-in default # Users only need to configure the model - prompt is automatic with optional override - if query_expansion_config._QUERY_EXPANSION_PROMPT_OVERRIDE: + if rewrite_query_config._REWRITE_QUERY_PROMPT_OVERRIDE: # Custom prompt from config - format if it contains {query} placeholder prompt = ( - query_expansion_config._QUERY_EXPANSION_PROMPT_OVERRIDE.format(query=query) - if "{query}" in query_expansion_config._QUERY_EXPANSION_PROMPT_OVERRIDE - else query_expansion_config._QUERY_EXPANSION_PROMPT_OVERRIDE + rewrite_query_config._REWRITE_QUERY_PROMPT_OVERRIDE.format(query=query) + if "{query}" in 
rewrite_query_config._REWRITE_QUERY_PROMPT_OVERRIDE + else rewrite_query_config._REWRITE_QUERY_PROMPT_OVERRIDE ) else: # Use built-in default prompt and format with query @@ -317,8 +317,8 @@ async def _rewrite_query_for_file_search(self, query: str) -> str: request = OpenAIChatCompletionRequestWithExtraBody( model=model_id, messages=[{"role": "user", "content": prompt}], - max_tokens=query_expansion_config._DEFAULT_QUERY_EXPANSION_MAX_TOKENS, - temperature=query_expansion_config._DEFAULT_QUERY_EXPANSION_TEMPERATURE, + max_tokens=rewrite_query_config._DEFAULT_REWRITE_QUERY_MAX_TOKENS, + temperature=rewrite_query_config._DEFAULT_REWRITE_QUERY_TEMPERATURE, ) response = await self.inference_api.openai_chat_completion(request) diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 83bf22f346..07ec41bec1 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -1236,9 +1236,9 @@ async def test_query_expansion_functionality(vector_io_adapter): """Test query expansion with simplified global configuration approach.""" from unittest.mock import MagicMock - from llama_stack.core.datatypes import QualifiedModel + from llama_stack.core.datatypes import QualifiedModel, RewriteQueryParams from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT - from llama_stack.providers.utils.memory.query_expansion_config import set_default_query_expansion_config + from llama_stack.providers.utils.memory.rewrite_query_config import set_default_rewrite_query_config from llama_stack.providers.utils.memory.vector_store import VectorStoreWithIndex from llama_stack_api import QueryChunksResponse @@ -1266,13 +1266,12 @@ async def test_query_expansion_functionality(vector_io_adapter): # Test 1: Query expansion with default prompt (no custom prompt configured) mock_vector_stores_config = MagicMock() - mock_vector_stores_config.default_query_expansion_model = QualifiedModel(provider_id="test", model_id="llama") - mock_vector_stores_config.query_expansion_prompt = None # Use built-in default prompt - mock_vector_stores_config.query_expansion_max_tokens = 100 # Default value - mock_vector_stores_config.query_expansion_temperature = 0.3 # Default value + mock_vector_stores_config.rewrite_query_params = RewriteQueryParams( + model=QualifiedModel(provider_id="test", model_id="llama"), max_tokens=100, temperature=0.3 + ) # Set global config - set_default_query_expansion_config(mock_vector_stores_config) + set_default_rewrite_query_config(mock_vector_stores_config) # Mock chat completion for query rewriting mock_inference_api.openai_chat_completion = AsyncMock( @@ -1305,10 +1304,13 @@ async def test_query_expansion_functionality(vector_io_adapter): mock_inference_api.reset_mock() mock_index.reset_mock() - mock_vector_stores_config.query_expansion_prompt = "Custom prompt for rewriting: {query}" - mock_vector_stores_config.query_expansion_max_tokens = 150 - mock_vector_stores_config.query_expansion_temperature = 0.7 - set_default_query_expansion_config(mock_vector_stores_config) + mock_vector_stores_config.rewrite_query_params = RewriteQueryParams( + model=QualifiedModel(provider_id="test", model_id="llama"), + prompt="Custom prompt for rewriting: {query}", + max_tokens=150, + temperature=0.7, + ) + set_default_rewrite_query_config(mock_vector_stores_config) result = await 
vector_store_with_index.query_chunks("test query", params) @@ -1328,7 +1330,7 @@ async def test_query_expansion_functionality(vector_io_adapter): mock_index.reset_mock() # Clear global config - set_default_query_expansion_config(None) + set_default_rewrite_query_config(None) params = {"rewrite_query": True, "max_chunks": 5} result2 = await vector_store_with_index.query_chunks("test query", params) From 2ebc56c3d9ef7664ec5b005fe837180060411541 Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Tue, 25 Nov 2025 00:06:11 -0500 Subject: [PATCH 6/7] undoing formatting and updating missed expansion parameters Signed-off-by: Francisco Javier Arceo --- src/llama_stack/core/datatypes.py | 4 ++-- .../providers/inline/vector_io/faiss/faiss.py | 7 +------ .../inline/vector_io/sqlite_vec/sqlite_vec.py | 7 +------ .../remote/vector_io/pgvector/__init__.py | 5 +---- .../remote/vector_io/pgvector/pgvector.py | 17 +++-------------- .../providers/remote/vector_io/qdrant/qdrant.py | 4 +--- .../remote/vector_io/weaviate/__init__.py | 5 +---- .../remote/vector_io/weaviate/weaviate.py | 15 +++------------ .../providers/utils/memory/__init__.py | 4 ++-- .../providers/utils/memory/constants.py | 4 ++-- .../utils/memory/rewrite_query_config.py | 4 ++-- .../providers/utils/memory/vector_store.py | 4 ++-- .../test_vector_io_openai_vector_stores.py | 4 ++-- 13 files changed, 23 insertions(+), 61 deletions(-) diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 8fab715f2b..00527a1bd0 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -18,7 +18,7 @@ StorageConfig, ) from llama_stack.log import LoggingConfig -from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT +from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_REWRITE_PROMPT from llama_stack_api import ( Api, Benchmark, @@ -374,7 +374,7 @@ class RewriteQueryParams(BaseModel): description="LLM model for query rewriting/expansion in vector search.", ) prompt: str = Field( - default=DEFAULT_QUERY_EXPANSION_PROMPT, + default=DEFAULT_QUERY_REWRITE_PROMPT, description="Prompt template for query rewriting. 
Use {query} as placeholder for the original query.", ) max_tokens: int = Field( diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py index e2aab1a250..91a17058bf 100644 --- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -184,12 +184,7 @@ async def query_hybrid( class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): - def __init__( - self, - config: FaissVectorIOConfig, - inference_api: Inference, - files_api: Files | None, - ) -> None: + def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index bc6226c845..a384a33dc5 100644 --- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -385,12 +385,7 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro and creates a cache of VectorStoreWithIndex instances (each wrapping a SQLiteVecIndex). """ - def __init__( - self, - config, - inference_api: Inference, - files_api: Files | None, - ) -> None: + def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py index ea0139815a..36018fd954 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -9,10 +9,7 @@ from .config import PGVectorVectorIOConfig -async def get_adapter_impl( - config: PGVectorVectorIOConfig, - deps: dict[Api, ProviderSpec], -): +async def get_adapter_impl(config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec]): from .pgvector import PGVectorVectorIOAdapter impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index fe1b8ce35e..5c86fb08da 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -330,10 +330,7 @@ def check_distance_metric_availability(self, distance_metric: str) -> None: class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( - self, - config: PGVectorVectorIOConfig, - inference_api: Inference, - files_api: Files | None = None, + self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config @@ -389,11 +386,7 @@ async def initialize(self) -> None: kvstore=self.kvstore, ) await pgvector_index.initialize() - index = VectorStoreWithIndex( - vector_store, - index=pgvector_index, - inference_api=self.inference_api, - ) + index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api) self.cache[vector_store.identifier] = 
index async def shutdown(self) -> None: @@ -420,11 +413,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: vector_store=vector_store, dimension=vector_store.embedding_dimension, conn=self.conn, kvstore=self.kvstore ) await pgvector_index.initialize() - index = VectorStoreWithIndex( - vector_store, - index=pgvector_index, - inference_api=self.inference_api, - ) + index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api) self.cache[vector_store.identifier] = index async def unregister_vector_store(self, vector_store_id: str) -> None: diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index dc65466460..4dd78d8343 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -173,9 +173,7 @@ async def initialize(self) -> None: for vector_store_data in stored_vector_stores: vector_store = VectorStore.model_validate_json(vector_store_data) index = VectorStoreWithIndex( - vector_store, - QdrantIndex(self.client, vector_store.identifier), - self.inference_api, + vector_store, QdrantIndex(self.client, vector_store.identifier), self.inference_api ) self.cache[vector_store.identifier] = index self.openai_vector_stores = await self._load_openai_vector_stores() diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py index a13cca8a16..47546d4598 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py @@ -9,10 +9,7 @@ from .config import WeaviateVectorIOConfig -async def get_adapter_impl( - config: WeaviateVectorIOConfig, - deps: dict[Api, ProviderSpec], -): +async def get_adapter_impl(config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec]): from .weaviate import WeaviateVectorIOAdapter impl = WeaviateVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files)) diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index 67ec523d7e..c15d5f4682 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -262,12 +262,7 @@ async def query_hybrid( class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorStoresProtocolPrivate): - def __init__( - self, - config: WeaviateVectorIOConfig, - inference_api: Inference, - files_api: Files | None, - ) -> None: + def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api @@ -313,9 +308,7 @@ async def initialize(self) -> None: client = self._get_client() idx = WeaviateIndex(client=client, collection_name=vector_store.identifier, kvstore=self.kvstore) self.cache[vector_store.identifier] = VectorStoreWithIndex( - vector_store=vector_store, - index=idx, - inference_api=self.inference_api, + vector_store=vector_store, index=idx, inference_api=self.inference_api ) # Load OpenAI vector stores metadata into cache @@ -341,9 +334,7 @@ async def register_vector_store(self, vector_store: VectorStore) -> None: ) self.cache[vector_store.identifier] = VectorStoreWithIndex( - vector_store, - 
WeaviateIndex(client=client, collection_name=sanitized_collection_name), - self.inference_api, + vector_store, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api ) async def unregister_vector_store(self, vector_store_id: str) -> None: diff --git a/src/llama_stack/providers/utils/memory/__init__.py b/src/llama_stack/providers/utils/memory/__init__.py index 5e0942402e..05a832b6f8 100644 --- a/src/llama_stack/providers/utils/memory/__init__.py +++ b/src/llama_stack/providers/utils/memory/__init__.py @@ -4,6 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .constants import DEFAULT_QUERY_EXPANSION_PROMPT +from .constants import DEFAULT_QUERY_REWRITE_PROMPT -__all__ = ["DEFAULT_QUERY_EXPANSION_PROMPT"] +__all__ = ["DEFAULT_QUERY_REWRITE_PROMPT"] diff --git a/src/llama_stack/providers/utils/memory/constants.py b/src/llama_stack/providers/utils/memory/constants.py index d8703bbceb..1f6e2cef6f 100644 --- a/src/llama_stack/providers/utils/memory/constants.py +++ b/src/llama_stack/providers/utils/memory/constants.py @@ -4,5 +4,5 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -# Default prompt template for query expansion in vector search -DEFAULT_QUERY_EXPANSION_PROMPT = "Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\n{query}\n\nImproved query:" +# Default prompt template for query rewriting in vector search +DEFAULT_QUERY_REWRITE_PROMPT = "Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\n{query}\n\nImproved query:" diff --git a/src/llama_stack/providers/utils/memory/rewrite_query_config.py b/src/llama_stack/providers/utils/memory/rewrite_query_config.py index 9c53638b87..7128116dde 100644 --- a/src/llama_stack/providers/utils/memory/rewrite_query_config.py +++ b/src/llama_stack/providers/utils/memory/rewrite_query_config.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig -from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT +from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_REWRITE_PROMPT # Global configuration for query rewriting - set during stack startup _DEFAULT_REWRITE_QUERY_MODEL: QualifiedModel | None = None @@ -25,7 +25,7 @@ def set_default_rewrite_query_config(vector_stores_config: VectorStoresConfig | params = vector_stores_config.rewrite_query_params _DEFAULT_REWRITE_QUERY_MODEL = params.model # Only set override if user provided a custom prompt different from default - if params.prompt != DEFAULT_QUERY_EXPANSION_PROMPT: + if params.prompt != DEFAULT_QUERY_REWRITE_PROMPT: _REWRITE_QUERY_PROMPT_OVERRIDE = params.prompt else: _REWRITE_QUERY_PROMPT_OVERRIDE = None diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index 11754bae29..e22075a5f8 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -39,7 +39,7 @@ log = get_logger(name=__name__, category="providers::utils") from llama_stack.providers.utils.memory import rewrite_query_config -from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT +from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_REWRITE_PROMPT class ChunkForDeletion(BaseModel): @@ -312,7 +312,7 @@ async def _rewrite_query_for_file_search(self, query: str) -> str: ) else: # Use built-in default prompt and format with query - prompt = DEFAULT_QUERY_EXPANSION_PROMPT.format(query=query) + prompt = DEFAULT_QUERY_REWRITE_PROMPT.format(query=query) request = OpenAIChatCompletionRequestWithExtraBody( model=model_id, diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 07ec41bec1..4588fe7e55 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -1237,7 +1237,7 @@ async def test_query_expansion_functionality(vector_io_adapter): from unittest.mock import MagicMock from llama_stack.core.datatypes import QualifiedModel, RewriteQueryParams - from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_EXPANSION_PROMPT + from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_REWRITE_PROMPT from llama_stack.providers.utils.memory.rewrite_query_config import set_default_rewrite_query_config from llama_stack.providers.utils.memory.vector_store import VectorStoreWithIndex from llama_stack_api import QueryChunksResponse @@ -1288,7 +1288,7 @@ async def test_query_expansion_functionality(vector_io_adapter): # Verify default prompt is used (contains our built-in prompt text) prompt_text = chat_call_args.messages[0].content - expected_prompt = DEFAULT_QUERY_EXPANSION_PROMPT.format(query="test query") + expected_prompt = DEFAULT_QUERY_REWRITE_PROMPT.format(query="test query") assert prompt_text == expected_prompt # Verify default inference parameters are used From 5ec6f5dcff5dded95bc33c3c4b0b86b31fb06126 Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Tue, 25 Nov 2025 23:24:07 -0500 Subject: [PATCH 7/7] raise when querying without config Signed-off-by: Francisco Javier Arceo --- .../providers/utils/memory/vector_store.py | 5 +++-- 
 .../vector_io/test_vector_io_openai_vector_stores.py | 12 +++---------
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py
index e22075a5f8..e00537c856 100644
--- a/src/llama_stack/providers/utils/memory/vector_store.py
+++ b/src/llama_stack/providers/utils/memory/vector_store.py
@@ -296,8 +296,9 @@ async def insert_chunks(
     async def _rewrite_query_for_file_search(self, query: str) -> str:
         """Rewrite a search query using the globally configured LLM model for better retrieval results."""
         if not rewrite_query_config._DEFAULT_REWRITE_QUERY_MODEL:
-            log.debug("No default query rewriting model configured, using original query")
-            return query
+            raise ValueError(
+                "Query rewriting requested but not configured. Please configure rewrite_query_params.model in vector_stores config."
+            )

         model_id = f"{rewrite_query_config._DEFAULT_REWRITE_QUERY_MODEL.provider_id}/{rewrite_query_config._DEFAULT_REWRITE_QUERY_MODEL.model_id}"

diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
index 4588fe7e55..e0ae568c58 100644
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@@ -1325,7 +1325,7 @@ async def test_query_expansion_functionality(vector_io_adapter):
     assert chat_call_args.max_tokens == 150
     assert chat_call_args.temperature == 0.7

-    # Test 2: No query expansion when no global model is configured
+    # Test 2: Error when query rewriting is requested but no global model is configured
     mock_inference_api.reset_mock()
     mock_index.reset_mock()

     # Clear global config
     set_default_rewrite_query_config(None)

     params = {"rewrite_query": True, "max_chunks": 5}
-    result2 = await vector_store_with_index.query_chunks("test query", params)
-
-    # Verify chat completion was NOT called
-    mock_inference_api.openai_chat_completion.assert_not_called()
-    # But normal flow should still work
-    mock_inference_api.openai_embeddings.assert_called_once()
-    mock_index.query_vector.assert_called_once()
-    assert result2 == mock_response
+    with pytest.raises(ValueError, match="Query rewriting requested but not configured"):
+        await vector_store_with_index.query_chunks("test query", params)

     # Test 3: Normal behavior without rewrite_query parameter
     mock_inference_api.reset_mock()
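
For reference, the end state of this series in one self-contained sketch: query rewriting is opt-in per request (`params = {"rewrite_query": True}`) and, after PATCH 7/7, fails fast when no model is configured under `vector_stores.rewrite_query_params`. The snippet below mirrors that control flow in isolation; the string-valued model id and the `fake_chat_completion()` helper are stand-ins for `QualifiedModel` and the stack's inference API, so treat it as an illustration of the semantics rather than the shipped implementation.

    import asyncio

    # Default template from constants.py; {query} is the only placeholder.
    DEFAULT_QUERY_REWRITE_PROMPT = (
        "Expand this query with relevant synonyms and related terms. "
        "Return only the improved query, no explanations:\n\n{query}\n\nImproved query:"
    )

    # Module-level config, set once at stack startup (mirrors rewrite_query_config).
    _REWRITE_QUERY_MODEL: str | None = None
    _REWRITE_QUERY_PROMPT_OVERRIDE: str | None = None


    def set_default_rewrite_query_config(model: str | None, prompt: str | None = None) -> None:
        """Record the rewrite model; keep a prompt override only if it differs from the default."""
        global _REWRITE_QUERY_MODEL, _REWRITE_QUERY_PROMPT_OVERRIDE
        _REWRITE_QUERY_MODEL = model
        _REWRITE_QUERY_PROMPT_OVERRIDE = prompt if prompt and prompt != DEFAULT_QUERY_REWRITE_PROMPT else None


    async def fake_chat_completion(model: str, prompt: str, max_tokens: int, temperature: float) -> str:
        # Stand-in for inference_api.openai_chat_completion(); a real deployment calls an LLM here.
        return "machine learning ML statistical models neural networks"


    async def rewrite_query(query: str) -> str:
        # PATCH 7/7 semantics: an explicit rewrite request without configuration is an error,
        # not a silent fallback to the original query.
        if not _REWRITE_QUERY_MODEL:
            raise ValueError("Query rewriting requested but not configured.")
        template = _REWRITE_QUERY_PROMPT_OVERRIDE or DEFAULT_QUERY_REWRITE_PROMPT
        rewritten = await fake_chat_completion(_REWRITE_QUERY_MODEL, template.format(query=query), 100, 0.3)
        return rewritten.strip() or query


    if __name__ == "__main__":
        set_default_rewrite_query_config("test/llama")
        print(asyncio.run(rewrite_query("machine learning")))  # expanded query

        set_default_rewrite_query_config(None)
        try:
            asyncio.run(rewrite_query("machine learning"))
        except ValueError as err:
            print(f"expected failure: {err}")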