Skip to content

Commit 28e684c

Browse files
chore: remove quantisation check from validation
We were never checking against the quantisation of the provided documents, so tracking the expected and actual quantisation did not make sense either. This commit removes both of them from the tracked validation error and generates the error message without including anything about quantisation.
1 parent 0a5ce79 commit 28e684c

File tree

3 files changed

+66
-119
lines changed

3 files changed

+66
-119
lines changed

src/common/search/vectorSearchEmbeddingsManager.ts

Lines changed: 41 additions & 84 deletions
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,8 @@ export type VectorFieldIndexDefinition = {
2424
export type VectorFieldValidationError = {
2525
path: string;
2626
expectedNumDimensions: number;
27-
expectedQuantization: Quantization;
2827
actualNumDimensions: number | "unknown";
29-
actualQuantization: Quantization | "unknown";
30-
error: "dimension-mismatch" | "quantization-mismatch" | "not-a-vector" | "not-numeric";
28+
error: "dimension-mismatch" | "not-a-vector" | "not-numeric";
3129
};
3230

3331
export type EmbeddingNamespace = `${string}.${string}`;
@@ -116,9 +114,9 @@ export class VectorSearchEmbeddingsManager {
116114
if (embeddingValidationResults.length > 0) {
117115
const embeddingValidationMessages = embeddingValidationResults.map(
118116
(validation) =>
119-
`- Field ${validation.path} is an embedding with ${validation.expectedNumDimensions} dimensions and ${validation.expectedQuantization}` +
120-
` quantization, and the provided value is not compatible. Actual dimensions: ${validation.actualNumDimensions}, ` +
121-
`actual quantization: ${validation.actualQuantization}. Error: ${validation.error}`
117+
`- Field ${validation.path} is an embedding with ${validation.expectedNumDimensions} dimensions,` +
118+
` and the provided value is not compatible. Actual dimensions: ${validation.actualNumDimensions},` +
119+
` Error: ${validation.error}`
122120
);
123121

124122
throw new MongoDBError(
@@ -179,13 +177,11 @@ export class VectorSearchEmbeddingsManager {
179177
let fieldRef: unknown = document;
180178

181179
const constructError = (
182-
details: Partial<Pick<VectorFieldValidationError, "error" | "actualNumDimensions" | "actualQuantization">>
180+
details: Partial<Pick<VectorFieldValidationError, "error" | "actualNumDimensions">>
183181
): VectorFieldValidationError => ({
184182
path: definition.path,
185183
expectedNumDimensions: definition.numDimensions,
186-
expectedQuantization: definition.quantization,
187184
actualNumDimensions: details.actualNumDimensions ?? "unknown",
188-
actualQuantization: details.actualQuantization ?? "unknown",
189185
error: details.error ?? "not-a-vector",
190186
});
191187

@@ -197,94 +193,55 @@ export class VectorSearchEmbeddingsManager {
197193
}
198194
}
199195

200-
switch (definition.quantization) {
201-
// Quantization "none" means no quantization is performed, so
202-
// full-fidelity vectors are stored therefore the underlying vector
203-
// must be stored as an array of numbers having the same dimension
204-
// as that of the index.
205-
case "none":
206-
if (!Array.isArray(fieldRef)) {
196+
if (fieldRef instanceof BSON.Binary) {
197+
try {
198+
const elements = fieldRef.toFloat32Array();
199+
if (elements.length !== definition.numDimensions) {
207200
return constructError({
208-
error: "not-a-vector",
209-
});
210-
}
211-
212-
if (fieldRef.length !== definition.numDimensions) {
213-
return constructError({
214-
actualNumDimensions: fieldRef.length,
215-
actualQuantization: "none",
201+
actualNumDimensions: elements.length,
216202
error: "dimension-mismatch",
217203
});
218204
}
219205

220-
if (fieldRef.some((e) => !this.isANumber(e))) {
221-
return constructError({
222-
actualNumDimensions: fieldRef.length,
223-
actualQuantization: "none",
224-
error: "not-numeric",
225-
});
226-
}
227-
228206
return undefined;
229-
case "scalar":
230-
case "binary":
231-
if (fieldRef instanceof BSON.Binary) {
232-
try {
233-
const elements = fieldRef.toFloat32Array();
234-
if (elements.length !== definition.numDimensions) {
235-
return constructError({
236-
actualNumDimensions: elements.length,
237-
actualQuantization: "binary",
238-
error: "dimension-mismatch",
239-
});
240-
}
241-
242-
return undefined;
243-
} catch {
244-
// bits are also supported
245-
try {
246-
const bits = fieldRef.toBits();
247-
if (bits.length !== definition.numDimensions) {
248-
return constructError({
249-
actualNumDimensions: bits.length,
250-
actualQuantization: "binary",
251-
error: "dimension-mismatch",
252-
});
253-
}
254-
255-
return undefined;
256-
} catch {
257-
return constructError({
258-
actualQuantization: "binary",
259-
error: "not-a-vector",
260-
});
261-
}
262-
}
263-
} else {
264-
if (!Array.isArray(fieldRef)) {
207+
} catch {
208+
// bits are also supported
209+
try {
210+
const bits = fieldRef.toBits();
211+
if (bits.length !== definition.numDimensions) {
265212
return constructError({
266-
error: "not-a-vector",
267-
});
268-
}
269-
270-
if (fieldRef.length !== definition.numDimensions) {
271-
return constructError({
272-
actualNumDimensions: fieldRef.length,
273-
actualQuantization: "scalar",
213+
actualNumDimensions: bits.length,
274214
error: "dimension-mismatch",
275215
});
276216
}
277217

278-
if (fieldRef.some((e) => !this.isANumber(e))) {
279-
return constructError({
280-
actualNumDimensions: fieldRef.length,
281-
actualQuantization: "scalar",
282-
error: "not-numeric",
283-
});
284-
}
218+
return undefined;
219+
} catch {
220+
return constructError({
221+
error: "not-a-vector",
222+
});
285223
}
224+
}
225+
} else {
226+
if (!Array.isArray(fieldRef)) {
227+
return constructError({
228+
error: "not-a-vector",
229+
});
230+
}
286231

287-
break;
232+
if (fieldRef.length !== definition.numDimensions) {
233+
return constructError({
234+
actualNumDimensions: fieldRef.length,
235+
error: "dimension-mismatch",
236+
});
237+
}
238+
239+
if (fieldRef.some((e) => !this.isANumber(e))) {
240+
return constructError({
241+
actualNumDimensions: fieldRef.length,
242+
error: "not-numeric",
243+
});
244+
}
288245
}
289246

290247
return undefined;

tests/integration/tools/mongodb/create/insertMany.test.ts

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -175,7 +175,7 @@ describeWithMongoDB(
175175
{
176176
type: "vector",
177177
path: "embedding",
178-
numDimensions: 8,
178+
numDimensions: 256,
179179
similarity: "euclidean",
180180
quantization: "scalar",
181181
},
@@ -187,14 +187,26 @@ describeWithMongoDB(
187187
database: database,
188188
collection: "test",
189189
documents: [{ embedding: "oopsie" }],
190+
// Note: We are intentionally commenting out the
191+
// embeddingParameters so that we can simulate the idea
192+
// of unknown or mismatched quantization.
193+
194+
// embeddingParameters: { outputDimension: 256,
195+
// outputDtype: "float", model: "voyage-3-large", input:
196+
// [
197+
// {
198+
// embedding: "oopsie",
199+
// },
200+
// ],
201+
// },
190202
},
191203
});
192204

193205
const content = getResponseContent(response.content);
194206
expect(content).toContain("Error running insert-many");
195207
const untrustedContent = getDataFromUntrustedContent(content);
196208
expect(untrustedContent).toContain(
197-
"- Field embedding is an embedding with 8 dimensions and scalar quantization, and the provided value is not compatible. Actual dimensions: unknown, actual quantization: unknown. Error: not-a-vector"
209+
"- Field embedding is an embedding with 256 dimensions, and the provided value is not compatible. Actual dimensions: unknown, Error: not-a-vector"
198210
);
199211

200212
const oopsieCount = await collection.countDocuments({
@@ -608,6 +620,8 @@ describeWithMongoDB(
608620
{
609621
getUserConfig: () => ({
610622
...defaultTestConfig,
623+
// This is expected to be set through the CI env. When not set we
624+
// get a warning in the run logs.
611625
voyageApiKey: process.env.TEST_MDB_MCP_VOYAGE_API_KEY ?? "",
612626
previewFeatures: ["vectorSearch"],
613627
}),
@@ -639,7 +653,9 @@ describeWithMongoDB(
639653
{
640654
getUserConfig: () => ({
641655
...defaultTestConfig,
642-
voyageApiKey: "valid-key",
656+
// This is expected to be set through the CI env. When not set we
657+
// get a warning in the run logs.
658+
voyageApiKey: process.env.TEST_MDB_MCP_VOYAGE_API_KEY ?? "",
643659
previewFeatures: ["vectorSearch"],
644660
}),
645661
}

tests/unit/common/search/vectorSearchEmbeddingsManager.test.ts

Lines changed: 6 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -297,16 +297,9 @@ describe("VectorSearchEmbeddingsManager", () => {
297297
}
298298
);
299299

300-
it.each([
301-
{ path: "embedding_field", expectedQuantization: "scalar", actualQuantization: "scalar" },
302-
{
303-
path: "embedding_field_wo_quantization",
304-
expectedQuantization: "none",
305-
actualQuantization: "none",
306-
},
307-
] as const)(
308-
"documents inserting the field with wrong dimensions are invalid - path = $path",
309-
async ({ path, expectedQuantization, actualQuantization }) => {
300+
it.each(["embedding_field", "embedding_field_wo_quantization"] as const)(
301+
"documents inserting the field with wrong dimensions are invalid - path = $0",
302+
async (path) => {
310303
const result = await embeddings.findFieldsWithWrongEmbeddings(
311304
{ database, collection },
312305
{ [path]: [1, 2, 3] }
@@ -315,26 +308,17 @@ describe("VectorSearchEmbeddingsManager", () => {
315308
expect(result).toHaveLength(1);
316309
const expectedError: VectorFieldValidationError = {
317310
actualNumDimensions: 3,
318-
actualQuantization,
319311
error: "dimension-mismatch",
320312
expectedNumDimensions: 8,
321-
expectedQuantization,
322313
path,
323314
};
324315
expect(result[0]).toEqual(expectedError);
325316
}
326317
);
327318

328-
it.each([
329-
{ path: "embedding_field", expectedQuantization: "scalar", actualQuantization: "scalar" },
330-
{
331-
path: "embedding_field_wo_quantization",
332-
expectedQuantization: "none",
333-
actualQuantization: "none",
334-
},
335-
] as const)(
336-
"documents inserting the field with correct dimensions, but wrong type are invalid - $path",
337-
async ({ path, expectedQuantization, actualQuantization }) => {
319+
it.each(["embedding_field", "embedding_field_wo_quantization"] as const)(
320+
"documents inserting the field with correct dimensions, but wrong type are invalid - $0",
321+
async (path) => {
338322
const result = await embeddings.findFieldsWithWrongEmbeddings(
339323
{ database, collection },
340324
{ [path]: ["1", "2", "3", "4", "5", "6", "7", "8"] }
@@ -343,10 +327,8 @@ describe("VectorSearchEmbeddingsManager", () => {
343327
expect(result).toHaveLength(1);
344328
const expectedError: VectorFieldValidationError = {
345329
actualNumDimensions: 8,
346-
actualQuantization,
347330
error: "not-numeric",
348331
expectedNumDimensions: 8,
349-
expectedQuantization,
350332
path,
351333
};
352334

@@ -488,14 +470,6 @@ describe("VectorSearchEmbeddingsManager", () => {
488470
).rejects.toThrow(/Actual dimensions: 3/);
489471
});
490472

491-
it("throws error with details about quantization", async () => {
492-
await expect(
493-
embeddings.assertFieldsHaveCorrectEmbeddings({ database, collection }, [
494-
{ embedding_field: [1, 2, 3] },
495-
])
496-
).rejects.toThrow(/actual quantization: scalar/);
497-
});
498-
499473
it("throws error with details about error type", async () => {
500474
await expect(
501475
embeddings.assertFieldsHaveCorrectEmbeddings({ database, collection }, [

0 commit comments

Comments
 (0)