Skip to content

Commit b63916e

Browse files
committed
add support for creating atlas search indexes
1 parent 34c9c68 commit b63916e

File tree

5 files changed

+575
-64
lines changed

5 files changed

+575
-64
lines changed

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,6 @@ npx -y mongodb-mcp-server@latest --logPath=/path/to/logs --readOnly --indexCheck
663663
"args": [
664664
"-y",
665665
"mongodb-mcp-server",
666-
"--connectionString",
667666
"mongodb+srv://username:password@cluster.mongodb.net/myDatabase",
668667
"--readOnly"
669668
]

src/tools/mongodb/create/createIndex.ts

Lines changed: 143 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -6,61 +6,121 @@ import type { IndexDirection } from "mongodb";
66
import { quantizationEnum, similarityEnum } from "../../../common/search/vectorSearchEmbeddingsManager.js";
77

88
export class CreateIndexTool extends MongoDBToolBase {
9-
private vectorSearchIndexDefinition = z.object({
10-
type: z.literal("vectorSearch"),
11-
fields: z
12-
.array(
13-
z.discriminatedUnion("type", [
14-
z
15-
.object({
16-
type: z.literal("filter"),
17-
path: z
18-
.string()
19-
.describe(
20-
"Name of the field to index. For nested fields, use dot notation to specify path to embedded fields"
21-
),
22-
})
23-
.strict()
24-
.describe("Definition for a field that will be used for pre-filtering results."),
25-
z
26-
.object({
27-
type: z.literal("vector"),
28-
path: z
29-
.string()
30-
.describe(
31-
"Name of the field to index. For nested fields, use dot notation to specify path to embedded fields"
32-
),
33-
numDimensions: z
34-
.number()
35-
.min(1)
36-
.max(8192)
37-
.default(this.config.vectorSearchDimensions)
38-
.describe(
39-
"Number of vector dimensions that MongoDB Vector Search enforces at index-time and query-time"
40-
),
41-
similarity: similarityEnum
42-
.default(this.config.vectorSearchSimilarityFunction)
43-
.describe(
44-
"Vector similarity function to use to search for top K-nearest neighbors. You can set this field only for vector-type fields."
45-
),
46-
quantization: quantizationEnum
47-
.default("none")
9+
private vectorSearchIndexDefinition = z
10+
.object({
11+
type: z.literal("vectorSearch"),
12+
fields: z
13+
.array(
14+
z.discriminatedUnion("type", [
15+
z
16+
.object({
17+
type: z.literal("filter"),
18+
path: z
19+
.string()
20+
.describe(
21+
"Name of the field to index. For nested fields, use dot notation to specify path to embedded fields"
22+
),
23+
})
24+
.strict()
25+
.describe("Definition for a field that will be used for pre-filtering results."),
26+
z
27+
.object({
28+
type: z.literal("vector"),
29+
path: z
30+
.string()
31+
.describe(
32+
"Name of the field to index. For nested fields, use dot notation to specify path to embedded fields"
33+
),
34+
numDimensions: z
35+
.number()
36+
.min(1)
37+
.max(8192)
38+
.default(this.config.vectorSearchDimensions)
39+
.describe(
40+
"Number of vector dimensions that MongoDB Vector Search enforces at index-time and query-time"
41+
),
42+
similarity: similarityEnum
43+
.default(this.config.vectorSearchSimilarityFunction)
44+
.describe(
45+
"Vector similarity function to use to search for top K-nearest neighbors. You can set this field only for vector-type fields."
46+
),
47+
quantization: quantizationEnum
48+
.default("none")
49+
.describe(
50+
"Type of automatic vector quantization for your vectors. Use this setting only if your embeddings are float or double vectors."
51+
),
52+
})
53+
.strict()
54+
.describe("Definition for a field that contains vector embeddings."),
55+
])
56+
)
57+
.nonempty()
58+
.refine((fields) => fields.some((f) => f.type === "vector"), {
59+
message: "At least one vector field must be defined",
60+
})
61+
.describe(
62+
"Definitions for the vector and filter fields to index, one definition per document. You must specify `vector` for fields that contain vector embeddings and `filter` for additional fields to filter on. At least one vector-type field definition is required."
63+
),
64+
})
65+
.describe("Definition for a Vector Search index.");
66+
67+
private atlasSearchIndexDefinition = z
68+
.object({
69+
type: z.literal("search"),
70+
analyzer: z
71+
.string()
72+
.optional()
73+
.default("lucene.standard")
74+
.describe(
75+
"The analyzer to use for the index. Can be one of the built-in lucene analyzers (`lucene.standard`, `lucene.simple`, `lucene.whitespace`, `lucene.keyword`), a language-specific analyzer, such as `lucene.cjk` or `lucene.czech`, or a custom analyzer defined in the Atlas UI."
76+
),
77+
mappings: z
78+
.object({
79+
dynamic: z
80+
.boolean()
81+
.optional()
82+
.default(false)
83+
.describe(
84+
"Enables or disables dynamic mapping of fields for this index. If set to true, Atlas Search recursively indexes all dynamically indexable fields. If set to false, you must specify individual fields to index using mappings.fields."
85+
),
86+
fields: z
87+
.record(
88+
z.string().describe("The field name"),
89+
z
90+
.object({
91+
type: z
92+
.enum([
93+
"autocomplete",
94+
"boolean",
95+
"date",
96+
"document",
97+
"embeddedDocuments",
98+
"geo",
99+
"number",
100+
"objectId",
101+
"string",
102+
"token",
103+
"uuid",
104+
])
105+
.describe("The field type"),
106+
})
107+
.passthrough()
48108
.describe(
49-
"Type of automatic vector quantization for your vectors. Use this setting only if your embeddings are float or double vectors."
50-
),
51-
})
52-
.strict()
53-
.describe("Definition for a field that contains vector embeddings."),
54-
])
55-
)
56-
.nonempty()
57-
.refine((fields) => fields.some((f) => f.type === "vector"), {
58-
message: "At least one vector field must be defined",
59-
})
60-
.describe(
61-
"Definitions for the vector and filter fields to index, one definition per document. You must specify `vector` for fields that contain vector embeddings and `filter` for additional fields to filter on. At least one vector-type field definition is required."
62-
),
63-
});
109+
"The field index definition. It must contain the field type, as well as any additional options for that field type."
110+
)
111+
)
112+
.optional()
113+
.describe("The field mapping definitions. If `dynamic` is set to `false`, this is required."),
114+
})
115+
.refine((data) => data.dynamic !== !!(data.fields && Object.keys(data.fields).length > 0), {
116+
message:
117+
"Either `mappings.dynamic` must be true or at least one field must be defined in `mappings.fields`",
118+
})
119+
.describe(
120+
"Document describing the index to create. Either `dynamic` must be true or at least one field must be defined in the `fields` document."
121+
),
122+
})
123+
.describe("Definition for an Atlas Search (lexical) index.");
64124

65125
public name = "create-index";
66126
protected description = "Create an index for a collection";
@@ -70,15 +130,19 @@ export class CreateIndexTool extends MongoDBToolBase {
70130
definition: z
71131
.array(
72132
z.discriminatedUnion("type", [
73-
z.object({
74-
type: z.literal("classic"),
75-
keys: z.object({}).catchall(z.custom<IndexDirection>()).describe("The index definition"),
76-
}),
77-
...(this.isFeatureEnabled("vectorSearch") ? [this.vectorSearchIndexDefinition] : []),
133+
z
134+
.object({
135+
type: z.literal("classic"),
136+
keys: z.object({}).catchall(z.custom<IndexDirection>()).describe("The index definition"),
137+
})
138+
.describe("Definition for a MongoDB index (e.g. ascending/descending/geospatial)."),
139+
...(this.isFeatureEnabled("vectorSearch")
140+
? [this.vectorSearchIndexDefinition, this.atlasSearchIndexDefinition]
141+
: []),
78142
])
79143
)
80144
.describe(
81-
"The index definition. Use 'classic' for standard indexes and 'vectorSearch' for vector search indexes"
145+
"The index definition. Use 'classic' for standard indexes, 'vectorSearch' for vector search indexes, and 'search' for Atlas Search (lexical) indexes."
82146
),
83147
};
84148

@@ -128,6 +192,25 @@ export class CreateIndexTool extends MongoDBToolBase {
128192
this.session.vectorSearchEmbeddingsManager.cleanupEmbeddingsForNamespace({ database, collection });
129193
}
130194

195+
break;
196+
case "search":
197+
{
198+
await this.ensureSearchIsSupported();
199+
indexes = await provider.createSearchIndexes(database, collection, [
200+
{
201+
name,
202+
definition: {
203+
mappings: definition.mappings,
204+
analyzer: definition.analyzer,
205+
},
206+
type: "search",
207+
},
208+
]);
209+
210+
responseClarification =
211+
" Since this is a search index, it may take a while for the index to build. Use the `list-indexes` tool to check the index status.";
212+
}
213+
131214
break;
132215
}
133216

tests/accuracy/createIndex.test.ts

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,23 @@
1+
import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
2+
import { formatUntrustedData } from "../../src/tools/tool.js";
3+
import type { MockedTools } from "./sdk/accuracyTestingClient.js";
14
import { describeAccuracyTests } from "./sdk/describeAccuracyTests.js";
25
import { Matcher } from "./sdk/matcher.js";
36

7+
const mockedTools: MockedTools = {
8+
"collection-indexes": ({ collection }: Record<string, unknown>): CallToolResult => {
9+
return {
10+
content: formatUntrustedData(
11+
`Found 1 indexes in the collection "${collection as string}".`,
12+
JSON.stringify({
13+
name: "_id_",
14+
key: { _id: 1 },
15+
})
16+
),
17+
};
18+
},
19+
};
20+
421
describeAccuracyTests(
522
[
623
{
@@ -23,6 +40,7 @@ describeAccuracyTests(
2340
},
2441
},
2542
],
43+
mockedTools,
2644
},
2745
{
2846
prompt: "Create a text index on title field in 'mflix.movies' namespace",
@@ -44,6 +62,7 @@ describeAccuracyTests(
4462
},
4563
},
4664
],
65+
mockedTools,
4766
},
4867
{
4968
prompt: "Create a vector search index on 'mflix.movies' namespace on the 'plotSummary' field. The index should use 1024 dimensions.",
@@ -69,6 +88,7 @@ describeAccuracyTests(
6988
},
7089
},
7190
],
91+
mockedTools,
7292
},
7393
{
7494
prompt: "Create a vector search index on 'mflix.movies' namespace with on the 'plotSummary' field and 'genre' field, both of which contain vector embeddings. Pick a sensible number of dimensions for a voyage 3.5 model.",
@@ -105,6 +125,7 @@ describeAccuracyTests(
105125
},
106126
},
107127
],
128+
mockedTools,
108129
},
109130
{
110131
prompt: "Create a vector search index on 'mflix.movies' namespace where the 'plotSummary' field is indexed as a 1024-dimensional vector and the 'releaseDate' field is indexed as a regular field.",
@@ -134,6 +155,95 @@ describeAccuracyTests(
134155
},
135156
},
136157
],
158+
mockedTools,
159+
},
160+
{
161+
prompt: "Create an Atlas search index on 'mflix.movies' namespace with dynamic mappings enabled",
162+
expectedToolCalls: [
163+
{
164+
toolName: "create-index",
165+
parameters: {
166+
database: "mflix",
167+
collection: "movies",
168+
name: Matcher.anyOf(Matcher.undefined, Matcher.string()),
169+
definition: [
170+
{
171+
type: "search",
172+
analyzer: Matcher.anyOf(Matcher.undefined, Matcher.value("lucene.standard")),
173+
mappings: {
174+
dynamic: true,
175+
},
176+
},
177+
],
178+
},
179+
},
180+
],
181+
mockedTools,
182+
},
183+
{
184+
prompt: "Create an Atlas search index on 'mflix.movies' namespace for searching on 'title' as string field and 'year' as number field",
185+
expectedToolCalls: [
186+
{
187+
toolName: "create-index",
188+
parameters: {
189+
database: "mflix",
190+
collection: "movies",
191+
name: Matcher.anyOf(Matcher.undefined, Matcher.string()),
192+
definition: [
193+
{
194+
type: "search",
195+
analyzer: Matcher.anyOf(Matcher.undefined, Matcher.value("lucene.standard")),
196+
mappings: {
197+
dynamic: Matcher.anyOf(Matcher.undefined, Matcher.value(false)),
198+
fields: {
199+
title: {
200+
type: "string",
201+
},
202+
year: {
203+
type: "number",
204+
},
205+
},
206+
},
207+
},
208+
],
209+
},
210+
},
211+
],
212+
mockedTools,
213+
},
214+
{
215+
prompt: "Create an Atlas search index on 'mflix.movies' namespace with a custom 'lucene.keyword' analyzer, where 'title' is indexed as an autocomplete field and 'genres' as a string array field, and 'released' as a date field",
216+
expectedToolCalls: [
217+
{
218+
toolName: "create-index",
219+
parameters: {
220+
database: "mflix",
221+
collection: "movies",
222+
name: Matcher.anyOf(Matcher.undefined, Matcher.string()),
223+
definition: [
224+
{
225+
type: "search",
226+
analyzer: "lucene.keyword",
227+
mappings: {
228+
dynamic: Matcher.anyOf(Matcher.undefined, Matcher.value(false)),
229+
fields: {
230+
title: {
231+
type: "autocomplete",
232+
},
233+
genres: {
234+
type: "string",
235+
},
236+
released: {
237+
type: "date",
238+
},
239+
},
240+
},
241+
},
242+
],
243+
},
244+
},
245+
],
246+
mockedTools,
137247
},
138248
],
139249
{

tests/accuracy/sdk/accuracyTestingClient.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ export class AccuracyTestingClient {
9191
return [`--${key}`, value];
9292
});
9393

94-
const args = [MCP_SERVER_CLI_SCRIPT, "--connectionString", mdbConnectionString, ...additionalArgs];
94+
const args = [MCP_SERVER_CLI_SCRIPT, mdbConnectionString, ...additionalArgs];
9595

9696
const clientTransport = new StdioClientTransport({
9797
command: process.execPath,

0 commit comments

Comments
 (0)