From 3598e1c4fe4b694e6d26b6f58fb2c39b9d57a4b0 Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Thu, 27 Nov 2025 11:11:40 +0200 Subject: [PATCH 1/7] parse mcp tool json --- cmd/src/mcp_parse.go | 171 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 cmd/src/mcp_parse.go diff --git a/cmd/src/mcp_parse.go b/cmd/src/mcp_parse.go new file mode 100644 index 0000000000..bb18e44657 --- /dev/null +++ b/cmd/src/mcp_parse.go @@ -0,0 +1,171 @@ +//go:generate ../../scripts/gen-mcp-tool-json.sh mcp_tools.json +package main + +import ( + _ "embed" + "encoding/json" + "errors" + "fmt" +) + +//go:embed mcp_tools.json +var mcpToolListJSON []byte + +type MCPToolDef struct { + Name string `json:"name"` + Description string `json:"description"` + InputSchema Schema `json:"inputSchema"` + OutputSchema Schema `json:"outputSchema"` +} + +type InputProperty struct { + Name string + Type string + Description string + ItemType string +} + +type Schema struct { + Schema string `json:"$schema"` + SchemaObject +} + +type RawSchema struct { + Type string `json:"type"` + Description string `json:"description"` + Schema string `json:"$schema"` + Required []string `json:"required,omitempty"` + AdditionalProperties bool `json:"additionalProperties"` + Properties map[string]json.RawMessage `json:"properties"` + Items json.RawMessage `json:"items"` +} + +type SchemaValue interface { + Type() string +} + +type SchemaObject struct { + Kind string `json:"type"` + Description string `json:"description"` + Required []string `json:"required,omitempty"` + AdditionalProperties bool `json:"additionalProperties"` + Properties map[string]SchemaValue `json:"properties"` +} + +func (s SchemaObject) Type() string { return s.Kind } + +type SchemaArray struct { + Kind string `json:"type"` + Description string `json:"description"` + Items []SchemaValue `json:"items"` +} + +func (s SchemaArray) Type() string { return s.Kind } + +type SchemaPrimitive struct { + Description string `json:"description"` + Kind string `json:"type"` +} + +func (s SchemaPrimitive) Type() string { return s.Kind } + +type PropertyType struct { + Type string `json:"type"` +} + +type Parser struct { + errors []error +} + +func (p *Parser) parseRootSchema(r RawSchema) Schema { + return Schema{ + Schema: r.Schema, + SchemaObject: SchemaObject{ + Kind: r.Type, + Description: r.Description, + Required: r.Required, + AdditionalProperties: r.AdditionalProperties, + Properties: p.parseProperties(r.Properties), + }, + } +} + +func (p *Parser) parseSchema(r *RawSchema) SchemaValue { + switch r.Type { + case "object": + return &SchemaObject{ + Kind: r.Type, + Description: r.Description, + Required: r.Required, + AdditionalProperties: r.AdditionalProperties, + Properties: p.parseProperties(r.Properties), + } + case "array": + var items []SchemaValue + if len(r.Items) > 0 { + var itemRaw RawSchema + if err := json.Unmarshal(r.Items, &itemRaw); err == nil { + items = append(items, p.parseSchema(&itemRaw)) + } else { + p.errors = append(p.errors, fmt.Errorf("failed to unmarshal array items: %w", err)) + } + } + return &SchemaArray{ + Kind: r.Type, + Description: r.Description, + Items: items, + } + default: + return &SchemaPrimitive{ + Kind: r.Type, + Description: r.Description, + } + } +} + +func (p *Parser) parseProperties(props map[string]json.RawMessage) map[string]SchemaValue { + res := make(map[string]SchemaValue) + for name, raw := range props { + var r RawSchema + if err := json.Unmarshal(raw, &r); err != nil { + p.errors = append(p.errors, fmt.Errorf("failed to parse property %q: %w", name, err)) + continue + } + res[name] = p.parseSchema(&r) + } + return res +} + +func LoadMCPToolDefinitions(data []byte) (map[string]*MCPToolDef, error) { + defs := struct { + Tools []struct { + Name string `json:"name"` + Description string `json:"description"` + InputSchema RawSchema `json:"inputSchema"` + OutputSchema RawSchema `json:"outputSchema"` + } `json:"tools"` + }{} + + if err := json.Unmarshal(data, &defs); err != nil { + // TODO: think we should panic instead + return nil, err + } + + tools := map[string]*MCPToolDef{} + parser := &Parser{} + + for _, t := range defs.Tools { + tools[t.Name] = &MCPToolDef{ + Name: t.Name, + Description: t.Description, + InputSchema: parser.parseRootSchema(t.InputSchema), + OutputSchema: parser.parseRootSchema(t.OutputSchema), + } + } + + if len(parser.errors) > 0 { + return tools, errors.Join(parser.errors...) + } + + return tools, nil +} From 130df60ffa15c25c9d1d9c45c54cd0e1aafb05e7 Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Thu, 27 Nov 2025 11:22:45 +0200 Subject: [PATCH 2/7] fix parsing for when items: true --- cmd/src/mcp_parse.go | 22 ++++++--- cmd/src/mcp_parse_test.go | 100 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 8 deletions(-) create mode 100644 cmd/src/mcp_parse_test.go diff --git a/cmd/src/mcp_parse.go b/cmd/src/mcp_parse.go index bb18e44657..ea8756facf 100644 --- a/cmd/src/mcp_parse.go +++ b/cmd/src/mcp_parse.go @@ -55,9 +55,9 @@ type SchemaObject struct { func (s SchemaObject) Type() string { return s.Kind } type SchemaArray struct { - Kind string `json:"type"` - Description string `json:"description"` - Items []SchemaValue `json:"items"` + Kind string `json:"type"` + Description string `json:"description"` + Items SchemaValue `json:"items,omitempty"` } func (s SchemaArray) Type() string { return s.Kind } @@ -101,13 +101,19 @@ func (p *Parser) parseSchema(r *RawSchema) SchemaValue { Properties: p.parseProperties(r.Properties), } case "array": - var items []SchemaValue + var items SchemaValue if len(r.Items) > 0 { - var itemRaw RawSchema - if err := json.Unmarshal(r.Items, &itemRaw); err == nil { - items = append(items, p.parseSchema(&itemRaw)) + var boolItems bool + if err := json.Unmarshal(r.Items, &boolItems); err == nil { + // Sometimes items is defined as "items: true", so we handle it here and + // consider it "empty" array } else { - p.errors = append(p.errors, fmt.Errorf("failed to unmarshal array items: %w", err)) + var itemRaw RawSchema + if err := json.Unmarshal(r.Items, &itemRaw); err == nil { + items = p.parseSchema(&itemRaw) + } else { + p.errors = append(p.errors, fmt.Errorf("failed to unmarshal array items: %w", err)) + } } } return &SchemaArray{ diff --git a/cmd/src/mcp_parse_test.go b/cmd/src/mcp_parse_test.go new file mode 100644 index 0000000000..41e9fd90e2 --- /dev/null +++ b/cmd/src/mcp_parse_test.go @@ -0,0 +1,100 @@ +package main + +import ( + "testing" +) + +func TestLoadMCPToolDefinitions(t *testing.T) { + toolJSON := []byte(`{ + "tools": [ + { + "name": "test_tool", + "description": "test description", + "inputSchema": { + "type": "object", + "$schema": "https://localhost/schema-draft/2025-07", + "properties": { + "tags": { + "type": "array", + "items": { + "type": "object", + "properties": { + "key": { "type": "string" }, + "value": { "type": "string" } + } + } + } + } + }, + "outputSchema": { + "type": "object", + "$schema": "https://localhost/schema-draft/2025-07", + "properties": { + "result": { "type": "string" } + } + } + } + ] + }`) + + tools, err := LoadMCPToolDefinitions(toolJSON) + if err != nil { + t.Fatalf("Failed to load tool definitions: %v", err) + } + + if len(tools) != 1 { + t.Fatalf("Expected 1 tool, got %d", len(tools)) + } + + tool := tools["test_tool"] + if tool == nil { + t.Fatal("Tool 'test_tool' not found") + } + + if tool.Name != "test_tool" { + t.Errorf("Expected name 'test_tool', got '%s'", tool.Name) + } + + inputSchema := tool.InputSchema + outputSchema := tool.OutputSchema + schemaVersion := "https://localhost/schema-draft/2025-07" + + if inputSchema.Schema != schemaVersion { + t.Errorf("Expected input schema version %q, got %q", schemaVersion, inputSchema.Schema) + } + if outputSchema.Schema != schemaVersion { + t.Errorf("Expected output schema version %q, got %q", schemaVersion, outputSchema.Schema) + } + + tagsProp, ok := inputSchema.Properties["tags"] + if !ok { + t.Fatal("Property 'tags' not found in inputSchema") + } + + if tagsProp.Type() != "array" { + t.Errorf("Expected tags type 'array', got '%s'", tagsProp.Type()) + } + + arraySchema, ok := tagsProp.(*SchemaArray) + if !ok { + t.Fatal("Expected SchemaArray for tags") + } + + if arraySchema.Items == nil { + t.Fatal("Expected items schema in array, got nil") + } + + itemSchema := arraySchema.Items + if itemSchema.Type() != "object" { + t.Errorf("Expected item type 'object', got '%s'", itemSchema.Type()) + } + + objectSchema, ok := itemSchema.(*SchemaObject) + if !ok { + t.Fatal("Expected SchemaObject for item") + } + + if _, ok := objectSchema.Properties["key"]; !ok { + t.Error("Property 'key' not found in item schema") + } +} From 42d854f2dcc48345fff76cc1b8cdc15a25c58d4e Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Fri, 28 Nov 2025 11:22:35 +0200 Subject: [PATCH 3/7] use lib/errors --- cmd/src/mcp_parse.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cmd/src/mcp_parse.go b/cmd/src/mcp_parse.go index ea8756facf..3f10fae62f 100644 --- a/cmd/src/mcp_parse.go +++ b/cmd/src/mcp_parse.go @@ -4,8 +4,9 @@ package main import ( _ "embed" "encoding/json" - "errors" "fmt" + + "github.com/sourcegraph/sourcegraph/lib/errors" ) //go:embed mcp_tools.json @@ -112,7 +113,7 @@ func (p *Parser) parseSchema(r *RawSchema) SchemaValue { if err := json.Unmarshal(r.Items, &itemRaw); err == nil { items = p.parseSchema(&itemRaw) } else { - p.errors = append(p.errors, fmt.Errorf("failed to unmarshal array items: %w", err)) + p.errors = append(p.errors, errors.Errorf("failed to unmarshal array items: %w", err)) } } } @@ -170,7 +171,7 @@ func LoadMCPToolDefinitions(data []byte) (map[string]*MCPToolDef, error) { } if len(parser.errors) > 0 { - return tools, errors.Join(parser.errors...) + return tools, errors.Append(nil, parser.errors...) } return tools, nil From 8b3349160c63dda538c07933f6f0bd5685d5ade6 Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Fri, 28 Nov 2025 11:26:23 +0200 Subject: [PATCH 4/7] temporarily ignore embedded json --- cmd/src/mcp_parse.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/src/mcp_parse.go b/cmd/src/mcp_parse.go index 3f10fae62f..cadd967c61 100644 --- a/cmd/src/mcp_parse.go +++ b/cmd/src/mcp_parse.go @@ -10,7 +10,7 @@ import ( ) //go:embed mcp_tools.json -var mcpToolListJSON []byte +var _ []byte type MCPToolDef struct { Name string `json:"name"` From 184659688ea191544247ec5f72993e7fad829883 Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Tue, 2 Dec 2025 11:25:06 +0200 Subject: [PATCH 5/7] move mcp files to internal/mcp --- {cmd/src => internal/mcp}/mcp_parse.go | 2 +- {cmd/src => internal/mcp}/mcp_parse_test.go | 0 {cmd/src => internal/mcp}/mcp_tools.json | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename {cmd/src => internal/mcp}/mcp_parse.go (99%) rename {cmd/src => internal/mcp}/mcp_parse_test.go (100%) rename {cmd/src => internal/mcp}/mcp_tools.json (100%) diff --git a/cmd/src/mcp_parse.go b/internal/mcp/mcp_parse.go similarity index 99% rename from cmd/src/mcp_parse.go rename to internal/mcp/mcp_parse.go index cadd967c61..ba034504dc 100644 --- a/cmd/src/mcp_parse.go +++ b/internal/mcp/mcp_parse.go @@ -1,5 +1,5 @@ //go:generate ../../scripts/gen-mcp-tool-json.sh mcp_tools.json -package main +package mcp import ( _ "embed" diff --git a/cmd/src/mcp_parse_test.go b/internal/mcp/mcp_parse_test.go similarity index 100% rename from cmd/src/mcp_parse_test.go rename to internal/mcp/mcp_parse_test.go diff --git a/cmd/src/mcp_tools.json b/internal/mcp/mcp_tools.json similarity index 100% rename from cmd/src/mcp_tools.json rename to internal/mcp/mcp_tools.json From 4c6a6bd494e05ef8d5d7a40c36e9434fa9a0f7d0 Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Tue, 2 Dec 2025 11:31:01 +0200 Subject: [PATCH 6/7] unexport and remove unused structs --- internal/mcp/mcp_parse.go | 85 +++++++++++++++------------------- internal/mcp/mcp_parse_test.go | 6 +-- 2 files changed, 40 insertions(+), 51 deletions(-) diff --git a/internal/mcp/mcp_parse.go b/internal/mcp/mcp_parse.go index ba034504dc..d1e1b93828 100644 --- a/internal/mcp/mcp_parse.go +++ b/internal/mcp/mcp_parse.go @@ -19,13 +19,6 @@ type MCPToolDef struct { OutputSchema Schema `json:"outputSchema"` } -type InputProperty struct { - Name string - Type string - Description string - ItemType string -} - type Schema struct { Schema string `json:"$schema"` SchemaObject @@ -70,15 +63,45 @@ type SchemaPrimitive struct { func (s SchemaPrimitive) Type() string { return s.Kind } -type PropertyType struct { - Type string `json:"type"` +type parser struct { + errors []error } -type Parser struct { - errors []error +func LoadToolDefinitions(data []byte) (map[string]*MCPToolDef, error) { + defs := struct { + Tools []struct { + Name string `json:"name"` + Description string `json:"description"` + InputSchema RawSchema `json:"inputSchema"` + OutputSchema RawSchema `json:"outputSchema"` + } `json:"tools"` + }{} + + if err := json.Unmarshal(data, &defs); err != nil { + // TODO: think we should panic instead + return nil, err + } + + tools := map[string]*MCPToolDef{} + parser := &parser{} + + for _, t := range defs.Tools { + tools[t.Name] = &MCPToolDef{ + Name: t.Name, + Description: t.Description, + InputSchema: parser.parseRootSchema(t.InputSchema), + OutputSchema: parser.parseRootSchema(t.OutputSchema), + } + } + + if len(parser.errors) > 0 { + return tools, errors.Append(nil, parser.errors...) + } + + return tools, nil } -func (p *Parser) parseRootSchema(r RawSchema) Schema { +func (p *parser) parseRootSchema(r RawSchema) Schema { return Schema{ Schema: r.Schema, SchemaObject: SchemaObject{ @@ -91,7 +114,7 @@ func (p *Parser) parseRootSchema(r RawSchema) Schema { } } -func (p *Parser) parseSchema(r *RawSchema) SchemaValue { +func (p *parser) parseSchema(r *RawSchema) SchemaValue { switch r.Type { case "object": return &SchemaObject{ @@ -130,7 +153,7 @@ func (p *Parser) parseSchema(r *RawSchema) SchemaValue { } } -func (p *Parser) parseProperties(props map[string]json.RawMessage) map[string]SchemaValue { +func (p *parser) parseProperties(props map[string]json.RawMessage) map[string]SchemaValue { res := make(map[string]SchemaValue) for name, raw := range props { var r RawSchema @@ -142,37 +165,3 @@ func (p *Parser) parseProperties(props map[string]json.RawMessage) map[string]Sc } return res } - -func LoadMCPToolDefinitions(data []byte) (map[string]*MCPToolDef, error) { - defs := struct { - Tools []struct { - Name string `json:"name"` - Description string `json:"description"` - InputSchema RawSchema `json:"inputSchema"` - OutputSchema RawSchema `json:"outputSchema"` - } `json:"tools"` - }{} - - if err := json.Unmarshal(data, &defs); err != nil { - // TODO: think we should panic instead - return nil, err - } - - tools := map[string]*MCPToolDef{} - parser := &Parser{} - - for _, t := range defs.Tools { - tools[t.Name] = &MCPToolDef{ - Name: t.Name, - Description: t.Description, - InputSchema: parser.parseRootSchema(t.InputSchema), - OutputSchema: parser.parseRootSchema(t.OutputSchema), - } - } - - if len(parser.errors) > 0 { - return tools, errors.Append(nil, parser.errors...) - } - - return tools, nil -} diff --git a/internal/mcp/mcp_parse_test.go b/internal/mcp/mcp_parse_test.go index 41e9fd90e2..e29281e9a3 100644 --- a/internal/mcp/mcp_parse_test.go +++ b/internal/mcp/mcp_parse_test.go @@ -1,10 +1,10 @@ -package main +package mcp import ( "testing" ) -func TestLoadMCPToolDefinitions(t *testing.T) { +func TestLoadToolDefinitions(t *testing.T) { toolJSON := []byte(`{ "tools": [ { @@ -37,7 +37,7 @@ func TestLoadMCPToolDefinitions(t *testing.T) { ] }`) - tools, err := LoadMCPToolDefinitions(toolJSON) + tools, err := LoadToolDefinitions(toolJSON) if err != nil { t.Fatalf("Failed to load tool definitions: %v", err) } From 33e5f5895c654d06914176021544975819b9b69e Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Tue, 2 Dec 2025 12:07:14 +0200 Subject: [PATCH 7/7] rename MCPToolDef to ToolDef and move around structs --- internal/mcp/mcp_parse.go | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/internal/mcp/mcp_parse.go b/internal/mcp/mcp_parse.go index d1e1b93828..b5fb843804 100644 --- a/internal/mcp/mcp_parse.go +++ b/internal/mcp/mcp_parse.go @@ -12,28 +12,28 @@ import ( //go:embed mcp_tools.json var _ []byte -type MCPToolDef struct { +type ToolDef struct { Name string `json:"name"` Description string `json:"description"` InputSchema Schema `json:"inputSchema"` OutputSchema Schema `json:"outputSchema"` } -type Schema struct { - Schema string `json:"$schema"` - SchemaObject -} - type RawSchema struct { Type string `json:"type"` Description string `json:"description"` - Schema string `json:"$schema"` + SchemaVersion string `json:"$schema"` Required []string `json:"required,omitempty"` AdditionalProperties bool `json:"additionalProperties"` Properties map[string]json.RawMessage `json:"properties"` Items json.RawMessage `json:"items"` } +type Schema struct { + Schema string `json:"$schema"` + SchemaObject +} + type SchemaValue interface { Type() string } @@ -67,7 +67,7 @@ type parser struct { errors []error } -func LoadToolDefinitions(data []byte) (map[string]*MCPToolDef, error) { +func LoadToolDefinitions(data []byte) (map[string]*ToolDef, error) { defs := struct { Tools []struct { Name string `json:"name"` @@ -78,15 +78,14 @@ func LoadToolDefinitions(data []byte) (map[string]*MCPToolDef, error) { }{} if err := json.Unmarshal(data, &defs); err != nil { - // TODO: think we should panic instead return nil, err } - tools := map[string]*MCPToolDef{} + tools := map[string]*ToolDef{} parser := &parser{} for _, t := range defs.Tools { - tools[t.Name] = &MCPToolDef{ + tools[t.Name] = &ToolDef{ Name: t.Name, Description: t.Description, InputSchema: parser.parseRootSchema(t.InputSchema), @@ -103,7 +102,7 @@ func LoadToolDefinitions(data []byte) (map[string]*MCPToolDef, error) { func (p *parser) parseRootSchema(r RawSchema) Schema { return Schema{ - Schema: r.Schema, + Schema: r.SchemaVersion, SchemaObject: SchemaObject{ Kind: r.Type, Description: r.Description,