diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f2375153..45b464ed 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -68,6 +68,9 @@ jobs: - name: Set up project run: | + # Install sponge. + sudo apt-get install moreutils + # `setuptools 64` adds support for editable install hooks (PEP 660). # https://github.com/pypa/setuptools/blob/main/CHANGES.rst#v6400 # `setuptools 71` significantly re-vendors setuptools packages, causing hiccups on Python 3.8. diff --git a/CHANGES.md b/CHANGES.md index cbe0abc7..12baa893 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,8 @@ # Changelog ## Unreleased +- MCP: Add subsystem providing a few server and client utilities through + the `ctk query mcp {list,inquire,launch}` subcommands. ## 2025/01/31 v0.0.31 - Fixed connectivity for `jobstats collect` diff --git a/cratedb_toolkit/query/cli.py b/cratedb_toolkit/query/cli.py index 3333cf22..9dc649a5 100644 --- a/cratedb_toolkit/query/cli.py +++ b/cratedb_toolkit/query/cli.py @@ -5,6 +5,7 @@ from ..util.cli import boot_click from .convert.cli import convert_query +from .mcp.cli import cli as mcp_cli logger = logging.getLogger(__name__) @@ -16,9 +17,10 @@ @click.pass_context def cli(ctx: click.Context, verbose: bool, debug: bool): """ - Query expression utilities. + Query utilities. 
""" return boot_click(ctx, verbose, debug) cli.add_command(convert_query, name="convert") +cli.add_command(mcp_cli, name="mcp") diff --git a/cratedb_toolkit/query/mcp/__init__.py b/cratedb_toolkit/query/mcp/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cratedb_toolkit/query/mcp/cli.py b/cratedb_toolkit/query/mcp/cli.py new file mode 100644 index 00000000..e708d69c --- /dev/null +++ b/cratedb_toolkit/query/mcp/cli.py @@ -0,0 +1,92 @@ +import asyncio +import logging + +import click +from click_aliases import ClickAliasedGroup + +from cratedb_toolkit.query.mcp.inquiry import McpServerInquiry +from cratedb_toolkit.query.mcp.registry import McpServerRegistry +from cratedb_toolkit.query.mcp.util import format_output +from cratedb_toolkit.util.cli import boot_click + +logger = logging.getLogger(__name__) + + +def get_format_option(default="markdown"): + return click.option( + "--format", + "format_", + type=click.Choice(["markdown", "json", "yaml"]), + required=True, + default=default, + help="Select output format", + ) + + +format_option_markdown = get_format_option(default="markdown") +format_option_json = get_format_option(default="json") + + +@click.group(cls=ClickAliasedGroup) # type: ignore[arg-type] +@click.option("--server-name", type=str, required=False, help="Select MCP server name") +@click.option("--verbose", is_flag=True, required=False, help="Turn on logging") +@click.option("--debug", is_flag=True, required=False, help="Turn on logging with debug level") +@click.version_option() +@click.pass_context +def cli(ctx: click.Context, server_name: str, verbose: bool, debug: bool): + """ + MCP utilities. + """ + ctx.meta["registry"] = McpServerRegistry() + ctx.meta["server_name"] = server_name + return boot_click(ctx, verbose, debug) + + +@cli.command(name="list") +@format_option_json +@click.pass_context +def list_servers( + ctx: click.Context, + format_: str, +): + """ + Enumerate registered MCP servers. 
@cli.command()
@click.pass_context
def launch(
    ctx: click.Context,
):
    """
    Launch MCP server.
    """
    # Both values are placed on the Click context by the `cli` group callback.
    registry = ctx.meta["registry"]
    server_name = ctx.meta["server_name"]

    # Launching requires a single, explicitly selected server.
    if not server_name:
        raise click.BadParameter("MCP server name is required")

    # `select()` returns an empty list when no registered server carries the
    # requested name. Report that as a usage error instead of failing with an
    # `IndexError` on the subscript below.
    servers = registry.select(server_name)
    if not servers:
        raise click.BadParameter(f"Unknown MCP server: {server_name}")

    servers[0].launch()
+ """ + + servers: t.List[McpServer] + + title = "Database MCP servers for PostgreSQL and CrateDB" + text = ( + "This page provides an overview about a fragment of the MCP server landscape,\n" + "focusing on those that are talking to PostgreSQL and CrateDB databases.\n" + "\n" + "It enumerates the most popular adapters, and includes their detailed API capabilities.\n" + ) + + notes = ( + ":::{note}\n" + "This page was generated automatically, please do not edit manually. To rebuild, use this command:\n" + "```shell\n" + "uvx 'cratedb-toolkit[mcp]' query mcp inquire --format=markdown | sponge doc/query/mcp/landscape.md\n" + "```\n" + ":::\n" + ) + + seealso = ( + ":::{seealso}\n" + "Ready-to-run example programs about all the adapters are available per\n" + "example collection about [exploring MCP with CrateDB].\n" + "[exploring MCP with CrateDB]: https://github.com/crate/cratedb-examples/tree/main/framework/mcp.\n" + ":::\n" + ) + + def __post_init__(self): + if not self.servers: + msg = "No servers selected" + logger.error(msg) + raise UserWarning(msg) + + @staticmethod + async def get_capabilities(server: McpServer): + """ + Launch MCP server with stdio transport, and inquire API for capabilities. + + Derived from: + https://github.com/modelcontextprotocol/python-sdk?tab=readme-ov-file#writing-mcp-clients + """ + from mcp import ClientSession, StdioServerParameters + from mcp.client.stdio import stdio_client + + if server.program is None: + raise ValueError("Program name for MCP server not defined") + + # Create server parameters for stdio connection. + server_params = StdioServerParameters( + command=server.program, + args=server.args, + env=server.env, + ) + + async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + # Initialize the connection. + await session.initialize() + + # Inquire API. 
@dataclasses.dataclass
class McpServer:
    """
    Wrap information, installation and launch of an MCP server.

    `command` is the full command line used to start the server. It is split
    with shell-like rules into `program` (first token) and `args` (remaining
    tokens) right after construction, so values passed for `program` and
    `args` are overwritten.
    """

    name: str
    command: str
    program: t.Optional[str] = None
    args: t.List[str] = dataclasses.field(default_factory=list)
    env: t.Dict[str, str] = dataclasses.field(default_factory=dict)
    requirements: t.List[str] = dataclasses.field(default_factory=list)
    preinstall: t.Optional[str] = None
    homepage: t.Optional[str] = None
    description: t.Optional[str] = None

    def __post_init__(self) -> None:
        """
        Split command and adjust description.
        """
        cmd = shlex.split(self.command)
        # A blank command yields no tokens. Leave `program` undefined in that
        # case, so `launch()` can report it properly, instead of failing here
        # with an `IndexError`.
        self.program = cmd[0] if cmd else None
        self.args = cmd[1:]
        # Multi-line literals carry leading/trailing whitespace; normalize empty values to `None`.
        self.description = self.description.strip() if self.description else None
        self.preinstall = self.preinstall.strip() if self.preinstall else None

    @property
    def install_command(self) -> t.Optional[str]:
        """
        Return installation command for Python packages, using `uv`.

        Returns `None` when the server does not declare any requirements.
        """
        if not self.requirements:
            return None
        requirements = [f"'{requirement}'" for requirement in self.requirements]
        return f"uv pip install {' '.join(requirements)}"

    def install(self) -> None:
        """
        Install MCP server, triggering both pre-install and main-install procedures.

        Both steps run through the system shell. A failing step is reported
        per log message, but does not prevent the subsequent step from running.
        """
        for cmd in (self.preinstall, self.install_command):
            if not cmd:
                continue
            status = os.system(cmd)  # noqa: S605
            if status != 0:
                logger.warning(
                    f"Installation step for MCP server '{self.name}' failed with status {status}: {cmd}"
                )

    def launch(self) -> None:
        """
        Launch MCP server, currently in stdio mode only.

        Raises `ValueError` when no program could be derived from `command`.

        TODO: Is it applicable to offer SSE mode here, using FastMCP?
        """
        # Validate before importing the optional `mcp` dependency.
        if self.program is None:
            raise ValueError("Program name for MCP server not defined")

        from mcp import StdioServerParameters

        server_params = StdioServerParameters(
            command=self.program,
            args=self.args,
            env=self.env,
        )
        logger.info(f"Launching MCP server: {self.name}")
        logger.info(f"Command for MCP server '{self.name}': {self.command}")
        self._start_dummy(server_params)

    def _start_dummy(self, server_params) -> None:
        """
        Start server, just for dummy purposes.

        Blocks until the coroutine terminates. Using `asyncio.run` makes
        startup failures propagate to the caller and closes the event loop on
        exit, where a detached task on a forever-running loop would swallow
        the error and hang.
        """
        asyncio.run(self._launch_dummy(server_params))

    @staticmethod
    async def _launch_dummy(server_params) -> None:
        """
        Launch server, just for dummy purposes.

        FIXME: Currently, nobody can interact with this server:
               stdio is not forwarded, and SSE transport is not provided yet.
        """
        from mcp import stdio_client

        # Keep the stdio client context open by idling inside it.
        async with stdio_client(server_params) as (_read, _write):
            while True:
                await asyncio.sleep(1)

    def to_dict(self) -> t.Dict[str, t.Any]:
        """
        Return the descriptive metadata of the server as a dictionary.
        """
        return {
            "name": self.name,
            "description": self.description,
            "homepage": self.homepage,
            "install_command": self.install_command,
            "run": self.command,
            "preinstall": self.preinstall,
        }

    def to_markdown(self) -> str:
        """
        Render the descriptive metadata of the server as a Markdown section.

        Optional items (description, homepage, install command, pre-install
        script) are only emitted when they are defined.
        """
        lines = [f"## {self.name}", ""]
        if self.description:
            lines += [self.description, ""]
        if self.homepage:
            lines.append(f":Homepage: {self.homepage}")
        if self.install_command:
            lines.append(f":Install: `{self.install_command}`")
        lines.append(f":Run: `{self.command}`")
        if self.preinstall:
            lines += [":Acquire:", "```shell", f"{self.preinstall}", "```"]
        lines.append("")
        # Terminate the final line, like a sequence of `print()` calls would.
        return "\n".join(lines) + "\n"

    def to_json(self) -> str:
        """
        Render the descriptive metadata of the server as JSON.
        """
        return to_json(self.to_dict())

    def to_yaml(self) -> str:
        """
        Render the descriptive metadata of the server as YAML.
        """
        return to_yaml(self.to_dict())
+ logger.info("Starting MCP server with STDIO transport") + mcp.run(transport="stdio") diff --git a/cratedb_toolkit/query/mcp/registry.py b/cratedb_toolkit/query/mcp/registry.py new file mode 100644 index 00000000..086cce2c --- /dev/null +++ b/cratedb_toolkit/query/mcp/registry.py @@ -0,0 +1,136 @@ +import dataclasses +import typing as t + +from cratedb_toolkit.query.mcp.model import McpServer + + +@dataclasses.dataclass +class McpServerRegistry: + """ + An enumeration of MCP servers that can talk to CrateDB in one way or another. + """ + + servers: t.List[McpServer] = dataclasses.field(default_factory=list) + + cratedb_mcp = McpServer( + name="cratedb-mcp", + command="cratedb-mcp", + env={ + "CRATEDB_MCP_HTTP_URL": "http://localhost:4200", + "CRATEDB_MCP_TRANSPORT": "stdio", + }, + requirements=["cratedb-mcp @ git+https://github.com/crate/cratedb-mcp@packaging-adjustments"], + homepage="https://github.com/crate/cratedb-mcp", + description=""" +The CrateDB MCP server specialises on advanced CrateDB SQL operations by blending in +knowledge base resources from CrateDB's documentation about query optimizations. +It is written in Python, optionally to be invoked with `uv` or `uvx`. + """, + ) + + dbhub = McpServer( + name="dbhub", + command="npx -y @bytebase/dbhub@0.2.3 --transport=stdio --dsn=postgres://crate@localhost:5432/testdrive", + homepage="https://github.com/bytebase/dbhub", + description=""" +DBHub is a universal database gateway implementing the Model Context Protocol (MCP) server interface. This +gateway allows MCP-compatible clients to connect to and explore different databases. +It is written in TypeScript, to be invoked with `npx`. 
+ """, + ) + + mcp_alchemy = McpServer( + name="mcp-alchemy", + command="mcp-alchemy", + env={"DB_URL": "crate://crate@localhost:4200/?schema=testdrive"}, + requirements=[ + "mcp-alchemy @ git+https://github.com/runekaagaard/mcp-alchemy.git@b85aae6", + "sqlalchemy-cratedb>=0.42.0.dev1", + ], + homepage="https://github.com/runekaagaard/mcp-alchemy", + description=""" +The MCP Alchemy MCP server package uses SQLAlchemy to talk to databases and provides quite a range of tools. +It is written in Python, optionally to be invoked with `uv` or `uvx`. + """, + ) + + pg_mcp = McpServer( + name="pg-mcp", + command="python -m cratedb_toolkit.query.mcp.pg_mcp", + preinstall=""" +set -e +TARGET="/tmp/pg-mcp-server" +rm -rf ${TARGET} +git clone --depth 1 --no-checkout --filter=blob:none \ + https://github.com/crate-workbench/pg-mcp.git ${TARGET} +cd ${TARGET} +git checkout 16d7f61d5b3197777293ebae33b519f14a9d6e55 -- pyproject.toml uv.lock server test.py +cat pyproject.toml | grep -v requires-python | sponge pyproject.toml +uv pip install . + """, + homepage="https://github.com/crate-workbench/pg-mcp-server", + description=""" +The PG-MCP server is specialised to talk to PostgreSQL servers. With a few adjustments, +the adapter can also talk to CrateDB. The project offers rich MCP server capabilities, +and includes advanced client programs for Claude and Gemini that work out of the box. +It is written in Python, optionally to be invoked with `uv` or `uvx`. + """, + ) + + postgres_basic = McpServer( + name="postgres-basic", + command="npx -y @modelcontextprotocol/server-postgres@0.6 postgresql://crate@localhost:5432/testdrive", + homepage="https://www.npmjs.com/package/@modelcontextprotocol/server-postgres", + description=""" +A basic Model Context Protocol server that provides read-only access to +PostgreSQL databases per `query` tool. +It is written in TypeScript, to be invoked with `npx`. 
def select(self, name: t.Optional[str] = None) -> t.List[McpServer]:
    """
    Look up registered MCP servers by name.

    Without a name, the list of all registered servers is handed out as-is.
    With a name, each server registered under that name is installed and
    returned; a name nobody registered produces an empty list.
    """
    if not name:
        return self.servers

    # Collect the matching entries first, then trigger their installation in registration order.
    matches = [candidate for candidate in self.servers if candidate.name == name]
    for match in matches:
        match.install()
    return matches
def to_json(thing):
    """
    Serialize `thing` to a JSON string, indented by two spaces, preserving key order.
    """
    return json.dumps(thing, sort_keys=False, indent=2)


def to_yaml(thing):
    """
    Serialize `thing` to a YAML string, preserving key order.
    """
    return yaml.dump(thing, sort_keys=False)


def format_output(thing, format_: str):
    """
    Render `thing` in the requested output format.

    Supported variants are `json` and `yaml` for arbitrary serializable
    values, and `markdown` for lists and tuples, which are rendered as a
    bullet list with one item per line.

    Raises `NotImplementedError` for any other combination.
    """
    if format_ == "json":
        return to_json(thing)
    if format_ == "yaml":
        return to_yaml(thing)
    # The CLI's shared `--format` option also offers `markdown`; support it for
    # plain enumerations, like the list of server names, instead of crashing.
    if format_ == "markdown" and isinstance(thing, (list, tuple)):
        return "\n".join(f"- {item}" for item in thing)
    raise NotImplementedError(f"Output variant not implemented: {format_}")
+ +The main entities of MCP are [Prompts], [Resources], and [Tools]. + +MCP clients call servers by either invoking them as a subprocess and +communicate via stdio, or by using SSE, which implements TCP socket +communication, see [Transports]. + +## What's Inside + +This subsystem of CTK includes a little wrapper around the [Model Context +Protocol Python SDK], providing [inquiry] and launcher utilities, i.e. to +start MCP servers and hold conversations with them, with a focus on MCP +servers that wrap database access. + +In this case, the specific focus is on [CrateDB], by using relevant +PostgreSQL adapter implementations provided by the MCP ecosystem, +because CrateDB is compatible with PostgreSQL, and a specialised +MCP server for advanced conversations with CrateDB. + +In order to get an idea about a fragment of the MCP server landscape, +focusing on those that are talking to PostgreSQL and CrateDB databases, +and their detailed capabilities, this subsystem also provides a little +[MCP database server registry] which can render its ingredients into JSON, +YAML, and Markdown formats. + +## Usage + +Enumerate registered MCP servers. +```shell +ctk query mcp list +``` +```json +[ + "cratedb-mcp", + "dbhub", + "mcp-alchemy", + "pg-mcp", + "postgres-basic", + "quarkus" +] +``` + +Inquire all registered MCP servers, and report about their capabilities. +```shell +ctk query mcp inquire +``` +Inquire specific registered MCP server. +```shell +ctk query mcp --server-name=postgres-basic inquire +``` + +Launch registered MCP server. +```shell +ctk query mcp --server-name=postgres-basic launch +``` + +## References + +Popular MCP servers that can talk to CrateDB, alphabetically sorted. +Detailed MCP API capabilities about all of them, regarding available prompts, +resources, and tools, can be explored on the {doc}`landscape` page. +Ready-to-run example programs are available per example collection +about [exploring MCP with CrateDB]. 
+ +- [CrateDB MCP] +- [DBHub] +- [MCP Alchemy] +- [PG-MCP] +- [PostgreSQL basic] +- [Quarkus JDBC] + +:::{note} +The {doc}`landscape` page can be generated using this command: +```shell +uvx 'cratedb-toolkit[mcp]' query mcp inquire --format=markdown | sponge mcp-cratedb-landscape.md +``` +::: + + +```{toctree} +:maxdepth: 2 +:hidden: + +landscape +notes +backlog +``` + + +[CrateDB]: https://cratedb.com/database +[CrateDB MCP]: https://github.com/crate/cratedb-mcp +[CrateDB SQLAlchemy dialect]: https://cratedb.com/docs/sqlalchemy-cratedb/ +[DBHub]: https://github.com/bytebase/dbhub +[exploring MCP with CrateDB]: https://github.com/crate/cratedb-examples/tree/main/framework/mcp +[inquiry]: https://github.com/crate/cratedb-toolkit/blob/main/cratedb_toolkit/query/mcp/inquiry.py +[Introduction to MCP]: https://modelcontextprotocol.io/introduction +[JBang]: https://www.jbang.dev/ +[MCP]: https://modelcontextprotocol.io/ +[MCP Alchemy]: https://github.com/runekaagaard/mcp-alchemy +[MCP database server registry]: https://github.com/crate/cratedb-toolkit/blob/main/cratedb_toolkit/query/mcp/registry.py +[MCP Python SDK]: https://github.com/modelcontextprotocol/python-sdk +[MCP SSE]: https://github.com/sidharthrajaram/mcp-sse +[Model Context Protocol (MCP) @ CrateDB]: https://github.com/crate/crate-clients-tools/discussions/234 +[Model Context Protocol Python SDK]: https://pypi.org/project/mcp/ +[PostgreSQL basic]: https://www.npmjs.com/package/@modelcontextprotocol/server-postgres +[PG-MCP]: https://github.com/stuzero/pg-mcp-server +[Prompts]: https://modelcontextprotocol.io/docs/concepts/prompts +[Quarkus JDBC]: https://github.com/quarkiverse/quarkus-mcp-servers/tree/main/jdbc#readme +[quarkus-mcp-servers]: https://github.com/quarkiverse/quarkus-mcp-servers +[Resources]: https://modelcontextprotocol.io/docs/concepts/resources +[SQLAlchemy]: https://sqlalchemy.org/ +[Tools]: https://modelcontextprotocol.io/docs/concepts/tools +[Transports]: 
https://modelcontextprotocol.io/docs/concepts/transports +[uv]: https://docs.astral.sh/uv/ diff --git a/doc/query/mcp/landscape.md b/doc/query/mcp/landscape.md new file mode 100644 index 00000000..2ddad1d0 --- /dev/null +++ b/doc/query/mcp/landscape.md @@ -0,0 +1,519 @@ +# Database MCP servers for PostgreSQL and CrateDB + +This page provides an overview about a fragment of the MCP server landscape, +focusing on those that are talking to PostgreSQL and CrateDB databases. + +It enumerates the most popular adapters, and includes their detailed API capabilities. + +## cratedb-mcp + +The CrateDB MCP server specialises on advanced CrateDB SQL operations by blending in +knowledge base resources from CrateDB's documentation about query optimizations. +It is written in Python, optionally to be invoked with `uv` or `uvx`. + +:Homepage: https://github.com/crate/cratedb-mcp +:Install: `uv pip install 'cratedb-mcp @ git+https://github.com/crate/cratedb-mcp@packaging-adjustments'` +:Run: `cratedb-mcp` + + +### Tools + +```yaml +- name: query_sql + description: Send a SQL query to CrateDB, only 'SELECT' queries are allows, queries that modify data, + columns or are otherwise deemed un-safe are rejected. + inputSchema: + properties: + query: + title: Query + type: string + required: + - query + title: query_sqlArguments + type: object +- name: get_cratedb_documentation_index + description: Gets an index with CrateDB documentation links to fetch, should download docs before answering + questions. Has documentation name, description and link. + inputSchema: + properties: {} + title: get_cratedb_documentation_indexArguments + type: object +- name: fetch_cratedb_docs + description: Downloads the latest CrateDB documentation piece by link. Only used to download CrateDB + docs. 
+ inputSchema: + properties: + link: + title: Link + type: string + required: + - link + title: fetch_cratedb_docsArguments + type: object +- name: get_table_metadata + description: Returns an aggregation of all CrateDB's schema, tables and their metadata + inputSchema: + properties: {} + title: get_table_metadataArguments + type: object +- name: get_health + description: Returns the health of a CrateDB cluster. + inputSchema: + properties: {} + title: get_healthArguments + type: object +``` + + +## dbhub + +DBHub is a universal database gateway implementing the Model Context Protocol (MCP) server interface. This +gateway allows MCP-compatible clients to connect to and explore different databases. +It is written in TypeScript, to be invoked with `npx`. + +:Homepage: https://github.com/bytebase/dbhub +:Run: `npx -y @bytebase/dbhub@0.2.3 --transport=stdio --dsn=postgres://crate@localhost:5432/testdrive` + + +### Prompts + +```yaml +- name: generate_sql + description: Generate SQL queries from natural language descriptions + arguments: + - name: description + description: Natural language description of the SQL query to generate + required: true + - name: schema + description: Optional database schema to use + required: false +- name: explain_db + description: Get explanations about database tables, columns, and structures + arguments: + - name: schema + description: Optional database schema to use + required: false + - name: table + description: Optional specific table to explain + required: false +``` + +### Resources + +```yaml +- uri: db://schemas + name: schemas + description: null + mimeType: null + size: null + annotations: null +``` + +### Resource Templates + +```yaml +- uriTemplate: db://schemas/{schemaName}/tables + name: tables_in_schema + description: null + mimeType: null + annotations: null +- uriTemplate: db://schemas/{schemaName}/tables/{tableName} + name: table_structure_in_schema + description: null + mimeType: null + annotations: null +- 
uriTemplate: db://schemas/{schemaName}/tables/{tableName}/indexes + name: indexes_in_table + description: null + mimeType: null + annotations: null +- uriTemplate: db://schemas/{schemaName}/procedures + name: procedures_in_schema + description: null + mimeType: null + annotations: null +- uriTemplate: db://schemas/{schemaName}/procedures/{procedureName} + name: procedure_detail_in_schema + description: null + mimeType: null + annotations: null +``` + +### Tools + +```yaml +- name: run_query + description: null + inputSchema: + type: object + properties: + query: + type: string + description: SQL query to execute (SELECT only) + required: + - query + additionalProperties: false + $schema: http://json-schema.org/draft-07/schema# +- name: list_connectors + description: null + inputSchema: + type: object + properties: {} + additionalProperties: false + $schema: http://json-schema.org/draft-07/schema# +``` + + +## mcp-alchemy + +The MCP Alchemy MCP server package uses SQLAlchemy to talk to databases and provides quite a range of tools. +It is written in Python, optionally to be invoked with `uv` or `uvx`. + +:Homepage: https://github.com/runekaagaard/mcp-alchemy +:Install: `uv pip install 'mcp-alchemy @ git+https://github.com/runekaagaard/mcp-alchemy.git@b85aae6' 'sqlalchemy-cratedb>=0.42.0.dev1'` +:Run: `mcp-alchemy` + + +### Tools + +```yaml +- name: all_table_names + description: Return all table names in the database separated by comma. Connected to crate version 6.0.0 + database '' on localhost as user 'crate' + inputSchema: + properties: {} + title: all_table_namesArguments + type: object +- name: filter_table_names + description: Return all table names in the database containing the substring 'q' separated by comma. 
+ Connected to crate version 6.0.0 database '' on localhost as user 'crate' + inputSchema: + properties: + q: + title: Q + type: string + required: + - q + title: filter_table_namesArguments + type: object +- name: schema_definitions + description: Returns schema and relation information for the given tables. Connected to crate version + 6.0.0 database '' on localhost as user 'crate' + inputSchema: + properties: + table_names: + items: + type: string + title: Table Names + type: array + required: + - table_names + title: schema_definitionsArguments + type: object +- name: execute_query + description: Execute a SQL query and return results in a readable format. Results will be truncated + after 4000 characters. Connected to crate version 6.0.0 database '' on localhost as user 'crate' + inputSchema: + properties: + query: + title: Query + type: string + params: + anyOf: + - type: object + - type: 'null' + default: null + title: Params + required: + - query + title: execute_queryArguments + type: object +``` + + +## pg-mcp + +The PG-MCP server is specialised to talk to PostgreSQL servers. With a few adjustments, +the adapter can also talk to CrateDB. The project offers rich MCP server capabilities, +and includes advanced client programs for Claude and Gemini that work out of the box. +It is written in Python, optionally to be invoked with `uv` or `uvx`. + +:Homepage: https://github.com/crate-workbench/pg-mcp-server +:Run: `python -m cratedb_toolkit.query.mcp.pg_mcp` +:Acquire: +```shell +set -e +TARGET="/tmp/pg-mcp-server" +rm -rf ${TARGET} +git clone --depth 1 --no-checkout --filter=blob:none https://github.com/crate-workbench/pg-mcp.git ${TARGET} +cd ${TARGET} +git checkout 16d7f61d5b3197777293ebae33b519f14a9d6e55 -- pyproject.toml uv.lock server test.py +cat pyproject.toml | grep -v requires-python | sponge pyproject.toml +uv pip install . 
+``` + + +### Resource Templates + +```yaml +- uriTemplate: pgmcp://{conn_id}/schemas + name: list_schemas + description: List all non-system schemas in the database. + mimeType: null + annotations: null +- uriTemplate: pgmcp://{conn_id}/schemas/{schema}/tables + name: list_schema_tables + description: List all tables in a specific schema with their descriptions. + mimeType: null + annotations: null +- uriTemplate: pgmcp://{conn_id}/schemas/{schema}/tables/{table}/columns + name: get_table_columns + description: Get columns for a specific table with their descriptions. + mimeType: null + annotations: null +- uriTemplate: pgmcp://{conn_id}/schemas/{schema}/tables/{table}/indexes + name: get_table_indexes + description: Get indexes for a specific table with their descriptions. + mimeType: null + annotations: null +- uriTemplate: pgmcp://{conn_id}/schemas/{schema}/tables/{table}/constraints + name: get_table_constraints + description: Get constraints for a specific table with their descriptions. + mimeType: null + annotations: null +- uriTemplate: pgmcp://{conn_id}/schemas/{schema}/tables/{table}/indexes/{index} + name: get_index_details + description: Get detailed information about a specific index. + mimeType: null + annotations: null +- uriTemplate: pgmcp://{conn_id}/schemas/{schema}/tables/{table}/constraints/{constraint} + name: get_constraint_details + description: Get detailed information about a specific constraint. + mimeType: null + annotations: null +- uriTemplate: pgmcp://{conn_id}/schemas/{schema}/extensions + name: list_schema_extensions + description: List all extensions installed in a specific schema. + mimeType: null + annotations: null +- uriTemplate: pgmcp://{conn_id}/schemas/{schema}/extensions/{extension} + name: get_extension_details + description: Get detailed information about a specific extension in a schema. 
+ mimeType: null + annotations: null +- uriTemplate: pgmcp://{conn_id}/schemas/{schema}/tables/{table}/sample + name: sample_table_data + description: Get a sample of data from a specific table. + mimeType: null + annotations: null +- uriTemplate: pgmcp://{conn_id}/schemas/{schema}/tables/{table}/rowcount + name: get_table_rowcount + description: Get the approximate row count for a specific table. + mimeType: null + annotations: null +``` + +### Tools + +```yaml +- name: connect + description: "\n Register a database connection string and return its connection ID.\n \ + \ \n Args:\n connection_string: PostgreSQL connection string (required)\n \ + \ ctx: Request context (injected by the framework)\n \n Returns:\n \ + \ Dictionary containing the connection ID\n " + inputSchema: + properties: + connection_string: + title: Connection String + type: string + required: + - connection_string + title: connectArguments + type: object +- name: disconnect + description: "\n Close a specific database connection and remove it from the pool.\n \n\ + \ Args:\n conn_id: Connection ID to disconnect (required)\n ctx: Request\ + \ context (injected by the framework)\n \n Returns:\n Dictionary indicating\ + \ success status\n " + inputSchema: + properties: + conn_id: + title: Conn Id + type: string + required: + - conn_id + title: disconnectArguments + type: object +- name: pg_query + description: "\n Execute a read-only SQL query against the PostgreSQL database.\n \n \ + \ Args:\n query: The SQL query to execute (must be read-only)\n conn_id:\ + \ Connection ID previously obtained from the connect tool\n params: Parameters for the\ + \ query (optional)\n \n Returns:\n Query results as a list of dictionaries\n\ + \ " + inputSchema: + properties: + query: + title: Query + type: string + conn_id: + title: Conn Id + type: string + params: + default: null + title: params + type: string + required: + - query + - conn_id + title: pg_queryArguments + type: object +- name: pg_explain + 
description: "\n Execute an EXPLAIN (FORMAT JSON) query to get PostgreSQL execution plan.\n \ + \ \n Args:\n query: The SQL query to analyze\n conn_id: Connection\ + \ ID previously obtained from the connect tool\n params: Parameters for the query (optional)\n\ + \ \n Returns:\n Complete JSON-formatted execution plan\n " + inputSchema: + properties: + query: + title: Query + type: string + conn_id: + title: Conn Id + type: string + params: + default: null + title: params + type: string + required: + - query + - conn_id + title: pg_explainArguments + type: object +``` + + +## postgres-basic + +A basic Model Context Protocol server that provides read-only access to +PostgreSQL databases per `query` tool. +It is written in TypeScript, to be invoked with `npx`. + +:Homepage: https://www.npmjs.com/package/@modelcontextprotocol/server-postgres +:Run: `npx -y @modelcontextprotocol/server-postgres@0.6 postgresql://crate@localhost:5432/testdrive` + + +### Tools + +```yaml +- name: query + description: Run a read-only SQL query + inputSchema: + type: object + properties: + sql: + type: string +``` + + +## quarkus + +The Quarkus MCP server communicates with databases using JDBC, providing quite a range of tools. +It is written in Java, to be invoked with `jbang`. 
+ +:Homepage: https://github.com/quarkiverse/quarkus-mcp-servers +:Run: `jbang run --java=21 jdbc@quarkiverse/quarkus-mcp-servers jdbc:postgresql://localhost:5432/testdrive -u crate` + + +### Prompts + +```yaml +- name: er_diagram + description: Visualize ER diagram + arguments: [] +- name: sample_data + description: Creates sample data and perform analysis + arguments: + - name: topic + description: The topic + required: true +``` + +### Tools + +```yaml +- name: create_table + description: Create new table in the jdbc database + inputSchema: + type: object + properties: + query: + type: string + description: CREATE TABLE SQL statement + required: + - query +- name: database_info + description: Get information about the database. Run this before anything else to know the SQL dialect, + keywords etc. + inputSchema: + type: object + properties: {} + required: [] +- name: describe_table + description: Describe table + inputSchema: + type: object + properties: + catalog: + type: string + description: Catalog name + schema: + type: string + description: Schema name + table: + type: string + description: Table name + required: + - table +- name: list_tables + description: List all tables in the jdbc database + inputSchema: + type: object + properties: {} + required: [] +- name: read_query + description: Execute a SELECT query on the jdbc database + inputSchema: + type: object + properties: + query: + type: string + description: SELECT SQL query to execute + required: + - query +- name: write_query + description: Execute a INSERT, UPDATE or DELETE query on the jdbc database + inputSchema: + type: object + properties: + query: + type: string + description: INSERT, UPDATE or DELETE SQL query to execute + required: + - query +``` + + +:::{note} +This page was generated automatically, please do not edit manually. 
To rebuild, use this command: +```shell +uvx 'cratedb-toolkit[mcp]' query mcp inquire --format=markdown | sponge doc/query/mcp/landscape.md +``` +::: + +:::{seealso} +Ready-to-run example programs for all the adapters are available in the +example collection about +[exploring MCP with CrateDB](https://github.com/crate/cratedb-examples/tree/main/framework/mcp). +::: + + diff --git a/doc/query/mcp/notes.md b/doc/query/mcp/notes.md new file mode 100644 index 00000000..4414a194 --- /dev/null +++ b/doc/query/mcp/notes.md @@ -0,0 +1,32 @@ +# MCP notes + +## MCP package registries + +- https://mcp.so/ +- https://mcphub.tools/ +- https://smithery.ai/ + +## MCP servers + +- https://github.com/jjsantos01/qgis_mcp +- https://github.com/ahujasid/blender-mcp +- https://github.com/Simon-Kansara/ableton-live-mcp-server +- https://github.com/ihrpr/mcp-server-jupyter +- https://github.com/logiscape/mcp-sdk-php +- https://github.com/fusedio/fused-mcp +- https://pypi.org/project/mcp-atlassian/ +- https://pypi.org/project/mcp-server-fetch/ +- https://mcp.so/server/mcp-installer +- [pypi-search](https://github.com/nomicode/cline/tree/main/MCP/pypi-search) +- https://github.com/nerding-io/n8n-nodes-mcp +- https://github.com/opensumi/core +- https://github.com/awslabs/mcp +- https://github.com/Upsonic/Upsonic +- https://github.com/anaisbetts/mcp-installer +- https://github.com/mark3labs/mcp-go +- https://github.com/modelcontextprotocol/inspector +- https://github.com/lastmile-ai/mcp-agent +- https://github.com/drillan/sphinx-mcp-test +- https://github.com/github/github-mcp-server +- https://github.com/varunneal/spotify-mcp +- https://github.com/BrowserMCP/mcp diff --git a/pyproject.toml b/pyproject.toml index f6c6fe74..8cb22235 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -146,7 +146,7 @@ optional-dependencies.dynamodb = [ "commons-codec>=0.0.20", ] optional-dependencies.full = [ - "cratedb-toolkit[cfr,cloud,datasets,io,service]", +
"cratedb-toolkit[cfr,cloud,datasets,io,mcp,service]", ] optional-dependencies.influxdb = [ "cratedb-toolkit[io]", @@ -167,6 +167,9 @@ optional-dependencies.kinesis = [ "commons-codec>=0.0.20", "lorrystream[carabas]>=0.0.6", ] +optional-dependencies.mcp = [ + "mcp<1.5; python_version>='3.10'", +] optional-dependencies.mongodb = [ "commons-codec[mongodb,zyp]>=0.0.21", "cratedb-toolkit[io]", @@ -197,7 +200,9 @@ optional-dependencies.service = [ optional-dependencies.test = [ "cratedb-toolkit[testing]", "httpx<0.29", + "markdown-it-py<4", "pueblo[dataframe]", + "pydantic-core<3", "pytest<9", "pytest-cov<7", "pytest-mock<4", diff --git a/tests/query/test_mcp.py b/tests/query/test_mcp.py new file mode 100644 index 00000000..ed10fe25 --- /dev/null +++ b/tests/query/test_mcp.py @@ -0,0 +1,108 @@ +import json +import os + +import pytest +import yaml +from click.testing import CliRunner + +pytest.importorskip("mcp") + +from cratedb_toolkit.query.mcp.cli import cli + + +def test_list(): + """ + Verify `ctk query mcp list`. + """ + runner = CliRunner() + + result = runner.invoke( + cli, + args="list", + catch_exceptions=False, + ) + assert result.exit_code == 0, result.output + data = json.loads(result.output) + assert data == [ + "cratedb-mcp", + "dbhub", + "mcp-alchemy", + "pg-mcp", + "postgres-basic", + "quarkus", + ] + + +def test_inquire_markdown(): + """ + Verify `ctk query mcp inquire --format=markdown`. + """ + import markdown_it + + runner = CliRunner() + + result = runner.invoke( + cli, + args="--server-name=postgres-basic inquire --format=markdown", + catch_exceptions=False, + ) + assert result.exit_code == 0, result.output + html = markdown_it.MarkdownIt().render(result.output) + assert "## postgres-basic" in result.output + assert "<h2>postgres-basic</h2>" in html + assert "This page was generated automatically" in html + + +@pytest.mark.skipif("GITHUB_ACTION" in os.environ, reason="Test fails on GHA. Don't know why.") +def test_inquire_json(): + """ + Verify `ctk query mcp inquire --format=json`. + """ + runner = CliRunner() + + result = runner.invoke( + cli, + args="--server-name=postgres-basic inquire --format=json", + catch_exceptions=False, + ) + assert result.exit_code == 0, result.output + response = json.loads(result.output) + assert "postgres-basic" in response["data"] + assert response["data"]["postgres-basic"]["capabilities"]["tools"][0]["name"] == "query" + assert "This page was generated automatically" in response["meta"]["notes"] + + +@pytest.mark.skipif("GITHUB_ACTION" in os.environ, reason="Test fails on GHA. Don't know why.") +def test_inquire_yaml(): + """ + Verify `ctk query mcp inquire --format=yaml`. + """ + runner = CliRunner() + + result = runner.invoke( + cli, + args="--server-name=postgres-basic inquire --format=yaml", + catch_exceptions=False, + ) + assert result.exit_code == 0, result.output + response = yaml.safe_load(result.output) + assert "postgres-basic" in response["data"] + assert response["data"]["postgres-basic"]["capabilities"]["tools"][0]["name"] == "query" + assert "This page was generated automatically" in response["meta"]["notes"] + + +def test_launch(mocker): + """ + Verify `ctk query mcp launch`. + """ + runner = CliRunner() + + # Disable the actual launcher code, to not block the progress of the test case. + mocker.patch("cratedb_toolkit.query.mcp.model.McpServer._start_dummy") + + result = runner.invoke( + cli, + args="--server-name=postgres-basic launch", + catch_exceptions=False, + ) + assert result.exit_code == 0, result.output