diff --git a/README.md b/README.md
index cc1b7b7..adc6c2e 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,15 @@ Nodit MCP Server provides tools enabling AI agents to dynamically discover, unde
 Nodit MCP Server communicates using the standard JSON-RPC over stdio protocol, following the Model Context Protocol (MCP) conventions.
 Currently, only stdio-based communication is supported for server-client interactions.
 
+
+## Running evals
+
+The evals package loads an MCP client that then runs the server entry point (`src/index.ts`) directly, so there is no need to rebuild between tests. You can load environment variables by prefixing the npx command. Full documentation can be found in the [mcp-evals documentation](https://www.mcpevals.io/docs).
+
+```bash
+OPENAI_API_KEY=your-key npx mcp-eval src/evals/evals.ts src/index.ts
+```
+
 ## Features
 
 The following are the key features and supported blockchain networks provided through Nodit MCP Server for AI agents and LLMs.
diff --git a/package.json b/package.json
index cbc0d82..ed7cca5 100644
--- a/package.json
+++ b/package.json
@@ -18,11 +18,12 @@
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.9.0",
     "js-yaml": "^4.1.0",
-    "zod": "^3.24.2"
+    "zod": "^3.24.2",
+    "mcp-evals": "^1.0.18"
   },
   "devDependencies": {
     "@types/js-yaml": "^4.0.9",
     "@types/node": "^22.14.0",
     "typescript": "^5.8.3"
   }
 }
diff --git a/src/evals/evals.ts b/src/evals/evals.ts
new file mode 100644
index 0000000..a003443
--- /dev/null
+++ b/src/evals/evals.ts
@@ -0,0 +1,70 @@
+//evals.ts
+
+import { openai } from "@ai-sdk/openai";
+import { grade, type EvalConfig, type EvalFunction } from "mcp-evals";
+
+// NOTE(review): "@ai-sdk/openai" is not added to package.json by this change;
+// presumably it resolves through another package's dependencies. Confirm, or
+// declare it explicitly.
+
+/** Model instance shared by every eval below and by the exported config. */
+const model = openai("gpt-4");
+
+/**
+ * Builds an eval whose `run` passes `prompt` and the shared model to `grade`
+ * and returns the JSON-parsed result.
+ *
+ * @param name - Value for the eval's `name` field.
+ * @param description - Value for the eval's `description` field.
+ * @param prompt - Text handed to `grade`.
+ * @returns The assembled eval definition.
+ */
+function defineEval(name: string, description: string, prompt: string): EvalFunction {
+  return {
+    name,
+    description,
+    run: async () => {
+      // The awaited value is parsed as JSON, so a non-JSON reply rejects the run.
+      const result = await grade(model, prompt);
+      return JSON.parse(result);
+    },
+  };
+}
+
+/** Every eval in this suite; also used as the `evals` list of the default-exported config. */
+export const evals: EvalFunction[] = [
+  defineEval(
+    "list_nodit_data_apis Evaluation",
+    "Evaluates the functionality of tool list_nodit_data_apis",
+    "List the available Nodit Data API operations.",
+  ),
+  defineEval(
+    "call_nodit_api Tool Evaluation",
+    "Evaluates the call_nodit_api tool's ability to call Nodit Blockchain Context API",
+    // NOTE(review): the address below is shorter than a 20-byte hex address; confirm it is intentional.
+    "Please use the call_nodit_api tool to call the ethereum mainnet with operationId 'eth_getBalance' and a request body {\"address\":\"0x1234567890abcdef\"} to retrieve the balance.",
+  ),
+  defineEval(
+    "get_nodit_api_spec Evaluation",
+    "Evaluates the get_nodit_api_spec tool by retrieving the resolved specification for a given operationId",
+    // NOTE(review): 'nodit_blockchain_op123' looks like a placeholder operationId; confirm it exists.
+    "What is the fully resolved specification for the operationId 'nodit_blockchain_op123' using get_nodit_api_spec?",
+  ),
+  defineEval(
+    "list_nodit_node_apis",
+    "Evaluates the functionality of listing Nodit Node API operations",
+    "What are the available Nodit Node API operations?",
+  ),
+  defineEval(
+    "list_nodit_aptos_indexer_api_query_root",
+    "Evaluates the listing of query roots from the Nodit Aptos Indexer GraphQL API",
+    "Which query roots are available in the Nodit Aptos Indexer GraphQL API?",
+  ),
+];
+
+const config: EvalConfig = {
+  model,
+  evals,
+};
+
+export default config;