andrewginns · andrewginns · Nov 7, 2025 · Nov 7, 2025 · chatgpt-codex-connector · Nov 7, 2025
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,34 @@
+# syntax=docker/dockerfile:1
+FROM ghcr.io/astral-sh/uv:python3.13-bookworm
+
+# Install Node.js and npm, which the Mermaid CLI installed below requires.
+# The apt package lists are removed in the same layer so they never reach the image.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends nodejs npm \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install mermaid CLI globally
+RUN npm install -g @mermaid-js/mermaid-cli
+
+# Set work directory
+WORKDIR /app
+
+# Copy only the dependency manifests and install third-party dependencies
+# first. This layer is reused until pyproject.toml or uv.lock changes, so
+# source-only edits no longer force a full dependency reinstall.
+# NOTE(review): if the project declares uv workspace members, this step may
+# need --no-install-workspace instead — confirm against pyproject.toml.
+COPY pyproject.toml uv.lock ./
+RUN uv sync --frozen --no-install-project
+
+# Copy source
+COPY . .
+
+# Install the project itself on top of the cached dependency layer, and make
+# the entrypoint script executable regardless of its file mode in the build
+# context (an exec-form ENTRYPOINT fails without the execute bit).
+RUN chmod +x docker/merbench-entrypoint.sh \
+    && uv sync --frozen
+
+# Default entrypoint; arguments given after the image name are passed to it.
+ENTRYPOINT ["/app/docker/merbench-entrypoint.sh"]
diff --git a/Makefile b/Makefile
@@ -15,4 +15,25 @@ adk_basic_ui:
 	uv run adk web agents_mcp_usage/basic_mcp
 
 adk_multi_ui:
-	uv run adk web agents_mcp_usage/multi_mcp
+	uv run adk web agents_mcp_usage/multi_mcp
+
+# Neither target produces a file of its own name, so always run the recipes.
+.PHONY: merbench-docker-build merbench-docker-run
+
+# Build the merbench image from the Dockerfile in the current directory.
+merbench-docker-build:
+	docker build -t merbench .
+
+# Run the merbench image, forwarding the API keys from the calling environment.
+# The results directory is created up front: if the bind-mount source is
+# missing, Docker creates it itself and it ends up owned by root.
+# Extra container arguments come from ARGS, e.g.
+#   make merbench-docker-run ARGS="--models <list>"
+# ARGS is intentionally left unquoted so it splits into separate arguments.
+merbench-docker-run:
+	mkdir -p "$(CURDIR)/mermaid_eval_results"
+	docker run --rm \
+	  -e GEMINI_API_KEY="$${GEMINI_API_KEY}" \
+	  -e OPENAI_API_KEY="$${OPENAI_API_KEY}" \
+	  -v "$(CURDIR)/mermaid_eval_results:/app/mermaid_eval_results" \
+	  merbench $${ARGS}
diff --git a/README.md b/README.md
@@ -44,6 +44,34 @@ Run an Agent framework script e.g.:
 
 Check console, Logfire, or the ADK web UI for output
 
+## Docker quickstart
+
+Build the pre-baked evaluation image (includes Python dependencies and the Mermaid CLI):
+
+```bash
+docker build -t merbench .
+```
+
+Run the multi-model Mermaid benchmark with your API keys and a bind mount so results persist on the host:
+
+```bash
+docker run --rm \
+  -e GEMINI_API_KEY="your-gemini-key" \
+  -e OPENAI_API_KEY="your-openai-key" \
+  -v "$(pwd)/mermaid_eval_results:/app/mermaid_eval_results" \
+  merbench --models gemini-1.5-pro,openai-gpt-4.1-mini
+```
+
+The container entrypoint runs `run_multi_evals.py` through `uv run` and forwards any arguments given after the image name to it. Override the entrypoint to launch other tooling, such as the evaluation UI:
+
+```bash
+docker run --rm \
+  -e GEMINI_API_KEY="your-gemini-key" \
+  -v "$(pwd)/mermaid_eval_results:/app/mermaid_eval_results" \
+  --entrypoint uv \
+  merbench run agents_mcp_usage/evaluations/mermaid_evals/merbench_ui.py
+```
+
 ## Project Overview
 
 This project aims to teach:

diff --git a/docker/merbench-entrypoint.sh b/docker/merbench-entrypoint.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+set -e
+
+# Ensure results directory exists if bind-mounted
+mkdir -p /app/mermaid_eval_results
+
+exec uv run agents_mcp_usage/evaluations/mermaid_evals/run_multi_evals.py "$@"