diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..57b3ba2
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,58 @@
+# syntax=docker/dockerfile:1
+FROM ghcr.io/astral-sh/uv:python3.13-bookworm
+
+# Install Node.js, npm, and Chromium dependencies for mermaid-cli
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        fonts-liberation \
+        fonts-noto-color-emoji \
+        libasound2 \
+        libatk-bridge2.0-0 \
+        libatk1.0-0 \
+        libatspi2.0-0 \
+        libcairo2 \
+        libcups2 \
+        libdrm2 \
+        libgbm1 \
+        libgtk-3-0 \
+        libnss3 \
+        libpango-1.0-0 \
+        libx11-xcb1 \
+        libxcomposite1 \
+        libxdamage1 \
+        libxfixes3 \
+        libxi6 \
+        libxrandr2 \
+        libxrender1 \
+        libxshmfence1 \
+        libxss1 \
+        libxtst6 \
+        nodejs \
+        npm \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install mermaid CLI globally and drop npm's download cache in the same layer
+# so the cache is not baked into the image
+RUN npm install -g @mermaid-js/mermaid-cli \
+    && npm cache clean --force
+
+# Set work directory
+WORKDIR /app
+
+# Copy only the dependency manifests first so the install layer below is
+# reused until pyproject.toml or uv.lock changes
+COPY pyproject.toml uv.lock ./
+
+# Install locked third-party dependencies before the source is copied;
+# --no-install-project skips the project itself, which is not in the image yet
+RUN uv sync --frozen --no-install-project
+
+# Copy source
+# NOTE(review): confirm a .dockerignore keeps a host .venv out of the build context
+COPY . .
+
+# Install the project itself; dependencies are already present from the layer above
+RUN uv sync --frozen
+
+# Default entrypoint
+ENTRYPOINT ["/app/docker/merbench-entrypoint.sh"]
diff --git a/Makefile b/Makefile
index a47e39f..5a3e5f6 100644
--- a/Makefile
+++ b/Makefile
@@ -15,4 +15,14 @@ adk_basic_ui:
 	uv run adk web agents_mcp_usage/basic_mcp
 
 adk_multi_ui:
-	uv run adk web agents_mcp_usage/multi_mcp
\ No newline at end of file
+	uv run adk web agents_mcp_usage/multi_mcp
+
+merbench-docker-build:
+	docker build -t merbench .
+
+merbench-docker-run:
+	docker run --rm \
+		-e GEMINI_API_KEY="$${GEMINI_API_KEY}" \
+		-e OPENAI_API_KEY="$${OPENAI_API_KEY}" \
+		-v "$$(pwd)/mermaid_eval_results:/app/mermaid_eval_results" \
+		merbench $${ARGS}
diff --git a/README.md b/README.md
index 25fe2ed..dad9e4e 100644
--- a/README.md
+++ b/README.md
@@ -44,6 +44,34 @@ Run an Agent framework script e.g.:
 
 Check console, Logfire, or the ADK web UI for output
 
+## Docker quickstart
+
+Build the pre-baked evaluation image (includes Python dependencies and the Mermaid CLI):
+
+```bash
+docker build -t merbench .
+```
+
+Run the multi-model Mermaid benchmark with your API keys and a bind mount so results persist on the host:
+
+```bash
+docker run --rm \
+  -e GEMINI_API_KEY="your-gemini-key" \
+  -e OPENAI_API_KEY="your-openai-key" \
+  -v "$(pwd)/mermaid_eval_results:/app/mermaid_eval_results" \
+  merbench --models gemini-1.5-pro,openai-gpt-4.1-mini
+```
+
+The container entrypoint defaults to `run_multi_evals.py`. Override it to launch other tooling, such as the evaluation UI:
+
+```bash
+docker run --rm \
+  -e GEMINI_API_KEY="your-gemini-key" \
+  -v "$(pwd)/mermaid_eval_results:/app/mermaid_eval_results" \
+  --entrypoint uv \
+  merbench run agents_mcp_usage/evaluations/mermaid_evals/merbench_ui.py
+```
+
 ## Project Overview
 
 This project aims to teach:
diff --git a/docker/merbench-entrypoint.sh b/docker/merbench-entrypoint.sh
new file mode 100755
index 0000000..ad6e7d1
--- /dev/null
+++ b/docker/merbench-entrypoint.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+set -e
+
+# Ensure the results directory exists whether or not a host directory is bind-mounted
+mkdir -p /app/mermaid_eval_results
+
+# --frozen: run against the uv.lock shipped in the image without updating it at container start
+exec uv run --frozen agents_mcp_usage/evaluations/mermaid_evals/run_multi_evals.py "$@"