Skip to content

Commit d1181fe

Browse files
Improving Notebook for Live Evaluation (#332)
Signed-off-by: Chris Alexiuk <c.s.alexiuk@gmail.com>
1 parent ef27ecd commit d1181fe

File tree

2 files changed

+44
-33
lines changed

2 files changed

+44
-33
lines changed

nemo/Evaluator/Live Evaluation/docker_compose.yaml

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -104,12 +104,12 @@ services:
104104
condition: service_healthy
105105
evaluator-postgres-db-migration:
106106
condition: service_completed_successfully
107-
otel-collector:
108-
condition: service_started
107+
# otel-collector:
108+
# condition: service_started
109109
networks:
110110
- nemo-ms
111111
healthcheck:
112-
test: ["CMD", "curl", "http://localhost:7331/health"]
112+
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:7331/health')"]
113113
interval: 10s
114114
timeout: 3s
115115
retries: 3
@@ -124,16 +124,16 @@ services:
124124
SERVICE_ACCOUNT: nemo-evaluator-test-workflow-executor
125125
EVAL_ENABLE_VALIDATION: False
126126
# OpenTelemetry environment variables
127-
OTEL_SERVICE_NAME: nemo-evaluator
128-
OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
129-
OTEL_TRACES_EXPORTER: otlp
130-
OTEL_METRICS_EXPORTER: none
131-
OTEL_LOGS_EXPORTER: otlp
132-
OTEL_PYTHON_EXCLUDED_URLS: "health"
133-
OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED: "true"
134-
CONSOLE_LOG_LEVEL: DEBUG
135-
OTEL_LOG_LEVEL: DEBUG
136-
LOG_LEVEL: DEBUG
127+
# OTEL_SERVICE_NAME: nemo-evaluator
128+
# OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317
129+
# OTEL_TRACES_EXPORTER: otlp
130+
# OTEL_METRICS_EXPORTER: none
131+
# OTEL_LOGS_EXPORTER: otlp
132+
# OTEL_PYTHON_EXCLUDED_URLS: "health"
133+
# OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED: "true"
134+
# CONSOLE_LOG_LEVEL: DEBUG
135+
# OTEL_LOG_LEVEL: DEBUG
136+
# LOG_LEVEL: DEBUG
137137

138138
evaluator-postgres-db-migration:
139139
image: ${EVALUATOR_IMAGE:-""}
@@ -288,16 +288,16 @@ services:
288288
# adapted from https://jessitron.com/2021/08/11/run-an-opentelemetry-collector-locally-in-docker/
289289
# and https://github.com/open-telemetry/opentelemetry-demo/blob/main/docker-compose.yml
290290
###
291-
otel-collector:
292-
image: otel/opentelemetry-collector-contrib:0.91.0
293-
command: ["--config=/etc/otel-collector-config.yaml"]
294-
volumes:
295-
- ./config/otel-collector-config.yaml:/etc/otel-collector-config.yaml
296-
ports:
297-
- "4317:4317" # OTLP over gRPC receiver
298-
- "55679:55679" # UI
299-
networks:
300-
- nemo-ms
291+
# otel-collector:
292+
# image: otel/opentelemetry-collector-contrib:0.91.0
293+
# command: ["--config=/etc/otel-collector-config.yaml"]
294+
# volumes:
295+
# - ./config/otel-collector-config.yaml:/etc/otel-collector-config.yaml
296+
# ports:
297+
# - "4317:4317" # OTLP over gRPC receiver
298+
# - "55679:55679" # UI
299+
# networks:
300+
# - nemo-ms
301301

302302
networks:
303303
nemo-ms:

nemo/Evaluator/Live Evaluation/live_evaluation.ipynb

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,17 @@
5050
"```"
5151
]
5252
},
53+
{
54+
"cell_type": "markdown",
55+
"metadata": {},
56+
"source": [
57+
"### Installing Dependencies with `uv`\n",
58+
"\n",
59+
"Before moving forward in the notebook, please ensure you're using the virtual environment created by running `uv sync` in the root directory of this notebook. \n",
60+
"\n",
61+
"This will install all the necessary dependencies for the remainder of the notebook. "
62+
]
63+
},
5364
{
5465
"cell_type": "markdown",
5566
"metadata": {},
@@ -63,7 +74,7 @@
6374
},
6475
{
6576
"cell_type": "code",
66-
"execution_count": 21,
77+
"execution_count": 12,
6778
"metadata": {},
6879
"outputs": [],
6980
"source": [
@@ -88,15 +99,15 @@
8899
},
89100
{
90101
"cell_type": "code",
91-
"execution_count": 25,
102+
"execution_count": 13,
92103
"metadata": {},
93104
"outputs": [
94105
{
95106
"name": "stdout",
96107
"output_type": "stream",
97108
"text": [
98109
"Status: completed\n",
99-
"Results: EvaluationResult(job='eval-JoR3GCSrjtRkC9jPFYyMv2', id='evaluation_result-3iaZjDE3a6tt4W7ag3vNsZ', created_at=datetime.datetime(2025, 7, 16, 22, 12, 21, 687891), custom_fields={}, description=None, files_url=None, groups={}, namespace='default', ownership=None, project=None, tasks={'qa': TaskResult(metrics={'accuracy': MetricResult(scores={'string-check': Score(value=1.0, stats=ScoreStats(count=1, max=None, mean=1.0, min=None, stddev=None, stderr=None, sum=1.0, sum_squared=None, variance=None))})})}, updated_at=datetime.datetime(2025, 7, 16, 22, 12, 21, 687893))\n"
110+
"Results: EvaluationResult(job='eval-Akk2TPTzp96YCQjyvaJsMt', id='evaluation_result-2iKms1yr9GNjVWGSJVV7ZP', created_at=datetime.datetime(2025, 8, 16, 0, 53, 21, 87425), custom_fields={}, description=None, files_url=None, groups={}, namespace='default', ownership=None, project=None, tasks={'qa': TaskResult(metrics={'accuracy': MetricResult(scores={'string-check': Score(value=1.0, stats=ScoreStats(count=1, max=None, mean=1.0, min=None, stddev=None, stderr=None, sum=1.0, sum_squared=None, variance=None))})})}, updated_at=datetime.datetime(2025, 8, 16, 0, 53, 21, 89177))\n"
100111
]
101112
}
102113
],
@@ -153,7 +164,7 @@
153164
},
154165
{
155166
"cell_type": "code",
156-
"execution_count": 15,
167+
"execution_count": 5,
157168
"metadata": {},
158169
"outputs": [],
159170
"source": [
@@ -171,14 +182,14 @@
171182
},
172183
{
173184
"cell_type": "code",
174-
"execution_count": 16,
185+
"execution_count": 14,
175186
"metadata": {},
176187
"outputs": [],
177188
"source": [
178189
"model_target = {\n",
179190
" \"api_endpoint\": {\n",
180191
" \"url\": \"https://integrate.api.nvidia.com/v1\",\n",
181-
" \"model_id\": \"nvidia/llama-3.3-nemotron-super-49b-v1\",\n",
192+
" \"model_id\": \"nvidia/llama-3.3-nemotron-super-49b-v1.5\",\n",
182193
" \"api_key\": os.getenv(\"NVIDIA_API_KEY\")\n",
183194
" }\n",
184195
"}"
@@ -195,7 +206,7 @@
195206
},
196207
{
197208
"cell_type": "code",
198-
"execution_count": 17,
209+
"execution_count": 15,
199210
"metadata": {},
200211
"outputs": [],
201212
"source": [
@@ -220,15 +231,15 @@
220231
},
221232
{
222233
"cell_type": "code",
223-
"execution_count": 26,
234+
"execution_count": 16,
224235
"metadata": {},
225236
"outputs": [
226237
{
227238
"name": "stdout",
228239
"output_type": "stream",
229240
"text": [
230241
"Status: completed\n",
231-
"Results: {'correct': Score(value=2.8, stats=ScoreStats(count=5, max=None, mean=2.8, min=None, stddev=None, stderr=None, sum=14.0, sum_squared=None, variance=None))}\n"
242+
"Results: {'correct': Score(value=2.6, stats=ScoreStats(count=5, max=None, mean=2.6, min=None, stddev=None, stderr=None, sum=13.0, sum_squared=None, variance=None))}\n"
232243
]
233244
}
234245
],
@@ -248,7 +259,7 @@
248259
" \"messages\": [\n",
249260
" {\n",
250261
" \"role\": \"system\",\n",
251-
" \"content\": \"detailed thinking off\"\n",
262+
" \"content\": \"/no_think\"\n",
252263
" },\n",
253264
" {\n",
254265
" \"role\": \"user\",\n",

0 commit comments

Comments
 (0)