|
50 | 50 | "```" |
51 | 51 | ] |
52 | 52 | }, |
| 53 | + { |
| 54 | + "cell_type": "markdown", |
| 55 | + "metadata": {}, |
| 56 | + "source": [ |
| 57 | + "### Installing Dependencies with `uv`\n", |
| 58 | + "\n", |
| 59 | + "Before moving forward in the notebook, please ensure you're using the virtual environment created by running `uv sync` in the root directory of this notebook.\n", |
| 60 | + "\n", |
| 61 | + "This will install all the necessary dependencies for the remainder of the notebook. " |
| 62 | + ] |
| 63 | + }, |
53 | 64 | { |
54 | 65 | "cell_type": "markdown", |
55 | 66 | "metadata": {}, |
|
63 | 74 | }, |
64 | 75 | { |
65 | 76 | "cell_type": "code", |
66 | | - "execution_count": 21, |
| 77 | + "execution_count": 12, |
67 | 78 | "metadata": {}, |
68 | 79 | "outputs": [], |
69 | 80 | "source": [ |
|
88 | 99 | }, |
89 | 100 | { |
90 | 101 | "cell_type": "code", |
91 | | - "execution_count": 25, |
| 102 | + "execution_count": 13, |
92 | 103 | "metadata": {}, |
93 | 104 | "outputs": [ |
94 | 105 | { |
95 | 106 | "name": "stdout", |
96 | 107 | "output_type": "stream", |
97 | 108 | "text": [ |
98 | 109 | "Status: completed\n", |
99 | | - "Results: EvaluationResult(job='eval-JoR3GCSrjtRkC9jPFYyMv2', id='evaluation_result-3iaZjDE3a6tt4W7ag3vNsZ', created_at=datetime.datetime(2025, 7, 16, 22, 12, 21, 687891), custom_fields={}, description=None, files_url=None, groups={}, namespace='default', ownership=None, project=None, tasks={'qa': TaskResult(metrics={'accuracy': MetricResult(scores={'string-check': Score(value=1.0, stats=ScoreStats(count=1, max=None, mean=1.0, min=None, stddev=None, stderr=None, sum=1.0, sum_squared=None, variance=None))})})}, updated_at=datetime.datetime(2025, 7, 16, 22, 12, 21, 687893))\n" |
| 110 | + "Results: EvaluationResult(job='eval-Akk2TPTzp96YCQjyvaJsMt', id='evaluation_result-2iKms1yr9GNjVWGSJVV7ZP', created_at=datetime.datetime(2025, 8, 16, 0, 53, 21, 87425), custom_fields={}, description=None, files_url=None, groups={}, namespace='default', ownership=None, project=None, tasks={'qa': TaskResult(metrics={'accuracy': MetricResult(scores={'string-check': Score(value=1.0, stats=ScoreStats(count=1, max=None, mean=1.0, min=None, stddev=None, stderr=None, sum=1.0, sum_squared=None, variance=None))})})}, updated_at=datetime.datetime(2025, 8, 16, 0, 53, 21, 89177))\n" |
100 | 111 | ] |
101 | 112 | } |
102 | 113 | ], |
|
153 | 164 | }, |
154 | 165 | { |
155 | 166 | "cell_type": "code", |
156 | | - "execution_count": 15, |
| 167 | + "execution_count": 5, |
157 | 168 | "metadata": {}, |
158 | 169 | "outputs": [], |
159 | 170 | "source": [ |
|
171 | 182 | }, |
172 | 183 | { |
173 | 184 | "cell_type": "code", |
174 | | - "execution_count": 16, |
| 185 | + "execution_count": 14, |
175 | 186 | "metadata": {}, |
176 | 187 | "outputs": [], |
177 | 188 | "source": [ |
178 | 189 | "model_target = {\n", |
179 | 190 | " \"api_endpoint\": {\n", |
180 | 191 | " \"url\": \"https://integrate.api.nvidia.com/v1\",\n", |
181 | | - " \"model_id\": \"nvidia/llama-3.3-nemotron-super-49b-v1\",\n", |
| 192 | + " \"model_id\": \"nvidia/llama-3.3-nemotron-super-49b-v1.5\",\n", |
182 | 193 | " \"api_key\": os.getenv(\"NVIDIA_API_KEY\")\n", |
183 | 194 | " }\n", |
184 | 195 | "}" |
|
195 | 206 | }, |
196 | 207 | { |
197 | 208 | "cell_type": "code", |
198 | | - "execution_count": 17, |
| 209 | + "execution_count": 15, |
199 | 210 | "metadata": {}, |
200 | 211 | "outputs": [], |
201 | 212 | "source": [ |
|
220 | 231 | }, |
221 | 232 | { |
222 | 233 | "cell_type": "code", |
223 | | - "execution_count": 26, |
| 234 | + "execution_count": 16, |
224 | 235 | "metadata": {}, |
225 | 236 | "outputs": [ |
226 | 237 | { |
227 | 238 | "name": "stdout", |
228 | 239 | "output_type": "stream", |
229 | 240 | "text": [ |
230 | 241 | "Status: completed\n", |
231 | | - "Results: {'correct': Score(value=2.8, stats=ScoreStats(count=5, max=None, mean=2.8, min=None, stddev=None, stderr=None, sum=14.0, sum_squared=None, variance=None))}\n" |
| 242 | + "Results: {'correct': Score(value=2.6, stats=ScoreStats(count=5, max=None, mean=2.6, min=None, stddev=None, stderr=None, sum=13.0, sum_squared=None, variance=None))}\n" |
232 | 243 | ] |
233 | 244 | } |
234 | 245 | ], |
|
248 | 259 | " \"messages\": [\n", |
249 | 260 | " {\n", |
250 | 261 | " \"role\": \"system\",\n", |
251 | | - " \"content\": \"detailed thinking off\"\n", |
| 262 | + " \"content\": \"/no_think\"\n", |
252 | 263 | " },\n", |
253 | 264 | " {\n", |
254 | 265 | " \"role\": \"user\",\n", |
|
0 commit comments