Skip to content

Commit cd6f9aa

Browse files
authored
Stream data from the Hugging Face dataset instead of downloading the complete split
- Change to streaming data from the dataset instead of downloading the complete split
2 parents c3605ab + ef3cd51 commit cd6f9aa

File tree

1 file changed

+10
-5
lines changed

1 file changed

+10
-5
lines changed

langkit/examples/LLM_to_WhyLabs.ipynb

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
]
1111
},
1212
{
13+
"attachments": {},
1314
"cell_type": "markdown",
1415
"metadata": {},
1516
"source": [
@@ -58,6 +59,7 @@
5859
]
5960
},
6061
{
62+
"attachments": {},
6163
"cell_type": "markdown",
6264
"metadata": {},
6365
"source": [
@@ -229,6 +231,7 @@
229231
]
230232
},
231233
{
234+
"attachments": {},
232235
"cell_type": "markdown",
233236
"metadata": {},
234237
"source": [
@@ -266,13 +269,14 @@
266269
" from datasets import load_dataset, Dataset\n",
267270
" import pandas as pd\n",
268271
"\n",
269-
" archived_chats = load_dataset('alespalla/chatbot_instruction_prompts', split=\"test\")\n",
272+
" archived_chats = load_dataset('alespalla/chatbot_instruction_prompts', split=\"test\",streaming=True)\n",
273+
" chats = iter(archived_chats)\n",
274+
" for _ in range(100):\n",
275+
" response = next(chats)\n",
276+
" telemetry_agent.log(response)\n",
277+
" print(response)\n",
270278
" # let's output the dataset metadata from Hugging Face so we can see how to\n",
271279
" # access some of the contained prompts and responses.\n",
272-
" print(f\"Here's the dataset structure {archived_chats}\")\n",
273-
" for column_name in archived_chats.column_names:\n",
274-
" print(f\"Profiling column: ({column_name})\")\n",
275-
" telemetry_agent.log(pd.DataFrame(archived_chats[column_name][:1000], columns=[column_name]))\n",
276280
" print(\"done profiling\")"
277281
]
278282
},
@@ -289,6 +293,7 @@
289293
]
290294
},
291295
{
296+
"attachments": {},
292297
"cell_type": "markdown",
293298
"metadata": {},
294299
"source": [

0 commit comments

Comments
 (0)