Skip to content

Commit 193712e

Browse files
authored
improve the ELSER notebook (#131)
* improve the ELSER notebook with a wait_for and a while loop
* remove shards / index settings
1 parent 9685cb9 commit 193712e

File tree

1 file changed

+71
-28
lines changed

1 file changed

+71
-28
lines changed

notebooks/search/03-ELSER.ipynb

Lines changed: 71 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
},
3939
{
4040
"cell_type": "code",
41-
"execution_count": null,
41+
"execution_count": 1,
4242
"metadata": {
4343
"colab": {
4444
"base_uri": "https://localhost:8080/"
@@ -129,7 +129,7 @@
129129
},
130130
{
131131
"cell_type": "code",
132-
"execution_count": 12,
132+
"execution_count": 4,
133133
"metadata": {
134134
"colab": {
135135
"base_uri": "https://localhost:8080/"
@@ -173,9 +173,35 @@
173173
},
174174
{
175175
"cell_type": "code",
176-
"execution_count": 6,
176+
"execution_count": 39,
177177
"metadata": {},
178-
"outputs": [],
178+
"outputs": [
179+
{
180+
"name": "stdout",
181+
"output_type": "stream",
182+
"text": [
183+
"Model deleted successfully, We will proceed with creating one\n"
184+
]
185+
},
186+
{
187+
"name": "stderr",
188+
"output_type": "stream",
189+
"text": [
190+
"/var/folders/_8/2nxt7jjx27bd8bm5lw63ht340000gn/T/ipykernel_34494/2089429255.py:3: ElasticsearchWarning: The default [remove_binary] value of 'false' is deprecated and will be set to 'true' in a future release. Set [remove_binary] explicitly to 'true' or 'false' to ensure no behavior change.\n",
191+
" client.ml.delete_trained_model(model_id=\".elser_model_2\",force=True)\n"
192+
]
193+
},
194+
{
195+
"data": {
196+
"text/plain": [
197+
"ObjectApiResponse({'model_id': '.elser_model_2', 'model_type': 'pytorch', 'model_package': {'packaged_model_id': 'elser_model_2', 'model_repository': 'https://ml-models.elastic.co', 'minimum_version': '11.0.0', 'size': 438123914, 'sha256': '2e0450a1c598221a919917cbb05d8672aed6c613c028008fedcd696462c81af0', 'metadata': {}, 'tags': [], 'vocabulary_file': 'elser_model_2.vocab.json'}, 'created_by': 'api_user', 'version': '11.0.0', 'create_time': 1701689920521, 'model_size_bytes': 0, 'estimated_operations': 0, 'license_level': 'platinum', 'description': 'Elastic Learned Sparse EncodeR v2', 'tags': ['elastic'], 'metadata': {}, 'input': {'field_names': ['text_field']}, 'inference_config': {'text_expansion': {'vocabulary': {'index': '.ml-inference-native-000002'}, 'tokenization': {'bert': {'do_lower_case': True, 'with_special_tokens': True, 'max_sequence_length': 512, 'truncate': 'first', 'span': -1}}}}, 'location': {'index': {'name': '.ml-inference-native-000002'}}})"
198+
]
199+
},
200+
"execution_count": 39,
201+
"metadata": {},
202+
"output_type": "execute_result"
203+
}
204+
],
179205
"source": [
180206
"# delete model if already downloaded and deployed\n",
181207
"try:\n",
@@ -202,7 +228,7 @@
202228
},
203229
{
204230
"cell_type": "code",
205-
"execution_count": 10,
231+
"execution_count": 40,
206232
"metadata": {},
207233
"outputs": [
208234
{
@@ -237,31 +263,39 @@
237263
},
238264
{
239265
"cell_type": "code",
240-
"execution_count": 11,
266+
"execution_count": 41,
241267
"metadata": {},
242268
"outputs": [
243269
{
244-
"ename": "BadRequestError",
245-
"evalue": "BadRequestError(400, 'status_exception', 'Could not start model deployment because an existing deployment with the same id [.elser_model_2] exist')",
246-
"output_type": "error",
247-
"traceback": [
248-
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
249-
"\u001b[0;31mBadRequestError\u001b[0m Traceback (most recent call last)",
250-
"\u001b[1;32m/Users/joe/projects/elastic/elasticsearch-labs/notebooks/search/03-ELSER.ipynb Cell 16\u001b[0m line \u001b[0;36m2\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/joe/projects/elastic/elasticsearch-labs/notebooks/search/03-ELSER.ipynb#X21sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39m# Start trained model deployment if not already deployed\u001b[39;00m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/joe/projects/elastic/elasticsearch-labs/notebooks/search/03-ELSER.ipynb#X21sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m client\u001b[39m.\u001b[39;49mml\u001b[39m.\u001b[39;49mstart_trained_model_deployment(\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/joe/projects/elastic/elasticsearch-labs/notebooks/search/03-ELSER.ipynb#X21sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m model_id\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m.elser_model_2\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/joe/projects/elastic/elasticsearch-labs/notebooks/search/03-ELSER.ipynb#X21sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m number_of_allocations\u001b[39m=\u001b[39;49m\u001b[39m1\u001b[39;49m\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/joe/projects/elastic/elasticsearch-labs/notebooks/search/03-ELSER.ipynb#X21sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m )\n",
251-
"File \u001b[0;32m~/.pyenv/versions/3.10.3/lib/python3.10/site-packages/elasticsearch/_sync/client/utils.py:402\u001b[0m, in \u001b[0;36m_rewrite_parameters.<locals>.wrapper.<locals>.wrapped\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 399\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m:\n\u001b[1;32m 400\u001b[0m \u001b[39mpass\u001b[39;00m\n\u001b[0;32m--> 402\u001b[0m \u001b[39mreturn\u001b[39;00m api(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
252-
"File \u001b[0;32m~/.pyenv/versions/3.10.3/lib/python3.10/site-packages/elasticsearch/_sync/client/ml.py:3655\u001b[0m, in \u001b[0;36mMlClient.start_trained_model_deployment\u001b[0;34m(self, model_id, cache_size, error_trace, filter_path, human, number_of_allocations, pretty, priority, queue_capacity, threads_per_allocation, timeout, wait_for)\u001b[0m\n\u001b[1;32m 3653\u001b[0m __query[\u001b[39m\"\u001b[39m\u001b[39mwait_for\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m wait_for\n\u001b[1;32m 3654\u001b[0m __headers \u001b[39m=\u001b[39m {\u001b[39m\"\u001b[39m\u001b[39maccept\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mapplication/json\u001b[39m\u001b[39m\"\u001b[39m}\n\u001b[0;32m-> 3655\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mperform_request( \u001b[39m# type: ignore[return-value]\u001b[39;49;00m\n\u001b[1;32m 3656\u001b[0m \u001b[39m\"\u001b[39;49m\u001b[39mPOST\u001b[39;49m\u001b[39m\"\u001b[39;49m, __path, params\u001b[39m=\u001b[39;49m__query, headers\u001b[39m=\u001b[39;49m__headers\n\u001b[1;32m 3657\u001b[0m )\n",
253-
"File \u001b[0;32m~/.pyenv/versions/3.10.3/lib/python3.10/site-packages/elasticsearch/_sync/client/_base.py:389\u001b[0m, in \u001b[0;36mNamespacedClient.perform_request\u001b[0;34m(self, method, path, params, headers, body)\u001b[0m\n\u001b[1;32m 378\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mperform_request\u001b[39m(\n\u001b[1;32m 379\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 380\u001b[0m method: \u001b[39mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 387\u001b[0m \u001b[39m# Use the internal clients .perform_request() implementation\u001b[39;00m\n\u001b[1;32m 388\u001b[0m \u001b[39m# so we take advantage of their transport options.\u001b[39;00m\n\u001b[0;32m--> 389\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_client\u001b[39m.\u001b[39;49mperform_request(\n\u001b[1;32m 390\u001b[0m method, path, params\u001b[39m=\u001b[39;49mparams, headers\u001b[39m=\u001b[39;49mheaders, body\u001b[39m=\u001b[39;49mbody\n\u001b[1;32m 391\u001b[0m )\n",
254-
"File \u001b[0;32m~/.pyenv/versions/3.10.3/lib/python3.10/site-packages/elasticsearch/_sync/client/_base.py:320\u001b[0m, in \u001b[0;36mBaseClient.perform_request\u001b[0;34m(self, method, path, params, headers, body)\u001b[0m\n\u001b[1;32m 317\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mValueError\u001b[39;00m, \u001b[39mKeyError\u001b[39;00m, \u001b[39mTypeError\u001b[39;00m):\n\u001b[1;32m 318\u001b[0m \u001b[39mpass\u001b[39;00m\n\u001b[0;32m--> 320\u001b[0m \u001b[39mraise\u001b[39;00m HTTP_EXCEPTIONS\u001b[39m.\u001b[39mget(meta\u001b[39m.\u001b[39mstatus, ApiError)(\n\u001b[1;32m 321\u001b[0m message\u001b[39m=\u001b[39mmessage, meta\u001b[39m=\u001b[39mmeta, body\u001b[39m=\u001b[39mresp_body\n\u001b[1;32m 322\u001b[0m )\n\u001b[1;32m 324\u001b[0m \u001b[39m# 'X-Elastic-Product: Elasticsearch' should be on every 2XX response.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_verified_elasticsearch:\n\u001b[1;32m 326\u001b[0m \u001b[39m# If the header is set we mark the server as verified.\u001b[39;00m\n",
255-
"\u001b[0;31mBadRequestError\u001b[0m: BadRequestError(400, 'status_exception', 'Could not start model deployment because an existing deployment with the same id [.elser_model_2] exist')"
270+
"name": "stdout",
271+
"output_type": "stream",
272+
"text": [
273+
"ELSER Model is currently being deployed.\n",
274+
"ELSER Model is currently being deployed.\n",
275+
"ELSER Model has been successfully deployed.\n"
256276
]
257277
}
258278
],
259279
"source": [
260280
"# Start trained model deployment if not already deployed\n",
261281
"client.ml.start_trained_model_deployment(\n",
262282
" model_id=\".elser_model_2\",\n",
263-
" number_of_allocations=1\n",
264-
")\n"
283+
" number_of_allocations=1,\n",
284+
" wait_for=\"starting\"\n",
285+
")\n",
286+
"\n",
287+
"while True:\n",
288+
" status = client.ml.get_trained_models_stats(\n",
289+
" model_id=\".elser_model_2\",\n",
290+
" )\n",
291+
" if (status[\"trained_model_stats\"][0][\"deployment_stats\"][\"state\"] == \"started\"):\n",
292+
" print(\"ELSER Model has been successfully deployed.\")\n",
293+
" break\n",
294+
" else:\n",
295+
" print(\"ELSER Model is currently being deployed.\")\n",
296+
" time.sleep(5)\n",
297+
"\n",
298+
"\n"
265299
]
266300
},
267301
{
@@ -286,15 +320,26 @@
286320
},
287321
{
288322
"cell_type": "code",
289-
"execution_count": null,
323+
"execution_count": 42,
290324
"metadata": {
291325
"colab": {
292326
"base_uri": "https://localhost:8080/"
293327
},
294328
"id": "XhRng99KLQsd",
295329
"outputId": "00ea73b5-45a4-472b-f4bc-2c2c790ab94d"
296330
},
297-
"outputs": [],
331+
"outputs": [
332+
{
333+
"data": {
334+
"text/plain": [
335+
"ObjectApiResponse({'acknowledged': True})"
336+
]
337+
},
338+
"execution_count": 42,
339+
"metadata": {},
340+
"output_type": "execute_result"
341+
}
342+
],
298343
"source": [
299344
"client.ingest.put_pipeline(\n",
300345
" id=\"elser-ingest-pipeline\", \n",
@@ -349,7 +394,7 @@
349394
},
350395
{
351396
"cell_type": "code",
352-
"execution_count": 162,
397+
"execution_count": 46,
353398
"metadata": {
354399
"colab": {
355400
"base_uri": "https://localhost:8080/"
@@ -364,7 +409,7 @@
364409
"ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'elser-example-movies'})"
365410
]
366411
},
367-
"execution_count": 162,
412+
"execution_count": 46,
368413
"metadata": {},
369414
"output_type": "execute_result"
370415
}
@@ -375,8 +420,6 @@
375420
" index=\"elser-example-movies\",\n",
376421
" settings={\n",
377422
" \"index\": {\n",
378-
" \"number_of_shards\": 1,\n",
379-
" \"number_of_replicas\": 1,\n",
380423
" \"default_pipeline\": \"elser-ingest-pipeline\"\n",
381424
" }\n",
382425
" },\n",
@@ -414,7 +457,7 @@
414457
},
415458
{
416459
"cell_type": "code",
417-
"execution_count": 163,
460+
"execution_count": 44,
418461
"metadata": {
419462
"colab": {
420463
"base_uri": "https://localhost:8080/"
@@ -482,7 +525,7 @@
482525
},
483526
{
484527
"cell_type": "code",
485-
"execution_count": 164,
528+
"execution_count": 45,
486529
"metadata": {
487530
"colab": {
488531
"base_uri": "https://localhost:8080/"

0 commit comments

Comments
 (0)