
Commit 89c8607

nbtest improvements (#130)
1 parent 193712e commit 89c8607

14 files changed: +559 / -124 lines

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+masks:
+- "'name': '[^']+'"
+- "'cluster_name': '[^']+'"
+- "'cluster_uuid': '[^']+'"
+- "'build_flavor': '[^']+'"
+- '[0-9]+\.[0-9]+\.[0-9]+'
+- "'build_hash': '[^']+'"
+- "'build_date': '[^']+'"
Lines changed: 7 additions & 4 deletions
@@ -1,7 +1,10 @@
 NBTEST = ../../bin/nbtest
+NOTEBOOKS = \
+	with-index-pipelines.ipynb
 
-.PHONY: all
+.PHONY: all $(NOTEBOOKS)
 
-all:
-	$(NBTEST) \
-		with-index-pipelines.ipynb
+all: $(NOTEBOOKS)
+
+$(NOTEBOOKS):
+	$(NBTEST) $@
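With this change every notebook is its own phony target and all depends on the full list, so running make (or make all) still puts each notebook through nbtest, while make with-index-pipelines.ipynb reruns a single one. Listing the notebook names under .PHONY matters because files with those names exist on disk; without it, make would consider the targets up to date and skip them. The notebooks/search Makefile below gets the same treatment.
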
Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1422b7bb-bc8c-42bb-b070-53fce3cf6144",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from elasticsearch import Elasticsearch\n",
+    "from getpass import getpass\n",
+    "\n",
+    "# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#finding-your-cloud-id\n",
+    "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
+    "\n",
+    "# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key\n",
+    "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
+    "\n",
+    "# Create the client instance\n",
+    "client = Elasticsearch(\n",
+    "    # For local development\n",
+    "    # hosts=[\"http://localhost:9200\"] \n",
+    "    cloud_id=ELASTIC_CLOUD_ID,\n",
+    "    api_key=ELASTIC_API_KEY,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e4a89367-d23a-4340-bc92-2dcabd18adcd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "client.indices.delete(index=\"chunk_passages_example\")\n",
+    "client.ingest.delete_pipeline(id=\"chunk_text_to_passages\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4ac37f1b-6122-49fe-a3b8-e8f2025a0961",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "try:\n",
+    "    client.ml.delete_trained_model(model_id=\"sentence-transformers__all-minilm-l6-v2\", force=True)\n",
+    "except:\n",
+    "    pass"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
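This new notebook (its path is not shown in this view) connects to the cluster and then deletes the chunking example's index (chunk_passages_example), ingest pipeline (chunk_text_to_passages), and trained model (sentence-transformers__all-minilm-l6-v2). It looks like a cleanup fixture for the document-chunking tests, taking over the teardown cell that the next diff removes from with-index-pipelines.ipynb.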

notebooks/document-chunking/with-index-pipelines.ipynb

Lines changed: 1 addition & 12 deletions
@@ -523,17 +523,6 @@
     "\n",
     "pretty_response(response)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b269da89",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "client.indices.delete(index=INDEX_NAME)\n",
-    "client.ingest.delete_pipeline(id=\"chunk_text_to_passages\")\n"
-   ]
   }
  ],
  "metadata": {
@@ -555,7 +544,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.3"
+   "version": "3.11.6"
   }
  },
  "nbformat": 4,
notebooks/search/.nbtest.yml

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+masks:
+- "'name': '[^']+'"
+- "'build_flavor': '[^']+'"
+- '[0-9]+\.[0-9]+\.[0-9]+'
+- "'cluster_name': '[^']+'"
+- "'cluster_uuid': '[^']+'"
+- "'build_hash': '[^']+'"
+- "'build_date': '[^']+'"
+- "'_version': [0-9]+"
+- '^ID: .*$'
+- '^Score: [0-9]+\.[0-9][0-9]*$'
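Beyond the cluster-info masks used for the document-chunking notebooks, this file also masks '_version' counters and whole "ID: ..." and "Score: ..." output lines, presumably because auto-generated document IDs and relevance scores differ from run to run.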

notebooks/search/Makefile

Lines changed: 13 additions & 10 deletions
@@ -1,13 +1,16 @@
 NBTEST = ../../bin/nbtest
+NOTEBOOKS = \
+	00-quick-start.ipynb \
+	01-keyword-querying-filtering.ipynb \
+	02-hybrid-search.ipynb \
+	03-ELSER.ipynb \
+	04-multilingual.ipynb \
+	05-query-rules.ipynb \
+	06-synonyms-api.ipynb
 
-.PHONY: all
+.PHONY: all $(NOTEBOOKS)
 
-all:
-	$(NBTEST) \
-		00-quick-start.ipynb \
-		01-keyword-querying-filtering.ipynb \
-		02-hybrid-search.ipynb \
-		03-ELSER.ipynb \
-		04-multilingual.ipynb \
-		05-query-rules.ipynb \
-		06-synonyms-api.ipynb
+all: $(NOTEBOOKS)
+
+$(NOTEBOOKS):
+	$(NBTEST) $@
Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e180af3a-3a2c-4186-a577-7051ec6460b1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install -qU elasticsearch sentence-transformers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "63d22ea2-ecca-41bb-b08f-de8ad49cda41",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get the Elasticsearch client\n",
+    "from elasticsearch import Elasticsearch\n",
+    "from getpass import getpass\n",
+    "\n",
+    "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
+    "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
+    "\n",
+    "client = Elasticsearch(cloud_id=ELASTIC_CLOUD_ID, api_key=ELASTIC_API_KEY,)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b367acaa-90e6-43d0-b9ae-cf42a0e2c0f1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "from urllib.request import urlopen\n",
+    "from sentence_transformers import SentenceTransformer\n",
+    "\n",
+    "if NBTEST[\"notebook\"] in ['01-keyword-querying-filtering.ipynb', '02-hybrid-search.ipynb', '06-synonyms-api.ipynb']:\n",
+    "    # these tests need book_index to exist ahead of time\n",
+    "    client.indices.delete(index=\"book_index\", ignore_unavailable=True)\n",
+    "\n",
+    "    mappings = {\n",
+    "        \"properties\": {\n",
+    "            \"title_vector\": {\n",
+    "                \"type\": \"dense_vector\",\n",
+    "                \"dims\": 384,\n",
+    "                \"index\": \"true\",\n",
+    "                \"similarity\": \"cosine\"\n",
+    "            }\n",
+    "        }\n",
+    "    }\n",
+    "    client.indices.create(index='book_index', mappings=mappings)\n",
+    "\n",
+    "    url = \"https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/notebooks/search/data.json\"\n",
+    "    response = urlopen(url)\n",
+    "    books = json.loads(response.read())\n",
+    "\n",
+    "    model = SentenceTransformer('all-MiniLM-L6-v2')\n",
+    "    operations = []\n",
+    "    for book in books:\n",
+    "        operations.append({\"index\": {\"_index\": \"book_index\"}})\n",
+    "        # Transforming the title into an embedding using the model\n",
+    "        book[\"title_vector\"] = model.encode(book[\"title\"]).tolist()\n",
+    "        operations.append(book)\n",
+    "    client.bulk(index=\"book_index\", operations=operations, refresh=True)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
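This setup notebook for the search folder references an NBTEST dictionary that it never defines; nbtest apparently injects it, so the notebook can check which test notebook is about to run (NBTEST["notebook"]) and build the shared book_index fixture only for the notebooks that expect it to exist ahead of time rather than create it themselves.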
Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7bf006aa-91cf-4c3a-b685-1f8ca5892a33",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from elasticsearch import Elasticsearch\n",
+    "from getpass import getpass\n",
+    "\n",
+    "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
+    "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
+    "\n",
+    "client = Elasticsearch(cloud_id=ELASTIC_CLOUD_ID, api_key=ELASTIC_API_KEY,)\n",
+    "\n",
+    "# delete the notebook's index\n",
+    "client.indices.delete(index=\"elser-example-movies\", ignore_unavailable=True)\n",
+    "\n",
+    "# delete the pipeline\n",
+    "try:\n",
+    "    client.ingest.delete_pipeline(id=\"elser-ingest-pipeline\")\n",
+    "except:\n",
+    "    pass\n",
+    "\n",
+    "# delete the model\n",
+    "try:\n",
+    "    client.ml.delete_trained_model(model_id=\".elser_model_2\", force=True)\n",
+    "except:\n",
+    "    pass"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
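This notebook removes everything the ELSER example creates: the elser-example-movies index, the elser-ingest-pipeline ingest pipeline, and the .elser_model_2 trained model. It appears to be the dedicated teardown for 03-ELSER.ipynb that the comment in the next file refers to.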
Lines changed: 73 additions & 0 deletions
@@ -0,0 +1,73 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7bcf0f81-aec8-4f49-918c-3163917885ec",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "indexes = {\n",
+    "    \"00-quick-start.ipynb\": \"book_index\",\n",
+    "    \"01-keyword-querying-filtering.ipynb\": \"book_index\",\n",
+    "    \"02-hybrid-search.ipynb\": \"book_index\",\n",
+    "    # 03-ELSER.ipynb has its own teardown notebook\n",
+    "    \"04-multilingual.ipynb\": \"articles\",\n",
+    "    \"05-query-rules.ipynb\": \"products_index\",\n",
+    "    \"06-synonyms-api.ipynb\": \"book_index\",\n",
+    "}\n",
+    "INDEX_NAME = indexes.get(NBTEST[\"notebook\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fcd17ce3-ece3-4268-b37b-bbf47c2437c8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get the Elasticsearch client\n",
+    "from elasticsearch import Elasticsearch\n",
+    "from getpass import getpass\n",
+    "\n",
+    "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
+    "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
+    "\n",
+    "client = Elasticsearch(cloud_id=ELASTIC_CLOUD_ID, api_key=ELASTIC_API_KEY,)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "abf51067-61f8-4cf3-b950-464805ea0e8d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# delete the notebook's index\n",
+    "if INDEX_NAME:\n",
+    "    client.indices.delete(index=INDEX_NAME, ignore_unavailable=True)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
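The last new notebook is a shared teardown for the remaining search notebooks: it maps each notebook name to the index that notebook creates, looks up the current notebook via the NBTEST dictionary (again, presumably injected by nbtest), and deletes the mapped index if there is one (03-ELSER.ipynb is absent because it has its own teardown, per the comment).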
