|
51 | 51 | }, |
52 | 52 | { |
53 | 53 | "cell_type": "code", |
54 | | - "execution_count": null, |
| 54 | + "execution_count": 2, |
55 | 55 | "metadata": {}, |
56 | | - "outputs": [], |
| 56 | + "outputs": [ |
| 57 | + { |
| 58 | + "name": "stderr", |
| 59 | + "output_type": "stream", |
| 60 | + "text": [ |
| 61 | + "/Users/tyler.hutcherson/Library/Caches/pypoetry/virtualenvs/redisvl-VnTEShF2-py3.13/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", |
| 62 | + " from .autonotebook import tqdm as notebook_tqdm\n", |
| 63 | + "Compiling the model with `torch.compile` and using a `torch.mps` device is not supported. Falling back to non-compiled mode.\n" |
| 64 | + ] |
| 65 | + } |
| 66 | + ], |
57 | 67 | "source": [ |
58 | 68 | "# Initialize the vectorizer\n", |
59 | 69 | "vectorizer = HFTextVectorizer(\n", |
60 | | - " model=\"sentence-transformers/all-mpnet-base-v2\",\n", |
| 70 | + " model=\"redis/langcache-embed-v1\",\n", |
61 | 71 | " cache_folder=os.getenv(\"SENTENCE_TRANSFORMERS_HOME\")\n", |
62 | 72 | ")" |
63 | 73 | ] |
|
103 | 113 | }, |
104 | 114 | { |
105 | 115 | "cell_type": "code", |
106 | | - "execution_count": 4, |
| 116 | + "execution_count": 5, |
107 | 117 | "metadata": {}, |
108 | 118 | "outputs": [ |
109 | 119 | { |
110 | 120 | "name": "stdout", |
111 | 121 | "output_type": "stream", |
112 | 122 | "text": [ |
113 | | - "Stored with key: embedcache:059d...\n" |
| 123 | + "Stored with key: embedcache:909f...\n" |
114 | 124 | ] |
115 | 125 | } |
116 | 126 | ], |
117 | 127 | "source": [ |
118 | 128 | "# Text to embed\n", |
119 | 129 | "text = \"What is machine learning?\"\n", |
120 | | - "model_name = \"sentence-transformers/all-mpnet-base-v2\"\n", |
| 130 | + "model_name = \"redis/langcache-embed-v1\"\n", |
121 | 131 | "\n", |
122 | 132 | "# Generate the embedding\n", |
123 | 133 | "embedding = vectorizer.embed(text)\n", |
|
147 | 157 | }, |
148 | 158 | { |
149 | 159 | "cell_type": "code", |
150 | | - "execution_count": 5, |
| 160 | + "execution_count": 6, |
151 | 161 | "metadata": {}, |
152 | 162 | "outputs": [ |
153 | 163 | { |
154 | 164 | "name": "stdout", |
155 | 165 | "output_type": "stream", |
156 | 166 | "text": [ |
157 | 167 | "Found in cache: What is machine learning?\n", |
158 | | - "Model: sentence-transformers/all-mpnet-base-v2\n", |
| 168 | + "Model: redis/langcache-embed-v1\n", |
159 | 169 | "Metadata: {'category': 'ai', 'source': 'user_query'}\n", |
160 | 170 | "Embedding shape: (768,)\n" |
161 | 171 | ] |
|
184 | 194 | }, |
185 | 195 | { |
186 | 196 | "cell_type": "code", |
187 | | - "execution_count": 6, |
| 197 | + "execution_count": 7, |
188 | 198 | "metadata": {}, |
189 | 199 | "outputs": [ |
190 | 200 | { |
|
218 | 228 | }, |
219 | 229 | { |
220 | 230 | "cell_type": "code", |
221 | | - "execution_count": 7, |
| 231 | + "execution_count": 8, |
222 | 232 | "metadata": {}, |
223 | 233 | "outputs": [ |
224 | 234 | { |
|
251 | 261 | }, |
252 | 262 | { |
253 | 263 | "cell_type": "code", |
254 | | - "execution_count": 8, |
| 264 | + "execution_count": 9, |
255 | 265 | "metadata": {}, |
256 | 266 | "outputs": [ |
257 | 267 | { |
258 | 268 | "name": "stdout", |
259 | 269 | "output_type": "stream", |
260 | 270 | "text": [ |
261 | | - "Stored with key: embedcache:059d...\n", |
| 271 | + "Stored with key: embedcache:909f...\n", |
262 | 272 | "Exists by key: True\n", |
263 | 273 | "Retrieved by key: What is machine learning?\n" |
264 | 274 | ] |
|
297 | 307 | }, |
298 | 308 | { |
299 | 309 | "cell_type": "code", |
300 | | - "execution_count": 9, |
| 310 | + "execution_count": 10, |
301 | 311 | "metadata": {}, |
302 | 312 | "outputs": [ |
303 | 313 | { |
|
382 | 392 | }, |
383 | 393 | { |
384 | 394 | "cell_type": "code", |
385 | | - "execution_count": 10, |
| 395 | + "execution_count": 11, |
386 | 396 | "metadata": {}, |
387 | 397 | "outputs": [ |
388 | 398 | { |
|
430 | 440 | }, |
431 | 441 | { |
432 | 442 | "cell_type": "code", |
433 | | - "execution_count": 11, |
| 443 | + "execution_count": 12, |
434 | 444 | "metadata": {}, |
435 | 445 | "outputs": [ |
436 | 446 | { |
|
484 | 494 | }, |
485 | 495 | { |
486 | 496 | "cell_type": "code", |
487 | | - "execution_count": 12, |
| 497 | + "execution_count": 13, |
488 | 498 | "metadata": {}, |
489 | 499 | "outputs": [ |
490 | 500 | { |
|
533 | 543 | }, |
534 | 544 | { |
535 | 545 | "cell_type": "code", |
536 | | - "execution_count": 13, |
| 546 | + "execution_count": 14, |
537 | 547 | "metadata": {}, |
538 | 548 | "outputs": [ |
539 | 549 | { |
540 | 550 | "name": "stdout", |
541 | 551 | "output_type": "stream", |
542 | 552 | "text": [ |
543 | | - "Computing embedding for: What is artificial intelligence?\n", |
544 | | - "Computing embedding for: How does machine learning work?\n", |
545 | | - "Found in cache: What is artificial intelligence?\n", |
546 | | - "Computing embedding for: What are neural networks?\n", |
547 | | - "Found in cache: How does machine learning work?\n", |
548 | 553 | "\n", |
549 | 554 | "Statistics:\n", |
550 | 555 | "Total queries: 5\n", |
|
562 | 567 | " ttl=3600 # 1 hour TTL\n", |
563 | 568 | ")\n", |
564 | 569 | "\n", |
565 | | - "# Function to get embedding with caching\n", |
566 | | - "def get_cached_embedding(text, model_name):\n", |
567 | | - " # Check if it's in the cache first\n", |
568 | | - " if cached_result := example_cache.get(text=text, model_name=model_name):\n", |
569 | | - " print(f\"Found in cache: {text}\")\n", |
570 | | - " return cached_result[\"embedding\"]\n", |
571 | | - " \n", |
572 | | - " # Not in cache, compute the embedding\n", |
573 | | - " print(f\"Computing embedding for: {text}\")\n", |
574 | | - " embedding = vectorizer.embed(text)\n", |
575 | | - " \n", |
576 | | - " # Store in cache\n", |
577 | | - " example_cache.set(\n", |
578 | | - " text=text,\n", |
579 | | - " model_name=model_name,\n", |
580 | | - " embedding=embedding,\n", |
581 | | - " )\n", |
582 | | - " \n", |
583 | | - " return embedding\n", |
| 570 | + "vectorizer = HFTextVectorizer(\n", |
| 571 | + " model=model_name,\n", |
| 572 | + " cache=example_cache,\n", |
| 573 | + " cache_folder=os.getenv(\"SENTENCE_TRANSFORMERS_HOME\")\n", |
| 574 | + ")\n", |
584 | 575 | "\n", |
585 | 576 | "# Simulate processing a stream of queries\n", |
586 | 577 | "queries = [\n", |
|
604 | 595 | " cache_hits += 1\n", |
605 | 596 | " \n", |
606 | 597 | " # Get embedding (will compute or use cache)\n", |
607 | | - " embedding = get_cached_embedding(query, model_name)\n", |
| 598 | + " embedding = vectorizer.embed(query)\n", |
608 | 599 | "\n", |
609 | 600 | "# Report statistics\n", |
610 | 601 | "cache_misses = total_queries - cache_hits\n", |
|
632 | 623 | }, |
633 | 624 | { |
634 | 625 | "cell_type": "code", |
635 | | - "execution_count": 14, |
| 626 | + "execution_count": 15, |
636 | 627 | "metadata": {}, |
637 | 628 | "outputs": [ |
638 | 629 | { |
639 | 630 | "name": "stdout", |
640 | 631 | "output_type": "stream", |
641 | 632 | "text": [ |
642 | 633 | "Benchmarking without caching:\n", |
643 | | - "Time taken without caching: 0.0940 seconds\n", |
644 | | - "Average time per embedding: 0.0094 seconds\n", |
| 634 | + "Time taken without caching: 0.4735 seconds\n", |
| 635 | + "Average time per embedding: 0.0474 seconds\n", |
645 | 636 | "\n", |
646 | 637 | "Benchmarking with caching:\n", |
647 | | - "Time taken with caching: 0.0237 seconds\n", |
648 | | - "Average time per embedding: 0.0024 seconds\n", |
| 638 | + "Time taken with caching: 0.0663 seconds\n", |
| 639 | + "Average time per embedding: 0.0066 seconds\n", |
649 | 640 | "\n", |
650 | 641 | "Performance comparison:\n", |
651 | | - "Speedup with caching: 3.96x faster\n", |
652 | | - "Time saved: 0.0703 seconds (74.8%)\n", |
653 | | - "Latency reduction: 0.0070 seconds per query\n" |
| 642 | + "Speedup with caching: 7.14x faster\n", |
| 643 | + "Time saved: 0.4073 seconds (86.0%)\n", |
| 644 | + "Latency reduction: 0.0407 seconds per query\n" |
654 | 645 | ] |
655 | 646 | } |
656 | 647 | ], |
657 | 648 | "source": [ |
658 | 649 | "# Text to use for benchmarking\n", |
659 | 650 | "benchmark_text = \"This is a benchmark text to measure the performance of embedding caching.\"\n", |
660 | | - "benchmark_model = \"sentence-transformers/all-mpnet-base-v2\"\n", |
661 | 651 | "\n", |
662 | 652 | "# Create a fresh cache for benchmarking\n", |
663 | 653 | "benchmark_cache = EmbeddingsCache(\n", |
664 | 654 | " name=\"benchmark_cache\",\n", |
665 | 655 | " redis_url=\"redis://localhost:6379\",\n", |
666 | 656 | " ttl=3600 # 1 hour TTL\n", |
667 | 657 | ")\n", |
668 | | - "\n", |
669 | | - "# Function to get embeddings without caching\n", |
670 | | - "def get_embedding_without_cache(text, model_name):\n", |
671 | | - " return vectorizer.embed(text)\n", |
672 | | - "\n", |
673 | | - "# Function to get embeddings with caching\n", |
674 | | - "def get_embedding_with_cache(text, model_name):\n", |
675 | | - " if cached_result := benchmark_cache.get(text=text, model_name=model_name):\n", |
676 | | - " return cached_result[\"embedding\"]\n", |
677 | | - " \n", |
678 | | - " embedding = vectorizer.embed(text)\n", |
679 | | - " benchmark_cache.set(\n", |
680 | | - " text=text,\n", |
681 | | - " model_name=model_name,\n", |
682 | | - " embedding=embedding\n", |
683 | | - " )\n", |
684 | | - " return embedding\n", |
| 658 | + "vectorizer.cache = benchmark_cache\n", |
685 | 659 | "\n", |
686 | 660 | "# Number of iterations for the benchmark\n", |
687 | 661 | "n_iterations = 10\n", |
688 | 662 | "\n", |
689 | 663 | "# Benchmark without caching\n", |
690 | 664 | "print(\"Benchmarking without caching:\")\n", |
691 | 665 | "start_time = time.time()\n", |
692 | | - "get_embedding_without_cache(benchmark_text, benchmark_model)\n", |
| 666 | + "for _ in range(n_iterations):\n", |
| 667 | + "    embedding = vectorizer.embed(benchmark_text, skip_cache=True)\n", |
693 | 668 | "no_cache_time = time.time() - start_time\n", |
694 | 669 | "print(f\"Time taken without caching: {no_cache_time:.4f} seconds\")\n", |
695 | 670 | "print(f\"Average time per embedding: {no_cache_time/n_iterations:.4f} seconds\")\n", |
696 | 671 | "\n", |
697 | 672 | "# Benchmark with caching\n", |
698 | 673 | "print(\"\\nBenchmarking with caching:\")\n", |
699 | 674 | "start_time = time.time()\n", |
700 | | - "get_embedding_with_cache(benchmark_text, benchmark_model)\n", |
| 675 | + "for _ in range(n_iterations):\n", |
| 676 | + "    embedding = vectorizer.embed(benchmark_text)\n", |
701 | 677 | "cache_time = time.time() - start_time\n", |
702 | 678 | "print(f\"Time taken with caching: {cache_time:.4f} seconds\")\n", |
703 | 679 | "print(f\"Average time per embedding: {cache_time/n_iterations:.4f} seconds\")\n", |
|
785 | 761 | "name": "python", |
786 | 762 | "nbconvert_exporter": "python", |
787 | 763 | "pygments_lexer": "ipython3", |
788 | | - "version": "3.10.12" |
| 764 | + "version": "3.13.2" |
789 | 765 | } |
790 | 766 | }, |
791 | 767 | "nbformat": 4, |
|
0 commit comments