diff --git a/examples/rag/meilisearch-hybrid.php b/examples/rag/meilisearch-hybrid.php
new file mode 100644
index 000000000..cb8346b66
--- /dev/null
+++ b/examples/rag/meilisearch-hybrid.php
@@ -0,0 +1,117 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+use Symfony\AI\Fixtures\Movies;
+use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
+use Symfony\AI\Store\Bridge\Meilisearch\Store;
+use Symfony\AI\Store\Document\Loader\InMemoryLoader;
+use Symfony\AI\Store\Document\Metadata;
+use Symfony\AI\Store\Document\TextDocument;
+use Symfony\AI\Store\Document\Vectorizer;
+use Symfony\AI\Store\Indexer;
+use Symfony\Component\Uid\Uuid;
+
+require_once dirname(__DIR__).'/bootstrap.php';
+
+echo "=== Meilisearch Hybrid Search Demo ===\n\n";
+echo "This example demonstrates how to configure the semantic ratio to balance\n";
+echo "between semantic (vector) search and full-text search in Meilisearch.\n\n";
+
+// Initialize the store with a balanced hybrid search (50/50)
+$store = new Store(
+    httpClient: http_client(),
+    endpointUrl: env('MEILISEARCH_HOST'),
+    apiKey: env('MEILISEARCH_API_KEY'),
+    indexName: 'movies_hybrid',
+    semanticRatio: 0.5, // Balanced hybrid search by default
+);
+
+// Build one text document per movie (embeddings are created further below)
+$documents = [];
+foreach (Movies::all() as $movie) {
+    $documents[] = new TextDocument(
+        id: Uuid::v4(),
+        content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'],
+        metadata: new Metadata($movie),
+    );
+}
+
+// Initialize the index
+$store->setup();
+
+// Vectorize the documents and index them in the store
+$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
+$vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger());
+$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $store, logger: logger());
+$indexer->index($documents);
+
+// Create a query embedding
+$queryText = 'futuristic technology and artificial intelligence';
+echo "Query: \"$queryText\"\n\n";
+$queryEmbedding = $vectorizer->vectorize($queryText);
+
+// Test different semantic ratios to compare results
+$ratios = [
+    ['ratio' => 0.0, 'description' => '100% Full-text search (keyword matching)'],
+    ['ratio' => 0.5, 'description' => 'Balanced hybrid (50% semantic + 50% full-text)'],
+    ['ratio' => 1.0, 'description' => '100% Semantic search (vector similarity)'],
+];
+
+foreach ($ratios as $config) {
+    echo "--- {$config['description']} ---\n";
+
+    // Override the semantic ratio for this specific query
+    $results = $store->query($queryEmbedding, [
+        'semanticRatio' => $config['ratio'],
+        'q' => 'technology', // Full-text search keyword
+    ]);
+
+    echo "Top 3 results:\n";
+    foreach (array_slice($results, 0, 3) as $i => $result) {
+        $metadata = $result->metadata->getArrayCopy();
+        echo sprintf(
+            " %d. %s (Score: %.4f)\n",
+            $i + 1,
+            $metadata['title'] ?? 'Unknown',
+            $result->score ?? 0.0
+        );
+    }
+    echo "\n";
+}
+
+echo "--- Custom query with pure semantic search ---\n";
+echo "Query: Movies about space exploration\n";
+$spaceEmbedding = $vectorizer->vectorize('space exploration and cosmic adventures');
+$results = $store->query($spaceEmbedding, [
+    'semanticRatio' => 1.0, // Pure semantic search
+]);
+
+echo "Top 3 results:\n";
+foreach (array_slice($results, 0, 3) as $i => $result) {
+    $metadata = $result->metadata->getArrayCopy();
+    echo sprintf(
+        " %d. %s (Score: %.4f)\n",
+        $i + 1,
+        $metadata['title'] ?? 'Unknown',
+        $result->score ?? 0.0
+    );
+}
+echo "\n";
+
+// Cleanup
+$store->drop();
+
+echo "=== Summary ===\n";
+echo "- semanticRatio = 0.0: Best for exact keyword matches\n";
+echo "- semanticRatio = 0.5: Balanced approach combining both methods\n";
+echo "- semanticRatio = 1.0: Best for conceptual similarity searches\n";
+echo "\nYou can set the default ratio when instantiating the Store,\n";
+echo "and override it per query using the 'semanticRatio' option.\n";
diff --git a/src/ai-bundle/config/options.php b/src/ai-bundle/config/options.php
index bafabd267..e5295a7d4 100644
--- a/src/ai-bundle/config/options.php
+++ b/src/ai-bundle/config/options.php
@@ -538,6 +538,12 @@
                             ->stringNode('embedder')->end()
                             ->stringNode('vector_field')->end()
                             ->integerNode('dimensions')->end()
+                            ->floatNode('semantic_ratio')
+                                ->info('The ratio between semantic (vector) and full-text search (0.0 to 1.0). Default: 1.0 (100% semantic)')
+                                ->defaultValue(1.0)
+                                ->min(0.0)
+                                ->max(1.0)
+                            ->end()
                         ->end()
                     ->end()
                 ->end()
diff --git a/src/ai-bundle/src/AiBundle.php b/src/ai-bundle/src/AiBundle.php
index a602b339a..f399df20d 100644
--- a/src/ai-bundle/src/AiBundle.php
+++ b/src/ai-bundle/src/AiBundle.php
@@ -1015,6 +1015,10 @@ private function processStoreConfig(string $type, array $stores, ContainerBuilde
                     $arguments[6] = $store['dimensions'];
                 }
 
+                if (\array_key_exists('semantic_ratio', $store)) {
+                    $arguments[7] = $store['semantic_ratio'];
+                }
+
                 $definition = new Definition(MeilisearchStore::class);
                 $definition
                     ->addTag('ai.store')
diff --git a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php
index 37e515b92..6e70c19ec 100644
--- a/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php
+++ b/src/ai-bundle/tests/DependencyInjection/AiBundleTest.php
@@ -2793,6 +2793,30 @@ public function testVectorizerModelBooleanOptionsArePreserved()
         $this->assertSame('text-embedding-3-small?normalize=false&cache=true&nested%5Bbool%5D=false', $vectorizerDefinition->getArgument(1));
     }
 
+    #[TestDox('Meilisearch store with custom semantic_ratio can be configured')]
+    public function testMeilisearchStoreWithCustomSemanticRatioCanBeConfigured()
+    {
+        $container = $this->buildContainer([
+            'ai' => [
+                'store' => [
+                    'meilisearch' => [
+                        'test_store' => [
+                            'endpoint' => 'http://127.0.0.1:7700',
+                            'api_key' => 'test_key',
+                            'index_name' => 'test_index',
+                            'semantic_ratio' => 0.5,
+                        ],
+                    ],
+                ],
+            ],
+        ]);
+
+        $this->assertTrue($container->hasDefinition('ai.store.meilisearch.test_store'));
+        $definition = $container->getDefinition('ai.store.meilisearch.test_store');
+        $arguments = $definition->getArguments();
+        $this->assertSame(0.5, $arguments[7]);
+    }
+
     private function buildContainer(array $configuration): ContainerBuilder
     {
         $container = new ContainerBuilder();
@@ -2966,6 +2990,7 @@ private function getFullConfig(): array
                             'embedder' => 'default',
                             'vector_field' => '_vectors',
                             'dimensions' => 768,
+                            'semantic_ratio' => 0.5,
                         ],
                     ],
                     'memory' => [
diff --git a/src/store/CHANGELOG.md b/src/store/CHANGELOG.md
index a43ad2cd6..1b2c0bff0 100644
--- a/src/store/CHANGELOG.md
+++ b/src/store/CHANGELOG.md
@@ -60,5 +60,6 @@ CHANGELOG
    - Minimum score filtering
    - Result limiting
    - Distance/similarity scoring
+ * Add Meilisearch hybrid search support with a configurable `semanticRatio` parameter to control the balance between semantic (vector) and full-text search.
* Add custom exception hierarchy with `ExceptionInterface` * Add support for specific exceptions for invalid arguments and runtime errors diff --git a/src/store/src/Bridge/Meilisearch/Store.php b/src/store/src/Bridge/Meilisearch/Store.php index 18057e99a..5e0bcecaa 100644 --- a/src/store/src/Bridge/Meilisearch/Store.php +++ b/src/store/src/Bridge/Meilisearch/Store.php @@ -28,17 +28,25 @@ final class Store implements ManagedStoreInterface, StoreInterface { /** * @param string $embedder The name of the embedder where vectors are stored - * @param string $vectorFieldName The name of the field int the index that contains the vector + * @param string $vectorFieldName The name of the field in the index that contains the vector + * @param float $semanticRatio The ratio between semantic (vector) and full-text search (0.0 to 1.0) + * - 0.0 = 100% full-text search + * - 0.5 = balanced hybrid search + * - 1.0 = 100% semantic search (vector only) */ public function __construct( - private readonly HttpClientInterface $httpClient, - private readonly string $endpointUrl, - #[\SensitiveParameter] private readonly string $apiKey, - private readonly string $indexName, - private readonly string $embedder = 'default', - private readonly string $vectorFieldName = '_vectors', - private readonly int $embeddingsDimension = 1536, + private HttpClientInterface $httpClient, + private string $endpointUrl, + #[\SensitiveParameter] private string $apiKey, + private string $indexName, + private string $embedder = 'default', + private string $vectorFieldName = '_vectors', + private int $embeddingsDimension = 1536, + private float $semanticRatio = 1.0, ) { + if ($semanticRatio < 0.0 || $semanticRatio > 1.0) { + throw new InvalidArgumentException(\sprintf('The semantic ratio must be between 0.0 and 1.0, "%s" given.', $semanticRatio)); + } } public function setup(array $options = []): void @@ -71,13 +79,20 @@ public function add(VectorDocument ...$documents): void public function query(Vector 
$vector, array $options = []): array { + $semanticRatio = $options['semanticRatio'] ?? $this->semanticRatio; + + if ($semanticRatio < 0.0 || $semanticRatio > 1.0) { + throw new InvalidArgumentException(\sprintf('The semantic ratio must be between 0.0 and 1.0, "%s" given.', $semanticRatio)); + } + $result = $this->request('POST', \sprintf('indexes/%s/search', $this->indexName), [ + 'q' => $options['q'] ?? '', 'vector' => $vector->getData(), 'showRankingScore' => true, 'retrieveVectors' => true, 'hybrid' => [ 'embedder' => $this->embedder, - 'semanticRatio' => 1.0, + 'semanticRatio' => $semanticRatio, ], ]); diff --git a/src/store/tests/Bridge/Meilisearch/StoreTest.php b/src/store/tests/Bridge/Meilisearch/StoreTest.php index e06f01ef2..9afb0a63b 100644 --- a/src/store/tests/Bridge/Meilisearch/StoreTest.php +++ b/src/store/tests/Bridge/Meilisearch/StoreTest.php @@ -15,9 +15,11 @@ use Symfony\AI\Platform\Vector\Vector; use Symfony\AI\Store\Bridge\Meilisearch\Store; use Symfony\AI\Store\Document\VectorDocument; +use Symfony\AI\Store\Exception\InvalidArgumentException; use Symfony\Component\HttpClient\Exception\ClientException; use Symfony\Component\HttpClient\MockHttpClient; use Symfony\Component\HttpClient\Response\JsonMockResponse; +use Symfony\Component\HttpClient\Response\MockResponse; use Symfony\Component\Uid\Uuid; final class StoreTest extends TestCase @@ -275,4 +277,148 @@ public function testMetadataWithoutIDRankingandVector() $this->assertSame($expected, $vectors[0]->metadata->getArrayCopy()); } + + public function testConstructorWithValidSemanticRatio() + { + $httpClient = new MockHttpClient(); + + $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: 0.5); + + $this->assertInstanceOf(Store::class, $store); + } + + public function testConstructorThrowsExceptionForInvalidSemanticRatio() + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 
1.0'); + + $httpClient = new MockHttpClient(); + new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: 1.5); + } + + public function testConstructorThrowsExceptionForNegativeSemanticRatio() + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0'); + + $httpClient = new MockHttpClient(); + new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: -0.1); + } + + public function testQueryUsesDefaultSemanticRatio() + { + $responses = [ + new MockResponse(json_encode([ + 'hits' => [ + [ + 'id' => '550e8400-e29b-41d4-a716-446655440000', + '_vectors' => [ + 'default' => [ + 'embeddings' => [0.1, 0.2, 0.3], + ], + ], + '_rankingScore' => 0.95, + 'content' => 'Test document', + ], + ], + ])), + ]; + + $httpClient = new MockHttpClient($responses); + $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: 0.7); + + $vector = new Vector([0.1, 0.2, 0.3]); + $store->query($vector); + + $request = $httpClient->getRequestsCount() > 0 ? 
$responses[0]->getRequestOptions() : null; + $this->assertNotNull($request); + + $body = json_decode($request['body'], true); + $this->assertSame(0.7, $body['hybrid']['semanticRatio']); + } + + public function testQueryCanOverrideSemanticRatio() + { + $responses = [ + new MockResponse(json_encode([ + 'hits' => [], + ])), + ]; + + $httpClient = new MockHttpClient($responses); + $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: 0.5); + + $vector = new Vector([0.1, 0.2, 0.3]); + $store->query($vector, ['semanticRatio' => 0.2]); + + $request = $responses[0]->getRequestOptions(); + $body = json_decode($request['body'], true); + + $this->assertSame(0.2, $body['hybrid']['semanticRatio']); + } + + public function testQueryThrowsExceptionForInvalidSemanticRatioOption() + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0'); + + $httpClient = new MockHttpClient(); + $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index'); + + $vector = new Vector([0.1, 0.2, 0.3]); + $store->query($vector, ['semanticRatio' => 2.0]); + } + + public function testQueryWithPureKeywordSearch() + { + $responses = [ + new MockResponse(json_encode([ + 'hits' => [ + [ + 'id' => '550e8400-e29b-41d4-a716-446655440000', + '_vectors' => [ + 'default' => [ + 'embeddings' => [0.1, 0.2, 0.3], + ], + ], + '_rankingScore' => 0.85, + 'title' => 'Symfony Framework', + ], + ], + ])), + ]; + + $httpClient = new MockHttpClient($responses); + $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index'); + + $vector = new Vector([0.1, 0.2, 0.3]); + $results = $store->query($vector, ['semanticRatio' => 0.0]); + + $this->assertCount(1, $results); + $this->assertInstanceOf(VectorDocument::class, $results[0]); + + $request = $responses[0]->getRequestOptions(); + $body = json_decode($request['body'], true); + $this->assertSame(0.0, $body['hybrid']['semanticRatio']); + 
} + + public function testQueryWithBalancedHybridSearch() + { + $responses = [ + new MockResponse(json_encode([ + 'hits' => [], + ])), + ]; + + $httpClient = new MockHttpClient($responses); + $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: 0.5); + + $vector = new Vector([0.1, 0.2, 0.3]); + $store->query($vector); + + $request = $responses[0]->getRequestOptions(); + $body = json_decode($request['body'], true); + + $this->assertSame(0.5, $body['hybrid']['semanticRatio']); + } }