Skip to content

Commit e2d01ac

Browse files
committed
[Store] Add configurable semantic ratio to Meilisearch Store
This adds a semanticRatio parameter to control the balance between keyword-based (BM25) and semantic (vector) search in hybrid queries.

Changes:
- Add semanticRatio constructor parameter (default: 1.0 for BC)
- Allow per-query override via options array
- Add validation (0.0-1.0 range)
- Add support for 'q' parameter in query options for BM25 text queries
- Add comprehensive tests covering all scenarios

Use cases:
- 0.0 = Pure keyword search (IDs, codes, exact terms)
- 0.5 = Balanced hybrid search (general use)
- 1.0 = Pure semantic search (conceptual similarity)

Backward compatible - default behavior unchanged.
1 parent 5242431 commit e2d01ac

File tree

2 files changed

+165
-2
lines changed

2 files changed

+165
-2
lines changed

src/store/src/Bridge/Meilisearch/Store.php

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@
2828
{
2929
/**
3030
* @param string $embedder The name of the embedder where vectors are stored
31-
* @param string $vectorFieldName The name of the field int the index that contains the vector
31+
* @param string $vectorFieldName The name of the field in the index that contains the vector
32+
* @param float $semanticRatio The ratio between semantic (vector) and keyword (BM25) search (0.0 to 1.0)
33+
* - 0.0 = 100% keyword search (BM25)
34+
* - 0.5 = balanced hybrid search
35+
* - 1.0 = 100% semantic search (vector only)
3236
*/
3337
public function __construct(
3438
private HttpClientInterface $httpClient,
@@ -38,7 +42,11 @@ public function __construct(
3842
private string $embedder = 'default',
3943
private string $vectorFieldName = '_vectors',
4044
private int $embeddingsDimension = 1536,
45+
private float $semanticRatio = 1.0,
4146
) {
47+
if ($semanticRatio < 0.0 || $semanticRatio > 1.0) {
48+
throw new InvalidArgumentException(\sprintf('The semantic ratio must be between 0.0 and 1.0, "%s" given.', $semanticRatio));
49+
}
4250
}
4351

4452
public function setup(array $options = []): void
@@ -71,13 +79,22 @@ public function add(VectorDocument ...$documents): void
7179

7280
public function query(Vector $vector, array $options = []): array
7381
{
82+
$semanticRatio = $options['semanticRatio'] ?? $this->semanticRatio;
83+
84+
if ($semanticRatio < 0.0 || $semanticRatio > 1.0) {
85+
throw new InvalidArgumentException(\sprintf('The semantic ratio must be between 0.0 and 1.0, "%s" given.', $semanticRatio));
86+
}
87+
88+
$queryText = $options['q'] ?? '';
89+
7490
$result = $this->request('POST', \sprintf('indexes/%s/search', $this->indexName), [
91+
'q' => $queryText,
7592
'vector' => $vector->getData(),
7693
'showRankingScore' => true,
7794
'retrieveVectors' => true,
7895
'hybrid' => [
7996
'embedder' => $this->embedder,
80-
'semanticRatio' => 1.0,
97+
'semanticRatio' => $semanticRatio,
8198
],
8299
]);
83100

src/store/tests/Bridge/Meilisearch/StoreTest.php

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@
1515
use Symfony\AI\Platform\Vector\Vector;
1616
use Symfony\AI\Store\Bridge\Meilisearch\Store;
1717
use Symfony\AI\Store\Document\VectorDocument;
18+
use Symfony\AI\Store\Exception\InvalidArgumentException;
1819
use Symfony\Component\HttpClient\Exception\ClientException;
1920
use Symfony\Component\HttpClient\MockHttpClient;
2021
use Symfony\Component\HttpClient\Response\JsonMockResponse;
22+
use Symfony\Component\HttpClient\Response\MockResponse;
2123
use Symfony\Component\Uid\Uuid;
2224

2325
final class StoreTest extends TestCase
@@ -275,4 +277,148 @@ public function testMetadataWithoutIDRankingandVector()
275277

276278
$this->assertSame($expected, $vectors[0]->metadata->getArrayCopy());
277279
}
280+
281+
/**
 * A ratio inside the accepted 0.0-1.0 range must not be rejected by the constructor.
 */
public function testConstructorWithValidSemanticRatio()
{
    $store = new Store(
        new MockHttpClient(),
        'http://localhost:7700',
        'key',
        'index',
        semanticRatio: 0.5,
    );

    $this->assertInstanceOf(Store::class, $store);
}
289+
290+
/**
 * A ratio above the upper bound (1.0) is rejected at construction time.
 */
public function testConstructorThrowsExceptionForInvalidSemanticRatio()
{
    $client = new MockHttpClient();

    $this->expectException(InvalidArgumentException::class);
    $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0');

    new Store(
        $client,
        'http://localhost:7700',
        'key',
        'index',
        semanticRatio: 1.5,
    );
}
298+
299+
/**
 * A ratio below the lower bound (0.0) is rejected at construction time.
 */
public function testConstructorThrowsExceptionForNegativeSemanticRatio()
{
    $client = new MockHttpClient();

    $this->expectException(InvalidArgumentException::class);
    $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0');

    new Store(
        $client,
        'http://localhost:7700',
        'key',
        'index',
        semanticRatio: -0.1,
    );
}
307+
308+
/**
 * Without per-query options, the ratio configured on the constructor is sent
 * in the search request and the text query defaults to an empty string.
 */
public function testQueryUsesDefaultSemanticRatio()
{
    $response = new JsonMockResponse([
        'hits' => [
            [
                'id' => '550e8400-e29b-41d4-a716-446655440000',
                '_vectors' => [
                    'default' => [
                        'embeddings' => [0.1, 0.2, 0.3],
                    ],
                ],
                '_rankingScore' => 0.95,
                'content' => 'Test document',
            ],
        ],
    ]);

    $httpClient = new MockHttpClient([$response]);
    $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: 0.7);

    $store->query(new Vector([0.1, 0.2, 0.3]));

    // Fail with a clear message if the search request was never issued,
    // instead of guarding the request lookup with a ternary.
    $this->assertSame(1, $httpClient->getRequestsCount());

    $body = json_decode($response->getRequestOptions()['body'], true);

    $this->assertSame(0.7, $body['hybrid']['semanticRatio']);
    // No 'q' option was passed, so the store must fall back to an empty text query.
    $this->assertSame('', $body['q']);
}
339+
340+
/**
 * A 'semanticRatio' entry in the query options takes precedence over the
 * ratio the store was constructed with.
 */
public function testQueryCanOverrideSemanticRatio()
{
    $mockResponse = new MockResponse(json_encode([
        'hits' => [],
    ]));
    $responses = [$mockResponse];

    $store = new Store(
        new MockHttpClient($responses),
        'http://localhost:7700',
        'key',
        'index',
        semanticRatio: 0.5,
    );

    $store->query(new Vector([0.1, 0.2, 0.3]), ['semanticRatio' => 0.2]);

    $payload = json_decode($mockResponse->getRequestOptions()['body'], true);

    $this->assertSame(0.2, $payload['hybrid']['semanticRatio']);
}
359+
360+
/**
 * An out-of-range 'semanticRatio' query option is rejected with the same
 * message as the constructor validation.
 */
public function testQueryThrowsExceptionForInvalidSemanticRatioOption()
{
    $store = new Store(new MockHttpClient(), 'http://localhost:7700', 'key', 'index');
    $queryVector = new Vector([0.1, 0.2, 0.3]);

    $this->expectException(InvalidArgumentException::class);
    $this->expectExceptionMessage('The semantic ratio must be between 0.0 and 1.0');

    $store->query($queryVector, ['semanticRatio' => 2.0]);
}
371+
372+
/**
 * With a semantic ratio of 0.0 the search is keyword-only, so the text passed
 * via the 'q' option must reach the request body alongside the ratio.
 */
public function testQueryWithPureKeywordSearch()
{
    $responses = [
        new MockResponse(json_encode([
            'hits' => [
                [
                    'id' => '550e8400-e29b-41d4-a716-446655440000',
                    '_vectors' => [
                        'default' => [
                            'embeddings' => [0.1, 0.2, 0.3],
                        ],
                    ],
                    '_rankingScore' => 0.85,
                    'title' => 'Symfony Framework',
                ],
            ],
        ])),
    ];

    $httpClient = new MockHttpClient($responses);
    $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index');

    $vector = new Vector([0.1, 0.2, 0.3]);
    // A keyword search without a keyword is meaningless: pass the text query explicitly.
    $results = $store->query($vector, ['semanticRatio' => 0.0, 'q' => 'Symfony']);

    $this->assertCount(1, $results);
    $this->assertInstanceOf(VectorDocument::class, $results[0]);

    $request = $responses[0]->getRequestOptions();
    $body = json_decode($request['body'], true);

    $this->assertSame(0.0, $body['hybrid']['semanticRatio']);
    // Covers the 'q' option, which no other test exercises.
    $this->assertSame('Symfony', $body['q']);
}
404+
405+
/**
 * A balanced hybrid search sends both the text query and the vector, together
 * with the 0.5 ratio configured on the store and the configured embedder.
 */
public function testQueryWithBalancedHybridSearch()
{
    $responses = [
        new MockResponse(json_encode([
            'hits' => [],
        ])),
    ];

    $httpClient = new MockHttpClient($responses);
    $store = new Store($httpClient, 'http://localhost:7700', 'key', 'index', semanticRatio: 0.5);

    $vector = new Vector([0.1, 0.2, 0.3]);
    // Provide a text query as well; otherwise only the vector half of the hybrid search is exercised.
    $store->query($vector, ['q' => 'Symfony']);

    $request = $responses[0]->getRequestOptions();
    $body = json_decode($request['body'], true);

    $this->assertSame('Symfony', $body['q']);
    $this->assertSame([0.1, 0.2, 0.3], $body['vector']);
    // 'default' is the embedder name the store falls back to when none is configured.
    $this->assertSame('default', $body['hybrid']['embedder']);
    $this->assertSame(0.5, $body['hybrid']['semanticRatio']);
}
278424
}

0 commit comments

Comments
 (0)