-
Notifications
You must be signed in to change notification settings - Fork 17
IBX-9846: Added search using embeddings #536
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9f74191
4bb5a5d
3260b65
7632cf9
c555ead
bc7017e
60dd23a
c45efce
2e94b8e
c8c91a4
f981ffe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| <?php | ||
|
|
||
| /** | ||
| * @copyright Copyright (C) Ibexa AS. All rights reserved. | ||
| * @license For full copyright and license information view LICENSE file distributed with this source code. | ||
| */ | ||
| declare(strict_types=1); | ||
|
|
||
| namespace Ibexa\Bundle\Core\DependencyInjection\Configuration\Parser; | ||
|
|
||
| use Ibexa\Bundle\Core\DependencyInjection\Configuration\AbstractParser; | ||
| use Ibexa\Bundle\Core\DependencyInjection\Configuration\SiteAccessAware\ContextualizerInterface; | ||
| use Symfony\Component\Config\Definition\Builder\NodeBuilder; | ||
|
|
||
/**
 * Configuration parser for embedding models.
 *
 * Registers two SiteAccess-aware settings:
 *  - `embedding_models`: a collection of model definitions, one entry per model
 *    (entry keys are kept verbatim, they are not normalized); every entry requires
 *    `name`, `dimensions`, `field_suffix` and `embedding_provider`,
 *  - `default_embedding_model`: identifier of the default embedding model.
 *
 * Example configuration:
 * ```yaml
 * ibexa:
 *     system:
 *         default: # configuration per siteaccess or siteaccess group
 *             embedding_models:
 *                 text-embedding-3-small:
 *                     name: "text-embedding-3-small"
 *                     dimensions: 1536
 *                     field_suffix: "3small"
 *                     embedding_provider: "ibexa_openai"
 *             default_embedding_model: text-embedding-3-small
 * ```
 */
final class Embeddings extends AbstractParser
{
    /**
     * Adds the `embedding_models` and `default_embedding_model` nodes to the semantic configuration tree.
     */
    public function addSemanticConfig(NodeBuilder $nodeBuilder): void
    {
        $nodeBuilder
            ->arrayNode('embedding_models')
                // Keep keys exactly as written (no dash-to-underscore normalization).
                ->normalizeKeys(false)
                ->info('Defines available embedding models')
                ->arrayPrototype()
                    ->children()
                        ->scalarNode('name')->isRequired()->end()
                        // A vector needs at least one dimension; reject zero and negative values early.
                        ->integerNode('dimensions')->isRequired()->min(1)->end()
                        ->scalarNode('field_suffix')->isRequired()->end()
                        ->scalarNode('embedding_provider')->isRequired()->end()
                    ->end()
                ->end()
            ->end()
            ->scalarNode('default_embedding_model')
                ->info('Default embedding model identifier')
                ->defaultValue('text-embedding-ada-002')
            ->end();
    }

    /**
     * Maps both settings through the contextualizer before per-scope mapping takes place.
     *
     * @param array<mixed> $config
     */
    public function preMap(array $config, ContextualizerInterface $contextualizer): void
    {
        $contextualizer->mapConfigArray('embedding_models', $config);
        $contextualizer->mapSetting('default_embedding_model', $config);
    }

    /**
     * Intentionally empty: all mapping is done in {@see preMap()}.
     *
     * The $currentScope parameter is left untyped to keep the signature as declared in this class;
     * the parent declaration is not visible here, so no type is added.
     *
     * @param array<mixed> $scopeSettings
     */
    public function mapConfig(array &$scopeSettings, $currentScope, ContextualizerInterface $contextualizer): void
    {
        // Nothing to do here.
    }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
services:
    _defaults:
        autowire: true
        autoconfigure: true
        public: false

    # Provider registry: receives every service tagged "ibexa.embedding_provider",
    # with index_by set to "provider_name".
    Ibexa\Core\Search\Embedding\EmbeddingProviderRegistry:
        arguments:
            $embeddingProviders: !tagged_iterator { tag: 'ibexa.embedding_provider', index_by: 'provider_name' }

    Ibexa\Contracts\Core\Search\Embedding\EmbeddingProviderRegistryInterface:
        alias: Ibexa\Core\Search\Embedding\EmbeddingProviderRegistry

    # Provider resolver (autowired) and its contract alias.
    Ibexa\Core\Search\Embedding\EmbeddingProviderResolver: ~

    Ibexa\Contracts\Core\Search\Embedding\EmbeddingProviderResolverInterface:
        alias: Ibexa\Core\Search\Embedding\EmbeddingProviderResolver

    # Embedding configuration service (autowired) and its contract alias.
    Ibexa\Core\Search\Embedding\EmbeddingConfiguration: ~

    Ibexa\Contracts\Core\Search\Embedding\EmbeddingConfigurationInterface:
        alias: Ibexa\Core\Search\Embedding\EmbeddingConfiguration

    # Registered under its own class name; relies on the autowiring defaults above.
    Ibexa\Contracts\Core\Search\FieldType\EmbeddingFieldFactory: ~
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,113 @@ | ||
| <?php | ||
|
|
||
| /** | ||
| * @copyright Copyright (C) Ibexa AS. All rights reserved. | ||
| * @license For full copyright and license information view LICENSE file distributed with this source code. | ||
| */ | ||
| declare(strict_types=1); | ||
|
|
||
| namespace Ibexa\Contracts\Core\Repository\Values\Content; | ||
|
|
||
| use Ibexa\Contracts\Core\Repository\Values\Content\Query\Criterion; | ||
| use Ibexa\Contracts\Core\Repository\Values\Content\Query\Embedding; | ||
| use InvalidArgumentException; | ||
|
|
||
/**
 * Query carrying an embedding together with filter, aggregations and paging settings.
 *
 * The inherited `query`, `sortClauses`, `facetBuilders` and `spellcheck` properties are not
 * supported by this query type; {@see isValid()} rejects a query that sets any of them.
 */
final class EmbeddingQuery extends Query
{
    private ?Embedding $embedding = null;

    public function getEmbedding(): ?Embedding
    {
        return $this->embedding;
    }

    public function setEmbedding(?Embedding $embedding = null): void
    {
        $this->embedding = $embedding;
    }

    /**
     * Returns the filter criterion, or null when none has been assigned.
     *
     * The return type is nullable so that reading the filter of a query built without one does not
     * raise a TypeError (assumes the inherited property defaults to null - confirm against Query).
     */
    public function getFilter(): ?Criterion
    {
        return $this->filter;
    }

    public function setFilter(Criterion $filter): void
    {
        $this->filter = $filter;
    }

    /**
     * @return \Ibexa\Contracts\Core\Repository\Values\Content\Query\Aggregation[]
     */
    public function getAggregations(): array
    {
        return $this->aggregations;
    }

    /**
     * @param \Ibexa\Contracts\Core\Repository\Values\Content\Query\Aggregation[] $aggregations
     */
    public function setAggregations(array $aggregations): void
    {
        $this->aggregations = $aggregations;
    }

    public function getOffset(): int
    {
        return $this->offset;
    }

    public function setOffset(int $offset): void
    {
        $this->offset = $offset;
    }

    public function getLimit(): int
    {
        return $this->limit;
    }

    public function setLimit(int $limit): void
    {
        $this->limit = $limit;
    }

    public function setPerformCount(bool $performCount): void
    {
        $this->performCount = $performCount;
    }

    public function getPerformCount(): bool
    {
        return $this->performCount;
    }

    /**
     * Checks that none of the unsupported inherited properties has been set.
     *
     * @return bool always true; an invalid query is reported by exception, never by returning false
     *
     * @throws \InvalidArgumentException when `query`, `sortClauses`, `facetBuilders` or `spellcheck` is set
     */
    public function isValid(): bool
    {
        $unsupported = [];

        if ($this->query !== null) {
            $unsupported[] = 'query';
        }
        if (!empty($this->sortClauses)) {
            $unsupported[] = 'sortClauses';
        }
        if (!empty($this->facetBuilders)) {
            $unsupported[] = 'facetBuilders';
        }
        if ($this->spellcheck !== null) {
            $unsupported[] = 'spellcheck';
        }

        if ($unsupported !== []) {
            // The listed properties WERE set; the message must say they are not allowed here.
            throw new InvalidArgumentException(
                sprintf(
                    'EmbeddingQuery does not support [%s].',
                    implode(', ', $unsupported)
                )
            );
        }

        return true;
    }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,111 @@ | ||
| <?php | ||
|
|
||
| /** | ||
| * @copyright Copyright (C) Ibexa AS. All rights reserved. | ||
| * @license For full copyright and license information view LICENSE file distributed with this source code. | ||
| */ | ||
| declare(strict_types=1); | ||
|
|
||
| namespace Ibexa\Contracts\Core\Repository\Values\Content; | ||
|
|
||
| use Ibexa\Contracts\Core\Repository\Values\Content\Query\Criterion; | ||
| use Ibexa\Contracts\Core\Repository\Values\Content\Query\Embedding; | ||
|
|
||
/**
 * Fluent builder producing {@see EmbeddingQuery} instances.
 *
 * Obtain an instance through {@see create()}; every configuring method returns the same
 * builder so that calls can be chained, and {@see build()} produces the query.
 */
final class EmbeddingQueryBuilder
{
    private ?Embedding $embedding = null;

    private ?int $limit = null;

    private ?int $offset = null;

    private ?Criterion $filter = null;

    /** @var array<\Ibexa\Contracts\Core\Repository\Values\Content\Query\Aggregation> */
    private array $aggregations = [];

    private bool $performCount = false;

    /**
     * Private on purpose: instances are created through {@see create()}.
     */
    private function __construct()
    {
    }

    /**
     * Returns a fresh builder with nothing configured.
     */
    public static function create(): self
    {
        return new self();
    }

    public function withEmbedding(Embedding $embed): self
    {
        $this->embedding = $embed;

        return $this;
    }

    public function setLimit(int $limit): self
    {
        $this->limit = $limit;

        return $this;
    }

    public function setOffset(int $offset): self
    {
        $this->offset = $offset;

        return $this;
    }

    public function setFilter(Criterion $filter): self
    {
        $this->filter = $filter;

        return $this;
    }

    /**
     * @param array<\Ibexa\Contracts\Core\Repository\Values\Content\Query\Aggregation> $aggregations
     */
    public function setAggregations(array $aggregations): self
    {
        $this->aggregations = $aggregations;

        return $this;
    }

    public function setPerformCount(bool $performCount): self
    {
        $this->performCount = $performCount;

        return $this;
    }

    /**
     * Creates the query and copies over only what has been configured on this builder.
     *
     * Values left untouched (null embedding/limit/offset/filter, empty aggregations) are not
     * passed on, so the query keeps its own defaults for them. The perform-count flag is
     * always applied.
     */
    public function build(): EmbeddingQuery
    {
        $embeddingQuery = new EmbeddingQuery();

        if (null !== $this->embedding) {
            $embeddingQuery->setEmbedding($this->embedding);
        }

        if (null !== $this->limit) {
            $embeddingQuery->setLimit($this->limit);
        }

        if (null !== $this->offset) {
            $embeddingQuery->setOffset($this->offset);
        }

        if (null !== $this->filter) {
            $embeddingQuery->setFilter($this->filter);
        }

        if ($this->aggregations !== []) {
            $embeddingQuery->setAggregations($this->aggregations);
        }

        $embeddingQuery->setPerformCount($this->performCount);

        return $embeddingQuery;
    }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| <?php | ||
|
|
||
| /** | ||
| * @copyright Copyright (C) Ibexa AS. All rights reserved. | ||
| * @license For full copyright and license information view LICENSE file distributed with this source code. | ||
| */ | ||
| declare(strict_types=1); | ||
|
|
||
| namespace Ibexa\Contracts\Core\Repository\Values\Content\Query; | ||
|
|
||
| use Ibexa\Contracts\Core\Repository\Values\ValueObject; | ||
|
|
||
/**
 * Base class for embedding values: holds a vector of floats and exposes it read-only.
 */
abstract class Embedding extends ValueObject
{
    /**
     * Vector components, stored exactly as passed to the constructor.
     *
     * @var float[]
     */
    protected array $value;

    /**
     * Stores the given vector; the parent constructor is not called.
     *
     * @param float[] $value
     */
    public function __construct(array $value)
    {
        $this->value = $value;
    }

    /**
     * @return float[] the vector given at construction time
     */
    public function getValue(): array
    {
        return $this->value;
    }
}
Uh oh!
There was an error while loading. Please reload this page.