Skip to content

Commit fa8affc

Browse files
authored
Add field type for binary vectors (#2854)
1 parent d820746 commit fa8affc

File tree

10 files changed

+361
-12
lines changed

10 files changed

+361
-12
lines changed

composer.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
"phpstan/phpstan": "^2.1",
5151
"phpstan/phpstan-deprecation-rules": "^2.0",
5252
"phpstan/phpstan-phpunit": "^2.0",
53-
"phpunit/phpunit": "^10.4",
53+
"phpunit/phpunit": "^10.5.58",
5454
"squizlabs/php_codesniffer": "^4",
5555
"symfony/cache": "^5.4 || ^6.0 || ^7.0",
5656
"symfony/uid": "^5.4 || ^6.0 || ^7.0"

docs/en/reference/basic-mapping.rst

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@ Here is a quick overview of the built-in mapping types:
152152
- ``string``
153153
- ``timestamp``
154154
- ``uuid``
155+
- ``vector_float32``
156+
- ``vector_int8``
157+
- ``vector_packed_bit``
155158

156159
You can read more about the available MongoDB types on `php.net <https://www.php.net/mongodb.bson>`_.
157160

@@ -165,21 +168,24 @@ You can read more about the available MongoDB types on `php.net <https://www.php
165168
Generally, the name of each built-in mapping type hints as to how the value will be converted.
166169
This list explains some of the less obvious mapping types:
167170

168-
- ``bin``: string to MongoDB\BSON\Binary instance with a "generic" type (default)
169-
- ``bin_bytearray``: string to MongoDB\BSON\Binary instance with a "byte array" type
170-
- ``bin_custom``: string to MongoDB\BSON\Binary instance with a "custom" type
171-
- ``bin_func``: string to MongoDB\BSON\Binary instance with a "function" type
172-
- ``bin_md5``: string to MongoDB\BSON\Binary instance with a "md5" type
173-
- ``bin_uuid``: string to MongoDB\BSON\Binary instance with a "uuid" type
171+
- ``bin``: ``string`` to ``MongoDB\BSON\Binary`` instance with a "generic" type (default)
172+
- ``bin_bytearray``: ``string`` to ``MongoDB\BSON\Binary`` instance with a "byte array" type
173+
- ``bin_custom``: ``string`` to ``MongoDB\BSON\Binary`` instance with a "custom" type
174+
- ``bin_func``: ``string`` to ``MongoDB\BSON\Binary`` instance with a "function" type
175+
- ``bin_md5``: ``string`` to ``MongoDB\BSON\Binary`` instance with a "md5" type
176+
- ``bin_uuid``: ``string`` to ``MongoDB\BSON\Binary`` instance with a "uuid" type
174177
- ``collection``: numerically indexed array to MongoDB array
175178
- ``date``: ``DateTime`` to ``MongoDB\BSON\UTCDateTime``
176179
- ``date_immutable``: ``DateTimeImmutable`` to ``MongoDB\BSON\UTCDateTime``
177-
- ``decimal128``: string to ``MongoDB\BSON\Decimal128``, requires ``ext-bcmath``
180+
- ``decimal128``: ``string`` to ``MongoDB\BSON\Decimal128``, requires ``ext-bcmath``
178181
- ``hash``: associative array to MongoDB object
179-
- ``id``: string to ObjectId by default, but other formats are possible
180-
- ``timestamp``: string to ``MongoDB\BSON\Timestamp``
182+
- ``id``: ``string`` to ObjectId by default, but other formats are possible
183+
- ``timestamp``: ``string`` to ``MongoDB\BSON\Timestamp``
181184
- ``raw``: any type
182185
- ``uuid``: `Symfony UID <https://symfony.com/doc/current/components/uid.html>`_ to ``MongoDB\BSON\Binary`` instance with a "uuid" type
186+
- ``vector_float32``: list of floats to ``MongoDB\BSON\Binary`` instance with vector type "Float32"
187+
- ``vector_int8``: list of integers to ``MongoDB\BSON\Binary`` instance with vector type "Int8"
188+
- ``vector_packed_bit``: list of booleans to ``MongoDB\BSON\Binary`` instance with vector type "PackedBit"
183189

184190
.. note::
185191

@@ -189,6 +195,10 @@ This list explains some of the less obvious mapping types:
189195
suitable you should either use an embedded document or use formats provided
190196
by the MongoDB driver (e.g. ``\MongoDB\BSON\UTCDateTime`` instead of ``\DateTime``).
191197

198+
.. note::
199+
200+
The vector types require the MongoDB PHP extension version 2.2.0 or higher.
201+
192202
.. _reference-php-mapping-types:
193203

194204
PHP Types Mapping
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Doctrine\ODM\MongoDB\Types;
6+
7+
use InvalidArgumentException;
8+
use MongoDB\BSON\Binary;
9+
use MongoDB\BSON\VectorType;
10+
11+
use function enum_exists;
12+
use function get_debug_type;
13+
use function is_array;
14+
use function sprintf;
15+
use function str_replace;
16+
17+
/**
 * Base class for mapping types that convert between PHP arrays and
 * MongoDB\BSON\Binary values of binary subtype "vector".
 *
 * Concrete subclasses only declare which MongoDB\BSON\VectorType case they
 * handle; all conversion and validation logic lives here.
 *
 * @internal
 */
abstract class AbstractVectorType extends Type
{
    /**
     * Converts a PHP value into the Binary vector written to the database.
     *
     * Arrays are packed with Binary::fromVector() using this type's vector
     * type. A Binary is passed through unchanged, but only if it is a vector
     * binary of the matching vector type.
     *
     * @throws InvalidArgumentException If the MongoDB\BSON\VectorType enum is unavailable,
     *                                  or if $value is neither null, an array, nor a
     *                                  Binary vector of the expected vector type.
     */
    public function convertToDatabaseValue(mixed $value): ?Binary
    {
        // The enum ships with ext-mongodb 2.2.0+; fail with an actionable message
        // rather than a "class not found" error further down.
        if (! enum_exists(VectorType::class)) {
            throw new InvalidArgumentException('MongoDB\BSON\VectorType enum does not exist. Install the MongoDB Extension version 2.2.0 or higher in order to use a vector field type.');
        }

        if ($value === null) {
            return null;
        }

        if (is_array($value)) {
            return Binary::fromVector($value, $this->getVectorType());
        }

        if (! $value instanceof Binary) {
            throw new InvalidArgumentException(sprintf('Invalid data type %s received for vector field, expected null, array or MongoDB\BSON\Binary', get_debug_type($value)));
        }

        if ($value->getType() !== Binary::TYPE_VECTOR) {
            throw new InvalidArgumentException(sprintf('Invalid binary data of type %d received for vector field, expected binary type %d', $value->getType(), Binary::TYPE_VECTOR));
        }

        if ($value->getVectorType() !== $this->getVectorType()) {
            throw new InvalidArgumentException(sprintf('Invalid binary vector data of vector type %s received for vector field, expected vector type %s', $value->getVectorType()->name, $this->getVectorType()->name));
        }

        return $value;
    }

    /**
     * Converts a database value into a PHP array.
     *
     * Arrays are returned as-is; a Binary vector of the expected vector type
     * is unpacked with Binary::toArray().
     *
     * @return list<float>|list<int>|list<bool>|null
     *
     * @throws InvalidArgumentException If $value is neither null, an array, nor a
     *                                  Binary vector of the expected vector type.
     */
    public function convertToPHPValue(mixed $value): ?array
    {
        if ($value === null) {
            return null;
        }

        if (is_array($value)) {
            return $value;
        }

        if (! $value instanceof Binary) {
            throw new InvalidArgumentException(sprintf('Invalid data of type "%s" received for vector field', get_debug_type($value)));
        }

        if ($value->getType() !== Binary::TYPE_VECTOR) {
            throw new InvalidArgumentException(sprintf('Invalid binary data of type %d received for vector field', $value->getType()));
        }

        if ($value->getVectorType() !== $this->getVectorType()) {
            throw new InvalidArgumentException(sprintf('Invalid binary vector data of vector type %s received for vector field, expected vector type %s', $value->getVectorType()->name, $this->getVectorType()->name));
        }

        return $value->toArray();
    }

    /**
     * Returns a PHP code snippet that reads $value and assigns the database
     * representation to $return, with the same checks as
     * convertToDatabaseValue() except for the enum_exists() guard.
     *
     * The "%%vectorType%%" placeholder is replaced with the name of this
     * type's VectorType case before the snippet is returned.
     *
     * The snippet is a plain string, so the "use" imports of this file do not
     * apply to it: every class name inside must be fully qualified, otherwise
     * it would be resolved against whatever namespace the code ends up in.
     * Unqualified function calls are fine because PHP falls back to the global
     * function.
     */
    public function closureToMongo(): string
    {
        return str_replace('%%vectorType%%', $this->getVectorType()->name, <<<'PHP'
        if ($value === null) {
            $return = null;
            return;
        }

        if (\is_array($value)) {
            $return = \MongoDB\BSON\Binary::fromVector($value, \MongoDB\BSON\VectorType::%%vectorType%%);
            return;
        }

        if (! $value instanceof \MongoDB\BSON\Binary) {
            throw new \InvalidArgumentException(sprintf('Invalid data type %s received for vector field, expected null, array or MongoDB\BSON\Binary', get_debug_type($value)));
        }

        if ($value->getType() !== \MongoDB\BSON\Binary::TYPE_VECTOR) {
            throw new \InvalidArgumentException(sprintf('Invalid binary data of type %d received for vector field, expected binary type %d', $value->getType(), \MongoDB\BSON\Binary::TYPE_VECTOR));
        }

        if ($value->getVectorType() !== \MongoDB\BSON\VectorType::%%vectorType%%) {
            throw new \InvalidArgumentException(sprintf('Invalid binary vector data of vector type %s received for vector field, expected vector type %%vectorType%%', $value->getVectorType()->name));
        }

        $return = $value;
        PHP);
    }

    /**
     * Returns a PHP code snippet that reads $value and assigns the PHP
     * representation to $return, mirroring convertToPHPValue().
     *
     * The "%%vectorType%%" placeholder is replaced with the name of this
     * type's VectorType case; class names inside the snippet are fully
     * qualified because this file's imports do not apply to it.
     */
    public function closureToPHP(): string
    {
        return str_replace('%%vectorType%%', $this->getVectorType()->name, <<<'PHP'
        if ($value === null) {
            $return = null;
            return;
        }

        if (\is_array($value)) {
            $return = $value;
            return;
        }

        if (! $value instanceof \MongoDB\BSON\Binary) {
            throw new \InvalidArgumentException(sprintf('Invalid data of type "%s" received for vector field', get_debug_type($value)));
        }

        if ($value->getType() !== \MongoDB\BSON\Binary::TYPE_VECTOR) {
            throw new \InvalidArgumentException(sprintf('Invalid binary data of type %d received for vector field', $value->getType()));
        }

        if ($value->getVectorType() !== \MongoDB\BSON\VectorType::%%vectorType%%) {
            throw new \InvalidArgumentException(sprintf('Invalid binary vector data of vector type %s received for vector field, expected vector type %%vectorType%%', $value->getVectorType()->name));
        }

        $return = $value->toArray();
        PHP);
    }

    /** Returns the VectorType case this mapping type reads and writes. */
    abstract protected function getVectorType(): VectorType;
}

lib/Doctrine/ODM/MongoDB/Types/Type.php

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ abstract class Type
4747
public const RAW = 'raw';
4848
public const DECIMAL128 = 'decimal128';
4949
public const UUID = 'uuid';
50+
public const VECTOR_FLOAT32 = 'vector_float32';
51+
public const VECTOR_INT8 = 'vector_int8';
52+
public const VECTOR_PACKED_BIT = 'vector_packed_bit';
5053

5154
/** @deprecated const was deprecated in doctrine/mongodb-odm 2.1 and will be removed in 3.0. Use Type::INT instead */
5255
public const INTID = 'int_id';
@@ -89,6 +92,9 @@ abstract class Type
8992
self::RAW => Types\RawType::class,
9093
self::DECIMAL128 => Types\Decimal128Type::class,
9194
self::UUID => Types\BinaryUuidType::class,
95+
self::VECTOR_FLOAT32 => Types\VectorFloat32Type::class,
96+
self::VECTOR_INT8 => Types\VectorInt8Type::class,
97+
self::VECTOR_PACKED_BIT => Types\VectorPackedBitType::class,
9298
];
9399

94100
/** Prevent instantiation and force use of the factory method. */
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Doctrine\ODM\MongoDB\Types;
6+
7+
use MongoDB\BSON\VectorType;
8+
9+
/**
 * Mapping type "vector_float32": list of floats stored as a
 * MongoDB\BSON\Binary vector with vector type Float32.
 */
final class VectorFloat32Type extends AbstractVectorType
{
    /** Selects the Float32 vector type for the shared conversion logic. */
    protected function getVectorType(): VectorType
    {
        return VectorType::Float32;
    }
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Doctrine\ODM\MongoDB\Types;
6+
7+
use MongoDB\BSON\VectorType;
8+
9+
/**
 * Mapping type "vector_int8": list of integers stored as a
 * MongoDB\BSON\Binary vector with vector type Int8.
 */
final class VectorInt8Type extends AbstractVectorType
{
    /** Selects the Int8 vector type for the shared conversion logic. */
    protected function getVectorType(): VectorType
    {
        return VectorType::Int8;
    }
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Doctrine\ODM\MongoDB\Types;
6+
7+
use MongoDB\BSON\VectorType;
8+
9+
/**
 * Mapping type "vector_packed_bit": list of booleans stored as a
 * MongoDB\BSON\Binary vector with vector type PackedBit.
 */
final class VectorPackedBitType extends AbstractVectorType
{
    /** Selects the PackedBit vector type for the shared conversion logic. */
    protected function getVectorType(): VectorType
    {
        return VectorType::PackedBit;
    }
}

phpstan.neon.dist

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ parameters:
5050
identifier: property.unusedType
5151
path: tests/
5252

53+
# Requires ext-mongodb 2.2+
54+
- message: '#MongoDB\\BSON\\VectorType#'
55+
- message: '#MongoDB\\BSON\\Binary\:\:(TYPE_VECTOR|getVectorType|toArray|fromVector)#'
56+
5357
# To be removed when reaching phpstan level 6
5458
checkMissingVarTagTypehint: true
5559
checkMissingTypehints: true

tests/Doctrine/ODM/MongoDB/Tests/Aggregation/Stage/VectorSearchTest.php

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,7 @@ public function testQueryVector(): void
9393
public function testQueryVectorAcceptsBinary(): void
9494
{
9595
[$stage] = $this->createVectorSearchStage();
96-
// @phpstan-ignore class.notFound (requires ext-mongodb 2.2+)
9796
if (enum_exists(VectorType::class)) {
98-
// @phpstan-ignore staticMethod.notFound (requires ext-mongodb 2.2+)
9997
$binaryVector = Binary::fromVector([1, 2, 3], VectorType::Int8);
10098
self::assertInstanceOf(Binary::class, $binaryVector);
10199
} else {

0 commit comments

Comments
 (0)