From 05349f9d1a03ae8da7ab83c8d4a84af0a50b8b25 Mon Sep 17 00:00:00 2001 From: Giacomo rua Date: Thu, 17 Jul 2025 17:51:11 +0200 Subject: [PATCH 1/9] feat: add method to read from database and make method to write to database more general --- pgvector/bit.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index 26a9d8d..cecd180 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -62,9 +62,12 @@ def from_binary(cls, value): @classmethod def _to_db(cls, value): + if value is None: + return value + if not isinstance(value, cls): - raise ValueError('expected bit') - + value = cls(value) + return value.to_text() @classmethod @@ -73,3 +76,9 @@ def _to_db_binary(cls, value): raise ValueError('expected bit') return value.to_binary() + + @classmethod + def _from_db(cls, value): + if value is None or isinstance(value, cls): + return value + return cls.from_text(value) \ No newline at end of file From 2b5e53d0faf8e331237d509cad16863a726a4dd1 Mon Sep 17 00:00:00 2001 From: Giacomo rua Date: Thu, 17 Jul 2025 17:52:42 +0200 Subject: [PATCH 2/9] feat: add processor methods to BIT class --- pgvector/sqlalchemy/bit.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/pgvector/sqlalchemy/bit.py b/pgvector/sqlalchemy/bit.py index 0f83f3c..091e3c8 100644 --- a/pgvector/sqlalchemy/bit.py +++ b/pgvector/sqlalchemy/bit.py @@ -1,6 +1,8 @@ +import asyncpg +from sqlalchemy.dialects.postgresql.asyncpg import PGDialect_asyncpg from sqlalchemy.dialects.postgresql.base import ischema_names from sqlalchemy.types import UserDefinedType, Float - +from .. import Bit class BIT(UserDefinedType): cache_ok = True @@ -14,6 +16,21 @@ def get_col_spec(self, **kw): return 'BIT' return 'BIT(%d)' % self.length + def bind_processor(self, dialect): + def process(value): + value = Bit._to_db(value) + if value and isinstance(dialect, PGDialect_asyncpg): + return asyncpg.BitString(value) + return value + return process + + def result_processor(self, dialect, coltype): + def process(value): + if value and isinstance(dialect, PGDialect_asyncpg): + return value.as_string() + return Bit._from_db(value).to_text() + return process + class comparator_factory(UserDefinedType.Comparator): def hamming_distance(self, other): return self.op('<~>', return_type=Float)(other) From 45cc2e1b0be1eb83cf7574eb8c36aace8b0a3997 Mon Sep 17 00:00:00 2001 From: Giacomo rua Date: Thu, 17 Jul 2025 17:53:49 +0200 Subject: [PATCH 3/9] test: use string directly instead of BitString --- tests/test_sqlalchemy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 5aec977..286bf66 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -591,7 +591,7 @@ async def test_bit(self, engine): async with async_session() as session: async with session.begin(): - embedding = asyncpg.BitString('101') if engine == asyncpg_engine else '101' + embedding = '101' session.add(Item(id=1, binary_embedding=embedding)) item = await session.get(Item, 1) assert item.binary_embedding == embedding From 3d14b22d2d475e21f20511642663792f7f11ff7f Mon Sep 17 00:00:00 2001 From: Giacomo rua Date: Thu, 17 Jul 2025 18:01:22 +0200 Subject: [PATCH 4/9] fix: fix for case when value is none --- pgvector/sqlalchemy/bit.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pgvector/sqlalchemy/bit.py b/pgvector/sqlalchemy/bit.py index 091e3c8..338e1b6 100644 --- a/pgvector/sqlalchemy/bit.py +++ b/pgvector/sqlalchemy/bit.py @@ -26,8 +26,10 @@ def process(value): def result_processor(self, dialect, coltype): def process(value): - if value and isinstance(dialect, PGDialect_asyncpg): - return value.as_string() + if value is None: return None + else: + if isinstance(dialect, PGDialect_asyncpg): + return value.as_string() return Bit._from_db(value).to_text() return process From 1411240e668b2081a481355b3fcf8c5686b2344f Mon Sep 17 00:00:00 2001 From: Giacomo rua Date: Mon, 22 Sep 2025 17:42:07 +0200 Subject: [PATCH 5/9] feat: add other input types for bit --- pgvector/sqlalchemy/bit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pgvector/sqlalchemy/bit.py b/pgvector/sqlalchemy/bit.py index b5f64f2..fb7f026 100644 --- a/pgvector/sqlalchemy/bit.py +++ b/pgvector/sqlalchemy/bit.py @@ -26,7 +26,7 @@ def process(value): return value return process else: - return super().bind_processor(dialect) + return lambda value: Bit._to_db(value) class comparator_factory(UserDefinedType.Comparator): def hamming_distance(self, other): From f176041d76006b9f16ce9b91be259ee4a45cb821 Mon Sep 17 00:00:00 2001 From: Giacomo rua Date: Mon, 22 Sep 2025 17:42:21 +0200 Subject: [PATCH 6/9] fix: test --- tests/test_sqlalchemy.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 6900e3a..3fd9406 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -18,8 +18,8 @@ mapped_column = Column sqlalchemy_version = 1 -psycopg2_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') -psycopg2_type_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') +psycopg2_engine = create_engine('postgresql+psycopg2://postgres:mypassword@localhost/pgvector_python_test') +psycopg2_type_engine = create_engine('postgresql+psycopg2://postgres:mypassword@localhost/pgvector_python_test') @event.listens_for(psycopg2_type_engine, "connect") @@ -28,27 +28,27 @@ def psycopg2_connect(dbapi_connection, connection_record): register_vector(dbapi_connection) -pg8000_engine = create_engine(f'postgresql+pg8000://{os.environ["USER"]}@localhost/pgvector_python_test') +pg8000_engine = create_engine(f'postgresql+pg8000://postgres:mypassword@localhost/pgvector_python_test') if sqlalchemy_version > 1: - psycopg_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') - psycopg_type_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') + psycopg_engine = create_engine('postgresql+psycopg://postgres:mypassword@localhost/pgvector_python_test') + psycopg_type_engine = create_engine('postgresql+psycopg://postgres:mypassword@localhost/pgvector_python_test') @event.listens_for(psycopg_type_engine, "connect") def psycopg_connect(dbapi_connection, connection_record): from pgvector.psycopg import register_vector register_vector(dbapi_connection) - psycopg_async_engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') - psycopg_async_type_engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') + psycopg_async_engine = create_async_engine('postgresql+psycopg://postgres:mypassword@localhost/pgvector_python_test') + psycopg_async_type_engine = create_async_engine('postgresql+psycopg://postgres:mypassword@localhost/pgvector_python_test') @event.listens_for(psycopg_async_type_engine.sync_engine, "connect") def psycopg_async_connect(dbapi_connection, connection_record): from pgvector.psycopg import register_vector_async dbapi_connection.run_async(register_vector_async) - asyncpg_engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') - asyncpg_type_engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') + asyncpg_engine = create_async_engine('postgresql+asyncpg://postgres:mypassword@localhost/pgvector_python_test') + asyncpg_type_engine = create_async_engine('postgresql+asyncpg://postgres:mypassword@localhost/pgvector_python_test') @event.listens_for(asyncpg_type_engine.sync_engine, "connect") def asyncpg_connect(dbapi_connection, connection_record): @@ -311,6 +311,13 @@ def test_bit(self, engine): item = session.get(Item, 1) assert item.binary_embedding == '101' + def test_boolean_list_bit(self, engine): + with Session(engine) as session: + session.add(Item(id=1, binary_embedding=[True, False, True])) + session.commit() + item = session.get(Item, 1) + assert item.binary_embedding == '101' + def test_bit_hamming_distance(self, engine): create_items() with Session(engine) as session: @@ -567,7 +574,6 @@ def test_halfvec_array(self, engine): item = session.get(Item, 1) assert item.half_embeddings == [HalfVector([1, 2, 3]), HalfVector([4, 5, 6])] - @pytest.mark.parametrize('engine', async_engines) class TestSqlalchemyAsync: def setup_method(self): @@ -605,7 +611,7 @@ async def test_bit(self, engine): async with async_session() as session: async with session.begin(): - embedding = '101' + embedding = asyncpg.BitString('101') if engine == asyncpg_engine else '101' session.add(Item(id=1, binary_embedding=embedding)) item = await session.get(Item, 1) assert item.binary_embedding == embedding From c48f0d2af89405ae0eae89879e0d204c8a64e04f Mon Sep 17 00:00:00 2001 From: Giacomo rua Date: Mon, 22 Sep 2025 17:56:22 +0200 Subject: [PATCH 7/9] fix: use correct engine url --- tests/test_sqlalchemy.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index 3fd9406..ffe7cba 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -18,8 +18,8 @@ mapped_column = Column sqlalchemy_version = 1 -psycopg2_engine = create_engine('postgresql+psycopg2://postgres:mypassword@localhost/pgvector_python_test') -psycopg2_type_engine = create_engine('postgresql+psycopg2://postgres:mypassword@localhost/pgvector_python_test') +psycopg2_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') +psycopg2_type_engine = create_engine('postgresql+psycopg2://localhost/pgvector_python_test') @event.listens_for(psycopg2_type_engine, "connect") @@ -28,27 +28,27 @@ def psycopg2_connect(dbapi_connection, connection_record): register_vector(dbapi_connection) -pg8000_engine = create_engine(f'postgresql+pg8000://postgres:mypassword@localhost/pgvector_python_test') +pg8000_engine = create_engine(f'postgresql+pg8000://localhost/pgvector_python_test') if sqlalchemy_version > 1: - psycopg_engine = create_engine('postgresql+psycopg://postgres:mypassword@localhost/pgvector_python_test') - psycopg_type_engine = create_engine('postgresql+psycopg://postgres:mypassword@localhost/pgvector_python_test') + psycopg_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') + psycopg_type_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') @event.listens_for(psycopg_type_engine, "connect") def psycopg_connect(dbapi_connection, connection_record): from pgvector.psycopg import register_vector register_vector(dbapi_connection) - psycopg_async_engine = create_async_engine('postgresql+psycopg://postgres:mypassword@localhost/pgvector_python_test') - psycopg_async_type_engine = create_async_engine('postgresql+psycopg://postgres:mypassword@localhost/pgvector_python_test') + psycopg_async_engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') + psycopg_async_type_engine = create_async_engine('postgresql+psycopg://localhost/pgvector_python_test') @event.listens_for(psycopg_async_type_engine.sync_engine, "connect") def psycopg_async_connect(dbapi_connection, connection_record): from pgvector.psycopg import register_vector_async dbapi_connection.run_async(register_vector_async) - asyncpg_engine = create_async_engine('postgresql+asyncpg://postgres:mypassword@localhost/pgvector_python_test') - asyncpg_type_engine = create_async_engine('postgresql+asyncpg://postgres:mypassword@localhost/pgvector_python_test') + asyncpg_engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') + asyncpg_type_engine = create_async_engine('postgresql+asyncpg://localhost/pgvector_python_test') @event.listens_for(asyncpg_type_engine.sync_engine, "connect") def asyncpg_connect(dbapi_connection, connection_record): From 2e6795d48ad0cecbdb210a75f8e400276078b7ae Mon Sep 17 00:00:00 2001 From: Giacomo rua Date: Mon, 22 Sep 2025 18:01:44 +0200 Subject: [PATCH 8/9] fix: add user back into engine url --- tests/test_sqlalchemy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sqlalchemy.py b/tests/test_sqlalchemy.py index ffe7cba..af34bee 100644 --- a/tests/test_sqlalchemy.py +++ b/tests/test_sqlalchemy.py @@ -28,7 +28,7 @@ def psycopg2_connect(dbapi_connection, connection_record): register_vector(dbapi_connection) -pg8000_engine = create_engine(f'postgresql+pg8000://localhost/pgvector_python_test') +pg8000_engine = create_engine(f'postgresql+pg8000://{os.environ["USER"]}@localhost/pgvector_python_test') if sqlalchemy_version > 1: psycopg_engine = create_engine('postgresql+psycopg://localhost/pgvector_python_test') From d78612500ddb164b5ebd28332a4e4e47f077bdd8 Mon Sep 17 00:00:00 2001 From: jackrua <155536850+jackrua@users.noreply.github.com> Date: Tue, 23 Sep 2025 14:17:09 +0200 Subject: [PATCH 9/9] style: remove unused method --- pgvector/bit.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pgvector/bit.py b/pgvector/bit.py index cecd180..eee4b9b 100644 --- a/pgvector/bit.py +++ b/pgvector/bit.py @@ -76,9 +76,3 @@ def _to_db_binary(cls, value): raise ValueError('expected bit') return value.to_binary() - - @classmethod - def _from_db(cls, value): - if value is None or isinstance(value, cls): - return value - return cls.from_text(value) \ No newline at end of file