|
| 1 | +""" |
| 2 | +Binary preservation tests for search results. |
| 3 | +
|
| 4 | +These tests are in a separate file because the main search test suite (test_search.py) |
| 5 | +has compatibility issues with the current Valkey search module version. Most existing |
| 6 | +search tests fail due to unsupported field types and parameters (e.g., TEXT fields, |
| 7 | +SKIPINITIALSCAN, etc.). |
| 8 | +
|
| 9 | +Our binary preservation functionality works correctly with the current search module |
| 10 | +using direct FT.CREATE commands and KNN vector queries, so we maintain these tests |
| 11 | +separately to ensure the feature remains properly tested while the broader search |
| 12 | +test compatibility issues are resolved. |
| 13 | +""" |
| 14 | + |
| 15 | +import struct |
| 16 | + |
| 17 | +import pytest |
| 18 | +import valkey |
| 19 | + |
| 20 | +from .conftest import _get_client, is_resp2_connection, skip_ifmodversion_lt |
| 21 | + |
| 22 | + |
@pytest.mark.valkeymod
@skip_ifmodversion_lt("1.0.0", "search")
def test_vector_binary_preservation_default_behavior(request):
    """Check that a default search returns the vector field as ``str``.

    A packed float vector is stored in a hash and fetched back through a KNN
    query without ``preserve_bytes``. The returned field is expected to be a
    ``str`` rather than the original bytes (the backward-compatible behavior).
    """
    client = _get_client(valkey.Valkey, request, decode_responses=False)

    # Create index with vector field using direct command
    client.execute_command(
        "FT.CREATE", "test_idx", "SCHEMA",
        "embedding", "VECTOR", "FLAT", "6", "TYPE", "FLOAT32", "DIM", "3",
        "DISTANCE_METRIC", "COSINE"
    )

    # The other tests in this file create an index with the same name, so it
    # must be dropped even when an assertion below fails; otherwise one
    # failure leaves "test_idx" behind for the tests that run afterwards.
    try:
        # Three packed C floats, matching the FLOAT32 / DIM 3 schema above.
        vec1 = [0.1, 0.2, 0.3]
        vec1_bytes = struct.pack('3f', *vec1)

        # Store document with vector
        client.hset("doc:1", mapping={"embedding": vec1_bytes})

        # Search without preserve_bytes (default behavior) using KNN query
        results = client.ft("test_idx").search(
            "*=>[KNN 1 @embedding $vec]", {"vec": vec1_bytes}
        )

        # NOTE(review): nothing is asserted when the connection is not RESP2.
        if is_resp2_connection(client):
            doc = results.docs[0]
            # Default behavior should decode bytes to string (corrupting
            # binary data).
            assert isinstance(doc.embedding, str)
            # A str never compares equal to bytes, so this check is already
            # implied by the isinstance assertion; kept to state the intent.
            assert doc.embedding != vec1_bytes
    finally:
        client.execute_command("FT.DROPINDEX", "test_idx")
| 55 | + |
| 56 | + |
@pytest.mark.valkeymod
@skip_ifmodversion_lt("1.0.0", "search")
def test_vector_binary_preservation_enabled(request):
    """Check that ``preserve_bytes=True`` returns the vector field unchanged.

    A packed float vector is stored in a hash and fetched back through a KNN
    query with ``preserve_bytes=True``. The returned field must be ``bytes``
    and equal to what was stored.
    """
    client = _get_client(valkey.Valkey, request, decode_responses=False)

    # Create index with vector field using direct command
    client.execute_command(
        "FT.CREATE", "test_idx", "SCHEMA",
        "embedding", "VECTOR", "FLAT", "6", "TYPE", "FLOAT32", "DIM", "3",
        "DISTANCE_METRIC", "COSINE"
    )

    # The other tests in this file create an index with the same name, so it
    # must be dropped even when an assertion below fails; otherwise one
    # failure leaves "test_idx" behind for the tests that run afterwards.
    try:
        # Three packed C floats, matching the FLOAT32 / DIM 3 schema above.
        vec1 = [0.1, 0.2, 0.3]
        vec1_bytes = struct.pack('3f', *vec1)

        # Store document with vector
        client.hset("doc:1", mapping={"embedding": vec1_bytes})

        # Search with preserve_bytes=True using KNN query
        results = client.ft("test_idx").search(
            "*=>[KNN 1 @embedding $vec]", {"vec": vec1_bytes}, preserve_bytes=True
        )

        # NOTE(review): nothing is asserted when the connection is not RESP2.
        if is_resp2_connection(client):
            doc = results.docs[0]
            # With preserve_bytes=True, binary data should be preserved
            assert isinstance(doc.embedding, bytes)
            assert doc.embedding == vec1_bytes
    finally:
        client.execute_command("FT.DROPINDEX", "test_idx")
| 89 | + |
| 90 | + |
@pytest.mark.valkeymod
@skip_ifmodversion_lt("1.0.0", "search")
def test_multiple_field_types_and_vectors(request):
    """Check byte preservation across several documents and field types.

    The index mixes TAG, NUMERIC and two VECTOR fields (DIM 3 and DIM 4).
    Three documents are stored and fetched with one KNN query that lists both
    vector fields in ``binary_fields``. Those two fields must come back as the
    original bytes, while the TAG fields must come back as ``str``.
    """
    client = _get_client(valkey.Valkey, request, decode_responses=False)

    # Create index with diverse field types and different vector dimensions
    client.execute_command(
        "FT.CREATE", "test_idx", "SCHEMA",
        "title", "TAG",
        "price", "NUMERIC",
        "embedding_3d", "VECTOR", "FLAT", "6", "TYPE", "FLOAT32", "DIM", "3",
        "DISTANCE_METRIC", "COSINE",
        "embedding_4d", "VECTOR", "FLAT", "6", "TYPE", "FLOAT32", "DIM", "4",
        "DISTANCE_METRIC", "L2",
        "binary_data", "TAG"
    )

    # The other tests in this file create an index with the same name, so it
    # must be dropped even when an assertion below fails; otherwise one
    # failure leaves "test_idx" behind for the tests that run afterwards.
    try:
        # Create test data with different vector dimensions
        vec_3d = [0.1, 0.2, 0.3]
        vec_3d_bytes = struct.pack("3f", *vec_3d)
        vec_4d = [0.4, 0.5, 0.6, 0.7]
        vec_4d_bytes = struct.pack("4f", *vec_4d)

        # Store multiple documents; only title and price differ between them.
        for i in range(3):
            client.hset(f"doc:{i + 1}", mapping={
                "title": f"item_{i + 1}",
                "price": 10.0 + i,
                "embedding_3d": vec_3d_bytes,
                "embedding_4d": vec_4d_bytes,
                "binary_data": b"binary_content"
            })

        # Test with multiple results (KNN 3 instead of KNN 1)
        results = client.ft("test_idx").search(
            "*=>[KNN 3 @embedding_3d $vec]",
            {"vec": vec_3d_bytes},
            preserve_bytes=True,
            binary_fields=["embedding_3d", "embedding_4d"]
        )

        # NOTE(review): nothing is asserted when the connection is not RESP2.
        if is_resp2_connection(client):
            assert len(results.docs) == 3
            for doc in results.docs:
                # Vector fields should be preserved as bytes
                assert isinstance(doc.embedding_3d, bytes)
                assert doc.embedding_3d == vec_3d_bytes
                assert isinstance(doc.embedding_4d, bytes)
                assert doc.embedding_4d == vec_4d_bytes
                # Fields not listed in binary_fields should be strings
                assert isinstance(doc.title, str)
                assert isinstance(doc.binary_data, str)
    finally:
        client.execute_command("FT.DROPINDEX", "test_idx")
| 146 | + |
| 147 | + |
@pytest.mark.valkeymod
@skip_ifmodversion_lt("1.0.0", "search")
def test_binary_fields_selective_preservation(request):
    """Check that ``binary_fields`` preserves only the fields it names.

    Two vector fields and one TAG field are stored, but only ``embedding1`` is
    listed in ``binary_fields``. That field must come back as the original
    bytes; ``embedding2`` and ``binary_tag`` must come back as ``str``.
    """
    client = _get_client(valkey.Valkey, request, decode_responses=False)

    # Create index with vector and tag fields using direct command
    client.execute_command(
        "FT.CREATE", "test_idx", "SCHEMA",
        "embedding1", "VECTOR", "FLAT", "6", "TYPE", "FLOAT32", "DIM", "3",
        "DISTANCE_METRIC", "COSINE",
        "embedding2", "VECTOR", "FLAT", "6", "TYPE", "FLOAT32", "DIM", "3",
        "DISTANCE_METRIC", "COSINE",
        "binary_tag", "TAG"
    )

    # The other tests in this file create an index with the same name, so it
    # must be dropped even when an assertion below fails; otherwise one
    # failure leaves "test_idx" behind for the tests that run afterwards.
    try:
        # Create vector data as bytes
        vec1 = [0.1, 0.2, 0.3]
        vec1_bytes = struct.pack("3f", *vec1)
        vec2 = [0.4, 0.5, 0.6]
        vec2_bytes = struct.pack("3f", *vec2)

        # Store document with vectors and tag
        client.hset("doc:1", mapping={
            "embedding1": vec1_bytes,
            "embedding2": vec2_bytes,
            "binary_tag": b"test_tag"
        })

        # Search with selective binary preservation (only embedding1) using
        # KNN query
        results = client.ft("test_idx").search(
            "*=>[KNN 1 @embedding1 $vec]",
            {"vec": vec1_bytes},
            preserve_bytes=True,
            binary_fields=["embedding1"]
        )

        # NOTE(review): nothing is asserted when the connection is not RESP2.
        if is_resp2_connection(client):
            doc = results.docs[0]
            # Listed field keeps its raw bytes.
            assert isinstance(doc.embedding1, bytes)
            assert doc.embedding1 == vec1_bytes
            # Fields left out of binary_fields are returned as strings.
            assert isinstance(doc.embedding2, str)
            assert isinstance(doc.binary_tag, str)
    finally:
        client.execute_command("FT.DROPINDEX", "test_idx")
0 commit comments