|
12 | 12 | }, |
13 | 13 | { |
14 | 14 | "cell_type": "code", |
15 | | - "execution_count": null, |
| 15 | + "execution_count": 97, |
16 | 16 | "metadata": {}, |
17 | 17 | "outputs": [], |
18 | 18 | "source": [ |
|
21 | 21 | }, |
22 | 22 | { |
23 | 23 | "cell_type": "code", |
24 | | - "execution_count": null, |
| 24 | + "execution_count": 98, |
25 | 25 | "metadata": {}, |
26 | 26 | "outputs": [], |
27 | 27 | "source": [ |
|
31 | 31 | }, |
32 | 32 | { |
33 | 33 | "cell_type": "code", |
34 | | - "execution_count": null, |
| 34 | + "execution_count": 99, |
35 | 35 | "metadata": {}, |
36 | 36 | "outputs": [], |
37 | 37 | "source": [ |
|
42 | 42 | }, |
43 | 43 | { |
44 | 44 | "cell_type": "code", |
45 | | - "execution_count": null, |
| 45 | + "execution_count": 100, |
46 | 46 | "metadata": {}, |
47 | 47 | "outputs": [], |
48 | 48 | "source": [ |
|
52 | 52 | }, |
53 | 53 | { |
54 | 54 | "cell_type": "code", |
55 | | - "execution_count": null, |
| 55 | + "execution_count": 101, |
56 | 56 | "metadata": {}, |
57 | 57 | "outputs": [], |
58 | 58 | "source": [ |
|
73 | 73 | }, |
74 | 74 | { |
75 | 75 | "cell_type": "code", |
76 | | - "execution_count": null, |
| 76 | + "execution_count": 102, |
77 | 77 | "metadata": {}, |
78 | 78 | "outputs": [], |
79 | 79 | "source": [ |
|
146 | 146 | }, |
147 | 147 | { |
148 | 148 | "cell_type": "code", |
149 | | - "execution_count": null, |
| 149 | + "execution_count": 103, |
150 | 150 | "metadata": {}, |
151 | 151 | "outputs": [], |
152 | 152 | "source": [ |
|
275 | 275 | }, |
276 | 276 | { |
277 | 277 | "cell_type": "code", |
278 | | - "execution_count": null, |
| 278 | + "execution_count": 104, |
279 | 279 | "metadata": {}, |
280 | 280 | "outputs": [], |
281 | 281 | "source": [ |
|
290 | 290 | }, |
291 | 291 | { |
292 | 292 | "cell_type": "code", |
293 | | - "execution_count": null, |
| 293 | + "execution_count": 105, |
294 | 294 | "metadata": {}, |
295 | 295 | "outputs": [], |
296 | 296 | "source": [ |
|
388 | 388 | }, |
389 | 389 | { |
390 | 390 | "cell_type": "code", |
391 | | - "execution_count": null, |
| 391 | + "execution_count": 106, |
392 | 392 | "metadata": {}, |
393 | 393 | "outputs": [], |
394 | 394 | "source": [ |
|
411 | 411 | " \"!=\": \"<>\",\n", |
412 | 412 | " }\n", |
413 | 413 | "\n", |
414 | | - " def __init__(self, *clauses: Union['Predicates', Tuple[str, str], Tuple[str, str, str]], operator: str = 'AND'):\n", |
| 414 | + " PredicateValue = Union[str, int, float]\n", |
| 415 | + "\n", |
| 416 | + " def __init__(self, *clauses: Union['Predicates', Tuple[str, PredicateValue], Tuple[str, str, PredicateValue], str, PredicateValue], operator: str = 'AND'):\n", |
415 | 417 | " \"\"\"\n", |
416 | 418 | " Predicates class defines predicates on the object metadata. Predicates can be combined using logical operators (&, |, and ~).\n", |
417 | 419 | "\n", |
|
425 | 427 | " if operator not in self.logical_operators: \n", |
426 | 428 | " raise ValueError(f\"invalid operator: {operator}\")\n", |
427 | 429 | " self.operator = operator\n", |
428 | | - " self.clauses = list(clauses)\n", |
| 430 | + " if isinstance(clauses[0], str):\n", |
| 431 | + " if len(clauses) != 3 or not (isinstance(clauses[1], str) and isinstance(clauses[2], self.PredicateValue)):\n", |
| 432 | + " raise ValueError(f\"Invalid clause format: {clauses}\")\n", |
| 433 | + " self.clauses = [(clauses[0], clauses[1], clauses[2])]\n", |
| 434 | + " else:\n", |
| 435 | + " self.clauses = list(clauses)\n", |
429 | 436 | "\n", |
430 | | - " def add_clause(self, *clause: Union['Predicates', Tuple[str, str], Tuple[str, str, str]]):\n", |
| 437 | + " def add_clause(self, *clause: Union['Predicates', Tuple[str, PredicateValue], Tuple[str, str, PredicateValue], str, PredicateValue]):\n", |
431 | 438 | " \"\"\"\n", |
432 | 439 | " Add a clause to the predicates object.\n", |
433 | 440 | "\n", |
|
436 | 443 | " clause: 'Predicates' or Tuple[str, str] or Tuple[str, str, str]\n", |
437 | 444 | " Predicate clause. Can be either another Predicates object or a tuple of the form (field, operator, value) or (field, value).\n", |
438 | 445 | " \"\"\"\n", |
439 | | - " self.clauses.extend(list(clause))\n", |
| 446 | + " if isinstance(clause[0], str):\n", |
| 447 | + " if len(clause) != 3 or not (isinstance(clause[1], str) and isinstance(clause[2], self.PredicateValue)):\n", |
| 448 | + " raise ValueError(f\"Invalid clause format: {clause}\")\n", |
| 449 | + " self.clauses.append((clause[0], clause[1], clause[2]))\n", |
| 450 | + " else:\n", |
| 451 | + " self.clauses.extend(list(clause))\n", |
440 | 452 | " \n", |
441 | 453 | " def __and__(self, other):\n", |
442 | 454 | " new_predicates = Predicates(self, other, operator='AND')\n", |
|
522 | 534 | }, |
523 | 535 | { |
524 | 536 | "cell_type": "code", |
525 | | - "execution_count": null, |
| 537 | + "execution_count": 107, |
526 | 538 | "metadata": {}, |
527 | 539 | "outputs": [], |
528 | 540 | "source": [ |
|
836 | 848 | }, |
837 | 849 | { |
838 | 850 | "cell_type": "code", |
839 | | - "execution_count": null, |
| 851 | + "execution_count": 108, |
840 | 852 | "metadata": {}, |
841 | 853 | "outputs": [ |
842 | 854 | { |
|
864 | 876 | "Generates a query to create the tables, indexes, and extensions needed to store the vector data." |
865 | 877 | ] |
866 | 878 | }, |
867 | | - "execution_count": null, |
| 879 | + "execution_count": 108, |
868 | 880 | "metadata": {}, |
869 | 881 | "output_type": "execute_result" |
870 | 882 | } |
|
883 | 895 | }, |
884 | 896 | { |
885 | 897 | "cell_type": "code", |
886 | | - "execution_count": null, |
| 898 | + "execution_count": 109, |
887 | 899 | "metadata": {}, |
888 | 900 | "outputs": [], |
889 | 901 | "source": [ |
|
1143 | 1155 | }, |
1144 | 1156 | { |
1145 | 1157 | "cell_type": "code", |
1146 | | - "execution_count": null, |
| 1158 | + "execution_count": 110, |
1147 | 1159 | "metadata": {}, |
1148 | 1160 | "outputs": [ |
1149 | 1161 | { |
|
1171 | 1183 | "Creates necessary tables." |
1172 | 1184 | ] |
1173 | 1185 | }, |
1174 | | - "execution_count": null, |
| 1186 | + "execution_count": 110, |
1175 | 1187 | "metadata": {}, |
1176 | 1188 | "output_type": "execute_result" |
1177 | 1189 | } |
|
1182 | 1194 | }, |
1183 | 1195 | { |
1184 | 1196 | "cell_type": "code", |
1185 | | - "execution_count": null, |
| 1197 | + "execution_count": 111, |
1186 | 1198 | "metadata": {}, |
1187 | 1199 | "outputs": [ |
1188 | 1200 | { |
|
1210 | 1222 | "Creates necessary tables." |
1211 | 1223 | ] |
1212 | 1224 | }, |
1213 | | - "execution_count": null, |
| 1225 | + "execution_count": 111, |
1214 | 1226 | "metadata": {}, |
1215 | 1227 | "output_type": "execute_result" |
1216 | 1228 | } |
|
1221 | 1233 | }, |
1222 | 1234 | { |
1223 | 1235 | "cell_type": "code", |
1224 | | - "execution_count": null, |
| 1236 | + "execution_count": 112, |
1225 | 1237 | "metadata": {}, |
1226 | 1238 | "outputs": [ |
1227 | | - { |
1228 | | - "name": "stderr", |
1229 | | - "output_type": "stream", |
1230 | | - "text": [ |
1231 | | - "/Users/cevian/.pyenv/versions/3.11.4/envs/nbdev_env/lib/python3.11/site-packages/fastcore/docscrape.py:225: UserWarning: potentially wrong underline length... \n", |
1232 | | - "Returns \n", |
1233 | | - "-------- in \n", |
1234 | | - "Retrieves similar records using a similarity query.\n", |
1235 | | - "...\n", |
1236 | | - " else: warn(msg)\n" |
1237 | | - ] |
1238 | | - }, |
1239 | 1239 | { |
1240 | 1240 | "data": { |
1241 | 1241 | "text/markdown": [ |
|
1285 | 1285 | "| **Returns** | **List: List of similar records.** | | |" |
1286 | 1286 | ] |
1287 | 1287 | }, |
1288 | | - "execution_count": null, |
| 1288 | + "execution_count": 112, |
1289 | 1289 | "metadata": {}, |
1290 | 1290 | "output_type": "execute_result" |
1291 | 1291 | } |
|
1296 | 1296 | }, |
1297 | 1297 | { |
1298 | 1298 | "cell_type": "code", |
1299 | | - "execution_count": null, |
| 1299 | + "execution_count": 117, |
1300 | 1300 | "metadata": {}, |
1301 | 1301 | "outputs": [], |
1302 | 1302 | "source": [ |
|
1317 | 1317 | }, |
1318 | 1318 | { |
1319 | 1319 | "cell_type": "code", |
1320 | | - "execution_count": null, |
| 1320 | + "execution_count": 118, |
1321 | 1321 | "metadata": {}, |
1322 | 1322 | "outputs": [], |
1323 | 1323 | "source": [ |
|
1393 | 1393 | "assert len(rec) == 1\n", |
1394 | 1394 | "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key\", \"==\", \"val2\")))\n", |
1395 | 1395 | "assert len(rec) == 1\n", |
1396 | | - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key_10\", \"<\", 100)))\n", |
| 1396 | + "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key\", \"==\", \"val2\"))\n", |
| 1397 | + "assert len(rec) == 1\n", |
| 1398 | + "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<\", 100))\n", |
1397 | 1399 | "assert len(rec) == 1\n", |
1398 | | - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key_10\", \"<\", 10)))\n", |
| 1400 | + "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<\", 10))\n", |
1399 | 1401 | "assert len(rec) == 0\n", |
1400 | | - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key_10\", \"<=\", 10)))\n", |
| 1402 | + "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<=\", 10))\n", |
1401 | 1403 | "assert len(rec) == 1\n", |
1402 | | - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key_10\", \"<=\", 10.0)))\n", |
| 1404 | + "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<=\", 10.0))\n", |
1403 | 1405 | "assert len(rec) == 1\n", |
1404 | | - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key_11\", \"<=\", 11.3)))\n", |
| 1406 | + "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_11\", \"<=\", 11.3))\n", |
1405 | 1407 | "assert len(rec) == 1\n", |
1406 | | - "rec = await vec.search(limit=4, predicates=Predicates((\"key_11\", \">=\", 11.29999)))\n", |
| 1408 | + "rec = await vec.search(limit=4, predicates=Predicates(\"key_11\", \">=\", 11.29999))\n", |
1407 | 1409 | "assert len(rec) == 1\n", |
1408 | | - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key_11\", \"<\", 11.299999)))\n", |
| 1410 | + "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_11\", \"<\", 11.299999))\n", |
1409 | 1411 | "assert len(rec) == 0\n", |
1410 | 1412 | "\n", |
1411 | 1413 | "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(*[(\"key\", \"val2\"), (\"key_10\", \"<\", 100)]))\n", |
|
1414 | 1416 | "assert len(rec) == 1\n", |
1415 | 1417 | "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key\", \"val2\"), (\"key_2\", \"val_2\"), operator='OR'))\n", |
1416 | 1418 | "assert len(rec) == 2\n", |
1417 | | - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key_10\", \"<\", 100)) & (Predicates((\"key\", \"val2\")) | Predicates((\"key_2\", \"val_2\")))) \n", |
| 1419 | + "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<\", 100) & (Predicates(\"key\",\"==\", \"val2\",) | Predicates(\"key_2\", \"==\", \"val_2\"))) \n", |
1418 | 1420 | "assert len(rec) == 1\n", |
1419 | | - "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key_10\", \"<\", 100)) and (Predicates((\"key\", \"val2\")) or Predicates((\"key_2\", \"val_2\")))) \n", |
| 1421 | + "rec = await vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key_10\", \"<\", 100) and (Predicates(\"key\",\"==\", \"val2\") or Predicates(\"key_2\",\"==\", \"val_2\"))) \n", |
1420 | 1422 | "assert len(rec) == 1\n", |
1421 | 1423 | "rec = await vec.search(limit=4, predicates=~Predicates((\"key\", \"val2\"), (\"key_10\", \"<\", 100)))\n", |
1422 | 1424 | "assert len(rec) == 4\n", |
|
2193 | 2195 | "assert rec[0][SEARCH_RESULT_DISTANCE_IDX] == 0.0009438353921149556\n", |
2194 | 2196 | "assert rec[0][\"distance\"] == 0.0009438353921149556\n", |
2195 | 2197 | "\n", |
2196 | | - "rec = vec.search([1.0, 2.0], limit=4, predicates=Predicates((\"key\", \"val2\")))\n", |
| 2198 | + "rec = vec.search([1.0, 2.0], limit=4, predicates=Predicates(\"key\",\"==\", \"val2\"))\n", |
2197 | 2199 | "assert len(rec) == 1\n", |
2198 | 2200 | "\n", |
2199 | 2201 | "rec = vec.search([1.0, 2.0], limit=4, filter=[\n", |
|
0 commit comments