66
77TODO: Add support for multiple database backends?
88"""
9+ import os
910from pathlib import Path
1011from typing import Iterable
1112
12- from playhouse import sqlite_ext
13+ from peewee import SQL , fn
14+ from playhouse import postgres_ext as ext
1315
14- db_proxy = sqlite_ext . DatabaseProxy ()
16+ from atsphinx . sqlite3fts . playhouse import TSVectorFieldPlus
1517
18+ db_proxy = ext .DatabaseProxy ()
1619
class Document(ext.Model):
    """Document main model.

    Each row stores one ``page`` value together with its ``title``.
    ``page`` carries a unique constraint, so the same page value cannot be
    stored twice.
    """

    # Unique, required text key of the document.
    page = ext.TextField(unique=True, null=False)
    # Required title text of the document.
    title = ext.TextField(null=False)

    class Meta:  # noqa: D106
        # Bound late through the proxy; the real database is set elsewhere.
        database = db_proxy
2529
2630
class Section(ext.Model):
    """Section unit of document.

    Every section belongs to exactly one ``Document`` and carries its own
    reference, title and body text.
    """

    # Owning document.
    document = ext.ForeignKeyField(Document)
    # Flag stored per section; defaults to False.
    # NOTE(review): presumably marks the top-level section of a document;
    # confirm against the code that builds sections.
    root = ext.BooleanField(null=False, default=False)
    # Required reference text for the section.
    ref = ext.TextField(null=False)
    # Required section title.
    title = ext.TextField(null=False)
    # Required section body text.
    body = ext.TextField(null=False)

    class Meta:  # noqa: D106
        # Bound late through the proxy; the real database is set elsewhere.
        database = db_proxy
3842
3943
class Content(ext.Model):
    """Searching model.

    Holds the searchable form of a section's title and body.  ``rowid`` is
    the value that ``search_documents`` joins against ``Section.id``.
    """

    # Integer key matched against ``Section.id`` when searching.
    rowid = ext.IntegerField()
    # Search vectors for the section title and body.
    title = TSVectorFieldPlus()
    body = TSVectorFieldPlus()

    class Meta:  # noqa: D106
        database = db_proxy
        # TODO: This option comes from SQLite; it does not work on other DBMS.
        # options = {"tokenize": "trigram"}
5055
5156
5257def store_document (document : Document , sections : Iterable [Section ]):
@@ -58,32 +63,68 @@ def store_document(document: Document, sections: Iterable[Section]):
5863 Content .insert (
5964 {
6065 Content .rowid : section .id ,
61- Content .title : section .title or document .title ,
62- Content .body : section .body ,
66+ Content .title : fn . to_tsvector ( section .title or document .title ) ,
67+ Content .body : fn . to_tsvector ( section .body ) ,
6368 }
6469 ).execute ()
6570
6671
def search_documents(keyword: str) -> Iterable[Section]:
    """Search documents from keyword by full-text-search.

    Builds a query that selects every ``Section`` whose ``Content`` title or
    body matches ``keyword`` (web-search syntax), ordered by title rank and
    then by body rank, both descending.  The ranks are exposed on the result
    under the aliases ``rank_title`` and ``rank_body``.
    """
    # SQLite.
    # Former FTS5 query, kept for reference only:
    #   Section.select()
    #       .join(Content, on=(Section.id == Content.rowid))
    #       .where(Content.match(keyword))
    #       .order_by(Content.bm25())

    # PostgreSQL.
    # https://www.postgresql.org/docs/current/textsearch-controls.html
    # https://stackoverflow.com/questions/25033184/postgresql-full-text-search-performance-not-acceptable-when-ordering-by-ts-rank/25245291#25245291
    ts_query = fn.websearch_to_tsquery(keyword)

    # 32 is the ts_rank_cd normalization flag (see the PostgreSQL link above).
    title_rank = fn.ts_rank_cd(Content.title, ts_query, 32).alias("rank_title")
    body_rank = fn.ts_rank_cd(Content.body, ts_query, 32).alias("rank_body")

    title_hit = Content.title.match(keyword, web=True)
    body_hit = Content.body.match(keyword, web=True)

    query = Section.select(Section, title_rank, body_rank)
    query = query.join(Content, on=(Section.id == Content.rowid))
    query = query.where(title_hit | body_hit)
    # Order by the aliased rank columns: title relevance first, then body.
    return query.order_by(
        SQL("rank_title").desc(),
        SQL("rank_body").desc(),
    )
75107
76108
def bind(db_type: str, db_path: Path):
    """Bind connection.

    This only sets the database into the proxy; it does not create tables.

    Args:
        db_type: Backend selector, either ``"sqlite"`` or ``"postgresql"``.
        db_path: Passed unchanged as the first argument of the selected
            database class.

    Raises:
        ValueError: If ``db_type`` is not one of the supported backends.
    """
    if db_type == "sqlite":
        # ``ext`` is ``playhouse.postgres_ext`` and does not provide
        # SqliteExtDatabase; take it from its own module.  Imported locally
        # so the SQLite extension is only loaded when it is requested.
        from playhouse import sqlite_ext

        db = sqlite_ext.SqliteExtDatabase(db_path)
    elif db_type == "postgresql":
        db = ext.PostgresqlExtDatabase(db_path)
        log_statement = os.environ.get("POSTGRES_LOG_STATEMENT")
        if log_statement is not None:
            # set_config(..., false) is the function form of a session-level
            # SET; passing the value as a parameter keeps quotes in the
            # environment variable from breaking the statement.
            db.execute_sql(
                "SELECT set_config('log_statement', %s, false);",
                (log_statement,),
            )
    else:
        raise ValueError(f"Unknown database type: {db_type}")
    db_proxy.initialize(db)
84125
85126
def initialize(db_type: str, db_path: Path):
    """Bind connection and create tables.

    Binds the proxy to the requested backend via ``bind`` and then creates
    the tables for every model defined in this module.
    """
    bind(db_type, db_path)
    models = [Document, Section, Content]
    db_proxy.create_tables(models)
0 commit comments