44//! Tantivy requires a strict schema, whereas Atomic is dynamic.
55//! We deal with this discrepancy by
66
7- use crate :: { appstate:: AppState , errors:: AtomicServerResult } ;
7+ use std:: collections:: HashSet ;
8+
9+ use crate :: {
10+ appstate:: AppState ,
11+ errors:: { AtomicServerError , AtomicServerResult } ,
12+ search:: { resource_to_facet, Fields } ,
13+ } ;
814use actix_web:: { web, HttpResponse } ;
9- use atomic_lib:: { urls, Resource , Storelike } ;
15+ use atomic_lib:: { errors :: AtomicResult , urls, Db , Resource , Storelike } ;
1016use serde:: Deserialize ;
1117use tantivy:: { collector:: TopDocs , query:: QueryParser } ;
1218
@@ -20,6 +26,8 @@ pub struct SearchQuery {
2026 pub limit : Option < usize > ,
2127 /// Filter by Property URL
2228 pub property : Option < String > ,
29+ /// Only include resources that have this resource as its ancestor
30+ pub parent : Option < String > ,
2331}
2432
2533/// Parses a search query and responds with a list of resources
@@ -43,110 +51,45 @@ pub async fn search_query(
4351 default_limit
4452 } ;
4553
46- let mut should_fuzzy = true ;
47- if params. property . is_some ( ) {
48- // Fuzzy searching is not possible when filtering by property
49- should_fuzzy = false ;
50- }
54+ // With this first limit, we go for a greater number - as the user may not have the rights to the first ones!
55+ // We filter these results later.
56+ // https://github.com/atomicdata-dev/atomic-data-rust/issues/279.
57+ let initial_results_limit = 100 ;
5158
52- let mut subjects: Vec < String > = Vec :: new ( ) ;
53- // These are not used at this moment, but would be quite useful in RDF context.
54- let mut atoms: Vec < StringAtom > = Vec :: new ( ) ;
59+ let mut query_list: Vec < ( tantivy:: query:: Occur , Box < dyn tantivy:: query:: Query > ) > = Vec :: new ( ) ;
60+
61+ if let Some ( parent) = params. parent . clone ( ) {
62+ let query = build_parent_query ( parent, & fields, store) ?;
63+
64+ query_list. push ( ( tantivy:: query:: Occur :: Must , Box :: new ( query) ) ) ;
65+ }
5566
5667 if let Some ( q) = params. q . clone ( ) {
57- // If any of these substrings appear, the user wants an exact / advanced search
58- let dont_fuzz_strings = vec ! [ "*" , "AND" , "OR" , "[" , "\" " , ":" , "+" , "-" , " " ] ;
59- for substr in dont_fuzz_strings {
60- if q. contains ( substr) {
61- should_fuzzy = false
62- }
63- }
68+ let fuzzy = should_fuzzy ( & params. property , & q) ;
6469
65- let query: Box < dyn tantivy:: query:: Query > = if should_fuzzy {
66- let term = tantivy:: Term :: from_field_text ( fields. value , & q) ;
67- let query = tantivy:: query:: FuzzyTermQuery :: new_prefix ( term, 1 , true ) ;
68- Box :: new ( query)
70+ let query = if fuzzy {
71+ build_fuzzy_query ( & fields, & q) ?
6972 } else {
70- // construct the query
71- let query_parser = QueryParser :: for_index (
73+ build_query (
74+ & fields,
75+ & q,
76+ params. property . clone ( ) ,
7277 & appstate. search_state . index ,
73- vec ! [
74- fields. subject,
75- // I don't think we need to search in the property
76- // fields.property,
77- fields. value,
78- ] ,
79- ) ;
80- let full_query = if let Some ( prop) = & params. property {
81- format ! ( "property:{:?} AND {}" , prop, & q)
82- } else {
83- q
84- } ;
85- query_parser
86- . parse_query ( & full_query)
87- . map_err ( |e| format ! ( "Error parsing query {}" , e) ) ?
78+ ) ?
8879 } ;
8980
90- // With this first limit, we go for a greater number - as the user may not have the rights to the first ones!
91- // We filter these results later.
92- // https://github.com/atomicdata-dev/atomic-data-rust/issues/279.
93- let initial_results_limit = 100 ;
94-
95- // execute the query
96- let top_docs = searcher
97- . search ( & query, & TopDocs :: with_limit ( initial_results_limit) )
98- . map_err ( |e| format ! ( "Error with creating search results: {} " , e) ) ?;
99-
100- // convert found documents to resources
101- for ( _score, doc_address) in top_docs {
102- let retrieved_doc = searcher. doc ( doc_address) ?;
103- let subject_val = retrieved_doc. get_first ( fields. subject ) . ok_or ( "No 'subject' in search doc found. This is required when indexing. Run with --rebuild-index" ) ?;
104- let prop_val = retrieved_doc. get_first ( fields. property ) . ok_or ( "No 'property' in search doc found. This is required when indexing. Run with --rebuild-index" ) ?;
105- let value_val = retrieved_doc. get_first ( fields. value ) . ok_or ( "No 'value' in search doc found. This is required when indexing. Run with --rebuild-index" ) ?;
106- let subject = match subject_val {
107- tantivy:: schema:: Value :: Str ( s) => s. to_string ( ) ,
108- _else => {
109- return Err ( format ! (
110- "Search schema error: Subject is not a string! Doc: {:?}" ,
111- retrieved_doc
112- )
113- . into ( ) )
114- }
115- } ;
116- let property = match prop_val {
117- tantivy:: schema:: Value :: Str ( s) => s. to_string ( ) ,
118- _else => {
119- return Err ( format ! (
120- "Search schema error: Property is not a string! Doc: {:?}" ,
121- retrieved_doc
122- )
123- . into ( ) )
124- }
125- } ;
126- let value = match value_val {
127- tantivy:: schema:: Value :: Str ( s) => s. to_string ( ) ,
128- _else => {
129- return Err ( format ! (
130- "Search schema error: Value is not a string! Doc: {:?}" ,
131- retrieved_doc
132- )
133- . into ( ) )
134- }
135- } ;
136- if subjects. contains ( & subject) {
137- continue ;
138- } else {
139- subjects. push ( subject. clone ( ) ) ;
140- let atom = StringAtom {
141- subject,
142- property,
143- value,
144- } ;
145- atoms. push ( atom) ;
146- }
147- }
81+ query_list. push ( ( tantivy:: query:: Occur :: Must , query) ) ;
14882 }
14983
84+ let query = tantivy:: query:: BooleanQuery :: new ( query_list) ;
85+
86+ // execute the query
87+ let top_docs = searcher
88+ . search ( & query, & TopDocs :: with_limit ( initial_results_limit) )
89+ . map_err ( |e| format ! ( "Error with creating search results: {} " , e) ) ?;
90+
91+ let ( subjects, _atoms) = docs_to_resources ( top_docs, & fields, & searcher) ?;
92+
15093 // Create a valid atomic data resource.
15194 // You'd think there would be a simpler way of getting the requested URL...
15295 let subject = format ! (
@@ -213,7 +156,7 @@ pub async fn search_index_rdf(
213156 get_inner_value ( t. object ) ,
214157 ) {
215158 ( Some ( s) , Some ( p) , Some ( o) ) => {
216- crate :: search:: add_triple ( & writer, s, p, o, & fields) . ok ( ) ;
159+ crate :: search:: add_triple ( & writer, s, p, o, None , & fields) . ok ( ) ;
217160 }
218161 _ => return Ok ( ( ) ) ,
219162 } ;
@@ -244,9 +187,125 @@ fn get_inner_value(t: Term) -> Option<String> {
244187 }
245188}
246189
247- #[ derive( Debug ) ]
/// One subject / property / value triple in which every part is a plain string.
///
/// Equality and hashing take all three fields into account, so two atoms are
/// only considered duplicates when subject, property and value all match.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct StringAtom {
    /// The subject the atom belongs to.
    pub subject: String,
    /// The property of the atom.
    pub property: String,
    /// The value of the atom, as a string.
    pub value: String,
}
196+
/// Decides whether the search term `q` should be matched fuzzily.
///
/// Returns `false` when a `property` filter is present, or when `q` contains
/// any marker that signals an exact / advanced query (wildcards, boolean
/// operators, ranges, quoted phrases, field prefixes, must / must-not signs or
/// whitespace). Returns `true` otherwise, including for an empty `q`.
fn should_fuzzy(property: &Option<String>, q: &str) -> bool {
    // If any of these substrings appear, the user wants an exact / advanced search.
    // The double-quote entry is a bare `"`, so a quoted term without spaces
    // (e.g. `"phrase"`) is also recognised as an exact search.
    const DONT_FUZZ_STRINGS: [&str; 9] = ["*", "AND", "OR", "[", "\"", ":", "+", "-", " "];

    if property.is_some() {
        // Fuzzy searching is not possible when filtering by property
        return false;
    }

    !DONT_FUZZ_STRINGS.iter().any(|marker| q.contains(*marker))
}
213+
214+ fn build_fuzzy_query ( fields : & Fields , q : & str ) -> AtomicResult < Box < dyn tantivy:: query:: Query > > {
215+ let term = tantivy:: Term :: from_field_text ( fields. value , q) ;
216+ let query = tantivy:: query:: FuzzyTermQuery :: new_prefix ( term, 1 , true ) ;
217+
218+ Ok ( Box :: new ( query) )
219+ }
220+
221+ fn build_query (
222+ fields : & Fields ,
223+ q : & str ,
224+ property : Option < String > ,
225+ index : & tantivy:: Index ,
226+ ) -> AtomicResult < Box < dyn tantivy:: query:: Query > > {
227+ // construct the query
228+ let query_parser = QueryParser :: for_index (
229+ index,
230+ vec ! [
231+ fields. subject,
232+ // I don't think we need to search in the property
233+ // fields.property,
234+ fields. value,
235+ ] ,
236+ ) ;
237+
238+ let query_text = if let Some ( prop) = property {
239+ format ! ( "property:{:?} AND {}" , prop, & q)
240+ } else {
241+ q. to_string ( )
242+ } ;
243+
244+ let query = query_parser
245+ . parse_query ( & query_text)
246+ . map_err ( |e| format ! ( "Error parsing query {}" , e) ) ?;
247+
248+ Ok ( query)
249+ }
250+
251+ fn build_parent_query (
252+ subject : String ,
253+ fields : & Fields ,
254+ store : & Db ,
255+ ) -> AtomicServerResult < tantivy:: query:: TermQuery > {
256+ let resource = store. get_resource ( subject. as_str ( ) ) ?;
257+ let facet = resource_to_facet ( & resource, store) ?;
258+
259+ let term = tantivy:: Term :: from_facet ( fields. hierarchy , & facet) ;
260+
261+ Ok ( tantivy:: query:: TermQuery :: new (
262+ term,
263+ tantivy:: schema:: IndexRecordOption :: Basic ,
264+ ) )
265+ }
266+
267+ fn unpack_value (
268+ value : & tantivy:: schema:: Value ,
269+ document : & tantivy:: Document ,
270+ name : String ,
271+ ) -> Result < String , AtomicServerError > {
272+ match value {
273+ tantivy:: schema:: Value :: Str ( s) => Ok ( s. to_string ( ) ) ,
274+ _else => Err ( format ! (
275+ "Search schema error: {} is not a string! Doc: {:?}" ,
276+ name, document
277+ )
278+ . into ( ) ) ,
279+ }
280+ }
281+
282+ fn docs_to_resources (
283+ docs : Vec < ( f32 , tantivy:: DocAddress ) > ,
284+ fields : & Fields ,
285+ searcher : & tantivy:: LeasedItem < tantivy:: Searcher > ,
286+ ) -> Result < ( Vec < String > , Vec < StringAtom > ) , AtomicServerError > {
287+ let mut subjects: HashSet < String > = HashSet :: new ( ) ;
288+ // These are not used at this moment, but would be quite useful in RDF context.
289+ let mut atoms: HashSet < StringAtom > = HashSet :: new ( ) ;
290+
291+ // convert found documents to resources
292+ for ( _score, doc_address) in docs {
293+ let retrieved_doc = searcher. doc ( doc_address) ?;
294+ let subject_val = retrieved_doc. get_first ( fields. subject ) . ok_or ( "No 'subject' in search doc found. This is required when indexing. Run with --rebuild-index" ) ?;
295+ let prop_val = retrieved_doc. get_first ( fields. property ) . ok_or ( "No 'property' in search doc found. This is required when indexing. Run with --rebuild-index" ) ?;
296+ let value_val = retrieved_doc. get_first ( fields. value ) . ok_or ( "No 'value' in search doc found. This is required when indexing. Run with --rebuild-index" ) ?;
297+
298+ let subject = unpack_value ( subject_val, & retrieved_doc, "Subject" . to_string ( ) ) ?;
299+ let property = unpack_value ( prop_val, & retrieved_doc, "Property" . to_string ( ) ) ?;
300+ let value = unpack_value ( value_val, & retrieved_doc, "Value" . to_string ( ) ) ?;
301+
302+ subjects. insert ( subject. clone ( ) ) ;
303+ atoms. insert ( StringAtom {
304+ subject,
305+ property,
306+ value,
307+ } ) ;
308+ }
309+
310+ Ok ( ( subjects. into_iter ( ) . collect ( ) , atoms. into_iter ( ) . collect ( ) ) )
311+ }
0 commit comments