Skip to content

Commit 124ee58

Browse files
Pollepsjoepio
authored andcommitted
#678 Dont index unfiltered queries
1 parent 0b17753 commit 124ee58

File tree

3 files changed

+138
-81
lines changed

3 files changed

+138
-81
lines changed

lib/src/db.rs

Lines changed: 114 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ mod val_prop_sub_index;
1111
use std::{
1212
collections::{HashMap, HashSet},
1313
sync::{Arc, Mutex},
14+
vec,
1415
};
1516

1617
use tracing::{info, instrument};
@@ -19,7 +20,10 @@ use crate::{
1920
agents::ForAgent,
2021
atoms::IndexAtom,
2122
commit::CommitResponse,
22-
db::{query_index::NO_VALUE, val_prop_sub_index::find_in_val_prop_sub_index},
23+
db::{
24+
query_index::{requires_query_index, NO_VALUE},
25+
val_prop_sub_index::find_in_val_prop_sub_index,
26+
},
2327
endpoints::{default_endpoints, Endpoint, HandleGetContext},
2428
errors::{AtomicError, AtomicResult},
2529
resources::PropVals,
@@ -35,8 +39,8 @@ use self::{
3539
remove_atom_from_prop_val_sub_index,
3640
},
3741
query_index::{
38-
check_if_atom_matches_watched_query_filters, query_indexed, update_indexed_member,
39-
IndexIterator, QueryFilter,
42+
check_if_atom_matches_watched_query_filters, query_sorted_indexed, should_include_resource,
43+
update_indexed_member, IndexIterator, QueryFilter,
4044
},
4145
val_prop_sub_index::{add_atom_to_reference_index, remove_atom_from_reference_index},
4246
};
@@ -209,6 +213,110 @@ impl Db {
209213

210214
Some(Resource::from_propvals(propvals, subject))
211215
}
216+
217+
fn build_index_for_atom(
218+
&self,
219+
atom: &IndexAtom,
220+
query_filter: &QueryFilter,
221+
) -> AtomicResult<()> {
222+
// Get the SortableValue either from the Atom or the Resource.
223+
let sort_val: SortableValue = if let Some(sort) = &query_filter.sort_by {
224+
if &atom.property == sort {
225+
atom.sort_value.clone()
226+
} else {
227+
// Find the sort value in the store
228+
match self.get_value(&atom.subject, sort) {
229+
Ok(val) => val.to_sortable_string(),
230+
// If we try sorting on a value that does not exist,
231+
// we'll use an empty string as the sortable value.
232+
Err(_) => NO_VALUE.to_string(),
233+
}
234+
}
235+
} else {
236+
atom.sort_value.clone()
237+
};
238+
239+
update_indexed_member(self, query_filter, &atom.subject, &sort_val, false)?;
240+
Ok(())
241+
}
242+
243+
fn get_index_iterator_for_query(&self, q: &Query) -> IndexIterator {
244+
match (&q.property, q.value.as_ref()) {
245+
(Some(prop), val) => find_in_prop_val_sub_index(self, prop, val),
246+
(None, None) => self.all_index_atoms(q.include_external),
247+
(None, Some(val)) => find_in_val_prop_sub_index(self, val, None),
248+
}
249+
}
250+
251+
fn query_basic(&self, q: &Query) -> AtomicResult<QueryResult> {
252+
let self_url = self
253+
.get_self_url()
254+
.ok_or("No self_url set, required for Queries")?;
255+
256+
let mut subjects: Vec<String> = vec![];
257+
let mut resources: Vec<Resource> = vec![];
258+
let mut total_count = 0;
259+
260+
let atoms = self.get_index_iterator_for_query(q);
261+
262+
for (i, atom_res) in atoms.enumerate() {
263+
let atom = atom_res?;
264+
265+
if q.offset > i {
266+
continue;
267+
}
268+
269+
if !q.include_external && !atom.subject.starts_with(&self_url) {
270+
continue;
271+
}
272+
if q.limit.is_none() || subjects.len() < q.limit.unwrap() {
273+
if !should_include_resource(q) {
274+
subjects.push(atom.subject.clone());
275+
total_count += 1;
276+
continue;
277+
}
278+
279+
if let Ok(resource) = self.get_resource_extended(&atom.subject, true, &q.for_agent)
280+
{
281+
subjects.push(atom.subject.clone());
282+
resources.push(resource);
283+
}
284+
}
285+
286+
total_count += 1;
287+
}
288+
289+
Ok(QueryResult {
290+
subjects,
291+
resources,
292+
count: total_count,
293+
})
294+
}
295+
296+
fn query_complex(&self, q: &Query) -> AtomicResult<QueryResult> {
297+
let (mut subjects, mut resources, mut total_count) = query_sorted_indexed(self, q)?;
298+
let q_filter: QueryFilter = q.into();
299+
300+
if total_count == 0 && !q_filter.is_watched(self) {
301+
info!(filter = ?q_filter, "Building query index");
302+
let atoms = self.get_index_iterator_for_query(q);
303+
q_filter.watch(self)?;
304+
305+
// Build indexes
306+
for atom in atoms.flatten() {
307+
self.build_index_for_atom(&atom, &q_filter)?;
308+
}
309+
310+
// Query through the new indexes.
311+
(subjects, resources, total_count) = query_sorted_indexed(self, q)?;
312+
}
313+
314+
Ok(QueryResult {
315+
subjects,
316+
resources,
317+
count: total_count,
318+
})
319+
}
212320
}
213321

214322
impl Storelike for Db {
@@ -468,50 +576,11 @@ impl Storelike for Db {
468576
/// Tries `query_cache`, which you should implement yourself.
469577
#[instrument(skip(self))]
470578
fn query(&self, q: &Query) -> AtomicResult<QueryResult> {
471-
let q_filter: QueryFilter = q.into();
472-
if let Ok(res) = query_indexed(self, q) {
473-
if res.count > 0 || q_filter.is_watched(self) {
474-
// Yay, we have a cache hit!
475-
// We don't have to create the indexes, so we can return early.
476-
return Ok(res);
477-
}
478-
}
479-
480-
// Maybe make this optional?
481-
q_filter.watch(self)?;
482-
483-
info!(filter = ?q_filter, "Building query index");
484-
485-
let atoms: IndexIterator = match (&q.property, q.value.as_ref()) {
486-
(Some(prop), val) => find_in_prop_val_sub_index(self, prop, val),
487-
(None, None) => self.all_index_atoms(q.include_external),
488-
(None, Some(val)) => find_in_val_prop_sub_index(self, val, None),
489-
};
490-
491-
for a in atoms {
492-
let atom = a?;
493-
// Get the SortableValue either from the Atom or the Resource.
494-
let sort_val: SortableValue = if let Some(sort) = &q_filter.sort_by {
495-
if &atom.property == sort {
496-
atom.sort_value
497-
} else {
498-
// Find the sort value in the store
499-
match self.get_value(&atom.subject, sort) {
500-
Ok(val) => val.to_sortable_string(),
501-
// If we try sorting on a value that does not exist,
502-
// we'll use an empty string as the sortable value.
503-
Err(_) => NO_VALUE.to_string(),
504-
}
505-
}
506-
} else {
507-
atom.sort_value
508-
};
509-
510-
update_indexed_member(self, &q_filter, &atom.subject, &sort_val, false)?;
579+
if requires_query_index(q) {
580+
return self.query_complex(q);
511581
}
512582

513-
// Retry the same query!
514-
query_indexed(self, q)
583+
self.query_basic(q)
515584
}
516585

517586
#[instrument(skip(self))]

lib/src/db/query_index.rs

Lines changed: 23 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,10 @@ pub const NO_VALUE: &str = "";
7373

7474
#[tracing::instrument(skip(store))]
7575
/// Performs a query on the `query_index` Tree, which is a lexicographic sorted list of all hits for QueryFilters.
76-
pub fn query_indexed(store: &Db, q: &Query) -> AtomicResult<QueryResult> {
76+
pub fn query_sorted_indexed(
77+
store: &Db,
78+
q: &Query,
79+
) -> AtomicResult<(Vec<String>, Vec<Resource>, usize)> {
7780
// When there is no explicit start / end value passed, we use the very first and last
7881
// lexicographic characters in existence to make the range practically encompass all values.
7982
let start = if let Some(val) = &q.start_val {
@@ -97,18 +100,14 @@ pub fn query_indexed(store: &Db, q: &Query) -> AtomicResult<QueryResult> {
97100
};
98101

99102
let mut subjects: Vec<String> = vec![];
100-
let mut resources = Vec::new();
103+
let mut resources: Vec<Resource> = vec![];
101104
let mut count = 0;
102105

103106
let self_url = store
104107
.get_self_url()
105108
.ok_or("No self_url set, required for Queries")?;
106109

107-
let limit = if let Some(limit) = q.limit {
108-
limit
109-
} else {
110-
std::usize::MAX
111-
};
110+
let limit = q.limit.unwrap_or(std::usize::MAX);
112111

113112
for (i, kv) in iter.enumerate() {
114113
// The user's maximum amount of results has not yet been reached
@@ -118,49 +117,30 @@ pub fn query_indexed(store: &Db, q: &Query) -> AtomicResult<QueryResult> {
118117
if in_selection {
119118
let (k, _v) = kv.map_err(|_e| "Unable to parse query_cached")?;
120119
let (_q_filter, _val, subject) = parse_collection_members_key(&k)?;
120+
121121
// If no external resources should be included, skip this one if it's an external resource
122122
if !q.include_external && !subject.starts_with(&self_url) {
123123
continue;
124124
}
125125

126-
// When an agent is defined, we must perform authorization checks
127-
// WARNING: EXPENSIVE!
128-
// TODO: Make async
129-
if q.include_nested || q.for_agent != ForAgent::Sudo {
130-
match store.get_resource_extended(subject, true, &q.for_agent) {
131-
Ok(resource) => {
132-
resources.push(resource);
133-
subjects.push(subject.into())
134-
}
135-
Err(e) => match &e.error_type {
136-
crate::AtomicErrorType::NotFoundError => {}
137-
crate::AtomicErrorType::UnauthorizedError => {}
138-
_other => {
139-
return Err(format!(
140-
"Error when getting resource in collection: {}",
141-
&e
142-
)
143-
.into());
144-
}
145-
},
126+
if should_include_resource(q) {
127+
if let Ok(resource) = store.get_resource_extended(subject, true, &q.for_agent) {
128+
resources.push(resource);
129+
subjects.push(subject.into());
146130
}
147131
} else {
148-
// If there is no need for nested resources, and no auth checks, we can skip the expensive part!
149-
subjects.push(subject.into())
132+
subjects.push(subject.into());
150133
}
151134
}
135+
152136
// We iterate over every single resource, even if we don't perform any computation on the items.
153137
// This helps with pagination, but it comes at a serious performance cost. We might need to change how this works later on.
154138
// Also, this count does not take into account the `include_external` filter.
139+
count += 1;
155140
// https://github.com/atomicdata-dev/atomic-server/issues/290
156-
count = i + 1;
157141
}
158142

159-
Ok(QueryResult {
160-
count,
161-
resources,
162-
subjects,
163-
})
143+
Ok((subjects, resources, count))
164144
}
165145

166146
/// Checks if the resource will match with a QueryFilter.
@@ -390,6 +370,14 @@ pub fn parse_collection_members_key(bytes: &[u8]) -> AtomicResult<(QueryFilter,
390370
Ok((q_filter, value, subject))
391371
}
392372

373+
pub fn requires_query_index(query: &Query) -> bool {
374+
query.sort_by.is_some() || query.start_val.is_some() || query.end_val.is_some()
375+
}
376+
377+
pub fn should_include_resource(query: &Query) -> bool {
378+
query.include_nested || query.for_agent != ForAgent::Sudo
379+
}
380+
393381
#[cfg(test)]
394382
pub mod test {
395383
use crate::urls;

lib/src/db/test.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,8 @@ fn queries() {
297297
assert_eq!(res.resources.len(), limit, "nested resources");
298298

299299
q.sort_by = Some(sort_by.into());
300-
println!("!!!!!!! !!!!!!!! SORT STUFF");
301300
let mut res = store.query(&q).unwrap();
301+
assert!(!res.resources.is_empty(), "resources should be returned");
302302
let mut prev_resource = res.resources[0].clone();
303303
// For one resource, we will change the order by changing its value
304304
let mut resource_changed_order_opt = None;

0 commit comments

Comments
 (0)