Skip to content

Commit fb3b7a7

Browse files
authored
Merge pull request #165 from dongxinEric/bugfix/135/do-not-store-arrays-as-sparse-arrays
Resolves #135: DocLayer is not able to handle sparse arrays correctly.
2 parents 577c1d8 + e2ffcb2 commit fb3b7a7

File tree

4 files changed

+53
-42
lines changed

4 files changed

+53
-42
lines changed

src/QLExpression.actor.cpp

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ ACTOR static Future<Void> doPathExpansionIfNotArray(PromiseStream<Reference<IRea
9898
* - Case 3: Component after array root is numeric and treating it as field name. Ex: a.b.1.c, b is array
9999
* of size 3 and treating "1" as field name. This can be expanded to a.b.0.1.c, a.b.1.1.c and a.b.2.1.c. As
100100
* Doc Layer serializes numeric field name and array index same way, we have to pass this information.
101+
*
102+
* NOTE: MongoDB does NOT do nested array expansion. For exmaple if the query is a.c and a happens to be an array of
103+
* size 2, the query will be expanded into a.0.c and a.1.c. Then if a[0] is again an array, it will NOT do the expansion
104+
* again. That's why in case 3 we call doPathExpansionIfNotArray(), instead of the normal doPathExpansion().
101105
*/
102106
ACTOR static Future<Void> doArrayExpansion(PromiseStream<Reference<IReadContext>> promises,
103107
Standalone<StringRef> queryPath,
@@ -126,6 +130,7 @@ ACTOR static Future<Void> doArrayExpansion(PromiseStream<Reference<IReadContext>
126130

127131
// Case 3 - Treat next component as field name.
128132
if (nextComponentIsNumeric) {
133+
// Do not do array expansion again.
129134
futures.push_back(doPathExpansionIfNotArray(promises, StringRef(newPath), arrayElementPath, document,
130135
expandLastArray, imputeNulls));
131136
} else { // Case 1
@@ -169,6 +174,12 @@ ACTOR static Future<Void> doPathExpansion(PromiseStream<Reference<IReadContext>>
169174
// expansion. So we also need to check that if the "most recent existing ancestor" is a primitive value, then
170175
// its direct parent is also not an array. If the MREA is an object, then no check is necessary, since MongoDB
171176
// switches back into null-imputing mode if that happens (even if we expanded an array right before).
177+
178+
// NOTE: since arrays are stored as sparse arrays, i.e. null values in the array will not be persisted at all
179+
// it has to impute a null if:
180+
// - last part of the path is numeric
181+
// - it's immediate parent is an array
182+
// - it's within the length of the array
172183
if (imputeNulls && !v.present() && !arrayAncestor.present()) {
173184
std::vector<Future<Optional<DataValue>>> futureAncestors;
174185
futureAncestors.push_back(document->get(StringRef()));
@@ -177,14 +188,19 @@ ACTOR static Future<Void> doPathExpansion(PromiseStream<Reference<IReadContext>>
177188

178189
std::vector<Optional<DataValue>> ancestors = wait(getAll(futureAncestors));
179190

180-
for (auto j = ancestors.rbegin();
181-
!(j == ancestors.rend()) /* yes, it can be !=, but MSVC has a bug, hence !(==)*/; ++j) {
182-
if (j->present()) {
183-
DVTypeCode type = j->get().getSortType();
191+
for (int j = ancestors.size() - 1; j >= 0; --j) {
192+
if (ancestors[j].present()) {
193+
DVTypeCode type = ancestors[j].get().getSortType();
184194
if (type == DVTypeCode::OBJECT ||
185195
(type != DVTypeCode::ARRAY &&
186-
(j + 1 == ancestors.rend() || (j + 1)->get().getSortType() != DVTypeCode::ARRAY))) {
196+
(j == 0 || ancestors[j - 1].get().getSortType() != DVTypeCode::ARRAY))) {
187197
promises.send(Reference<NullContext>(new NullContext()));
198+
} else if (j == ancestors.size() - 2 && dk[dk.size() - 1][0] == (uint8_t)DVTypeCode::NUMBER &&
199+
type == DVTypeCode::ARRAY) {
200+
DataValue dv = DataValue::decode_key_part(dk[dk.size() - 1]);
201+
if (dv.getDouble() < ancestors[j].get().getArraysize()) {
202+
promises.send(Reference<NullContext>(new NullContext()));
203+
}
188204
}
189205
break;
190206
}

test/correctness/gen.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,9 +183,8 @@ def random_id_value():
183183
def random_value():
184184
r = global_prng.random()
185185
while True:
186-
# TODO re-enable None value generating
187186
# if (r < 0.1) and generator_options.test_nulls:
188-
# val = None
187+
# val = None
189188
if (r < 0.2):
190189
val = random_float()
191190
elif (r < 0.4):

test/correctness/mongo_model.py

Lines changed: 31 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -729,11 +729,11 @@ def process_update_operator_rename(self, key, update_expression):
729729
def process_update_operator_set_on_insert(self, key, update_expression, new_doc=False):
730730
# print "Update Operator: $setOnInsert ", key, update_expression
731731
if new_doc:
732-
self.data[key] = OrderedDict(self.data[key].items() + update_expression.items())
732+
self.data[key] = OrderedDict(self.data[key].items() + deepcopy(update_expression.items()))
733733

734734
def process_update_operator_set(self, key, update_expression):
735735
# print "Update Operator: $set ", update
736-
self.data[key] = OrderedDict(self.data[key].items() + update_expression.items())
736+
self.data[key] = OrderedDict(self.data[key].items() + deepcopy(update_expression.items()))
737737

738738
def process_update_operator_unset(self, key, update_expression):
739739
# print "Update Operator: $unset ", update
@@ -837,11 +837,9 @@ def process_update_operator_pull_all(self, key, update_expression):
837837
raise MongoModelException("Cannot apply $pullAll to a non-array field.", code=10142)
838838

839839
def evaluate(self, query, document):
840-
if len(query) == 0:
841-
# If we made it here, the evaluate function must NOT be called from the `find` function, and thus
842-
# we can return false. Note in find() function, an empty query means maches all documents.
843-
return False
844840
acc = True
841+
if len(query) == 0:
842+
return len(document) == 0
845843
for field in query.keys():
846844
if field == '_id':
847845
tmp = OrderedDict()
@@ -962,21 +960,15 @@ def has_operator_expressions(doc):
962960

963961
def process_update_operator(self, key, update, new_doc=False):
964962
op_update = self.has_operator_expressions(update)
965-
old_data = None
966-
967-
try:
968-
old_data = deepcopy(self.data[key])
969-
if op_update:
970-
for k in update:
971-
if k == '$setOnInsert':
972-
self.mapUpdateOperator[k](key, update[k], new_doc=new_doc)
973-
elif k in self.mapUpdateOperator:
974-
self.mapUpdateOperator[k](key, update[k])
975-
else:
976-
self.replace(key, update)
977-
except MongoModelException as e:
978-
self.data[key] = old_data
979-
raise e
963+
old_data = deepcopy(self.data[key])
964+
if op_update:
965+
for k in update:
966+
if k == '$setOnInsert':
967+
self.mapUpdateOperator[k](key, update[k], new_doc=new_doc)
968+
elif k in self.mapUpdateOperator:
969+
self.mapUpdateOperator[k](key, update[k])
970+
else:
971+
self.replace(key, update)
980972

981973
def deep_transform_logical_operators(self, selector=None):
982974
new_selector = {}
@@ -1126,26 +1118,27 @@ def update(self, query, update, upsert, multi):
11261118
raise MongoModelException('multi update only works with $ operators', code=10158)
11271119
if isOperatorUpdate:
11281120
self.validate_update_object(update)
1129-
if len(query) == 0:
1130-
return
1131-
key = query.keys()[0]
11321121
any = False
11331122
old_data = deepcopy(self.data)
11341123
n = 0
11351124
try:
1125+
if len(query) == 0:
1126+
# Update all existing docs. And since the query is empty, do NOT do upsert.
1127+
any = True
1128+
for k in self.data.keys():
1129+
n +=1
1130+
self.process_update_operator(k, update)
1131+
if not multi:
1132+
break
1133+
key = query.keys()[0]
11361134
for k, item in self.data.iteritems():
11371135
if evaluate(key, query[key], item, self.options):
11381136
any = True
11391137
n += 1
11401138
# print "Result: ", item
1141-
if len(update) > 0:
1142-
self.process_update_operator(k, update)
1143-
if not multi:
1144-
return
1145-
else:
1146-
self.replace(k, update)
1147-
if not multi:
1148-
return
1139+
self.process_update_operator(k, update)
1140+
if not multi:
1141+
break
11491142
if any:
11501143
for index in self.indexes:
11511144
if not index.inError:
@@ -1186,7 +1179,7 @@ def update(self, query, update, upsert, multi):
11861179
if "_id" in query:
11871180
update["_id"] = query["_id"]
11881181
self.insert(update)
1189-
else:
1182+
elif not any:
11901183
# mongoDB raise an exception for the '$setOnInsert' update operator even if the upsert is False
11911184
if '$setOnInsert' in update.keys() and len(update['$setOnInsert']) == 0:
11921185
raise MongoModelException(
@@ -1285,7 +1278,11 @@ def validate_and_build_entry(self, documents, first_build=False):
12851278
check_none_at_all=True,
12861279
check_none_next=True,
12871280
debug=False)
1288-
entry = entry + reduce((lambda acc, x: acc + x), map((lambda x: str(x)), _values), "")
1281+
if len(_values) == 0:
1282+
# empty array
1283+
entry = entry + '[]'
1284+
else:
1285+
entry = entry + reduce((lambda acc, x: acc + x), map((lambda x: str(x)), _values), "")
12891286
if entry in seen:
12901287
# print "[{}] in {} ? {}".format(entry, seen, entry in seen)
12911288
# print "Violating keys " + str(key)

test/correctness/smoke/test_update.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
import pytest
2626

2727

28-
@pytest.mark.xfail
2928
def test_update_array_containing_none_value(fixture_collection):
3029
collection = fixture_collection
3130

0 commit comments

Comments
 (0)