11import itertools
2+ import sys
23from collections import defaultdict
34
45from bson import SON , json_util
1112from django .db .models .functions .math import Power
1213from django .db .models .lookups import IsNull
1314from django .db .models .sql import compiler
14- from django .db .models .sql .constants import GET_ITERATOR_CHUNK_SIZE , MULTI , SINGLE
15+ from django .db .models .sql .constants import MULTI , SINGLE
1516from django .db .models .sql .datastructures import BaseTable
1617from django .utils .functional import cached_property
1718from pymongo import ASCENDING , DESCENDING
1819
1920from .query import MongoQuery , wrap_database_errors
2021
22+ # Upper bound on a MongoDB cursor batch response, in bytes (16 MiB); note the value is bytes despite the _MB suffix.
23+ MAX_BATCH_SIZE_MB = 1024 * 1024 * 16
24+
2125
2226class SQLCompiler (compiler .SQLCompiler ):
2327 """Base class for all Mongo compilers."""
@@ -235,9 +239,7 @@ def pre_sql_setup(self, with_col_aliases=False):
235239 self .order_by_objs = [expr .replace_expressions (all_replacements ) for expr , _ in order_by ]
236240 return extra_select , order_by , group_by
237241
238- def execute_sql (
239- self , result_type = MULTI , chunked_fetch = False , chunk_size = GET_ITERATOR_CHUNK_SIZE
240- ):
242+ def execute_sql (self , result_type = MULTI , chunked_fetch = False , chunk_size = None ):
241243 self .pre_sql_setup ()
242244 try :
243245 query = self .build_query (
@@ -258,7 +260,8 @@ def execute_sql(
258260 else :
259261 return self ._make_result (obj , self .columns )
260262 # result_type is MULTI
261- cursor .batch_size (chunk_size )
263+ # if chunk_size is not None:
264+ # cursor.batch_size(chunk_size)
262265 result = self .cursor_iter (cursor , chunk_size , self .columns )
263266 if not chunked_fetch :
264267 # If using non-chunked reads, read data into memory.
@@ -270,7 +273,7 @@ def results_iter(
270273 results = None ,
271274 tuple_expected = False ,
272275 chunked_fetch = False ,
273- chunk_size = GET_ITERATOR_CHUNK_SIZE ,
276+ chunk_size = None ,
274277 ):
275278 """
276279 Return an iterator over the results from executing query given
@@ -318,12 +321,25 @@ def _make_result(self, entity, columns):
318321 result .append (obj .get (name ))
319322 return result
320323
def cursor_iter(self, cursor, _, columns):
    """
    Yield chunks (lists) of result rows built from ``cursor``.

    The second positional argument (historically ``chunk_size``) is accepted
    for signature compatibility but ignored: the chunk size is chosen here.
    The first chunk holds up to 101 rows (MongoDB's default initial batch
    size). Once that chunk is full, the chunk size is re-estimated as the
    number of rows shaped like the current one that fit in
    ``MAX_BATCH_SIZE_MB`` bytes.
    Read more here: https://www.mongodb.com/docs/manual/core/cursors/#cursor-batches

    Each row is converted with ``self._make_result(row, columns)``. The final
    chunk is always yielded, even when it is empty, so callers always receive
    at least one chunk.
    """

    def approximate_size(document):
        # sys.getsizeof() is shallow: for a dict it reports only the
        # container, not its keys/values. Walk nested containers so the
        # estimate actually scales with the document's content. This is the
        # in-memory Python footprint, used as an approximation of the
        # payload size, not the exact BSON wire size.
        total = 0
        pending = [document]
        while pending:
            item = pending.pop()
            total += sys.getsizeof(item)
            if isinstance(item, dict):
                pending.extend(item.keys())
                pending.extend(item.values())
            elif isinstance(item, (list, tuple, set, frozenset)):
                pending.extend(item)
        return total

    chunk = []
    chunk_size = 101  # MongoDB's default initial batch size
    resized = False

    for row in cursor:
        chunk.append(self._make_result(row, columns))

        if len(chunk) == chunk_size:
            if not resized:
                # First chunk complete: using the current row as
                # representative, approximate how many rows fit in the
                # maximum batch payload and use that as the new chunk size.
                # Clamp to 1 so a very large row can never produce a chunk
                # size of 0, which would disable chunking entirely.
                chunk_size = max(1, MAX_BATCH_SIZE_MB // approximate_size(row))
                resized = True
            yield chunk
            chunk = []
    yield chunk
0 commit comments