88#include " utils.h"
99#include " value_generators.h"
1010
11+ #include < algorithm>
1112#include < cstddef>
1213#include < initializer_list>
1314#include < ios>
@@ -38,13 +39,25 @@ size_t EstimateColumnStringMemoryUsage(
3839 if (item_estimated_size == ColumnString::EstimatedValueSize{0 } && total_items_size && number_of_items)
3940 item_estimated_size = ColumnString::EstimatedValueSize (static_cast <double >(*total_items_size) / number_of_items);
4041
41- return number_of_items * sizeof (std::string_view)
42- + number_of_items * static_cast <size_t >(item_estimated_size) * value_to_estimation_average_size_ratio
42+ const size_t estimated_total_item_size = number_of_items * static_cast <size_t >(item_estimated_size) * value_to_estimation_average_size_ratio;
43+ const auto estimated_number_of_blocks = std::max<size_t >(1 , estimated_total_item_size ? COLUMN_STRING_DEFAULT_BLOCK_SIZE / estimated_total_item_size : 1 );
44+
45+ // space wasted in block since not all items can be fit perfectly, and there is some unused space at the end of the block.
46+ const auto estimate_lost_space_in_block = (static_cast <size_t >(item_estimated_size) != 0
47+ ? COLUMN_STRING_DEFAULT_BLOCK_SIZE % static_cast <size_t >(static_cast <size_t >(item_estimated_size) * value_to_estimation_average_size_ratio)
48+ : COLUMN_STRING_DEFAULT_BLOCK_SIZE / 10 );
49+
50+ const auto max_estimation_error_factor = item_estimated_size == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
51+
52+ return (number_of_items * sizeof (std::string_view)
53+ + estimated_total_item_size
54+ + estimate_lost_space_in_block * estimated_number_of_blocks
4355 + COLUMN_STRING_DEFAULT_BLOCK_SIZE
4456 // It is hard to compute overhead added by vector<ColumnString::Block>
4557 // (mostly because we don't know number of ColumnString::Block instances from outside, and this number depends on many factors),
4658 // so we just make a guess.
47- + COLUMN_STRING_MAX_EXPECTED_MEMORY_OVERHEAD;
59+ + COLUMN_STRING_MAX_EXPECTED_MEMORY_OVERHEAD)
60+ * max_estimation_error_factor;
4861}
4962
5063std::string ScaleString (std::string str, size_t required_size) {
@@ -289,8 +302,15 @@ struct ColumnStringEstimatedValueSizeTest : public ::testing::TestWithParam<std:
289302 // Adjust number of items so the test doesn't use too much memory
290303 if (static_cast <size_t >(single_value_size_estimation) != 0
291304 // *2 since we store both reference values and values in column itself.
292- && EstimateColumnStringMemoryUsage (expected_number_of_items, single_value_size_estimation, size_ratio.average ) * 2 > MAX_MEMORY_USAGE) {
305+ && EstimateColumnStringMemoryUsage (expected_number_of_items, single_value_size_estimation, size_ratio.average ) > MAX_MEMORY_USAGE) {
306+ const auto old_expected_number_of_items = expected_number_of_items;
293307 expected_number_of_items = MAX_MEMORY_USAGE / (static_cast <size_t >(single_value_size_estimation) * 2 * size_ratio.average );
308+
309+ std::cerr << " To avoid using too much memory, reduced number of items in test"
310+ << " from " << old_expected_number_of_items
311+ << " to " << expected_number_of_items
312+ << " , expected item size is " << single_value_size_estimation
313+ << std::endl;
294314 }
295315 }
296316
@@ -318,9 +338,9 @@ struct ColumnStringEstimatedValueSizeTest : public ::testing::TestWithParam<std:
318338 ASSERT_TRUE (CompareRecursive (values, column));
319339 }
320340
321- size_t EstimateMemoryUsage (size_t total_values_size) {
341+ size_t EstimateMemoryUsage (size_t total_values_size, float expected_number_of_items_multiplier = 1.0 ) {
322342 const auto & [single_value_size_estimation, size_ratio] = GetParam ();
323- return EstimateColumnStringMemoryUsage (expected_number_of_items, single_value_size_estimation, size_ratio.average , total_values_size);
343+ return EstimateColumnStringMemoryUsage (expected_number_of_items * expected_number_of_items_multiplier , single_value_size_estimation, size_ratio.average , total_values_size);
324344 }
325345};
326346
@@ -363,8 +383,10 @@ TEST_P(ColumnStringEstimatedValueSizeTest, AppendNoReserve)
363383
364384 EXPECT_NO_FATAL_FAILURE (AppendStrings (col, total_values_size));
365385
366- // since there was no Reserve call prior, there could be more some overallocations, hence *2
367- EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * 2 );
386+ const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
387+
388+ // Since there was no prior Reserve call, there could be some more overallocations, hence the estimation error factor.
389+ EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * max_estimation_error_factor);
368390}
369391
370392TEST_P (ColumnStringEstimatedValueSizeTest, ReserveExactAndAppend)
@@ -377,8 +399,10 @@ TEST_P(ColumnStringEstimatedValueSizeTest, ReserveExactAndAppend)
377399 EXPECT_NO_THROW (col.Reserve (expected_number_of_items));
378400 EXPECT_NO_FATAL_FAILURE (AppendStrings (col, total_values_size));
379401
402+ const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
403+
380404 // Allow for overallocations, hence the max_estimation_error_factor multiplier
381- EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * 2 );
405+ EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * max_estimation_error_factor );
382406}
383407
384408TEST_P (ColumnStringEstimatedValueSizeTest, ReserveLessAndAppend)
@@ -391,8 +415,10 @@ TEST_P(ColumnStringEstimatedValueSizeTest, ReserveLessAndAppend)
391415 EXPECT_NO_THROW (col.Reserve (expected_number_of_items * .8 ));
392416 EXPECT_NO_FATAL_FAILURE (AppendStrings (col, total_values_size));
393417
418+ const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
419+
394420 // Allow for overallocations, hence the max_estimation_error_factor multiplier
395- EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * 2 );
421+ EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * max_estimation_error_factor );
396422}
397423
398424TEST_P (ColumnStringEstimatedValueSizeTest, ReserveMoreAndAppend)
@@ -405,8 +431,10 @@ TEST_P(ColumnStringEstimatedValueSizeTest, ReserveMoreAndAppend)
405431 EXPECT_NO_THROW (col.Reserve (expected_number_of_items * 1.2 ));
406432 EXPECT_NO_FATAL_FAILURE (AppendStrings (col, total_values_size));
407433
434+ const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
435+
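408436 // Reserve was called for 1.2x the expected number of items, so estimate for 1.2x items; max_estimation_error_factor allows for overallocations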
409- EXPECT_LT (col.MemoryUsage (), EstimateColumnStringMemoryUsage (expected_number_of_items * 1.2 , single_value_size_estimation, size_ratio. average , total_values_size ) * 2 );
437+ EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size, 1.2 ) * max_estimation_error_factor );
410438}
411439
412440/* * TODO more tests
0 commit comments