@@ -34,6 +34,12 @@ size_t EstimateColumnStringMemoryUsage(
3434 ) {
3535 static const size_t COLUMN_STRING_DEFAULT_BLOCK_SIZE = 4096 ;
3636 static const size_t COLUMN_STRING_MAX_EXPECTED_MEMORY_OVERHEAD = 4096 ;
37+ const float max_estimation_error_factor = item_estimated_size == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
38+
39+ // space wasted in block since not all items can be fit perfectly, and there is some unused space at the end of the block.
40+ const auto estimate_lost_space_in_block = (static_cast <size_t >(item_estimated_size) != 0
41+ ? COLUMN_STRING_DEFAULT_BLOCK_SIZE % static_cast <size_t >(static_cast <size_t >(item_estimated_size) * std::max (1 .0f , value_to_estimation_average_size_ratio))
42+ : COLUMN_STRING_DEFAULT_BLOCK_SIZE / 10 );
3743
3844 // if no estimation provided, use factual total size of all items
3945 if (item_estimated_size == ColumnString::EstimatedValueSize{0 } && total_items_size && number_of_items)
@@ -42,13 +48,6 @@ size_t EstimateColumnStringMemoryUsage(
4248 const size_t estimated_total_item_size = number_of_items * static_cast <size_t >(item_estimated_size) * value_to_estimation_average_size_ratio;
4349 const auto estimated_number_of_blocks = std::max<size_t >(1 , estimated_total_item_size ? COLUMN_STRING_DEFAULT_BLOCK_SIZE / estimated_total_item_size : 1 );
4450
45- // space wasted in block since not all items can be fit perfectly, and there is some unused space at the end of the block.
46- const auto estimate_lost_space_in_block = (static_cast <size_t >(item_estimated_size) != 0
47- ? COLUMN_STRING_DEFAULT_BLOCK_SIZE % static_cast <size_t >(static_cast <size_t >(item_estimated_size) * value_to_estimation_average_size_ratio)
48- : COLUMN_STRING_DEFAULT_BLOCK_SIZE / 10 );
49-
50- const auto max_estimation_error_factor = item_estimated_size == ColumnString::NO_PREALLOCATE ? 2 : 1.2 ;
51-
5251 return (number_of_items * sizeof (std::string_view)
5352 + estimated_total_item_size
5453 + estimate_lost_space_in_block * estimated_number_of_blocks
@@ -213,49 +212,6 @@ TEST(ColumnString, InvalidSizeEstimation) {
213212 EXPECT_THROW (col.SetEstimatedValueSize (ColumnString::EstimatedValueSize (std::numeric_limits<size_t >::max ())), ValidationError);
214213}
215214
216- TEST (ColumnString, WithSizeEstimation) {
217- const ColumnString::EstimatedValueSize value_size_estimations[] = {
218- ColumnString::EstimatedValueSize::TINY,
219- ColumnString::EstimatedValueSize::SMALL,
220- ColumnString::EstimatedValueSize::MEDIUM,
221- ColumnString::EstimatedValueSize::LARGE,
222-
223- // ColumnString::EstimatedValueSize(0),
224- ColumnString::EstimatedValueSize (1 ),
225- ColumnString::EstimatedValueSize (300 ),
226- ColumnString::EstimatedValueSize (10'000 ),
227- };
228-
229- auto values = MakeStrings ();
230- std::cerr << " Number of values: " << values.size () << std::endl;
231-
232- for (ColumnString::EstimatedValueSize estimation : value_size_estimations) {
233- SCOPED_TRACE (::testing::Message (" with estimation: " ) << estimation);
234- std::cerr << " \n Estimation " << estimation << std::endl;
235-
236- auto col = std::make_shared<ColumnString>(estimation);
237-
238- dumpMemoryUsage (" After constructing with estimation" , col);
239-
240- col->Reserve (values.size ());
241- dumpMemoryUsage (" After Reserve()" , col);
242-
243- size_t i = 0 ;
244- for (const auto & v : values) {
245- col->Append (v);
246-
247- EXPECT_EQ (i + 1 , col->Size ());
248- EXPECT_EQ (v, col->At (i));
249-
250- ++i;
251- }
252-
253- dumpMemoryUsage (" After appending all values" , col);
254- }
255- }
256-
257-
258-
259215struct SizeRatio {
260216 std::vector<float > ratios;
261217 float average;
@@ -276,19 +232,6 @@ std::ostream & operator<<(std::ostream& ostr, const SizeRatio & r) {
276232 return ostr << " SizeRatio{ average: " << std::fixed << r.average << " } " ;
277233}
278234
279- // std::vector<SizeRatio> ratios{
280- // // estimation is about right
281- // SizeRatio({0.9, 0.95, 1.0, 1.05, 1.1}),
282- // // estimation is a bit high, real values are about 0.8 of estimated size
283- // SizeRatio({0.75, 0.8, 0.85}),
284- // // estimation is a bit low, real values are about 1.2 of estimated size
285- // SizeRatio({1.25, 1.2, 1.25}),
286- // // estimation is to high, real values are about 2.0 of estimated size
287- // SizeRatio({1.9, 2, 2.1}),
288- // // estimation is to low, real values are about 0.5 of estimated size
289- // SizeRatio({0.4, 0.5, 0.6}),
290- // };
291-
292235/* * Make sure that setting value size estimates with ColumnString::EstimatedValueSize either via constructor or via SetEstimatedValueSize
293236 * doesn't break ColumnString functionality and behaves well with Reserve() and Append().
294237 * I.e. values are appended properly, nothing crashes and memory usage is not crazy-high if estimation is incorrect.
@@ -340,7 +283,10 @@ struct ColumnStringEstimatedValueSizeTest : public ::testing::TestWithParam<std:
340283
341284 size_t EstimateMemoryUsage (size_t total_values_size, float expected_number_of_items_multiplier = 1.0 ) {
342285 const auto & [single_value_size_estimation, size_ratio] = GetParam ();
343- return EstimateColumnStringMemoryUsage (expected_number_of_items * expected_number_of_items_multiplier, single_value_size_estimation, size_ratio.average , total_values_size);
286+ return EstimateColumnStringMemoryUsage (expected_number_of_items * expected_number_of_items_multiplier,
287+ single_value_size_estimation,
288+ size_ratio.average ,
289+ total_values_size);
344290 }
345291};
346292
@@ -383,10 +329,8 @@ TEST_P(ColumnStringEstimatedValueSizeTest, AppendNoReserve)
383329
384330 EXPECT_NO_FATAL_FAILURE (AppendStrings (col, total_values_size));
385331
386- const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
387-
388332 // since there was no Reserve call prior, there could be some more overallocations, hence some estimation error
389- EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * max_estimation_error_factor );
333+ EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size));
390334}
391335
392336TEST_P (ColumnStringEstimatedValueSizeTest, ReserveExactAndAppend)
@@ -399,10 +343,7 @@ TEST_P(ColumnStringEstimatedValueSizeTest, ReserveExactAndAppend)
399343 EXPECT_NO_THROW (col.Reserve (expected_number_of_items));
400344 EXPECT_NO_FATAL_FAILURE (AppendStrings (col, total_values_size));
401345
402- const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
403-
404- // Allow minor overallocations, hence * 1.2
405- EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * max_estimation_error_factor);
346+ EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size));
406347}
407348
408349TEST_P (ColumnStringEstimatedValueSizeTest, ReserveLessAndAppend)
@@ -415,10 +356,7 @@ TEST_P(ColumnStringEstimatedValueSizeTest, ReserveLessAndAppend)
415356 EXPECT_NO_THROW (col.Reserve (expected_number_of_items * .8 ));
416357 EXPECT_NO_FATAL_FAILURE (AppendStrings (col, total_values_size));
417358
418- const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
419-
420- // Allow minor overallocations, hence * 1.2
421- EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size) * max_estimation_error_factor);
359+ EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size));
422360}
423361
424362TEST_P (ColumnStringEstimatedValueSizeTest, ReserveMoreAndAppend)
@@ -431,10 +369,7 @@ TEST_P(ColumnStringEstimatedValueSizeTest, ReserveMoreAndAppend)
431369 EXPECT_NO_THROW (col.Reserve (expected_number_of_items * 1.2 ));
432370 EXPECT_NO_FATAL_FAILURE (AppendStrings (col, total_values_size));
433371
434- const auto max_estimation_error_factor = single_value_size_estimation == ColumnString::NO_PREALLOCATE ? 2.5 : 2 ;
435-
436- // Allow minor overallocations, hence * 1.2
437- EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size, 1.2 ) * max_estimation_error_factor);
372+ EXPECT_LT (col.MemoryUsage (), EstimateMemoryUsage (total_values_size, 1.2 ));
438373}
439374
440375/* * TODO more tests
0 commit comments