Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 40 additions & 13 deletions libcxx/include/string
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please add an explanation of what the optimization is in the commit message? And also please include benchmark results.

Original file line number Diff line number Diff line change
Expand Up @@ -1440,24 +1440,51 @@ public:
template <class _ForwardIterator, __enable_if_t<__has_forward_iterator_category<_ForwardIterator>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string&
append(_ForwardIterator __first, _ForwardIterator __last) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At the moment, we are not optimizing append(char8_t*, char8_t*) into a memmove, but we could. We should figure out how to do that, perhaps in __copy_non_overlapping_range.

size_type __sz = size();
size_type __cap = capacity();
size_type __n = static_cast<size_type>(std::distance(__first, __last));
const size_type __size = size();
const size_type __cap = capacity();
const size_type __n = static_cast<size_type>(std::distance(__first, __last));
if (__n == 0)
return *this;

if (__string_is_trivial_iterator_v<_ForwardIterator> && !__addr_in_range(*__first)) {
if (__cap - __sz < __n)
__grow_by_without_replace(__cap, __sz + __n - __cap, __sz, __sz, 0);
__annotate_increase(__n);
auto __end = __copy_non_overlapping_range(__first, __last, std::__to_address(__get_pointer() + __sz));
traits_type::assign(*__end, value_type());
__set_size(__sz + __n);
return *this;
__annotate_delete();
auto __asan_guard = std::__make_scope_guard(__annotate_new_size(*this));

if (__n > __cap - __size) {
__long __buffer = __allocate_long_buffer(__alloc_, __get_amortized_growth_capacity(__size + __n));
__buffer.__size_ = __size + __n;
Comment on lines +1453 to +1454
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Semantically, that's what we have:

Suggested change
__long __buffer = __allocate_long_buffer(__alloc_, __get_amortized_growth_capacity(__size + __n));
__buffer.__size_ = __size + __n;
const size_type __new_size = __size + __n;
const size_type __new_capacity = __get_amortized_growth_capacity(__new_size);
__long __buffer = __allocate_long_buffer(__alloc_, __new_size, __new_capacity);

It might not be necessary to actually name __new_size, but I think it would make sense for the API to allow us to write this.

The other way I can see to achieve the same result without losing the fact that we're doing amortized growth at the call site would be:

struct __amortized_growth_strategy_t {};
__long __buffer  = __allocate_long_buffer(__alloc_, __size + __n, __amortized_growth_strategy_t{});

and then

_LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX20 __long
  __allocate_long_buffer(_Allocator& __alloc, size_type __size, __amortized_growth_strategy_t) {
    _LIBCPP_ASSERT_INTERNAL(!__fits_in_sso(__size),
                            "Trying to allocate long buffer for a size what would fit into the small buffer");
    size_type const __capacity = has-amortized-growth-strategy ? __get_amortized_growth_capacity(__size) : __size;
    auto __buffer = std::__allocate_at_least(__alloc, __align_allocation_size(__capacity));

    if (__libcpp_is_constant_evaluated()) {
      for (size_type __i = 0; __i != __buffer.count; ++__i)
        std::__construct_at(std::addressof(__buffer.ptr[__i]));
    }

    return __long(__buffer, __size);
  }

auto __guard = std::__make_exception_guard([&] {
__alloc_traits::deallocate(__alloc_, __buffer.__data_, __buffer.__cap_ * __endian_factor);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should add a member function to the long buffer representation that gives you the correct capacity. I'd like to do that refactoring before we land this patch.

});
auto __end = __copy_non_overlapping_range(__first, __last, std::__to_address(__buffer.__data_) + __size);
traits_type::assign(*__end, _CharT());
traits_type::copy(std::__to_address(__buffer.__data_), data(), __size);
__guard.__complete();
__reset_internal_buffer(__buffer);
} else {
const basic_string __temp(__first, __last, __alloc_);
return append(__temp.data(), __temp.size());
_CharT* const __ptr = std::__to_address(__get_pointer());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

     _CharT* const __ptr = std::__to_address(__get_pointer());
#  ifndef _LIBCPP_CXX03_LANG
      if constexpr (__libcpp_is_contiguous_iterator<_ForwardIterator>::value &&
                    is_same<value_type, __remove_cvref_t<decltype(*__first)>>::value) {
        traits_type::move(__ptr + __size, std::__to_address(__first), __last - __first);
      } else if constexpr (__has_bidirectional_iterator_category<_ForwardIterator>::value) {
        auto __dest = __ptr + __size + __n;
        while (__first != __last) {
          --__last;
          --__dest;
          traits_type::assign(*__dest, *__last);
        }
      } else {
        _CharT __first_val = *__first;
        ++__first;
        __copy_non_overlapping_range(__first, __last, __ptr + __size + 1);
        traits_type::assign(__ptr[__size], __first_val);
      }
#  else
      _CharT __first_val = *__first;
      ++__first;
      __copy_non_overlapping_range(__first, __last, __ptr + __size + 1);
      traits_type::assign(__ptr[__size], __first_val);
#  endif
      traits_type::assign(__ptr[__size + __n], _CharT());
      __set_size(__size + __n);

IMO this is a lot easier to read because the interaction between the preprocessor and if constexpr is a lot more straightforward. There's a tiny bit more duplication, but IMO it increases readability over status quo.

# ifndef _LIBCPP_CXX03_LANG
if constexpr (__libcpp_is_contiguous_iterator<_ForwardIterator>::value &&
is_same<value_type, __remove_cvref_t<decltype(*__first)>>::value) {
traits_type::move(__ptr + __size, std::__to_address(__first), __last - __first);
} else if constexpr (__has_bidirectional_iterator_category<_ForwardIterator>::value) {
auto __dest = __ptr + __size + __n;
while (__first != __last) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is non-trivial. Can you please add a comment explaining why you go from the back?

--__last;
--__dest;
traits_type::assign(*__dest, *__last);
}
} else
# endif
{
_CharT __first_val = *__first;
++__first;
__copy_non_overlapping_range(__first, __last, __ptr + __size + 1);
traits_type::assign(__ptr[__size], __first_val);
}
traits_type::assign(__ptr[__size + __n], _CharT());
__set_size(__size + __n);
}
return *this;
}

# if _LIBCPP_STD_VER >= 23
Expand Down
23 changes: 23 additions & 0 deletions libcxx/test/benchmarks/containers/string.bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <deque>
#include <iterator>
#include <new>
#include <list>
#include <vector>

#include "../CartesianBenchmarks.h"
Expand Down Expand Up @@ -122,6 +125,26 @@ static void BM_StringResizeAndOverwrite(benchmark::State& state) {
}
BENCHMARK(BM_StringResizeAndOverwrite);

template <class Container>
static void BM_StringAppend(benchmark::State& state) {
std::string orig_str = "This is some data so the string isn't empty";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
std::string orig_str = "This is some data so the string isn't empty";
std::string const orig_str = "This is some data so the string isn't empty";

std::string str = orig_str;

const char to_append_str[] = "This is a long string to make sure append does some work";
Container to_append;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Container to_append;
Container to_append(std::begin(to_append_str), std::end(to_append_str));

std::copy(std::begin(to_append_str), std::end(to_append_str), std::back_inserter(to_append));

for (auto _ : state) {
benchmark::DoNotOptimize(str);
str.append(to_append.begin(), to_append.end());
benchmark::DoNotOptimize(str);
str.resize(orig_str.size());
}
}
BENCHMARK(BM_StringAppend<std::vector<char>>);
BENCHMARK(BM_StringAppend<std::deque<char>>);
BENCHMARK(BM_StringAppend<std::list<char>>);

enum class Length { Empty, Small, Large, Huge };
struct AllLengths : EnumValuesAsTuple<AllLengths, Length, 4> {
static constexpr const char* Names[] = {"Empty", "Small", "Large", "Huge"};
Expand Down
Loading