-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[libc++] Optimize basic_string::append(Iter, Iter) #169794
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
You can test this locally with the following command:git-clang-format --diff origin/main HEAD --extensions ,cpp -- libcxx/include/string libcxx/test/benchmarks/containers/string.bench.cpp --diff_from_common_commit
View the diff from clang-format here.diff --git a/libcxx/test/benchmarks/containers/string.bench.cpp b/libcxx/test/benchmarks/containers/string.bench.cpp
index 68249ad89..8fb1d1a21 100644
--- a/libcxx/test/benchmarks/containers/string.bench.cpp
+++ b/libcxx/test/benchmarks/containers/string.bench.cpp
@@ -128,7 +128,7 @@ BENCHMARK(BM_StringResizeAndOverwrite);
template <class Container>
static void BM_StringAppend(benchmark::State& state) {
std::string orig_str = "This is some data so the string isn't empty";
- std::string str = orig_str;
+ std::string str = orig_str;
const char to_append_str[] = "This is a long string to make sure append does some work";
Container to_append;
|
e850aa4 to
f4596cb
Compare
f4596cb to
1ac38dc
Compare
|
@llvm/pr-subscribers-libcxx Author: Nikolas Klauser (philnik777) ChangesFull diff: https://github.com/llvm/llvm-project/pull/169794.diff 2 Files Affected:
diff --git a/libcxx/include/string b/libcxx/include/string
index 2b3ba6d2d9b62..c0454ba2cb9f8 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -1440,24 +1440,51 @@ public:
template <class _ForwardIterator, __enable_if_t<__has_forward_iterator_category<_ForwardIterator>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string&
append(_ForwardIterator __first, _ForwardIterator __last) {
- size_type __sz = size();
- size_type __cap = capacity();
- size_type __n = static_cast<size_type>(std::distance(__first, __last));
+ const size_type __size = size();
+ const size_type __cap = capacity();
+ const size_type __n = static_cast<size_type>(std::distance(__first, __last));
if (__n == 0)
return *this;
- if (__string_is_trivial_iterator_v<_ForwardIterator> && !__addr_in_range(*__first)) {
- if (__cap - __sz < __n)
- __grow_by_without_replace(__cap, __sz + __n - __cap, __sz, __sz, 0);
- __annotate_increase(__n);
- auto __end = __copy_non_overlapping_range(__first, __last, std::__to_address(__get_pointer() + __sz));
- traits_type::assign(*__end, value_type());
- __set_size(__sz + __n);
- return *this;
+ __annotate_delete();
+ auto __asan_guard = std::__make_scope_guard(__annotate_new_size(*this));
+
+ if (__n > __cap - __size) {
+ __long __buffer = __allocate_long_buffer(__alloc_, __get_amortized_growth_capacity(__size + __n));
+ __buffer.__size_ = __size + __n;
+ auto __guard = std::__make_exception_guard([&] {
+ __alloc_traits::deallocate(__alloc_, __buffer.__data_, __buffer.__cap_ * __endian_factor);
+ });
+ auto __end = __copy_non_overlapping_range(__first, __last, std::__to_address(__buffer.__data_) + __size);
+ traits_type::assign(*__end, _CharT());
+ traits_type::copy(std::__to_address(__buffer.__data_), data(), __size);
+ __guard.__complete();
+ __reset_internal_buffer(__buffer);
} else {
- const basic_string __temp(__first, __last, __alloc_);
- return append(__temp.data(), __temp.size());
+ _CharT* const __ptr = std::__to_address(__get_pointer());
+# ifndef _LIBCPP_CXX03_LANG
+ if constexpr (__libcpp_is_contiguous_iterator<_ForwardIterator>::value &&
+ is_same<value_type, __remove_cvref_t<decltype(*__first)>>::value) {
+ traits_type::move(__ptr + __size, std::__to_address(__first), __last - __first);
+ } else if constexpr (__has_bidirectional_iterator_category<_ForwardIterator>::value) {
+ auto __dest = __ptr + __size + __n;
+ while (__first != __last) {
+ --__last;
+ --__dest;
+ traits_type::assign(*__dest, *__last);
+ }
+ } else
+# endif
+ {
+ _CharT __first_val = *__first;
+ ++__first;
+ __copy_non_overlapping_range(__first, __last, __ptr + __size + 1);
+ traits_type::assign(__ptr[__size], __first_val);
+ }
+ traits_type::assign(__ptr[__size + __n], _CharT());
+ __set_size(__size + __n);
}
+ return *this;
}
# if _LIBCPP_STD_VER >= 23
diff --git a/libcxx/test/benchmarks/containers/string.bench.cpp b/libcxx/test/benchmarks/containers/string.bench.cpp
index 98216d22d0144..68249ad8963ed 100644
--- a/libcxx/test/benchmarks/containers/string.bench.cpp
+++ b/libcxx/test/benchmarks/containers/string.bench.cpp
@@ -11,7 +11,10 @@
#include <algorithm>
#include <cstdint>
#include <cstdlib>
+#include <deque>
+#include <iterator>
#include <new>
+#include <list>
#include <vector>
#include "../CartesianBenchmarks.h"
@@ -122,6 +125,26 @@ static void BM_StringResizeAndOverwrite(benchmark::State& state) {
}
BENCHMARK(BM_StringResizeAndOverwrite);
+template <class Container>
+static void BM_StringAppend(benchmark::State& state) {
+ std::string orig_str = "This is some data so the string isn't empty";
+ std::string str = orig_str;
+
+ const char to_append_str[] = "This is a long string to make sure append does some work";
+ Container to_append;
+ std::copy(std::begin(to_append_str), std::end(to_append_str), std::back_inserter(to_append));
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(str);
+ str.append(to_append.begin(), to_append.end());
+ benchmark::DoNotOptimize(str);
+ str.resize(orig_str.size());
+ }
+}
+BENCHMARK(BM_StringAppend<std::vector<char>>);
+BENCHMARK(BM_StringAppend<std::deque<char>>);
+BENCHMARK(BM_StringAppend<std::list<char>>);
+
enum class Length { Empty, Small, Large, Huge };
struct AllLengths : EnumValuesAsTuple<AllLengths, Length, 4> {
static constexpr const char* Names[] = {"Empty", "Small", "Large", "Huge"};
|
|
/libcxx-bot benchmark libcxx/test/benchmarks/containers/string.bench.cpp Benchmark results: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please add an explanation of what the optimization is in the commit message? And also please include benchmark results.
| __long __buffer = __allocate_long_buffer(__alloc_, __get_amortized_growth_capacity(__size + __n)); | ||
| __buffer.__size_ = __size + __n; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Semantically, that's what we have:
| __long __buffer = __allocate_long_buffer(__alloc_, __get_amortized_growth_capacity(__size + __n)); | |
| __buffer.__size_ = __size + __n; | |
| const size_type __new_size = __size + __n; | |
| const size_type __new_capacity = __get_amortized_growth_capacity(__new_size); | |
| __long __buffer = __allocate_long_buffer(__alloc_, __new_size, __new_capacity); |
It might not be necessary to actually name __new_size, but I think it would make sense for the API to allow us to write this.
The other way I can see to achieve the same result without losing the fact that we're doing amortized growth at the call site would be:
struct __amortized_growth_strategy_t {};
__long __buffer = __allocate_long_buffer(__alloc_, __size + __n, __amortized_growth_strategy_t{});and then
_LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX20 __long
__allocate_long_buffer(_Allocator& __alloc, size_type __size, __amortized_growth_strategy_t) {
_LIBCPP_ASSERT_INTERNAL(!__fits_in_sso(__size),
"Trying to allocate long buffer for a size what would fit into the small buffer");
size_type const __capacity = has-amortized-growth-strategy ? __get_amortized_growth_capacity(__size) : __size;
auto __buffer = std::__allocate_at_least(__alloc, __align_allocation_size(__capacity));
if (__libcpp_is_constant_evaluated()) {
for (size_type __i = 0; __i != __buffer.count; ++__i)
std::__construct_at(std::addressof(__buffer.ptr[__i]));
}
return __long(__buffer, __size);
}| __long __buffer = __allocate_long_buffer(__alloc_, __get_amortized_growth_capacity(__size + __n)); | ||
| __buffer.__size_ = __size + __n; | ||
| auto __guard = std::__make_exception_guard([&] { | ||
| __alloc_traits::deallocate(__alloc_, __buffer.__data_, __buffer.__cap_ * __endian_factor); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should add a member function to the long buffer representation that gives you the correct capacity. I'd like to do that refactoring before we land this patch.
| } else { | ||
| const basic_string __temp(__first, __last, __alloc_); | ||
| return append(__temp.data(), __temp.size()); | ||
| _CharT* const __ptr = std::__to_address(__get_pointer()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
_CharT* const __ptr = std::__to_address(__get_pointer());
# ifndef _LIBCPP_CXX03_LANG
if constexpr (__libcpp_is_contiguous_iterator<_ForwardIterator>::value &&
is_same<value_type, __remove_cvref_t<decltype(*__first)>>::value) {
traits_type::move(__ptr + __size, std::__to_address(__first), __last - __first);
} else if constexpr (__has_bidirectional_iterator_category<_ForwardIterator>::value) {
auto __dest = __ptr + __size + __n;
while (__first != __last) {
--__last;
--__dest;
traits_type::assign(*__dest, *__last);
}
} else {
_CharT __first_val = *__first;
++__first;
__copy_non_overlapping_range(__first, __last, __ptr + __size + 1);
traits_type::assign(__ptr[__size], __first_val);
}
# else
_CharT __first_val = *__first;
++__first;
__copy_non_overlapping_range(__first, __last, __ptr + __size + 1);
traits_type::assign(__ptr[__size], __first_val);
# endif
traits_type::assign(__ptr[__size + __n], _CharT());
__set_size(__size + __n);IMO this is a lot easier to read because the interaction between the preprocessor and if constexpr is a lot more straightforward. There's a tiny bit more duplication, but IMO it increases readability over status quo.
| traits_type::move(__ptr + __size, std::__to_address(__first), __last - __first); | ||
| } else if constexpr (__has_bidirectional_iterator_category<_ForwardIterator>::value) { | ||
| auto __dest = __ptr + __size + __n; | ||
| while (__first != __last) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is non-trivial. Can you please add a comment explaining why you go from the back?
|
|
||
| template <class _ForwardIterator, __enable_if_t<__has_forward_iterator_category<_ForwardIterator>::value, int> = 0> | ||
| _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& | ||
| append(_ForwardIterator __first, _ForwardIterator __last) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At the moment, we are not optimizing append(char8_t*, char8_t*) into a memmove, but we could. We should figure out how to do that, perhaps in __copy_non_overlapping_range.
| std::string str = orig_str; | ||
|
|
||
| const char to_append_str[] = "This is a long string to make sure append does some work"; | ||
| Container to_append; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| Container to_append; | |
| Container to_append(std::begin(to_append_str), std::end(to_append_str)); |
|
|
||
| template <class Container> | ||
| static void BM_StringAppend(benchmark::State& state) { | ||
| std::string orig_str = "This is some data so the string isn't empty"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| std::string orig_str = "This is some data so the string isn't empty"; | |
| std::string const orig_str = "This is some data so the string isn't empty"; |
No description provided.