Skip to content

Commit 1cd0772

Browse files
lemirejaja360
authored and committed
design a backward algorithm to produce the shortest string
1 parent 66a37ea commit 1cd0772

File tree

2 files changed

+138
-1
lines changed

2 files changed

+138
-1
lines changed

benchmarks/algorithms.h

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,126 @@ struct BenchArgs {
6363

6464
namespace BenchmarkShortest {
6565

66+
67+
/**
 * std::to_chars does not always produce the shortest possible string (for
 * example it writes "1e+20" and "1230000"), so this helper rewrites a decimal
 * number string into a shorter, numerically equivalent one when possible.
 *
 * - Input already in scientific notation: the mantissa is kept verbatim and
 *   the exponent is normalized (a '+' sign and leading zeros are dropped, a
 *   '-' sign is kept unless the exponent is zero). The result uses 'E'.
 * - Input in fixed notation: it is converted to "d.ddde<exp>" only when that
 *   form is strictly shorter than the input; otherwise the input is returned
 *   unchanged. The result uses 'e'.
 * - Anything that is not a plain decimal number (empty string, "inf", "nan",
 *   a zero value, ...) is returned unchanged.
 *
 * NOTE(review): the two branches emit different exponent markers ('E' vs
 * 'e'); this is preserved from the original implementation.
 *
 * @param input textual number, typically the output of std::to_chars
 * @return the shortened string, or a copy of input when nothing is gained
 */
inline std::string optimize_number_string(const std::string &input) {
  // --- Scientific notation: only the exponent can be shortened. ---
  const auto e_pos = input.find_first_of("Ee");
  if (e_pos != std::string::npos) {
    const std::string mantissa = input.substr(0, e_pos);
    std::string exponent = input.substr(e_pos + 1);

    // Strip an explicit sign; std::to_chars writes '+' for positive exponents.
    bool negative = false;
    if (!exponent.empty() && (exponent[0] == '-' || exponent[0] == '+')) {
      negative = exponent[0] == '-';
      exponent.erase(0, 1);
    }
    // Drop leading zeros (erases everything when the exponent is all zeros).
    exponent.erase(0, exponent.find_first_not_of('0'));
    if (exponent.empty())
      return mantissa + "E0"; // zero exponent never carries a sign
    return mantissa + (negative ? "E-" : "E") + exponent;
  }

  // --- Fixed notation. ---
  const bool is_negative = !input.empty() && input[0] == '-';
  const std::string num = is_negative ? input.substr(1) : input;

  // Split around the decimal point (if any) and join the digits.
  const size_t decimal_pos = num.find('.');
  const std::string int_part = num.substr(0, decimal_pos);
  const std::string frac_part = decimal_pos == std::string::npos
                                    ? std::string()
                                    : num.substr(decimal_pos + 1);
  const std::string all_digits = int_part + frac_part;

  // Not a plain decimal number (empty, inf, nan, second '.', ...): leave as is.
  if (all_digits.empty() ||
      all_digits.find_first_not_of("0123456789") != std::string::npos)
    return input;

  // A value of zero has no significant digits to normalize.
  const size_t first_non_zero = all_digits.find_first_not_of('0');
  if (first_non_zero == std::string::npos)
    return input;
  const size_t last_non_zero = all_digits.find_last_not_of('0');
  const std::string digits =
      all_digits.substr(first_non_zero, last_non_zero - first_non_zero + 1);
  const size_t num_digits = digits.length();

  // Power of ten of the first significant digit: the digit at index i of
  // all_digits has weight 10^(int_part.length() - 1 - i).
  const long long exponent = static_cast<long long>(int_part.length()) - 1 -
                             static_cast<long long>(first_non_zero);
  const std::string exponent_text = std::to_string(exponent);

  // sign + digits + optional '.' + 'e' + exponent (including its own sign)
  const size_t sci_len = (is_negative ? 1 : 0) + num_digits +
                         (num_digits > 1 ? 1 : 0) + 1 + exponent_text.length();

  // Keep the original unless scientific notation is strictly shorter.
  if (sci_len >= input.length())
    return input;

  std::string result;
  result.reserve(sci_len);
  if (is_negative)
    result += '-';
  result += digits[0];
  if (num_digits > 1) {
    result += '.';
    result.append(digits, 1, std::string::npos);
  }
  result += 'e';
  result += exponent_text;
  return result;
}
161+
162+
/**
163+
* This is a special version of std::to_chars that produces the shortest
164+
* representation for numbers. It should not be used for benchmarking.
165+
*/
166+
template<arithmetic_float T>
167+
int std_to_chars_shorter(T d, std::span<char>& buffer) {
168+
#if TO_CHARS_SUPPORTED
169+
const auto [p, ec]
170+
= std::to_chars(buffer.data(), buffer.data() + buffer.size(), d);
171+
if (ec != std::errc()) {
172+
std::cerr << "problem with " << d << std::endl;
173+
std::abort();
174+
}
175+
// This is ridiculous, optimize:
176+
std::string result(buffer.data(), p - buffer.data());
177+
result = optimize_number_string(result);
178+
std::memcpy(buffer.data(), result.data(), result.size());
179+
return result.size();
180+
#else
181+
std::cerr << "std::to_chars not supported" << std::endl;
182+
std::abort();
183+
#endif
184+
}
185+
66186
template<arithmetic_float T>
67187
int dragon4(T d, std::span<char>& buffer) {
68188
if constexpr (std::is_same_v<T, float>)
@@ -432,6 +552,7 @@ int std_to_chars(T d, std::span<char>& buffer) {
432552
#endif
433553
}
434554

555+
435556
} // namespace BenchmarkShortest
436557

437558
template <typename T>
@@ -441,6 +562,16 @@ auto wrap(int (*fn)(T, std::span<char>&)) {
441562
};
442563
}
443564

565+
// Experimental: shorter representation for std::to_chars
566+
// This is not a benchmark, but a utility function to produce the shortest
567+
// representation of a floating-point number using std::to_chars.
568+
// It is not used in the benchmarks, but can be useful for other purposes.
569+
// It is not optimized for performance, but for producing the shortest string.
570+
template <arithmetic_float T>
571+
BenchArgs<T> get_std_to_chars_shorter() {
572+
return BenchArgs<T>("std_to_chars_short", wrap(BenchmarkShortest::std_to_chars_shorter<T>), TO_CHARS_SUPPORTED);
573+
}
574+
444575
template <arithmetic_float T>
445576
std::vector<BenchArgs<T>> initArgs(bool use_errol = false, size_t repeat = 0, size_t fixed_size = 0) {
446577
std::vector<BenchArgs<T>> args;

benchmarks/benchmark.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,14 @@ bool is_exact_integer(float_type x) {
154154
// New template version of describe
155155
template <typename T>
156156
void describe(const std::variant<std::vector<TestCase<float>>, std::vector<TestCase<double>>> &numbers,
157-
const std::vector<BenchArgs<T>> &args,
157+
std::vector<BenchArgs<T>> args,
158158
const std::vector<std::string> &algo_filter) {
159+
if constexpr (std::is_same_v<T, float>) {
160+
args.push_back(get_std_to_chars_shorter<float>());
161+
} else if constexpr (std::is_same_v<T, double>) {
162+
args.push_back(get_std_to_chars_shorter<double>());
163+
}
164+
159165
std::visit([&args, &algo_filter](const auto &lines) {
160166
size_t integers64 = 0;
161167
size_t integers32 = 0;

0 commit comments

Comments
 (0)