Skip to content

Commit 65cb40d

Browse files
committed
Optimize endianness operations
Move endianness detection / helpers to util/endian.hpp
Pre-compute endian conversion performance once
Optimize single-byte writes/reads
1 parent 786dfcc commit 65cb40d

File tree

10 files changed

+137
-76
lines changed

10 files changed

+137
-76
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ add_library(lslobj OBJECT
9797
src/udp_server.h
9898
src/util/cast.hpp
9999
src/util/cast.cpp
100+
src/util/endian.cpp
101+
src/util/endian.hpp
100102
src/util/inireader.hpp
101103
src/util/inireader.cpp
102104
src/util/strfuns.hpp

src/data_receiver.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "sample.h"
66
#include "socket_utils.h"
77
#include "util/cast.hpp"
8+
#include "util/endian.hpp"
89
#include "util/strfuns.hpp"
910
#include <chrono>
1011
#include <exception>
@@ -155,7 +156,7 @@ void data_receiver::data_thread() {
155156

156157
// --- protocol negotiation ---
157158

158-
int use_byte_order = 0; // which byte order we shall use (0=portable byte order)
159+
bool reverse_byte_order = false; // perform little <-> big endian conversion?
159160
int data_protocol_version = 100; // which protocol version we shall use for data
160161
// transmission (100=version 1.00)
161162
bool suppress_subnormals = false; // whether we shall suppress subnormal numbers
@@ -225,12 +226,13 @@ void data_receiver::data_thread() {
225226
rest = trim(hdrline.substr(colon + 1));
226227
// get the header information
227228
if (type == "byte-order") {
228-
use_byte_order = std::stoi(rest);
229-
if (use_byte_order == 2134 && LSL_BYTE_ORDER != 2134 &&
230-
format_sizes[conn_.type_info().channel_format()] >= 8)
229+
int use_byte_order = std::stoi(rest);
230+
auto value_size = format_sizes[conn_.type_info().channel_format()];
231+
if (!lsl::can_convert_endian(use_byte_order, value_size))
231232
throw std::runtime_error(
232233
"The byte order conversion requested by the other party is "
233234
"not supported.");
235+
reverse_byte_order = use_byte_order != LSL_BYTE_ORDER;
234236
}
235237
if (type == "suppress-subnormals")
236238
suppress_subnormals = lsl::from_string<bool>(rest);
@@ -280,8 +282,8 @@ void data_receiver::data_thread() {
280282
received(fac.new_sample(0.0, false));
281283
expected->assign_test_pattern(test_pattern);
282284
if (data_protocol_version >= 110)
283-
received->load_streambuf(
284-
buffer, data_protocol_version, use_byte_order, suppress_subnormals);
285+
received->load_streambuf(buffer, data_protocol_version,
286+
reverse_byte_order, suppress_subnormals);
285287
else
286288
*inarch >> *received;
287289

@@ -310,7 +312,7 @@ void data_receiver::data_thread() {
310312
sample_p samp(factory->new_sample(0.0, false));
311313
if (data_protocol_version >= 110)
312314
samp->load_streambuf(
313-
buffer, data_protocol_version, use_byte_order, suppress_subnormals);
315+
buffer, data_protocol_version, reverse_byte_order, suppress_subnormals);
314316
else
315317
*inarch >> *samp;
316318
// deduce timestamp if necessary

src/sample.cpp

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -154,57 +154,69 @@ void save_raw(std::streambuf &sb, const void *address, std::size_t count) {
154154
throw std::runtime_error("Output stream error.");
155155
}
156156

157+
void save_byte(std::streambuf& sb, uint8_t v) {
158+
if (sb.sputc(*reinterpret_cast<const char *>(&v)) == std::streambuf::traits_type::eof())
159+
throw std::runtime_error("Output stream error.");
160+
}
161+
157162
/// Helper function to load raw binary data from a stream buffer.
158163
void load_raw(std::streambuf &sb, void *address, std::size_t count) {
159164
if ((std::size_t)sb.sgetn((char *)address, (std::streamsize)count) != count)
160165
throw std::runtime_error("Input stream error.");
161166
}
162167

168+
uint8_t load_byte(std::streambuf &sb) {
169+
auto res = sb.sbumpc();
170+
if(res == std::streambuf::traits_type::eof())
171+
throw std::runtime_error("Input stream error.");
172+
return static_cast<uint8_t>(res);
173+
}
174+
163175
/// Load a value from a stream buffer with correct endian treatment.
164-
template <typename T> T load_value(std::streambuf &sb, int use_byte_order) {
176+
template <typename T> T load_value(std::streambuf &sb, bool reverse_byte_order) {
165177
T tmp;
166178
load_raw(sb, &tmp, sizeof(T));
167-
if (sizeof(T) > 1 && use_byte_order != LSL_BYTE_ORDER) endian_reverse_inplace(tmp);
179+
if (sizeof(T) > 1 && reverse_byte_order) endian_reverse_inplace(tmp);
168180
return tmp;
169181
}
170182

171183
/// Save a value to a stream buffer with correct endian treatment.
172-
template <typename T> void save_value(std::streambuf &sb, T v, int use_byte_order) {
173-
if (use_byte_order != LSL_BYTE_ORDER) endian_reverse_inplace(v);
184+
template <typename T> void save_value(std::streambuf &sb, T v, bool reverse_byte_order) {
185+
if (sizeof(T) > 1 && reverse_byte_order) endian_reverse_inplace(v);
174186
save_raw(sb, &v, sizeof(T));
175187
}
176188

177189
void sample::save_streambuf(
178-
std::streambuf &sb, int /*protocol_version*/, int use_byte_order, void *scratchpad) const {
190+
std::streambuf &sb, int /*protocol_version*/, bool reverse_byte_order, void *scratchpad) const {
179191
// write sample header
180192
if (timestamp == DEDUCED_TIMESTAMP) {
181-
save_value(sb, TAG_DEDUCED_TIMESTAMP, use_byte_order);
193+
save_byte(sb, TAG_DEDUCED_TIMESTAMP);
182194
} else {
183-
save_value(sb, TAG_TRANSMITTED_TIMESTAMP, use_byte_order);
184-
save_value(sb, timestamp, use_byte_order);
195+
save_byte(sb, TAG_TRANSMITTED_TIMESTAMP);
196+
save_value(sb, timestamp, reverse_byte_order);
185197
}
186198
// write channel data
187199
if (format_ == cft_string) {
188200
for (const auto &str : samplevals<std::string>(*this)) {
189201
// write string length as variable-length integer
190202
if (str.size() <= 0xFF) {
191-
save_value(sb, (uint8_t)sizeof(uint8_t), use_byte_order);
192-
save_value(sb, static_cast<uint8_t>(str.size()), use_byte_order);
203+
save_byte(sb, static_cast<uint8_t>(sizeof(uint8_t)));
204+
save_byte(sb, static_cast<uint8_t>(str.size()));
193205
} else {
194206
if (str.size() <= 0xFFFFFFFF) {
195-
save_value(sb, (uint8_t)sizeof(uint32_t), use_byte_order);
196-
save_value(sb, static_cast<uint32_t>(str.size()), use_byte_order);
207+
save_byte(sb, static_cast<uint8_t>(sizeof(uint32_t)));
208+
save_value(sb, static_cast<uint32_t>(str.size()), reverse_byte_order);
197209
} else {
198-
save_value(sb, (uint8_t)sizeof(uint64_t), use_byte_order);
199-
save_value(sb, static_cast<std::size_t>(str.size()), use_byte_order);
210+
save_byte(sb, static_cast<uint8_t>(sizeof(uint64_t)));
211+
save_value(sb, static_cast<std::size_t>(str.size()), reverse_byte_order);
200212
}
201213
}
202214
// write string contents
203215
if (!str.empty()) save_raw(sb, str.data(), str.size());
204216
}
205217
} else {
206218
// write numeric data in binary
207-
if (use_byte_order == LSL_BYTE_ORDER || format_sizes[format_] == 1) {
219+
if (!reverse_byte_order || format_sizes[format_] == 1) {
208220
save_raw(sb, &data_, datasize());
209221
} else {
210222
memcpy(scratchpad, &data_, datasize());
@@ -215,31 +227,31 @@ void sample::save_streambuf(
215227
}
216228

217229
void sample::load_streambuf(
218-
std::streambuf &sb, int /*unused*/, int use_byte_order, bool suppress_subnormals) {
230+
std::streambuf &sb, int /*unused*/, bool reverse_byte_order, bool suppress_subnormals) {
219231
// read sample header
220-
if (load_value<uint8_t>(sb, use_byte_order) == TAG_DEDUCED_TIMESTAMP)
232+
if (load_byte(sb) == TAG_DEDUCED_TIMESTAMP)
221233
// deduce the timestamp
222234
timestamp = DEDUCED_TIMESTAMP;
223235
else
224236
// read the time stamp
225-
timestamp = load_value<double>(sb, use_byte_order);
237+
timestamp = load_value<double>(sb, reverse_byte_order);
226238

227239
// read channel data
228240
if (format_ == cft_string) {
229241
for (auto &str : samplevals<std::string>(*this)) {
230242
// read string length as variable-length integer
231243
std::size_t len = 0;
232-
auto lenbytes = load_value<uint8_t>(sb, use_byte_order);
244+
auto lenbytes = load_byte(sb);
233245

234246
if (sizeof(std::size_t) < 8 && lenbytes > sizeof(std::size_t))
235247
throw std::runtime_error(
236248
"This platform does not support strings of 64-bit length.");
237249
switch (lenbytes) {
238-
case sizeof(uint8_t): len = load_value<uint8_t>(sb, use_byte_order); break;
239-
case sizeof(uint16_t): len = load_value<uint16_t>(sb, use_byte_order); break;
240-
case sizeof(uint32_t): len = load_value<uint32_t>(sb, use_byte_order); break;
250+
case sizeof(uint8_t): len = load_byte(sb); break;
251+
case sizeof(uint16_t): len = load_value<uint16_t>(sb, reverse_byte_order); break;
252+
case sizeof(uint32_t): len = load_value<uint32_t>(sb, reverse_byte_order); break;
241253
#ifndef BOOST_NO_INT64_T
242-
case sizeof(uint64_t): len = load_value<uint64_t>(sb, use_byte_order); break;
254+
case sizeof(uint64_t): len = load_value<uint64_t>(sb, reverse_byte_order); break;
243255
#endif
244256
default: throw std::runtime_error("Stream contents corrupted (invalid varlen int).");
245257
}
@@ -250,8 +262,8 @@ void sample::load_streambuf(
250262
} else {
251263
// read numeric channel data
252264
load_raw(sb, &data_, datasize());
253-
if (use_byte_order != LSL_BYTE_ORDER && format_sizes[format_] > 1)
254-
convert_endian(&data_, num_channels_, format_sizes[format_]);
265+
if (reverse_byte_order && format_sizes[format_] > 1)
266+
convert_endian(&data_, num_channels(), format_sizes[format_]);
255267
if (suppress_subnormals && format_float[format_]) {
256268
if (format_ == cft_float32) {
257269
for (auto &val : samplevals<uint32_t>(*this))

src/sample.h

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,15 @@
33
#include "common.h"
44
#include "forward.h"
55
#include "util/cast.hpp"
6+
#include "util/endian.hpp"
67
#include <atomic>
78
#include <cstdint>
8-
#include <boost/endian/detail/order.hpp>
99
#include <iosfwd>
1010
#include <limits>
1111
#include <stdexcept>
1212
#include <string>
1313
#include <type_traits>
1414

15-
// Determine target byte order / endianness
16-
using byteorder = lslboost::endian::order;
17-
static_assert(byteorder::native == byteorder::little || byteorder::native == byteorder::big, "Unsupported byteorder");
18-
const int LSL_BYTE_ORDER = (byteorder::native == byteorder::little) ? 1234 : 4321;
1915

2016
namespace lsl {
2117
// assert that the target CPU can represent the double-precision timestamp format required by LSL
@@ -144,12 +140,12 @@ class sample {
144140
// === serialization functions ===
145141

146142
/// Serialize a sample to a stream buffer (protocol 1.10).
147-
void save_streambuf(std::streambuf &sb, int protocol_version, int use_byte_order,
143+
void save_streambuf(std::streambuf &sb, int protocol_version, bool reverse_byte_order,
148144
void *scratchpad = nullptr) const;
149145

150146
/// Deserialize a sample from a stream buffer (protocol 1.10).
151-
void load_streambuf(
152-
std::streambuf &sb, int protocol_version, int use_byte_order, bool suppress_subnormals);
147+
void load_streambuf(std::streambuf &sb, int protocol_version, bool reverse_byte_order,
148+
bool suppress_subnormals);
153149

154150
/// Convert the endianness of channel data in-place.
155151
static void convert_endian(void *data, uint32_t n, uint32_t width);

src/socket_utils.cpp

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,6 @@
11
#include "socket_utils.h"
22
#include "api_config.h"
33
#include "common.h"
4-
#include <boost/endian/conversion.hpp>
5-
6-
double lsl::measure_endian_performance() {
7-
const double measure_duration = 0.01;
8-
const double t_end = lsl_clock() + measure_duration;
9-
uint64_t data = 0x01020304;
10-
double k;
11-
for (k = 0; ((int)k & 0xFF) != 0 || lsl_clock() < t_end; k++)
12-
lslboost::endian::endian_reverse_inplace(data);
13-
return k;
14-
}
154

165
template <typename Socket, typename Protocol>
176
uint16_t bind_port_in_range_(Socket &sock, Protocol protocol) {

src/socket_utils.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,6 @@ uint16_t bind_port_in_range(asio::ip::udp::socket &sock, asio::ip::udp protocol)
1515
/// Bind and listen to an acceptor on a free port in the configured port range or throw an error.
1616
uint16_t bind_and_listen_to_port_in_range(
1717
asio::ip::tcp::acceptor &acc, asio::ip::tcp protocol, int backlog);
18-
19-
/// Measure the endian conversion performance of this machine.
20-
double measure_endian_performance();
2118
} // namespace lsl
2219

2320
#endif

src/tcp_server.cpp

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "socket_utils.h"
77
#include "stream_info_impl.h"
88
#include "util/cast.hpp"
9+
#include "util/endian.hpp"
910
#include "util/strfuns.hpp"
1011
#include <asio/io_context.hpp>
1112
#include <asio/ip/host_name.hpp>
@@ -34,6 +35,8 @@
3435
#define NO_EXPLICIT_TEMPLATE_INSTANTIATION
3536
#include "portable_archive/portable_oarchive.hpp"
3637

38+
using std::size_t;
39+
3740
namespace lsl {
3841
/**
3942
* Active session with a TCP client.
@@ -127,9 +130,8 @@ class client_session : public std::enable_shared_from_this<client_session> {
127130
char *scratch_{nullptr};
128131
/// protocol version to use for transmission
129132
int data_protocol_version_{100};
130-
/// byte order to use (0=portable, 1234=little endian, 4321=big endian, 2134=PDP endian,
131-
/// unsupported)
132-
int use_byte_order_{LSL_BYTE_ORDER};
133+
/// whether this server byte-swaps outgoing samples for the client (big <-> little endian)
134+
bool reverse_byte_order_{false};
133135
/// our chunk granularity
134136
int chunk_granularity_{0};
135137
/// maximum number of samples buffered
@@ -423,6 +425,9 @@ void client_session::handle_read_feedparams(
423425

424426
// determine the parameters for data transmission
425427
bool client_suppress_subnormals = false;
428+
429+
lsl::Endianness use_byte_order = LSL_BYTE_ORDER;
430+
426431
// use least common denominator data protocol version
427432
data_protocol_version_ = std::min(
428433
api_config::get_instance()->use_protocol_version(), client_protocol_version);
@@ -436,21 +441,21 @@ void client_session::handle_read_feedparams(
436441
!client_has_ieee754_floats)
437442
data_protocol_version_ = 100;
438443
if (data_protocol_version_ >= 110) {
439-
// decide on the byte order if conflicting
440-
if (LSL_BYTE_ORDER != client_byte_order) {
441-
if (client_byte_order == 2134 && client_value_size >= 8) {
442-
// since we have no implementation for this byte order conversion let
443-
// the client do it
444-
use_byte_order_ = LSL_BYTE_ORDER;
445-
} else {
446-
// let the faster party perform the endian conversion
447-
use_byte_order_ = (client_value_size <= 1 || (measure_endian_performance() >
448-
client_endian_performance))
449-
? client_byte_order
450-
: LSL_BYTE_ORDER;
451-
}
452-
} else
453-
use_byte_order_ = LSL_BYTE_ORDER;
444+
445+
// enable endian conversion when
446+
// 1. our byte ordering is different from the client's *and*
447+
// 2. we can actually perform the conversion *and*
448+
// 3. the sample format is wide enough for endianness to matter *and*
449+
// 4. we're faster at converting than the client
450+
if (LSL_BYTE_ORDER != client_byte_order && // (1)
451+
lsl::can_convert_endian(client_byte_order, client_value_size) // (2)
452+
&& client_value_size > 1 && // (3)
453+
(measure_endian_performance() > client_endian_performance)) // (4)
454+
{
455+
use_byte_order = static_cast<lsl::Endianness>(client_byte_order);
456+
reverse_byte_order_ = true;
457+
}
458+
454459
// determine if subnormal suppression needs to be enabled
455460
client_suppress_subnormals =
456461
(format_subnormal[format] && !client_supports_subnormals);
@@ -461,7 +466,7 @@ void client_session::handle_read_feedparams(
461466
response_stream << "LSL/" << api_config::get_instance()->use_protocol_version()
462467
<< " 200 OK\r\n";
463468
response_stream << "UID: " << serv_->info_->uid() << "\r\n";
464-
response_stream << "Byte-Order: " << use_byte_order_ << "\r\n";
469+
response_stream << "Byte-Order: " << use_byte_order << "\r\n";
465470
response_stream << "Suppress-Subnormals: " << client_suppress_subnormals << "\r\n";
466471
response_stream << "Data-Protocol-Version: " << data_protocol_version_ << "\r\n";
467472
response_stream << "\r\n" << std::flush;
@@ -489,7 +494,8 @@ void client_session::handle_read_feedparams(
489494
lsl::sample_p temp(fac.new_sample(0.0, false));
490495
temp->assign_test_pattern(test_pattern);
491496
if (data_protocol_version_ >= 110)
492-
temp->save_streambuf(feedbuf_, data_protocol_version_, use_byte_order_, scratch_);
497+
temp->save_streambuf(
498+
feedbuf_, data_protocol_version_, reverse_byte_order_, scratch_);
493499
else
494500
*outarch_ << *temp;
495501
}
@@ -543,7 +549,7 @@ void client_session::transfer_samples_thread(std::shared_ptr<client_session> /*k
543549
// serialize the sample into the stream
544550
if (data_protocol_version_ >= 110)
545551
samp->save_streambuf(
546-
feedbuf_, data_protocol_version_, use_byte_order_, scratch_);
552+
feedbuf_, data_protocol_version_, reverse_byte_order_, scratch_);
547553
else
548554
*outarch_ << *samp;
549555
// if the sample is marked as force-push or the configured chunk size is reached

src/util/endian.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#include "endian.hpp"
2+
#include "../common.h"
3+
4+
double lsl::measure_endian_performance() {
5+
const double measure_duration = 0.01;
6+
const double t_end = lsl_clock() + measure_duration;
7+
uint64_t data = 0x01020304;
8+
double k;
9+
for (k = 0; ((int)k & 0xFF) != 0 || lsl_clock() < t_end; k++)
10+
lslboost::endian::endian_reverse_inplace(data);
11+
return k;
12+
}

0 commit comments

Comments
 (0)