Further modified Send/Receive DataInChunks examples for better speed profiling.

cboulay · cboulay · commit ee36a9479f16 · 2021-09-20T22:37:59.000-04:00
diff --git a/examples/ReceiveDataInChunks.cpp b/examples/ReceiveDataInChunks.cpp
@@ -1,16 +1,19 @@
+#include <chrono>
 #include <iostream>
 #include <lsl_cpp.h>
 #include <stdint.h>
+#include <thread>
 
 
 int main(int argc, char **argv) {
 	std::cout << "ReceiveDataInChunks" << std::endl;
-	std::cout << "ReceiveDataInChunks StreamName max_buflen" << std::endl;
+	std::cout << "ReceiveDataInChunks StreamName max_buflen flush" << std::endl;
 
 	try {
 
 		std::string name{argc > 1 ? argv[1] : "MyAudioStream"};
 		int32_t max_buflen = argc > 2 ? std::stol(argv[2]) : 360;
+		bool flush = argc > 3;
 		// resolve the stream of interest & make an inlet
 		lsl::stream_inlet inlet(lsl::resolve_stream("name", name).at(0), max_buflen);
 
@@ -21,26 +24,32 @@ int main(int argc, char **argv) {
 
 		// and retrieve the chunks
 		uint64_t k = 0, num_samples = 0;
+		std::vector<std::vector<int16_t>> result;
+		auto fetch_interval = std::chrono::milliseconds(20);
+		auto next_fetch = std::chrono::steady_clock::now() + fetch_interval;
+
+
 		while (true) {
-			std::vector < std::vector<int16_t> > result;
-			if (double timestamp = inlet.pull_chunk(result))
-				num_samples += result.size();
+			std::this_thread::sleep_until(next_fetch);
+			if (flush) {
+				// You almost certainly don't want to use flush. This is here so we
+				//  can test maximum outlet throughput.
+				num_samples += inlet.flush();
+			} else {
+				if (double timestamp = inlet.pull_chunk(result)) num_samples += result.size();
+			}
 			k++;
-
-			// display code
+			next_fetch += fetch_interval;
 			if (k % 50 == 0) {
 				double now = lsl::local_clock();
-				if (now > next_display) {
-					std::cout << num_samples / (now - starttime) << " samples/sec" << std::endl;
-					next_display = now + 1;
-				}
-				if (now > next_reset) { std::cout << "Resetting counters..." << std::endl;
+				std::cout << num_samples / (now - starttime) << " samples/sec" << std::endl;
+				if (now > next_reset) {
+					std::cout << "Resetting counters..." << std::endl;
 					starttime = now;
 					next_reset = now + 10;
 					num_samples = 0;
 				}
 			}
-			
 		}
 
 	} catch (std::exception &e) { std::cerr << "Got an exception: " << e.what() << std::endl; }
diff --git a/examples/SendDataInChunks.cpp b/examples/SendDataInChunks.cpp
@@ -5,6 +5,9 @@
 #include <thread>
 #include <algorithm>
 #include <random>
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
 
 
 // define a packed sample struct (here: a 16 bit stereo sample).
@@ -13,29 +16,75 @@ struct stereo_sample {
 	int16_t l, r;
 };
 
-// fill buffer with data from device -- Normally your device SDK would provide such a function. Here we use a random number generator.
-void get_data_from_device(std::vector<std::vector<int16_t>> buffer, uint64_t &sample_counter) {
-	static std::uniform_int_distribution<int16_t> distribution(
-		std::numeric_limits<int16_t>::min(), std::numeric_limits<int16_t>::max());
-	static std::default_random_engine generator;
-
-	if (buffer[0].size() == 2) {
-		// If there are only 2 channels then we'll do a sine wave, pretending this is an audio device.
-		for (auto &frame : buffer) {
-			frame[0] = static_cast<int16_t>(100 * sin(sample_counter / 200.));
-			frame[1] = static_cast<int16_t>(120 * sin(sample_counter / 400.));
-			sample_counter++;
+struct fake_device {
+	/*
+	We create a fake device that will generate data. The inner details are not
+	so important because typically it will be up to the real data source + SDK
+	to provide a way to get data.
+	*/
+	std::size_t n_channels;
+	double srate;
+	int64_t pattern_samples;
+	int64_t head;
+	std::vector<int16_t> pattern;
+	std::chrono::steady_clock::time_point last_time;
+
+	fake_device(const int16_t n_channels, const float srate)
+		: n_channels(n_channels), srate(srate), head(0) {
+		pattern_samples = (int64_t)(srate - 0.5) + 1;  // truncate OK.
+
+		// Pre-allocate entire test pattern. The data _could_ be generated on the fly
+		//  for a much smaller memory hit, but we also use this example application
+		//  to test LSL Outlet performance so we want to reduce out-of-LSL CPU
+		//  utilization.
+		int64_t magnitude = std::numeric_limits<int16_t>::max();
+		int64_t offset_0 = magnitude / 2;
+		int64_t offset_step = magnitude / n_channels;
+		pattern.reserve(pattern_samples * n_channels);
+		for (auto sample_ix = 0; sample_ix < pattern_samples; ++sample_ix) {
+			for (auto chan_ix = 0; chan_ix < n_channels; ++chan_ix) {
+				pattern.emplace_back(
+					offset_0 + chan_ix * offset_step +
+					magnitude * static_cast<int16_t>(sin(M_PI * chan_ix * sample_ix / n_channels)));
+			}
 		}
+		last_time = std::chrono::high_resolution_clock::now();
+	}
+
+	std::vector<int16_t> get_data() {
+		auto now = std::chrono::steady_clock::now();
+		auto elapsed_nano =
+			std::chrono::duration_cast<std::chrono::nanoseconds>(now - last_time).count();
+		std::size_t elapsed_samples = std::size_t(elapsed_nano * srate * 1e-9); // truncate OK.
+		std::vector<int16_t> result;
+		result.resize(elapsed_samples * n_channels);
+		int64_t ret_samples = get_data(result);
+		std::vector<int16_t> output(result.begin(), result.begin() + ret_samples);
+		return output;
 	}
-	else {
-		for (auto &frame : buffer) {
-			for (std::size_t chan_idx = 0; chan_idx < frame.size(); ++chan_idx) {
-				frame[chan_idx] = distribution(generator);
+
+	std::size_t get_data(std::vector<int16_t> &buffer) {
+		auto now = std::chrono::steady_clock::now();
+		auto elapsed_nano =
+			std::chrono::duration_cast<std::chrono::nanoseconds>(now - last_time).count();
+		int64_t elapsed_samples = std::size_t(elapsed_nano * srate * 1e-9); // truncate OK.
+		elapsed_samples = std::min(elapsed_samples, (int64_t)buffer.size());
+		if (false) {
+			// The fastest but no patterns.
+			memset(&buffer[0], 23, buffer.size() * sizeof buffer[0]);
+		} else {
+			std::size_t end_sample = head + elapsed_samples;
+			std::size_t nowrap_samples = std::min(pattern_samples - head, elapsed_samples);
+			memcpy(&buffer[0], &(pattern[head]), nowrap_samples);
+			if (end_sample > pattern_samples) {
+				memcpy(&buffer[nowrap_samples], &(pattern[0]), elapsed_samples - nowrap_samples);
 			}
-			sample_counter++;
 		}
+		head = (head + elapsed_samples) % pattern_samples;
+		last_time += std::chrono::nanoseconds(int64_t(1e9 * elapsed_samples / srate));
+		return elapsed_samples;
 	}
-}
+};
 
 int main(int argc, char **argv) {
 	std::cout << "SendDataInChunks" << std::endl;
@@ -44,14 +93,15 @@ int main(int argc, char **argv) {
 	std::cout << "- chunk_rate -- number of chunks pushed per second. For this example, make it a common factor of samplingrate and 1000." << std::endl;
 	
 	std::string name{argc > 1 ? argv[1] : "MyAudioStream"}, type{argc > 2 ? argv[2] : "Audio"};
-	int samplingrate = argc > 3 ? std::stol(argv[3]) : 44100;
-	int n_channels = argc > 4 ? std::stol(argv[4]) : 2;
+	int samplingrate = argc > 3 ? std::stol(argv[3]) : 44100;  // Here we specify srate, but typically this would come from the device.
+	int n_channels = argc > 4 ? std::stol(argv[4]) : 2;        // Here we specify n_chans, but typically this would come from the device.
 	int32_t max_buffered = argc > 5 ? std::stol(argv[5]) : 360;
 	int32_t chunk_rate = argc > 6 ? std::stol(argv[6]) : 10;  // Chunks per second.
 	int32_t chunk_samples = samplingrate > 0 ? std::max((samplingrate / chunk_rate), 1) : 100;  // Samples per chunk.
 	int32_t chunk_duration = 1000 / chunk_rate;  // Milliseconds per chunk
 
 	try {
+		// Prepare the LSL stream.
 		lsl::stream_info info(name, type, n_channels, samplingrate, lsl::cf_int16);
 		lsl::stream_outlet outlet(info, 0, max_buffered);
 		lsl::xml_element desc = info.desc();
@@ -64,10 +114,12 @@ int main(int argc, char **argv) {
 			chn.append_child_value("type", "EEG");
 		}
 
-		// Prepare buffer to get data from 'device'
-		std::vector<std::vector<int16_t>> chunk_buffer(
-			chunk_samples,
-			std::vector<int16_t>(n_channels));
+		// Create a connection to our device.
+		fake_device my_device(n_channels, (float)samplingrate);
+
+		// Prepare buffer to get data from 'device'.
+		//  The buffer should be larger than you think you need. Here we make it twice as large.
+		std::vector<int16_t> chunk_buffer(2 * chunk_samples * n_channels);
 
 		std::cout << "Now sending data..." << std::endl;
 
@@ -76,16 +128,16 @@ int main(int argc, char **argv) {
 		auto nextsample = std::chrono::high_resolution_clock::now();
 		uint64_t sample_counter = 0;
 		for (unsigned c = 0;; c++) {
-
 			// wait a bit
 			nextsample += std::chrono::milliseconds(chunk_duration);
 			std::this_thread::sleep_until(nextsample);
 
 			// Get data from device
-			get_data_from_device(chunk_buffer, sample_counter);
+			std::size_t returned_samples = my_device.get_data(chunk_buffer);
 
-			// send it to the outlet
-			outlet.push_chunk(chunk_buffer);
+			// send it to the outlet. push_chunk_multiplexed is one of the more complicated approaches.
+			//  other push_chunk methods are easier but slightly slower.
+			outlet.push_chunk_multiplexed(chunk_buffer.data(), returned_samples * n_channels, 0.0, true);
 		}
 
 	} catch (std::exception &e) { std::cerr << "Got an exception: " << e.what() << std::endl; }