From f14195dbf4800f124308614c0b2410d01937a5b5 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Tue, 4 Nov 2025 12:22:40 +0300 Subject: [PATCH 01/36] 1 --- CMakeLists.txt | 28 +++-- include/layers/EWLayer.hpp | 94 +++++++++----- include/layers/Layer.hpp | 132 +++++++++++++++++++ src/layers/EWLayer.cpp | 15 ++- test/CMakeLists.txt | 2 +- test/single_layer/test_ewlayer.cpp | 196 +++++++++++++++++++++++++++++ 6 files changed, 419 insertions(+), 48 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d609a282..b131bacd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,18 +30,28 @@ add_subdirectory(3rdparty) include(cmake/opencv_config.cmake) -if (NOT WIN32) - find_package(OpenMP REQUIRED) -endif() - -if (NOT WIN32) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Werror") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") +find_package(OpenMP) + +if(OpenMP_FOUND) + message(STATUS "OpenMP found - enabling parallel support") + add_definitions(-DHAS_OPENMP) + if(TARGET OpenMP::OpenMP_CXX) + set(OPENMP_TARGET OpenMP::OpenMP_CXX) + message(STATUS "Using OpenMP target: OpenMP::OpenMP_CXX") + else() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + if(OpenMP_CXX_LIBRARIES) + set(OPENMP_LIBRARIES ${OpenMP_CXX_LIBRARIES}) + endif() + message(STATUS "OpenMP CXX flags: ${OpenMP_CXX_FLAGS}") + message(STATUS "OpenMP libraries: ${OpenMP_CXX_LIBRARIES}") + endif() else() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4 /wd4996 /wd4190 /wd4189 /WX") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /wd4996 /wd4190 /wd4189 /WX") + message(STATUS "OpenMP not found - parallel features disabled") endif() + foreach(CONFIG "" _DEBUG _RELEASE) set("CMAKE_ARCHIVE_OUTPUT_DIRECTORY${CONFIG}" "${CMAKE_BINARY_DIR}/lib") set("CMAKE_LIBRARY_OUTPUT_DIRECTORY${CONFIG}" "${CMAKE_BINARY_DIR}/lib") diff --git a/include/layers/EWLayer.hpp b/include/layers/EWLayer.hpp index 7361689c..22a2fd25 100644 --- a/include/layers/EWLayer.hpp +++ b/include/layers/EWLayer.hpp @@ -46,7 +46,7 @@ class EWLayerImpl : public LayerImpl { public: EWLayerImpl() = delete; EWLayerImpl(const Shape& shape, std::string function, float alpha = 0.0F, - float beta = 0.0F); + float beta = 0.0F, int type_parall = 0); EWLayerImpl(const EWLayerImpl& c) = default; EWLayerImpl& operator=(const EWLayerImpl& c) = default; std::vector run( @@ -56,57 +56,83 @@ class EWLayerImpl : public LayerImpl { std::string func_; float alpha_; float beta_; + int type_parall_; }; template EWLayerImpl::EWLayerImpl(const Shape& shape, std::string function, - float alpha, float beta) + float alpha, float beta, int type_parall) : LayerImpl(shape, shape), func_(std::move(function)), alpha_(alpha), - beta_(beta) {} + beta_(beta), + type_parall_(type_parall) {} template std::vector EWLayerImpl::run( const std::vector& input) const { std::vector res(this->outputShape_.count()); + int available_threads = -1; + if (type_parall_ == 0) available_threads = 1; + if (type_parall_ == 1) + available_threads = std::thread::hardware_concurrency(); + if (type_parall_ == 2) + available_threads = oneapi::tbb::info::default_concurrency(); + if (type_parall_ == 3) available_threads = omp_get_max_threads(); + if (func_ == "relu") { - std::transform(input.begin(), input.end(), res.begin(), relu); + parallel_for( + input.size(), + [&](int i) { + res[i] = input[i] > ValueType(0) ? 
input[i] : ValueType(0); + }, + type_parall_); } else if (func_ == "tanh") { - auto tanh = [&](const ValueType& value) -> ValueType { - return static_cast(std::tanh(value)); - }; - std::transform(input.begin(), input.end(), res.begin(), tanh); + parallel_for( + input.size(), + [&](int i) { res[i] = static_cast(std::tanh(input[i])); }, + type_parall_); } else if (func_ == "sin") { - auto sin = [&](const ValueType& value) -> ValueType { - return static_cast(std::sin(value)); - }; - std::transform(input.begin(), input.end(), res.begin(), sin); + parallel_for( + input.size(), + [&](int i) { res[i] = static_cast(std::sin(input[i])); }, + type_parall_); } else if (func_ == "minus") { - auto minus = [&](const ValueType& value) -> ValueType { return -value; }; - std::transform(input.begin(), input.end(), res.begin(), minus); + parallel_for( + input.size(), [&](int i) { res[i] = -input[i]; }, type_parall_); } else if (func_ == "linear") { - auto linear = [&](const ValueType& value) -> ValueType { - return value * static_cast(alpha_) + - static_cast(beta_); - }; - std::transform(input.begin(), input.end(), res.begin(), linear); + parallel_for( + input.size(), + [&](int i) { + res[i] = input[i] * static_cast(alpha_) + + static_cast(beta_); + }, + type_parall_); } else if (func_ == "sigmoid") { - auto sigmoid = [](ValueType x) -> ValueType { - if constexpr (std::is_integral_v) { - auto x_float = static_cast(x); - float result = 1.0F / (1.0F + std::exp(-x_float)); - return static_cast(std::round(result)); - } else { - if (x >= ValueType(0)) { - ValueType z = std::exp(-x); - return ValueType(1) / (ValueType(1) + z); - } - ValueType z = std::exp(x); - return z / (ValueType(1) + z); - } - }; - std::transform(input.cbegin(), input.cend(), res.begin(), sigmoid); + if constexpr (std::is_integral_v) { + parallel_for( + input.size(), + [&](int i) { + auto x_float = static_cast(input[i]); + float result = 1.0F / (1.0F + std::exp(-x_float)); + res[i] = static_cast(std::round(result)); + }, + type_parall_); + } else { + parallel_for( + input.size(), + [&](int i) { + ValueType x = input[i]; + if (x >= ValueType(0)) { + ValueType z = std::exp(-x); + res[i] = ValueType(1) / (ValueType(1) + z); + } else { + ValueType z = std::exp(x); + res[i] = z / (ValueType(1) + z); + } + }, + type_parall_); + } } else { throw std::invalid_argument("No such function for EWLayer"); } diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp index 2da4e0a5..0c75a976 100644 --- a/include/layers/Layer.hpp +++ b/include/layers/Layer.hpp @@ -1,5 +1,11 @@ #pragma once +#include + +#include +#include +#include #include +#include #include #include #include @@ -49,6 +55,7 @@ class Layer { PostOperations postops; int getID() const { return id_; } void setID(int id) { id_ = id; } + void setTypeParall(int type) { type_parall_ = type; } LayerType getName() const { return type_; } virtual void run(const std::vector& input, std::vector& output) = 0; @@ -59,6 +66,7 @@ class Layer { protected: int id_ = 0; LayerType type_; + int type_parall_; }; template @@ -83,4 +91,128 @@ class LayerImpl { Shape outputShape_; }; +template +inline void parallel_for(int count, Func func, int mode = 0) { + static bool stl_available = true; + static bool tbb_available = true; + static bool omp_available = true; + const int MIN_CHUNK_SIZE = 1000; + if (count < MIN_CHUNK_SIZE) { + mode = 0; + } + + switch (mode) { + case 0: // Sequential + { + for (int i = 0; i < count; ++i) { + func(i); + } + break; + } + + case 1: // STL + { + if (stl_available) { + try { + int 
num_threads =
+              static_cast<int>(std::thread::hardware_concurrency());
+          if (num_threads == 0) num_threads = 4;
+
+          int min_chunk_size = std::max(1000, count / (num_threads * 4));
+          if (count / num_threads < min_chunk_size) {
+            num_threads = std::max(1, count / min_chunk_size);
+          }
+
+          std::vector<std::thread> threads;
+          threads.reserve(num_threads);
+
+          int chunk_size = count / num_threads;
+          int remainder = count % num_threads;
+
+          int start = 0;
+          for (int t = 0; t < num_threads; ++t) {
+            int end = start + chunk_size + (t < remainder ? 1 : 0);
+            if (start >= end) break;
+
+            threads.emplace_back([start, end, &func]() {
+              for (int i = start; i < end; ++i) {
+                func(i);
+              }
+            });
+
+            start = end;
+          }
+
+          for (auto& thread : threads) {
+            thread.join();
+          }
+
+        } catch (const std::exception& e) {
+          std::cout << "Thread execution failed: " << e.what()
+                    << ". Falling back to sequential.\n";
+          stl_available = false;
+          for (int i = 0; i < count; ++i) func(i);
+        }
+      } else {
+        for (int i = 0; i < count; ++i) func(i);
+      }
+      break;
+    }
+
+    case 2: // Intel TBB
+    {
+      if (tbb_available) {
+        try {
+          oneapi::tbb::parallel_for(
+              oneapi::tbb::blocked_range<int>(0, count),
+              [&](const oneapi::tbb::blocked_range<int>& range) {
+                for (int i = range.begin(); i < range.end(); ++i) {
+                  func(i);
+                }
+              },
+              oneapi::tbb::auto_partitioner());
+        } catch (const std::exception& e) {
+          std::cout << "TBB execution failed: " << e.what()
+                    << ". Falling back to sequential.\n";
+          tbb_available = false;
+          for (int i = 0; i < count; ++i) func(i);
+        }
+      } else {
+        for (int i = 0; i < count; ++i) func(i);
+      }
+      break;
+    }
+
+    case 3: // OpenMP - IMPROVED VERSION
+    {
+      if (omp_available) {
+        try {
+          // Optimal configuration for OpenMP
+          int num_threads = omp_get_max_threads();
+
+          // Tune the chunk size to minimize overhead
+          int chunk_size = std::max(1000, count / (num_threads * 8));
+
+// Set the optimal schedule
+#pragma omp parallel for schedule(static, chunk_size) num_threads(num_threads)
+          for (int i = 0; i < count; ++i) {
+            func(i);
+          }
+
+        } catch (...) {
+          std::cout << "OpenMP execution failed. 
Falling back to sequential.\n"; + omp_available = false; + for (int i = 0; i < count; ++i) func(i); + } + } else { + for (int i = 0; i < count; ++i) func(i); + } + break; + } + + default: + for (int i = 0; i < count; ++i) func(i); + } +} + } // namespace it_lab_ai diff --git a/src/layers/EWLayer.cpp b/src/layers/EWLayer.cpp index dc86b381..72c9f6c8 100644 --- a/src/layers/EWLayer.cpp +++ b/src/layers/EWLayer.cpp @@ -9,13 +9,20 @@ void EWLayer::run(const std::vector& input, } switch (input[0].get_type()) { case Type::kInt: { - EWLayerImpl used_impl(input[0].get_shape(), func_, alpha_, beta_); - output[0] = - make_tensor(used_impl.run(*input[0].as()), input[0].get_shape()); + EWLayerImpl used_impl(input[0].get_shape(), func_, alpha_, beta_, + type_parall_); + std::vector tmp = used_impl.run(*input[0].as()); + auto start = std::chrono::high_resolution_clock::now(); + output[0] = make_tensor(tmp, input[0].get_shape()); + auto end = std::chrono::high_resolution_clock::now(); + auto total_duration = + std::chrono::duration_cast(end - start); + std::cout << total_duration.count() << std::endl; break; } case Type::kFloat: { - EWLayerImpl used_impl(input[0].get_shape(), func_, alpha_, beta_); + EWLayerImpl used_impl(input[0].get_shape(), func_, alpha_, beta_, + type_parall_); output[0] = make_tensor(used_impl.run(*input[0].as()), input[0].get_shape()); break; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 318354de..724ae6ca 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,7 +1,7 @@ file(GLOB_RECURSE TEST_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) add_executable(run_test ${TEST_SRC_FILES}) -if (NOT WIN32) +if(OpenMP_FOUND) target_link_libraries(run_test PUBLIC OpenMP::OpenMP_CXX) endif() target_link_libraries(run_test PUBLIC perf_lib layers_lib) diff --git a/test/single_layer/test_ewlayer.cpp b/test/single_layer/test_ewlayer.cpp index 65547b2a..fc63c658 100644 --- a/test/single_layer/test_ewlayer.cpp +++ b/test/single_layer/test_ewlayer.cpp @@ -215,3 +215,199 @@ TEST(ewlayer, new_ewlayer_can_sigmoid_float_extreme_values) { EXPECT_NEAR((*out[0].as())[i], expected_output[i], 1e-5F); } } + +TEST(ewlayer, parallel_for_ew) { + EWLayer layer0("relu"); + layer0.setTypeParall(0); + EWLayer layer1("relu"); + layer1.setTypeParall(1); + EWLayer layer2("relu"); + layer2.setTypeParall(2); + EWLayer layer3("relu"); + layer3.setTypeParall(3); + std::vector vec(800000000, -1); + Tensor input = make_tensor(vec); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + auto start = std::chrono::high_resolution_clock::now(); + layer0.run(in, out); + auto end = std::chrono::high_resolution_clock::now(); + auto total_duration = + std::chrono::duration_cast(end - start); + std::cout << total_duration.count() << std::endl; + for (size_t i = 0; i < 800000000; i++) { + EXPECT_EQ((*out[0].as())[i], 0); + } + + start = std::chrono::high_resolution_clock::now(); + layer1.run(in, out); + end = std::chrono::high_resolution_clock::now(); + total_duration = + std::chrono::duration_cast(end - start); + std::cout << total_duration.count() << std::endl; + for (size_t i = 0; i < 800000000; i++) { + EXPECT_EQ((*out[0].as())[i], 0); + } + + start = std::chrono::high_resolution_clock::now(); + layer2.run(in, out); + end = std::chrono::high_resolution_clock::now(); + total_duration = + std::chrono::duration_cast(end - start); + std::cout << total_duration.count() << std::endl; + for (size_t i = 0; i < 800000000; i++) { + EXPECT_EQ((*out[0].as())[i], 0); + } + + start = 
std::chrono::high_resolution_clock::now(); + layer3.run(in, out); + end = std::chrono::high_resolution_clock::now(); + total_duration = + std::chrono::duration_cast(end - start); + std::cout << total_duration.count() << std::endl; + for (size_t i = 0; i < 800000000; i++) { + EXPECT_EQ((*out[0].as())[i], 0); + } +} + +TEST(ewlayer, parallel_for_ew_sigmoid) { + EWLayer layer0("sigmoid"); + layer0.setTypeParall(0); + EWLayer layer1("sigmoid"); + layer1.setTypeParall(1); + EWLayer layer2("sigmoid"); + layer2.setTypeParall(2); + EWLayer layer3("sigmoid"); + layer3.setTypeParall(3); + std::vector vec(800000000, -1); + Tensor input = make_tensor(vec); + Tensor output; + std::vector in{input}; + std::vector out{output}; + + auto start = std::chrono::high_resolution_clock::now(); + layer0.run(in, out); + auto end = std::chrono::high_resolution_clock::now(); + auto total_duration = + std::chrono::duration_cast(end - start); + std::cout << total_duration.count() << std::endl; + + start = std::chrono::high_resolution_clock::now(); + layer1.run(in, out); + end = std::chrono::high_resolution_clock::now(); + total_duration = + std::chrono::duration_cast(end - start); + std::cout << total_duration.count() << std::endl; + + start = std::chrono::high_resolution_clock::now(); + layer2.run(in, out); + end = std::chrono::high_resolution_clock::now(); + total_duration = + std::chrono::duration_cast(end - start); + std::cout << total_duration.count() << std::endl; + + start = std::chrono::high_resolution_clock::now(); + layer3.run(in, out); + end = std::chrono::high_resolution_clock::now(); + total_duration = + std::chrono::duration_cast(end - start); + std::cout << total_duration.count() << std::endl; + + EXPECT_EQ(0, 0); +} + +TEST(ewlayer, parallel_for_) { + const int SIZE = 20000; + std::vector matrix1(SIZE * SIZE); + std::vector matrix2(SIZE * SIZE); + std::vector result(SIZE * SIZE); + + for (int i = 0; i < SIZE * SIZE; ++i) { + matrix1[i] = 1; + matrix2[i] = 1; + } + + auto start = std::chrono::high_resolution_clock::now(); + parallel_for( + SIZE * SIZE, [&](int i) { result[i] = matrix1[i] + matrix2[i]; }, 0); + + auto end = std::chrono::high_resolution_clock::now(); + auto total_duration = + std::chrono::duration_cast(end - start); + std::cout << total_duration.count() << std::endl; + + for (int i = 0; i < SIZE * SIZE; i++) ASSERT_EQ(result[i], 2); + + start = std::chrono::high_resolution_clock::now(); + parallel_for( + SIZE * SIZE, [&](int i) { result[i] = matrix1[i] + matrix2[i]; }, 1); + end = std::chrono::high_resolution_clock::now(); + total_duration = + std::chrono::duration_cast(end - start); + std::cout << total_duration.count() << std::endl; + for (int i = 0; i < SIZE * SIZE; i++) ASSERT_EQ(result[i], 2); + + start = std::chrono::high_resolution_clock::now(); + parallel_for( + SIZE * SIZE, [&](int i) { result[i] = matrix1[i] + matrix2[i]; }, 2); + end = std::chrono::high_resolution_clock::now(); + total_duration = + std::chrono::duration_cast(end - start); + std::cout << total_duration.count() << std::endl; + for (int i = 0; i < SIZE * SIZE; i++) ASSERT_EQ(result[i], 2); + + start = std::chrono::high_resolution_clock::now(); + parallel_for( + SIZE * SIZE, [&](int i) { result[i] = matrix1[i] + matrix2[i]; }, 3); + end = std::chrono::high_resolution_clock::now(); + total_duration = + std::chrono::duration_cast(end - start); + std::cout << total_duration.count() << std::endl; + for (int i = 0; i < SIZE * SIZE; i++) ASSERT_EQ(result[i], 2); +} + +TEST(ewlayer, parallel_for_notmatrix) { + const int 
SIZE = 30000;
+  std::vector<int> matrix1(SIZE * SIZE);
+  std::vector<int> result(SIZE * SIZE);
+
+  for (int i = 0; i < SIZE * SIZE; ++i) {
+    matrix1[i] = 1;
+  }
+
+  auto start = std::chrono::high_resolution_clock::now();
+  parallel_for(SIZE * SIZE, [&](int i) { result[i] = matrix1[i] + 1; }, 0);
+
+  auto end = std::chrono::high_resolution_clock::now();
+  auto total_duration =
+      std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+  std::cout << total_duration.count() << std::endl;
+
+  for (int i = 0; i < SIZE * SIZE; i++) ASSERT_EQ(result[i], 2);
+
+  start = std::chrono::high_resolution_clock::now();
+  parallel_for(SIZE * SIZE, [&](int i) { result[i] = matrix1[i] + 1; }, 1);
+  end = std::chrono::high_resolution_clock::now();
+  total_duration =
+      std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+  std::cout << total_duration.count() << std::endl;
+  for (int i = 0; i < SIZE * SIZE; i++) ASSERT_EQ(result[i], 2);
+
+  start = std::chrono::high_resolution_clock::now();
+  parallel_for(SIZE * SIZE, [&](int i) { result[i] = matrix1[i] + 1; }, 2);
+  end = std::chrono::high_resolution_clock::now();
+  total_duration =
+      std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+  std::cout << total_duration.count() << std::endl;
+  for (int i = 0; i < SIZE * SIZE; i++) ASSERT_EQ(result[i], 2);
+
+  start = std::chrono::high_resolution_clock::now();
+  parallel_for(SIZE * SIZE, [&](int i) { result[i] = matrix1[i] + 1; }, 3);
+  end = std::chrono::high_resolution_clock::now();
+  total_duration =
+      std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+  std::cout << total_duration.count() << std::endl;
+  for (int i = 0; i < SIZE * SIZE; i++) ASSERT_EQ(result[i], 2);
+}

From b19191f04f38e2d8a1cb55c1dbf346880934f236 Mon Sep 17 00:00:00 2001
From: AndreySorokin7
Date: Wed, 5 Nov 2025 19:17:36 +0300
Subject: [PATCH 02/36] fix

---
 include/layers/Layer.hpp  | 5 +----
 src/layers/CMakeLists.txt | 3 +++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp
index 0c75a976..fc665846 100644
--- a/include/layers/Layer.hpp
+++ b/include/layers/Layer.hpp
@@ -183,17 +183,14 @@ inline void parallel_for(int count, Func func, int mode = 0) {
       break;
     }
 
-    case 3: // OpenMP - IMPROVED VERSION
+    case 3: // OpenMP
     {
       if (omp_available) {
         try {
-          // Optimal configuration for OpenMP
           int num_threads = omp_get_max_threads();
 
-          // Tune the chunk size to minimize overhead
           int chunk_size = std::max(1000, count / (num_threads * 8));
 
-// Set the optimal schedule
 #pragma omp parallel for schedule(static, chunk_size) num_threads(num_threads)
           for (int i = 0; i < count; ++i) {
             func(i);
diff --git a/src/layers/CMakeLists.txt b/src/layers/CMakeLists.txt
index d3426e04..5924b0c4 100644
--- a/src/layers/CMakeLists.txt
+++ b/src/layers/CMakeLists.txt
@@ -1,3 +1,6 @@
 file(GLOB_RECURSE layers_src *.cpp)
 add_library(layers_lib STATIC "${LAYERS_HEADERS}" "${layers_src}")
 target_link_libraries(layers_lib PUBLIC TBB_unified)
+if(OpenMP_FOUND)
+  target_link_libraries(layers_lib PUBLIC OpenMP::OpenMP_CXX)
+endif()

From 30a33ff2948df540e932148fc21c69043ff59392 Mon Sep 17 00:00:00 2001
From: AndreySorokin7
Date: Wed, 5 Nov 2025 19:37:50 +0300
Subject: [PATCH 03/36] fix

---
 src/layers/CMakeLists.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/layers/CMakeLists.txt b/src/layers/CMakeLists.txt
index 8710e1c7..7f22b872 100644
--- a/src/layers/CMakeLists.txt
+++ b/src/layers/CMakeLists.txt
@@ -1,7 +1,7 @@
 file(GLOB_RECURSE layers_src *.cpp)
 add_library(layers_lib STATIC "${LAYERS_HEADERS}" "${layers_src}")
target_link_libraries(layers_lib PUBLIC TBB_unified) -if(OpenMP_FOUND) - target_link_libraries(layers_lib PUBLIC OpenMP::OpenMP_CXX) -endif() +# if(OpenMP_FOUND) +# target_link_libraries(layers_lib PUBLIC OpenMP::OpenMP_CXX) +# endif() target_link_libraries(layers_lib PUBLIC dnnl) From 4a3d16ec816b4e72258bffa61f3ca1b14623cc77 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Wed, 5 Nov 2025 19:47:03 +0300 Subject: [PATCH 04/36] fix --- app/Accuracy/CMakeLists.txt | 3 +++ app/AccuracyImgNet/CMakeLists.txt | 3 +++ 2 files changed, 6 insertions(+) diff --git a/app/Accuracy/CMakeLists.txt b/app/Accuracy/CMakeLists.txt index b4010c21..f4c5e4d0 100644 --- a/app/Accuracy/CMakeLists.txt +++ b/app/Accuracy/CMakeLists.txt @@ -13,6 +13,9 @@ target_link_libraries( ACCLib gtest_main) add_executable(Accuracy_Check accuracy_check.cpp) target_link_libraries(Accuracy_Check ACCLib) +if(OpenMP_FOUND) + target_link_libraries(Accuracy_Check PUBLIC OpenMP::OpenMP_CXX) +endif() file(DOWNLOAD "https://raw.githubusercontent.com/opencv/opencv/4.x/samples/data/lena.jpg" diff --git a/app/AccuracyImgNet/CMakeLists.txt b/app/AccuracyImgNet/CMakeLists.txt index a61fa703..6bace778 100644 --- a/app/AccuracyImgNet/CMakeLists.txt +++ b/app/AccuracyImgNet/CMakeLists.txt @@ -6,3 +6,6 @@ target_link_libraries( ACCImgNet ${OpenCV_LIBS} ) target_link_libraries( ACCImgNet TBB_unified) target_link_libraries( ACCImgNet layers_lib) target_link_libraries( ACCImgNet gtest_main) +if(OpenMP_FOUND) + target_link_libraries(ACCImgNet PUBLIC OpenMP::OpenMP_CXX) +endif() From 406387dc4ed243ff0387ba44eeeab1ceffa9bada Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Wed, 5 Nov 2025 19:52:30 +0300 Subject: [PATCH 05/36] fix --- app/Accuracy/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/app/Accuracy/CMakeLists.txt b/app/Accuracy/CMakeLists.txt index f4c5e4d0..b4010c21 100644 --- a/app/Accuracy/CMakeLists.txt +++ b/app/Accuracy/CMakeLists.txt @@ -13,9 +13,6 @@ target_link_libraries( ACCLib gtest_main) add_executable(Accuracy_Check accuracy_check.cpp) target_link_libraries(Accuracy_Check ACCLib) -if(OpenMP_FOUND) - target_link_libraries(Accuracy_Check PUBLIC OpenMP::OpenMP_CXX) -endif() file(DOWNLOAD "https://raw.githubusercontent.com/opencv/opencv/4.x/samples/data/lena.jpg" From 6f827962a6ceb63a405a146c4dd29ae13bef3199 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Wed, 5 Nov 2025 20:01:44 +0300 Subject: [PATCH 06/36] fix --- app/AccuracyImgNet/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/app/AccuracyImgNet/CMakeLists.txt b/app/AccuracyImgNet/CMakeLists.txt index 6bace778..a61fa703 100644 --- a/app/AccuracyImgNet/CMakeLists.txt +++ b/app/AccuracyImgNet/CMakeLists.txt @@ -6,6 +6,3 @@ target_link_libraries( ACCImgNet ${OpenCV_LIBS} ) target_link_libraries( ACCImgNet TBB_unified) target_link_libraries( ACCImgNet layers_lib) target_link_libraries( ACCImgNet gtest_main) -if(OpenMP_FOUND) - target_link_libraries(ACCImgNet PUBLIC OpenMP::OpenMP_CXX) -endif() From 3cb82635c440d37c49e486d54e38c27a50e1707d Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Wed, 12 Nov 2025 16:24:56 +0300 Subject: [PATCH 07/36] fix --- include/CMakeLists.txt | 3 + include/layers/EWLayer.hpp | 64 +++++++------- include/layers/Layer.hpp | 132 ++--------------------------- include/parallel/backends.hpp | 119 ++++++++++++++++++++++++++ include/parallel/parallel.hpp | 76 +++++++++++++++++ src/layers/EWLayer.cpp | 12 ++- test/single_layer/test_ewlayer.cpp | 98 +++++++++++++-------- 7 files changed, 302 insertions(+), 202 
deletions(-) create mode 100644 include/parallel/backends.hpp create mode 100644 include/parallel/parallel.hpp diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index 760af1d8..781e1cb9 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -15,3 +15,6 @@ set(PERF_HEADERS "${perf_headers}" PARENT_SCOPE) file(GLOB_RECURSE reader_headers Weights_Reader/*.h Weights_Reader/*.hpp) set(READER_HEADERS "${reader_headers}" PARENT_SCOPE) + +file(GLOB_RECURSE parallel_headers parallel/*.h parallel/*.hpp) +set(READER_HEADERS "${parallel_headers}" PARENT_SCOPE) diff --git a/include/layers/EWLayer.hpp b/include/layers/EWLayer.hpp index 22a2fd25..29917ffe 100644 --- a/include/layers/EWLayer.hpp +++ b/include/layers/EWLayer.hpp @@ -46,7 +46,7 @@ class EWLayerImpl : public LayerImpl { public: EWLayerImpl() = delete; EWLayerImpl(const Shape& shape, std::string function, float alpha = 0.0F, - float beta = 0.0F, int type_parall = 0); + float beta = 0.0F, ParBackend parallel_backend = ParBackend::Seq); EWLayerImpl(const EWLayerImpl& c) = default; EWLayerImpl& operator=(const EWLayerImpl& c) = default; std::vector run( @@ -56,72 +56,74 @@ class EWLayerImpl : public LayerImpl { std::string func_; float alpha_; float beta_; - int type_parall_; + ParBackend parallel_backend_; }; template EWLayerImpl::EWLayerImpl(const Shape& shape, std::string function, - float alpha, float beta, int type_parall) + float alpha, float beta, + ParBackend parallel_backend) : LayerImpl(shape, shape), func_(std::move(function)), alpha_(alpha), beta_(beta), - type_parall_(type_parall) {} + parallel_backend_(parallel_backend) {} template std::vector EWLayerImpl::run( const std::vector& input) const { std::vector res(this->outputShape_.count()); - int available_threads = -1; - if (type_parall_ == 0) available_threads = 1; - if (type_parall_ == 1) - available_threads = std::thread::hardware_concurrency(); - if (type_parall_ == 2) - available_threads = oneapi::tbb::info::default_concurrency(); - if (type_parall_ == 3) available_threads = omp_get_max_threads(); + + // Получаем настройки параллельности + parallel::Options options; + options.backend = parallel_backend_; if (func_ == "relu") { - parallel_for( + parallel::parallel_for( input.size(), - [&](int i) { + [&](std::size_t i) { res[i] = input[i] > ValueType(0) ? 
input[i] : ValueType(0); }, - type_parall_); + options); } else if (func_ == "tanh") { - parallel_for( + parallel::parallel_for( input.size(), - [&](int i) { res[i] = static_cast(std::tanh(input[i])); }, - type_parall_); + [&](std::size_t i) { + res[i] = static_cast(std::tanh(input[i])); + }, + options); } else if (func_ == "sin") { - parallel_for( + parallel::parallel_for( input.size(), - [&](int i) { res[i] = static_cast(std::sin(input[i])); }, - type_parall_); + [&](std::size_t i) { + res[i] = static_cast(std::sin(input[i])); + }, + options); } else if (func_ == "minus") { - parallel_for( - input.size(), [&](int i) { res[i] = -input[i]; }, type_parall_); + parallel::parallel_for( + input.size(), [&](std::size_t i) { res[i] = -input[i]; }, options); } else if (func_ == "linear") { - parallel_for( + parallel::parallel_for( input.size(), - [&](int i) { + [&](std::size_t i) { res[i] = input[i] * static_cast(alpha_) + static_cast(beta_); }, - type_parall_); + options); } else if (func_ == "sigmoid") { if constexpr (std::is_integral_v) { - parallel_for( + parallel::parallel_for( input.size(), - [&](int i) { + [&](std::size_t i) { auto x_float = static_cast(input[i]); float result = 1.0F / (1.0F + std::exp(-x_float)); res[i] = static_cast(std::round(result)); }, - type_parall_); + options); } else { - parallel_for( + parallel::parallel_for( input.size(), - [&](int i) { + [&](std::size_t i) { ValueType x = input[i]; if (x >= ValueType(0)) { ValueType z = std::exp(-x); @@ -131,7 +133,7 @@ std::vector EWLayerImpl::run( res[i] = z / (ValueType(1) + z); } }, - type_parall_); + options); } } else { throw std::invalid_argument("No such function for EWLayer"); diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp index fc665846..61d42564 100644 --- a/include/layers/Layer.hpp +++ b/include/layers/Layer.hpp @@ -1,6 +1,4 @@ #pragma once -#include - #include #include #include @@ -13,7 +11,7 @@ #include "layers/Shape.hpp" #include "layers/Tensor.hpp" -#include "oneapi/tbb.h" +#include "parallel/parallel.hpp" namespace it_lab_ai { @@ -39,6 +37,7 @@ enum LayerType : uint8_t { }; enum ImplType : uint8_t { kDefault, kTBB, kSTL }; +using ParBackend = parallel::Backend; class Layer; @@ -55,7 +54,8 @@ class Layer { PostOperations postops; int getID() const { return id_; } void setID(int id) { id_ = id; } - void setTypeParall(int type) { type_parall_ = type; } + void setParallelBackend(ParBackend backend) { parallel_backend_ = backend; } + ParBackend getParallelBackend() const { return parallel_backend_; } LayerType getName() const { return type_; } virtual void run(const std::vector& input, std::vector& output) = 0; @@ -66,7 +66,7 @@ class Layer { protected: int id_ = 0; LayerType type_; - int type_parall_; + ParBackend parallel_backend_ = ParBackend::Seq; }; template @@ -90,126 +90,4 @@ class LayerImpl { Shape inputShape_; Shape outputShape_; }; - -template -inline void parallel_for(int count, Func func, int mode = 0) { - static bool stl_available = true; - static bool tbb_available = true; - static bool omp_available = true; - const int MIN_CHUNK_SIZE = 1000; - if (count < MIN_CHUNK_SIZE) { - mode = 0; - } - - switch (mode) { - case 0: // Sequential - { - for (int i = 0; i < count; ++i) { - func(i); - } - break; - } - - case 1: // STL - { - if (stl_available) { - try { - int num_threads = - static_cast(std::thread::hardware_concurrency()); - if (num_threads == 0) num_threads = 4; - - int min_chunk_size = std::max(1000, count / (num_threads * 4)); - if (count / num_threads < min_chunk_size) { - 
num_threads = std::max(1, count / min_chunk_size); - } - - std::vector threads; - threads.reserve(num_threads); - - int chunk_size = count / num_threads; - int remainder = count % num_threads; - - int start = 0; - for (int t = 0; t < num_threads; ++t) { - int end = start + chunk_size + (t < remainder ? 1 : 0); - if (start >= end) break; - - threads.emplace_back([start, end, &func]() { - for (int i = start; i < end; ++i) { - func(i); - } - }); - - start = end; - } - - for (auto& thread : threads) { - thread.join(); - } - - } catch (const std::exception& e) { - std::cout << "Thread execution failed: " << e.what() - << ". Falling back to sequential.\n"; - stl_available = false; - for (int i = 0; i < count; ++i) func(i); - } - } else { - for (int i = 0; i < count; ++i) func(i); - } - break; - } - - case 2: // Intel TBB - { - if (tbb_available) { - try { - oneapi::tbb::parallel_for( - oneapi::tbb::blocked_range(0, count), - [&](const oneapi::tbb::blocked_range& range) { - for (int i = range.begin(); i < range.end(); ++i) { - func(i); - } - }, - oneapi::tbb::auto_partitioner()); - } catch (const std::exception& e) { - std::cout << "TBB execution failed: " << e.what() - << ". Falling back to sequential.\n"; - tbb_available = false; - for (int i = 0; i < count; ++i) func(i); - } - } else { - for (int i = 0; i < count; ++i) func(i); - } - break; - } - - case 3: // OpenMP - { - if (omp_available) { - try { - int num_threads = omp_get_max_threads(); - - int chunk_size = std::max(1000, count / (num_threads * 8)); - -#pragma omp parallel for schedule(static, chunk_size) num_threads(num_threads) - for (int i = 0; i < count; ++i) { - func(i); - } - - } catch (...) { - std::cout << "OpenMP execution failed. Falling back to sequential.\n"; - omp_available = false; - for (int i = 0; i < count; ++i) func(i); - } - } else { - for (int i = 0; i < count; ++i) func(i); - } - break; - } - - default: - for (int i = 0; i < count; ++i) func(i); - } -} - } // namespace it_lab_ai diff --git a/include/parallel/backends.hpp b/include/parallel/backends.hpp new file mode 100644 index 00000000..08809dfd --- /dev/null +++ b/include/parallel/backends.hpp @@ -0,0 +1,119 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +#ifdef HAS_OPENMP +#include +#endif + +#include +#include +#include + +namespace it_lab_ai { +namespace parallel { + +enum class Backend { Seq = 0, Threads = 1, TBB = 2, OMP = 3 }; + +struct Options { + Backend backend = Backend::Seq; + int max_threads = 0; + std::size_t min_parallel_n = 1000; + std::size_t grain = 1024; +}; + +inline void impl_seq(std::size_t count, std::function func) { + for (std::size_t i = 0; i < count; ++i) { + func(i); + } + std::cout << "Seq " << std::endl; +} + +inline void impl_threads(std::size_t count, + std::function func, + const Options& opt) { + int num_threads = opt.max_threads > 0 + ? opt.max_threads + : static_cast(std::thread::hardware_concurrency()); + if (num_threads == 0) num_threads = 4; + + std::size_t min_chunk_size = std::max(opt.grain, count / (num_threads * 4)); + if (count / num_threads < min_chunk_size) { + num_threads = std::max(1, static_cast(count / min_chunk_size)); + } + + std::vector threads; + threads.reserve(num_threads); + + std::size_t chunk_size = count / num_threads; + std::size_t remainder = count % num_threads; + + std::size_t start = 0; + for (int t = 0; t < num_threads; ++t) { + std::size_t end = + start + chunk_size + (t < static_cast(remainder) ? 
1 : 0);
+    if (start >= end) break;
+
+    threads.emplace_back([start, end, &func]() {
+      for (std::size_t i = start; i < end; ++i) {
+        func(i);
+      }
+    });
+
+    start = end;
+  }
+
+  for (auto& thread : threads) {
+    thread.join();
+  }
+  std::cout << "Stl " << std::endl;
+}
+
+inline void impl_tbb(std::size_t count, std::function<void(std::size_t)> func,
+                     const Options& opt) {
+  std::cout << "tbb " << std::endl;
+  oneapi::tbb::parallel_for(
+      oneapi::tbb::blocked_range<std::size_t>(0, count, opt.grain),
+      [&](const oneapi::tbb::blocked_range<std::size_t>& range) {
+        for (std::size_t i = range.begin(); i < range.end(); ++i) {
+          func(i);
+        }
+      },
+      oneapi::tbb::auto_partitioner());
+}
+
+#ifdef HAS_OPENMP
+inline void impl_omp(std::size_t count, std::function<void(std::size_t)> func,
+                     const Options& opt) {
+  if (count == 0) return;
+
+  int num_threads =
+      opt.max_threads > 0 ? opt.max_threads : omp_get_max_threads();
+  int chunk_size =
+      static_cast<int>(std::max(opt.grain, count / (num_threads * 8)));
+
+  int int_count = static_cast<int>(count);
+  if (int_count < 0 || static_cast<std::size_t>(int_count) != count) {
+    impl_seq(count, func);
+    return;
+  }
+
+#pragma omp parallel for schedule(static, chunk_size) num_threads(num_threads)
+  for (int i = 0; i < int_count; ++i) {
+    func(static_cast<std::size_t>(i));
+  }
+  std::cout << "OMP " << std::endl;
+}
+#else
+inline void impl_omp(std::size_t count, std::function<void(std::size_t)> func,
+                     const Options& opt) {
+  impl_seq(count, func);
+}
+#endif
+
+} // namespace parallel
+} // namespace it_lab_ai
\ No newline at end of file
diff --git a/include/parallel/parallel.hpp b/include/parallel/parallel.hpp
new file mode 100644
index 00000000..ad152efb
--- /dev/null
+++ b/include/parallel/parallel.hpp
@@ -0,0 +1,76 @@
+#pragma once
+#include "backends.hpp"
+
+namespace it_lab_ai {
+namespace parallel {
+
+constexpr bool has_omp =
+#ifdef HAS_OPENMP
+    true;
+#else
+    false;
+#endif
+
+inline Backend resolve_default_backend(std::size_t n, const Options& opt) {
+  if (n < opt.min_parallel_n) {
+    return Backend::Seq;
+  }
+
+#ifdef HAS_OPENMP
+  return Backend::OMP;
+#else
+  return Backend::TBB;
+#endif
+}
+
+inline Backend select_backend(const Options& opt, std::size_t n) {
+  if (opt.backend != Backend::Seq && n < opt.min_parallel_n) {
+    return Backend::Seq;
+  }
+
+  if (opt.backend == Backend::Seq || opt.backend == Backend::Threads ||
+      opt.backend == Backend::TBB || opt.backend == Backend::OMP) {
+    return opt.backend;
+  }
+
+  return resolve_default_backend(n, opt);
+}
+
+template <typename Func>
+inline void parallel_for(std::size_t count, Func&& func,
+                         const Options& opt = {}) {
+  if (count == 0) return;
+
+  Backend backend = select_backend(opt, count);
+
+  switch (backend) {
+    case Backend::Seq:
+      impl_seq(count, std::forward<Func>(func));
+      break;
+    case Backend::Threads:
+      impl_threads(count, std::forward<Func>(func), opt);
+      break;
+    case Backend::TBB:
+      impl_tbb(count, std::forward<Func>(func), opt);
+      break;
+    case Backend::OMP:
+      impl_omp(count, std::forward<Func>(func), opt);
+      break;
+  }
+}
+
+template <typename Func>
+inline void parallel_for(std::size_t count, Func&& func, Backend backend) {
+  Options opt;
+  opt.backend = backend;
+  parallel_for(count, std::forward<Func>(func), opt);
+}
+
+template <typename Func>
+inline void parallel_for(int count, Func&& func, const Options& opt = {}) {
+  if (count <= 0) return;
+  parallel_for(static_cast<std::size_t>(count), std::forward<Func>(func), opt);
+}
+
+} // namespace parallel
+} // namespace it_lab_ai
\ No newline at end of file
diff --git a/src/layers/EWLayer.cpp b/src/layers/EWLayer.cpp
index 72c9f6c8..dac8cade 100644
--- a/src/layers/EWLayer.cpp
+++ b/src/layers/EWLayer.cpp
@@ -7,22 +7,20 @@ void 
EWLayer::run(const std::vector& input, if (input.size() != 1) { throw std::runtime_error("EWLayer: Input tensors not 1"); } + + ParBackend backend = getParallelBackend(); + switch (input[0].get_type()) { case Type::kInt: { EWLayerImpl used_impl(input[0].get_shape(), func_, alpha_, beta_, - type_parall_); + backend); std::vector tmp = used_impl.run(*input[0].as()); - auto start = std::chrono::high_resolution_clock::now(); output[0] = make_tensor(tmp, input[0].get_shape()); - auto end = std::chrono::high_resolution_clock::now(); - auto total_duration = - std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; break; } case Type::kFloat: { EWLayerImpl used_impl(input[0].get_shape(), func_, alpha_, beta_, - type_parall_); + backend); output[0] = make_tensor(used_impl.run(*input[0].as()), input[0].get_shape()); break; diff --git a/test/single_layer/test_ewlayer.cpp b/test/single_layer/test_ewlayer.cpp index fc63c658..06af5216 100644 --- a/test/single_layer/test_ewlayer.cpp +++ b/test/single_layer/test_ewlayer.cpp @@ -218,13 +218,14 @@ TEST(ewlayer, new_ewlayer_can_sigmoid_float_extreme_values) { TEST(ewlayer, parallel_for_ew) { EWLayer layer0("relu"); - layer0.setTypeParall(0); + layer0.setParallelBackend(ParBackend::Seq); EWLayer layer1("relu"); - layer1.setTypeParall(1); + layer1.setParallelBackend(ParBackend::Threads); EWLayer layer2("relu"); - layer2.setTypeParall(2); + layer2.setParallelBackend(ParBackend::TBB); EWLayer layer3("relu"); - layer3.setTypeParall(3); + layer3.setParallelBackend(ParBackend::OMP); + std::vector vec(800000000, -1); Tensor input = make_tensor(vec); Tensor output; @@ -236,7 +237,7 @@ TEST(ewlayer, parallel_for_ew) { auto end = std::chrono::high_resolution_clock::now(); auto total_duration = std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; + std::cout << "Sequential: " << total_duration.count() << " ms" << std::endl; for (size_t i = 0; i < 800000000; i++) { EXPECT_EQ((*out[0].as())[i], 0); } @@ -246,7 +247,7 @@ TEST(ewlayer, parallel_for_ew) { end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; + std::cout << "Threads: " << total_duration.count() << " ms" << std::endl; for (size_t i = 0; i < 800000000; i++) { EXPECT_EQ((*out[0].as())[i], 0); } @@ -256,7 +257,7 @@ TEST(ewlayer, parallel_for_ew) { end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; + std::cout << "TBB: " << total_duration.count() << " ms" << std::endl; for (size_t i = 0; i < 800000000; i++) { EXPECT_EQ((*out[0].as())[i], 0); } @@ -266,7 +267,7 @@ TEST(ewlayer, parallel_for_ew) { end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; + std::cout << "OpenMP: " << total_duration.count() << " ms" << std::endl; for (size_t i = 0; i < 800000000; i++) { EXPECT_EQ((*out[0].as())[i], 0); } @@ -274,13 +275,14 @@ TEST(ewlayer, parallel_for_ew) { TEST(ewlayer, parallel_for_ew_sigmoid) { EWLayer layer0("sigmoid"); - layer0.setTypeParall(0); + layer0.setParallelBackend(ParBackend::Seq); EWLayer layer1("sigmoid"); - layer1.setTypeParall(1); + layer1.setParallelBackend(ParBackend::Threads); EWLayer layer2("sigmoid"); - layer2.setTypeParall(2); + layer2.setParallelBackend(ParBackend::TBB); EWLayer layer3("sigmoid"); - 
layer3.setTypeParall(3); + layer3.setParallelBackend(ParBackend::OMP); + std::vector vec(800000000, -1); Tensor input = make_tensor(vec); Tensor output; @@ -292,33 +294,36 @@ TEST(ewlayer, parallel_for_ew_sigmoid) { auto end = std::chrono::high_resolution_clock::now(); auto total_duration = std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; + std::cout << "Sequential sigmoid: " << total_duration.count() << " ms" + << std::endl; start = std::chrono::high_resolution_clock::now(); layer1.run(in, out); end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; + std::cout << "Threads sigmoid: " << total_duration.count() << " ms" + << std::endl; start = std::chrono::high_resolution_clock::now(); layer2.run(in, out); end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; + std::cout << "TBB sigmoid: " << total_duration.count() << " ms" << std::endl; start = std::chrono::high_resolution_clock::now(); layer3.run(in, out); end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; + std::cout << "OpenMP sigmoid: " << total_duration.count() << " ms" + << std::endl; EXPECT_EQ(0, 0); } -TEST(ewlayer, parallel_for_) { +TEST(ewlayer, parallel_for_direct) { const int SIZE = 20000; std::vector matrix1(SIZE * SIZE); std::vector matrix2(SIZE * SIZE); @@ -330,41 +335,48 @@ TEST(ewlayer, parallel_for_) { } auto start = std::chrono::high_resolution_clock::now(); - parallel_for( - SIZE * SIZE, [&](int i) { result[i] = matrix1[i] + matrix2[i]; }, 0); + parallel::parallel_for( + SIZE * SIZE, [&](std::size_t i) { result[i] = matrix1[i] + matrix2[i]; }, + ParBackend::Seq); auto end = std::chrono::high_resolution_clock::now(); auto total_duration = std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; + std::cout << "Sequential direct: " << total_duration.count() << " ms" + << std::endl; for (int i = 0; i < SIZE * SIZE; i++) ASSERT_EQ(result[i], 2); start = std::chrono::high_resolution_clock::now(); - parallel_for( - SIZE * SIZE, [&](int i) { result[i] = matrix1[i] + matrix2[i]; }, 1); + parallel::parallel_for( + SIZE * SIZE, [&](std::size_t i) { result[i] = matrix1[i] + matrix2[i]; }, + ParBackend::Threads); end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; + std::cout << "Threads direct: " << total_duration.count() << " ms" + << std::endl; for (int i = 0; i < SIZE * SIZE; i++) ASSERT_EQ(result[i], 2); start = std::chrono::high_resolution_clock::now(); - parallel_for( - SIZE * SIZE, [&](int i) { result[i] = matrix1[i] + matrix2[i]; }, 2); + parallel::parallel_for( + SIZE * SIZE, [&](std::size_t i) { result[i] = matrix1[i] + matrix2[i]; }, + ParBackend::TBB); end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; + std::cout << "TBB direct: " << total_duration.count() << " ms" << std::endl; for (int i = 0; i < SIZE * SIZE; i++) ASSERT_EQ(result[i], 2); start = std::chrono::high_resolution_clock::now(); - parallel_for( - SIZE * SIZE, [&](int i) { result[i] = matrix1[i] + matrix2[i]; }, 3); + parallel::parallel_for( + SIZE * SIZE, 
[&](std::size_t i) { result[i] = matrix1[i] + matrix2[i]; }, + ParBackend::OMP); end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; + std::cout << "OpenMP direct: " << total_duration.count() << " ms" + << std::endl; for (int i = 0; i < SIZE * SIZE; i++) ASSERT_EQ(result[i], 2); } @@ -378,36 +390,48 @@ TEST(ewlayer, parallel_for_notmatrix) { } auto start = std::chrono::high_resolution_clock::now(); - parallel_for(SIZE * SIZE, [&](int i) { result[i] = matrix1[i] + 1; }, 0); + parallel::parallel_for( + SIZE * SIZE, [&](std::size_t i) { result[i] = matrix1[i] + 1; }, + ParBackend::Seq); auto end = std::chrono::high_resolution_clock::now(); auto total_duration = std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; + std::cout << "Sequential notmatrix: " << total_duration.count() << " ms" + << std::endl; for (int i = 0; i < SIZE * SIZE; i++) ASSERT_EQ(result[i], 2); start = std::chrono::high_resolution_clock::now(); - parallel_for(SIZE * SIZE, [&](int i) { result[i] = matrix1[i] + 1; }, 1); + parallel::parallel_for( + SIZE * SIZE, [&](std::size_t i) { result[i] = matrix1[i] + 1; }, + ParBackend::Threads); end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; + std::cout << "Threads notmatrix: " << total_duration.count() << " ms" + << std::endl; for (int i = 0; i < SIZE * SIZE; i++) ASSERT_EQ(result[i], 2); start = std::chrono::high_resolution_clock::now(); - parallel_for(SIZE * SIZE, [&](int i) { result[i] = matrix1[i] + 1; }, 2); + parallel::parallel_for( + SIZE * SIZE, [&](std::size_t i) { result[i] = matrix1[i] + 1; }, + ParBackend::TBB); end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; + std::cout << "TBB notmatrix: " << total_duration.count() << " ms" + << std::endl; for (int i = 0; i < SIZE * SIZE; i++) ASSERT_EQ(result[i], 2); start = std::chrono::high_resolution_clock::now(); - parallel_for(SIZE * SIZE, [&](int i) { result[i] = matrix1[i] + 1; }, 3); + parallel::parallel_for( + SIZE * SIZE, [&](std::size_t i) { result[i] = matrix1[i] + 1; }, + ParBackend::OMP); end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); - std::cout << total_duration.count() << std::endl; + std::cout << "OpenMP notmatrix: " << total_duration.count() << " ms" + << std::endl; for (int i = 0; i < SIZE * SIZE; i++) ASSERT_EQ(result[i], 2); } From 2d369b6ef27e648e8a698b475eb93a41d192fd36 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Wed, 12 Nov 2025 16:29:38 +0300 Subject: [PATCH 08/36] fix --- src/layers/EWLayer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/layers/EWLayer.cpp b/src/layers/EWLayer.cpp index dac8cade..c4a83bbe 100644 --- a/src/layers/EWLayer.cpp +++ b/src/layers/EWLayer.cpp @@ -1,5 +1,7 @@ #include "layers/EWLayer.hpp" +#include + namespace it_lab_ai { void EWLayer::run(const std::vector& input, From 0412b6a5cfa55dba260987c3b19cd724b7376ecf Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Wed, 12 Nov 2025 16:32:20 +0300 Subject: [PATCH 09/36] fix --- src/layers/EWLayer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/layers/EWLayer.cpp b/src/layers/EWLayer.cpp index c4a83bbe..6a7da06c 100644 --- a/src/layers/EWLayer.cpp +++ b/src/layers/EWLayer.cpp @@ -1,6 +1,7 
@@ #include "layers/EWLayer.hpp" #include +#include namespace it_lab_ai { From 55211524e93516c198d82e561b1ee2f4fa0f42de Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Wed, 12 Nov 2025 16:38:08 +0300 Subject: [PATCH 10/36] fix --- src/layers_oneDNN/EWLayer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/layers_oneDNN/EWLayer.cpp b/src/layers_oneDNN/EWLayer.cpp index fc838705..fc7d66d7 100644 --- a/src/layers_oneDNN/EWLayer.cpp +++ b/src/layers_oneDNN/EWLayer.cpp @@ -1,5 +1,7 @@ #include "layers_oneDNN/EWLayer.hpp" +#include +#include #include #include From 4293356322c5a358feed938749a5724ade788937 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Wed, 12 Nov 2025 16:53:15 +0300 Subject: [PATCH 11/36] fix --- include/parallel/backends.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/parallel/backends.hpp b/include/parallel/backends.hpp index 08809dfd..e74d5428 100644 --- a/include/parallel/backends.hpp +++ b/include/parallel/backends.hpp @@ -91,8 +91,9 @@ inline void impl_omp(std::size_t count, std::function func, const Options& opt) { if (count == 0) return; - int num_threads = - opt.max_threads > 0 ? opt.max_threads : omp_get_max_threads(); + int num_threads = opt.max_threads > 0 + ? opt.max_threads + : static_cast(std::thread::hardware_concurrency()); int chunk_size = static_cast(std::max(opt.grain, count / (num_threads * 8))); From 0ba7e1b6191d9caaf4ba8ed37196cbfe240d3ea5 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Wed, 12 Nov 2025 17:14:23 +0300 Subject: [PATCH 12/36] fix --- include/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index 781e1cb9..c9ac0850 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -17,4 +17,4 @@ file(GLOB_RECURSE reader_headers Weights_Reader/*.h Weights_Reader/*.hpp) set(READER_HEADERS "${reader_headers}" PARENT_SCOPE) file(GLOB_RECURSE parallel_headers parallel/*.h parallel/*.hpp) -set(READER_HEADERS "${parallel_headers}" PARENT_SCOPE) +set(LAYERS_HEADERS "${parallel_headers}" PARENT_SCOPE) From f5f0f14e07dca3593aae6b93c7a05f57e66a8078 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Wed, 12 Nov 2025 17:31:47 +0300 Subject: [PATCH 13/36] fix --- include/parallel/backends.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/parallel/backends.hpp b/include/parallel/backends.hpp index e74d5428..e3ae97d6 100644 --- a/include/parallel/backends.hpp +++ b/include/parallel/backends.hpp @@ -7,7 +7,7 @@ #include #ifdef HAS_OPENMP -#include +// #include #endif #include From 8222f238ae3098e48f25460902a658c74e6e90c8 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Wed, 12 Nov 2025 18:05:14 +0300 Subject: [PATCH 14/36] fix --- include/layers/Layer.hpp | 2 +- include/parallel/backends.hpp | 25 +++++++++++++---------- include/parallel/parallel.hpp | 24 +++++++++++----------- test/single_layer/test_ewlayer.cpp | 32 +++++++++++++++--------------- 4 files changed, 44 insertions(+), 39 deletions(-) diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp index 61d42564..b0127942 100644 --- a/include/layers/Layer.hpp +++ b/include/layers/Layer.hpp @@ -66,7 +66,7 @@ class Layer { protected: int id_ = 0; LayerType type_; - ParBackend parallel_backend_ = ParBackend::Seq; + ParBackend parallel_backend_ = ParBackend::kSeq; }; template diff --git a/include/parallel/backends.hpp b/include/parallel/backends.hpp index e3ae97d6..d61fd495 100644 --- a/include/parallel/backends.hpp +++ 
b/include/parallel/backends.hpp @@ -17,16 +17,17 @@ namespace it_lab_ai { namespace parallel { -enum class Backend { Seq = 0, Threads = 1, TBB = 2, OMP = 3 }; +enum class Backend { kSeq = 0, kThreads = 1, kTbb = 2, kOmp = 3 }; struct Options { - Backend backend = Backend::Seq; + Backend backend = Backend::kSeq; int max_threads = 0; std::size_t min_parallel_n = 1000; std::size_t grain = 1024; }; -inline void impl_seq(std::size_t count, std::function func) { +inline void impl_seq(std::size_t count, + const std::function& func) { for (std::size_t i = 0; i < count; ++i) { func(i); } @@ -34,7 +35,7 @@ inline void impl_seq(std::size_t count, std::function func) { } inline void impl_threads(std::size_t count, - std::function func, + const std::function& func, const Options& opt) { int num_threads = opt.max_threads > 0 ? opt.max_threads @@ -73,7 +74,8 @@ inline void impl_threads(std::size_t count, std::cout << "Stl " << std::endl; } -inline void impl_tbb(std::size_t count, std::function func, +inline void impl_tbb(std::size_t count, + const std::function& func, const Options& opt) { std::cout << "tbb " << std::endl; oneapi::tbb::parallel_for( @@ -87,15 +89,17 @@ inline void impl_tbb(std::size_t count, std::function func, } #ifdef HAS_OPENMP -inline void impl_omp(std::size_t count, std::function func, +inline void impl_omp(std::size_t count, + const std::function& func, const Options& opt) { if (count == 0) return; int num_threads = opt.max_threads > 0 ? opt.max_threads : static_cast(std::thread::hardware_concurrency()); - int chunk_size = - static_cast(std::max(opt.grain, count / (num_threads * 8))); + + // Убрана неиспользуемая переменная chunk_size + static_cast(std::max(opt.grain, count / (num_threads * 8))); int int_count = static_cast(count); if (int_count < 0 || static_cast(int_count) != count) { @@ -103,14 +107,15 @@ inline void impl_omp(std::size_t count, std::function func, return; } -#pragma omp parallel for schedule(static, chunk_size) num_threads(num_threads) +#pragma omp parallel for schedule(static) num_threads(num_threads) for (int i = 0; i < int_count; ++i) { func(static_cast(i)); } std::cout << "OMP " << std::endl; } #else -inline void impl_omp(std::size_t count, std::function func, +inline void impl_omp(std::size_t count, + const std::function& func, const Options& opt) { impl_seq(count, func); } diff --git a/include/parallel/parallel.hpp b/include/parallel/parallel.hpp index ad152efb..5232dcae 100644 --- a/include/parallel/parallel.hpp +++ b/include/parallel/parallel.hpp @@ -4,7 +4,7 @@ namespace it_lab_ai { namespace parallel { -constexpr bool has_omp = +constexpr bool kHasOmp = #ifdef HAS_OPENMP true; #else @@ -13,23 +13,23 @@ constexpr bool has_omp = inline Backend resolve_default_backend(std::size_t n, const Options& opt) { if (n < opt.min_parallel_n) { - return Backend::Seq; + return Backend::kSeq; } #ifdef HAS_OPENMP - return Backend::OMP; + return Backend::kOmp; #else - return Backend::TBB; + return Backend::kTbb; #endif } inline Backend select_backend(const Options& opt, std::size_t n) { - if (opt.backend != Backend::Seq && n < opt.min_parallel_n) { - return Backend::Seq; + if (opt.backend != Backend::kSeq && n < opt.min_parallel_n) { + return Backend::kSeq; } - if (opt.backend == Backend::Seq || opt.backend == Backend::Threads || - opt.backend == Backend::TBB || opt.backend == Backend::OMP) { + if (opt.backend == Backend::kSeq || opt.backend == Backend::kThreads || + opt.backend == Backend::kTbb || opt.backend == Backend::kOmp) { return opt.backend; } @@ -44,16 +44,16 
@@ inline void parallel_for(std::size_t count, Func&& func, Backend backend = select_backend(opt, count); switch (backend) { - case Backend::Seq: + case Backend::kSeq: impl_seq(count, std::forward(func)); break; - case Backend::Threads: + case Backend::kThreads: impl_threads(count, std::forward(func), opt); break; - case Backend::TBB: + case Backend::kTbb: impl_tbb(count, std::forward(func), opt); break; - case Backend::OMP: + case Backend::kOmp: impl_omp(count, std::forward(func), opt); break; } diff --git a/test/single_layer/test_ewlayer.cpp b/test/single_layer/test_ewlayer.cpp index 06af5216..edce59f3 100644 --- a/test/single_layer/test_ewlayer.cpp +++ b/test/single_layer/test_ewlayer.cpp @@ -218,13 +218,13 @@ TEST(ewlayer, new_ewlayer_can_sigmoid_float_extreme_values) { TEST(ewlayer, parallel_for_ew) { EWLayer layer0("relu"); - layer0.setParallelBackend(ParBackend::Seq); + layer0.setParallelBackend(ParBackend::kSeq); EWLayer layer1("relu"); - layer1.setParallelBackend(ParBackend::Threads); + layer1.setParallelBackend(ParBackend::kThreads); EWLayer layer2("relu"); - layer2.setParallelBackend(ParBackend::TBB); + layer2.setParallelBackend(ParBackend::kTbb); EWLayer layer3("relu"); - layer3.setParallelBackend(ParBackend::OMP); + layer3.setParallelBackend(ParBackend::kOmp); std::vector vec(800000000, -1); Tensor input = make_tensor(vec); @@ -275,13 +275,13 @@ TEST(ewlayer, parallel_for_ew) { TEST(ewlayer, parallel_for_ew_sigmoid) { EWLayer layer0("sigmoid"); - layer0.setParallelBackend(ParBackend::Seq); + layer0.setParallelBackend(ParBackend::kSeq); EWLayer layer1("sigmoid"); - layer1.setParallelBackend(ParBackend::Threads); + layer1.setParallelBackend(ParBackend::kThreads); EWLayer layer2("sigmoid"); - layer2.setParallelBackend(ParBackend::TBB); + layer2.setParallelBackend(ParBackend::kTbb); EWLayer layer3("sigmoid"); - layer3.setParallelBackend(ParBackend::OMP); + layer3.setParallelBackend(ParBackend::kOmp); std::vector vec(800000000, -1); Tensor input = make_tensor(vec); @@ -337,7 +337,7 @@ TEST(ewlayer, parallel_for_direct) { auto start = std::chrono::high_resolution_clock::now(); parallel::parallel_for( SIZE * SIZE, [&](std::size_t i) { result[i] = matrix1[i] + matrix2[i]; }, - ParBackend::Seq); + ParBackend::kSeq); auto end = std::chrono::high_resolution_clock::now(); auto total_duration = @@ -350,7 +350,7 @@ TEST(ewlayer, parallel_for_direct) { start = std::chrono::high_resolution_clock::now(); parallel::parallel_for( SIZE * SIZE, [&](std::size_t i) { result[i] = matrix1[i] + matrix2[i]; }, - ParBackend::Threads); + ParBackend::kThreads); end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); @@ -361,7 +361,7 @@ TEST(ewlayer, parallel_for_direct) { start = std::chrono::high_resolution_clock::now(); parallel::parallel_for( SIZE * SIZE, [&](std::size_t i) { result[i] = matrix1[i] + matrix2[i]; }, - ParBackend::TBB); + ParBackend::kTbb); end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); @@ -371,7 +371,7 @@ TEST(ewlayer, parallel_for_direct) { start = std::chrono::high_resolution_clock::now(); parallel::parallel_for( SIZE * SIZE, [&](std::size_t i) { result[i] = matrix1[i] + matrix2[i]; }, - ParBackend::OMP); + ParBackend::kOmp); end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); @@ -392,7 +392,7 @@ TEST(ewlayer, parallel_for_notmatrix) { auto start = std::chrono::high_resolution_clock::now(); parallel::parallel_for( SIZE * 
SIZE, [&](std::size_t i) { result[i] = matrix1[i] + 1; }, - ParBackend::Seq); + ParBackend::kSeq); auto end = std::chrono::high_resolution_clock::now(); auto total_duration = @@ -405,7 +405,7 @@ TEST(ewlayer, parallel_for_notmatrix) { start = std::chrono::high_resolution_clock::now(); parallel::parallel_for( SIZE * SIZE, [&](std::size_t i) { result[i] = matrix1[i] + 1; }, - ParBackend::Threads); + ParBackend::kThreads); end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); @@ -416,7 +416,7 @@ TEST(ewlayer, parallel_for_notmatrix) { start = std::chrono::high_resolution_clock::now(); parallel::parallel_for( SIZE * SIZE, [&](std::size_t i) { result[i] = matrix1[i] + 1; }, - ParBackend::TBB); + ParBackend::kTbb); end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); @@ -427,7 +427,7 @@ TEST(ewlayer, parallel_for_notmatrix) { start = std::chrono::high_resolution_clock::now(); parallel::parallel_for( SIZE * SIZE, [&](std::size_t i) { result[i] = matrix1[i] + 1; }, - ParBackend::OMP); + ParBackend::kOmp); end = std::chrono::high_resolution_clock::now(); total_duration = std::chrono::duration_cast(end - start); From 0bb0d02c8619c7901103e00b99164b0fc3d9ce28 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Wed, 12 Nov 2025 18:08:20 +0300 Subject: [PATCH 15/36] fix --- include/layers/EWLayer.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/layers/EWLayer.hpp b/include/layers/EWLayer.hpp index 29917ffe..b6ef6ee5 100644 --- a/include/layers/EWLayer.hpp +++ b/include/layers/EWLayer.hpp @@ -46,7 +46,8 @@ class EWLayerImpl : public LayerImpl { public: EWLayerImpl() = delete; EWLayerImpl(const Shape& shape, std::string function, float alpha = 0.0F, - float beta = 0.0F, ParBackend parallel_backend = ParBackend::Seq); + float beta = 0.0F, + ParBackend parallel_backend = ParBackend::kSeq); EWLayerImpl(const EWLayerImpl& c) = default; EWLayerImpl& operator=(const EWLayerImpl& c) = default; std::vector run( From 66b3b93a652ae72c65a84e29b4776efee03ccaef Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Wed, 12 Nov 2025 19:01:24 +0300 Subject: [PATCH 16/36] fix --- CMakeLists.txt | 40 +++++++++++++++++++------------------- include/layers/EWLayer.hpp | 1 - 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c51b3248..e7585092 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,26 +32,26 @@ add_subdirectory(3rdparty) include(cmake/opencv_config.cmake) -find_package(OpenMP) - -if(OpenMP_FOUND) - message(STATUS "OpenMP found - enabling parallel support") - add_definitions(-DHAS_OPENMP) - if(TARGET OpenMP::OpenMP_CXX) - set(OPENMP_TARGET OpenMP::OpenMP_CXX) - message(STATUS "Using OpenMP target: OpenMP::OpenMP_CXX") - else() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - if(OpenMP_CXX_LIBRARIES) - set(OPENMP_LIBRARIES ${OpenMP_CXX_LIBRARIES}) - endif() - message(STATUS "OpenMP CXX flags: ${OpenMP_CXX_FLAGS}") - message(STATUS "OpenMP libraries: ${OpenMP_CXX_LIBRARIES}") - endif() -else() - message(STATUS "OpenMP not found - parallel features disabled") -endif() +# find_package(OpenMP) + +# if(OpenMP_FOUND) +# message(STATUS "OpenMP found - enabling parallel support") +# add_definitions(-DHAS_OPENMP) +# if(TARGET OpenMP::OpenMP_CXX) +# set(OPENMP_TARGET OpenMP::OpenMP_CXX) +# message(STATUS "Using OpenMP target: OpenMP::OpenMP_CXX") +# else() +# 
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +# if(OpenMP_CXX_LIBRARIES) +# set(OPENMP_LIBRARIES ${OpenMP_CXX_LIBRARIES}) +# endif() +# message(STATUS "OpenMP CXX flags: ${OpenMP_CXX_FLAGS}") +# message(STATUS "OpenMP libraries: ${OpenMP_CXX_LIBRARIES}") +# endif() +# else() +# message(STATUS "OpenMP not found - parallel features disabled") +# endif() foreach(CONFIG "" _DEBUG _RELEASE) diff --git a/include/layers/EWLayer.hpp b/include/layers/EWLayer.hpp index b6ef6ee5..a7f18272 100644 --- a/include/layers/EWLayer.hpp +++ b/include/layers/EWLayer.hpp @@ -75,7 +75,6 @@ std::vector EWLayerImpl::run( const std::vector& input) const { std::vector res(this->outputShape_.count()); - // Получаем настройки параллельности parallel::Options options; options.backend = parallel_backend_; From 04c381521caf350dfad65b9535356ff205c1d105 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Wed, 12 Nov 2025 19:18:41 +0300 Subject: [PATCH 17/36] fix --- src/perf/CMakeLists.txt | 3 --- test/CMakeLists.txt | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/perf/CMakeLists.txt b/src/perf/CMakeLists.txt index 67125b92..a39339eb 100644 --- a/src/perf/CMakeLists.txt +++ b/src/perf/CMakeLists.txt @@ -1,5 +1,2 @@ file(GLOB_RECURSE perf_src *.cpp) add_library(perf_lib STATIC "${PERF_HEADERS}" "${perf_src}") -if (NOT WIN32) - target_link_libraries(perf_lib PUBLIC OpenMP::OpenMP_CXX) -endif() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0e91e931..0c72d407 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,9 +1,9 @@ file(GLOB_RECURSE TEST_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) add_executable(run_test ${TEST_SRC_FILES}) -if(OpenMP_FOUND) - target_link_libraries(run_test PUBLIC OpenMP::OpenMP_CXX) -endif() +# if(OpenMP_FOUND) +# target_link_libraries(run_test PUBLIC OpenMP::OpenMP_CXX) +# endif() target_link_libraries(run_test PUBLIC perf_lib layers_lib layers_oneDNN_lib) target_link_libraries(run_test PUBLIC gtest) target_link_libraries(run_test PUBLIC ReadLib) From 74302458420e7499ae41a554c40af07b3140e76c Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Wed, 12 Nov 2025 19:34:48 +0300 Subject: [PATCH 18/36] fix --- CMakeLists.txt | 40 ++++++++++++++++++++-------------------- src/perf/CMakeLists.txt | 3 +++ 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e7585092..c51b3248 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,26 +32,26 @@ add_subdirectory(3rdparty) include(cmake/opencv_config.cmake) -# find_package(OpenMP) - -# if(OpenMP_FOUND) -# message(STATUS "OpenMP found - enabling parallel support") -# add_definitions(-DHAS_OPENMP) -# if(TARGET OpenMP::OpenMP_CXX) -# set(OPENMP_TARGET OpenMP::OpenMP_CXX) -# message(STATUS "Using OpenMP target: OpenMP::OpenMP_CXX") -# else() -# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -# if(OpenMP_CXX_LIBRARIES) -# set(OPENMP_LIBRARIES ${OpenMP_CXX_LIBRARIES}) -# endif() -# message(STATUS "OpenMP CXX flags: ${OpenMP_CXX_FLAGS}") -# message(STATUS "OpenMP libraries: ${OpenMP_CXX_LIBRARIES}") -# endif() -# else() -# message(STATUS "OpenMP not found - parallel features disabled") -# endif() +find_package(OpenMP) + +if(OpenMP_FOUND) + message(STATUS "OpenMP found - enabling parallel support") + add_definitions(-DHAS_OPENMP) + if(TARGET OpenMP::OpenMP_CXX) + set(OPENMP_TARGET OpenMP::OpenMP_CXX) + message(STATUS 
"Using OpenMP target: OpenMP::OpenMP_CXX") + else() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + if(OpenMP_CXX_LIBRARIES) + set(OPENMP_LIBRARIES ${OpenMP_CXX_LIBRARIES}) + endif() + message(STATUS "OpenMP CXX flags: ${OpenMP_CXX_FLAGS}") + message(STATUS "OpenMP libraries: ${OpenMP_CXX_LIBRARIES}") + endif() +else() + message(STATUS "OpenMP not found - parallel features disabled") +endif() foreach(CONFIG "" _DEBUG _RELEASE) diff --git a/src/perf/CMakeLists.txt b/src/perf/CMakeLists.txt index a39339eb..67125b92 100644 --- a/src/perf/CMakeLists.txt +++ b/src/perf/CMakeLists.txt @@ -1,2 +1,5 @@ file(GLOB_RECURSE perf_src *.cpp) add_library(perf_lib STATIC "${PERF_HEADERS}" "${perf_src}") +if (NOT WIN32) + target_link_libraries(perf_lib PUBLIC OpenMP::OpenMP_CXX) +endif() From 56c6d890c13ef6408129bac858fbb96d28c4aa30 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Mon, 17 Nov 2025 10:35:16 +0300 Subject: [PATCH 19/36] fix --- include/parallel/backends.hpp | 9 +++++++-- test/single_layer/test_ewlayer.cpp | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/parallel/backends.hpp b/include/parallel/backends.hpp index d61fd495..28f118eb 100644 --- a/include/parallel/backends.hpp +++ b/include/parallel/backends.hpp @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include #include @@ -17,7 +18,12 @@ namespace it_lab_ai { namespace parallel { -enum class Backend { kSeq = 0, kThreads = 1, kTbb = 2, kOmp = 3 }; +enum class Backend : std::uint8_t { + kSeq = 0, + kThreads = 1, + kTbb = 2, + kOmp = 3 +}; struct Options { Backend backend = Backend::kSeq; @@ -98,7 +104,6 @@ inline void impl_omp(std::size_t count, ? opt.max_threads : static_cast(std::thread::hardware_concurrency()); - // Убрана неиспользуемая переменная chunk_size static_cast(std::max(opt.grain, count / (num_threads * 8))); int int_count = static_cast(count); diff --git a/test/single_layer/test_ewlayer.cpp b/test/single_layer/test_ewlayer.cpp index edce59f3..67895210 100644 --- a/test/single_layer/test_ewlayer.cpp +++ b/test/single_layer/test_ewlayer.cpp @@ -1,4 +1,5 @@ #include +#include #include #include From 46bfe0bbf141a3c4d0f51c7d8e20c3cea2e3080d Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 12:46:19 +0300 Subject: [PATCH 20/36] fix --- CMakeLists.txt | 23 +++++++++-------------- app/Graph/CMakeLists.txt | 3 --- src/perf/CMakeLists.txt | 5 ++--- test/CMakeLists.txt | 8 ++------ 4 files changed, 13 insertions(+), 26 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c51b3248..c98d4630 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,28 +32,23 @@ add_subdirectory(3rdparty) include(cmake/opencv_config.cmake) -find_package(OpenMP) +find_package(OpenMP REQUIRED) + +if (NOT WIN32) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Werror") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") +else() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4 /wd4996 /wd4190 /wd4189 /WX") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /wd4996 /wd4190 /wd4189 /WX") +endif() if(OpenMP_FOUND) message(STATUS "OpenMP found - enabling parallel support") add_definitions(-DHAS_OPENMP) - if(TARGET OpenMP::OpenMP_CXX) - set(OPENMP_TARGET OpenMP::OpenMP_CXX) - message(STATUS "Using OpenMP target: OpenMP::OpenMP_CXX") - else() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - if(OpenMP_CXX_LIBRARIES) - set(OPENMP_LIBRARIES 
${OpenMP_CXX_LIBRARIES}) - endif() - message(STATUS "OpenMP CXX flags: ${OpenMP_CXX_FLAGS}") - message(STATUS "OpenMP libraries: ${OpenMP_CXX_LIBRARIES}") - endif() else() message(STATUS "OpenMP not found - parallel features disabled") endif() - foreach(CONFIG "" _DEBUG _RELEASE) set("CMAKE_ARCHIVE_OUTPUT_DIRECTORY${CONFIG}" "${CMAKE_BINARY_DIR}/lib") set("CMAKE_LIBRARY_OUTPUT_DIRECTORY${CONFIG}" "${CMAKE_BINARY_DIR}/lib") diff --git a/app/Graph/CMakeLists.txt b/app/Graph/CMakeLists.txt index 15f16e4d..9835ba54 100644 --- a/app/Graph/CMakeLists.txt +++ b/app/Graph/CMakeLists.txt @@ -31,9 +31,6 @@ if (WIN32) COMMAND ${CMAKE_COMMAND} -E copy_directory "${OPENCV_BUILD_DIR}/bin/." "${CMAKE_BINARY_DIR}/bin/") -endif() - -if (WIN32) if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") set(CMAKE_BUILD_TYPE "Debug") endif() diff --git a/src/perf/CMakeLists.txt b/src/perf/CMakeLists.txt index 67125b92..eaa84ebb 100644 --- a/src/perf/CMakeLists.txt +++ b/src/perf/CMakeLists.txt @@ -1,5 +1,4 @@ file(GLOB_RECURSE perf_src *.cpp) add_library(perf_lib STATIC "${PERF_HEADERS}" "${perf_src}") -if (NOT WIN32) - target_link_libraries(perf_lib PUBLIC OpenMP::OpenMP_CXX) -endif() +target_link_libraries(perf_lib PUBLIC OpenMP::OpenMP_CXX) + diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0c72d407..65582b25 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,9 +1,8 @@ file(GLOB_RECURSE TEST_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) add_executable(run_test ${TEST_SRC_FILES}) -# if(OpenMP_FOUND) -# target_link_libraries(run_test PUBLIC OpenMP::OpenMP_CXX) -# endif() + +target_link_libraries(run_test PUBLIC OpenMP::OpenMP_CXX) target_link_libraries(run_test PUBLIC perf_lib layers_lib layers_oneDNN_lib) target_link_libraries(run_test PUBLIC gtest) target_link_libraries(run_test PUBLIC ReadLib) @@ -24,9 +23,6 @@ if (WIN32) COMMAND ${CMAKE_COMMAND} -E copy_directory "${OPENCV_BUILD_DIR}/bin/." 
"${CMAKE_BINARY_DIR}/bin/") -endif() - -if (WIN32) add_custom_command(TARGET run_test POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy $ From 16e39bf5cdea8fc787db6674a16a89f877be3f99 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 13:13:34 +0300 Subject: [PATCH 21/36] fix --- include/parallel/backends.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/parallel/backends.hpp b/include/parallel/backends.hpp index 28f118eb..aa5ff000 100644 --- a/include/parallel/backends.hpp +++ b/include/parallel/backends.hpp @@ -8,7 +8,7 @@ #include #ifdef HAS_OPENMP -// #include +#include #endif #include From 4eac55a135673276b67a0d1e145cf624a61a9f1d Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 13:39:07 +0300 Subject: [PATCH 22/36] fix --- src/layers/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/layers/CMakeLists.txt b/src/layers/CMakeLists.txt index 7f22b872..50f14846 100644 --- a/src/layers/CMakeLists.txt +++ b/src/layers/CMakeLists.txt @@ -1,7 +1,5 @@ file(GLOB_RECURSE layers_src *.cpp) add_library(layers_lib STATIC "${LAYERS_HEADERS}" "${layers_src}") target_link_libraries(layers_lib PUBLIC TBB_unified) -# if(OpenMP_FOUND) -# target_link_libraries(layers_lib PUBLIC OpenMP::OpenMP_CXX) -# endif() +target_link_libraries(layers_lib PUBLIC OpenMP::OpenMP_CXX) target_link_libraries(layers_lib PUBLIC dnnl) From bbaa42623e8efed2dcb11d7532c29e2f1c861cca Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 13:59:35 +0300 Subject: [PATCH 23/36] fix --- include/parallel/backends.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/parallel/backends.hpp b/include/parallel/backends.hpp index aa5ff000..6daa7d0d 100644 --- a/include/parallel/backends.hpp +++ b/include/parallel/backends.hpp @@ -48,6 +48,8 @@ inline void impl_threads(std::size_t count, : static_cast(std::thread::hardware_concurrency()); if (num_threads == 0) num_threads = 4; + double end = omp_get_wtime(); + std::size_t min_chunk_size = std::max(opt.grain, count / (num_threads * 4)); if (count / num_threads < min_chunk_size) { num_threads = std::max(1, static_cast(count / min_chunk_size)); From 3d7d56c2327148d8d08c3208c7851bc016fc9474 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 14:02:21 +0300 Subject: [PATCH 24/36] fix --- include/parallel/backends.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/parallel/backends.hpp b/include/parallel/backends.hpp index 6daa7d0d..14893942 100644 --- a/include/parallel/backends.hpp +++ b/include/parallel/backends.hpp @@ -49,6 +49,7 @@ inline void impl_threads(std::size_t count, if (num_threads == 0) num_threads = 4; double end = omp_get_wtime(); + end++; std::size_t min_chunk_size = std::max(opt.grain, count / (num_threads * 4)); if (count / num_threads < min_chunk_size) { From 4684d9cbf35b8ebdde64f7e45990514e9825c13f Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 14:07:45 +0300 Subject: [PATCH 25/36] fix --- include/parallel/backends.hpp | 3 --- src/layers/CMakeLists.txt | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/parallel/backends.hpp b/include/parallel/backends.hpp index 14893942..aa5ff000 100644 --- a/include/parallel/backends.hpp +++ b/include/parallel/backends.hpp @@ -48,9 +48,6 @@ inline void impl_threads(std::size_t count, : static_cast(std::thread::hardware_concurrency()); if (num_threads == 0) num_threads = 4; - double end = omp_get_wtime(); - end++; - std::size_t min_chunk_size = 
std::max(opt.grain, count / (num_threads * 4)); if (count / num_threads < min_chunk_size) { num_threads = std::max(1, static_cast(count / min_chunk_size)); diff --git a/src/layers/CMakeLists.txt b/src/layers/CMakeLists.txt index 50f14846..a078a841 100644 --- a/src/layers/CMakeLists.txt +++ b/src/layers/CMakeLists.txt @@ -1,5 +1,7 @@ file(GLOB_RECURSE layers_src *.cpp) add_library(layers_lib STATIC "${LAYERS_HEADERS}" "${layers_src}") target_link_libraries(layers_lib PUBLIC TBB_unified) +target_compile_options(layers_lib PRIVATE ${OpenMP_CXX_FLAGS}) +target_link_libraries(layers_lib PRIVATE ${OpenMP_CXX_FLAGS}) target_link_libraries(layers_lib PUBLIC OpenMP::OpenMP_CXX) target_link_libraries(layers_lib PUBLIC dnnl) From 0d7e4bf512dd1c1ea1d113f786e8d9fb53e4e5e7 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 14:18:41 +0300 Subject: [PATCH 26/36] fix --- CMakeLists.txt | 19 ++++++++++++------- src/layers/CMakeLists.txt | 2 -- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c98d4630..e0e336cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,13 +34,18 @@ include(cmake/opencv_config.cmake) find_package(OpenMP REQUIRED) -if (NOT WIN32) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Werror") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") -else() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4 /wd4996 /wd4190 /wd4189 /WX") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /wd4996 /wd4190 /wd4189 /WX") -endif() +# if (NOT WIN32) +# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Werror") +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") +# else() +# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4 /wd4996 /wd4190 /wd4189 /WX") +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /wd4996 /wd4190 /wd4189 /WX") +# endif() + +include_directories( ${OpenMP_C_INCLUDE_DIRS} ${OpenMP_CXX_INCLUDE_DIRS} ) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") if(OpenMP_FOUND) message(STATUS "OpenMP found - enabling parallel support") diff --git a/src/layers/CMakeLists.txt b/src/layers/CMakeLists.txt index a078a841..50f14846 100644 --- a/src/layers/CMakeLists.txt +++ b/src/layers/CMakeLists.txt @@ -1,7 +1,5 @@ file(GLOB_RECURSE layers_src *.cpp) add_library(layers_lib STATIC "${LAYERS_HEADERS}" "${layers_src}") target_link_libraries(layers_lib PUBLIC TBB_unified) -target_compile_options(layers_lib PRIVATE ${OpenMP_CXX_FLAGS}) -target_link_libraries(layers_lib PRIVATE ${OpenMP_CXX_FLAGS}) target_link_libraries(layers_lib PUBLIC OpenMP::OpenMP_CXX) target_link_libraries(layers_lib PUBLIC dnnl) From 83732e77ad356c84da2e9c4b4427987c99f26af6 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 14:53:56 +0300 Subject: [PATCH 27/36] fix --- CMakeLists.txt | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e0e336cd..5ff063df 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,18 +34,19 @@ include(cmake/opencv_config.cmake) find_package(OpenMP REQUIRED) -# if (NOT WIN32) -# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Werror") -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") -# else() -# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4 /wd4996 /wd4190 /wd4189 /WX") -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /wd4996 /wd4190 /wd4189 /WX") -# endif() - 
-include_directories( ${OpenMP_C_INCLUDE_DIRS} ${OpenMP_CXX_INCLUDE_DIRS} ) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") +if (NOT WIN32) + message(STATUS "OpenMP found1111 - enabling parallel support") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Werror") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") +else() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4 /wd4996 /wd4190 /wd4189 /WX") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /wd4996 /wd4190 /wd4189 /WX") +endif() + +# include_directories( ${OpenMP_C_INCLUDE_DIRS} ${OpenMP_CXX_INCLUDE_DIRS} ) +# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +# set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") if(OpenMP_FOUND) message(STATUS "OpenMP found - enabling parallel support") From a9dc8c3f1995fb61ad4c077525e6c784e8e01f87 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 15:04:20 +0300 Subject: [PATCH 28/36] fix --- CMakeLists.txt | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ff063df..1dc80ba6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,19 +34,10 @@ include(cmake/opencv_config.cmake) find_package(OpenMP REQUIRED) -if (NOT WIN32) - message(STATUS "OpenMP found1111 - enabling parallel support") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Werror") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") -else() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4 /wd4996 /wd4190 /wd4189 /WX") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /wd4996 /wd4190 /wd4189 /WX") -endif() - -# include_directories( ${OpenMP_C_INCLUDE_DIRS} ${OpenMP_CXX_INCLUDE_DIRS} ) -# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -# set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") +include_directories( ${OpenMP_C_INCLUDE_DIRS} ${OpenMP_CXX_INCLUDE_DIRS} ) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") if(OpenMP_FOUND) message(STATUS "OpenMP found - enabling parallel support") From 02b39ab88f79e077f8b74b7012f33aaa3d8b5b92 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 15:19:02 +0300 Subject: [PATCH 29/36] fix --- CMakeLists.txt | 10 +++++----- src/layers/CMakeLists.txt | 2 +- src/perf/CMakeLists.txt | 2 +- test/CMakeLists.txt | 1 - 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1dc80ba6..58a735f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,12 +32,12 @@ add_subdirectory(3rdparty) include(cmake/opencv_config.cmake) -find_package(OpenMP REQUIRED) +# find_package(OpenMP REQUIRED) -include_directories( ${OpenMP_C_INCLUDE_DIRS} ${OpenMP_CXX_INCLUDE_DIRS} ) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") +# include_directories( ${OpenMP_C_INCLUDE_DIRS} ${OpenMP_CXX_INCLUDE_DIRS} ) +# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") +# set(CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +# set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") if(OpenMP_FOUND) message(STATUS "OpenMP found - enabling parallel support") diff --git a/src/layers/CMakeLists.txt b/src/layers/CMakeLists.txt index 50f14846..db89cc13 100644 --- a/src/layers/CMakeLists.txt +++ b/src/layers/CMakeLists.txt @@ -1,5 +1,5 @@ file(GLOB_RECURSE layers_src *.cpp) add_library(layers_lib STATIC "${LAYERS_HEADERS}" "${layers_src}") target_link_libraries(layers_lib PUBLIC TBB_unified) -target_link_libraries(layers_lib PUBLIC OpenMP::OpenMP_CXX) +#target_link_libraries(layers_lib PUBLIC OpenMP::OpenMP_CXX) target_link_libraries(layers_lib PUBLIC dnnl) diff --git a/src/perf/CMakeLists.txt b/src/perf/CMakeLists.txt index eaa84ebb..d03fafde 100644 --- a/src/perf/CMakeLists.txt +++ b/src/perf/CMakeLists.txt @@ -1,4 +1,4 @@ file(GLOB_RECURSE perf_src *.cpp) add_library(perf_lib STATIC "${PERF_HEADERS}" "${perf_src}") -target_link_libraries(perf_lib PUBLIC OpenMP::OpenMP_CXX) +#target_link_libraries(perf_lib PUBLIC OpenMP::OpenMP_CXX) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 65582b25..59ba26d7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2,7 +2,6 @@ file(GLOB_RECURSE TEST_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) add_executable(run_test ${TEST_SRC_FILES}) -target_link_libraries(run_test PUBLIC OpenMP::OpenMP_CXX) target_link_libraries(run_test PUBLIC perf_lib layers_lib layers_oneDNN_lib) target_link_libraries(run_test PUBLIC gtest) target_link_libraries(run_test PUBLIC ReadLib) From 5f921f17323a1323605e2b16faf8c8b02fdfd3b3 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 15:24:06 +0300 Subject: [PATCH 30/36] fix --- CMakeLists.txt | 10 +++++----- src/layers/CMakeLists.txt | 2 +- src/perf/CMakeLists.txt | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 58a735f1..1dc80ba6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,12 +32,12 @@ add_subdirectory(3rdparty) include(cmake/opencv_config.cmake) -# find_package(OpenMP REQUIRED) +find_package(OpenMP REQUIRED) -# include_directories( ${OpenMP_C_INCLUDE_DIRS} ${OpenMP_CXX_INCLUDE_DIRS} ) -# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -# set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") +include_directories( ${OpenMP_C_INCLUDE_DIRS} ${OpenMP_CXX_INCLUDE_DIRS} ) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") if(OpenMP_FOUND) message(STATUS "OpenMP found - enabling parallel support") diff --git a/src/layers/CMakeLists.txt b/src/layers/CMakeLists.txt index db89cc13..50f14846 100644 --- a/src/layers/CMakeLists.txt +++ b/src/layers/CMakeLists.txt @@ -1,5 +1,5 @@ file(GLOB_RECURSE layers_src *.cpp) add_library(layers_lib STATIC "${LAYERS_HEADERS}" "${layers_src}") target_link_libraries(layers_lib PUBLIC TBB_unified) -#target_link_libraries(layers_lib PUBLIC OpenMP::OpenMP_CXX) +target_link_libraries(layers_lib PUBLIC OpenMP::OpenMP_CXX) target_link_libraries(layers_lib PUBLIC dnnl) diff --git a/src/perf/CMakeLists.txt b/src/perf/CMakeLists.txt index d03fafde..eaa84ebb 100644 --- a/src/perf/CMakeLists.txt +++ b/src/perf/CMakeLists.txt @@ -1,4 +1,4 @@ file(GLOB_RECURSE perf_src *.cpp) add_library(perf_lib STATIC 
"${PERF_HEADERS}" "${perf_src}") -#target_link_libraries(perf_lib PUBLIC OpenMP::OpenMP_CXX) +target_link_libraries(perf_lib PUBLIC OpenMP::OpenMP_CXX) From 4a8c2f57e3ed2867000cdb91d92aa0b4ef71a180 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 15:37:37 +0300 Subject: [PATCH 31/36] fix --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1dc80ba6..8dc0b038 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,7 +41,7 @@ include_directories( ${OpenMP_C_INCLUDE_DIRS} ${OpenMP_CXX_INCLUDE_DIRS} ) if(OpenMP_FOUND) message(STATUS "OpenMP found - enabling parallel support") - add_definitions(-DHAS_OPENMP) + #add_definitions(-DHAS_OPENMP) else() message(STATUS "OpenMP not found - parallel features disabled") endif() From a453ed9165389484698b1d233ff608304bb8baea Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 16:08:58 +0300 Subject: [PATCH 32/36] fix --- CMakeLists.txt | 13 ++++++++----- include/parallel/backends.hpp | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8dc0b038..c98d4630 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,14 +34,17 @@ include(cmake/opencv_config.cmake) find_package(OpenMP REQUIRED) -include_directories( ${OpenMP_C_INCLUDE_DIRS} ${OpenMP_CXX_INCLUDE_DIRS} ) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") +if (NOT WIN32) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Werror") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") +else() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4 /wd4996 /wd4190 /wd4189 /WX") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /wd4996 /wd4190 /wd4189 /WX") +endif() if(OpenMP_FOUND) message(STATUS "OpenMP found - enabling parallel support") - #add_definitions(-DHAS_OPENMP) + add_definitions(-DHAS_OPENMP) else() message(STATUS "OpenMP not found - parallel features disabled") endif() diff --git a/include/parallel/backends.hpp b/include/parallel/backends.hpp index aa5ff000..2609240d 100644 --- a/include/parallel/backends.hpp +++ b/include/parallel/backends.hpp @@ -112,7 +112,7 @@ inline void impl_omp(std::size_t count, return; } -#pragma omp parallel for schedule(static) num_threads(num_threads) + // #pragma omp parallel for schedule(static) num_threads(num_threads) for (int i = 0; i < int_count; ++i) { func(static_cast(i)); } From 40a343b85e85fae439d5ccc48bc4c929a094c77f Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 16:45:26 +0300 Subject: [PATCH 33/36] fix --- include/parallel/backends.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/parallel/backends.hpp b/include/parallel/backends.hpp index 2609240d..6072f9f5 100644 --- a/include/parallel/backends.hpp +++ b/include/parallel/backends.hpp @@ -8,7 +8,7 @@ #include #ifdef HAS_OPENMP -#include +// #include #endif #include From e1a1825e5a71d14cbcd231e437e7b06c535d0c15 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 16:59:56 +0300 Subject: [PATCH 34/36] fix --- test/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 59ba26d7..65582b25 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2,6 +2,7 @@ file(GLOB_RECURSE TEST_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) add_executable(run_test ${TEST_SRC_FILES}) 
+target_link_libraries(run_test PUBLIC OpenMP::OpenMP_CXX) target_link_libraries(run_test PUBLIC perf_lib layers_lib layers_oneDNN_lib) target_link_libraries(run_test PUBLIC gtest) target_link_libraries(run_test PUBLIC ReadLib) From 15ee55498f340479fd4590587b36bd9689b79254 Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 17:28:19 +0300 Subject: [PATCH 35/36] fix --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c98d4630..f0d45a42 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ add_subdirectory(3rdparty) include(cmake/opencv_config.cmake) -find_package(OpenMP REQUIRED) +find_package(OpenMP) if (NOT WIN32) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Werror") From a19776ce18da502b7cbb93b40c30b13971ee4c6d Mon Sep 17 00:00:00 2001 From: AndreySorokin7 Date: Thu, 20 Nov 2025 17:38:11 +0300 Subject: [PATCH 36/36] fix --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f0d45a42..c98d4630 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ add_subdirectory(3rdparty) include(cmake/opencv_config.cmake) -find_package(OpenMP) +find_package(OpenMP REQUIRED) if (NOT WIN32) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Werror")