Adding different types of parallelism to the elementwise layer #222
base: main
Changes from 7 commits
Elementwise layer implementation (EWLayerImpl gains a type_parall constructor parameter and routes every activation through parallel_for):

@@ -46,7 +46,7 @@ class EWLayerImpl : public LayerImpl<ValueType> {
 public:
  EWLayerImpl() = delete;
  EWLayerImpl(const Shape& shape, std::string function, float alpha = 0.0F,
-             float beta = 0.0F);
+             float beta = 0.0F, int type_parall = 0);
  EWLayerImpl(const EWLayerImpl& c) = default;
  EWLayerImpl& operator=(const EWLayerImpl& c) = default;
  std::vector<ValueType> run(
@@ -56,57 +56,83 @@ class EWLayerImpl : public LayerImpl<ValueType> {
  std::string func_;
  float alpha_;
  float beta_;
+ int type_parall_;
 };

 template <typename ValueType>
 EWLayerImpl<ValueType>::EWLayerImpl(const Shape& shape, std::string function,
-                                    float alpha, float beta)
+                                    float alpha, float beta, int type_parall)
     : LayerImpl<ValueType>(shape, shape),
       func_(std::move(function)),
       alpha_(alpha),
-      beta_(beta) {}
+      beta_(beta),
+      type_parall_(type_parall) {}

 template <typename ValueType>
 std::vector<ValueType> EWLayerImpl<ValueType>::run(
     const std::vector<ValueType>& input) const {
   std::vector<ValueType> res(this->outputShape_.count());
+  int available_threads = -1;
+  if (type_parall_ == 0) available_threads = 1;
+  if (type_parall_ == 1)
+    available_threads = std::thread::hardware_concurrency();
+  if (type_parall_ == 2)
+    available_threads = oneapi::tbb::info::default_concurrency();
+  if (type_parall_ == 3) available_threads = omp_get_max_threads();

   if (func_ == "relu") {
-    std::transform(input.begin(), input.end(), res.begin(), relu<ValueType>);
+    parallel_for(
+        input.size(),
+        [&](int i) {
+          res[i] = input[i] > ValueType(0) ? input[i] : ValueType(0);
+        },
+        type_parall_);
   } else if (func_ == "tanh") {
-    auto tanh = [&](const ValueType& value) -> ValueType {
-      return static_cast<ValueType>(std::tanh(value));
-    };
-    std::transform(input.begin(), input.end(), res.begin(), tanh);
+    parallel_for(
+        input.size(),
+        [&](int i) { res[i] = static_cast<ValueType>(std::tanh(input[i])); },
+        type_parall_);
   } else if (func_ == "sin") {
-    auto sin = [&](const ValueType& value) -> ValueType {
-      return static_cast<ValueType>(std::sin(value));
-    };
-    std::transform(input.begin(), input.end(), res.begin(), sin);
+    parallel_for(
+        input.size(),
+        [&](int i) { res[i] = static_cast<ValueType>(std::sin(input[i])); },
+        type_parall_);
   } else if (func_ == "minus") {
-    auto minus = [&](const ValueType& value) -> ValueType { return -value; };
-    std::transform(input.begin(), input.end(), res.begin(), minus);
+    parallel_for(
+        input.size(), [&](int i) { res[i] = -input[i]; }, type_parall_);
   } else if (func_ == "linear") {
-    auto linear = [&](const ValueType& value) -> ValueType {
-      return value * static_cast<ValueType>(alpha_) +
-             static_cast<ValueType>(beta_);
-    };
-    std::transform(input.begin(), input.end(), res.begin(), linear);
+    parallel_for(
+        input.size(),
+        [&](int i) {
+          res[i] = input[i] * static_cast<ValueType>(alpha_) +
+                   static_cast<ValueType>(beta_);
+        },
+        type_parall_);
   } else if (func_ == "sigmoid") {
-    auto sigmoid = [](ValueType x) -> ValueType {
-      if constexpr (std::is_integral_v<ValueType>) {
-        auto x_float = static_cast<float>(x);
-        float result = 1.0F / (1.0F + std::exp(-x_float));
-        return static_cast<ValueType>(std::round(result));
-      } else {
-        if (x >= ValueType(0)) {
-          ValueType z = std::exp(-x);
-          return ValueType(1) / (ValueType(1) + z);
-        }
-        ValueType z = std::exp(x);
-        return z / (ValueType(1) + z);
-      }
-    };
-    std::transform(input.cbegin(), input.cend(), res.begin(), sigmoid);
+    if constexpr (std::is_integral_v<ValueType>) {
+      parallel_for(
+          input.size(),
+          [&](int i) {
+            auto x_float = static_cast<float>(input[i]);
+            float result = 1.0F / (1.0F + std::exp(-x_float));
+            res[i] = static_cast<ValueType>(std::round(result));
+          },
+          type_parall_);
+    } else {
+      parallel_for(
+          input.size(),
+          [&](int i) {
+            ValueType x = input[i];
+            if (x >= ValueType(0)) {
+              ValueType z = std::exp(-x);
+              res[i] = ValueType(1) / (ValueType(1) + z);
+            } else {
+              ValueType z = std::exp(x);
+              res[i] = z / (ValueType(1) + z);
+            }
+          },
+          type_parall_);
+    }
   } else {
     throw std::invalid_argument("No such function for EWLayer");
   }
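From the `available_threads` dispatch, the mode mapping is: `0` = sequential, `1` = `std::thread`, `2` = oneTBB, `3` = OpenMP. Note that `available_threads` is computed but never used in the visible hunk; presumably it is consumed by code outside the extracted diff, or is leftover debugging. The sigmoid branch keeps the numerically stable evaluation: for `x >= 0` it computes `1 / (1 + e^-x)` and otherwise `e^x / (1 + e^x)`, so `std::exp` is never called with a large positive argument. A minimal usage sketch follows; the `Shape` constructor and the include path are assumptions, since the extracted diff does not show them:

```cpp
// Hypothetical usage sketch: Shape's constructor and the header path are
// assumed, not taken from the PR.
#include <vector>
// #include "layers/EWLayer.hpp"  // actual include path not shown in the diff

int main() {
  Shape shape({1, 3, 224, 224});  // assumed Shape constructor

  // type_parall: 0 = sequential, 1 = std::thread, 2 = oneTBB, 3 = OpenMP
  EWLayerImpl<float> relu_seq(shape, "relu");                 // defaults to 0
  EWLayerImpl<float> relu_tbb(shape, "relu", 0.0F, 0.0F, 2);  // oneTBB

  std::vector<float> input(shape.count(), -1.5F);
  std::vector<float> output = relu_tbb.run(input);  // all zeros: inputs < 0
  return 0;
}
```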
Layer base-class header (adds a parallel-mode setter and the parallel_for helper; several of the newly added include lines did not survive extraction, so only #include <omp.h> is shown):

@@ -1,5 +1,11 @@
 #pragma once
+#include <omp.h>

 #include <algorithm>
 #include <execution>
 #include <functional>
 #include <initializer_list>
 #include <iostream>
 #include <numeric>
 #include <stdexcept>
 #include <string>
@@ -49,6 +55,7 @@ class Layer {
   PostOperations postops;
   int getID() const { return id_; }
   void setID(int id) { id_ = id; }
+  void setTypeParall(int type) { type_parall_ = type; }
   LayerType getName() const { return type_; }
   virtual void run(const std::vector<Tensor>& input,
                    std::vector<Tensor>& output) = 0;
@@ -59,6 +66,7 @@ class Layer {
 protected:
  int id_ = 0;
  LayerType type_;
+ int type_parall_;
 };

 template <typename ValueType>
@@ -83,4 +91,125 @@ class LayerImpl {
   Shape outputShape_;
 };

+template <typename Func>
+inline void parallel_for(int count, Func func, int mode = 0) {
+  if (count <= 0) return;
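The `@@ -83,4 +91,125 @@` header indicates roughly 120 added lines for this helper, but only the signature and the `count <= 0` guard survived the page extraction. Below is a sketch of what the mode dispatch plausibly looks like, inferred from the mode values used in `EWLayerImpl::run`; the chunking strategy, the `parallel_for_sketch` name, and every other implementation detail are assumptions, not the PR's actual code:

```cpp
// Hedged reconstruction, NOT the PR's actual body: only the signature and
// the count guard are visible in the diff. Mode values follow
// EWLayerImpl::run: 0 = sequential, 1 = std::thread, 2 = oneTBB, 3 = OpenMP.
#include <omp.h>

#include <oneapi/tbb/parallel_for.h>

#include <algorithm>
#include <thread>
#include <vector>

template <typename Func>
inline void parallel_for_sketch(int count, Func func, int mode = 0) {
  if (count <= 0) return;
  switch (mode) {
    case 1: {  // std::thread: split [0, count) into contiguous chunks
      int num_threads = static_cast<int>(std::thread::hardware_concurrency());
      if (num_threads <= 0) num_threads = 1;
      int chunk = (count + num_threads - 1) / num_threads;
      std::vector<std::thread> workers;
      for (int t = 0; t < num_threads; ++t) {
        int begin = t * chunk;
        int end = std::min(count, begin + chunk);
        if (begin >= end) break;
        workers.emplace_back([begin, end, &func] {
          for (int i = begin; i < end; ++i) func(i);
        });
      }
      for (auto& w : workers) w.join();
      break;
    }
    case 2:  // oneTBB: let the scheduler partition the index range
      oneapi::tbb::parallel_for(0, count, [&](int i) { func(i); });
      break;
    case 3: {  // OpenMP: parallel loop with default scheduling
#pragma omp parallel for
      for (int i = 0; i < count; ++i) func(i);
      break;
    }
    default:  // mode 0 or anything unrecognized: plain sequential loop
      for (int i = 0; i < count; ++i) func(i);
      break;
  }
}
```

Whichever backend is selected, correctness relies on the per-element writes touching disjoint `res[i]` slots, which holds for every lambda passed from `EWLayerImpl::run`.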
Layers CMakeLists.txt (OpenMP linkage added, but left commented out):

@@ -1,4 +1,7 @@
 file(GLOB_RECURSE layers_src *.cpp)
 add_library(layers_lib STATIC "${LAYERS_HEADERS}" "${layers_src}")
 target_link_libraries(layers_lib PUBLIC TBB_unified)
+# if(OpenMP_FOUND)
+#   target_link_libraries(layers_lib PUBLIC OpenMP::OpenMP_CXX)
+# endif()
 target_link_libraries(layers_lib PUBLIC dnnl)
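Presumably the OpenMP linkage is left commented out until OpenMP detection (the `if(OpenMP_FOUND)` guard suggests a planned `find_package(OpenMP)`) is wired into the build; as it stands, the OpenMP path (`type_parall == 3`) depends on OpenMP being enabled elsewhere, since the layer header unconditionally includes `<omp.h>` and calls `omp_get_max_threads()`.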
Review comment: Use a strongly-typed backend enum instead of `int` for readability and safety.
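A sketch of what that suggestion could look like (hypothetical; the PR as shown still passes a raw `int`, and the `ParBackend` name comes from the next review comment):

```cpp
// Hypothetical enum mirroring the existing int mode values; not in the PR.
enum class ParBackend : int {
  kSequential = 0,  // type_parall == 0
  kStdThread = 1,   // type_parall == 1
  kTBB = 2,         // type_parall == 2
  kOpenMP = 3,      // type_parall == 3
};
```

An `enum class` prevents implicit conversions from unrelated integers, so an invalid backend becomes a compile-time error at the call site instead of silently falling through the `if` chain in `run`.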
Review comment: Propagate `ParBackend` through the API instead of a raw `int`.
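Concretely, the signatures from the diff might change as follows (a sketch assuming the `ParBackend` enum above is adopted; none of this is in the PR):

```cpp
// Hypothetical signatures adapted from the diff; not part of the PR.
EWLayerImpl(const Shape& shape, std::string function, float alpha = 0.0F,
            float beta = 0.0F, ParBackend backend = ParBackend::kSequential);

void setParBackend(ParBackend backend) { backend_ = backend; }  // on Layer

template <typename Func>
inline void parallel_for(int count, Func func,
                         ParBackend backend = ParBackend::kSequential);
```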