diff --git a/build_files/cmake/platform/dependency_targets.cmake b/build_files/cmake/platform/dependency_targets.cmake index b08acc0f952..8c81b10109f 100644 --- a/build_files/cmake/platform/dependency_targets.cmake +++ b/build_files/cmake/platform/dependency_targets.cmake @@ -31,3 +31,9 @@ add_library(bf_deps_eigen INTERFACE) add_library(bf::dependencies::eigen ALIAS bf_deps_eigen) target_include_directories(bf_deps_eigen SYSTEM INTERFACE ${EIGEN3_INCLUDE_DIRS}) + +if(WITH_TBB) + target_compile_definitions(bf_deps_eigen INTERFACE WITH_TBB) + target_include_directories(bf_deps_eigen SYSTEM INTERFACE ${TBB_INCLUDE_DIRS}) + target_link_libraries(bf_deps_eigen INTERFACE ${TBB_LIBRARIES}) +endif() diff --git a/extern/Eigen3/Eigen/src/SparseCore/SparseDenseProduct.h b/extern/Eigen3/Eigen/src/SparseCore/SparseDenseProduct.h index f005a18a18e..b1d96494500 100644 --- a/extern/Eigen3/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/extern/Eigen3/Eigen/src/SparseCore/SparseDenseProduct.h @@ -10,7 +10,11 @@ #ifndef EIGEN_SPARSEDENSEPRODUCT_H #define EIGEN_SPARSEDENSEPRODUCT_H -namespace Eigen { +#ifdef WITH_TBB +#include +#endif + +namespace Eigen { namespace internal { @@ -34,23 +38,21 @@ struct sparse_time_dense_product_impl1 && lhsEval.nonZerosEstimate() > 20000) + if(lhsEval.nonZerosEstimate() > 20000) { - #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads) - for(Index i=0; i(0, n, 1024), + [&](const tbb::blocked_range& range) { + for(Index i=range.begin(); i1 && lhsEval.nonZerosEstimate()*rhs.cols() > 20000) + if(lhsEval.nonZerosEstimate()*rhs.cols() > 20000) { - #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads) - for(Index i=0; i(0, n, 1024), + [&](const tbb::blocked_range& range) { + for(Index i=range.begin(); i ++#endif ++ ++namespace Eigen { + + namespace internal { + +@@ -34,23 +38,21 @@ struct sparse_time_dense_product_impl1 && lhsEval.nonZerosEstimate() > 20000) ++ if(lhsEval.nonZerosEstimate() > 20000) + { +- #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads) +- for(Index i=0; i(0, n, 1024), ++ [&](const tbb::blocked_range& range) { ++ for(Index i=range.begin(); i1 && lhsEval.nonZerosEstimate()*rhs.cols() > 20000) ++ if(lhsEval.nonZerosEstimate()*rhs.cols() > 20000) + { +- #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads) +- for(Index i=0; i(0, n, 1024), ++ [&](const tbb::blocked_range& range) { ++ for(Index i=range.begin(); i