diff --git a/source/blender/blenlib/BLI_task.hh b/source/blender/blenlib/BLI_task.hh index 55dae787b52..ccde00d2a8a 100644 --- a/source/blender/blenlib/BLI_task.hh +++ b/source/blender/blenlib/BLI_task.hh @@ -35,6 +35,7 @@ #include "BLI_function_ref.hh" #include "BLI_index_range.hh" #include "BLI_lazy_threading.hh" +#include "BLI_span.hh" #include "BLI_utildefines.h" namespace blender { @@ -68,6 +69,10 @@ namespace detail { void parallel_for_impl(IndexRange range, int64_t grain_size, FunctionRef function); +void parallel_for_weighted_impl(IndexRange range, + int64_t grain_size, + FunctionRef function, + FunctionRef)> task_sizes_fn); } // namespace detail template @@ -83,6 +88,40 @@ inline void parallel_for(IndexRange range, int64_t grain_size, const Function &f detail::parallel_for_impl(range, grain_size, function); } +/** + * Almost like `parallel_for` but allows passing in a function that estimates the amount of work + * per index. This allows distributing work to threads more evenly. + * + * Using this function makes sense when the work load for each index can differ significantly, so + * that it is impossible to determine a good constant grain size. + * + * This function has a bit more overhead than the unweighted #parallel_for. If that is noticable + * highly depends on the use-case. So the overhead should be measured when trying to use this + * function for cases where all tasks may be very small. + * + * \param task_size_fn: Gets the task index as input and computes that tasks size. + * \param grain_size: Determines approximately how large a combined task should be. For example, if + * the grain size is 100, then 5 tasks of size 20 fit into it. + */ +template +inline void parallel_for_weighted(IndexRange range, + int64_t grain_size, + const Function &function, + const TaskSizeFn &task_size_fn) +{ + if (range.is_empty()) { + return; + } + detail::parallel_for_weighted_impl( + range, grain_size, function, [&](const IndexRange sub_range, MutableSpan r_sizes) { + for (const int64_t i : sub_range.index_range()) { + const int64_t task_size = task_size_fn(sub_range[i]); + BLI_assert(task_size >= 0); + r_sizes[i] = task_size; + } + }); +} + /** * Move the sub-range boundaries down to the next aligned index. The "global" begin and end * remain fixed though. diff --git a/source/blender/blenlib/intern/task_range.cc b/source/blender/blenlib/intern/task_range.cc index 3b750555777..9acb2ed84f6 100644 --- a/source/blender/blenlib/intern/task_range.cc +++ b/source/blender/blenlib/intern/task_range.cc @@ -12,10 +12,13 @@ #include "MEM_guardedalloc.h" +#include "BLI_array.hh" #include "BLI_lazy_threading.hh" +#include "BLI_offset_indices.hh" #include "BLI_task.h" #include "BLI_task.hh" #include "BLI_threads.h" +#include "BLI_vector.hh" #include "atomic_ops.h" @@ -179,4 +182,45 @@ void parallel_for_impl(const IndexRange range, function(range); } +void parallel_for_weighted_impl( + const IndexRange range, + const int64_t grain_size, + const FunctionRef function, + const FunctionRef)> task_sizes_fn) +{ + /* Shouldn't be too small, because then there is more overhead when the individual tasks are + * small. Also shouldn't be too large because then the serial code to split up tasks causes extra + * overhead. */ + const int64_t outer_grain_size = std::min(grain_size, 512); + threading::parallel_for(range, outer_grain_size, [&](const IndexRange sub_range) { + /* Compute the size of every task in the current range. */ + Array task_sizes(sub_range.size()); + task_sizes_fn(sub_range, task_sizes); + + /* Split range into multiple segments that have a size that approximates the grain size. */ + Vector offsets_vec; + offsets_vec.append(0); + int64_t counter = 0; + for (const int64_t i : sub_range.index_range()) { + counter += task_sizes[i]; + if (counter >= grain_size) { + offsets_vec.append(i + 1); + counter = 0; + } + } + if (offsets_vec.last() < sub_range.size()) { + offsets_vec.append(sub_range.size()); + } + const OffsetIndices offsets = offsets_vec.as_span(); + + /* Run the dynamically split tasks in parallel. */ + threading::parallel_for(offsets.index_range(), 1, [&](const IndexRange offsets_range) { + for (const int64_t i : offsets_range) { + const IndexRange actual_range = offsets[i].shift(sub_range.start()); + function(actual_range); + } + }); + }); +} + } // namespace blender::threading::detail diff --git a/source/blender/nodes/geometry/nodes/node_geo_proximity.cc b/source/blender/nodes/geometry/nodes/node_geo_proximity.cc index 4adb81c82be..c654c85604d 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_proximity.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_proximity.cc @@ -123,16 +123,20 @@ class ProximityFunction : public mf::MultiFunction { /* Construct BVH tree for each group. */ bvh_trees_.resize(groups_num); - threading::parallel_for(IndexRange(groups_num), 16, [&](const IndexRange range) { - for (const int group_i : range) { - const IndexMask &group_mask = group_masks[group_i]; - if (group_mask.is_empty()) { - continue; - } - BVHTreeFromPointCloud &bvh = bvh_trees_[group_i].pointcloud_bvh; - BKE_bvhtree_from_pointcloud_get(pointcloud, group_mask, bvh); - } - }); + threading::parallel_for_weighted( + IndexRange(groups_num), + 512, + [&](const IndexRange range) { + for (const int group_i : range) { + const IndexMask &group_mask = group_masks[group_i]; + if (group_mask.is_empty()) { + continue; + } + BVHTreeFromPointCloud &bvh = bvh_trees_[group_i].pointcloud_bvh; + BKE_bvhtree_from_pointcloud_get(pointcloud, group_mask, bvh); + } + }, + [&](const int group_i) { return group_masks[group_i].size(); }); } void init_for_mesh(const Mesh &mesh, const Field &group_id_field) @@ -152,29 +156,33 @@ class ProximityFunction : public mf::MultiFunction { /* Construct BVH tree for each group. */ bvh_trees_.resize(groups_num); - threading::parallel_for(IndexRange(groups_num), 16, [&](const IndexRange range) { - for (const int group_i : range) { - const IndexMask &group_mask = group_masks[group_i]; - if (group_mask.is_empty()) { - continue; - } - BVHTreeFromMesh &bvh = bvh_trees_[group_i].mesh_bvh; - switch (type_) { - case GEO_NODE_PROX_TARGET_POINTS: { - BKE_bvhtree_from_mesh_verts_init(mesh, group_mask, bvh); - break; + threading::parallel_for_weighted( + IndexRange(groups_num), + 512, + [&](const IndexRange range) { + for (const int group_i : range) { + const IndexMask &group_mask = group_masks[group_i]; + if (group_mask.is_empty()) { + continue; + } + BVHTreeFromMesh &bvh = bvh_trees_[group_i].mesh_bvh; + switch (type_) { + case GEO_NODE_PROX_TARGET_POINTS: { + BKE_bvhtree_from_mesh_verts_init(mesh, group_mask, bvh); + break; + } + case GEO_NODE_PROX_TARGET_EDGES: { + BKE_bvhtree_from_mesh_edges_init(mesh, group_mask, bvh); + break; + } + case GEO_NODE_PROX_TARGET_FACES: { + BKE_bvhtree_from_mesh_tris_init(mesh, group_mask, bvh); + break; + } + } } - case GEO_NODE_PROX_TARGET_EDGES: { - BKE_bvhtree_from_mesh_edges_init(mesh, group_mask, bvh); - break; - } - case GEO_NODE_PROX_TARGET_FACES: { - BKE_bvhtree_from_mesh_tris_init(mesh, group_mask, bvh); - break; - } - } - } - }); + }, + [&](const int group_i) { return group_masks[group_i].size(); }); } bke::AttrDomain get_domain_on_mesh() const diff --git a/source/blender/nodes/geometry/nodes/node_geo_sample_nearest_surface.cc b/source/blender/nodes/geometry/nodes/node_geo_sample_nearest_surface.cc index b4d66fdb445..688398215d7 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_sample_nearest_surface.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_sample_nearest_surface.cc @@ -116,13 +116,17 @@ class SampleNearestSurfaceFunction : public mf::MultiFunction { /* Construct BVH tree for each group. */ bvh_trees_.reinitialize(groups_num); - threading::parallel_for(IndexRange(groups_num), 16, [&](const IndexRange range) { - for (const int group_i : range) { - const IndexMask &group_mask = group_masks[group_i]; - BVHTreeFromMesh &bvh = bvh_trees_[group_i]; - BKE_bvhtree_from_mesh_tris_init(mesh, group_mask, bvh); - } - }); + threading::parallel_for_weighted( + IndexRange(groups_num), + 512, + [&](const IndexRange range) { + for (const int group_i : range) { + const IndexMask &group_mask = group_masks[group_i]; + BVHTreeFromMesh &bvh = bvh_trees_[group_i]; + BKE_bvhtree_from_mesh_tris_init(mesh, group_mask, bvh); + } + }, + [&](const int group_i) { return group_masks[group_i].size(); }); } ~SampleNearestSurfaceFunction() diff --git a/source/blender/nodes/geometry/nodes/node_geo_scale_elements.cc b/source/blender/nodes/geometry/nodes/node_geo_scale_elements.cc index 23508d5c72d..dfcae1f813e 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_scale_elements.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_scale_elements.cc @@ -165,30 +165,35 @@ static void scale_vertex_islands_uniformly(Mesh &mesh, const OffsetIndices faces = mesh.faces(); const Span corner_verts = mesh.corner_verts(); - threading::parallel_for(islands.index_range(), 256, [&](const IndexRange range) { - for (const int island_index : range) { - const ElementIsland &island = islands[island_index]; + threading::parallel_for_weighted( + islands.index_range(), + 512, + [&](const IndexRange range) { + for (const int island_index : range) { + const ElementIsland &island = islands[island_index]; - float scale = 0.0f; - float3 center = {0.0f, 0.0f, 0.0f}; + float scale = 0.0f; + float3 center = {0.0f, 0.0f, 0.0f}; - VectorSet vertex_indices; - for (const int face_index : island.element_indices) { - get_vertex_indices(edges, faces, corner_verts, face_index, vertex_indices); - center += params.centers[face_index]; - scale += params.scales[face_index]; - } + VectorSet vertex_indices; + for (const int face_index : island.element_indices) { + get_vertex_indices(edges, faces, corner_verts, face_index, vertex_indices); + center += params.centers[face_index]; + scale += params.scales[face_index]; + } - /* Divide by number of elements to get the average. */ - const float f = 1.0f / island.element_indices.size(); - scale *= f; - center *= f; + /* Divide by number of elements to get the average. */ + const float f = 1.0f / island.element_indices.size(); + scale *= f; + center *= f; - for (const int vert_index : vertex_indices) { - positions[vert_index] = transform_with_uniform_scale(positions[vert_index], center, scale); - } - } - }); + for (const int vert_index : vertex_indices) { + positions[vert_index] = transform_with_uniform_scale( + positions[vert_index], center, scale); + } + } + }, + [&](const int64_t i) { return islands[i].element_indices.size(); }); mesh.tag_positions_changed(); }