The utility counts the number of occurrences of each index in an array. It's used to build offsets for mesh topology maps, or to count the number of connected elements. Some users are geometry nodes, the subdivision draw cache, and mesh to curve conversion. This PR parallelizes the counting to take advantage of multiple threads. On a Ryzen 7950X, when counting connected edges to vertices, I observed an improvement from 10.2 ms to 3.0 ms. The atomic increments most likely make the counting less efficient in terms of total work done, but the reduction in wall-clock time is quite nice. The new code was much slower for me with fewer than four threads, so I added a check so that counting remains single-threaded in that case. Pull Request: https://projects.blender.org/blender/blender/pulls/109628
128 lines
4.0 KiB
C++
128 lines
4.0 KiB
C++
/* SPDX-FileCopyrightText: 2023 Blender Foundation
|
|
*
|
|
* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
|
|
#include "BLI_array_utils.hh"
|
|
#include "BLI_threads.h"
|
|
|
|
#include "atomic_ops.h"
|
|
|
|
namespace blender::array_utils {
|
|
|
|
void copy(const GVArray &src, GMutableSpan dst, const int64_t grain_size)
|
|
{
|
|
BLI_assert(src.type() == dst.type());
|
|
BLI_assert(src.size() == dst.size());
|
|
threading::parallel_for(src.index_range(), grain_size, [&](const IndexRange range) {
|
|
src.materialize_to_uninitialized(range, dst.data());
|
|
});
|
|
}
|
|
|
|
void copy(const GVArray &src,
|
|
const IndexMask &selection,
|
|
GMutableSpan dst,
|
|
const int64_t grain_size)
|
|
{
|
|
BLI_assert(src.type() == dst.type());
|
|
BLI_assert(src.size() >= selection.min_array_size());
|
|
BLI_assert(dst.size() >= selection.min_array_size());
|
|
threading::parallel_for(selection.index_range(), grain_size, [&](const IndexRange range) {
|
|
src.materialize_to_uninitialized(selection.slice(range), dst.data());
|
|
});
|
|
}
|
|
|
|
void gather(const GVArray &src,
|
|
const IndexMask &indices,
|
|
GMutableSpan dst,
|
|
const int64_t grain_size)
|
|
{
|
|
BLI_assert(src.type() == dst.type());
|
|
BLI_assert(indices.size() == dst.size());
|
|
threading::parallel_for(indices.index_range(), grain_size, [&](const IndexRange range) {
|
|
src.materialize_compressed_to_uninitialized(indices.slice(range), dst.slice(range).data());
|
|
});
|
|
}
|
|
|
|
void gather(const GSpan src, const IndexMask &indices, GMutableSpan dst, const int64_t grain_size)
|
|
{
|
|
gather(GVArray::ForSpan(src), indices, dst, grain_size);
|
|
}
|
|
|
|
void count_indices(const Span<int> indices, MutableSpan<int> counts)
|
|
{
|
|
if (indices.size() < 8192 || BLI_system_thread_count() < 4) {
|
|
for (const int i : indices) {
|
|
counts[i]++;
|
|
}
|
|
}
|
|
else {
|
|
threading::parallel_for(indices.index_range(), 4096, [&](const IndexRange range) {
|
|
for (const int i : indices.slice(range)) {
|
|
atomic_add_and_fetch_int32(&counts[i], 1);
|
|
}
|
|
});
|
|
}
|
|
}
|
|
|
|
/**
 * Flip every boolean in \a span in place. The span is processed in parallel chunks with a grain
 * size of 4096 elements.
 */
void invert_booleans(MutableSpan<bool> span)
{
  threading::parallel_for(span.index_range(), 4096, [&](IndexRange range) {
    /* Use a 64 bit index like the other loops over #IndexRange in this file, so that the index is
     * not implicitly narrowed to `int` for very large spans. */
    for (const int64_t i : range) {
      span[i] = !span[i];
    }
  });
}
|
|
|
|
/**
 * Merge the states of two checked sub-ranges. #BooleanMix::None means that no values have been
 * checked, so it acts as the neutral element. Two different non-empty states become
 * #BooleanMix::Mixed.
 */
static BooleanMix booleans_mix_combine(const BooleanMix a, const BooleanMix b)
{
  if (a == BooleanMix::None) {
    return b;
  }
  if (b == BooleanMix::None) {
    return a;
  }
  return (a == b) ? a : BooleanMix::Mixed;
}

/**
 * Determine whether the booleans of \a varray inside \a range_to_check are all true, all false,
 * or a mix of both.
 *
 * \return #BooleanMix::None if \a varray is empty, or if \a range_to_check is empty for
 * non-single arrays. For single-value arrays the state of that value is returned without
 * looking at \a range_to_check. Otherwise #BooleanMix::AllTrue, #BooleanMix::AllFalse or
 * #BooleanMix::Mixed.
 */
BooleanMix booleans_mix_calc(const VArray<bool> &varray, const IndexRange range_to_check)
{
  if (varray.is_empty()) {
    return BooleanMix::None;
  }

  const CommonVArrayInfo info = varray.common_info();
  if (info.type == CommonVArrayInfo::Type::Single) {
    return *static_cast<const bool *>(info.data) ? BooleanMix::AllTrue : BooleanMix::AllFalse;
  }

  /* NOTE(review): the reduction is presumably backed by TBB's functional `parallel_reduce`. Its
   * contract allows the range callback to be invoked for several consecutive chunks with the
   * accumulated result of the earlier chunks passed in as `init`. Therefore the result of every
   * chunk has to be merged with `init` instead of replacing it. */

  if (info.type == CommonVArrayInfo::Type::Span) {
    const Span<bool> span(static_cast<const bool *>(info.data), varray.size());
    return threading::parallel_reduce(
        range_to_check,
        4096,
        BooleanMix::None,
        [&](const IndexRange range, const BooleanMix init) {
          /* Nothing can change a mixed state, and an empty range has no first element. */
          if (init == BooleanMix::Mixed || range.is_empty()) {
            return init;
          }

          const Span<bool> slice = span.slice(range);
          const bool first = slice.first();
          for (const bool value : slice.drop_front(1)) {
            if (value != first) {
              return BooleanMix::Mixed;
            }
          }
          return booleans_mix_combine(init, first ? BooleanMix::AllTrue : BooleanMix::AllFalse);
        },
        [](const BooleanMix a, const BooleanMix b) { return booleans_mix_combine(a, b); });
  }

  return threading::parallel_reduce(
      range_to_check,
      2048,
      BooleanMix::None,
      [&](const IndexRange range, const BooleanMix init) {
        /* Nothing can change a mixed state, and an empty range has no first element. */
        if (init == BooleanMix::Mixed || range.is_empty()) {
          return init;
        }

        /* Alternatively, this could use #materialize to retrieve many values at once. */
        const bool first = varray[range.first()];
        for (const int64_t i : range.drop_front(1)) {
          if (varray[i] != first) {
            return BooleanMix::Mixed;
          }
        }
        return booleans_mix_combine(init, first ? BooleanMix::AllTrue : BooleanMix::AllFalse);
      },
      [](const BooleanMix a, const BooleanMix b) { return booleans_mix_combine(a, b); });
}
|
|
|
|
} // namespace blender::array_utils
|