I've done this a few times and would have benefited from a utility function for it; apparently it's done in a few more places too. The utilities aren't multithreaded for now: that doesn't seem important, and multithreading often happens at a different level of the call stack anyway. Pull Request: https://projects.blender.org/blender/blender/pulls/127517
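For context, this is the count-then-accumulate idiom the utilities wrap, shown as a minimal sketch (the `material_indices`/`materials_num` inputs are hypothetical, chosen only for illustration):

  /* Hypothetical example: group faces by material. Count how many faces use each
   * material, then convert the counts to offsets in place. The buffer needs one
   * extra trailing element, which receives the total. */
  Array<int> offset_data(materials_num + 1, 0);
  for (const int material : material_indices) {
    offset_data[material]++;
  }
  const OffsetIndices<int> groups = offset_indices::accumulate_counts_to_offsets(offset_data);
  /* `groups[i]` is now the index range of the faces using material `i`. */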
156 lines · 4.8 KiB · C++
/* SPDX-FileCopyrightText: 2023 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

#include "BLI_array_utils.hh"
#include "BLI_offset_indices.hh"
#include "BLI_task.hh"

namespace blender::offset_indices {

OffsetIndices<int> accumulate_counts_to_offsets(MutableSpan<int> counts_to_offsets,
                                                const int start_offset)
{
  int offset = start_offset;
  /* Shadow the accumulation in 64-bit (updated in debug builds only) to detect overflow. */
  int64_t offset_i64 = start_offset;

  for (const int i : counts_to_offsets.index_range().drop_back(1)) {
    const int count = counts_to_offsets[i];
    BLI_assert(count >= 0);
    counts_to_offsets[i] = offset;
    offset += count;
#ifndef NDEBUG
    offset_i64 += count;
#endif
  }
  counts_to_offsets.last() = offset;

  BLI_assert_msg(offset == offset_i64, "Integer overflow occurred");
  UNUSED_VARS_NDEBUG(offset_i64);

  return OffsetIndices<int>(counts_to_offsets);
}

std::optional<OffsetIndices<int>> accumulate_counts_to_offsets_with_overflow_check(
    MutableSpan<int> counts_to_offsets, int start_offset)
{
  /* This variant was measured to be about 8% slower than the version without the overflow check.
   * Since this function is often a serial bottleneck, we use a separate code path for the case
   * where an overflow check is requested. */
  int64_t offset = start_offset;
  for (const int i : counts_to_offsets.index_range().drop_back(1)) {
    const int count = counts_to_offsets[i];
    BLI_assert(count >= 0);
    counts_to_offsets[i] = offset;
    offset += count;
  }
  counts_to_offsets.last() = offset;
  const bool has_overflow = offset >= std::numeric_limits<int>::max();
  if (has_overflow) {
    return std::nullopt;
  }
  return OffsetIndices<int>(counts_to_offsets);
}

void fill_constant_group_size(const int size, const int start_offset, MutableSpan<int> offsets)
{
  threading::memory_bandwidth_bound_task(offsets.size_in_bytes(), [&]() {
    threading::parallel_for(offsets.index_range(), 1024, [&](const IndexRange range) {
      for (const int64_t i : range) {
        offsets[i] = size * i + start_offset;
      }
    });
  });
}

void copy_group_sizes(const OffsetIndices<int> offsets,
                      const IndexMask &mask,
                      MutableSpan<int> sizes)
{
  mask.foreach_index_optimized<int64_t>(GrainSize(4096),
                                        [&](const int64_t i) { sizes[i] = offsets[i].size(); });
}

void gather_group_sizes(const OffsetIndices<int> offsets,
                        const IndexMask &mask,
                        MutableSpan<int> sizes)
{
  mask.foreach_index_optimized<int64_t>(GrainSize(4096), [&](const int64_t i, const int64_t pos) {
    sizes[pos] = offsets[i].size();
  });
}

void gather_group_sizes(const OffsetIndices<int> offsets,
                        const Span<int> indices,
                        MutableSpan<int> sizes)
{
  threading::memory_bandwidth_bound_task(
      sizes.size_in_bytes() + offsets.data().size_in_bytes() + indices.size_in_bytes(), [&]() {
        threading::parallel_for(indices.index_range(), 4096, [&](const IndexRange range) {
          for (const int i : range) {
            sizes[i] = offsets[indices[i]].size();
          }
        });
      });
}

int sum_group_sizes(const OffsetIndices<int> offsets, const Span<int> indices)
{
  int count = 0;
  for (const int i : indices) {
    count += offsets[i].size();
  }
  return count;
}

int sum_group_sizes(const OffsetIndices<int> offsets, const IndexMask &mask)
{
  int count = 0;
  mask.foreach_segment_optimized([&](const auto segment) {
    if constexpr (std::is_same_v<std::decay_t<decltype(segment)>, IndexRange>) {
      /* A contiguous range of groups can be summed in constant time from the offsets alone. */
      count += offsets[segment].size();
    }
    else {
      for (const int64_t i : segment) {
        count += offsets[i].size();
      }
    }
  });
  return count;
}

OffsetIndices<int> gather_selected_offsets(const OffsetIndices<int> src_offsets,
                                           const IndexMask &selection,
                                           const int start_offset,
                                           MutableSpan<int> dst_offsets)
{
  if (selection.is_empty()) {
    return {};
  }
  int offset = start_offset;
  selection.foreach_index_optimized<int>([&](const int i, const int pos) {
    dst_offsets[pos] = offset;
    offset += src_offsets[i].size();
  });
  dst_offsets.last() = offset;
  return OffsetIndices<int>(dst_offsets);
}

void build_reverse_map(OffsetIndices<int> offsets, MutableSpan<int> r_map)
{
  threading::parallel_for(offsets.index_range(), 1024, [&](const IndexRange range) {
    for (const int64_t i : range) {
      r_map.slice(offsets[i]).fill(i);
    }
  });
}

void build_reverse_offsets(const Span<int> indices, MutableSpan<int> offsets)
{
  BLI_assert(std::all_of(offsets.begin(), offsets.end(), [](int value) { return value == 0; }));
  array_utils::count_indices(indices, offsets);
  offset_indices::accumulate_counts_to_offsets(offsets);
}

}  // namespace blender::offset_indices
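A usage note on the overflow-checked variant: it returns std::nullopt instead of asserting, so callers can reject oversized inputs gracefully. A minimal sketch of the intended call pattern, assuming a pre-filled hypothetical `offset_data` counts buffer:

  const std::optional<OffsetIndices<int>> offsets =
      offset_indices::accumulate_counts_to_offsets_with_overflow_check(offset_data, 0);
  if (!offsets) {
    /* The accumulated total would not fit in an `int`; fail instead of using bogus offsets. */
    return false;
  }
  /* Safe to use `*offsets` from here. */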