BLI: Improve IndexMask::complement() performance
IndexMask::complement() is often used in geometry processing algorithms when a selection needs to be inverted — so far mostly in curves code. Instead of reusing `from_predicate` and looking up every index in the source mask, scan the mask once and insert segments for the gaps between the original indices. In theory this improves the complexity from O(N*log(N)) to O(N); in practice, because the former only has a small constant factor, the new code is generally 3-4 times faster. In some cases, such as empty and full masks, the new code takes constant time.  Pull Request: https://projects.blender.org/blender/blender/pulls/108331
This commit is contained in:
@@ -152,14 +152,6 @@ IndexMask IndexMask::slice_and_offset(const int64_t start,
|
||||
return sliced_mask;
|
||||
}
|
||||
|
||||
IndexMask IndexMask::complement(const IndexRange universe, IndexMaskMemory &memory) const
{
  /* TODO: Implement more efficient solution. */
  /* An index belongs to the complement exactly when it is not in this mask. */
  const auto not_in_mask = [&](const int64_t index) { return !this->contains(index); };
  return IndexMask::from_predicate(universe, GrainSize(512), memory, not_in_mask);
}
|
||||
|
||||
/**
|
||||
* Merges consecutive segments in some cases. Having fewer but larger segments generally allows for
|
||||
* better performance when using the mask later on.
|
||||
@@ -332,6 +324,168 @@ struct ParallelSegmentsCollector {
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Convert a range to potentially multiple index mask segments.
|
||||
*/
|
||||
static void range_to_segments(const IndexRange range, Vector<IndexMaskSegment, 16> &r_segments)
|
||||
{
|
||||
const Span<int16_t> static_indices = get_static_indices_array();
|
||||
for (int64_t start = 0; start < range.size(); start += max_segment_size) {
|
||||
const int64_t size = std::min(max_segment_size, range.size() - start);
|
||||
r_segments.append_as(range.start() + start, static_indices.take_front(size));
|
||||
}
|
||||
}
|
||||
|
||||
static int64_t get_size_before_gap(const Span<int16_t> indices)
|
||||
{
|
||||
BLI_assert(indices.size() >= 2);
|
||||
if (indices[1] > indices[0] + 1) {
|
||||
/* For sparse indices, often the next gap is just after the next index.
|
||||
* In this case we can skip the logarithmic check below.*/
|
||||
return 1;
|
||||
}
|
||||
return unique_sorted_indices::find_size_of_next_range(indices);
|
||||
}
|
||||
|
||||
/**
 * Append the inversion of `segment` to `r_segments`: for every gap between two
 * consecutive indices in the segment, emit the missing indices.
 *
 * Large gaps (more than `range_threshold` indices) become segments backed by the
 * shared static indices array, which costs no allocation. Smaller gaps are
 * accumulated into a local buffer and flushed as explicit index segments whose
 * storage comes from `allocator`.
 */
static void inverted_indices_to_segments(const IndexMaskSegment segment,
                                         LinearAllocator<> &allocator,
                                         Vector<IndexMaskSegment, 16> &r_segments)
{
  /* Gaps larger than this are emitted as range segments instead of explicit indices. */
  constexpr int64_t range_threshold = 64;
  const int64_t offset = segment.offset();
  const Span<int16_t> static_indices = get_static_indices_array();

  /* Buffer of explicit inverted indices (relative to `offset`) gathered so far. */
  int64_t inverted_index_count = 0;
  std::array<int16_t, max_segment_size> inverted_indices_array;
  /* Append `num` consecutive indices starting at `start` to the local buffer. */
  auto add_indices = [&](const int16_t start, const int16_t num) {
    int16_t *new_indices_begin = inverted_indices_array.data() + inverted_index_count;
    std::iota(new_indices_begin, new_indices_begin + num, start);
    inverted_index_count += num;
  };

  /* Flush the buffered indices as one segment with allocator-owned storage. */
  auto finish_indices = [&]() {
    if (inverted_index_count == 0) {
      return;
    }
    MutableSpan<int16_t> offset_indices = allocator.allocate_array<int16_t>(inverted_index_count);
    offset_indices.copy_from(Span(inverted_indices_array).take_front(inverted_index_count));
    r_segments.append_as(offset, offset_indices);
    inverted_index_count = 0;
  };

  Span<int16_t> indices = segment.base_span();
  /* A single remaining index cannot contain another gap. */
  while (indices.size() > 1) {
    const int64_t size_before_gap = get_size_before_gap(indices);
    if (size_before_gap == indices.size()) {
      /* The rest of the segment is consecutive, nothing more to invert. */
      break;
    }

    /* The gap spans [gap_first, next), still relative to `offset`. */
    const int16_t gap_first = indices[size_before_gap - 1] + 1;
    const int16_t next = indices[size_before_gap];
    const int16_t gap_size = next - gap_first;
    if (gap_size > range_threshold) {
      /* Flush buffered indices first so segments stay in ascending order. */
      finish_indices();
      r_segments.append_as(offset + gap_first, static_indices.take_front(gap_size));
    }
    else {
      add_indices(gap_first, gap_size);
    }

    indices = indices.drop_front(size_before_gap);
  }

  finish_indices();
}
|
||||
|
||||
/**
 * Append the inversion of every segment in `segment_range`, as well as the gaps
 * between each segment and its successor, to `r_segments`.
 *
 * \note Accesses `mask.segment(segment_i + 1)` for each processed segment, so
 * `segment_range` must not include the mask's last segment; the caller inverts
 * that one separately.
 */
static void invert_segments(const IndexMask &mask,
                            const IndexRange segment_range,
                            LinearAllocator<> &allocator,
                            Vector<IndexMaskSegment, 16> &r_segments)
{
  for (const int64_t segment_i : segment_range) {
    const IndexMaskSegment segment = mask.segment(segment_i);
    /* Invert the gaps inside the segment itself. */
    inverted_indices_to_segments(segment, allocator, r_segments);

    /* Emit the full gap between this segment and the next one. */
    const IndexMaskSegment next_segment = mask.segment(segment_i + 1);
    const int64_t between_start = segment.last() + 1;
    const int64_t size_between_segments = next_segment[0] - segment.last() - 1;
    const IndexRange range_between_segments(between_start, size_between_segments);
    if (!range_between_segments.is_empty()) {
      range_to_segments(range_between_segments, r_segments);
    }
  }
}
|
||||
|
||||
/**
 * \return A mask containing exactly those indices of `universe` that are not in this mask.
 *
 * Empty masks, empty universes and masks that are a single range are handled in
 * constant time. Otherwise the mask is scanned once, emitting segments for the
 * gaps between its indices, which is O(N) overall.
 */
IndexMask IndexMask::complement(const IndexRange universe, IndexMaskMemory &memory) const
{
  /* Nothing is selected, so everything in the universe is in the complement. */
  if (this->is_empty()) {
    return universe;
  }
  if (universe.is_empty()) {
    return {};
  }
  const std::optional<IndexRange> this_range = this->to_range();
  if (this_range) {
    const bool first_in_range = this_range->first() <= universe.first();
    const bool last_in_range = this_range->last() >= universe.last();
    if (first_in_range && last_in_range) {
      /* This mask fills the entire universe, so the complement is empty. */
      return {};
    }
    if (first_in_range) {
      /* This mask is a range that contains the start of the universe.
       * The complement is a range that contains the end of the universe. */
      const int64_t complement_start = this_range->one_after_last();
      const int64_t complement_size = universe.one_after_last() - complement_start;
      return IndexRange(complement_start, complement_size);
    }
    if (last_in_range) {
      /* This mask is a range that contains the end of the universe.
       * The complement is a range that contains the start of the universe. */
      const int64_t complement_start = universe.first();
      const int64_t complement_size = this_range->first() - complement_start;
      return IndexRange(complement_start, complement_size);
    }
  }

  Vector<IndexMaskSegment, 16> segments;

  /* Part of the universe before the first index in the mask. */
  if (universe.start() < this->first()) {
    range_to_segments(universe.take_front(this->first() - universe.start()), segments);
  }

  if (!this_range) {
    const int64_t segments_num = this->segments_num();

    constexpr int64_t min_grain_size = 16;
    constexpr int64_t max_grain_size = 4096;
    const int64_t threads_num = BLI_system_thread_count();
    const int64_t grain_size = std::clamp(
        segments_num / threads_num, min_grain_size, max_grain_size);

    /* #invert_segments reads the segment after each one it processes, so the last
     * segment is excluded here and inverted separately below. */
    const IndexRange non_last_segments = IndexRange(segments_num).drop_back(1);
    if (segments_num < min_grain_size) {
      invert_segments(*this, non_last_segments, memory, segments);
    }
    else {
      /* Invert segments in parallel, gathering each thread's results locally and
       * merging them in order afterwards. */
      ParallelSegmentsCollector segments_collector;
      threading::parallel_for(non_last_segments, grain_size, [&](const IndexRange range) {
        ParallelSegmentsCollector::LocalData &local_data =
            segments_collector.data_by_thread.local();
        invert_segments(*this, range, local_data.allocator, local_data.segments);
      });
      segments_collector.reduce(memory, segments);
    }
    inverted_indices_to_segments(this->segment(segments_num - 1), memory, segments);
  }

  /* Part of the universe after the last index in the mask. This must compare
   * against `this->last()` (not `this->first()`): the trailing range has size
   * `universe.last() - this->last()`, and using the wrong bound could construct
   * a negative-sized range when the mask extends past the universe. */
  if (universe.last() > this->last()) {
    range_to_segments(universe.take_back(universe.last() - this->last()), segments);
  }

  return mask_from_segments(segments, memory);
}
|
||||
|
||||
template<typename T>
|
||||
IndexMask IndexMask::from_indices(const Span<T> indices, IndexMaskMemory &memory)
|
||||
{
|
||||
|
||||
@@ -225,4 +225,63 @@ TEST(index_mask, FromPredicateFuzzy)
|
||||
});
|
||||
}
|
||||
|
||||
TEST(index_mask, Complement)
{
  IndexMaskMemory memory;
  /* The complement must be disjoint from the mask and together they must cover the
   * universe exactly. */
  const auto check_complement = [&](const IndexMask &mask, const IndexRange universe) {
    const IndexMask complement = mask.complement(universe, memory);
    EXPECT_EQ(universe.size() - mask.size(), complement.size());
    complement.foreach_index([&](const int64_t i) { EXPECT_FALSE(mask.contains(i)); });
    mask.foreach_index([&](const int64_t i) { EXPECT_FALSE(complement.contains(i)); });
  };
  /* Empty mask. */
  check_complement(IndexMask(0), IndexRange(100));
  /* Mask that fills the universe completely. */
  check_complement(IndexMask(10000), IndexRange(10000));
  /* Range mask strictly inside the universe. */
  check_complement(IndexMask(IndexRange(100, 900)), IndexRange(1000));
  /* Range mask containing the start of the universe. */
  check_complement(IndexMask(IndexRange(0, 900)), IndexRange(1000));
}
|
||||
|
||||
TEST(index_mask, ComplementFuzzy)
{
  RandomNumberGenerator rng;

  const int64_t mask_size = 100;
  const int64_t iter_num = 100;
  /* Larger than `mask_size`, so the complement also covers a trailing part of the
   * universe that lies past any possible mask index. */
  const int64_t universe_size = 110;

  for (const int64_t iter : IndexRange(iter_num)) {
    /* Sample `iter` random indices, so the mask density grows over the iterations,
     * from empty up to fairly dense (duplicates collapse in the set). */
    Set<int> values;
    for ([[maybe_unused]] const int64_t _ : IndexRange(iter)) {
      values.add(rng.get_int32(mask_size));
    }
    IndexMaskMemory memory;
    const IndexMask mask = IndexMask::from_predicate(
        IndexRange(mask_size), GrainSize(1024), memory, [&](const int64_t i) {
          return values.contains(int(i));
        });

    /* Mask and complement must be disjoint and their sizes must sum to the
     * universe size. */
    const IndexMask complement = mask.complement(IndexRange(universe_size), memory);
    EXPECT_EQ(universe_size - mask.size(), complement.size());
    complement.foreach_index([&](const int64_t i) { EXPECT_FALSE(mask.contains(i)); });
    mask.foreach_index([&](const int64_t i) { EXPECT_FALSE(complement.contains(i)); });
  }
}
|
||||
|
||||
} // namespace blender::index_mask::tests
|
||||
|
||||
Reference in New Issue
Block a user