/* SPDX-FileCopyrightText: 2023 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup bli
 */

#pragma once

#include <numeric>

#include "BLI_generic_span.hh"
#include "BLI_generic_virtual_array.hh"
#include "BLI_index_mask.hh"
#include "BLI_math_base.h"
#include "BLI_offset_indices.hh"
#include "BLI_task.hh"
#include "BLI_virtual_array.hh"

namespace blender::array_utils {

/**
 * Fill the destination span by copying all values from the `src` array. Threaded based on
 * grain-size.
 */
void copy(const GVArray &src, GMutableSpan dst, int64_t grain_size = 4096);

template<typename T>
inline void copy(const VArray<T> &src, MutableSpan<T> dst, const int64_t grain_size = 4096)
{
  BLI_assert(src.size() == dst.size());
  threading::parallel_for(src.index_range(), grain_size, [&](const IndexRange range) {
    src.materialize_to_uninitialized(range, dst);
  });
}

/**
 * Fill the destination span by copying all values from the `src` array. Threaded based on
 * grain-size.
 */
template<typename T>
inline void copy(const Span<T> src, MutableSpan<T> dst, const int64_t grain_size = 4096)
{
  BLI_assert(src.size() == dst.size());
  threading::parallel_for(src.index_range(), grain_size, [&](const IndexRange range) {
    dst.slice(range).copy_from(src.slice(range));
  });
}

/**
 * Fill the destination span by copying masked values from the `src` array. Threaded based on
 * grain-size.
 */
void copy(const GVArray &src,
          const IndexMask &selection,
          GMutableSpan dst,
          int64_t grain_size = 4096);

/**
 * Fill the destination span by copying masked values from the `src` array. Threaded based on
 * grain-size.
 */
template<typename T>
inline void copy(const Span<T> src,
                 const IndexMask &selection,
                 MutableSpan<T> dst,
                 const int64_t grain_size = 4096)
{
  BLI_assert(src.size() == dst.size());
  selection.foreach_index_optimized<int64_t>(GrainSize(grain_size),
                                             [&](const int64_t i) { dst[i] = src[i]; });
}

/**
 * Compute the sum of all values in the span.
 */
template<typename T> T compute_sum(const Span<T> data)
{
  /* Explicitly splitting work into chunks for a couple of reasons:
   * - Improve numerical stability. While there are even more stable algorithms (e.g. Kahan
   *   summation), they also add more complexity to the hot code path. So far, this simple
   *   approach seems to solve the common issues people run into.
   * - Support computing the sum using multiple threads.
   * - Ensure deterministic results even with floating point numbers. */
  constexpr int64_t chunk_size = 1024;
  const int64_t chunks_num = divide_ceil_ul(data.size(), chunk_size);
  Array<T> partial_sums(chunks_num);
  threading::parallel_for(partial_sums.index_range(), 1, [&](const IndexRange range) {
    for (const int64_t i : range) {
      const int64_t start = i * chunk_size;
      const Span<T> chunk = data.slice_safe(start, chunk_size);
      const T partial_sum = std::accumulate(chunk.begin(), chunk.end(), T());
      partial_sums[i] = partial_sum;
    }
  });
  return std::accumulate(partial_sums.begin(), partial_sums.end(), T());
}
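
/* Usage sketch (illustrative only, not part of this header): sum a buffer of floats and copy the
 * selected elements into a second full-size buffer. The names `values`, `copy_of_values` and
 * `selection` are hypothetical caller-side variables.
 *
 *   const Array<float> values = ...;
 *   const IndexMask &selection = ...;
 *   Array<float> copy_of_values(values.size());
 *   const float total = array_utils::compute_sum(values.as_span());
 *   array_utils::copy(values.as_span(), selection, copy_of_values.as_mutable_span());
 */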

/**
 * Fill the specified indices of the destination with the values in the source span.
 */
template<typename T>
inline void scatter(const Span<T> src,
                    const Span<int> indices,
                    MutableSpan<T> dst,
                    const int64_t grain_size = 4096)
{
  BLI_assert(indices.size() == src.size());
  threading::parallel_for(indices.index_range(), grain_size, [&](const IndexRange range) {
    for (const int64_t i : range) {
      dst[indices[i]] = src[i];
    }
  });
}

template<typename T>
inline void scatter(const Span<T> src,
                    const IndexMask &indices,
                    MutableSpan<T> dst,
                    const int64_t grain_size = 4096)
{
  BLI_assert(indices.size() == src.size());
  BLI_assert(indices.min_array_size() <= dst.size());
  indices.foreach_index_optimized<int64_t>(
      GrainSize(grain_size),
      [&](const int64_t index, const int64_t pos) { dst[index] = src[pos]; });
}

/**
 * Fill the destination span by gathering indexed values from the `src` array.
 */
void gather(const GVArray &src,
            const IndexMask &indices,
            GMutableSpan dst,
            int64_t grain_size = 4096);

/**
 * Fill the destination span by gathering indexed values from the `src` array.
 */
void gather(GSpan src, const IndexMask &indices, GMutableSpan dst, int64_t grain_size = 4096);

/**
 * Fill the destination span by gathering indexed values from the `src` array.
 */
template<typename T>
inline void gather(const VArray<T> &src,
                   const IndexMask &indices,
                   MutableSpan<T> dst,
                   const int64_t grain_size = 4096)
{
  BLI_assert(indices.size() == dst.size());
  threading::parallel_for(indices.index_range(), grain_size, [&](const IndexRange range) {
    src.materialize_compressed_to_uninitialized(indices.slice(range), dst.slice(range));
  });
}

/**
 * Fill the destination span by gathering indexed values from the `src` array.
 */
template<typename T>
inline void gather(const Span<T> src,
                   const IndexMask &indices,
                   MutableSpan<T> dst,
                   const int64_t grain_size = 4096)
{
  BLI_assert(indices.size() == dst.size());
  indices.foreach_segment(GrainSize(grain_size),
                          [&](const IndexMaskSegment segment, const int64_t segment_pos) {
                            for (const int64_t i : segment.index_range()) {
                              dst[segment_pos + i] = src[segment[i]];
                            }
                          });
}

/**
 * Fill the destination span by gathering indexed values from the `src` array.
 */
template<typename T>
inline void gather(const Span<T> src,
                   const Span<int> indices,
                   MutableSpan<T> dst,
                   const int64_t grain_size = 4096)
{
  BLI_assert(indices.size() == dst.size());
  threading::parallel_for(indices.index_range(), grain_size, [&](const IndexRange range) {
    for (const int64_t i : range) {
      dst[i] = src[indices[i]];
    }
  });
}
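
/* Usage sketch (illustrative only, not part of this header): gather the values at the selected
 * indices into a compact buffer, modify them, and scatter them back to their original positions.
 * The names `positions` and `selection` are hypothetical caller-side variables.
 *
 *   Array<float3> selected_positions(selection.size());
 *   array_utils::gather(positions.as_span(), selection, selected_positions.as_mutable_span());
 *   // ... edit `selected_positions` ...
 *   array_utils::scatter(selected_positions.as_span(), selection, positions.as_mutable_span());
 */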

/**
 * Fill the destination span by gathering indexed values from the `src` array.
 */
template<typename T>
inline void gather(const VArray<T> &src,
                   const Span<int> indices,
                   MutableSpan<T> dst,
                   const int64_t grain_size = 4096)
{
  BLI_assert(indices.size() == dst.size());
  devirtualize_varray(src, [&](const auto &src) {
    threading::parallel_for(indices.index_range(), grain_size, [&](const IndexRange range) {
      for (const int64_t i : range) {
        dst[i] = src[indices[i]];
      }
    });
  });
}

/**
 * Gather the groups in \a src selected by \a selection (defined by \a src_offsets) into the
 * destination groups defined by \a dst_offsets. The i-th selected group is written to the i-th
 * destination group.
 */
template<typename T>
inline void gather_group_to_group(const OffsetIndices<int> src_offsets,
                                  const OffsetIndices<int> dst_offsets,
                                  const IndexMask &selection,
                                  const Span<T> src,
                                  MutableSpan<T> dst)
{
  selection.foreach_index(GrainSize(512), [&](const int64_t src_i, const int64_t dst_i) {
    dst.slice(dst_offsets[dst_i]).copy_from(src.slice(src_offsets[src_i]));
  });
}

template<typename T>
inline void gather_group_to_group(const OffsetIndices<int> src_offsets,
                                  const OffsetIndices<int> dst_offsets,
                                  const IndexMask &selection,
                                  const VArray<T> src,
                                  MutableSpan<T> dst)
{
  selection.foreach_index(GrainSize(512), [&](const int64_t src_i, const int64_t dst_i) {
    src.materialize_compressed(src_offsets[src_i], dst.slice(dst_offsets[dst_i]));
  });
}

/**
 * Fill each destination group (defined by \a dst_offsets) with the single source value at the
 * corresponding selected index.
 */
template<typename T>
inline void gather_to_groups(const OffsetIndices<int> dst_offsets,
                             const IndexMask &src_selection,
                             const Span<T> src,
                             MutableSpan<T> dst)
{
  src_selection.foreach_index(GrainSize(1024), [&](const int src_i, const int dst_i) {
    dst.slice(dst_offsets[dst_i]).fill(src[src_i]);
  });
}

/**
 * Copy the \a src data from the groups defined by \a src_offsets to the groups in \a dst defined
 * by \a dst_offsets. Groups to use are masked by \a selection, and it is assumed that the
 * corresponding groups have the same size.
 */
void copy_group_to_group(OffsetIndices<int> src_offsets,
                         OffsetIndices<int> dst_offsets,
                         const IndexMask &selection,
                         GSpan src,
                         GMutableSpan dst);

template<typename T>
void copy_group_to_group(OffsetIndices<int> src_offsets,
                         OffsetIndices<int> dst_offsets,
                         const IndexMask &selection,
                         Span<T> src,
                         MutableSpan<T> dst)
{
  copy_group_to_group(src_offsets, dst_offsets, selection, GSpan(src), GMutableSpan(dst));
}

/**
 * Count the number of occurrences of each index.
 * \param indices: The indices to count.
 * \param counts: The number of occurrences of each index. Typically initialized to zero.
 * Must be large enough to contain the maximum index.
 *
 * \note The memory referenced by the two spans must not overlap.
 */
void count_indices(Span<int> indices, MutableSpan<int> counts);

void invert_booleans(MutableSpan<bool> span);
void invert_booleans(MutableSpan<bool> span, const IndexMask &mask);

int64_t count_booleans(const VArray<bool> &varray);
int64_t count_booleans(const VArray<bool> &varray, const IndexMask &mask);

enum class BooleanMix {
  None,
  AllFalse,
  AllTrue,
  Mixed,
};
BooleanMix booleans_mix_calc(const VArray<bool> &varray, IndexRange range_to_check);
inline BooleanMix booleans_mix_calc(const VArray<bool> &varray)
{
  return booleans_mix_calc(varray, varray.index_range());
}

/**
 * Check if the value exists in the array.
 */
bool contains(const VArray<bool> &varray, const IndexMask &indices_to_check, bool value);

/**
 * Find all the index ranges over which consecutive values in \a span equal \a value.
 */
template<typename T> inline Vector<IndexRange> find_all_ranges(const Span<T> span, const T &value)
{
  if (span.is_empty()) {
    return Vector<IndexRange>();
  }
  Vector<IndexRange> ranges;
  int64_t length = (span.first() == value) ? 1 : 0;
  for (const int64_t i : span.index_range().drop_front(1)) {
    if (span[i - 1] == value && span[i] != value) {
      ranges.append(IndexRange::from_end_size(i, length));
      length = 0;
    }
    else if (span[i] == value) {
      length++;
    }
  }
  if (length > 0) {
    ranges.append(IndexRange::from_end_size(span.size(), length));
  }
  return ranges;
}
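
/* Usage sketch (illustrative only, not part of this header): count how many face corners use each
 * vertex, and find the runs of consecutive `true` values in a boolean span. The names
 * `corner_verts`, `verts_num` and `selection_span` are hypothetical caller-side variables.
 *
 *   Array<int> counts(verts_num, 0);
 *   array_utils::count_indices(corner_verts, counts.as_mutable_span());
 *
 *   const Vector<IndexRange> selected_runs = array_utils::find_all_ranges(selection_span, true);
 */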

/**
 * Fill the span with increasing indices: 0, 1, 2, ...
 * Optionally, the start value can be provided.
 */
template<typename T> inline void fill_index_range(MutableSpan<T> span, const T start = 0)
{
  std::iota(span.begin(), span.end(), start);
}

/**
 * Return true if the values in \a all_values at the given \a indices are equal to the
 * corresponding values in \a values.
 */
template<typename T>
bool indexed_data_equal(const Span<T> all_values, const Span<int> indices, const Span<T> values)
{
  BLI_assert(indices.size() == values.size());
  for (const int i : indices.index_range()) {
    if (all_values[indices[i]] != values[i]) {
      return false;
    }
  }
  return true;
}

bool indices_are_range(Span<int> indices, IndexRange range);

}  // namespace blender::array_utils
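
/* Usage sketch (illustrative only, not part of this header): build an identity index map and
 * check whether a set of indices already forms the unshuffled range [0, size). The names `map`,
 * `indices` and `size` are hypothetical caller-side variables.
 *
 *   blender::Array<int> map(size);
 *   blender::array_utils::fill_index_range(map.as_mutable_span());
 *   const bool is_identity = blender::array_utils::indices_are_range(indices,
 *                                                                    blender::IndexRange(size));
 */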