Geometry Nodes: Avoid index lookup from index mask

Recent `IndexMask` refactors introduced log(N) complexity for `mask[i]`.
The more fragmented the mask is, the more expensive each lookup becomes.
Also, the new `IndexMask` implementation has new iterators that provide
both the real index and its position (`index = mask[position]`).
This PR simply replaces manual loops with the new iteration methods.
The `optimized` variants are used in some places for a slight speedup.

The Attribute Statistic node became 5 times faster, due to multithreading.
Extrude (Faces Individual) went from 42 ms to 36 ms on average.
Duplicate Elements (Faces) went from 220 ms to 150 ms on average.
Transform Instances went from 12 ms to 8 ms on average.
Other nodes show approximately similar improvements.
All tests use a Random(50%) selection as the mask.

Pull Request: https://projects.blender.org/blender/blender/pulls/109174
This commit is contained in:
Iliya Katueshenock
2023-06-23 19:20:24 +02:00
committed by Hans Goudey
parent cd6a428259
commit 25c48782e1
10 changed files with 329 additions and 389 deletions

View File

@@ -8,6 +8,7 @@
#include "UI_interface.h"
#include "UI_resources.h"
#include "BLI_array_utils.hh"
#include "BLI_math_base_safe.h"
#include "NOD_socket_search_link.hh"
@@ -215,9 +216,7 @@ static void node_geo_exec(GeoNodeExecParams params)
data.resize(next_data_index + selection.size());
MutableSpan<float> selected_data = data.as_mutable_span().slice(next_data_index,
selection.size());
for (const int i : selection.index_range()) {
selected_data[i] = component_data[selection[i]];
}
array_utils::gather(component_data, selection, selected_data);
}
}
@@ -295,9 +294,7 @@ static void node_geo_exec(GeoNodeExecParams params)
data.resize(data.size() + selection.size());
MutableSpan<float3> selected_data = data.as_mutable_span().slice(next_data_index,
selection.size());
for (const int i : selection.index_range()) {
selected_data[i] = component_data[selection[i]];
}
array_utils::gather(component_data, selection, selected_data);
}
}

View File

@@ -78,10 +78,7 @@ static OffsetIndices<int> accumulate_counts_to_offsets(const IndexMask &selectio
r_offset_data.last() = count * selection.size();
}
else {
threading::parallel_for(selection.index_range(), 1024, [&](const IndexRange range) {
counts.materialize_compressed(selection.slice(range),
r_offset_data.as_mutable_span().slice(range));
});
array_utils::gather(counts, selection, r_offset_data.as_mutable_span(), 1024);
offset_indices::accumulate_counts_to_offsets(r_offset_data);
}
return OffsetIndices<int>(r_offset_data);
@@ -95,10 +92,8 @@ static void threaded_slice_fill(const OffsetIndices<int> offsets,
MutableSpan<T> dst)
{
BLI_assert(offsets.total_size() == dst.size());
threading::parallel_for(selection.index_range(), 512, [&](IndexRange range) {
for (const int i : range) {
dst.slice(offsets[i]).fill(src[selection[i]]);
}
selection.foreach_index(GrainSize(512), [&](const int64_t index, const int64_t i) {
dst.slice(offsets[i]).fill(src[index]);
});
}
@@ -231,15 +226,13 @@ static void copy_curve_attributes_without_id(
using T = decltype(dummy);
const Span<T> src = attribute.src.typed<T>();
MutableSpan<T> dst = attribute.dst.span.typed<T>();
threading::parallel_for(selection.index_range(), 512, [&](IndexRange range) {
for (const int i_selection : range) {
const int i_src_curve = selection[i_selection];
const Span<T> curve_src = src.slice(src_points_by_curve[i_src_curve]);
for (const int i_dst_curve : curve_offsets[i_selection]) {
dst.slice(dst_points_by_curve[i_dst_curve]).copy_from(curve_src);
}
}
});
selection.foreach_index(
GrainSize(512), [&](const int64_t index, const int64_t i_selection) {
const Span<T> curve_src = src.slice(src_points_by_curve[index]);
for (const int dst_curve_index : curve_offsets[i_selection]) {
dst.slice(dst_points_by_curve[dst_curve_index]).copy_from(curve_src);
}
});
});
break;
default:
@@ -278,17 +271,16 @@ static void copy_stable_id_curves(const bke::CurvesGeometry &src_curves,
const OffsetIndices src_points_by_curve = src_curves.points_by_curve();
const OffsetIndices dst_points_by_curve = dst_curves.points_by_curve();
threading::parallel_for(selection.index_range(), 512, [&](IndexRange range) {
for (const int i_selection : range) {
const int i_src_curve = selection[i_selection];
const Span<int> curve_src = src.slice(src_points_by_curve[i_src_curve]);
const IndexRange duplicates_range = offsets[i_selection];
for (const int i_duplicate : IndexRange(offsets[i_selection].size()).drop_front(1)) {
const int i_dst_curve = duplicates_range[i_duplicate];
copy_hashed_ids(curve_src, i_duplicate, dst.slice(dst_points_by_curve[i_dst_curve]));
}
}
});
selection.foreach_index(
GrainSize(512), [&](const int64_t i_src_curve, const int64_t i_selection) {
const Span<int> curve_src = src.slice(src_points_by_curve[i_src_curve]);
const IndexRange duplicates_range = offsets[i_selection];
for (const int i_duplicate : IndexRange(offsets[i_selection].size()).drop_front(1)) {
const int i_dst_curve = duplicates_range[i_duplicate];
copy_hashed_ids(curve_src, i_duplicate, dst.slice(dst_points_by_curve[i_dst_curve]));
}
});
dst_attribute.finish();
}
@@ -324,13 +316,14 @@ static void duplicate_curves(GeometrySet &geometry_set,
int dst_curves_num = 0;
int dst_points_num = 0;
for (const int i_curve : selection.index_range()) {
const int count = counts[selection[i_curve]];
selection.foreach_index_optimized<int>([&](const int index, const int i_curve) {
const int count = counts[index];
curve_offset_data[i_curve] = dst_curves_num;
point_offset_data[i_curve] = dst_points_num;
dst_curves_num += count;
dst_points_num += count * points_by_curve[selection[i_curve]].size();
}
dst_points_num += count * points_by_curve[index].size();
});
if (dst_points_num == 0) {
geometry_set.remove_geometry_during_modify();
@@ -348,18 +341,17 @@ static void duplicate_curves(GeometrySet &geometry_set,
bke::CurvesGeometry &new_curves = new_curves_id->geometry.wrap();
MutableSpan<int> all_dst_offsets = new_curves.offsets_for_write();
threading::parallel_for(selection.index_range(), 512, [&](IndexRange range) {
for (const int i_selection : range) {
const int i_src_curve = selection[i_selection];
const IndexRange src_curve_range = points_by_curve[i_src_curve];
const IndexRange dst_curves_range = curve_offsets[i_selection];
MutableSpan<int> dst_offsets = all_dst_offsets.slice(dst_curves_range);
for (const int i_duplicate : IndexRange(dst_curves_range.size())) {
dst_offsets[i_duplicate] = point_offsets[i_selection].start() +
src_curve_range.size() * i_duplicate;
}
}
});
selection.foreach_index(GrainSize(512),
[&](const int64_t i_src_curve, const int64_t i_selection) {
const IndexRange src_curve_range = points_by_curve[i_src_curve];
const IndexRange dst_curves_range = curve_offsets[i_selection];
MutableSpan<int> dst_offsets = all_dst_offsets.slice(dst_curves_range);
for (const int i_duplicate : IndexRange(dst_curves_range.size())) {
dst_offsets[i_duplicate] = point_offsets[i_selection].start() +
src_curve_range.size() * i_duplicate;
}
});
all_dst_offsets.last() = dst_points_num;
copy_curve_attributes_without_id(curves, selection, curve_offsets, propagation_info, new_curves);
@@ -505,12 +497,12 @@ static void duplicate_faces(GeometrySet &geometry_set,
int total_polys = 0;
int total_loops = 0;
Array<int> offset_data(selection.size() + 1);
for (const int i_selection : selection.index_range()) {
const int count = counts[selection[i_selection]];
selection.foreach_index_optimized<int>([&](const int index, const int i_selection) {
const int count = counts[index];
offset_data[i_selection] = total_polys;
total_polys += count;
total_loops += count * polys[selection[i_selection]].size();
}
total_loops += count * polys[index].size();
});
offset_data[selection.size()] = total_polys;
const OffsetIndices<int> duplicates(offset_data);
@@ -527,19 +519,17 @@ static void duplicate_faces(GeometrySet &geometry_set,
int poly_index = 0;
int loop_index = 0;
for (const int i_selection : selection.index_range()) {
selection.foreach_index_optimized<int>([&](const int index, const int i_selection) {
const IndexRange poly_range = duplicates[i_selection];
const IndexRange source = polys[selection[i_selection]];
for ([[maybe_unused]] const int i_duplicate : IndexRange(poly_range.size())) {
const IndexRange source = polys[index];
for ([[maybe_unused]] const int i_duplicate : poly_range.index_range()) {
new_poly_offsets[poly_index] = loop_index;
for (const int i_loops : IndexRange(source.size())) {
const int src_corner = source[i_loops];
for (const int src_corner : source) {
loop_mapping[loop_index] = src_corner;
vert_mapping[loop_index] = corner_verts[src_corner];
edge_mapping[loop_index] = corner_edges[src_corner];
new_edges[loop_index][0] = loop_index;
if (i_loops + 1 != source.size()) {
if (src_corner != source.last()) {
new_edges[loop_index][1] = loop_index + 1;
}
else {
@@ -549,7 +539,7 @@ static void duplicate_faces(GeometrySet &geometry_set,
}
poly_index++;
}
}
});
std::iota(new_corner_verts.begin(), new_corner_verts.end(), 0);
std::iota(new_corner_edges.begin(), new_corner_edges.end(), 0);
@@ -647,21 +637,19 @@ static void copy_stable_id_edges(const Mesh &mesh,
VArraySpan<int> src{src_attribute.varray.typed<int>()};
MutableSpan<int> dst = dst_attribute.span.typed<int>();
threading::parallel_for(IndexRange(selection.size()), 1024, [&](IndexRange range) {
for (const int i_selection : range) {
const IndexRange edge_range = offsets[i_selection];
if (edge_range.size() == 0) {
continue;
}
const int2 &edge = edges[selection[i_selection]];
const IndexRange vert_range = {edge_range.start() * 2, edge_range.size() * 2};
selection.foreach_index(GrainSize(1024), [&](const int64_t index, const int64_t i_selection) {
const IndexRange edge_range = offsets[i_selection];
if (edge_range.is_empty()) {
return;
}
const int2 &edge = edges[index];
const IndexRange vert_range = {edge_range.start() * 2, edge_range.size() * 2};
dst[vert_range[0]] = src[edge[0]];
dst[vert_range[1]] = src[edge[1]];
for (const int i_duplicate : IndexRange(1, edge_range.size() - 1)) {
dst[vert_range[i_duplicate * 2]] = noise::hash(src[edge[0]], i_duplicate);
dst[vert_range[i_duplicate * 2 + 1]] = noise::hash(src[edge[1]], i_duplicate);
}
dst[vert_range[0]] = src[edge[0]];
dst[vert_range[1]] = src[edge[1]];
for (const int i_duplicate : IndexRange(1, edge_range.size() - 1)) {
dst[vert_range[i_duplicate * 2]] = noise::hash(src[edge[0]], i_duplicate);
dst[vert_range[i_duplicate * 2 + 1]] = noise::hash(src[edge[1]], i_duplicate);
}
});
dst_attribute.finish();
@@ -697,16 +685,14 @@ static void duplicate_edges(GeometrySet &geometry_set,
MutableSpan<int2> new_edges = new_mesh->edges_for_write();
Array<int> vert_orig_indices(output_edges_num * 2);
threading::parallel_for(selection.index_range(), 1024, [&](IndexRange range) {
for (const int i_selection : range) {
const int2 &edge = edges[selection[i_selection]];
const IndexRange edge_range = duplicates[i_selection];
const IndexRange vert_range(edge_range.start() * 2, edge_range.size() * 2);
selection.foreach_index(GrainSize(1024), [&](const int64_t index, const int64_t i_selection) {
const int2 &edge = edges[index];
const IndexRange edge_range = duplicates[i_selection];
const IndexRange vert_range(edge_range.start() * 2, edge_range.size() * 2);
for (const int i_duplicate : IndexRange(edge_range.size())) {
vert_orig_indices[vert_range[i_duplicate * 2]] = edge[0];
vert_orig_indices[vert_range[i_duplicate * 2 + 1]] = edge[1];
}
for (const int i_duplicate : IndexRange(edge_range.size())) {
vert_orig_indices[vert_range[i_duplicate * 2]] = edge[0];
vert_orig_indices[vert_range[i_duplicate * 2 + 1]] = edge[1];
}
});
@@ -794,12 +780,11 @@ static void duplicate_points_curve(GeometrySet &geometry_set,
using T = decltype(dummy);
const Span<T> src = attribute.src.typed<T>();
MutableSpan<T> dst = attribute.dst.span.typed<T>();
threading::parallel_for(selection.index_range(), 512, [&](IndexRange range) {
for (const int i_selection : range) {
const T &src_value = src[point_to_curve_map[selection[i_selection]]];
dst.slice(duplicates[i_selection]).fill(src_value);
}
});
selection.foreach_index(GrainSize(512),
[&](const int64_t index, const int64_t i_selection) {
const T &src_value = src[point_to_curve_map[index]];
dst.slice(duplicates[i_selection]).fill(src_value);
});
});
break;
case ATTR_DOMAIN_POINT:

View File

@@ -284,9 +284,9 @@ static void extrude_mesh_vertices(Mesh &mesh,
const IndexRange new_edge_range{orig_edge_size, selection.size()};
MutableSpan<int2> new_edges = mesh.edges_for_write().slice(new_edge_range);
for (const int i_selection : selection.index_range()) {
new_edges[i_selection] = int2(selection[i_selection], new_vert_range[i_selection]);
}
selection.foreach_index_optimized<int>([&](const int index, const int i_selection) {
new_edges[i_selection] = int2(index, new_vert_range[i_selection]);
});
MutableAttributeAccessor attributes = mesh.attributes_for_write();
@@ -309,10 +309,8 @@ static void extrude_mesh_vertices(Mesh &mesh,
MutableSpan<float3> positions = mesh.vert_positions_for_write();
MutableSpan<float3> new_positions = positions.slice(new_vert_range);
threading::parallel_for(selection.index_range(), 1024, [&](const IndexRange range) {
for (const int i : range) {
new_positions[i] = positions[selection[i]] + offsets[selection[i]];
}
selection.foreach_index_optimized<int>(GrainSize(1024), [&](const int index, const int i) {
new_positions[i] = positions[index] + offsets[index];
});
MutableSpan<int> vert_orig_indices = get_orig_index_layer(mesh, ATTR_DOMAIN_POINT);
@@ -504,9 +502,7 @@ static void extrude_mesh_edges(Mesh &mesh,
duplicate_edges[i] = int2(new_vert_range[i_new_vert_1], new_vert_range[i_new_vert_2]);
}
for (const int i : edge_selection.index_range()) {
const int orig_edge_index = edge_selection[i];
edge_selection.foreach_index([&](const int64_t orig_edge_index, const int64_t i) {
const int2 &duplicate_edge = duplicate_edges[i];
const int new_vert_1 = duplicate_edge[0];
const int new_vert_2 = duplicate_edge[1];
@@ -537,7 +533,7 @@ static void extrude_mesh_edges(Mesh &mesh,
connect_edge_range[extrude_index_1],
duplicate_edge_range[i],
connect_edge_range[extrude_index_2]);
}
});
/* Create a map of indices in the extruded vertices array to all of the indices of edges
* in the duplicate edges array that connect to that vertex. This can be used to simplify the
@@ -591,58 +587,55 @@ static void extrude_mesh_edges(Mesh &mesh,
using T = decltype(dummy);
MutableSpan<T> data = attribute.span.typed<T>();
MutableSpan<T> new_data = data.slice(new_loop_range);
threading::parallel_for(edge_selection.index_range(), 256, [&](const IndexRange range) {
for (const int i_edge_selection : range) {
const int orig_edge_index = edge_selection[i_edge_selection];
edge_selection.foreach_index(
GrainSize(256), [&](const int64_t orig_edge_index, const int64_t i_edge_selection) {
const Span<int> connected_polys = edge_to_poly_map[orig_edge_index];
if (connected_polys.is_empty()) {
/* If there are no connected polygons, there is no corner data to
* interpolate. */
new_data.slice(4 * i_edge_selection, 4).fill(T());
return;
}
const Span<int> connected_polys = edge_to_poly_map[orig_edge_index];
if (connected_polys.is_empty()) {
/* If there are no connected polygons, there is no corner data to
* interpolate. */
new_data.slice(4 * i_edge_selection, 4).fill(T());
continue;
}
/* Both corners on each vertical edge of the side polygon get the same value,
* so there are only two unique values to mix. */
Array<T> side_poly_corner_data(2);
bke::attribute_math::DefaultPropagationMixer<T> mixer{side_poly_corner_data};
/* Both corners on each vertical edge of the side polygon get the same value,
* so there are only two unique values to mix. */
Array<T> side_poly_corner_data(2);
bke::attribute_math::DefaultPropagationMixer<T> mixer{side_poly_corner_data};
const int2 &duplicate_edge = duplicate_edges[i_edge_selection];
const int new_vert_1 = duplicate_edge[0];
const int new_vert_2 = duplicate_edge[1];
const int orig_vert_1 = new_vert_indices[new_vert_1 - orig_vert_size];
const int orig_vert_2 = new_vert_indices[new_vert_2 - orig_vert_size];
const int2 &duplicate_edge = duplicate_edges[i_edge_selection];
const int new_vert_1 = duplicate_edge[0];
const int new_vert_2 = duplicate_edge[1];
const int orig_vert_1 = new_vert_indices[new_vert_1 - orig_vert_size];
const int orig_vert_2 = new_vert_indices[new_vert_2 - orig_vert_size];
/* Average the corner data from the corners that share a vertex from the
* polygons that share an edge with the extruded edge. */
for (const int i_connected_poly : connected_polys.index_range()) {
const IndexRange connected_poly = polys[connected_polys[i_connected_poly]];
for (const int i_loop : IndexRange(connected_poly)) {
if (corner_verts[i_loop] == orig_vert_1) {
mixer.mix_in(0, data[i_loop]);
}
if (corner_verts[i_loop] == orig_vert_2) {
mixer.mix_in(1, data[i_loop]);
/* Average the corner data from the corners that share a vertex from the
* polygons that share an edge with the extruded edge. */
for (const int i_connected_poly : connected_polys.index_range()) {
const IndexRange connected_poly = polys[connected_polys[i_connected_poly]];
for (const int i_loop : IndexRange(connected_poly)) {
if (corner_verts[i_loop] == orig_vert_1) {
mixer.mix_in(0, data[i_loop]);
}
if (corner_verts[i_loop] == orig_vert_2) {
mixer.mix_in(1, data[i_loop]);
}
}
}
}
mixer.finalize();
mixer.finalize();
/* Instead of replicating the order in #fill_quad_consistent_direction here, it's
* simpler (though probably slower) to just match the corner data based on the vertex
* indices. */
for (const int i : IndexRange(4 * i_edge_selection, 4)) {
if (ELEM(new_corner_verts[i], new_vert_1, orig_vert_1)) {
new_data[i] = side_poly_corner_data.first();
/* Instead of replicating the order in #fill_quad_consistent_direction here, it's
* simpler (though probably slower) to just match the corner data based on the
* vertex indices. */
for (const int i : IndexRange(4 * i_edge_selection, 4)) {
if (ELEM(new_corner_verts[i], new_vert_1, orig_vert_1)) {
new_data[i] = side_poly_corner_data.first();
}
else if (ELEM(new_corner_verts[i], new_vert_2, orig_vert_2)) {
new_data[i] = side_poly_corner_data.last();
}
}
else if (ELEM(new_corner_verts[i], new_vert_2, orig_vert_2)) {
new_data[i] = side_poly_corner_data.last();
}
}
}
});
});
});
break;
}
@@ -1143,10 +1136,11 @@ static void extrude_individual_mesh_faces(
* all polygons. */
int extrude_corner_size = 0;
Array<int> group_per_face_data(poly_selection.size() + 1);
for (const int i_selection : poly_selection.index_range()) {
poly_selection.foreach_index_optimized<int>([&](const int index, const int i_selection) {
group_per_face_data[i_selection] = extrude_corner_size;
extrude_corner_size += orig_polys[poly_selection[i_selection]].size();
}
extrude_corner_size += orig_polys[index].size();
});
group_per_face_data.last() = extrude_corner_size;
const OffsetIndices<int> group_per_face(group_per_face_data);
@@ -1187,54 +1181,53 @@ static void extrude_individual_mesh_faces(
* separate loops, which may or may not be faster, but would involve more duplication. */
Array<int> new_vert_indices(extrude_corner_size);
Array<int> duplicate_edge_indices(extrude_corner_size);
threading::parallel_for(poly_selection.index_range(), 256, [&](const IndexRange range) {
for (const int i_selection : range) {
const IndexRange extrude_range = group_per_face[i_selection];
poly_selection.foreach_index(
GrainSize(256), [&](const int64_t index, const int64_t i_selection) {
const IndexRange extrude_range = group_per_face[i_selection];
const IndexRange poly = polys[poly_selection[i_selection]];
MutableSpan<int> poly_verts = corner_verts.slice(poly);
MutableSpan<int> poly_edges = corner_edges.slice(poly);
const IndexRange poly = polys[index];
MutableSpan<int> poly_verts = corner_verts.slice(poly);
MutableSpan<int> poly_edges = corner_edges.slice(poly);
for (const int i : IndexRange(poly.size())) {
const int i_extrude = extrude_range[i];
new_vert_indices[i_extrude] = poly_verts[i];
duplicate_edge_indices[i_extrude] = poly_edges[i];
for (const int i : IndexRange(poly.size())) {
const int i_extrude = extrude_range[i];
new_vert_indices[i_extrude] = poly_verts[i];
duplicate_edge_indices[i_extrude] = poly_edges[i];
poly_verts[i] = new_vert_range[i_extrude];
poly_edges[i] = duplicate_edge_range[i_extrude];
}
poly_verts[i] = new_vert_range[i_extrude];
poly_edges[i] = duplicate_edge_range[i_extrude];
}
for (const int i : IndexRange(poly.size())) {
const int i_next = (i == poly.size() - 1) ? 0 : i + 1;
const int i_extrude = extrude_range[i];
const int i_extrude_next = extrude_range[i_next];
for (const int i : IndexRange(poly.size())) {
const int i_next = (i == poly.size() - 1) ? 0 : i + 1;
const int i_extrude = extrude_range[i];
const int i_extrude_next = extrude_range[i_next];
const int i_duplicate_edge = duplicate_edge_range[i_extrude];
const int new_vert = new_vert_range[i_extrude];
const int new_vert_next = new_vert_range[i_extrude_next];
const int i_duplicate_edge = duplicate_edge_range[i_extrude];
const int new_vert = new_vert_range[i_extrude];
const int new_vert_next = new_vert_range[i_extrude_next];
const int orig_edge = duplicate_edge_indices[i_extrude];
const int orig_edge = duplicate_edge_indices[i_extrude];
const int orig_vert = new_vert_indices[i_extrude];
const int orig_vert_next = new_vert_indices[i_extrude_next];
const int orig_vert = new_vert_indices[i_extrude];
const int orig_vert_next = new_vert_indices[i_extrude_next];
duplicate_edges[i_extrude] = int2(new_vert, new_vert_next);
duplicate_edges[i_extrude] = int2(new_vert, new_vert_next);
MutableSpan<int> side_poly_verts = corner_verts.slice(side_loop_range[i_extrude * 4], 4);
MutableSpan<int> side_poly_edges = corner_edges.slice(side_loop_range[i_extrude * 4], 4);
side_poly_verts[0] = new_vert_next;
side_poly_edges[0] = i_duplicate_edge;
side_poly_verts[1] = new_vert;
side_poly_edges[1] = connect_edge_range[i_extrude];
side_poly_verts[2] = orig_vert;
side_poly_edges[2] = orig_edge;
side_poly_verts[3] = orig_vert_next;
side_poly_edges[3] = connect_edge_range[i_extrude_next];
MutableSpan<int> side_poly_verts = corner_verts.slice(side_loop_range[i_extrude * 4], 4);
MutableSpan<int> side_poly_edges = corner_edges.slice(side_loop_range[i_extrude * 4], 4);
side_poly_verts[0] = new_vert_next;
side_poly_edges[0] = i_duplicate_edge;
side_poly_verts[1] = new_vert;
side_poly_edges[1] = connect_edge_range[i_extrude];
side_poly_verts[2] = orig_vert;
side_poly_edges[2] = orig_edge;
side_poly_verts[3] = orig_vert_next;
side_poly_edges[3] = connect_edge_range[i_extrude_next];
connect_edges[i_extrude] = int2(orig_vert, new_vert);
}
}
});
connect_edges[i_extrude] = int2(orig_vert, new_vert);
}
});
MutableAttributeAccessor attributes = mesh.attributes_for_write();
@@ -1263,31 +1256,30 @@ static void extrude_individual_mesh_faces(
using T = decltype(dummy);
MutableSpan<T> data = attribute.span.typed<T>();
MutableSpan<T> connect_data = data.slice(connect_edge_range);
threading::parallel_for(poly_selection.index_range(), 512, [&](const IndexRange range) {
for (const int i_selection : range) {
const IndexRange poly = polys[poly_selection[i_selection]];
const IndexRange extrude_range = group_per_face[i_selection];
poly_selection.foreach_index(
GrainSize(512), [&](const int64_t index, const int64_t i_selection) {
const IndexRange poly = polys[index];
const IndexRange extrude_range = group_per_face[i_selection];
/* For the extruded edges, mix the data from the two neighboring original edges of
* the extruded polygon. */
for (const int i : IndexRange(poly.size())) {
const int i_prev = (i == 0) ? poly.size() - 1 : i - 1;
const int i_extrude = extrude_range[i];
const int i_extrude_prev = extrude_range[i_prev];
/* For the extruded edges, mix the data from the two neighboring original edges of
* the extruded polygon. */
for (const int i : IndexRange(poly.size())) {
const int i_prev = (i == 0) ? poly.size() - 1 : i - 1;
const int i_extrude = extrude_range[i];
const int i_extrude_prev = extrude_range[i_prev];
const int orig_edge = duplicate_edge_indices[i_extrude];
const int orig_edge_prev = duplicate_edge_indices[i_extrude_prev];
if constexpr (std::is_same_v<T, bool>) {
/* Propagate selections with "or" instead of "at least half". */
connect_data[i_extrude] = data[orig_edge] || data[orig_edge_prev];
const int orig_edge = duplicate_edge_indices[i_extrude];
const int orig_edge_prev = duplicate_edge_indices[i_extrude_prev];
if constexpr (std::is_same_v<T, bool>) {
/* Propagate selections with "or" instead of "at least half". */
connect_data[i_extrude] = data[orig_edge] || data[orig_edge_prev];
}
else {
connect_data[i_extrude] = bke::attribute_math::mix2(
0.5f, data[orig_edge], data[orig_edge_prev]);
}
}
else {
connect_data[i_extrude] = bke::attribute_math::mix2(
0.5f, data[orig_edge], data[orig_edge_prev]);
}
}
}
});
});
});
break;
}
@@ -1297,13 +1289,11 @@ static void extrude_individual_mesh_faces(
using T = decltype(dummy);
MutableSpan<T> data = attribute.span.typed<T>();
MutableSpan<T> new_data = data.slice(side_poly_range);
threading::parallel_for(poly_selection.index_range(), 1024, [&](const IndexRange range) {
for (const int i_selection : range) {
const int poly_index = poly_selection[i_selection];
const IndexRange extrude_range = group_per_face[i_selection];
new_data.slice(extrude_range).fill(data[poly_index]);
}
});
poly_selection.foreach_index(
GrainSize(1024), [&](const int64_t poly_index, const int64_t i_selection) {
const IndexRange extrude_range = group_per_face[i_selection];
new_data.slice(extrude_range).fill(data[poly_index]);
});
});
break;
}
@@ -1314,28 +1304,27 @@ static void extrude_individual_mesh_faces(
using T = decltype(dummy);
MutableSpan<T> data = attribute.span.typed<T>();
MutableSpan<T> new_data = data.slice(side_loop_range);
threading::parallel_for(poly_selection.index_range(), 256, [&](const IndexRange range) {
for (const int i_selection : range) {
const IndexRange poly = polys[poly_selection[i_selection]];
const Span<T> poly_loop_data = data.slice(poly);
const IndexRange extrude_range = group_per_face[i_selection];
poly_selection.foreach_index(
GrainSize(256), [&](const int64_t index, const int64_t i_selection) {
const IndexRange poly = polys[index];
const Span<T> poly_loop_data = data.slice(poly);
const IndexRange extrude_range = group_per_face[i_selection];
for (const int i : IndexRange(poly.size())) {
const int i_next = (i == poly.size() - 1) ? 0 : i + 1;
const int i_extrude = extrude_range[i];
for (const int i : IndexRange(poly.size())) {
const int i_next = (i == poly.size() - 1) ? 0 : i + 1;
const int i_extrude = extrude_range[i];
MutableSpan<T> side_loop_data = new_data.slice(i_extrude * 4, 4);
MutableSpan<T> side_loop_data = new_data.slice(i_extrude * 4, 4);
/* The two corners on each side of the side polygon get the data from the matching
* corners of the extruded polygon. This order depends on the loop filling the loop
* indices. */
side_loop_data[0] = poly_loop_data[i_next];
side_loop_data[1] = poly_loop_data[i];
side_loop_data[2] = poly_loop_data[i];
side_loop_data[3] = poly_loop_data[i_next];
}
}
});
/* The two corners on each side of the side polygon get the data from the
* matching corners of the extruded polygon. This order depends on the loop
* filling the loop indices. */
side_loop_data[0] = poly_loop_data[i_next];
side_loop_data[1] = poly_loop_data[i];
side_loop_data[2] = poly_loop_data[i];
side_loop_data[3] = poly_loop_data[i_next];
}
});
});
break;
}
@@ -1348,14 +1337,13 @@ static void extrude_individual_mesh_faces(
});
/* Offset the new vertices. */
threading::parallel_for(poly_selection.index_range(), 1024, [&](const IndexRange range) {
for (const int i_selection : range) {
const IndexRange extrude_range = group_per_face[i_selection];
for (float3 &position : new_positions.slice(extrude_range)) {
position += poly_offset[poly_selection[i_selection]];
}
}
});
poly_selection.foreach_index(GrainSize(1025),
[&](const int64_t index, const int64_t i_selection) {
const IndexRange extrude_range = group_per_face[i_selection];
for (float3 &position : new_positions.slice(extrude_range)) {
position += poly_offset[index];
}
});
MutableSpan<int> vert_orig_indices = get_orig_index_layer(mesh, ATTR_DOMAIN_POINT);
if (!vert_orig_indices.is_empty()) {
@@ -1375,13 +1363,11 @@ static void extrude_individual_mesh_faces(
MutableSpan<int> poly_orig_indices = get_orig_index_layer(mesh, ATTR_DOMAIN_FACE);
if (!poly_orig_indices.is_empty()) {
MutableSpan<int> new_poly_orig_indices = poly_orig_indices.slice(side_poly_range);
threading::parallel_for(poly_selection.index_range(), 1024, [&](const IndexRange range) {
for (const int selection_i : range) {
const int poly_i = poly_selection[selection_i];
const IndexRange extrude_range = group_per_face[selection_i];
new_poly_orig_indices.slice(extrude_range).fill(poly_orig_indices[poly_i]);
}
});
poly_selection.foreach_index(
GrainSize(1024), [&](const int64_t poly_i, const int64_t selection_i) {
const IndexRange extrude_range = group_per_face[selection_i];
new_poly_orig_indices.slice(extrude_range).fill(poly_orig_indices[poly_i]);
});
}
if (attribute_outputs.top_id) {

View File

@@ -112,43 +112,39 @@ static void add_instances_from_component(
/* Add this reference last, because it is the most likely one to be removed later on. */
const int empty_reference_handle = dst_component.add_reference(bke::InstanceReference());
threading::parallel_for(selection.index_range(), 1024, [&](IndexRange selection_range) {
for (const int range_i : selection_range) {
const int64_t i = selection[range_i];
selection.foreach_index(GrainSize(1024), [&](const int64_t i, const int64_t range_i) {
/* Compute base transform for every instances. */
float4x4 &dst_transform = dst_transforms[range_i];
dst_transform = math::from_loc_rot_scale<float4x4>(
positions[i], math::EulerXYZ(rotations[i]), scales[i]);
/* Compute base transform for every instances. */
float4x4 &dst_transform = dst_transforms[range_i];
dst_transform = math::from_loc_rot_scale<float4x4>(
positions[i], math::EulerXYZ(rotations[i]), scales[i]);
/* Reference that will be used by this new instance. */
int dst_handle = empty_reference_handle;
/* Reference that will be used by this new instance. */
int dst_handle = empty_reference_handle;
const bool use_individual_instance = pick_instance[i];
if (use_individual_instance) {
if (src_instances != nullptr) {
const int src_instances_num = src_instances->instances_num();
const int original_index = indices[i];
/* Use #mod_i instead of `%` to get the desirable wrap around behavior where -1
* refers to the last element. */
const int index = mod_i(original_index, std::max(src_instances_num, 1));
if (index < src_instances_num) {
/* Get the reference to the source instance. */
const int src_handle = src_instances->reference_handles()[index];
dst_handle = handle_mapping[src_handle];
const bool use_individual_instance = pick_instance[i];
if (use_individual_instance) {
if (src_instances != nullptr) {
const int src_instances_num = src_instances->instances_num();
const int original_index = indices[i];
/* Use #mod_i instead of `%` to get the desirable wrap around behavior where -1
* refers to the last element. */
const int index = mod_i(original_index, std::max(src_instances_num, 1));
if (index < src_instances_num) {
/* Get the reference to the source instance. */
const int src_handle = src_instances->reference_handles()[index];
dst_handle = handle_mapping[src_handle];
/* Take transforms of the source instance into account. */
mul_m4_m4_post(dst_transform.ptr(), src_instances->transforms()[index].ptr());
}
/* Take transforms of the source instance into account. */
mul_m4_m4_post(dst_transform.ptr(), src_instances->transforms()[index].ptr());
}
}
else {
/* Use entire source geometry as instance. */
dst_handle = full_instance_handle;
}
/* Set properties of new instance. */
dst_handles[range_i] = dst_handle;
}
else {
/* Use entire source geometry as instance. */
dst_handle = full_instance_handle;
}
/* Set properties of new instance. */
dst_handles[range_i] = dst_handle;
});
if (pick_instance.is_single()) {

View File

@@ -47,12 +47,9 @@ static VArray<bool> select_mesh_faces_by_material(const Mesh &mesh,
const VArraySpan<int> material_indices_span(material_indices);
Array<bool> face_selection(face_mask.min_array_size());
threading::parallel_for(face_mask.index_range(), 1024, [&](IndexRange range) {
for (const int i : range) {
const int face_index = face_mask[i];
const int slot_i = material_indices_span[face_index];
face_selection[face_index] = slots.contains(slot_i);
}
face_mask.foreach_index_optimized<int>(GrainSize(1024), [&](const int face_index) {
const int slot_i = material_indices_span[face_index];
face_selection[face_index] = slots.contains(slot_i);
});
return VArray<bool>::ForContainer(std::move(face_selection));

View File

@@ -55,18 +55,16 @@ class OffsetCornerInFaceFieldInput final : public bke::MeshFieldInput {
Array<int> loop_to_poly_map = bke::mesh::build_loop_to_poly_map(polys);
Array<int> offset_corners(mask.min_array_size());
threading::parallel_for(mask.index_range(), 2048, [&](const IndexRange range) {
for (const int selection_i : range) {
const int corner_i = corner_indices[selection_i];
const int offset = offsets[selection_i];
if (!corner_range.contains(corner_i)) {
offset_corners[selection_i] = 0;
continue;
}
const IndexRange poly = polys[loop_to_poly_map[corner_i]];
offset_corners[selection_i] = apply_offset_in_cyclic_range(poly, corner_i, offset);
mask.foreach_index_optimized<int>(GrainSize(2048), [&](const int selection_i) {
const int corner_i = corner_indices[selection_i];
const int offset = offsets[selection_i];
if (!corner_range.contains(corner_i)) {
offset_corners[selection_i] = 0;
return;
}
const IndexRange poly = polys[loop_to_poly_map[corner_i]];
offset_corners[selection_i] = apply_offset_in_cyclic_range(poly, corner_i, offset);
});
return VArray<int>::ForContainer(std::move(offset_corners));

View File

@@ -65,24 +65,21 @@ static bool calculate_mesh_proximity(const VArray<float3> &positions,
return false;
}
threading::parallel_for(mask.index_range(), 512, [&](IndexRange range) {
mask.foreach_index(GrainSize(512), [&](const int index) {
BVHTreeNearest nearest;
copy_v3_fl(nearest.co, FLT_MAX);
nearest.index = -1;
for (int i : range) {
const int index = mask[i];
/* Use the distance to the last found point as upper bound to speedup the bvh lookup. */
nearest.dist_sq = math::distance_squared(float3(nearest.co), positions[index]);
/* Use the distance to the last found point as upper bound to speedup the bvh lookup. */
nearest.dist_sq = math::distance_squared(float3(nearest.co), positions[index]);
BLI_bvhtree_find_nearest(
bvh_data.tree, positions[index], &nearest, bvh_data.nearest_callback, &bvh_data);
BLI_bvhtree_find_nearest(
bvh_data.tree, positions[index], &nearest, bvh_data.nearest_callback, &bvh_data);
if (nearest.dist_sq < r_distances[index]) {
r_distances[index] = nearest.dist_sq;
if (!r_locations.is_empty()) {
r_locations[index] = nearest.co;
}
if (nearest.dist_sq < r_distances[index]) {
r_distances[index] = nearest.dist_sq;
if (!r_locations.is_empty()) {
r_locations[index] = nearest.co;
}
}
});
@@ -103,26 +100,23 @@ static bool calculate_pointcloud_proximity(const VArray<float3> &positions,
return false;
}
threading::parallel_for(mask.index_range(), 512, [&](IndexRange range) {
mask.foreach_index(GrainSize(512), [&](const int index) {
BVHTreeNearest nearest;
copy_v3_fl(nearest.co, FLT_MAX);
nearest.index = -1;
for (int i : range) {
const int index = mask[i];
/* Use the distance to the closest point in the mesh to speedup the pointcloud bvh lookup.
* This is ok because we only need to find the closest point in the pointcloud if it's
* closer than the mesh. */
nearest.dist_sq = r_distances[index];
/* Use the distance to the closest point in the mesh to speedup the pointcloud bvh lookup.
* This is ok because we only need to find the closest point in the pointcloud if it's
* closer than the mesh. */
nearest.dist_sq = r_distances[index];
BLI_bvhtree_find_nearest(
bvh_data.tree, positions[index], &nearest, bvh_data.nearest_callback, &bvh_data);
BLI_bvhtree_find_nearest(
bvh_data.tree, positions[index], &nearest, bvh_data.nearest_callback, &bvh_data);
if (nearest.dist_sq < r_distances[index]) {
r_distances[index] = nearest.dist_sq;
if (!r_locations.is_empty()) {
r_locations[index] = nearest.co;
}
if (nearest.dist_sq < r_distances[index]) {
r_distances[index] = nearest.dist_sq;
if (!r_locations.is_empty()) {
r_locations[index] = nearest.co;
}
}
});
@@ -187,12 +181,8 @@ class ProximityFunction : public mf::MultiFunction {
}
if (params.single_output_is_required(2, "Distance")) {
threading::parallel_for(mask.index_range(), 2048, [&](IndexRange range) {
for (const int i : range) {
const int j = mask[i];
distances[j] = std::sqrt(distances[j]);
}
});
mask.foreach_index_optimized<int>(
GrainSize(2048), [&](const int j) { distances[j] = std::sqrt(distances[j]); });
}
}
};

View File

@@ -41,44 +41,41 @@ static void rotate_instances(GeoNodeExecParams &params, bke::Instances &instance
MutableSpan<float4x4> transforms = instances.transforms();
threading::parallel_for(selection.index_range(), 512, [&](IndexRange range) {
for (const int i_selection : range) {
const int i = selection[i_selection];
const float3 pivot = pivots[i];
const float3 euler = rotations[i];
float4x4 &instance_transform = transforms[i];
selection.foreach_index(GrainSize(512), [&](const int64_t i) {
const float3 pivot = pivots[i];
const float3 euler = rotations[i];
float4x4 &instance_transform = transforms[i];
float4x4 rotation_matrix;
float3 used_pivot;
float4x4 rotation_matrix;
float3 used_pivot;
if (local_spaces[i]) {
/* Find rotation axis from the matrix. This should work even if the instance is skewed. */
/* Create rotations around the individual axis. This could be optimized to skip some axis
* when the angle is zero. */
const float3x3 rotation_x = from_rotation<float3x3>(
AxisAngle(normalize(instance_transform.x_axis()), euler.x));
const float3x3 rotation_y = from_rotation<float3x3>(
AxisAngle(normalize(instance_transform.y_axis()), euler.y));
const float3x3 rotation_z = from_rotation<float3x3>(
AxisAngle(normalize(instance_transform.z_axis()), euler.z));
if (local_spaces[i]) {
/* Find rotation axis from the matrix. This should work even if the instance is skewed. */
/* Create rotations around the individual axis. This could be optimized to skip some axis
* when the angle is zero. */
const float3x3 rotation_x = from_rotation<float3x3>(
AxisAngle(normalize(instance_transform.x_axis()), euler.x));
const float3x3 rotation_y = from_rotation<float3x3>(
AxisAngle(normalize(instance_transform.y_axis()), euler.y));
const float3x3 rotation_z = from_rotation<float3x3>(
AxisAngle(normalize(instance_transform.z_axis()), euler.z));
/* Combine the previously computed rotations into the final rotation matrix. */
rotation_matrix = float4x4(rotation_z * rotation_y * rotation_x);
/* Combine the previously computed rotations into the final rotation matrix. */
rotation_matrix = float4x4(rotation_z * rotation_y * rotation_x);
/* Transform the passed in pivot into the local space of the instance. */
used_pivot = transform_point(instance_transform, pivot);
}
else {
used_pivot = pivot;
rotation_matrix = from_rotation<float4x4>(EulerXYZ(euler));
}
/* Move the pivot to the origin so that we can rotate around it. */
instance_transform.location() -= used_pivot;
/* Perform the actual rotation. */
instance_transform = rotation_matrix * instance_transform;
/* Undo the pivot shifting done before. */
instance_transform.location() += used_pivot;
/* Transform the passed in pivot into the local space of the instance. */
used_pivot = transform_point(instance_transform, pivot);
}
else {
used_pivot = pivot;
rotation_matrix = from_rotation<float4x4>(EulerXYZ(euler));
}
/* Move the pivot to the origin so that we can rotate around it. */
instance_transform.location() -= used_pivot;
/* Perform the actual rotation. */
instance_transform = rotation_matrix * instance_transform;
/* Undo the pivot shifting done before. */
instance_transform.location() += used_pivot;
});
}

View File

@@ -40,24 +40,21 @@ static void scale_instances(GeoNodeExecParams &params, bke::Instances &instances
MutableSpan<float4x4> transforms = instances.transforms();
threading::parallel_for(selection.index_range(), 512, [&](IndexRange range) {
for (const int i_selection : range) {
const int i = selection[i_selection];
const float3 pivot = pivots[i];
float4x4 &instance_transform = transforms[i];
selection.foreach_index(GrainSize(512), [&](const int64_t i) {
const float3 pivot = pivots[i];
float4x4 &instance_transform = transforms[i];
if (local_spaces[i]) {
instance_transform *= math::from_location<float4x4>(pivot);
rescale_m4(instance_transform.ptr(), scales[i]);
instance_transform *= math::from_location<float4x4>(-pivot);
}
else {
const float4x4 original_transform = instance_transform;
instance_transform = math::from_location<float4x4>(pivot);
rescale_m4(instance_transform.ptr(), scales[i]);
instance_transform *= math::from_location<float4x4>(-pivot);
instance_transform *= original_transform;
}
if (local_spaces[i]) {
instance_transform *= math::from_location<float4x4>(pivot);
rescale_m4(instance_transform.ptr(), scales[i]);
instance_transform *= math::from_location<float4x4>(-pivot);
}
else {
const float4x4 original_transform = instance_transform;
instance_transform = math::from_location<float4x4>(pivot);
rescale_m4(instance_transform.ptr(), scales[i]);
instance_transform *= math::from_location<float4x4>(-pivot);
instance_transform *= original_transform;
}
});
}

View File

@@ -36,15 +36,12 @@ static void translate_instances(GeoNodeExecParams &params, bke::Instances &insta
MutableSpan<float4x4> transforms = instances.transforms();
threading::parallel_for(selection.index_range(), 1024, [&](IndexRange range) {
for (const int i_selection : range) {
const int i = selection[i_selection];
if (local_spaces[i]) {
transforms[i] *= math::from_location<float4x4>(translations[i]);
}
else {
transforms[i].location() += translations[i];
}
selection.foreach_index(GrainSize(1024), [&](const int64_t i) {
if (local_spaces[i]) {
transforms[i] *= math::from_location<float4x4>(translations[i]);
}
else {
transforms[i].location() += translations[i];
}
});
}