Refactor: Mesh: Use C++ threading API for tessellation

This allows deleting a bunch of duplicate code, since there doesn't
need to be a special single-threaded case anymore. It also just
reduces the necessary boilerplate.

Also change naming a bit and use signed integers instead of unsigned.

I didn't notice any performance difference.

Pull Request: https://projects.blender.org/blender/blender/pulls/119069
This commit is contained in:
Hans Goudey
2024-03-05 01:03:45 +01:00
committed by Hans Goudey
parent 5fdd684550
commit 6479dc50fa

View File

@@ -12,12 +12,12 @@
*/
#include "BLI_array_utils.hh"
#include "BLI_enumerable_thread_specific.hh"
#include "BLI_math_geom.h"
#include "BLI_math_matrix.h"
#include "BLI_math_vector.h"
#include "BLI_memarena.h"
#include "BLI_polyfill_2d.h"
#include "BLI_task.h"
#include "BLI_task.hh"
#include "BKE_mesh.hh"
@@ -26,37 +26,31 @@
namespace blender::bke::mesh {
/** Compared against total loops. */
#define MESH_FACE_TESSELLATE_THREADED_LIMIT 4096
/* -------------------------------------------------------------------- */
/** \name Loop Tessellation
/** \name Face Tessellation
*
* Fill in Corner Triangle data-structure.
* Fill in Corner Triangle Array
* \{ */
/**
* \param face_normal: This will be optimized out as a constant.
*/
BLI_INLINE void mesh_calc_tessellation_for_face_impl(const Span<int> corner_verts,
const blender::OffsetIndices<int> faces,
const Span<float3> positions,
uint face_index,
const int face_start,
const int face_size,
int3 *tri,
MemArena **pf_arena_p,
const bool face_normal,
const float normal_precalc[3])
{
const uint mp_loopstart = uint(faces[face_index].start());
const uint mp_totloop = uint(faces[face_index].size());
auto create_tri = [&](uint i1, uint i2, uint i3) {
(*tri)[0] = int(mp_loopstart + i1);
(*tri)[1] = int(mp_loopstart + i2);
(*tri)[2] = int(mp_loopstart + i3);
auto create_tri = [&](int i1, int i2, int i3) {
(*tri)[0] = int(face_start + i1);
(*tri)[1] = int(face_start + i2);
(*tri)[2] = int(face_start + i3);
};
switch (mp_totloop) {
switch (face_size) {
case 3: {
create_tri(0, 1, 2);
break;
@@ -88,9 +82,9 @@ BLI_INLINE void mesh_calc_tessellation_for_face_impl(const Span<int> corner_vert
zero_v3(normal);
/* Calc normal, flipped: to get a positive 2D cross product. */
co_prev = positions[corner_verts[mp_loopstart + mp_totloop - 1]];
for (uint j = 0; j < mp_totloop; j++) {
co_curr = positions[corner_verts[mp_loopstart + j]];
co_prev = positions[corner_verts[face_start + face_size - 1]];
for (int j = 0; j < face_size; j++) {
co_curr = positions[corner_verts[face_start + j]];
add_newell_cross_v3_v3v3(normal, co_prev, co_curr);
co_prev = co_curr;
}
@@ -103,7 +97,7 @@ BLI_INLINE void mesh_calc_tessellation_for_face_impl(const Span<int> corner_vert
axis_dominant_v3_to_m3_negate(axis_mat, normal_precalc);
}
const uint totfilltri = mp_totloop - 2;
const int totfilltri = face_size - 2;
MemArena *pf_arena = *pf_arena_p;
if (UNLIKELY(pf_arena == nullptr)) {
@@ -113,17 +107,17 @@ BLI_INLINE void mesh_calc_tessellation_for_face_impl(const Span<int> corner_vert
uint(*tris)[3] = static_cast<uint(*)[3]>(
BLI_memarena_alloc(pf_arena, sizeof(*tris) * size_t(totfilltri)));
float(*projverts)[2] = static_cast<float(*)[2]>(
BLI_memarena_alloc(pf_arena, sizeof(*projverts) * size_t(mp_totloop)));
BLI_memarena_alloc(pf_arena, sizeof(*projverts) * size_t(face_size)));
for (uint j = 0; j < mp_totloop; j++) {
mul_v2_m3v3(projverts[j], axis_mat, positions[corner_verts[mp_loopstart + j]]);
for (int j = 0; j < face_size; j++) {
mul_v2_m3v3(projverts[j], axis_mat, positions[corner_verts[face_start + j]]);
}
BLI_polyfill_calc_arena(projverts, mp_totloop, 1, tris, pf_arena);
BLI_polyfill_calc_arena(projverts, uint(face_size), 1, tris, pf_arena);
/* Apply fill. */
for (uint j = 0; j < totfilltri; j++, tri++) {
create_tri(tris[j][0], tris[j][1], tris[j][2]);
for (int j = 0; j < totfilltri; j++, tri++) {
create_tri(int(tris[j][0]), int(tris[j][1]), int(tris[j][2]));
}
BLI_memarena_clear(pf_arena);
@@ -131,165 +125,82 @@ BLI_INLINE void mesh_calc_tessellation_for_face_impl(const Span<int> corner_vert
break;
}
}
#undef ML_TO_MLT
}
/**
 * Tessellate one face without a precalculated normal.
 *
 * Forwards to #mesh_calc_tessellation_for_face_impl with `face_normal` set to `false` and a
 * null `normal_precalc`; `tri` and `pf_arena_p` are passed through unchanged.
 *
 * NOTE(review): the parameter list and the argument lines of the call appear in two
 * revisions here (face given as `faces` + `face_index`, and as `face_start` + `face_size`)
 * -- confirm which revision is wanted before reusing this text.
 */
static void mesh_calc_tessellation_for_face(const Span<int> corner_verts,
const blender::OffsetIndices<int> faces,
const Span<float3> positions,
uint face_index,
const int face_start,
const int face_size,
int3 *tri,
MemArena **pf_arena_p)
{
mesh_calc_tessellation_for_face_impl(
corner_verts, faces, positions, face_index, tri, pf_arena_p, false, nullptr);
corner_verts, positions, face_start, face_size, tri, pf_arena_p, false, nullptr);
}
/**
 * Tessellate one face using a caller-supplied normal.
 *
 * Forwards to #mesh_calc_tessellation_for_face_impl with `face_normal` set to `true` and
 * `normal_precalc` passed through; `tri` and `pf_arena_p` are passed through unchanged.
 *
 * NOTE(review): the parameter list and the argument lines of the call appear in two
 * revisions here (face given as `faces` + `face_index`, and as `face_start` + `face_size`)
 * -- confirm which revision is wanted before reusing this text.
 */
static void mesh_calc_tessellation_for_face_with_normal(const Span<int> corner_verts,
const blender::OffsetIndices<int> faces,
const Span<float3> positions,
uint face_index,
const int face_start,
const int face_size,
int3 *tri,
MemArena **pf_arena_p,
const float normal_precalc[3])
{
mesh_calc_tessellation_for_face_impl(
corner_verts, faces, positions, face_index, tri, pf_arena_p, true, normal_precalc);
corner_verts, positions, face_start, face_size, tri, pf_arena_p, true, normal_precalc);
}
/**
 * NOTE(review): this span interleaves three definitions line by line: the sequential
 * #mesh_recalc_corner_tris__single_threaded, the `LocalData` struct and
 * #corner_tris_calc_impl. Separate them before reusing any of this text.
 *
 * Sequential variant: loops over `faces.index_range()`, writing each face's triangles at a
 * running `corner_tri_i` offset that advances by `faces[i].size() - 2`, and using a single
 * `pf_arena` for all faces. A non-null `face_normals` selects the with-normal call.
 */
static void mesh_recalc_corner_tris__single_threaded(const Span<int> corner_verts,
const blender::OffsetIndices<int> faces,
const Span<float3> positions,
int3 *corner_tris,
const float (*face_normals)[3])
{
/* `LocalData`: scratch state held in a `threading::EnumerableThreadSpecific` by
 * #corner_tris_calc_impl; it carries the arena pointer handed to the per-face functions. */
struct LocalData {
MemArena *pf_arena = nullptr;
uint corner_tri_i = 0;
if (face_normals != nullptr) {
for (const int64_t i : faces.index_range()) {
mesh_calc_tessellation_for_face_with_normal(corner_verts,
faces,
positions,
uint(i),
&corner_tris[corner_tri_i],
&pf_arena,
face_normals[i]);
corner_tri_i += uint(faces[i].size() - 2);
/* Destructor of `LocalData`: frees the arena only when it is non-null. */
~LocalData()
{
if (pf_arena) {
BLI_memarena_free(pf_arena);
}
}
};
/**
 * Fill `corner_tris` for every face using `threading::parallel_for` over
 * `faces.index_range()` (second argument 1024 -- presumably the grain size, confirm against
 * `BLI_task.hh`).
 *
 * Each invoked range obtains its `LocalData` through `all_local_data.local()` and passes the
 * address of its `pf_arena` to the per-face function. The output position of face `i` is
 * `poly_to_tri_count(i, face_start)`, so no running counter is kept between faces.
 *
 * \param face_normals: When empty, #mesh_calc_tessellation_for_face is called; otherwise
 * #mesh_calc_tessellation_for_face_with_normal is called with `face_normals[i]`.
 */
static void corner_tris_calc_impl(const Span<float3> positions,
const OffsetIndices<int> faces,
const Span<int> corner_verts,
const Span<float3> face_normals,
MutableSpan<int3> corner_tris)
{
threading::EnumerableThreadSpecific<LocalData> all_local_data;
if (face_normals.is_empty()) {
threading::parallel_for(faces.index_range(), 1024, [&](const IndexRange range) {
LocalData &local_data = all_local_data.local();
for (const int64_t i : range) {
const int face_start = int(faces[i].start());
const int face_size = int(faces[i].size());
/* Index into `corner_tris` where this face's triangles are written. */
const int tris_start = poly_to_tri_count(int(i), face_start);
mesh_calc_tessellation_for_face(corner_verts,
positions,
face_start,
face_size,
&corner_tris[tris_start],
&local_data.pf_arena);
}
});
}
else {
for (const int64_t i : faces.index_range()) {
mesh_calc_tessellation_for_face(
corner_verts, faces, positions, uint(i), &corner_tris[corner_tri_i], &pf_arena);
corner_tri_i += uint(faces[i].size() - 2);
}
threading::parallel_for(faces.index_range(), 1024, [&](const IndexRange range) {
LocalData &local_data = all_local_data.local();
for (const int64_t i : range) {
const int face_start = int(faces[i].start());
const int face_size = int(faces[i].size());
/* Index into `corner_tris` where this face's triangles are written. */
const int tris_start = poly_to_tri_count(int(i), face_start);
mesh_calc_tessellation_for_face_with_normal(corner_verts,
positions,
face_start,
face_size,
&corner_tris[tris_start],
&local_data.pf_arena,
face_normals[i]);
}
});
}
/* Sequential variant only: release the arena shared by the whole loop. */
if (pf_arena) {
BLI_memarena_free(pf_arena);
pf_arena = nullptr;
}
/* Sequential variant only: the running offset must equal the triangle count computed from
 * the number of faces and the number of corners. */
BLI_assert(corner_tri_i == uint(poly_to_tri_count(int(faces.size()), int(corner_verts.size()))));
}
/**
 * Data bundle passed as the `userdata` pointer to the range-task callbacks
 * (#mesh_calc_tessellation_for_face_fn and #mesh_calc_tessellation_for_face_with_normal_fn),
 * which cast it to `const TessellationUserData *`.
 */
struct TessellationUserData {
/* Mesh arrays forwarded unchanged to #mesh_calc_tessellation_for_face_impl. */
Span<int> corner_verts;
blender::OffsetIndices<int> faces;
Span<float3> positions;
/** Output array. */
MutableSpan<int3> corner_tris;
/** Optional pre-calculated face normals array. */
const float (*face_normals)[3];
};
/**
 * Chunk type reached through `TaskParallelTLS::userdata_chunk` in the range-task callbacks.
 * Its arena pointer is handed to #mesh_calc_tessellation_for_face_impl and released by
 * #mesh_calc_tessellation_for_face_free_fn when non-null.
 */
struct TessellationUserTLS {
MemArena *pf_arena;
};
/**
 * Range-task callback: tessellate the face at `index` without a precalculated normal
 * (`face_normal` is `false`, `normal_precalc` is null).
 *
 * \param userdata: Cast to `const TessellationUserData *`; supplies the mesh arrays and the
 * output `corner_tris`.
 * \param index: Face index.
 * \param tls: `tls->userdata_chunk` is cast to `TessellationUserTLS *` to reach the arena.
 */
static void mesh_calc_tessellation_for_face_fn(void *__restrict userdata,
const int index,
const TaskParallelTLS *__restrict tls)
{
const TessellationUserData *data = static_cast<const TessellationUserData *>(userdata);
TessellationUserTLS *tls_data = static_cast<TessellationUserTLS *>(tls->userdata_chunk);
/* Output offset for this face, computed from the face index and its first corner. */
const int corner_tri_i = poly_to_tri_count(index, int(data->faces[index].start()));
mesh_calc_tessellation_for_face_impl(data->corner_verts,
data->faces,
data->positions,
uint(index),
&data->corner_tris[corner_tri_i],
&tls_data->pf_arena,
false,
nullptr);
}
/**
 * Range-task callback: tessellate the face at `index` using the precalculated normal
 * `data->face_normals[index]` (`face_normal` is `true`).
 *
 * \param userdata: Cast to `const TessellationUserData *`; supplies the mesh arrays, the
 * normals and the output `corner_tris`.
 * \param index: Face index.
 * \param tls: `tls->userdata_chunk` is cast to `TessellationUserTLS *` to reach the arena.
 */
static void mesh_calc_tessellation_for_face_with_normal_fn(void *__restrict userdata,
const int index,
const TaskParallelTLS *__restrict tls)
{
const TessellationUserData *data = static_cast<const TessellationUserData *>(userdata);
TessellationUserTLS *tls_data = static_cast<TessellationUserTLS *>(tls->userdata_chunk);
/* Output offset for this face, computed from the face index and its first corner. */
const int corner_tri_i = poly_to_tri_count(index, int(data->faces[index].start()));
mesh_calc_tessellation_for_face_impl(data->corner_verts,
data->faces,
data->positions,
uint(index),
&data->corner_tris[corner_tri_i],
&tls_data->pf_arena,
true,
data->face_normals[index]);
}
/**
 * Cleanup callback installed as `settings.func_free` by #corner_tris_calc_all.
 *
 * \param tls_v: Cast to `TessellationUserTLS *`; its arena is freed when non-null.
 */
static void mesh_calc_tessellation_for_face_free_fn(const void *__restrict /*userdata*/,
void *__restrict tls_v)
{
TessellationUserTLS *tls_data = static_cast<TessellationUserTLS *>(tls_v);
if (tls_data->pf_arena) {
BLI_memarena_free(tls_data->pf_arena);
}
}
/**
 * Fill `corner_tris` for every face.
 *
 * When `corner_verts.size()` is below #MESH_FACE_TESSELLATE_THREADED_LIMIT the work is handed
 * to #mesh_recalc_corner_tris__single_threaded and the function returns. Otherwise the faces
 * `[0, faces.size())` are dispatched through #BLI_task_parallel_range.
 *
 * \param face_normals: Its `data()` pointer is reinterpreted as `const float(*)[3]`; a null
 * pointer selects the callback that does not use precalculated normals.
 */
static void corner_tris_calc_all(const Span<float3> positions,
const blender::OffsetIndices<int> faces,
const Span<int> corner_verts,
const Span<float3> face_normals,
MutableSpan<int3> corner_tris)
{
if (corner_verts.size() < MESH_FACE_TESSELLATE_THREADED_LIMIT) {
mesh_recalc_corner_tris__single_threaded(
corner_verts,
faces,
positions,
corner_tris.data(),
reinterpret_cast<const float(*)[3]>(face_normals.data()));
return;
}
/* Chunk with a null arena, registered below as `settings.userdata_chunk`. */
TessellationUserTLS tls_data_dummy = {nullptr};
TessellationUserData data{};
data.corner_verts = corner_verts;
data.faces = faces;
data.positions = positions;
data.corner_tris = corner_tris;
data.face_normals = reinterpret_cast<const float(*)[3]>(face_normals.data());
TaskParallelSettings settings;
BLI_parallel_range_settings_defaults(&settings);
settings.userdata_chunk = &tls_data_dummy;
settings.userdata_chunk_size = sizeof(tls_data_dummy);
settings.func_free = mesh_calc_tessellation_for_face_free_fn;
/* The callback is chosen by whether a normals array was supplied. */
BLI_task_parallel_range(0,
int(faces.size()),
&data,
data.face_normals ? mesh_calc_tessellation_for_face_with_normal_fn :
mesh_calc_tessellation_for_face_fn,
&settings);
}
void corner_tris_calc(const Span<float3> vert_positions,
@@ -297,7 +208,7 @@ void corner_tris_calc(const Span<float3> vert_positions,
const Span<int> corner_verts,
MutableSpan<int3> corner_tris)
{
corner_tris_calc_all(vert_positions, faces, corner_verts, {}, corner_tris);
corner_tris_calc_impl(vert_positions, faces, corner_verts, {}, corner_tris);
}
void corner_tris_calc_face_indices(const OffsetIndices<int> faces, MutableSpan<int> tri_faces)
@@ -319,7 +230,7 @@ void corner_tris_calc_with_normals(const Span<float3> vert_positions,
MutableSpan<int3> corner_tris)
{
BLI_assert(!face_normals.is_empty() || faces.is_empty());
corner_tris_calc_all(vert_positions, faces, corner_verts, face_normals, corner_tris);
corner_tris_calc_impl(vert_positions, faces, corner_verts, face_normals, corner_tris);
}
/** \} */