2023-08-16 00:20:26 +10:00
|
|
|
/* SPDX-FileCopyrightText: 2020-2023 Blender Authors
|
2023-06-15 13:09:04 +10:00
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: Apache-2.0 */
|
|
|
|
|
|
Mikktspace: Optimized port to C++
This commit is a big overhaul to the Mikktspace module, which is used
to compute tangents. I'm not calling it a rewrite since it's the
result of a lot of iterations on the original code, but pretty much
everything is reworked somehow.
Overall goal was to a) make it faster and b) make it maintainable.
Notable changes:
- Since the callbacks for requesting geometry data were a big
bottleneck before, I've ported it to C++ and made it header-only,
templating on the data source. That way, the compiler generates code
specific to the caller, which allows it to inline the data source and
specialize for some cases (e.g. subd vs. non-subd in Cycles).
- The one input parameter, an optional angle threshold, was not used
anywhere. Turns out that removing it allows for considerable
algorithmic simplification, removing a lot of the complexity in the
later stages. Therefore, I've just removed the option in the new code.
- The code computes several outputs, but only one (the tangent itself)
is ever used in Blender. Therefore, I've removed the others to
simplify the code. They could easily be brought back if needed, none
of the algorithmic simplifications are conflicting with them.
- The original code had fallback paths for many steps in case temporary
memory allocation fails, but that never actually gets used anyways
since malloc() doesn't really ever return NULL in practise, so I
removed them.
- In general, I've restructured A LOT of the code to make the
algorithms clearer and make use of some C++ features (vectors,
std::array, booleans, classes), though there's still some of cleanup
that could be done.
- Parallelized duplicate detection, neighbor detection, triangle
tangent computation, degenerate triangle handling and tangent space
accumulation.
- Replaced several algorithms with faster equivalents: Duplicate
detection uses a (concurrent) hash set now, neighbor detection uses
Radixsort and splits vertices by index pairs etc.
As for results, the exact speedup depends on the scene of course, but
let's consider the file from T97378:
- Blender 3.1 (before D14675): 6.07sec
- Blender 3.2 (with D14675): 4.62sec
- rBf0a36599007d (last nightly build): 4.42sec
- With this commit: 0.90sec
This speedup will mostly be noticed at the start of Cycles renders and,
even more importantly, in Eevee when doing something that changes the
geometry (e.g. animating) on a model using normal maps.
Differential Revision: https://developer.blender.org/D15589
2022-09-03 17:21:44 +02:00
|
|
|
/** \file
|
|
|
|
|
* \ingroup mikktspace
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
|
|
#include <cmath>
|
|
|
|
|
|
|
|
|
|
namespace mikk {
|
|
|
|
|
|
|
|
|
|
struct float3 {
|
|
|
|
|
float x, y, z;
|
|
|
|
|
|
|
|
|
|
float3() = default;
|
|
|
|
|
|
2023-03-29 16:50:54 +02:00
|
|
|
float3(const float *ptr) : x{ptr[0]}, y{ptr[1]}, z{ptr[2]} {}
|
Mikktspace: Optimized port to C++
This commit is a big overhaul to the Mikktspace module, which is used
to compute tangents. I'm not calling it a rewrite since it's the
result of a lot of iterations on the original code, but pretty much
everything is reworked somehow.
Overall goal was to a) make it faster and b) make it maintainable.
Notable changes:
- Since the callbacks for requesting geometry data were a big
bottleneck before, I've ported it to C++ and made it header-only,
templating on the data source. That way, the compiler generates code
specific to the caller, which allows it to inline the data source and
specialize for some cases (e.g. subd vs. non-subd in Cycles).
- The one input parameter, an optional angle threshold, was not used
anywhere. Turns out that removing it allows for considerable
algorithmic simplification, removing a lot of the complexity in the
later stages. Therefore, I've just removed the option in the new code.
- The code computes several outputs, but only one (the tangent itself)
is ever used in Blender. Therefore, I've removed the others to
simplify the code. They could easily be brought back if needed, none
of the algorithmic simplifications are conflicting with them.
- The original code had fallback paths for many steps in case temporary
memory allocation fails, but that never actually gets used anyways
since malloc() doesn't really ever return NULL in practise, so I
removed them.
- In general, I've restructured A LOT of the code to make the
algorithms clearer and make use of some C++ features (vectors,
std::array, booleans, classes), though there's still some of cleanup
that could be done.
- Parallelized duplicate detection, neighbor detection, triangle
tangent computation, degenerate triangle handling and tangent space
accumulation.
- Replaced several algorithms with faster equivalents: Duplicate
detection uses a (concurrent) hash set now, neighbor detection uses
Radixsort and splits vertices by index pairs etc.
As for results, the exact speedup depends on the scene of course, but
let's consider the file from T97378:
- Blender 3.1 (before D14675): 6.07sec
- Blender 3.2 (with D14675): 4.62sec
- rBf0a36599007d (last nightly build): 4.42sec
- With this commit: 0.90sec
This speedup will mostly be noticed at the start of Cycles renders and,
even more importantly, in Eevee when doing something that changes the
geometry (e.g. animating) on a model using normal maps.
Differential Revision: https://developer.blender.org/D15589
2022-09-03 17:21:44 +02:00
|
|
|
|
2023-03-29 16:50:54 +02:00
|
|
|
float3(const float (*ptr)[3]) : float3((const float *)ptr) {}
|
Mikktspace: Optimized port to C++
This commit is a big overhaul to the Mikktspace module, which is used
to compute tangents. I'm not calling it a rewrite since it's the
result of a lot of iterations on the original code, but pretty much
everything is reworked somehow.
Overall goal was to a) make it faster and b) make it maintainable.
Notable changes:
- Since the callbacks for requesting geometry data were a big
bottleneck before, I've ported it to C++ and made it header-only,
templating on the data source. That way, the compiler generates code
specific to the caller, which allows it to inline the data source and
specialize for some cases (e.g. subd vs. non-subd in Cycles).
- The one input parameter, an optional angle threshold, was not used
anywhere. Turns out that removing it allows for considerable
algorithmic simplification, removing a lot of the complexity in the
later stages. Therefore, I've just removed the option in the new code.
- The code computes several outputs, but only one (the tangent itself)
is ever used in Blender. Therefore, I've removed the others to
simplify the code. They could easily be brought back if needed, none
of the algorithmic simplifications are conflicting with them.
- The original code had fallback paths for many steps in case temporary
memory allocation fails, but that never actually gets used anyways
since malloc() doesn't really ever return NULL in practise, so I
removed them.
- In general, I've restructured A LOT of the code to make the
algorithms clearer and make use of some C++ features (vectors,
std::array, booleans, classes), though there's still some of cleanup
that could be done.
- Parallelized duplicate detection, neighbor detection, triangle
tangent computation, degenerate triangle handling and tangent space
accumulation.
- Replaced several algorithms with faster equivalents: Duplicate
detection uses a (concurrent) hash set now, neighbor detection uses
Radixsort and splits vertices by index pairs etc.
As for results, the exact speedup depends on the scene of course, but
let's consider the file from T97378:
- Blender 3.1 (before D14675): 6.07sec
- Blender 3.2 (with D14675): 4.62sec
- rBf0a36599007d (last nightly build): 4.42sec
- With this commit: 0.90sec
This speedup will mostly be noticed at the start of Cycles renders and,
even more importantly, in Eevee when doing something that changes the
geometry (e.g. animating) on a model using normal maps.
Differential Revision: https://developer.blender.org/D15589
2022-09-03 17:21:44 +02:00
|
|
|
|
2023-03-29 16:50:54 +02:00
|
|
|
explicit float3(float value) : x(value), y(value), z(value) {}
|
Mikktspace: Optimized port to C++
This commit is a big overhaul to the Mikktspace module, which is used
to compute tangents. I'm not calling it a rewrite since it's the
result of a lot of iterations on the original code, but pretty much
everything is reworked somehow.
Overall goal was to a) make it faster and b) make it maintainable.
Notable changes:
- Since the callbacks for requesting geometry data were a big
bottleneck before, I've ported it to C++ and made it header-only,
templating on the data source. That way, the compiler generates code
specific to the caller, which allows it to inline the data source and
specialize for some cases (e.g. subd vs. non-subd in Cycles).
- The one input parameter, an optional angle threshold, was not used
anywhere. Turns out that removing it allows for considerable
algorithmic simplification, removing a lot of the complexity in the
later stages. Therefore, I've just removed the option in the new code.
- The code computes several outputs, but only one (the tangent itself)
is ever used in Blender. Therefore, I've removed the others to
simplify the code. They could easily be brought back if needed, none
of the algorithmic simplifications are conflicting with them.
- The original code had fallback paths for many steps in case temporary
memory allocation fails, but that never actually gets used anyways
since malloc() doesn't really ever return NULL in practise, so I
removed them.
- In general, I've restructured A LOT of the code to make the
algorithms clearer and make use of some C++ features (vectors,
std::array, booleans, classes), though there's still some of cleanup
that could be done.
- Parallelized duplicate detection, neighbor detection, triangle
tangent computation, degenerate triangle handling and tangent space
accumulation.
- Replaced several algorithms with faster equivalents: Duplicate
detection uses a (concurrent) hash set now, neighbor detection uses
Radixsort and splits vertices by index pairs etc.
As for results, the exact speedup depends on the scene of course, but
let's consider the file from T97378:
- Blender 3.1 (before D14675): 6.07sec
- Blender 3.2 (with D14675): 4.62sec
- rBf0a36599007d (last nightly build): 4.42sec
- With this commit: 0.90sec
This speedup will mostly be noticed at the start of Cycles renders and,
even more importantly, in Eevee when doing something that changes the
geometry (e.g. animating) on a model using normal maps.
Differential Revision: https://developer.blender.org/D15589
2022-09-03 17:21:44 +02:00
|
|
|
|
2023-03-29 16:50:54 +02:00
|
|
|
explicit float3(int value) : x((float)value), y((float)value), z((float)value) {}
|
Mikktspace: Optimized port to C++
This commit is a big overhaul to the Mikktspace module, which is used
to compute tangents. I'm not calling it a rewrite since it's the
result of a lot of iterations on the original code, but pretty much
everything is reworked somehow.
Overall goal was to a) make it faster and b) make it maintainable.
Notable changes:
- Since the callbacks for requesting geometry data were a big
bottleneck before, I've ported it to C++ and made it header-only,
templating on the data source. That way, the compiler generates code
specific to the caller, which allows it to inline the data source and
specialize for some cases (e.g. subd vs. non-subd in Cycles).
- The one input parameter, an optional angle threshold, was not used
anywhere. Turns out that removing it allows for considerable
algorithmic simplification, removing a lot of the complexity in the
later stages. Therefore, I've just removed the option in the new code.
- The code computes several outputs, but only one (the tangent itself)
is ever used in Blender. Therefore, I've removed the others to
simplify the code. They could easily be brought back if needed, none
of the algorithmic simplifications are conflicting with them.
- The original code had fallback paths for many steps in case temporary
memory allocation fails, but that never actually gets used anyways
since malloc() doesn't really ever return NULL in practise, so I
removed them.
- In general, I've restructured A LOT of the code to make the
algorithms clearer and make use of some C++ features (vectors,
std::array, booleans, classes), though there's still some of cleanup
that could be done.
- Parallelized duplicate detection, neighbor detection, triangle
tangent computation, degenerate triangle handling and tangent space
accumulation.
- Replaced several algorithms with faster equivalents: Duplicate
detection uses a (concurrent) hash set now, neighbor detection uses
Radixsort and splits vertices by index pairs etc.
As for results, the exact speedup depends on the scene of course, but
let's consider the file from T97378:
- Blender 3.1 (before D14675): 6.07sec
- Blender 3.2 (with D14675): 4.62sec
- rBf0a36599007d (last nightly build): 4.42sec
- With this commit: 0.90sec
This speedup will mostly be noticed at the start of Cycles renders and,
even more importantly, in Eevee when doing something that changes the
geometry (e.g. animating) on a model using normal maps.
Differential Revision: https://developer.blender.org/D15589
2022-09-03 17:21:44 +02:00
|
|
|
|
2023-03-29 16:50:54 +02:00
|
|
|
float3(float x_, float y_, float z_) : x{x_}, y{y_}, z{z_} {}
|
Mikktspace: Optimized port to C++
This commit is a big overhaul to the Mikktspace module, which is used
to compute tangents. I'm not calling it a rewrite since it's the
result of a lot of iterations on the original code, but pretty much
everything is reworked somehow.
Overall goal was to a) make it faster and b) make it maintainable.
Notable changes:
- Since the callbacks for requesting geometry data were a big
bottleneck before, I've ported it to C++ and made it header-only,
templating on the data source. That way, the compiler generates code
specific to the caller, which allows it to inline the data source and
specialize for some cases (e.g. subd vs. non-subd in Cycles).
- The one input parameter, an optional angle threshold, was not used
anywhere. Turns out that removing it allows for considerable
algorithmic simplification, removing a lot of the complexity in the
later stages. Therefore, I've just removed the option in the new code.
- The code computes several outputs, but only one (the tangent itself)
is ever used in Blender. Therefore, I've removed the others to
simplify the code. They could easily be brought back if needed, none
of the algorithmic simplifications are conflicting with them.
- The original code had fallback paths for many steps in case temporary
memory allocation fails, but that never actually gets used anyways
since malloc() doesn't really ever return NULL in practise, so I
removed them.
- In general, I've restructured A LOT of the code to make the
algorithms clearer and make use of some C++ features (vectors,
std::array, booleans, classes), though there's still some of cleanup
that could be done.
- Parallelized duplicate detection, neighbor detection, triangle
tangent computation, degenerate triangle handling and tangent space
accumulation.
- Replaced several algorithms with faster equivalents: Duplicate
detection uses a (concurrent) hash set now, neighbor detection uses
Radixsort and splits vertices by index pairs etc.
As for results, the exact speedup depends on the scene of course, but
let's consider the file from T97378:
- Blender 3.1 (before D14675): 6.07sec
- Blender 3.2 (with D14675): 4.62sec
- rBf0a36599007d (last nightly build): 4.42sec
- With this commit: 0.90sec
This speedup will mostly be noticed at the start of Cycles renders and,
even more importantly, in Eevee when doing something that changes the
geometry (e.g. animating) on a model using normal maps.
Differential Revision: https://developer.blender.org/D15589
2022-09-03 17:21:44 +02:00
|
|
|
|
|
|
|
|
static float3 zero()
|
|
|
|
|
{
|
|
|
|
|
return {0.0f, 0.0f, 0.0f};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
friend float3 operator*(const float3 &a, float b)
|
|
|
|
|
{
|
|
|
|
|
return {a.x * b, a.y * b, a.z * b};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
friend float3 operator*(float b, const float3 &a)
|
|
|
|
|
{
|
|
|
|
|
return {a.x * b, a.y * b, a.z * b};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
friend float3 operator-(const float3 &a, const float3 &b)
|
|
|
|
|
{
|
|
|
|
|
return {a.x - b.x, a.y - b.y, a.z - b.z};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
friend float3 operator-(const float3 &a)
|
|
|
|
|
{
|
|
|
|
|
return {-a.x, -a.y, -a.z};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
friend bool operator==(const float3 &a, const float3 &b)
|
|
|
|
|
{
|
|
|
|
|
return a.x == b.x && a.y == b.y && a.z == b.z;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
float length_squared() const
|
|
|
|
|
{
|
|
|
|
|
return x * x + y * y + z * z;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
float length() const
|
|
|
|
|
{
|
2024-06-04 14:27:17 +02:00
|
|
|
return sqrtf(length_squared());
|
Mikktspace: Optimized port to C++
This commit is a big overhaul to the Mikktspace module, which is used
to compute tangents. I'm not calling it a rewrite since it's the
result of a lot of iterations on the original code, but pretty much
everything is reworked somehow.
Overall goal was to a) make it faster and b) make it maintainable.
Notable changes:
- Since the callbacks for requesting geometry data were a big
bottleneck before, I've ported it to C++ and made it header-only,
templating on the data source. That way, the compiler generates code
specific to the caller, which allows it to inline the data source and
specialize for some cases (e.g. subd vs. non-subd in Cycles).
- The one input parameter, an optional angle threshold, was not used
anywhere. Turns out that removing it allows for considerable
algorithmic simplification, removing a lot of the complexity in the
later stages. Therefore, I've just removed the option in the new code.
- The code computes several outputs, but only one (the tangent itself)
is ever used in Blender. Therefore, I've removed the others to
simplify the code. They could easily be brought back if needed, none
of the algorithmic simplifications are conflicting with them.
- The original code had fallback paths for many steps in case temporary
memory allocation fails, but that never actually gets used anyways
since malloc() doesn't really ever return NULL in practise, so I
removed them.
- In general, I've restructured A LOT of the code to make the
algorithms clearer and make use of some C++ features (vectors,
std::array, booleans, classes), though there's still some of cleanup
that could be done.
- Parallelized duplicate detection, neighbor detection, triangle
tangent computation, degenerate triangle handling and tangent space
accumulation.
- Replaced several algorithms with faster equivalents: Duplicate
detection uses a (concurrent) hash set now, neighbor detection uses
Radixsort and splits vertices by index pairs etc.
As for results, the exact speedup depends on the scene of course, but
let's consider the file from T97378:
- Blender 3.1 (before D14675): 6.07sec
- Blender 3.2 (with D14675): 4.62sec
- rBf0a36599007d (last nightly build): 4.42sec
- With this commit: 0.90sec
This speedup will mostly be noticed at the start of Cycles renders and,
even more importantly, in Eevee when doing something that changes the
geometry (e.g. animating) on a model using normal maps.
Differential Revision: https://developer.blender.org/D15589
2022-09-03 17:21:44 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static float distance(const float3 &a, const float3 &b)
|
|
|
|
|
{
|
|
|
|
|
return (a - b).length();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
friend float3 operator+(const float3 &a, const float3 &b)
|
|
|
|
|
{
|
|
|
|
|
return {a.x + b.x, a.y + b.y, a.z + b.z};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void operator+=(const float3 &b)
|
|
|
|
|
{
|
|
|
|
|
this->x += b.x;
|
|
|
|
|
this->y += b.y;
|
|
|
|
|
this->z += b.z;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
friend float3 operator*(const float3 &a, const float3 &b)
|
|
|
|
|
{
|
|
|
|
|
return {a.x * b.x, a.y * b.y, a.z * b.z};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
float3 normalize() const
|
|
|
|
|
{
|
|
|
|
|
const float len = length();
|
|
|
|
|
return (len != 0.0f) ? *this * (1.0f / len) : *this;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
float reduce_add() const
|
|
|
|
|
{
|
|
|
|
|
return x + y + z;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
inline float dot(const float3 &a, const float3 &b)
|
|
|
|
|
{
|
|
|
|
|
return a.x * b.x + a.y * b.y + a.z * b.z;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
inline float distance(const float3 &a, const float3 &b)
|
|
|
|
|
{
|
|
|
|
|
return float3::distance(a, b);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Projects v onto the surface with normal n. */
|
|
|
|
|
inline float3 project(const float3 &n, const float3 &v)
|
|
|
|
|
{
|
|
|
|
|
return (v - n * dot(n, v)).normalize();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace mikk
|