Merge branch 'blender-v3.5-release'

This commit is contained in:
Hans Goudey
2023-02-28 11:36:20 -05:00
13 changed files with 118 additions and 118 deletions

View File

@@ -182,7 +182,7 @@ class Device {
{
}
/* Return true if device is ready for rendering, or report status if not. */
/* Report status and return true if device is ready for rendering. */
virtual bool is_ready(string & /*status*/) const
{
return true;

View File

@@ -490,6 +490,9 @@ bool MetalDevice::make_source_and_check_if_compile_needed(MetalPipelineType pso_
MD5Hash md5;
md5.append(constant_values);
md5.append(source[pso_type]);
if (use_metalrt) {
md5.append(string_printf("metalrt_features=%d", kernel_features & METALRT_FEATURE_MASK));
}
kernels_md5[pso_type] = md5.get_hex();
return MetalDeviceKernels::should_load_kernels(this, pso_type);
@@ -934,6 +937,17 @@ bool MetalDevice::is_ready(string &status) const
DEVICE_KERNEL_NUM);
return false;
}
if (int num_requests = MetalDeviceKernels::num_incomplete_specialization_requests()) {
status = string_printf("%d kernels to optimize", num_requests);
}
else if (kernel_specialization_level == PSO_SPECIALIZED_INTERSECT) {
status = "Using optimized intersection kernels";
}
else if (kernel_specialization_level == PSO_SPECIALIZED_SHADE) {
status = "Using optimized kernels";
}
metal_printf("MetalDevice::is_ready(...) --> true\n");
return true;
}
@@ -970,7 +984,7 @@ void MetalDevice::optimize_for_scene(Scene *scene)
}
if (specialize_in_background) {
if (!MetalDeviceKernels::any_specialization_happening_now()) {
if (MetalDeviceKernels::num_incomplete_specialization_requests() == 0) {
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
specialize_kernels_fn);
}

View File

@@ -63,8 +63,7 @@ enum MetalPipelineType {
};
# define METALRT_FEATURE_MASK \
(KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD | \
KERNEL_FEATURE_OBJECT_MOTION)
(KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD)
const char *kernel_type_as_string(MetalPipelineType pso_type);
@@ -81,7 +80,7 @@ struct MetalKernelPipeline {
KernelData kernel_data_;
bool use_metalrt;
uint32_t metalrt_features = 0;
uint32_t kernel_features = 0;
int threads_per_threadgroup;
@@ -104,7 +103,7 @@ struct MetalKernelPipeline {
/* Cache of Metal kernels for each DeviceKernel. */
namespace MetalDeviceKernels {
bool any_specialization_happening_now();
int num_incomplete_specialization_requests();
int get_loaded_kernel_count(MetalDevice const *device, MetalPipelineType pso_type);
bool should_load_kernels(MetalDevice const *device, MetalPipelineType pso_type);
bool load(MetalDevice *device, MetalPipelineType pso_type);

View File

@@ -344,9 +344,7 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
/* metalrt options */
pipeline->use_metalrt = device->use_metalrt;
pipeline->metalrt_features = device->use_metalrt ?
(device->kernel_features & METALRT_FEATURE_MASK) :
0;
pipeline->kernel_features = device->kernel_features;
{
thread_scoped_lock lock(cache_mutex);
@@ -357,65 +355,36 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
/**
 * Select the pipeline to use for `kernel` on `device`.
 *
 * Among the loaded entries of `pipelines[kernel]` whose `kernels_md5` equals the device's checksum
 * for the same `pso_type`, the entry with the highest `pso_type` is chosen; its `usage_count` is
 * incremented and it is returned. While no entry matches, the calling thread sleeps in 100 ms
 * steps and searches again. Once `running` is false the loop ends and nullptr is returned, so
 * callers must handle a null result.
 */
MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const MetalDevice *device)
{
/* metalrt options */
bool use_metalrt = device->use_metalrt;
bool device_metalrt_hair = use_metalrt && device->kernel_features & KERNEL_FEATURE_HAIR;
bool device_metalrt_hair_thick = use_metalrt &&
device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
bool device_metalrt_pointcloud = use_metalrt &&
device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
bool device_metalrt_motion = use_metalrt &&
device->kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
MetalKernelPipeline *best_pipeline = nullptr;
while (!best_pipeline) {
while (running) {
/* Search all loaded pipelines with matching kernels_md5 checksums. */
MetalKernelPipeline *best_match = nullptr;
{
thread_scoped_lock lock(cache_mutex);
for (auto &pipeline : pipelines[kernel]) {
if (!pipeline->loaded) {
/* still loading - ignore */
continue;
}
bool pipeline_metalrt_hair = pipeline->metalrt_features & KERNEL_FEATURE_HAIR;
bool pipeline_metalrt_hair_thick = pipeline->metalrt_features & KERNEL_FEATURE_HAIR_THICK;
bool pipeline_metalrt_pointcloud = pipeline->metalrt_features & KERNEL_FEATURE_POINTCLOUD;
bool pipeline_metalrt_motion = use_metalrt &&
pipeline->metalrt_features & KERNEL_FEATURE_OBJECT_MOTION;
if (pipeline->use_metalrt != use_metalrt || pipeline_metalrt_hair != device_metalrt_hair ||
pipeline_metalrt_hair_thick != device_metalrt_hair_thick ||
pipeline_metalrt_pointcloud != device_metalrt_pointcloud ||
pipeline_metalrt_motion != device_metalrt_motion) {
/* wrong combination of metalrt options */
continue;
}
if (pipeline->pso_type != PSO_GENERIC) {
if (pipeline->kernels_md5 == device->kernels_md5[PSO_SPECIALIZED_INTERSECT] ||
pipeline->kernels_md5 == device->kernels_md5[PSO_SPECIALIZED_SHADE]) {
best_pipeline = pipeline.get();
for (auto &candidate : pipelines[kernel]) {
if (candidate->loaded &&
candidate->kernels_md5 == device->kernels_md5[candidate->pso_type]) {
/* Replace existing match if candidate is more specialized. */
if (!best_match || candidate->pso_type > best_match->pso_type) {
best_match = candidate.get();
}
}
else if (!best_pipeline) {
best_pipeline = pipeline.get();
}
}
}
if (!best_pipeline) {
std::this_thread::sleep_for(std::chrono::milliseconds(100));
if (best_match) {
if (best_match->usage_count == 0 && best_match->pso_type != PSO_GENERIC) {
metal_printf("Swapping in %s version of %s\n",
kernel_type_as_string(best_match->pso_type),
device_kernel_as_string(kernel));
}
best_match->usage_count += 1;
return best_match;
}
}
if (best_pipeline->usage_count == 0 && best_pipeline->pso_type != PSO_GENERIC) {
metal_printf("Swapping in %s version of %s\n",
kernel_type_as_string(best_pipeline->pso_type),
device_kernel_as_string(kernel));
/* Spin until a matching kernel is loaded, or we're shutting down. */
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
best_pipeline->usage_count += 1;
return best_pipeline;
return nullptr;
}
bool MetalKernelPipeline::should_use_binary_archive() const
@@ -570,18 +539,14 @@ void MetalKernelPipeline::compile()
NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
NSArray *linked_functions = nil;
bool metalrt_hair = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR);
bool metalrt_hair_thick = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR_THICK);
bool metalrt_pointcloud = use_metalrt && (metalrt_features & KERNEL_FEATURE_POINTCLOUD);
if (use_metalrt) {
id<MTLFunction> curve_intersect_default = nil;
id<MTLFunction> curve_intersect_shadow = nil;
id<MTLFunction> point_intersect_default = nil;
id<MTLFunction> point_intersect_shadow = nil;
if (metalrt_hair) {
if (kernel_features & KERNEL_FEATURE_HAIR) {
/* Add curve intersection programs. */
if (metalrt_hair_thick) {
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
/* Slower programs for thick hair since that also slows down ribbons.
* Ideally this should not be needed. */
curve_intersect_default = rt_intersection_function[METALRT_FUNC_CURVE_ALL];
@@ -592,7 +557,7 @@ void MetalKernelPipeline::compile()
curve_intersect_shadow = rt_intersection_function[METALRT_FUNC_CURVE_RIBBON_SHADOW];
}
}
if (metalrt_pointcloud) {
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
point_intersect_default = rt_intersection_function[METALRT_FUNC_POINT];
point_intersect_shadow = rt_intersection_function[METALRT_FUNC_POINT_SHADOW];
}
@@ -682,15 +647,6 @@ void MetalKernelPipeline::compile()
local_md5.append((uint8_t *)&this->threads_per_threadgroup,
sizeof(this->threads_per_threadgroup));
string options;
if (use_metalrt && kernel_has_intersection(device_kernel)) {
/* incorporate any MetalRT specializations into the archive name */
options += string_printf(".hair_%d.hair_thick_%d.pointcloud_%d",
metalrt_hair ? 1 : 0,
metalrt_hair_thick ? 1 : 0,
metalrt_pointcloud ? 1 : 0);
}
/* Replace non-alphanumerical characters with underscores. */
string device_name = [mtlDevice.name UTF8String];
for (char &c : device_name) {
@@ -702,7 +658,7 @@ void MetalKernelPipeline::compile()
metalbin_name = device_name;
metalbin_name = path_join(metalbin_name, device_kernel_as_string(device_kernel));
metalbin_name = path_join(metalbin_name, kernel_type_as_string(pso_type));
metalbin_name = path_join(metalbin_name, local_md5.get_hex() + options + ".bin");
metalbin_name = path_join(metalbin_name, local_md5.get_hex() + ".bin");
metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
path_create_directories(metalbin_path);
@@ -860,16 +816,15 @@ void MetalDeviceKernels::wait_for_all()
}
}
bool MetalDeviceKernels::any_specialization_happening_now()
int MetalDeviceKernels::num_incomplete_specialization_requests()
{
/* Return the total number of incomplete specialization requests, summed over all ShaderCaches
 * (typically there will be only 1). */
int total = 0;
for (int i = 0; i < g_shaderCacheCount; i++) {
if (g_shaderCache[i].second->incomplete_specialization_requests > 0) {
return true;
}
total += g_shaderCache[i].second->incomplete_specialization_requests;
}
return false;
return total;
}
int MetalDeviceKernels::get_loaded_kernel_count(MetalDevice const *device,

View File

@@ -706,6 +706,12 @@ void Session::update_status_time(bool show_pause, bool show_done)
string_printf("Sample %d/%d", current_sample, num_samples));
}
/* Append any device-specific status (such as background kernel optimization) */
string device_status;
if (device->is_ready(device_status) && !device_status.empty()) {
substatus += string_printf(" (%s)", device_status.c_str());
}
/* TODO(sergey): Denoising status from the path trace. */
if (show_pause) {

View File

@@ -6,6 +6,7 @@
#include <mutex>
#include "BLI_cache_mutex.hh"
#include "BLI_math_vector_types.hh"
#include "BLI_multi_value_map.hh"
#include "BLI_resource_scope.hh"
#include "BLI_utility_mixins.hh"
@@ -150,6 +151,13 @@ class bNodeTreeRuntime : NonCopyable, NonMovable {
Vector<bNode *> root_frames;
Vector<bNodeSocket *> interface_inputs;
Vector<bNodeSocket *> interface_outputs;
/**
* The location of all sockets in the tree, calculated while drawing the nodes.
* Indexed with #bNodeSocket::index_in_tree(). In the node tree's "world space"
* (the same as #bNode::runtime::totr).
*/
Vector<float2> all_socket_locations;
};
/**

View File

@@ -493,7 +493,13 @@ bool BKE_gpencil_stroke_sample(bGPdata *gpd,
copy_v3_v3(&pt2->x, last_coord);
new_pt[i].pressure = pt[0].pressure;
new_pt[i].strength = pt[0].strength;
memcpy(new_pt[i].vert_color, pt[0].vert_color, sizeof(float[4]));
copy_v3_v3(&pt2->x, last_coord);
new_pt[i].pressure = pt[0].pressure;
new_pt[i].strength = pt[0].strength;
new_pt[i].uv_fac = pt[0].uv_fac;
new_pt[i].uv_rot = pt[0].uv_rot;
copy_v2_v2(new_pt[i].uv_fill, pt[0].uv_fill);
copy_v4_v4(new_pt[i].vert_color, pt[0].vert_color);
if (select) {
new_pt[i].flag |= GP_SPOINT_SELECT;
}

View File

@@ -2039,9 +2039,11 @@ static NodeLinkDrawConfig nodelink_get_draw_config(const bContext &C,
draw_config.th_col2 = th_col2;
draw_config.th_col3 = th_col3;
const bNodeTree &node_tree = *snode.edittree;
draw_config.dim_factor = selected ? 1.0f :
node_link_dim_factor(
snode.runtime->all_socket_locations, v2d, link);
node_tree.runtime->all_socket_locations, v2d, link);
bTheme *btheme = UI_GetTheme();
draw_config.dash_alpha = btheme->space_node.dash_alpha;
@@ -2063,24 +2065,21 @@ static NodeLinkDrawConfig nodelink_get_draw_config(const bContext &C,
if (snode.overlay.flag & SN_OVERLAY_SHOW_OVERLAYS &&
snode.overlay.flag & SN_OVERLAY_SHOW_WIRE_COLORS) {
PointerRNA from_node_ptr, to_node_ptr;
RNA_pointer_create((ID *)snode.edittree, &RNA_Node, link.fromnode, &from_node_ptr);
RNA_pointer_create((ID *)snode.edittree, &RNA_Node, link.tonode, &to_node_ptr);
RNA_pointer_create((ID *)&node_tree, &RNA_Node, link.fromnode, &from_node_ptr);
RNA_pointer_create((ID *)&node_tree, &RNA_Node, link.tonode, &to_node_ptr);
if (link.fromsock) {
node_socket_color_get(
C, *snode.edittree, from_node_ptr, *link.fromsock, draw_config.start_color);
node_socket_color_get(C, node_tree, from_node_ptr, *link.fromsock, draw_config.start_color);
}
else {
node_socket_color_get(
C, *snode.edittree, to_node_ptr, *link.tosock, draw_config.start_color);
node_socket_color_get(C, node_tree, to_node_ptr, *link.tosock, draw_config.start_color);
}
if (link.tosock) {
node_socket_color_get(C, *snode.edittree, to_node_ptr, *link.tosock, draw_config.end_color);
node_socket_color_get(C, node_tree, to_node_ptr, *link.tosock, draw_config.end_color);
}
else {
node_socket_color_get(
C, *snode.edittree, from_node_ptr, *link.fromsock, draw_config.end_color);
node_socket_color_get(C, node_tree, from_node_ptr, *link.fromsock, draw_config.end_color);
}
}
else {
@@ -2167,8 +2166,9 @@ void node_draw_link_bezier(const bContext &C,
const int th_col3,
const bool selected)
{
const std::array<float2, 4> points = node_link_bezier_points(snode.runtime->all_socket_locations,
link);
const bNodeTree &node_tree = *snode.edittree;
const std::array<float2, 4> points = node_link_bezier_points(
node_tree.runtime->all_socket_locations, link);
if (!node_link_draw_is_visible(v2d, points)) {
return;
}
@@ -2227,15 +2227,18 @@ void node_draw_link(const bContext &C,
static std::array<float2, 4> node_link_bezier_points_dragged(const SpaceNode &snode,
const bNodeLink &link)
{
const bNodeTree &node_tree = *snode.edittree;
const float2 cursor = snode.runtime->cursor * UI_DPI_FAC;
std::array<float2, 4> points;
points[0] = link.fromsock ?
socket_link_connection_location(
snode.runtime->all_socket_locations, *link.fromnode, *link.fromsock, link) :
socket_link_connection_location(node_tree.runtime->all_socket_locations,
*link.fromnode,
*link.fromsock,
link) :
cursor;
points[3] = link.tosock ?
socket_link_connection_location(
snode.runtime->all_socket_locations, *link.tonode, *link.tosock, link) :
node_tree.runtime->all_socket_locations, *link.tonode, *link.tosock, link) :
cursor;
calculate_inner_link_bezier_points(points);
return points;

View File

@@ -138,7 +138,7 @@ static int add_reroute_exec(bContext *C, wmOperator *op)
const ARegion &region = *CTX_wm_region(C);
SpaceNode &snode = *CTX_wm_space_node(C);
bNodeTree &ntree = *snode.edittree;
const Span<float2> socket_locations = snode.runtime->all_socket_locations;
const Span<float2> socket_locations = ntree.runtime->all_socket_locations;
Vector<float2> path;
RNA_BEGIN (op->ptr, itemptr, "path") {

View File

@@ -268,6 +268,9 @@ void node_sort(bNodeTree &ntree)
ntree.runtime->nodes_by_id.add_new(sort_nodes[i]);
sort_nodes[i]->runtime->index_in_tree = i;
}
/* Nodes have been reordered; the socket locations are invalid until the node tree is redrawn. */
ntree.runtime->all_socket_locations.clear();
}
static Array<uiBlock *> node_uiblocks_init(const bContext &C, const Span<bNode *> nodes)
@@ -3178,16 +3181,16 @@ static void draw_nodetree(const bContext &C,
else if (ntree.type == NTREE_COMPOSIT) {
tree_draw_ctx.used_by_realtime_compositor = realtime_compositor_is_in_use(C);
}
snode->runtime->all_socket_locations.reinitialize(ntree.all_sockets().size());
ntree.runtime->all_socket_locations.reinitialize(ntree.all_sockets().size());
node_update_nodetree(
C, tree_draw_ctx, ntree, nodes, blocks, snode->runtime->all_socket_locations);
C, tree_draw_ctx, ntree, nodes, blocks, ntree.runtime->all_socket_locations);
node_draw_nodetree(C,
tree_draw_ctx,
region,
*snode,
ntree,
snode->runtime->all_socket_locations,
ntree.runtime->all_socket_locations,
nodes,
blocks,
parent_key);

View File

@@ -1129,13 +1129,14 @@ bNodeSocket *node_find_indicated_socket(SpaceNode &snode,
rctf rect;
const float size_sock_padded = NODE_SOCKSIZE + 4;
snode.edittree->ensure_topology_cache();
const Span<float2> socket_locations = snode.runtime->all_socket_locations;
if (socket_locations.size() != snode.edittree->all_sockets().size()) {
bNodeTree &node_tree = *snode.edittree;
node_tree.ensure_topology_cache();
const Span<float2> socket_locations = node_tree.runtime->all_socket_locations;
if (socket_locations.size() != node_tree.all_sockets().size()) {
/* Sockets haven't been drawn yet, e.g. when the file is currently opening. */
return nullptr;
}
const Span<bNode *> nodes = snode.edittree->all_nodes();
const Span<bNode *> nodes = node_tree.all_nodes();
if (nodes.is_empty()) {
return nullptr;
}

View File

@@ -79,12 +79,6 @@ struct bNodeLinkDrag {
};
struct SpaceNode_Runtime {
/**
* The location of all sockets in the tree, calculated while drawing the nodes.
* To be indexed with #bNodeSocket::index_in_tree().
*/
Vector<float2> all_socket_locations;
float aspect;
/** Mouse position for drawing socket-less links and adding nodes. */

View File

@@ -121,7 +121,11 @@ static void pick_input_link_by_link_intersect(const bContext &C,
const float2 &cursor)
{
SpaceNode *snode = CTX_wm_space_node(&C);
const Span<float2> socket_locations = snode->runtime->all_socket_locations;
bNodeTree &node_tree = *snode->edittree;
const Span<float2> socket_locations = node_tree.runtime->all_socket_locations;
if (socket_locations.is_empty()) {
return;
}
float2 drag_start;
RNA_float_get_array(op.ptr, "drag_start", drag_start);
@@ -132,7 +136,7 @@ static void pick_input_link_by_link_intersect(const bContext &C,
const float cursor_link_touch_distance = 12.5f * UI_DPI_FAC;
bNodeLink *link_to_pick = nullptr;
clear_picking_highlight(&snode->edittree->links);
clear_picking_highlight(&node_tree.links);
for (bNodeLink *link : socket->directly_linked_links()) {
/* Test if the cursor is near a link. */
std::array<float2, NODE_LINK_RESOL + 1> coords;
@@ -643,7 +647,7 @@ static int view_socket(const bContext &C,
}
if (viewer_node == nullptr) {
const float2 socket_location =
snode.runtime->all_socket_locations[bsocket_to_view.index_in_tree()];
btree.runtime->all_socket_locations[bsocket_to_view.index_in_tree()];
const int viewer_type = get_default_viewer_type(&C);
const float2 location{socket_location.x / UI_DPI_FAC + 100, socket_location.y / UI_DPI_FAC};
viewer_node = add_static_node(C, viewer_type, location);
@@ -1072,7 +1076,12 @@ static void node_link_cancel(bContext *C, wmOperator *op)
static void node_link_find_socket(bContext &C, wmOperator &op, const float2 &cursor)
{
SpaceNode &snode = *CTX_wm_space_node(&C);
bNodeTree &node_tree = *snode.edittree;
bNodeLinkDrag &nldrag = *static_cast<bNodeLinkDrag *>(op.customdata);
const Span<float2> socket_locations = node_tree.runtime->all_socket_locations;
if (socket_locations.is_empty()) {
return;
}
if (nldrag.in_out == SOCK_OUT) {
if (bNodeSocket *tsock = node_find_indicated_socket(snode, cursor, SOCK_IN)) {
@@ -1103,8 +1112,7 @@ static void node_link_find_socket(bContext &C, wmOperator &op, const float2 &cur
continue;
}
if (tsock && tsock->is_multi_input()) {
sort_multi_input_socket_links_with_drag(
snode.runtime->all_socket_locations, *tsock, link, cursor);
sort_multi_input_socket_links_with_drag(socket_locations, *tsock, link, cursor);
}
}
}
@@ -1477,7 +1485,7 @@ static int cut_links_exec(bContext *C, wmOperator *op)
bNodeTree &node_tree = *snode.edittree;
node_tree.ensure_topology_cache();
const Span<float2> socket_locations = snode.runtime->all_socket_locations;
const Span<float2> socket_locations = node_tree.runtime->all_socket_locations;
Set<bNodeLink *> links_to_remove;
LISTBASE_FOREACH (bNodeLink *, link, &node_tree.links) {
@@ -1563,7 +1571,7 @@ static int mute_links_exec(bContext *C, wmOperator *op)
SpaceNode &snode = *CTX_wm_space_node(C);
const ARegion &region = *CTX_wm_region(C);
bNodeTree &ntree = *snode.edittree;
const Span<float2> socket_locations = snode.runtime->all_socket_locations;
const Span<float2> socket_locations = ntree.runtime->all_socket_locations;
Vector<float2> path;
RNA_BEGIN (op->ptr, itemptr, "path") {
@@ -2046,7 +2054,10 @@ void node_insert_on_link_flags_set(SpaceNode &snode, const ARegion &region)
{
bNodeTree &node_tree = *snode.edittree;
node_tree.ensure_topology_cache();
const Span<float2> socket_locations = snode.runtime->all_socket_locations;
const Span<float2> socket_locations = node_tree.runtime->all_socket_locations;
if (socket_locations.is_empty()) {
return;
}
node_insert_on_link_flags_clear(node_tree);