For performance reasons render graphs can keep memory allocated so that it can be reused. This PR optimizes the memory usage inside the render graph to keep it within normal usage. I didn't detect any performance regression with this change, but it reduces the memory used when performing final image rendering of heavy scenes. Partial fix for #137382: the amount of memory still increases by 4 MB per render, but this fixes the main difference when using large scenes. Pull Request: https://projects.blender.org/blender/blender/pulls/137660
259 lines
7.9 KiB
C++
259 lines
7.9 KiB
C++
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
|
*
|
|
* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
|
|
/** \file
|
|
* \ingroup gpu
|
|
*
|
|
* The render graph is primarily a graph of GPU commands that are then serialized into command
|
|
* buffers. The submission order can be altered and barriers are added for resource sync.
|
|
*
|
|
* # Building render graph
|
|
*
|
|
* The graph contains nodes that refer to the resources they read from or modify.
|
|
* The resources that are read from are linked to the node inputs. The resources that are written
|
|
* to are linked to the node outputs.
|
|
*
|
|
* Resources need to be tracked as usage can alter the content of the resource. For example an
|
|
* image can be optimized for data transfer, or optimized for sampling which can use a different
|
|
* pixel layout on the device.
|
|
*
|
|
* When adding a node to the render graph the input and output links are extracted from the
|
|
* node's create info. See `VKNodeInfo::build_links`.
|
|
*
|
|
* # Executing render graph
|
|
*
|
|
* Executing a render graph is done by calling `submit_for_read` or `submit_for_present`. When
|
|
* called the nodes that are needed to render the resource are determined by a `VKScheduler`. The
|
|
* nodes are converted to `vkCmd*` and recorded in the command buffer by `VKCommandBuilder`.
|
|
*
|
|
* # Thread safety
|
|
*
|
|
* When the render graph is called the device will be locked. Nodes inside the render graph rely
|
|
* on the resources which are device specific. The locked time is tiny when adding new nodes.
|
|
* During execution this takes a longer time, but the lock can be released when the commands have
|
|
* been queued. So other threads can continue.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <mutex>
|
|
#include <optional>
|
|
#include <pthread.h>
|
|
|
|
#include "BKE_global.hh"
|
|
|
|
#include "BLI_color.hh"
|
|
#include "BLI_map.hh"
|
|
#include "BLI_utility_mixins.hh"
|
|
#include "BLI_vector.hh"
|
|
#include "BLI_vector_set.hh"
|
|
|
|
#include "BKE_global.hh"
|
|
|
|
#include "vk_common.hh"
|
|
|
|
#include "vk_command_buffer_wrapper.hh"
|
|
#include "vk_command_builder.hh"
|
|
#include "vk_render_graph_links.hh"
|
|
#include "vk_resource_state_tracker.hh"
|
|
#include "vk_resource_tracker.hh"
|
|
|
|
namespace blender::gpu::render_graph {
|
|
class VKScheduler;
|
|
|
|
class VKRenderGraph : public NonCopyable {
|
|
friend class VKCommandBuilder;
|
|
friend class VKScheduler;
|
|
using DebugGroupNameID = int64_t;
|
|
using DebugGroupID = int64_t;
|
|
|
|
/** All links inside the graph indexable via NodeHandle. */
|
|
Vector<VKRenderGraphNodeLinks, 1024> links_;
|
|
/** All nodes inside the graph indexable via NodeHandle. */
|
|
Vector<VKRenderGraphNode, 1024> nodes_;
|
|
/** Storage for large node datas to improve CPU cache pre-loading. */
|
|
VKRenderGraphStorage storage_;
|
|
|
|
/**
|
|
* Not owning pointer to device resources.
|
|
*
|
|
* To improve testability the render graph doesn't access VKDevice or VKBackend directly.
|
|
* resources_ can be replaced by a local variable. This way test cases don't need to create a
|
|
* fully working context in order to test something render graph specific. Is marked optional as
|
|
* device could
|
|
*/
|
|
VKResourceStateTracker &resources_;
|
|
|
|
struct DebugGroup {
|
|
std::string name;
|
|
ColorTheme4f color;
|
|
|
|
BLI_STRUCT_EQUALITY_OPERATORS_2(DebugGroup, name, color)
|
|
uint64_t hash() const
|
|
{
|
|
return get_default_hash<std::string, ColorTheme4f>(name, color);
|
|
}
|
|
};
|
|
|
|
struct {
|
|
VectorSet<DebugGroup> groups;
|
|
|
|
/** Current stack of debug group names. */
|
|
Vector<DebugGroupNameID> group_stack;
|
|
|
|
/**
|
|
* Has a node been added to the current stack? If not the group stack will be added to
|
|
* used_groups.
|
|
*/
|
|
bool group_used = false;
|
|
|
|
/** All used debug groups. */
|
|
Vector<Vector<DebugGroupNameID>> used_groups;
|
|
|
|
/**
|
|
* Map of a node_handle to an index of debug group in used_groups.
|
|
*
|
|
* <source>
|
|
* int used_group_id = node_group_map[node_handle];
|
|
* const Vector<DebugGroupNameID> &used_group = used_groups[used_group_id];
|
|
* </source>
|
|
*/
|
|
Vector<DebugGroupID> node_group_map;
|
|
} debug_;
|
|
|
|
public:
|
|
VKSubmissionID submission_id;
|
|
|
|
/**
|
|
* Construct a new render graph instance.
|
|
*
|
|
* To improve testability the command buffer and resources they work on are provided as a
|
|
* parameter.
|
|
*/
|
|
VKRenderGraph(VKResourceStateTracker &resources);
|
|
|
|
private:
|
|
/**
|
|
* Add a node to the render graph.
|
|
*/
|
|
template<typename NodeInfo> void add_node(const typename NodeInfo::CreateInfo &create_info)
|
|
{
|
|
std::scoped_lock lock(resources_.mutex);
|
|
static VKRenderGraphNode node_template = {};
|
|
NodeHandle node_handle = nodes_.append_and_get_index(node_template);
|
|
#if 0
|
|
/* Useful during debugging. When a validation error occurs during submission we know the node
|
|
* type and node handle, but we don't know when and by who that specific node was added to the
|
|
* render graph. By enabling this part of the code and set the correct node_handle and node
|
|
* type a debugger can break at the moment the node has been added to the render graph. */
|
|
if (node_handle == 267 && NodeInfo::node_type == VKNodeType::DRAW) {
|
|
std::cout << "break\n";
|
|
}
|
|
#endif
|
|
if (nodes_.size() > links_.size()) {
|
|
links_.resize(nodes_.size());
|
|
}
|
|
VKRenderGraphNode &node = nodes_[node_handle];
|
|
node.set_node_data<NodeInfo>(storage_, create_info);
|
|
|
|
VKRenderGraphNodeLinks &node_links = links_[node_handle];
|
|
BLI_assert(node_links.inputs.is_empty());
|
|
BLI_assert(node_links.outputs.is_empty());
|
|
node.build_links<NodeInfo>(resources_, node_links, create_info);
|
|
|
|
if (G.debug & G_DEBUG_GPU) {
|
|
if (!debug_.group_used) {
|
|
debug_.group_used = true;
|
|
debug_.used_groups.append(debug_.group_stack);
|
|
}
|
|
if (nodes_.size() > debug_.node_group_map.size()) {
|
|
debug_.node_group_map.resize(nodes_.size());
|
|
}
|
|
debug_.node_group_map[node_handle] = debug_.used_groups.size() - 1;
|
|
}
|
|
}
|
|
|
|
public:
|
|
#define ADD_NODE(NODE_CLASS) \
|
|
void add_node(const NODE_CLASS::CreateInfo &create_info) \
|
|
{ \
|
|
add_node<NODE_CLASS>(create_info); \
|
|
}
|
|
ADD_NODE(VKBeginQueryNode)
|
|
ADD_NODE(VKBeginRenderingNode)
|
|
ADD_NODE(VKEndQueryNode)
|
|
ADD_NODE(VKEndRenderingNode)
|
|
ADD_NODE(VKClearAttachmentsNode)
|
|
ADD_NODE(VKClearColorImageNode)
|
|
ADD_NODE(VKClearDepthStencilImageNode)
|
|
ADD_NODE(VKFillBufferNode)
|
|
ADD_NODE(VKCopyBufferNode)
|
|
ADD_NODE(VKCopyBufferToImageNode)
|
|
ADD_NODE(VKCopyImageNode)
|
|
ADD_NODE(VKCopyImageToBufferNode)
|
|
ADD_NODE(VKBlitImageNode)
|
|
ADD_NODE(VKDispatchNode)
|
|
ADD_NODE(VKDispatchIndirectNode)
|
|
ADD_NODE(VKDrawNode)
|
|
ADD_NODE(VKDrawIndexedNode)
|
|
ADD_NODE(VKDrawIndexedIndirectNode)
|
|
ADD_NODE(VKDrawIndirectNode)
|
|
ADD_NODE(VKResetQueryPoolNode)
|
|
ADD_NODE(VKUpdateBufferNode)
|
|
ADD_NODE(VKUpdateMipmapsNode)
|
|
ADD_NODE(VKSynchronizationNode)
|
|
#undef ADD_NODE
|
|
|
|
/**
|
|
* Push a new debugging group to the stack with the given name.
|
|
*
|
|
* New nodes added to the render graph will be associated with this debug group.
|
|
*/
|
|
void debug_group_begin(const char *name, const ColorTheme4f &color);
|
|
|
|
/**
|
|
* Pop the top of the debugging group stack.
|
|
*
|
|
* New nodes added to the render graph will be associated with the parent of the current debug
|
|
* group.
|
|
*/
|
|
void debug_group_end();
|
|
|
|
/**
|
|
* Return the full debug group of the given node_handle. Returns an empty string when debug
|
|
* groups are not enabled (`--debug-gpu`).
|
|
*/
|
|
std::string full_debug_group(NodeHandle node_handle) const;
|
|
|
|
/**
|
|
* Utility function that is used during debugging.
|
|
*
|
|
* When debugging most of the time know the node_handle that is needed after the node has been
|
|
* constructed. When haunting a bug it is more useful to query what the next node handle will be
|
|
* so you can step through the node building process.
|
|
*/
|
|
NodeHandle next_node_handle()
|
|
{
|
|
return nodes_.size();
|
|
}
|
|
|
|
bool is_empty()
|
|
{
|
|
return nodes_.is_empty();
|
|
}
|
|
|
|
void debug_print(NodeHandle node_handle) const;
|
|
|
|
/**
|
|
* Reset the render graph.
|
|
*/
|
|
void reset();
|
|
|
|
void memstats() const;
|
|
|
|
private:
|
|
};
|
|
|
|
} // namespace blender::gpu::render_graph
|