Files
test2/source/blender/gpu/vulkan/vk_device_submission.cc
Jeroen Bakker aed9f22233 Refactor: Vulkan: swapchain
This PR refactors the way how swapchains are used.

Allow scaling of the swapchain content to the actual resolution of the swapchain.
This can reduce artefacts when resizing windows, when supported.

When the frame rate is too fast the previous implementation could use a semaphore
that was still in use, leading to unwanted stuttering on certain platforms. We now wait
until the rendering has finished (GHOST_Frame.submission_fence), before the
next image is acquired from the swap chain.

Mailbox has been disabled as it can calculate more frames than are actually
presented, leading to lag and increased power usage on others.

Pull Request: https://projects.blender.org/blender/blender/pulls/136603
2025-04-01 16:01:22 +02:00

299 lines
12 KiB
C++

/* SPDX-FileCopyrightText: 2025 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*/
#include <atomic>
#include <chrono>
#include <thread>
#include "vk_device.hh"
namespace blender::gpu {
/* -------------------------------------------------------------------- */
/** \name Render graph
* \{ */
/**
 * Work item handed from #VKDevice::render_graph_submit to the submission runner.
 *
 * Instances are allocated by the submitting thread and freed by the submission runner after they
 * have been processed (or by #VKDevice::deinit_submission_pool when they are still queued).
 */
struct VKRenderGraphSubmitTask {
  /** Render graph whose nodes are recorded into a command buffer. */
  render_graph::VKRenderGraph *render_graph;
  /** Value signaled on the device timeline semaphore when this task is submitted. */
  uint64_t timeline;
  /** When false the commands are only recorded; no queue submission is done for this task. */
  bool submit_to_device;
  /** Pipeline stages that wait on #wait_semaphore. */
  VkPipelineStageFlags wait_dst_stage_mask;
  /** Optional semaphore to wait on before executing the commands (can be VK_NULL_HANDLE). */
  VkSemaphore wait_semaphore;
  /** Optional semaphore to signal next to the timeline semaphore (can be VK_NULL_HANDLE). */
  VkSemaphore signal_semaphore;
  /** Fence passed to `vkQueueSubmit` (can be VK_NULL_HANDLE). */
  VkFence signal_fence;
  /**
   * Optional flag owned by the submitting thread. The submission runner sets it to true after
   * the queue submission. Atomic because it is written and read by different threads.
   */
  std::atomic<bool> *is_submitted_ptr;
};

/**
 * Queue a render graph for recording (and optionally submission) by the submission runner.
 *
 * \param render_graph: Render graph to hand over. Empty graphs are reset and recycled directly.
 * \param context_discard_pool: Discard pool whose content is moved into the device wide
 *     orphaned data, tagged with the timeline of this submission.
 * \param submit_to_device: Submit the recorded commands to the device queue.
 * \param wait_for_completion: Block until the timeline of this submission has been reached.
 * \param wait_dst_stage_mask: Pipeline stages that wait on `wait_semaphore`.
 * \param wait_semaphore: Optional semaphore the submission waits on.
 * \param signal_semaphore: Optional semaphore signaled by the submission.
 * \param signal_fence: Optional fence signaled by the submission.
 * \return The timeline value associated with this submission, or 0 when the render graph was
 *     empty and nothing was queued.
 */
TimelineValue VKDevice::render_graph_submit(render_graph::VKRenderGraph *render_graph,
                                            VKDiscardPool &context_discard_pool,
                                            bool submit_to_device,
                                            bool wait_for_completion,
                                            VkPipelineStageFlags wait_dst_stage_mask,
                                            VkSemaphore wait_semaphore,
                                            VkSemaphore signal_semaphore,
                                            VkFence signal_fence)
{
  if (render_graph->is_empty()) {
    /* Nothing to record; make the render graph available for reuse right away. */
    render_graph->reset();
    BLI_thread_queue_push(unused_render_graphs_, render_graph);
    return 0;
  }

  VKRenderGraphSubmitTask *submit_task = MEM_new<VKRenderGraphSubmitTask>(__func__);
  submit_task->render_graph = render_graph;
  submit_task->submit_to_device = submit_to_device;
  submit_task->wait_dst_stage_mask = wait_dst_stage_mask;
  submit_task->wait_semaphore = wait_semaphore;
  submit_task->signal_semaphore = signal_semaphore;
  submit_task->signal_fence = signal_fence;
  submit_task->is_submitted_ptr = nullptr;

  /* We need to wait for the submission, as otherwise the signal semaphore might not yet be
   * pending when the caller starts using it. */
  const bool wait_for_submission = signal_semaphore != VK_NULL_HANDLE && !wait_for_completion;
  /* The submission runner only raises the flag for tasks that are submitted to the device.
   * Polling it for a record-only task would never terminate. */
  BLI_assert(!wait_for_submission || submit_to_device);

  /* Written by the submission runner thread, polled below by this thread. */
  std::atomic<bool> is_submitted{false};
  if (wait_for_submission) {
    submit_task->is_submitted_ptr = &is_submitted;
  }

  /* Only submissions to the device advance the timeline. Record-only tasks are tagged with the
   * value that the next device submission will use. */
  TimelineValue timeline = submit_task->timeline = submit_to_device ? ++timeline_value_ :
                                                                      timeline_value_ + 1;
  orphaned_data.timeline_ = timeline + 1;
  orphaned_data.move_data(context_discard_pool, timeline);

  /* Ownership of the task is transferred to the submission runner. */
  BLI_thread_queue_push(submitted_render_graphs_, submit_task);
  submit_task = nullptr;

  if (wait_for_submission) {
    while (!is_submitted) {
      using namespace std::chrono_literals;
      std::this_thread::sleep_for(1ns);
    }
  }

  if (wait_for_completion) {
    wait_for_timeline(timeline);
  }
  return timeline;
}
/**
 * Block the calling thread until the device timeline semaphore has reached `timeline`.
 *
 * A timeline of 0 is what #render_graph_submit returns when nothing was queued; in that case
 * there is nothing to wait for.
 */
void VKDevice::wait_for_timeline(TimelineValue timeline)
{
  if (timeline != 0) {
    VkSemaphoreWaitInfo wait_info = {};
    wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO;
    wait_info.pNext = nullptr;
    wait_info.flags = 0;
    wait_info.semaphoreCount = 1;
    wait_info.pSemaphores = &vk_timeline_semaphore_;
    wait_info.pValues = &timeline;
    /* No timeout: wait for as long as it takes. */
    vkWaitSemaphores(vk_device_, &wait_info, UINT64_MAX);
  }
}
/**
 * Get a render graph that can be filled with new commands.
 *
 * A previously used render graph is taken from the queue of unused render graphs when one is
 * available. Otherwise a new one is allocated and registered in `render_graphs_`.
 */
render_graph::VKRenderGraph *VKDevice::render_graph_new()
{
  /* Timeout of 0: only take a recycled render graph when one is available. */
  auto *recycled = static_cast<render_graph::VKRenderGraph *>(
      BLI_thread_queue_pop_timeout(unused_render_graphs_, 0));
  if (recycled != nullptr) {
    return recycled;
  }

  /* Allocation and registration are done while holding the resources mutex. */
  std::scoped_lock lock(resources.mutex);
  render_graph::VKRenderGraph *created = MEM_new<render_graph::VKRenderGraph>(__func__,
                                                                              resources);
  render_graphs_.append(created);
  return created;
}
/**
 * Task body of the submission pool.
 *
 * Pops #VKRenderGraphSubmitTask items from `submitted_render_graphs_`, records their render graph
 * into command buffers and (when requested by the task) submits the recorded command buffers to
 * the device queue. Runs until the device lifetime reaches `Lifetime::DEINITIALIZING`.
 *
 * \param pool: Task pool; its user data is the #VKDevice that owns the pool.
 * \param task_data: Unused.
 */
void VKDevice::submission_runner(TaskPool *__restrict pool, void *task_data)
{
  UNUSED_VARS(task_data);
  VKDevice *device = static_cast<VKDevice *>(BLI_task_pool_user_data(pool));

  /* Number of command buffers allocated at once when none can be reused. */
  constexpr uint32_t command_buffer_batch_size = 10;

  VkCommandPool vk_command_pool = VK_NULL_HANDLE;
  VkCommandPoolCreateInfo vk_command_pool_create_info = {
      VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      nullptr,
      VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
      device->vk_queue_family_};
  vkCreateCommandPool(device->vk_device_, &vk_command_pool_create_info, nullptr, &vk_command_pool);

  render_graph::VKScheduler scheduler;
  render_graph::VKCommandBuilder command_builder;
  /* Command buffers that are ready to be (re)used. */
  Vector<VkCommandBuffer> command_buffers_unused;
  /* Command buffers that have been submitted, tagged with the timeline of their submission. */
  TimelineResources<VkCommandBuffer> command_buffers_in_use;
  /* Handle of the command buffer that is currently being recorded. */
  VkCommandBuffer vk_command_buffer = VK_NULL_HANDLE;
  /* Command buffers that ended recording, but have not been submitted to the queue yet. */
  Vector<VkCommandBuffer> unsubmitted_command_buffers;
  Vector<VkSubmitInfo> submit_infos;
  submit_infos.reserve(2);
  std::optional<render_graph::VKCommandBufferWrapper> command_buffer;

  while (device->lifetime < Lifetime::DEINITIALIZING) {
    /* Pop with a timeout so the loop condition is re-evaluated when no work arrives. */
    VKRenderGraphSubmitTask *submit_task = static_cast<VKRenderGraphSubmitTask *>(
        BLI_thread_queue_pop_timeout(device->submitted_render_graphs_, 1));
    if (submit_task == nullptr) {
      continue;
    }

    /* End current command buffer when we need to wait for a semaphore. In this case all previous
     * recorded commands can run before the wait semaphores. The commands that must be guarded by
     * the semaphores are part of the new submitted render graph. */
    if (submit_task->wait_semaphore != VK_NULL_HANDLE && command_buffer.has_value()) {
      command_buffer->end_recording();
      unsubmitted_command_buffers.append(vk_command_buffer);
      command_buffer.reset();
    }

    if (!command_buffer.has_value()) {
      /* Check for completed command buffers that can be reused. */
      if (command_buffers_unused.is_empty()) {
        uint64_t current_timeline = device->submission_finished_timeline_get();
        command_buffers_in_use.remove_old(current_timeline,
                                          [&](VkCommandBuffer finished_command_buffer) {
                                            command_buffers_unused.append(finished_command_buffer);
                                          });
      }

      /* Create new command buffers when there are none left to be reused. */
      if (command_buffers_unused.is_empty()) {
        command_buffers_unused.resize(command_buffer_batch_size, VK_NULL_HANDLE);
        VkCommandBufferAllocateInfo vk_command_buffer_allocate_info = {
            VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
            nullptr,
            vk_command_pool,
            VK_COMMAND_BUFFER_LEVEL_PRIMARY,
            command_buffer_batch_size};
        vkAllocateCommandBuffers(
            device->vk_device_, &vk_command_buffer_allocate_info, command_buffers_unused.data());
      }

      vk_command_buffer = command_buffers_unused.pop_last();
      command_buffer = std::make_optional<render_graph::VKCommandBufferWrapper>(
          vk_command_buffer, device->extensions_);
      command_buffer->begin_recording();
    }

    BLI_assert(vk_command_buffer != VK_NULL_HANDLE);

    render_graph::VKRenderGraph &render_graph = *submit_task->render_graph;
    Span<render_graph::NodeHandle> node_handles = scheduler.select_nodes(render_graph);
    {
      /* Building the nodes is done while holding the resources mutex. */
      std::scoped_lock lock_resources(device->resources.mutex);
      command_builder.build_nodes(render_graph, *command_buffer, node_handles);
    }
    command_builder.record_commands(render_graph, *command_buffer, node_handles);

    if (submit_task->submit_to_device) {
      /* Finalize current command buffer. It is appended before any pointer into
       * `unsubmitted_command_buffers` is stored in a submit info, as appending can reallocate the
       * vector and would leave previously stored pointers dangling. */
      command_buffer->end_recording();
      unsubmitted_command_buffers.append(vk_command_buffer);

      /* Create submit info for the previous command buffers (everything except the last one).
       * These are submitted without any semaphores. */
      submit_infos.clear();
      const uint32_t previous_command_buffers_len = uint32_t(unsubmitted_command_buffers.size() -
                                                             1);
      if (previous_command_buffers_len != 0) {
        VkSubmitInfo vk_submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO,
                                       nullptr,
                                       0,
                                       nullptr,
                                       nullptr,
                                       previous_command_buffers_len,
                                       unsubmitted_command_buffers.data(),
                                       0,
                                       nullptr};
        submit_infos.append(vk_submit_info);
      }

      /* Create submit info for the current command buffer. It waits for the optional wait
       * semaphore and always signals the device timeline semaphore, followed by the optional
       * signal semaphore of the task. */
      uint32_t wait_semaphore_len = submit_task->wait_semaphore == VK_NULL_HANDLE ? 0 : 1;
      uint32_t signal_semaphore_len = submit_task->signal_semaphore == VK_NULL_HANDLE ? 1 : 2;
      VkSemaphore signal_semaphores[2] = {device->vk_timeline_semaphore_,
                                          submit_task->signal_semaphore};
      /* Second value belongs to the task's signal semaphore and is set to 0. */
      uint64_t signal_semaphore_values[2] = {submit_task->timeline, 0};

      VkTimelineSemaphoreSubmitInfo vk_timeline_semaphore_submit_info = {
          VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
          nullptr,
          0,
          nullptr,
          signal_semaphore_len,
          signal_semaphore_values};
      VkSubmitInfo vk_submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO,
                                     &vk_timeline_semaphore_submit_info,
                                     wait_semaphore_len,
                                     &submit_task->wait_semaphore,
                                     &submit_task->wait_dst_stage_mask,
                                     1,
                                     &unsubmitted_command_buffers.last(),
                                     signal_semaphore_len,
                                     signal_semaphores};
      submit_infos.append(vk_submit_info);

      {
        std::scoped_lock lock_queue(*device->queue_mutex_);
        vkQueueSubmit(device->vk_queue_,
                      uint32_t(submit_infos.size()),
                      submit_infos.data(),
                      submit_task->signal_fence);
      }

      /* Notify the submitting thread (when it asked for it) that the submission has been done.
       * The flag is not accessed anymore after this point. */
      if (submit_task->is_submitted_ptr != nullptr) {
        *submit_task->is_submitted_ptr = true;
      }

      /* Track the submitted command buffers so they can be reused when the timeline passed. */
      vk_command_buffer = VK_NULL_HANDLE;
      for (VkCommandBuffer submitted_command_buffer : unsubmitted_command_buffers) {
        command_buffers_in_use.append_timeline(submit_task->timeline, submitted_command_buffer);
      }
      unsubmitted_command_buffers.clear();
      command_buffer.reset();
    }

    /* Recycle the render graph and free the task. */
    render_graph.reset();
    BLI_thread_queue_push(device->unused_render_graphs_, submit_task->render_graph);
    MEM_delete<VKRenderGraphSubmitTask>(submit_task);
  }

  /* Clear command buffers and pool. */
  vkDeviceWaitIdle(device->vk_device_);
  command_buffers_in_use.remove_old(UINT64_MAX, [&](VkCommandBuffer finished_command_buffer) {
    command_buffers_unused.append(finished_command_buffer);
  });
  /* `vkFreeCommandBuffers` requires a count greater than zero. Command buffers that are not in
   * this list are released together with the command pool. */
  if (!command_buffers_unused.is_empty()) {
    vkFreeCommandBuffers(device->vk_device_,
                         vk_command_pool,
                         uint32_t(command_buffers_unused.size()),
                         command_buffers_unused.data());
  }
  vkDestroyCommandPool(device->vk_device_, vk_command_pool, nullptr);
}
/**
 * Create the resources used for render graph submission and start the submission runner.
 *
 * The queues and the timeline semaphore are created before the runner task is pushed: the runner
 * reads `submitted_render_graphs_`, `unused_render_graphs_` and `vk_timeline_semaphore_`, and it
 * may start executing on its own thread as soon as it has been pushed.
 */
void VKDevice::init_submission_pool()
{
  submitted_render_graphs_ = BLI_thread_queue_init();
  unused_render_graphs_ = BLI_thread_queue_init();

  /* Timeline semaphore with an initial value of 0. */
  VkSemaphoreTypeCreateInfo vk_semaphore_type_create_info = {
      VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr, VK_SEMAPHORE_TYPE_TIMELINE, 0};
  VkSemaphoreCreateInfo vk_semaphore_create_info = {
      VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, &vk_semaphore_type_create_info, 0};
  vkCreateSemaphore(vk_device_, &vk_semaphore_create_info, nullptr, &vk_timeline_semaphore_);

  /* Start the runner last, when everything it depends on exists. */
  submission_pool_ = BLI_task_pool_create_background_serial(this, TASK_PRIORITY_HIGH);
  BLI_task_pool_push(submission_pool_, VKDevice::submission_runner, nullptr, false, nullptr);
}
/**
 * Free the submission pool, the submission queues and the timeline semaphore.
 *
 * Tasks that are still queued are freed without being processed.
 */
void VKDevice::deinit_submission_pool()
{
  /* NOTE(review): presumably this waits for the submission runner to finish - confirm. */
  BLI_task_pool_free(submission_pool_);
  submission_pool_ = nullptr;

  /* Free the tasks that were queued, but never picked up by the submission runner. */
  while (!BLI_thread_queue_is_empty(submitted_render_graphs_)) {
    void *pending_task = BLI_thread_queue_pop(submitted_render_graphs_);
    MEM_delete<VKRenderGraphSubmitTask>(static_cast<VKRenderGraphSubmitTask *>(pending_task));
  }

  BLI_thread_queue_free(submitted_render_graphs_);
  BLI_thread_queue_free(unused_render_graphs_);
  submitted_render_graphs_ = nullptr;
  unused_render_graphs_ = nullptr;

  vkDestroySemaphore(vk_device_, vk_timeline_semaphore_, nullptr);
  vk_timeline_semaphore_ = VK_NULL_HANDLE;
}
/** \} */
} // namespace blender::gpu