Cycles: refactor API for GPU display
* Split GPUDisplay into two classes: PathTraceDisplay to implement the Cycles side, and DisplayDriver to implement the host application side. The DisplayDriver is now a fully abstract base class, embedded in the PathTraceDisplay.
* Move copy_pixels_to_texture implementation out of the host side into the Cycles side, since it can be implemented in terms of the texture buffer mapping.
* Move definition of DeviceGraphicsInteropDestination into display driver header, so that we do not need to expose private device headers in the public API.
* Add more detailed comments about how the DisplayDriver should be implemented.

The "driver" terminology might not be obvious, but is also used in other renderers.

Differential Revision: https://developer.blender.org/D12626
This commit is contained in:
@@ -31,9 +31,9 @@ set(INC_SYS
|
||||
set(SRC
|
||||
blender_camera.cpp
|
||||
blender_device.cpp
|
||||
blender_display_driver.cpp
|
||||
blender_image.cpp
|
||||
blender_geometry.cpp
|
||||
blender_gpu_display.cpp
|
||||
blender_light.cpp
|
||||
blender_mesh.cpp
|
||||
blender_object.cpp
|
||||
@@ -51,7 +51,7 @@ set(SRC
|
||||
|
||||
CCL_api.h
|
||||
blender_device.h
|
||||
blender_gpu_display.h
|
||||
blender_display_driver.h
|
||||
blender_id_map.h
|
||||
blender_image.h
|
||||
blender_object_cull.h
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "blender/blender_gpu_display.h"
|
||||
#include "blender/blender_display_driver.h"
|
||||
|
||||
#include "device/device.h"
|
||||
#include "util/util_logging.h"
|
||||
@@ -273,17 +273,17 @@ uint BlenderDisplaySpaceShader::get_shader_program()
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* BlenderGPUDisplay.
|
||||
* BlenderDisplayDriver.
|
||||
*/
|
||||
|
||||
BlenderGPUDisplay::BlenderGPUDisplay(BL::RenderEngine &b_engine, BL::Scene &b_scene)
|
||||
BlenderDisplayDriver::BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene)
|
||||
: b_engine_(b_engine), display_shader_(BlenderDisplayShader::create(b_engine, b_scene))
|
||||
{
|
||||
/* Create context while on the main thread. */
|
||||
gl_context_create();
|
||||
}
|
||||
|
||||
BlenderGPUDisplay::~BlenderGPUDisplay()
|
||||
BlenderDisplayDriver::~BlenderDisplayDriver()
|
||||
{
|
||||
gl_resources_destroy();
|
||||
}
|
||||
@@ -292,19 +292,18 @@ BlenderGPUDisplay::~BlenderGPUDisplay()
|
||||
* Update procedure.
|
||||
*/
|
||||
|
||||
bool BlenderGPUDisplay::do_update_begin(const GPUDisplayParams ¶ms,
|
||||
bool BlenderDisplayDriver::update_begin(const Params ¶ms,
|
||||
int texture_width,
|
||||
int texture_height)
|
||||
{
|
||||
/* Note that it's the responsibility of BlenderGPUDisplay to ensure updating and drawing
|
||||
/* Note that it's the responsibility of BlenderDisplayDriver to ensure updating and drawing
|
||||
* the texture does not happen at the same time. This is achieved indirectly.
|
||||
*
|
||||
* When enabling the OpenGL context, it uses an internal mutex lock DST.gl_context_lock.
|
||||
* This same lock is also held when draw() is called, which together ensure mutual
|
||||
* exclusion.
|
||||
*
|
||||
* This locking is not performed at the GPU display level, because that would cause lock
|
||||
* inversion. */
|
||||
* This locking is not performed on the Cycles side, because that would cause lock inversion. */
|
||||
if (!gl_context_enable()) {
|
||||
return false;
|
||||
}
|
||||
@@ -361,7 +360,7 @@ bool BlenderGPUDisplay::do_update_begin(const GPUDisplayParams ¶ms,
|
||||
return true;
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::do_update_end()
|
||||
void BlenderDisplayDriver::update_end()
|
||||
{
|
||||
gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
glFlush();
|
||||
@@ -369,54 +368,18 @@ void BlenderGPUDisplay::do_update_end()
|
||||
gl_context_disable();
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Texture update from CPU buffer.
|
||||
*/
|
||||
|
||||
void BlenderGPUDisplay::do_copy_pixels_to_texture(
|
||||
const half4 *rgba_pixels, int texture_x, int texture_y, int pixels_width, int pixels_height)
|
||||
{
|
||||
/* This call copies pixels to a Pixel Buffer Object (PBO) which is much cheaper from CPU time
|
||||
* point of view than to copy data directly to the OpenGL texture.
|
||||
*
|
||||
* The possible downside of this approach is that it might require a higher peak memory when
|
||||
* doing partial updates of the texture (although, in practice even partial updates might peak
|
||||
* with a full-frame buffer stored on the CPU if the GPU is currently occupied). */
|
||||
|
||||
half4 *mapped_rgba_pixels = map_texture_buffer();
|
||||
if (!mapped_rgba_pixels) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (texture_x == 0 && texture_y == 0 && pixels_width == texture_.width &&
|
||||
pixels_height == texture_.height) {
|
||||
const size_t size_in_bytes = sizeof(half4) * texture_.width * texture_.height;
|
||||
memcpy(mapped_rgba_pixels, rgba_pixels, size_in_bytes);
|
||||
}
|
||||
else {
|
||||
const half4 *rgba_row = rgba_pixels;
|
||||
half4 *mapped_rgba_row = mapped_rgba_pixels + texture_y * texture_.width + texture_x;
|
||||
for (int y = 0; y < pixels_height;
|
||||
++y, rgba_row += pixels_width, mapped_rgba_row += texture_.width) {
|
||||
memcpy(mapped_rgba_row, rgba_row, sizeof(half4) * pixels_width);
|
||||
}
|
||||
}
|
||||
|
||||
unmap_texture_buffer();
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Texture buffer mapping.
|
||||
*/
|
||||
|
||||
half4 *BlenderGPUDisplay::do_map_texture_buffer()
|
||||
half4 *BlenderDisplayDriver::map_texture_buffer()
|
||||
{
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
|
||||
|
||||
half4 *mapped_rgba_pixels = reinterpret_cast<half4 *>(
|
||||
glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY));
|
||||
if (!mapped_rgba_pixels) {
|
||||
LOG(ERROR) << "Error mapping BlenderGPUDisplay pixel buffer object.";
|
||||
LOG(ERROR) << "Error mapping BlenderDisplayDriver pixel buffer object.";
|
||||
}
|
||||
|
||||
if (texture_.need_clear) {
|
||||
@@ -431,7 +394,7 @@ half4 *BlenderGPUDisplay::do_map_texture_buffer()
|
||||
return mapped_rgba_pixels;
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::do_unmap_texture_buffer()
|
||||
void BlenderDisplayDriver::unmap_texture_buffer()
|
||||
{
|
||||
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
|
||||
|
||||
@@ -442,9 +405,9 @@ void BlenderGPUDisplay::do_unmap_texture_buffer()
|
||||
* Graphics interoperability.
|
||||
*/
|
||||
|
||||
DeviceGraphicsInteropDestination BlenderGPUDisplay::do_graphics_interop_get()
|
||||
BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get()
|
||||
{
|
||||
DeviceGraphicsInteropDestination interop_dst;
|
||||
GraphicsInterop interop_dst;
|
||||
|
||||
interop_dst.buffer_width = texture_.buffer_width;
|
||||
interop_dst.buffer_height = texture_.buffer_height;
|
||||
@@ -456,12 +419,12 @@ DeviceGraphicsInteropDestination BlenderGPUDisplay::do_graphics_interop_get()
|
||||
return interop_dst;
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::graphics_interop_activate()
|
||||
void BlenderDisplayDriver::graphics_interop_activate()
|
||||
{
|
||||
gl_context_enable();
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::graphics_interop_deactivate()
|
||||
void BlenderDisplayDriver::graphics_interop_deactivate()
|
||||
{
|
||||
gl_context_disable();
|
||||
}
|
||||
@@ -470,17 +433,17 @@ void BlenderGPUDisplay::graphics_interop_deactivate()
|
||||
* Drawing.
|
||||
*/
|
||||
|
||||
void BlenderGPUDisplay::clear()
|
||||
void BlenderDisplayDriver::clear()
|
||||
{
|
||||
texture_.need_clear = true;
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::set_zoom(float zoom_x, float zoom_y)
|
||||
void BlenderDisplayDriver::set_zoom(float zoom_x, float zoom_y)
|
||||
{
|
||||
zoom_ = make_float2(zoom_x, zoom_y);
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::do_draw(const GPUDisplayParams ¶ms)
|
||||
void BlenderDisplayDriver::draw(const Params ¶ms)
|
||||
{
|
||||
/* See update_begin() for why no locking is required here. */
|
||||
const bool transparent = true; // TODO(sergey): Derive this from Film.
|
||||
@@ -584,7 +547,7 @@ void BlenderGPUDisplay::do_draw(const GPUDisplayParams ¶ms)
|
||||
}
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::gl_context_create()
|
||||
void BlenderDisplayDriver::gl_context_create()
|
||||
{
|
||||
/* When rendering in viewport there is no render context available via engine.
|
||||
* Check whether own context is to be created here.
|
||||
@@ -613,7 +576,7 @@ void BlenderGPUDisplay::gl_context_create()
|
||||
}
|
||||
}
|
||||
|
||||
bool BlenderGPUDisplay::gl_context_enable()
|
||||
bool BlenderDisplayDriver::gl_context_enable()
|
||||
{
|
||||
if (use_gl_context_) {
|
||||
if (!gl_context_) {
|
||||
@@ -628,7 +591,7 @@ bool BlenderGPUDisplay::gl_context_enable()
|
||||
return true;
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::gl_context_disable()
|
||||
void BlenderDisplayDriver::gl_context_disable()
|
||||
{
|
||||
if (use_gl_context_) {
|
||||
if (gl_context_) {
|
||||
@@ -641,7 +604,7 @@ void BlenderGPUDisplay::gl_context_disable()
|
||||
RE_engine_render_context_disable(reinterpret_cast<RenderEngine *>(b_engine_.ptr.data));
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::gl_context_dispose()
|
||||
void BlenderDisplayDriver::gl_context_dispose()
|
||||
{
|
||||
if (gl_context_) {
|
||||
const bool drw_state = DRW_opengl_context_release();
|
||||
@@ -653,7 +616,7 @@ void BlenderGPUDisplay::gl_context_dispose()
|
||||
}
|
||||
}
|
||||
|
||||
bool BlenderGPUDisplay::gl_draw_resources_ensure()
|
||||
bool BlenderDisplayDriver::gl_draw_resources_ensure()
|
||||
{
|
||||
if (!texture_.gl_id) {
|
||||
/* If there is no texture allocated, there is nothing to draw. Inform the draw call that it can
|
||||
@@ -680,7 +643,7 @@ bool BlenderGPUDisplay::gl_draw_resources_ensure()
|
||||
return true;
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::gl_resources_destroy()
|
||||
void BlenderDisplayDriver::gl_resources_destroy()
|
||||
{
|
||||
gl_context_enable();
|
||||
|
||||
@@ -703,7 +666,7 @@ void BlenderGPUDisplay::gl_resources_destroy()
|
||||
gl_context_dispose();
|
||||
}
|
||||
|
||||
bool BlenderGPUDisplay::gl_texture_resources_ensure()
|
||||
bool BlenderDisplayDriver::gl_texture_resources_ensure()
|
||||
{
|
||||
if (texture_.creation_attempted) {
|
||||
return texture_.is_created;
|
||||
@@ -740,7 +703,7 @@ bool BlenderGPUDisplay::gl_texture_resources_ensure()
|
||||
return true;
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::texture_update_if_needed()
|
||||
void BlenderDisplayDriver::texture_update_if_needed()
|
||||
{
|
||||
if (!texture_.need_update) {
|
||||
return;
|
||||
@@ -754,7 +717,7 @@ void BlenderGPUDisplay::texture_update_if_needed()
|
||||
texture_.need_update = false;
|
||||
}
|
||||
|
||||
void BlenderGPUDisplay::vertex_buffer_update(const GPUDisplayParams ¶ms)
|
||||
void BlenderDisplayDriver::vertex_buffer_update(const Params ¶ms)
|
||||
{
|
||||
/* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be
|
||||
* rendered. */
|
||||
@@ -767,23 +730,23 @@ void BlenderGPUDisplay::vertex_buffer_update(const GPUDisplayParams ¶ms)
|
||||
|
||||
vpointer[0] = 0.0f;
|
||||
vpointer[1] = 0.0f;
|
||||
vpointer[2] = params.offset.x;
|
||||
vpointer[3] = params.offset.y;
|
||||
vpointer[2] = params.full_offset.x;
|
||||
vpointer[3] = params.full_offset.y;
|
||||
|
||||
vpointer[4] = 1.0f;
|
||||
vpointer[5] = 0.0f;
|
||||
vpointer[6] = (float)params.size.x + params.offset.x;
|
||||
vpointer[7] = params.offset.y;
|
||||
vpointer[6] = (float)params.size.x + params.full_offset.x;
|
||||
vpointer[7] = params.full_offset.y;
|
||||
|
||||
vpointer[8] = 1.0f;
|
||||
vpointer[9] = 1.0f;
|
||||
vpointer[10] = (float)params.size.x + params.offset.x;
|
||||
vpointer[11] = (float)params.size.y + params.offset.y;
|
||||
vpointer[10] = (float)params.size.x + params.full_offset.x;
|
||||
vpointer[11] = (float)params.size.y + params.full_offset.y;
|
||||
|
||||
vpointer[12] = 0.0f;
|
||||
vpointer[13] = 1.0f;
|
||||
vpointer[14] = params.offset.x;
|
||||
vpointer[15] = (float)params.size.y + params.offset.y;
|
||||
vpointer[14] = params.full_offset.x;
|
||||
vpointer[15] = (float)params.size.y + params.full_offset.y;
|
||||
|
||||
glUnmapBuffer(GL_ARRAY_BUFFER);
|
||||
}
|
||||
@@ -22,12 +22,14 @@
|
||||
|
||||
#include "RNA_blender_cpp.h"
|
||||
|
||||
#include "render/gpu_display.h"
|
||||
#include "render/display_driver.h"
|
||||
|
||||
#include "util/util_thread.h"
|
||||
#include "util/util_unique_ptr.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Base class of shader used for GPU display rendering. */
|
||||
/* Base class of shader used for display driver rendering. */
|
||||
class BlenderDisplayShader {
|
||||
public:
|
||||
static constexpr const char *position_attribute_name = "pos";
|
||||
@@ -96,11 +98,11 @@ class BlenderDisplaySpaceShader : public BlenderDisplayShader {
|
||||
uint shader_program_ = 0;
|
||||
};
|
||||
|
||||
/* GPU display implementation which is specific for Blender viewport integration. */
|
||||
class BlenderGPUDisplay : public GPUDisplay {
|
||||
/* Display driver implementation which is specific for Blender viewport integration. */
|
||||
class BlenderDisplayDriver : public DisplayDriver {
|
||||
public:
|
||||
BlenderGPUDisplay(BL::RenderEngine &b_engine, BL::Scene &b_scene);
|
||||
~BlenderGPUDisplay();
|
||||
BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene);
|
||||
~BlenderDisplayDriver();
|
||||
|
||||
virtual void graphics_interop_activate() override;
|
||||
virtual void graphics_interop_deactivate() override;
|
||||
@@ -110,22 +112,15 @@ class BlenderGPUDisplay : public GPUDisplay {
|
||||
void set_zoom(float zoom_x, float zoom_y);
|
||||
|
||||
protected:
|
||||
virtual bool do_update_begin(const GPUDisplayParams ¶ms,
|
||||
int texture_width,
|
||||
int texture_height) override;
|
||||
virtual void do_update_end() override;
|
||||
virtual bool update_begin(const Params ¶ms, int texture_width, int texture_height) override;
|
||||
virtual void update_end() override;
|
||||
|
||||
virtual void do_copy_pixels_to_texture(const half4 *rgba_pixels,
|
||||
int texture_x,
|
||||
int texture_y,
|
||||
int pixels_width,
|
||||
int pixels_height) override;
|
||||
virtual void do_draw(const GPUDisplayParams ¶ms) override;
|
||||
virtual half4 *map_texture_buffer() override;
|
||||
virtual void unmap_texture_buffer() override;
|
||||
|
||||
virtual half4 *do_map_texture_buffer() override;
|
||||
virtual void do_unmap_texture_buffer() override;
|
||||
virtual GraphicsInterop graphics_interop_get() override;
|
||||
|
||||
virtual DeviceGraphicsInteropDestination do_graphics_interop_get() override;
|
||||
virtual void draw(const Params ¶ms) override;
|
||||
|
||||
/* Helper function which allocates new GPU context. */
|
||||
void gl_context_create();
|
||||
@@ -152,13 +147,13 @@ class BlenderGPUDisplay : public GPUDisplay {
|
||||
* This buffer is used to render texture in the viewport.
|
||||
*
|
||||
* NOTE: The buffer needs to be bound. */
|
||||
void vertex_buffer_update(const GPUDisplayParams ¶ms);
|
||||
void vertex_buffer_update(const Params ¶ms);
|
||||
|
||||
BL::RenderEngine b_engine_;
|
||||
|
||||
/* OpenGL context which is used when the render engine doesn't have its own. */
|
||||
void *gl_context_ = nullptr;
|
||||
/* The when Blender RenderEngine side context is not available and the GPUDisplay is to create
|
||||
/* True when the Blender RenderEngine side context is not available and the DisplayDriver is to create
|
||||
* its own context. */
|
||||
bool use_gl_context_ = false;
|
||||
/* Mutex used to guard the `gl_context_`. */
|
||||
@@ -42,7 +42,7 @@
|
||||
#include "util/util_progress.h"
|
||||
#include "util/util_time.h"
|
||||
|
||||
#include "blender/blender_gpu_display.h"
|
||||
#include "blender/blender_display_driver.h"
|
||||
#include "blender/blender_session.h"
|
||||
#include "blender/blender_sync.h"
|
||||
#include "blender/blender_util.h"
|
||||
@@ -159,9 +159,10 @@ void BlenderSession::create_session()
|
||||
|
||||
/* Create display driver. */
|
||||
if (!b_engine.is_preview() && !headless) {
|
||||
unique_ptr<BlenderGPUDisplay> gpu_display = make_unique<BlenderGPUDisplay>(b_engine, b_scene);
|
||||
gpu_display_ = gpu_display.get();
|
||||
session->set_gpu_display(move(gpu_display));
|
||||
unique_ptr<BlenderDisplayDriver> display_driver = make_unique<BlenderDisplayDriver>(b_engine,
|
||||
b_scene);
|
||||
display_driver_ = display_driver.get();
|
||||
session->set_display_driver(move(display_driver));
|
||||
}
|
||||
|
||||
/* Viewport and preview (as in, material preview) does not do tiled rendering, so can inform
|
||||
@@ -446,7 +447,7 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
|
||||
|
||||
/* Use final write for preview renders, otherwise render result wouldn't be updated on Blender
|
||||
* side. */
|
||||
/* TODO(sergey): Investigate whether GPUDisplay can be used for the preview as well. */
|
||||
/* TODO(sergey): Investigate whether DisplayDriver can be used for the preview as well. */
|
||||
if (b_engine.is_preview()) {
|
||||
session->update_render_tile_cb = [&]() { write_render_tile(); };
|
||||
}
|
||||
@@ -708,7 +709,7 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
|
||||
|
||||
session->read_render_tile_cb = [&]() { read_render_tile(); };
|
||||
session->write_render_tile_cb = [&]() { write_render_tile(); };
|
||||
session->set_gpu_display(nullptr);
|
||||
session->set_display_driver(nullptr);
|
||||
|
||||
if (!session->progress.get_cancel()) {
|
||||
/* Sync scene. */
|
||||
@@ -895,7 +896,7 @@ void BlenderSession::draw(BL::SpaceImageEditor &space_image)
|
||||
}
|
||||
|
||||
BL::Array<float, 2> zoom = space_image.zoom();
|
||||
gpu_display_->set_zoom(zoom[0], zoom[1]);
|
||||
display_driver_->set_zoom(zoom[0], zoom[1]);
|
||||
|
||||
session->draw();
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class BlenderGPUDisplay;
|
||||
class BlenderDisplayDriver;
|
||||
class BlenderSync;
|
||||
class ImageMetaData;
|
||||
class Scene;
|
||||
@@ -164,8 +164,8 @@ class BlenderSession {
|
||||
int last_pass_index = -1;
|
||||
} draw_state_;
|
||||
|
||||
/* NOTE: The BlenderSession references the GPU display. */
|
||||
BlenderGPUDisplay *gpu_display_ = nullptr;
|
||||
/* NOTE: The BlenderSession references the display driver. */
|
||||
BlenderDisplayDriver *display_driver_ = nullptr;
|
||||
|
||||
vector<string> full_buffer_files_;
|
||||
};
|
||||
|
||||
@@ -37,14 +37,15 @@ CUDADeviceGraphicsInterop::~CUDADeviceGraphicsInterop()
|
||||
}
|
||||
}
|
||||
|
||||
void CUDADeviceGraphicsInterop::set_destination(
|
||||
const DeviceGraphicsInteropDestination &destination)
|
||||
void CUDADeviceGraphicsInterop::set_display_interop(
|
||||
const DisplayDriver::GraphicsInterop &display_interop)
|
||||
{
|
||||
const int64_t new_buffer_area = int64_t(destination.buffer_width) * destination.buffer_height;
|
||||
const int64_t new_buffer_area = int64_t(display_interop.buffer_width) *
|
||||
display_interop.buffer_height;
|
||||
|
||||
need_clear_ = destination.need_clear;
|
||||
need_clear_ = display_interop.need_clear;
|
||||
|
||||
if (opengl_pbo_id_ == destination.opengl_pbo_id && buffer_area_ == new_buffer_area) {
|
||||
if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -55,12 +56,12 @@ void CUDADeviceGraphicsInterop::set_destination(
|
||||
}
|
||||
|
||||
const CUresult result = cuGraphicsGLRegisterBuffer(
|
||||
&cu_graphics_resource_, destination.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
|
||||
&cu_graphics_resource_, display_interop.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
|
||||
if (result != CUDA_SUCCESS) {
|
||||
LOG(ERROR) << "Error registering OpenGL buffer: " << cuewErrorString(result);
|
||||
}
|
||||
|
||||
opengl_pbo_id_ = destination.opengl_pbo_id;
|
||||
opengl_pbo_id_ = display_interop.opengl_pbo_id;
|
||||
buffer_area_ = new_buffer_area;
|
||||
}
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ class CUDADeviceGraphicsInterop : public DeviceGraphicsInterop {
|
||||
CUDADeviceGraphicsInterop &operator=(const CUDADeviceGraphicsInterop &other) = delete;
|
||||
CUDADeviceGraphicsInterop &operator=(CUDADeviceGraphicsInterop &&other) = delete;
|
||||
|
||||
virtual void set_destination(const DeviceGraphicsInteropDestination &destination) override;
|
||||
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override;
|
||||
|
||||
virtual device_ptr map() override;
|
||||
virtual void unmap() override;
|
||||
|
||||
@@ -16,25 +16,12 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "render/display_driver.h"
|
||||
|
||||
#include "util/util_types.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Information about interoperability destination.
|
||||
* Is provided by the GPUDisplay. */
|
||||
class DeviceGraphicsInteropDestination {
|
||||
public:
|
||||
/* Dimensions of the buffer, in pixels. */
|
||||
int buffer_width = 0;
|
||||
int buffer_height = 0;
|
||||
|
||||
/* OpenGL pixel buffer object. */
|
||||
int opengl_pbo_id = 0;
|
||||
|
||||
/* Clear the entire destination before doing partial write to it. */
|
||||
bool need_clear = false;
|
||||
};
|
||||
|
||||
/* Device-side graphics interoperability support.
|
||||
*
|
||||
* Takes care of holding all the handlers needed by the device to implement interoperability with
|
||||
@@ -46,7 +33,7 @@ class DeviceGraphicsInterop {
|
||||
|
||||
/* Update this device-side graphics interoperability object with the given destination resource
|
||||
* information. */
|
||||
virtual void set_destination(const DeviceGraphicsInteropDestination &destination) = 0;
|
||||
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) = 0;
|
||||
|
||||
virtual device_ptr map() = 0;
|
||||
virtual void unmap() = 0;
|
||||
|
||||
@@ -37,11 +37,15 @@ HIPDeviceGraphicsInterop::~HIPDeviceGraphicsInterop()
|
||||
}
|
||||
}
|
||||
|
||||
void HIPDeviceGraphicsInterop::set_destination(const DeviceGraphicsInteropDestination &destination)
|
||||
void HIPDeviceGraphicsInterop::set_display_interop(
|
||||
const DisplayDriver::GraphicsInterop &display_interop)
|
||||
{
|
||||
const int64_t new_buffer_area = int64_t(destination.buffer_width) * destination.buffer_height;
|
||||
const int64_t new_buffer_area = int64_t(display_interop.buffer_width) *
|
||||
display_interop.buffer_height;
|
||||
|
||||
if (opengl_pbo_id_ == destination.opengl_pbo_id && buffer_area_ == new_buffer_area) {
|
||||
need_clear_ = display_interop.need_clear;
|
||||
|
||||
if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -52,12 +56,12 @@ void HIPDeviceGraphicsInterop::set_destination(const DeviceGraphicsInteropDestin
|
||||
}
|
||||
|
||||
const hipError_t result = hipGraphicsGLRegisterBuffer(
|
||||
&hip_graphics_resource_, destination.opengl_pbo_id, hipGraphicsRegisterFlagsNone);
|
||||
&hip_graphics_resource_, display_interop.opengl_pbo_id, hipGraphicsRegisterFlagsNone);
|
||||
if (result != hipSuccess) {
|
||||
LOG(ERROR) << "Error registering OpenGL buffer: " << hipewErrorString(result);
|
||||
}
|
||||
|
||||
opengl_pbo_id_ = destination.opengl_pbo_id;
|
||||
opengl_pbo_id_ = display_interop.opengl_pbo_id;
|
||||
buffer_area_ = new_buffer_area;
|
||||
}
|
||||
|
||||
@@ -77,6 +81,14 @@ device_ptr HIPDeviceGraphicsInterop::map()
|
||||
hip_device_assert(
|
||||
device_, hipGraphicsResourceGetMappedPointer(&hip_buffer, &bytes, hip_graphics_resource_));
|
||||
|
||||
if (need_clear_) {
|
||||
hip_device_assert(
|
||||
device_,
|
||||
hipMemsetD8Async(static_cast<hipDeviceptr_t>(hip_buffer), 0, bytes, queue_->stream()));
|
||||
|
||||
need_clear_ = false;
|
||||
}
|
||||
|
||||
return static_cast<device_ptr>(hip_buffer);
|
||||
}
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@ class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
|
||||
HIPDeviceGraphicsInterop &operator=(const HIPDeviceGraphicsInterop &other) = delete;
|
||||
HIPDeviceGraphicsInterop &operator=(HIPDeviceGraphicsInterop &&other) = delete;
|
||||
|
||||
virtual void set_destination(const DeviceGraphicsInteropDestination &destination) override;
|
||||
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override;
|
||||
|
||||
virtual device_ptr map() override;
|
||||
virtual void unmap() override;
|
||||
@@ -53,6 +53,9 @@ class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
|
||||
/* Buffer area in pixels of the corresponding PBO. */
|
||||
int64_t buffer_area_ = 0;
|
||||
|
||||
/* The destination was requested to be cleared. */
|
||||
bool need_clear_ = false;
|
||||
|
||||
hipGraphicsResource hip_graphics_resource_ = nullptr;
|
||||
};
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ set(SRC
|
||||
pass_accessor.cpp
|
||||
pass_accessor_cpu.cpp
|
||||
pass_accessor_gpu.cpp
|
||||
path_trace_display.cpp
|
||||
path_trace_work.cpp
|
||||
path_trace_work_cpu.cpp
|
||||
path_trace_work_gpu.cpp
|
||||
@@ -47,6 +48,7 @@ set(SRC_HEADERS
|
||||
pass_accessor.h
|
||||
pass_accessor_cpu.h
|
||||
pass_accessor_gpu.h
|
||||
path_trace_display.h
|
||||
path_trace_work.h
|
||||
path_trace_work_cpu.h
|
||||
path_trace_work_gpu.h
|
||||
|
||||
@@ -19,8 +19,8 @@
|
||||
#include "device/cpu/device.h"
|
||||
#include "device/device.h"
|
||||
#include "integrator/pass_accessor.h"
|
||||
#include "integrator/path_trace_display.h"
|
||||
#include "integrator/render_scheduler.h"
|
||||
#include "render/gpu_display.h"
|
||||
#include "render/pass.h"
|
||||
#include "render/scene.h"
|
||||
#include "render/tile.h"
|
||||
@@ -67,11 +67,11 @@ PathTrace::PathTrace(Device *device,
|
||||
PathTrace::~PathTrace()
|
||||
{
|
||||
/* Destroy any GPU resource which was used for graphics interop.
|
||||
* Need to have access to the GPUDisplay as it is the only source of drawing context which is
|
||||
* used for interop. */
|
||||
if (gpu_display_) {
|
||||
* Need to have access to the PathTraceDisplay as it is the only source of drawing context which
|
||||
* is used for interop. */
|
||||
if (display_) {
|
||||
for (auto &&path_trace_work : path_trace_works_) {
|
||||
path_trace_work->destroy_gpu_resources(gpu_display_.get());
|
||||
path_trace_work->destroy_gpu_resources(display_.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -94,7 +94,7 @@ bool PathTrace::ready_to_reset()
|
||||
{
|
||||
/* The logic here is optimized for the best feedback in the viewport, which implies having a GPU
|
||||
* display. If there is no such display, the logic here will break. */
|
||||
DCHECK(gpu_display_);
|
||||
DCHECK(display_);
|
||||
|
||||
/* The logic here tries to provide behavior which feels the most interactive to artists.
|
||||
* General idea is to be able to reset as quickly as possible, while still providing interactive
|
||||
@@ -126,8 +126,8 @@ void PathTrace::reset(const BufferParams &full_params, const BufferParams &big_t
|
||||
/* NOTE: GPU display checks for buffer modification and avoids unnecessary re-allocation.
|
||||
* It is required to inform about reset whenever it happens, so that the redraw state tracking is
|
||||
* properly updated. */
|
||||
if (gpu_display_) {
|
||||
gpu_display_->reset(full_params);
|
||||
if (display_) {
|
||||
display_->reset(full_params);
|
||||
}
|
||||
|
||||
render_state_.has_denoised_result = false;
|
||||
@@ -535,25 +535,30 @@ void PathTrace::denoise(const RenderWork &render_work)
|
||||
render_scheduler_.report_denoise_time(render_work, time_dt() - start_time);
|
||||
}
|
||||
|
||||
void PathTrace::set_gpu_display(unique_ptr<GPUDisplay> gpu_display)
|
||||
void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
|
||||
{
|
||||
gpu_display_ = move(gpu_display);
|
||||
if (driver) {
|
||||
display_ = make_unique<PathTraceDisplay>(move(driver));
|
||||
}
|
||||
else {
|
||||
display_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void PathTrace::clear_gpu_display()
|
||||
void PathTrace::clear_display()
|
||||
{
|
||||
if (gpu_display_) {
|
||||
gpu_display_->clear();
|
||||
if (display_) {
|
||||
display_->clear();
|
||||
}
|
||||
}
|
||||
|
||||
void PathTrace::draw()
|
||||
{
|
||||
if (!gpu_display_) {
|
||||
if (!display_) {
|
||||
return;
|
||||
}
|
||||
|
||||
did_draw_after_reset_ |= gpu_display_->draw();
|
||||
did_draw_after_reset_ |= display_->draw();
|
||||
}
|
||||
|
||||
void PathTrace::update_display(const RenderWork &render_work)
|
||||
@@ -562,13 +567,13 @@ void PathTrace::update_display(const RenderWork &render_work)
|
||||
return;
|
||||
}
|
||||
|
||||
if (!gpu_display_ && !tile_buffer_update_cb) {
|
||||
if (!display_ && !tile_buffer_update_cb) {
|
||||
VLOG(3) << "Ignore display update.";
|
||||
return;
|
||||
}
|
||||
|
||||
if (full_params_.width == 0 || full_params_.height == 0) {
|
||||
VLOG(3) << "Skipping GPUDisplay update due to 0 size of the render buffer.";
|
||||
VLOG(3) << "Skipping PathTraceDisplay update due to 0 size of the render buffer.";
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -580,13 +585,13 @@ void PathTrace::update_display(const RenderWork &render_work)
|
||||
tile_buffer_update_cb();
|
||||
}
|
||||
|
||||
if (gpu_display_) {
|
||||
if (display_) {
|
||||
VLOG(3) << "Perform copy to GPUDisplay work.";
|
||||
|
||||
const int resolution_divider = render_work.resolution_divider;
|
||||
const int texture_width = max(1, full_params_.width / resolution_divider);
|
||||
const int texture_height = max(1, full_params_.height / resolution_divider);
|
||||
if (!gpu_display_->update_begin(texture_width, texture_height)) {
|
||||
if (!display_->update_begin(texture_width, texture_height)) {
|
||||
LOG(ERROR) << "Error beginning GPUDisplay update.";
|
||||
return;
|
||||
}
|
||||
@@ -600,10 +605,10 @@ void PathTrace::update_display(const RenderWork &render_work)
|
||||
* all works in parallel. */
|
||||
const int num_samples = get_num_samples_in_buffer();
|
||||
for (auto &&path_trace_work : path_trace_works_) {
|
||||
path_trace_work->copy_to_gpu_display(gpu_display_.get(), pass_mode, num_samples);
|
||||
path_trace_work->copy_to_display(display_.get(), pass_mode, num_samples);
|
||||
}
|
||||
|
||||
gpu_display_->update_end();
|
||||
display_->update_end();
|
||||
}
|
||||
|
||||
render_scheduler_.report_display_update_time(render_work, time_dt() - start_time);
|
||||
|
||||
@@ -31,12 +31,13 @@ CCL_NAMESPACE_BEGIN
|
||||
class AdaptiveSampling;
|
||||
class Device;
|
||||
class DeviceScene;
|
||||
class DisplayDriver;
|
||||
class Film;
|
||||
class RenderBuffers;
|
||||
class RenderScheduler;
|
||||
class RenderWork;
|
||||
class PathTraceDisplay;
|
||||
class Progress;
|
||||
class GPUDisplay;
|
||||
class TileManager;
|
||||
|
||||
/* PathTrace class takes care of kernel graph and scheduling on a (multi)device. It takes care of
|
||||
@@ -98,13 +99,13 @@ class PathTrace {
|
||||
* Use this to configure the adaptive sampler before rendering any samples. */
|
||||
void set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling);
|
||||
|
||||
/* Set GPU display which takes care of drawing the render result. */
|
||||
void set_gpu_display(unique_ptr<GPUDisplay> gpu_display);
|
||||
/* Set display driver which takes care of drawing the render result. */
|
||||
void set_display_driver(unique_ptr<DisplayDriver> driver);
|
||||
|
||||
/* Clear the GPU display by filling it in with all zeroes. */
|
||||
void clear_gpu_display();
|
||||
/* Clear the display buffer by filling it in with all zeroes. */
|
||||
void clear_display();
|
||||
|
||||
/* Perform drawing of the current state of the GPUDisplay. */
|
||||
/* Perform drawing of the current state of the DisplayDriver. */
|
||||
void draw();
|
||||
|
||||
/* Cancel rendering process as soon as possible, without waiting for full tile to be sampled.
|
||||
@@ -252,7 +253,7 @@ class PathTrace {
|
||||
RenderScheduler &render_scheduler_;
|
||||
TileManager &tile_manager_;
|
||||
|
||||
unique_ptr<GPUDisplay> gpu_display_;
|
||||
unique_ptr<PathTraceDisplay> display_;
|
||||
|
||||
/* Per-compute device descriptors of work which is responsible for path tracing on its configured
|
||||
* device. */
|
||||
|
||||
@@ -14,20 +14,25 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "render/gpu_display.h"
|
||||
#include "integrator/path_trace_display.h"
|
||||
|
||||
#include "render/buffers.h"
|
||||
|
||||
#include "util/util_logging.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
void GPUDisplay::reset(const BufferParams &buffer_params)
|
||||
PathTraceDisplay::PathTraceDisplay(unique_ptr<DisplayDriver> driver) : driver_(move(driver))
|
||||
{
|
||||
}
|
||||
|
||||
void PathTraceDisplay::reset(const BufferParams &buffer_params)
|
||||
{
|
||||
thread_scoped_lock lock(mutex_);
|
||||
|
||||
const GPUDisplayParams old_params = params_;
|
||||
const DisplayDriver::Params old_params = params_;
|
||||
|
||||
params_.offset = make_int2(buffer_params.full_x, buffer_params.full_y);
|
||||
params_.full_offset = make_int2(buffer_params.full_x, buffer_params.full_y);
|
||||
params_.full_size = make_int2(buffer_params.full_width, buffer_params.full_height);
|
||||
params_.size = make_int2(buffer_params.width, buffer_params.height);
|
||||
|
||||
@@ -44,7 +49,7 @@ void GPUDisplay::reset(const BufferParams &buffer_params)
|
||||
texture_state_.is_outdated = true;
|
||||
}
|
||||
|
||||
void GPUDisplay::mark_texture_updated()
|
||||
void PathTraceDisplay::mark_texture_updated()
|
||||
{
|
||||
texture_state_.is_outdated = false;
|
||||
texture_state_.is_usable = true;
|
||||
@@ -54,7 +59,7 @@ void GPUDisplay::mark_texture_updated()
|
||||
* Update procedure.
|
||||
*/
|
||||
|
||||
bool GPUDisplay::update_begin(int texture_width, int texture_height)
|
||||
bool PathTraceDisplay::update_begin(int texture_width, int texture_height)
|
||||
{
|
||||
DCHECK(!update_state_.is_active);
|
||||
|
||||
@@ -66,15 +71,15 @@ bool GPUDisplay::update_begin(int texture_width, int texture_height)
|
||||
/* Get parameters within a mutex lock, to avoid reset() modifying them at the same time.
|
||||
* The update itself is non-blocking however, for better performance and to avoid
|
||||
* potential deadlocks due to locks held by the subclass. */
|
||||
GPUDisplayParams params;
|
||||
DisplayDriver::Params params;
|
||||
{
|
||||
thread_scoped_lock lock(mutex_);
|
||||
params = params_;
|
||||
texture_state_.size = make_int2(texture_width, texture_height);
|
||||
}
|
||||
|
||||
if (!do_update_begin(params, texture_width, texture_height)) {
|
||||
LOG(ERROR) << "GPUDisplay implementation could not begin update.";
|
||||
if (!driver_->update_begin(params, texture_width, texture_height)) {
|
||||
LOG(ERROR) << "PathTraceDisplay implementation could not begin update.";
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -83,7 +88,7 @@ bool GPUDisplay::update_begin(int texture_width, int texture_height)
|
||||
return true;
|
||||
}
|
||||
|
||||
void GPUDisplay::update_end()
|
||||
void PathTraceDisplay::update_end()
|
||||
{
|
||||
DCHECK(update_state_.is_active);
|
||||
|
||||
@@ -92,12 +97,12 @@ void GPUDisplay::update_end()
|
||||
return;
|
||||
}
|
||||
|
||||
do_update_end();
|
||||
driver_->update_end();
|
||||
|
||||
update_state_.is_active = false;
|
||||
}
|
||||
|
||||
int2 GPUDisplay::get_texture_size() const
|
||||
int2 PathTraceDisplay::get_texture_size() const
|
||||
{
|
||||
return texture_state_.size;
|
||||
}
|
||||
@@ -106,25 +111,54 @@ int2 GPUDisplay::get_texture_size() const
|
||||
* Texture update from CPU buffer.
|
||||
*/
|
||||
|
||||
void GPUDisplay::copy_pixels_to_texture(
|
||||
void PathTraceDisplay::copy_pixels_to_texture(
|
||||
const half4 *rgba_pixels, int texture_x, int texture_y, int pixels_width, int pixels_height)
|
||||
{
|
||||
DCHECK(update_state_.is_active);
|
||||
|
||||
if (!update_state_.is_active) {
|
||||
LOG(ERROR) << "Attempt to copy pixels data outside of GPUDisplay update.";
|
||||
LOG(ERROR) << "Attempt to copy pixels data outside of PathTraceDisplay update.";
|
||||
return;
|
||||
}
|
||||
|
||||
mark_texture_updated();
|
||||
do_copy_pixels_to_texture(rgba_pixels, texture_x, texture_y, pixels_width, pixels_height);
|
||||
|
||||
/* This call copies pixels to a mapped texture buffer which is typically much cheaper from CPU
|
||||
* time point of view than to copy data directly to a texture.
|
||||
*
|
||||
* The possible downside of this approach is that it might require a higher peak memory when
|
||||
* doing partial updates of the texture (although, in practice even partial updates might peak
|
||||
* with a full-frame buffer stored on the CPU if the GPU is currently occupied). */
|
||||
half4 *mapped_rgba_pixels = map_texture_buffer();
|
||||
if (!mapped_rgba_pixels) {
|
||||
return;
|
||||
}
|
||||
|
||||
const int texture_width = texture_state_.size.x;
|
||||
const int texture_height = texture_state_.size.y;
|
||||
|
||||
if (texture_x == 0 && texture_y == 0 && pixels_width == texture_width &&
|
||||
pixels_height == texture_height) {
|
||||
const size_t size_in_bytes = sizeof(half4) * texture_width * texture_height;
|
||||
memcpy(mapped_rgba_pixels, rgba_pixels, size_in_bytes);
|
||||
}
|
||||
else {
|
||||
const half4 *rgba_row = rgba_pixels;
|
||||
half4 *mapped_rgba_row = mapped_rgba_pixels + texture_y * texture_width + texture_x;
|
||||
for (int y = 0; y < pixels_height;
|
||||
++y, rgba_row += pixels_width, mapped_rgba_row += texture_width) {
|
||||
memcpy(mapped_rgba_row, rgba_row, sizeof(half4) * pixels_width);
|
||||
}
|
||||
}
|
||||
|
||||
unmap_texture_buffer();
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Texture buffer mapping.
|
||||
*/
|
||||
|
||||
half4 *GPUDisplay::map_texture_buffer()
|
||||
half4 *PathTraceDisplay::map_texture_buffer()
|
||||
{
|
||||
DCHECK(!texture_buffer_state_.is_mapped);
|
||||
DCHECK(update_state_.is_active);
|
||||
@@ -135,11 +169,11 @@ half4 *GPUDisplay::map_texture_buffer()
|
||||
}
|
||||
|
||||
if (!update_state_.is_active) {
|
||||
LOG(ERROR) << "Attempt to copy pixels data outside of GPUDisplay update.";
|
||||
LOG(ERROR) << "Attempt to copy pixels data outside of PathTraceDisplay update.";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
half4 *mapped_rgba_pixels = do_map_texture_buffer();
|
||||
half4 *mapped_rgba_pixels = driver_->map_texture_buffer();
|
||||
|
||||
if (mapped_rgba_pixels) {
|
||||
texture_buffer_state_.is_mapped = true;
|
||||
@@ -148,7 +182,7 @@ half4 *GPUDisplay::map_texture_buffer()
|
||||
return mapped_rgba_pixels;
|
||||
}
|
||||
|
||||
void GPUDisplay::unmap_texture_buffer()
|
||||
void PathTraceDisplay::unmap_texture_buffer()
|
||||
{
|
||||
DCHECK(texture_buffer_state_.is_mapped);
|
||||
|
||||
@@ -160,14 +194,14 @@ void GPUDisplay::unmap_texture_buffer()
|
||||
texture_buffer_state_.is_mapped = false;
|
||||
|
||||
mark_texture_updated();
|
||||
do_unmap_texture_buffer();
|
||||
driver_->unmap_texture_buffer();
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Graphics interoperability.
|
||||
*/
|
||||
|
||||
DeviceGraphicsInteropDestination GPUDisplay::graphics_interop_get()
|
||||
DisplayDriver::GraphicsInterop PathTraceDisplay::graphics_interop_get()
|
||||
{
|
||||
DCHECK(!texture_buffer_state_.is_mapped);
|
||||
DCHECK(update_state_.is_active);
|
||||
@@ -175,38 +209,45 @@ DeviceGraphicsInteropDestination GPUDisplay::graphics_interop_get()
|
||||
if (texture_buffer_state_.is_mapped) {
|
||||
LOG(ERROR)
|
||||
<< "Attempt to use graphics interoperability mode while the texture buffer is mapped.";
|
||||
return DeviceGraphicsInteropDestination();
|
||||
return DisplayDriver::GraphicsInterop();
|
||||
}
|
||||
|
||||
if (!update_state_.is_active) {
|
||||
LOG(ERROR) << "Attempt to use graphics interoperability outside of GPUDisplay update.";
|
||||
return DeviceGraphicsInteropDestination();
|
||||
LOG(ERROR) << "Attempt to use graphics interoperability outside of PathTraceDisplay update.";
|
||||
return DisplayDriver::GraphicsInterop();
|
||||
}
|
||||
|
||||
/* Assume that interop will write new values to the texture. */
|
||||
mark_texture_updated();
|
||||
|
||||
return do_graphics_interop_get();
|
||||
return driver_->graphics_interop_get();
|
||||
}
|
||||
|
||||
void GPUDisplay::graphics_interop_activate()
|
||||
void PathTraceDisplay::graphics_interop_activate()
|
||||
{
|
||||
driver_->graphics_interop_activate();
|
||||
}
|
||||
|
||||
void GPUDisplay::graphics_interop_deactivate()
|
||||
void PathTraceDisplay::graphics_interop_deactivate()
|
||||
{
|
||||
driver_->graphics_interop_deactivate();
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Drawing.
|
||||
*/
|
||||
|
||||
bool GPUDisplay::draw()
|
||||
void PathTraceDisplay::clear()
|
||||
{
|
||||
driver_->clear();
|
||||
}
|
||||
|
||||
bool PathTraceDisplay::draw()
|
||||
{
|
||||
/* Get parameters within a mutex lock, to avoid reset() modifying them at the same time.
|
||||
* The drawing itself is non-blocking however, for better performance and to avoid
|
||||
* potential deadlocks due to locks held by the subclass. */
|
||||
GPUDisplayParams params;
|
||||
DisplayDriver::Params params;
|
||||
bool is_usable;
|
||||
bool is_outdated;
|
||||
|
||||
@@ -218,7 +259,7 @@ bool GPUDisplay::draw()
|
||||
}
|
||||
|
||||
if (is_usable) {
|
||||
do_draw(params);
|
||||
driver_->draw(params);
|
||||
}
|
||||
|
||||
return !is_outdated;
|
||||
@@ -16,52 +16,30 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "device/device_graphics_interop.h"
|
||||
#include "render/display_driver.h"
|
||||
|
||||
#include "util/util_half.h"
|
||||
#include "util/util_thread.h"
|
||||
#include "util/util_types.h"
|
||||
#include "util/util_unique_ptr.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class BufferParams;
|
||||
|
||||
/* GPUDisplay class takes care of drawing render result in a viewport. The render result is stored
|
||||
* in a GPU-side texture, which is updated from a path tracer and drawn by an application.
|
||||
/* PathTraceDisplay is used for efficient render buffer display.
|
||||
*
|
||||
* The base GPUDisplay does some special texture state tracking, which allows render Session to
|
||||
* make decisions on whether reset for an updated state is possible or not. This state should only
|
||||
* be tracked in a base class and a particular implementation should not worry about it.
|
||||
* The host application implements a DisplayDriver, storing a render pass in a GPU-side
|
||||
* texture. This texture is continuously updated by the path tracer and drawn by the host
|
||||
* application.
|
||||
*
|
||||
* The subclasses should only implement the pure virtual methods, which allows them to not worry
|
||||
* about parent method calls, which helps them to be as small and reliable as possible. */
|
||||
* PathTraceDisplay is a wrapper around the DisplayDriver, adding thread safety, state tracking
|
||||
* and error checking. */
|
||||
|
||||
class GPUDisplayParams {
|
||||
class PathTraceDisplay {
|
||||
public:
|
||||
/* Offset of the display within a viewport.
|
||||
* For example, set to a lower-bottom corner of border render in Blender's viewport. */
|
||||
int2 offset = make_int2(0, 0);
|
||||
|
||||
/* Full viewport size.
|
||||
*
|
||||
* NOTE: Is not affected by the resolution divider. */
|
||||
int2 full_size = make_int2(0, 0);
|
||||
|
||||
/* Effective viewport size.
|
||||
* In the case of border render, size of the border rectangle.
|
||||
*
|
||||
* NOTE: Is not affected by the resolution divider. */
|
||||
int2 size = make_int2(0, 0);
|
||||
|
||||
bool modified(const GPUDisplayParams &other) const
|
||||
{
|
||||
return !(offset == other.offset && full_size == other.full_size && size == other.size);
|
||||
}
|
||||
};
|
||||
|
||||
class GPUDisplay {
|
||||
public:
|
||||
GPUDisplay() = default;
|
||||
virtual ~GPUDisplay() = default;
|
||||
PathTraceDisplay(unique_ptr<DisplayDriver> driver);
|
||||
virtual ~PathTraceDisplay() = default;
|
||||
|
||||
/* Reset the display for the new state of render session. Is called whenever session is reset,
|
||||
* which happens on changes like viewport navigation or viewport dimension change.
|
||||
@@ -69,11 +47,6 @@ class GPUDisplay {
|
||||
* This call will configure parameters for a changed buffer and reset the texture state. */
|
||||
void reset(const BufferParams &buffer_params);
|
||||
|
||||
const GPUDisplayParams &get_params() const
|
||||
{
|
||||
return params_;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Update procedure.
|
||||
*
|
||||
@@ -94,7 +67,8 @@ class GPUDisplay {
|
||||
/* --------------------------------------------------------------------
|
||||
* Texture update from CPU buffer.
|
||||
*
|
||||
* NOTE: The GPUDisplay should be marked for an update being in process with `update_begin()`.
|
||||
* NOTE: The PathTraceDisplay should be marked for an update being in process with
|
||||
* `update_begin()`.
|
||||
*
|
||||
* Most portable implementation, which must be supported by all platforms. Might not be the most
|
||||
* efficient one.
|
||||
@@ -115,7 +89,8 @@ class GPUDisplay {
|
||||
* This functionality is used to update GPU-side texture content without need to maintain CPU
|
||||
* side buffer on the caller.
|
||||
*
|
||||
* NOTE: The GPUDisplay should be marked for an update being in process with `update_begin()`.
|
||||
* NOTE: The PathTraceDisplay should be marked for an update being in process with
|
||||
* `update_begin()`.
|
||||
*
|
||||
* NOTE: Texture buffer can not be mapped while graphics interoperability is active. This means
|
||||
* that `map_texture_buffer()` is not allowed between `graphics_interop_begin()` and
|
||||
@@ -145,14 +120,14 @@ class GPUDisplay {
|
||||
* that `graphics_interop_get()` is not allowed between `map_texture_buffer()` and
|
||||
* `unmap_texture_buffer()` calls. */
|
||||
|
||||
/* Get GPUDisplay graphics interoperability information which acts as a destination for the
|
||||
/* Get PathTraceDisplay graphics interoperability information which acts as a destination for the
|
||||
* device API. */
|
||||
DeviceGraphicsInteropDestination graphics_interop_get();
|
||||
DisplayDriver::GraphicsInterop graphics_interop_get();
|
||||
|
||||
/* (De)activate GPU display for graphics interoperability outside of regular display update
|
||||
* routines. */
|
||||
virtual void graphics_interop_activate();
|
||||
virtual void graphics_interop_deactivate();
|
||||
void graphics_interop_activate();
|
||||
void graphics_interop_deactivate();
|
||||
|
||||
/* --------------------------------------------------------------------
|
||||
* Drawing.
|
||||
@@ -168,42 +143,21 @@ class GPUDisplay {
|
||||
* after clear will write new pixel values for an updating area, leaving everything else zeroed.
|
||||
*
|
||||
* If the GPU display supports graphics interoperability then the zeroing the display is to be
|
||||
* delegated to the device via the `DeviceGraphicsInteropDestination`. */
|
||||
virtual void clear() = 0;
|
||||
* delegated to the device via the `DisplayDriver::GraphicsInterop`. */
|
||||
void clear();
|
||||
|
||||
/* Draw the current state of the texture.
|
||||
*
|
||||
* Returns true if this call did draw an updated state of the texture. */
|
||||
bool draw();
|
||||
|
||||
protected:
|
||||
/* Implementation-specific calls which subclasses are to implement.
|
||||
* These `do_foo()` method corresponds to their `foo()` calls, but they are purely virtual to
|
||||
* simplify their particular implementation. */
|
||||
virtual bool do_update_begin(const GPUDisplayParams &params,
|
||||
int texture_width,
|
||||
int texture_height) = 0;
|
||||
virtual void do_update_end() = 0;
|
||||
|
||||
virtual void do_copy_pixels_to_texture(const half4 *rgba_pixels,
|
||||
int texture_x,
|
||||
int texture_y,
|
||||
int pixels_width,
|
||||
int pixels_height) = 0;
|
||||
|
||||
virtual half4 *do_map_texture_buffer() = 0;
|
||||
virtual void do_unmap_texture_buffer() = 0;
|
||||
|
||||
/* Note that this might be called in parallel to do_update_begin() and do_update_end(),
|
||||
* the subclass is responsible for appropriate mutex locks to avoid multiple threads
|
||||
* editing and drawing the texture at the same time. */
|
||||
virtual void do_draw(const GPUDisplayParams &params) = 0;
|
||||
|
||||
virtual DeviceGraphicsInteropDestination do_graphics_interop_get() = 0;
|
||||
|
||||
private:
|
||||
/* Display driver implemented by the host application. */
|
||||
unique_ptr<DisplayDriver> driver_;
|
||||
|
||||
/* Current display parameters */
|
||||
thread_mutex mutex_;
|
||||
GPUDisplayParams params_;
|
||||
DisplayDriver::Params params_;
|
||||
|
||||
/* Mark texture as its content has been updated.
|
||||
* Used from places which knows that the texture content has been brought up-to-date, so that the
|
||||
@@ -16,12 +16,12 @@
|
||||
|
||||
#include "device/device.h"
|
||||
|
||||
#include "integrator/path_trace_display.h"
|
||||
#include "integrator/path_trace_work.h"
|
||||
#include "integrator/path_trace_work_cpu.h"
|
||||
#include "integrator/path_trace_work_gpu.h"
|
||||
#include "render/buffers.h"
|
||||
#include "render/film.h"
|
||||
#include "render/gpu_display.h"
|
||||
#include "render/scene.h"
|
||||
|
||||
#include "kernel/kernel_types.h"
|
||||
@@ -185,12 +185,12 @@ PassAccessor::PassAccessInfo PathTraceWork::get_display_pass_access_info(PassMod
|
||||
return pass_access_info;
|
||||
}
|
||||
|
||||
PassAccessor::Destination PathTraceWork::get_gpu_display_destination_template(
|
||||
const GPUDisplay *gpu_display) const
|
||||
PassAccessor::Destination PathTraceWork::get_display_destination_template(
|
||||
const PathTraceDisplay *display) const
|
||||
{
|
||||
PassAccessor::Destination destination(film_->get_display_pass());
|
||||
|
||||
const int2 display_texture_size = gpu_display->get_texture_size();
|
||||
const int2 display_texture_size = display->get_texture_size();
|
||||
const int texture_x = effective_buffer_params_.full_x - effective_full_params_.full_x;
|
||||
const int texture_y = effective_buffer_params_.full_y - effective_full_params_.full_y;
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ class BufferParams;
|
||||
class Device;
|
||||
class DeviceScene;
|
||||
class Film;
|
||||
class GPUDisplay;
|
||||
class PathTraceDisplay;
|
||||
class RenderBuffers;
|
||||
|
||||
class PathTraceWork {
|
||||
@@ -83,11 +83,9 @@ class PathTraceWork {
|
||||
* noisy pass mode will be passed here when it is known that the buffer does not have denoised
|
||||
* passes yet (because denoiser did not run). If the denoised pass is requested and denoiser is
|
||||
* not used then this function will fall-back to the noisy pass instead. */
|
||||
virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples) = 0;
|
||||
virtual void copy_to_display(PathTraceDisplay *display, PassMode pass_mode, int num_samples) = 0;
|
||||
|
||||
virtual void destroy_gpu_resources(GPUDisplay *gpu_display) = 0;
|
||||
virtual void destroy_gpu_resources(PathTraceDisplay *display) = 0;
|
||||
|
||||
/* Copy data from/to given render buffers.
|
||||
* Will copy pixels from a corresponding place (from multi-device point of view) of the render
|
||||
@@ -162,8 +160,8 @@ class PathTraceWork {
|
||||
|
||||
/* Get destination which offset and stride are configured so that writing to it will write to a
|
||||
* proper location of GPU display texture, taking current tile and device slice into account. */
|
||||
PassAccessor::Destination get_gpu_display_destination_template(
|
||||
const GPUDisplay *gpu_display) const;
|
||||
PassAccessor::Destination get_display_destination_template(
|
||||
const PathTraceDisplay *display) const;
|
||||
|
||||
/* Device which will be used for path tracing.
|
||||
* Note that it is an actual render device (and never is a multi-device). */
|
||||
|
||||
@@ -22,9 +22,9 @@
|
||||
#include "kernel/kernel_path_state.h"
|
||||
|
||||
#include "integrator/pass_accessor_cpu.h"
|
||||
#include "integrator/path_trace_display.h"
|
||||
|
||||
#include "render/buffers.h"
|
||||
#include "render/gpu_display.h"
|
||||
#include "render/scene.h"
|
||||
|
||||
#include "util/util_atomic.h"
|
||||
@@ -161,14 +161,14 @@ void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_global
|
||||
}
|
||||
}
|
||||
|
||||
void PathTraceWorkCPU::copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
void PathTraceWorkCPU::copy_to_display(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
{
|
||||
half4 *rgba_half = gpu_display->map_texture_buffer();
|
||||
half4 *rgba_half = display->map_texture_buffer();
|
||||
if (!rgba_half) {
|
||||
/* TODO(sergey): Look into using copy_to_gpu_display() if mapping failed. Might be needed for
|
||||
* some implementations of GPUDisplay which can not map memory? */
|
||||
/* TODO(sergey): Look into using copy_to_display() if mapping failed. Might be needed for
|
||||
* some implementations of PathTraceDisplay which can not map memory? */
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -178,7 +178,7 @@ void PathTraceWorkCPU::copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
|
||||
const PassAccessorCPU pass_accessor(pass_access_info, kfilm.exposure, num_samples);
|
||||
|
||||
PassAccessor::Destination destination = get_gpu_display_destination_template(gpu_display);
|
||||
PassAccessor::Destination destination = get_display_destination_template(display);
|
||||
destination.pixels_half_rgba = rgba_half;
|
||||
|
||||
tbb::task_arena local_arena = local_tbb_arena_create(device_);
|
||||
@@ -186,10 +186,10 @@ void PathTraceWorkCPU::copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
pass_accessor.get_render_tile_pixels(buffers_.get(), effective_buffer_params_, destination);
|
||||
});
|
||||
|
||||
gpu_display->unmap_texture_buffer();
|
||||
display->unmap_texture_buffer();
|
||||
}
|
||||
|
||||
void PathTraceWorkCPU::destroy_gpu_resources(GPUDisplay * /*gpu_display*/)
|
||||
void PathTraceWorkCPU::destroy_gpu_resources(PathTraceDisplay * /*display*/)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
@@ -50,10 +50,10 @@ class PathTraceWorkCPU : public PathTraceWork {
|
||||
int start_sample,
|
||||
int samples_num) override;
|
||||
|
||||
virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples) override;
|
||||
virtual void destroy_gpu_resources(GPUDisplay *gpu_display) override;
|
||||
virtual void copy_to_display(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples) override;
|
||||
virtual void destroy_gpu_resources(PathTraceDisplay *display) override;
|
||||
|
||||
virtual bool copy_render_buffers_from_device() override;
|
||||
virtual bool copy_render_buffers_to_device() override;
|
||||
|
||||
@@ -15,12 +15,12 @@
|
||||
*/
|
||||
|
||||
#include "integrator/path_trace_work_gpu.h"
|
||||
#include "integrator/path_trace_display.h"
|
||||
|
||||
#include "device/device.h"
|
||||
|
||||
#include "integrator/pass_accessor_gpu.h"
|
||||
#include "render/buffers.h"
|
||||
#include "render/gpu_display.h"
|
||||
#include "render/scene.h"
|
||||
#include "util/util_logging.h"
|
||||
#include "util/util_tbb.h"
|
||||
@@ -46,7 +46,7 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device,
|
||||
queued_paths_(device, "queued_paths", MEM_READ_WRITE),
|
||||
num_queued_paths_(device, "num_queued_paths", MEM_READ_WRITE),
|
||||
work_tiles_(device, "work_tiles", MEM_READ_WRITE),
|
||||
gpu_display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
|
||||
display_rgba_half_(device, "display buffer half", MEM_READ_WRITE),
|
||||
max_num_paths_(queue_->num_concurrent_states(sizeof(IntegratorStateCPU))),
|
||||
min_num_active_paths_(queue_->num_concurrent_busy_states()),
|
||||
max_active_path_index_(0)
|
||||
@@ -652,7 +652,7 @@ int PathTraceWorkGPU::get_num_active_paths()
|
||||
bool PathTraceWorkGPU::should_use_graphics_interop()
|
||||
{
|
||||
/* There are few aspects with the graphics interop when using multiple devices caused by the fact
|
||||
* that the GPUDisplay has a single texture:
|
||||
* that the PathTraceDisplay has a single texture:
|
||||
*
|
||||
* CUDA will return `CUDA_ERROR_NOT_SUPPORTED` from `cuGraphicsGLRegisterBuffer()` when
|
||||
* attempting to register OpenGL PBO which has been mapped. Which makes sense, because
|
||||
@@ -678,9 +678,9 @@ bool PathTraceWorkGPU::should_use_graphics_interop()
|
||||
return interop_use_;
|
||||
}
|
||||
|
||||
void PathTraceWorkGPU::copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
void PathTraceWorkGPU::copy_to_display(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
{
|
||||
if (device_->have_error()) {
|
||||
/* Don't attempt to update GPU display if the device has errors: the error state will make
|
||||
@@ -694,7 +694,7 @@ void PathTraceWorkGPU::copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
}
|
||||
|
||||
if (should_use_graphics_interop()) {
|
||||
if (copy_to_gpu_display_interop(gpu_display, pass_mode, num_samples)) {
|
||||
if (copy_to_display_interop(display, pass_mode, num_samples)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -703,12 +703,12 @@ void PathTraceWorkGPU::copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
interop_use_ = false;
|
||||
}
|
||||
|
||||
copy_to_gpu_display_naive(gpu_display, pass_mode, num_samples);
|
||||
copy_to_display_naive(display, pass_mode, num_samples);
|
||||
}
|
||||
|
||||
void PathTraceWorkGPU::copy_to_gpu_display_naive(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
void PathTraceWorkGPU::copy_to_display_naive(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
{
|
||||
const int full_x = effective_buffer_params_.full_x;
|
||||
const int full_y = effective_buffer_params_.full_y;
|
||||
@@ -725,44 +725,42 @@ void PathTraceWorkGPU::copy_to_gpu_display_naive(GPUDisplay *gpu_display,
|
||||
* NOTE: allocation happens to the final resolution so that no re-allocation happens on every
|
||||
* change of the resolution divider. However, if the display becomes smaller, shrink the
|
||||
* allocated memory as well. */
|
||||
if (gpu_display_rgba_half_.data_width != final_width ||
|
||||
gpu_display_rgba_half_.data_height != final_height) {
|
||||
gpu_display_rgba_half_.alloc(final_width, final_height);
|
||||
if (display_rgba_half_.data_width != final_width ||
|
||||
display_rgba_half_.data_height != final_height) {
|
||||
display_rgba_half_.alloc(final_width, final_height);
|
||||
/* TODO(sergey): There should be a way to make sure device-side memory is allocated without
|
||||
* transferring zeroes to the device. */
|
||||
queue_->zero_to_device(gpu_display_rgba_half_);
|
||||
queue_->zero_to_device(display_rgba_half_);
|
||||
}
|
||||
|
||||
PassAccessor::Destination destination(film_->get_display_pass());
|
||||
destination.d_pixels_half_rgba = gpu_display_rgba_half_.device_pointer;
|
||||
destination.d_pixels_half_rgba = display_rgba_half_.device_pointer;
|
||||
|
||||
get_render_tile_film_pixels(destination, pass_mode, num_samples);
|
||||
|
||||
queue_->copy_from_device(gpu_display_rgba_half_);
|
||||
queue_->copy_from_device(display_rgba_half_);
|
||||
queue_->synchronize();
|
||||
|
||||
gpu_display->copy_pixels_to_texture(
|
||||
gpu_display_rgba_half_.data(), texture_x, texture_y, width, height);
|
||||
display->copy_pixels_to_texture(display_rgba_half_.data(), texture_x, texture_y, width, height);
|
||||
}
|
||||
|
||||
bool PathTraceWorkGPU::copy_to_gpu_display_interop(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
bool PathTraceWorkGPU::copy_to_display_interop(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples)
|
||||
{
|
||||
if (!device_graphics_interop_) {
|
||||
device_graphics_interop_ = queue_->graphics_interop_create();
|
||||
}
|
||||
|
||||
const DeviceGraphicsInteropDestination graphics_interop_dst =
|
||||
gpu_display->graphics_interop_get();
|
||||
device_graphics_interop_->set_destination(graphics_interop_dst);
|
||||
const DisplayDriver::GraphicsInterop graphics_interop_dst = display->graphics_interop_get();
|
||||
device_graphics_interop_->set_display_interop(graphics_interop_dst);
|
||||
|
||||
const device_ptr d_rgba_half = device_graphics_interop_->map();
|
||||
if (!d_rgba_half) {
|
||||
return false;
|
||||
}
|
||||
|
||||
PassAccessor::Destination destination = get_gpu_display_destination_template(gpu_display);
|
||||
PassAccessor::Destination destination = get_display_destination_template(display);
|
||||
destination.d_pixels_half_rgba = d_rgba_half;
|
||||
|
||||
get_render_tile_film_pixels(destination, pass_mode, num_samples);
|
||||
@@ -772,14 +770,14 @@ bool PathTraceWorkGPU::copy_to_gpu_display_interop(GPUDisplay *gpu_display,
|
||||
return true;
|
||||
}
|
||||
|
||||
void PathTraceWorkGPU::destroy_gpu_resources(GPUDisplay *gpu_display)
|
||||
void PathTraceWorkGPU::destroy_gpu_resources(PathTraceDisplay *display)
|
||||
{
|
||||
if (!device_graphics_interop_) {
|
||||
return;
|
||||
}
|
||||
gpu_display->graphics_interop_activate();
|
||||
display->graphics_interop_activate();
|
||||
device_graphics_interop_ = nullptr;
|
||||
gpu_display->graphics_interop_deactivate();
|
||||
display->graphics_interop_deactivate();
|
||||
}
|
||||
|
||||
void PathTraceWorkGPU::get_render_tile_film_pixels(const PassAccessor::Destination &destination,
|
||||
|
||||
@@ -48,10 +48,10 @@ class PathTraceWorkGPU : public PathTraceWork {
|
||||
int start_sample,
|
||||
int samples_num) override;
|
||||
|
||||
virtual void copy_to_gpu_display(GPUDisplay *gpu_display,
|
||||
PassMode pass_mode,
|
||||
int num_samples) override;
|
||||
virtual void destroy_gpu_resources(GPUDisplay *gpu_display) override;
|
||||
virtual void copy_to_display(PathTraceDisplay *display,
|
||||
PassMode pass_mode,
|
||||
int num_samples) override;
|
||||
virtual void destroy_gpu_resources(PathTraceDisplay *display) override;
|
||||
|
||||
virtual bool copy_render_buffers_from_device() override;
|
||||
virtual bool copy_render_buffers_to_device() override;
|
||||
@@ -88,16 +88,16 @@ class PathTraceWorkGPU : public PathTraceWork {
|
||||
|
||||
int get_num_active_paths();
|
||||
|
||||
/* Check whether graphics interop can be used for the GPUDisplay update. */
|
||||
/* Check whether graphics interop can be used for the PathTraceDisplay update. */
|
||||
bool should_use_graphics_interop();
|
||||
|
||||
/* Naive implementation of the `copy_to_gpu_display()` which performs film conversion on the
|
||||
* device, then copies pixels to the host and pushes them to the `gpu_display`. */
|
||||
void copy_to_gpu_display_naive(GPUDisplay *gpu_display, PassMode pass_mode, int num_samples);
|
||||
/* Naive implementation of the `copy_to_display()` which performs film conversion on the
|
||||
* device, then copies pixels to the host and pushes them to the `display`. */
|
||||
void copy_to_display_naive(PathTraceDisplay *display, PassMode pass_mode, int num_samples);
|
||||
|
||||
/* Implementation of `copy_to_gpu_display()` which uses driver's OpenGL/GPU interoperability
|
||||
/* Implementation of `copy_to_display()` which uses driver's OpenGL/GPU interoperability
|
||||
* functionality, avoiding copy of pixels to the host. */
|
||||
bool copy_to_gpu_display_interop(GPUDisplay *gpu_display, PassMode pass_mode, int num_samples);
|
||||
bool copy_to_display_interop(PathTraceDisplay *display, PassMode pass_mode, int num_samples);
|
||||
|
||||
/* Synchronously run film conversion kernel and store display result in the given destination. */
|
||||
void get_render_tile_film_pixels(const PassAccessor::Destination &destination,
|
||||
@@ -139,9 +139,9 @@ class PathTraceWorkGPU : public PathTraceWork {
|
||||
/* Temporary buffer for passing work tiles to kernel. */
|
||||
device_vector<KernelWorkTile> work_tiles_;
|
||||
|
||||
/* Temporary buffer used by the copy_to_gpu_display() whenever graphics interoperability is not
|
||||
/* Temporary buffer used by the copy_to_display() whenever graphics interoperability is not
|
||||
* available. Is allocated on-demand. */
|
||||
device_vector<half4> gpu_display_rgba_half_;
|
||||
device_vector<half4> display_rgba_half_;
|
||||
|
||||
unique_ptr<DeviceGraphicsInterop> device_graphics_interop_;
|
||||
|
||||
|
||||
@@ -344,7 +344,7 @@ class RenderScheduler {
|
||||
/* Number of rendered samples on top of the start sample. */
|
||||
int num_rendered_samples = 0;
|
||||
|
||||
/* Point in time the latest GPUDisplay work has been scheduled. */
|
||||
/* Point in time the latest PathTraceDisplay work has been scheduled. */
|
||||
double last_display_update_time = 0.0;
|
||||
/* Value of -1 means display was never updated. */
|
||||
int last_display_update_sample = -1;
|
||||
|
||||
@@ -35,7 +35,6 @@ set(SRC
|
||||
denoising.cpp
|
||||
film.cpp
|
||||
geometry.cpp
|
||||
gpu_display.cpp
|
||||
graph.cpp
|
||||
hair.cpp
|
||||
image.cpp
|
||||
@@ -78,9 +77,9 @@ set(SRC_HEADERS
|
||||
colorspace.h
|
||||
constant_fold.h
|
||||
denoising.h
|
||||
display_driver.h
|
||||
film.h
|
||||
geometry.h
|
||||
gpu_display.h
|
||||
graph.h
|
||||
hair.h
|
||||
image.h
|
||||
|
||||
131
intern/cycles/render/display_driver.h
Normal file
131
intern/cycles/render/display_driver.h
Normal file
@@ -0,0 +1,131 @@
|
||||
/*
|
||||
* Copyright 2021 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "util/util_half.h"
|
||||
#include "util/util_types.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Display driver for efficient interactive display of renders.
|
||||
*
|
||||
* Host applications implement this interface for viewport rendering. For best performance, we
|
||||
* recommend:
|
||||
* - Allocating a texture on the GPU to be interactively updated
|
||||
* - Using the graphics interop mechanism to avoid CPU-GPU copying overhead
|
||||
* - Using a dedicated or thread-safe graphics API context for updates, to avoid
|
||||
* blocking the host application.
|
||||
*/
|
||||
class DisplayDriver {
|
||||
public:
|
||||
DisplayDriver() = default;
|
||||
virtual ~DisplayDriver() = default;
|
||||
|
||||
/* Render buffer parameters. */
|
||||
struct Params {
|
||||
public:
|
||||
/* Render resolution, ignoring progressive resolution changes.
|
||||
* The texture buffer should be allocated with this size. */
|
||||
int2 size = make_int2(0, 0);
|
||||
|
||||
/* For border rendering, the full resolution of the render, and the offset within that larger
|
||||
* render. */
|
||||
int2 full_size = make_int2(0, 0);
|
||||
int2 full_offset = make_int2(0, 0);
|
||||
|
||||
bool modified(const Params &other) const
|
||||
{
|
||||
return !(full_offset == other.full_offset && full_size == other.full_size &&
|
||||
size == other.size);
|
||||
}
|
||||
};
|
||||
|
||||
/* Update the render from the rendering thread.
|
||||
*
|
||||
* Cycles periodically updates the render to be displayed. For multithreaded updates with
|
||||
* potentially multiple rendering devices, it will call these methods as follows.
|
||||
*
|
||||
* if (driver.update_begin(params, width, height)) {
|
||||
* parallel_for_each(rendering_device) {
|
||||
* buffer = driver.map_texture_buffer();
|
||||
* if (buffer) {
|
||||
* fill(buffer);
|
||||
* driver.unmap_texture_buffer();
|
||||
* }
|
||||
* }
|
||||
* driver.update_end();
|
||||
* }
|
||||
*
|
||||
* The parameters may dynamically change due to camera changes in the scene, and resources should
|
||||
* be re-allocated accordingly.
|
||||
*
|
||||
* The width and height passed to update_begin() are the effective render resolution taking into
|
||||
* account progressive resolution changes, which may be equal to or smaller than the params.size.
|
||||
* For efficiency, changes in this resolution should be handled without re-allocating resources,
|
||||
* but rather by using a subset of the full resolution buffer. */
|
||||
virtual bool update_begin(const Params &params, int width, int height) = 0;
|
||||
virtual void update_end() = 0;
|
||||
|
||||
virtual half4 *map_texture_buffer() = 0;
|
||||
virtual void unmap_texture_buffer() = 0;
|
||||
|
||||
/* Optionally return a handle to a native graphics API texture buffer. If supported,
|
||||
* the rendering device may write directly to this buffer instead of calling
|
||||
* map_texture_buffer() and unmap_texture_buffer(). */
|
||||
class GraphicsInterop {
|
||||
public:
|
||||
/* Dimensions of the buffer, in pixels. */
|
||||
int buffer_width = 0;
|
||||
int buffer_height = 0;
|
||||
|
||||
/* OpenGL pixel buffer object. */
|
||||
int opengl_pbo_id = 0;
|
||||
|
||||
/* Clear the entire buffer before doing partial write to it. */
|
||||
bool need_clear = false;
|
||||
};
|
||||
|
||||
virtual GraphicsInterop graphics_interop_get()
|
||||
{
|
||||
return GraphicsInterop();
|
||||
}
|
||||
|
||||
/* (De)activate graphics context required for editing or deleting the graphics interop
|
||||
* object.
|
||||
*
|
||||
* For example, destruction of the CUDA object associated with an OpenGL requires the
|
||||
* OpenGL context to be active. */
|
||||
virtual void graphics_interop_activate(){};
|
||||
virtual void graphics_interop_deactivate(){};
|
||||
|
||||
/* Clear the display buffer by filling it with zeros. */
|
||||
virtual void clear() = 0;
|
||||
|
||||
/* Draw the render using the native graphics API.
|
||||
*
|
||||
* Note that this may be called in parallel to updates. The implementation is responsible for
|
||||
* mutex locking or other mechanisms to avoid conflicts.
|
||||
*
|
||||
* The parameters may have changed since the last update. The implementation is responsible for
|
||||
* deciding to skip or adjust render display for such changes.
|
||||
*
|
||||
* Host application drawing the render buffer should use Session.draw(), which will
|
||||
* call this method. */
|
||||
virtual void draw(const Params &params) = 0;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
@@ -25,7 +25,7 @@
|
||||
#include "render/bake.h"
|
||||
#include "render/buffers.h"
|
||||
#include "render/camera.h"
|
||||
#include "render/gpu_display.h"
|
||||
#include "render/display_driver.h"
|
||||
#include "render/graph.h"
|
||||
#include "render/integrator.h"
|
||||
#include "render/light.h"
|
||||
@@ -162,7 +162,7 @@ bool Session::ready_to_reset()
|
||||
|
||||
void Session::run_main_render_loop()
|
||||
{
|
||||
path_trace_->clear_gpu_display();
|
||||
path_trace_->clear_display();
|
||||
|
||||
while (true) {
|
||||
RenderWork render_work = run_update_for_next_iteration();
|
||||
@@ -514,9 +514,9 @@ void Session::set_pause(bool pause)
|
||||
}
|
||||
}
|
||||
|
||||
void Session::set_gpu_display(unique_ptr<GPUDisplay> gpu_display)
|
||||
void Session::set_display_driver(unique_ptr<DisplayDriver> driver)
|
||||
{
|
||||
path_trace_->set_gpu_display(move(gpu_display));
|
||||
path_trace_->set_display_driver(move(driver));
|
||||
}
|
||||
|
||||
double Session::get_estimated_remaining_time() const
|
||||
|
||||
@@ -35,9 +35,9 @@ CCL_NAMESPACE_BEGIN
|
||||
class BufferParams;
|
||||
class Device;
|
||||
class DeviceScene;
|
||||
class DisplayDriver;
|
||||
class PathTrace;
|
||||
class Progress;
|
||||
class GPUDisplay;
|
||||
class RenderBuffers;
|
||||
class Scene;
|
||||
class SceneParams;
|
||||
@@ -143,7 +143,7 @@ class Session {
|
||||
void set_samples(int samples);
|
||||
void set_time_limit(double time_limit);
|
||||
|
||||
void set_gpu_display(unique_ptr<GPUDisplay> gpu_display);
|
||||
void set_display_driver(unique_ptr<DisplayDriver> driver);
|
||||
|
||||
double get_estimated_remaining_time() const;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user