Metal: MTLQueryPool implementation adding support for occlusion queries.

When a query begins, the current visibility result buffer needs to be associated with the currently active Render Pass. The MTLContext and MTLCommandBuffer are responsible for ensuring new render pass objects are created if the visibility state changes. Authored by Apple: Michael Parkin-White Ref T96261 Reviewed By: fclem Maniphest Tasks: T96261 Differential Revision: https://developer.blender.org/D15356
2022-07-19 16:59:42 +02:00
parent 3370c1a8a7
commit 6bba4d864e
7 changed files with 228 additions and 3 deletions
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -195,6 +195,7 @@ set(METAL_SRC
  metal/mtl_debug.mm
  metal/mtl_framebuffer.mm
  metal/mtl_memory.mm
+  metal/mtl_query.mm
  metal/mtl_state.mm
  metal/mtl_texture.mm
  metal/mtl_texture_util.mm
@@ -206,6 +207,7 @@ set(METAL_SRC
  metal/mtl_debug.hh
  metal/mtl_framebuffer.hh
  metal/mtl_memory.hh
+  metal/mtl_query.hh
  metal/mtl_state.hh
  metal/mtl_texture.hh
 )
--- a/source/blender/gpu/metal/mtl_backend.mm
+++ b/source/blender/gpu/metal/mtl_backend.mm
@@ -10,6 +10,7 @@
 #include "mtl_backend.hh"
 #include "mtl_context.hh"
 #include "mtl_framebuffer.hh"
+#include "mtl_query.hh"

 #include "gpu_capabilities_private.hh"
 #include "gpu_platform_private.hh"
@@ -64,8 +65,7 @@ IndexBuf *MTLBackend::indexbuf_alloc()

 QueryPool *MTLBackend::querypool_alloc()
 {
-  /* TODO(Metal): Implement MTLQueryPool. */
-  return nullptr;
+  return new MTLQueryPool();
 };

 Shader *MTLBackend::shader_alloc(const char *name)
--- a/source/blender/gpu/metal/mtl_command_buffer.mm
+++ b/source/blender/gpu/metal/mtl_command_buffer.mm
@@ -308,6 +308,12 @@ id<MTLRenderCommandEncoder> MTLCommandBufferManager::ensure_begin_render_command
    active_pass_descriptor_ = active_frame_buffer_->bake_render_pass_descriptor(
        is_rebind && (!active_frame_buffer_->get_pending_clear()));

+    /* Determine if there is a visibility buffer assigned to the context. */
+    gpu::MTLBuffer *visibility_buffer = context_.get_visibility_buffer();
+    this->active_pass_descriptor_.visibilityResultBuffer =
+        (visibility_buffer) ? visibility_buffer->get_metal_buffer() : nil;
+    context_.clear_visibility_dirty();
+
    /* Ensure we have already cleaned up our previous render command encoder. */
    BLI_assert(active_render_command_encoder_ == nil);

--- a/source/blender/gpu/metal/mtl_context.hh
+++ b/source/blender/gpu/metal/mtl_context.hh
@@ -3,6 +3,8 @@
 /** \file
 * \ingroup gpu
 */
+#pragma once
+
 #include "MEM_guardedalloc.h"

 #include "gpu_context_private.hh"
@@ -588,6 +590,10 @@ class MTLContext : public Context {
  bool is_inside_frame_ = false;
  uint current_frame_index_;

+  /* Visibility buffer for MTLQuery results. */
+  gpu::MTLBuffer *visibility_buffer_ = nullptr;
+  bool visibility_is_dirty_ = false;
+
 public:
  /* Shaders and Pipeline state. */
  MTLContextGlobalShaderPipelineState pipeline_state;
@@ -660,6 +666,18 @@ class MTLContext : public Context {
  void set_scissor(int scissor_x, int scissor_y, int scissor_width, int scissor_height);
  void set_scissor_enabled(bool scissor_enabled);

+  /* Visibility buffer control. */
+  void set_visibility_buffer(gpu::MTLBuffer *buffer);
+  gpu::MTLBuffer *get_visibility_buffer() const;
+
+  /* Returns whether the visibility buffer for query results
+   * has changed. A change requires a new RenderPass in order
+   * to take effect. */
+  bool is_visibility_dirty() const;
+
+  /* Reset dirty flag state for visibility buffer. */
+  void clear_visibility_dirty();
+
  /* Texture utilities. */
  MTLContextTextureUtils &get_texture_utils()
  {
--- a/source/blender/gpu/metal/mtl_context.mm
+++ b/source/blender/gpu/metal/mtl_context.mm
@@ -188,7 +188,8 @@ id<MTLRenderCommandEncoder> MTLContext::ensure_begin_render_pass()
   * framebuffer state has been modified (is_dirty). */
  if (!this->main_command_buffer.is_inside_render_pass() ||
      this->active_fb != this->main_command_buffer.get_active_framebuffer() ||
-      this->main_command_buffer.get_active_framebuffer()->get_dirty()) {
+      this->main_command_buffer.get_active_framebuffer()->get_dirty() ||
+      this->is_visibility_dirty()) {

    /* Validate bound framebuffer before beginning render pass. */
    if (!static_cast<MTLFrameBuffer *>(this->active_fb)->validate_render_pass()) {
@@ -371,6 +372,45 @@ void MTLContext::set_scissor_enabled(bool scissor_enabled)

 /** \} */

+/* -------------------------------------------------------------------- */
+/** \name Visibility buffer control for MTLQueryPool.
+ * \{ */
+
+void MTLContext::set_visibility_buffer(gpu::MTLBuffer *buffer)
+{
+  /* Any change of the results destination -- a different buffer, or switching to/from "no
+   * buffer" (nullptr) -- raises the dirty flag so that the render pass gets broken and the new
+   * destination can be picked up. The flag is sticky: once raised it stays raised here and is
+   * only lowered through clear_visibility_dirty(). */
+  if (buffer != visibility_buffer_) {
+    visibility_is_dirty_ = true;
+  }
+  visibility_buffer_ = buffer;
+
+  /* Only a real buffer can be flagged as used. */
+  if (visibility_buffer_ != nullptr) {
+    visibility_buffer_->debug_ensure_used();
+  }
+}
+
+/* Buffer currently assigned to receive visibility (query) results, or nullptr when none is
+ * assigned. */
+gpu::MTLBuffer *MTLContext::get_visibility_buffer() const
+{
+  return this->visibility_buffer_;
+}
+
+/* Lower the dirty flag raised by set_visibility_buffer(). */
+void MTLContext::clear_visibility_dirty()
+{
+  this->visibility_is_dirty_ = false;
+}
+
+/* True when the visibility buffer assignment has changed and the change has not yet been
+ * acknowledged through clear_visibility_dirty(). */
+bool MTLContext::is_visibility_dirty() const
+{
+  return this->visibility_is_dirty_;
+}
+
+/** \} */
+
 /* -------------------------------------------------------------------- */
 /** \name Texture State Management
 * \{ */
--- a/source/blender/gpu/metal/mtl_query.hh
+++ b/source/blender/gpu/metal/mtl_query.hh
@@ -0,0 +1,39 @@
+/** \file
+ * \ingroup gpu
+ */
+
+#pragma once
+
+#include "BLI_vector.hh"
+
+#include "gpu_query.hh"
+#include "mtl_context.hh"
+
+namespace blender::gpu {
+
+/**
+ * Metal implementation of #QueryPool providing occlusion queries.
+ *
+ * Results are written by the GPU into a growing list of visibility result buffers; every issued
+ * query occupies one slot in one of those buffers.
+ */
+class MTLQueryPool : public QueryPool {
+ private:
+  /** Number of queries that have been issued since last initialization.
+   * Also the index of the next free result slot, counted across all entries of `buffer_`. */
+  uint32_t query_issued_ = 0;
+  /** Type of this query pool. Defaulted so the member never holds an indeterminate value
+   * before `init()` is called. */
+  GPUQueryType type_ = GPU_QUERY_OCCLUSION;
+  /** Can only be initialized once. */
+  bool initialized_ = false;
+  /** Metal visibility result mode corresponding to `type_`. */
+  MTLVisibilityResultMode mtl_type_ = MTLVisibilityResultModeBoolean;
+  /** Visibility result buffers owned by this pool; released in the destructor. */
+  Vector<gpu::MTLBuffer *> buffer_;
+
+  /** Allocate one more visibility result buffer and append it to `buffer_`. */
+  void allocate_buffer();
+
+ public:
+  MTLQueryPool();
+  ~MTLQueryPool();
+
+  /** Set the query type and reset the issued-query counter. Must be called only once. */
+  void init(GPUQueryType type) override;
+
+  /** Start recording a new query into the next free result slot. */
+  void begin_query() override;
+  /** Stop recording visibility results on the active render command encoder. */
+  void end_query() override;
+
+  /** Wait for the GPU and copy one result per issued query into `r_values`. */
+  void get_occlusion_result(MutableSpan<uint32_t> r_values) override;
+};
+}  // namespace blender::gpu
--- a/source/blender/gpu/metal/mtl_query.mm
+++ b/source/blender/gpu/metal/mtl_query.mm
@@ -0,0 +1,120 @@
+/** \file
+ * \ingroup gpu
+ */
+
+#include "mtl_query.hh"
+
+namespace blender::gpu {
+
+/* Number of visibility result slots held by each buffer the query pool allocates. */
+static const size_t VISIBILITY_COUNT_PER_BUFFER = 512;
+/* Size in bytes of a single visibility result slot.
+ * This is defined in the documentation but not queryable programmatically:
+ * https://developer.apple.com/documentation/metal/mtlvisibilityresultmode/mtlvisibilityresultmodeboolean?language=objc
+ */
+static const size_t VISIBILITY_RESULT_SIZE_IN_BYTES = 8;
+
+/* A freshly constructed pool always owns one visibility result buffer. */
+MTLQueryPool::MTLQueryPool()
+{
+  this->allocate_buffer();
+}
+/* Release every visibility result buffer owned by the pool. */
+MTLQueryPool::~MTLQueryPool()
+{
+  /* The active context may still reference one of our buffers as its visibility result
+   * destination: begin_query() assigns it and only get_occlusion_result() resets it. Detach it
+   * before releasing the buffers so that no later render pass is set up with a buffer this pool
+   * no longer owns. Only the currently active context can be reached from here. */
+  MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+  gpu::MTLBuffer *bound_buffer = (ctx != nullptr) ? ctx->get_visibility_buffer() : nullptr;
+
+  for (gpu::MTLBuffer *buf : buffer_) {
+    BLI_assert(buf);
+    if (bound_buffer != nullptr && buf == bound_buffer) {
+      ctx->set_visibility_buffer(nullptr);
+      bound_buffer = nullptr;
+    }
+    buf->free();
+  }
+}
+
+/* Append one more visibility result buffer to the pool. */
+void MTLQueryPool::allocate_buffer()
+{
+  /* Each buffer holds a fixed number of fixed-size result slots. */
+  const size_t size_in_bytes = VISIBILITY_COUNT_PER_BUFFER * VISIBILITY_RESULT_SIZE_IN_BYTES;
+
+  /* NOTE(review): the meaning of the second argument is not visible here; results are later
+   * read through get_host_ptr(), so it presumably requests CPU-visible memory -- confirm. */
+  gpu::MTLBuffer *new_buffer = MTLContext::get_global_memory_manager().allocate_buffer(
+      size_in_bytes, true);
+  BLI_assert(new_buffer);
+
+  buffer_.append(new_buffer);
+}
+
+/* Translate a GPU module query type into the Metal visibility result mode.
+ * Only occlusion queries are handled; any other type asserts and falls back to the same
+ * boolean mode. */
+static inline MTLVisibilityResultMode to_mtl_type(GPUQueryType type)
+{
+  switch (type) {
+    case GPU_QUERY_OCCLUSION:
+      return MTLVisibilityResultModeBoolean;
+    default:
+      break;
+  }
+  /* Unhandled query type. */
+  BLI_assert(0);
+  return MTLVisibilityResultModeBoolean;
+}
+
+/* Configure the pool for the given query type and reset the issued-query counter.
+ * May only be called once per pool. */
+void MTLQueryPool::init(GPUQueryType type)
+{
+  BLI_assert(initialized_ == false);
+
+  type_ = type;
+  mtl_type_ = to_mtl_type(type_);
+  query_issued_ = 0;
+  initialized_ = true;
+}
+
+/* Begin a new query: reserve the next result slot and enable visibility result recording at
+ * that slot on the active render command encoder. */
+void MTLQueryPool::begin_query()
+{
+  MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+
+  /* Locate the slot for this query: the buffer it lives in and the byte offset inside it. */
+  const int query_index = query_issued_;
+  const int buffer_index = query_index / VISIBILITY_COUNT_PER_BUFFER;
+  const size_t slot_offset = (query_index % VISIBILITY_COUNT_PER_BUFFER) *
+                             VISIBILITY_RESULT_SIZE_IN_BYTES;
+
+  /* Grow the buffer pool when the slot falls past the last allocated buffer. */
+  if (buffer_index >= buffer_.size()) {
+    allocate_buffer();
+  }
+  BLI_assert(buffer_index < buffer_.size());
+
+  /* Assign the destination buffer on the context. If this differs from the previous one, the
+   * context flags it as dirty and a new render pass is begun below, with an updated reference
+   * in the MTLRenderPassDescriptor. */
+  ctx->set_visibility_buffer(buffer_[buffer_index]);
+  ctx->ensure_begin_render_pass();
+
+  id<MTLRenderCommandEncoder> encoder =
+      ctx->main_command_buffer.get_active_render_command_encoder();
+  [encoder setVisibilityResultMode:mtl_type_ offset:slot_offset];
+
+  ++query_issued_;
+}
+
+/* End the current query by disabling visibility result recording on the active render
+ * command encoder. */
+void MTLQueryPool::end_query()
+{
+  MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+  id<MTLRenderCommandEncoder> encoder =
+      ctx->main_command_buffer.get_active_render_command_encoder();
+
+  [encoder setVisibilityResultMode:MTLVisibilityResultModeDisabled offset:0];
+}
+
+/* Wait for all GPU work to finish and copy the result of every issued query into `r_values`.
+ *
+ * `r_values` is expected to hold at least one element per issued query. Entry `i` receives the
+ * 64-bit visibility result of query `i`, truncated to 32 bits. The context's visibility buffer
+ * is reset once the results have been read. */
+void MTLQueryPool::get_occlusion_result(MutableSpan<uint32_t> r_values)
+{
+  MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+
+  /* The destination must be able to receive one value per issued query. */
+  BLI_assert(r_values.size() >= query_issued_);
+
+  /* Create a blit encoder to synchronize the query buffer results between
+   * GPU and CPU when not using shared-memory. */
+  if ([ctx->device hasUnifiedMemory] == false) {
+    id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
+    BLI_assert(blit_encoder);
+    for (gpu::MTLBuffer *buf : buffer_) {
+      [blit_encoder synchronizeResource:buf->get_metal_buffer()];
+    }
+    BLI_assert(ctx->get_inside_frame());
+  }
+
+  /* Wait for GPU operations to complete and for query buffer contents
+   * to be synchronized back to host memory. */
+  GPU_finish();
+
+  /* Never write past the end of the provided container, even in release builds where the
+   * assert above is compiled out. */
+  const int64_t issued_count = int64_t(query_issued_);
+  const int64_t result_count = (r_values.size() < issued_count) ? int64_t(r_values.size()) :
+                                                                  issued_count;
+
+  /* Iterate through all issued queries, locate the buffer and slot holding each result, and
+   * copy it into the provided container. */
+  for (const int64_t i : IndexRange(result_count)) {
+    const int64_t requested_buffer = i / VISIBILITY_COUNT_PER_BUFFER;
+    const uint64_t *queries = static_cast<const uint64_t *>(
+        buffer_[requested_buffer]->get_host_ptr());
+    r_values[i] = static_cast<uint32_t>(queries[i % VISIBILITY_COUNT_PER_BUFFER]);
+  }
+
+  /* Results are no longer needed: detach the visibility buffer from the context. */
+  ctx->set_visibility_buffer(nullptr);
+}
+
+}  // namespace blender::gpu