From 658700ddffae2c4e576cd519e2bc4878106a8dcd Mon Sep 17 00:00:00 2001 From: Jason Fielder Date: Thu, 7 Nov 2024 15:53:09 +0100 Subject: [PATCH] Fix #126364: Metal: modified texture usage flags causing cache misses For Metal we can change the texture usage flags to get more optimal behaviour - one example is adding the attachment flag so we can utilise renders to do texture clears. However, these usage flags are used as part of the match criteria when trying to reuse released textures in the texture pool. The modifications mean a request for the same type of texture will fail to match, causing a cache miss. When we render to an image-view the texture pool is not released until the final sample has been rendered, as we consider the entire render to be a single frame (as opposed to normal viewport rendering, when we are presenting the intermediate results). This causes the texture pool to keep growing, hence the large memory usage. This fix splits the usage flags into two sets: the internal ones we use to create the MTLTexture (which we may modify) and the originally requested ones. The originally requested ones are used for the texture pool matching. This fix also improves memory efficiency for normal viewport rendering. 
Mr Elephant Scene Before -> After Load scene in viewport: 13.04Gb -> 9.15 Gb Viewport Render Image: 78.69Gb -> 16.61Gb Authored by Apple: James McCarthy Pull Request: https://projects.blender.org/blender/blender/pulls/129951 --- source/blender/gpu/metal/mtl_framebuffer.mm | 7 ++-- source/blender/gpu/metal/mtl_texture.hh | 3 ++ source/blender/gpu/metal/mtl_texture.mm | 46 ++++++++++++--------- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/source/blender/gpu/metal/mtl_framebuffer.mm b/source/blender/gpu/metal/mtl_framebuffer.mm index 548e5839e74..9cf3c8034b7 100644 --- a/source/blender/gpu/metal/mtl_framebuffer.mm +++ b/source/blender/gpu/metal/mtl_framebuffer.mm @@ -173,7 +173,7 @@ bool MTLFrameBuffer::check(char err_out[256]) for (int col_att = 0; col_att < this->get_attachment_count(); col_att++) { MTLAttachment att = this->get_color_attachment(col_att); if (att.used) { - if (att.texture->gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT) { + if (att.texture->internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT) { if (first) { dim_x = att.texture->width_get(); dim_y = att.texture->height_get(); @@ -217,7 +217,7 @@ bool MTLFrameBuffer::check(char err_out[256]) dim_x = depth_att.texture->width_get(); dim_y = depth_att.texture->height_get(); first = false; - valid = (depth_att.texture->gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT); + valid = (depth_att.texture->internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT); if (!valid) { const char *format = @@ -254,7 +254,8 @@ bool MTLFrameBuffer::check(char err_out[256]) dim_x = stencil_att.texture->width_get(); dim_y = stencil_att.texture->height_get(); first = false; - valid = (stencil_att.texture->gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT); + valid = (stencil_att.texture->internal_gpu_image_usage_flags_ & + GPU_TEXTURE_USAGE_ATTACHMENT); if (!valid) { const char *format = "Framebuffer %s: Stencil attachment does not have usage " diff --git 
a/source/blender/gpu/metal/mtl_texture.hh b/source/blender/gpu/metal/mtl_texture.hh index d97e8cffd3b..ea4a3f7ee36 100644 --- a/source/blender/gpu/metal/mtl_texture.hh +++ b/source/blender/gpu/metal/mtl_texture.hh @@ -250,6 +250,9 @@ class MTLTexture : public Texture { int mtl_max_mips_ = 1; bool has_generated_mips_ = false; + /* We may modify the requested usage flags so store them separately. */ + eGPUTextureUsage internal_gpu_image_usage_flags_; + /* VBO. */ MTLVertBuf *vert_buffer_; id vert_buffer_mtl_; diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm index c42e6e517fd..6c7db63a246 100644 --- a/source/blender/gpu/metal/mtl_texture.mm +++ b/source/blender/gpu/metal/mtl_texture.mm @@ -88,7 +88,8 @@ gpu::MTLTexture::MTLTexture(const char *name, /* Assign MTLTexture. */ texture_ = metal_texture; [texture_ retain]; - gpu_image_usage_flags_ = gpu_usage_from_mtl(metal_texture.usage); + internal_gpu_image_usage_flags_ = gpu_usage_from_mtl(metal_texture.usage); + gpu_image_usage_flags_ = internal_gpu_image_usage_flags_; /* Flag as Baked. */ is_baked_ = true; @@ -197,7 +198,7 @@ void gpu::MTLTexture::bake_mip_swizzle_view() * rendering. */ BLI_assert_msg( (texture_view_pixel_format == texture_.pixelFormat) || - (gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW), + (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW), "Usage Flag GPU_TEXTURE_USAGE_FORMAT_VIEW must be specified if a texture view is " "created with a different format to its source texture."); @@ -703,7 +704,7 @@ void gpu::MTLTexture::update_sub( * format is unwritable, if our texture has not been initialized with * texture view support, use a staging texture. 
*/ if ((compatible_write_format != destination_format) && - !(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW)) + !(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW)) { use_staging_texture = true; } @@ -715,11 +716,11 @@ void gpu::MTLTexture::update_sub( /* For compute, we should use a stating texture to avoid texture write usage, * if it has not been specified for the texture. Using shader-write disables * lossless texture compression, so this is best to avoid where possible. */ - if (!(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_SHADER_WRITE)) { + if (!(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_SHADER_WRITE)) { use_staging_texture = true; } if (compatible_write_format != destination_format) { - if (!(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW)) { + if (!(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW)) { use_staging_texture = true; } } @@ -744,7 +745,7 @@ void gpu::MTLTexture::update_sub( else { /* Use texture view. */ if (compatible_write_format != destination_format) { - BLI_assert(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW); + BLI_assert(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW); texture_handle = [texture_ newTextureViewWithPixelFormat:compatible_write_format]; } else { @@ -1726,7 +1727,7 @@ void gpu::MTLTexture::read_internal(int mip, } /* Create Texture View for SRGB special case to bypass internal type conversion. */ if (format_ == GPU_SRGB8_A8) { - BLI_assert(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW); + BLI_assert(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW); read_texture = [read_texture newTextureViewWithPixelFormat:MTLPixelFormatRGBA8Unorm]; } @@ -2184,7 +2185,7 @@ bool gpu::MTLTexture::init_internal(GPUTexture *src, texture_view_dirty_flags_ |= TEXTURE_VIEW_MIP_DIRTY; /* Assign usage. */ - gpu_image_usage_flags_ = GPU_texture_usage(src); + internal_gpu_image_usage_flags_ = GPU_texture_usage(src); /* Assign texture as view. 
*/ gpu::MTLTexture *mtltex = static_cast(unwrap(src)); @@ -2223,6 +2224,10 @@ bool gpu::MTLTexture::texture_is_baked() /* Prepare texture parameters after initialization, but before baking. */ void gpu::MTLTexture::prepare_internal() { + /* Take a copy of the flags so that any modifications we make won't effect the texture + * cache/pool match finding test. */ + internal_gpu_image_usage_flags_ = gpu_image_usage_flags_; + /* Metal: Texture clearing is done using frame-buffer clear. This has no performance impact or * bandwidth implications for lossless compression and is considered best-practice. * @@ -2230,10 +2235,11 @@ void gpu::MTLTexture::prepare_internal() * NOTE: Emulated atomic textures cannot support render-target usage. For clearing, the backing * buffer is cleared instead. */ - if (!((gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) && + if (!((internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) && !MTLBackend::get_capabilities().supports_texture_atomics)) { - gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_ATTACHMENT; + /* Force attachment usage - see comment above. */ + internal_gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_ATTACHMENT; } /* Derive maximum number of mip levels by default. @@ -2277,7 +2283,7 @@ void gpu::MTLTexture::ensure_baked() * disabled. Enabling the texture_view or texture_read usage flags disables lossless * compression, so the situations in which it is used should be limited. */ if (format_ == GPU_SRGB8_A8) { - gpu_image_usage_flags_ = gpu_image_usage_flags_ | GPU_TEXTURE_USAGE_FORMAT_VIEW; + internal_gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_FORMAT_VIEW; } /* Create texture descriptor. */ @@ -2296,7 +2302,7 @@ void gpu::MTLTexture::ensure_baked() texture_descriptor_.depth = 1; texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_1D_ARRAY) ? h_ : 1; texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? 
mtl_max_mips_ : 1; - texture_descriptor_.usage = mtl_usage_from_gpu(gpu_image_usage_flags_); + texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_); texture_descriptor_.storageMode = MTLStorageModePrivate; texture_descriptor_.sampleCount = 1; texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; @@ -2316,7 +2322,7 @@ void gpu::MTLTexture::ensure_baked() texture_descriptor_.depth = 1; texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_2D_ARRAY) ? d_ : 1; texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1; - texture_descriptor_.usage = mtl_usage_from_gpu(gpu_image_usage_flags_); + texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_); texture_descriptor_.storageMode = MTLStorageModePrivate; texture_descriptor_.sampleCount = 1; texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; @@ -2334,7 +2340,7 @@ void gpu::MTLTexture::ensure_baked() texture_descriptor_.depth = d_; texture_descriptor_.arrayLength = 1; texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1; - texture_descriptor_.usage = mtl_usage_from_gpu(gpu_image_usage_flags_); + texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_); texture_descriptor_.storageMode = MTLStorageModePrivate; texture_descriptor_.sampleCount = 1; texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; @@ -2357,7 +2363,7 @@ void gpu::MTLTexture::ensure_baked() texture_descriptor_.depth = 1; texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_CUBE_ARRAY) ? d_ / 6 : 1; texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? 
mtl_max_mips_ : 1; - texture_descriptor_.usage = mtl_usage_from_gpu(gpu_image_usage_flags_); + texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_); texture_descriptor_.storageMode = MTLStorageModePrivate; texture_descriptor_.sampleCount = 1; texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; @@ -2374,7 +2380,7 @@ void gpu::MTLTexture::ensure_baked() texture_descriptor_.depth = 1; texture_descriptor_.arrayLength = 1; texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1; - texture_descriptor_.usage = mtl_usage_from_gpu(gpu_image_usage_flags_); + texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_); texture_descriptor_.storageMode = MTLStorageModePrivate; texture_descriptor_.sampleCount = 1; texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; @@ -2392,7 +2398,7 @@ void gpu::MTLTexture::ensure_baked() /* Override storage mode if memoryless attachments are being used. * NOTE: Memoryless textures can only be supported on TBDR GPUs. */ - if (gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) { + if (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) { const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR); if (is_tile_based_arch) { texture_descriptor_.storageMode = MTLStorageModeMemoryless; @@ -2404,7 +2410,7 @@ void gpu::MTLTexture::ensure_baked() * allocate a buffer-backed 2D texture and perform atomic operations on this instead. Support * for 2D Array textures and 3D textures is achieved via packing layers into the 2D texture. */ bool native_texture_atomics = MTLBackend::get_capabilities().supports_texture_atomics; - if ((gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) && !native_texture_atomics) { + if ((internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) && !native_texture_atomics) { /* Validate format support. 
*/ BLI_assert_msg(ELEM(type_, GPU_TEXTURE_2D, GPU_TEXTURE_2D_ARRAY, GPU_TEXTURE_3D), @@ -2447,7 +2453,7 @@ void gpu::MTLTexture::ensure_baked() size_t total_bytes = bytes_per_row * texture_descriptor_.height; backing_buffer_ = MTLContext::get_global_memory_manager()->allocate( - total_bytes, (gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_HOST_READ)); + total_bytes, (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_HOST_READ)); BLI_assert(backing_buffer_ != nullptr); /* NOTE: Fallback buffer-backed texture always set to Texture2D. */ @@ -2484,7 +2490,7 @@ void gpu::MTLTexture::ensure_baked() texture_ = [ctx->device newTextureWithDescriptor:texture_descriptor_]; #ifndef NDEBUG - if (gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) { + if (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) { texture_.label = [NSString stringWithFormat:@"MemorylessTexture_%s", this->get_name()]; } else {