Fix #126364: Metal: modified texture usage flags causing cache misses

For Metal we can change the texture usage flags to get more optimal
behaviour - one example is adding the attachment flag so we can utilise
renders to do texture clears. However, these usage flags are also used as
part of the match criteria when trying to reuse released textures in
the texture pool.

These modifications mean that a later request for the same type of texture
will fail to match the pooled texture, causing a cache miss. When we render to an
image-view the texture pool is not released until the final sample has
been rendered, as we consider the entire render to be a single frame
(as opposed to normal viewport rendering, where we present the
intermediate results).

This causes the texture pool to grow continuously, hence the large
memory usage. This fix splits the usage flags
into two sets: the internal ones we use to create the MTLTexture (which
we may modify) and the originally requested ones. The originally requested
ones are used for the texture pool matching.

This fix also improves memory efficiency for normal viewport rendering.

Mr Elephant Scene
Before -> After
Load scene in viewport: 13.04 GB -> 9.15 GB
Viewport Render Image: 78.69 GB -> 16.61 GB

Authored by Apple: James McCarthy

Pull Request: https://projects.blender.org/blender/blender/pulls/129951
This commit is contained in:
Jason Fielder
2024-11-07 15:53:09 +01:00
committed by Clément Foucault
parent d9748470fa
commit 658700ddff
3 changed files with 33 additions and 23 deletions

View File

@@ -173,7 +173,7 @@ bool MTLFrameBuffer::check(char err_out[256])
for (int col_att = 0; col_att < this->get_attachment_count(); col_att++) {
MTLAttachment att = this->get_color_attachment(col_att);
if (att.used) {
if (att.texture->gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT) {
if (att.texture->internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT) {
if (first) {
dim_x = att.texture->width_get();
dim_y = att.texture->height_get();
@@ -217,7 +217,7 @@ bool MTLFrameBuffer::check(char err_out[256])
dim_x = depth_att.texture->width_get();
dim_y = depth_att.texture->height_get();
first = false;
valid = (depth_att.texture->gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT);
valid = (depth_att.texture->internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT);
if (!valid) {
const char *format =
@@ -254,7 +254,8 @@ bool MTLFrameBuffer::check(char err_out[256])
dim_x = stencil_att.texture->width_get();
dim_y = stencil_att.texture->height_get();
first = false;
valid = (stencil_att.texture->gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT);
valid = (stencil_att.texture->internal_gpu_image_usage_flags_ &
GPU_TEXTURE_USAGE_ATTACHMENT);
if (!valid) {
const char *format =
"Framebuffer %s: Stencil attachment does not have usage "

View File

@@ -250,6 +250,9 @@ class MTLTexture : public Texture {
int mtl_max_mips_ = 1;
bool has_generated_mips_ = false;
/* We may modify the requested usage flags so store them separately. */
eGPUTextureUsage internal_gpu_image_usage_flags_;
/* VBO. */
MTLVertBuf *vert_buffer_;
id<MTLBuffer> vert_buffer_mtl_;

View File

@@ -88,7 +88,8 @@ gpu::MTLTexture::MTLTexture(const char *name,
/* Assign MTLTexture. */
texture_ = metal_texture;
[texture_ retain];
gpu_image_usage_flags_ = gpu_usage_from_mtl(metal_texture.usage);
internal_gpu_image_usage_flags_ = gpu_usage_from_mtl(metal_texture.usage);
gpu_image_usage_flags_ = internal_gpu_image_usage_flags_;
/* Flag as Baked. */
is_baked_ = true;
@@ -197,7 +198,7 @@ void gpu::MTLTexture::bake_mip_swizzle_view()
* rendering. */
BLI_assert_msg(
(texture_view_pixel_format == texture_.pixelFormat) ||
(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW),
(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW),
"Usage Flag GPU_TEXTURE_USAGE_FORMAT_VIEW must be specified if a texture view is "
"created with a different format to its source texture.");
@@ -703,7 +704,7 @@ void gpu::MTLTexture::update_sub(
* format is unwritable, if our texture has not been initialized with
* texture view support, use a staging texture. */
if ((compatible_write_format != destination_format) &&
!(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW))
!(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW))
{
use_staging_texture = true;
}
@@ -715,11 +716,11 @@ void gpu::MTLTexture::update_sub(
/* For compute, we should use a staging texture to avoid texture write usage,
* if it has not been specified for the texture. Using shader-write disables
* lossless texture compression, so this is best to avoid where possible. */
if (!(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_SHADER_WRITE)) {
if (!(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_SHADER_WRITE)) {
use_staging_texture = true;
}
if (compatible_write_format != destination_format) {
if (!(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW)) {
if (!(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW)) {
use_staging_texture = true;
}
}
@@ -744,7 +745,7 @@ void gpu::MTLTexture::update_sub(
else {
/* Use texture view. */
if (compatible_write_format != destination_format) {
BLI_assert(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW);
BLI_assert(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW);
texture_handle = [texture_ newTextureViewWithPixelFormat:compatible_write_format];
}
else {
@@ -1726,7 +1727,7 @@ void gpu::MTLTexture::read_internal(int mip,
}
/* Create Texture View for SRGB special case to bypass internal type conversion. */
if (format_ == GPU_SRGB8_A8) {
BLI_assert(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW);
BLI_assert(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW);
read_texture = [read_texture newTextureViewWithPixelFormat:MTLPixelFormatRGBA8Unorm];
}
@@ -2184,7 +2185,7 @@ bool gpu::MTLTexture::init_internal(GPUTexture *src,
texture_view_dirty_flags_ |= TEXTURE_VIEW_MIP_DIRTY;
/* Assign usage. */
gpu_image_usage_flags_ = GPU_texture_usage(src);
internal_gpu_image_usage_flags_ = GPU_texture_usage(src);
/* Assign texture as view. */
gpu::MTLTexture *mtltex = static_cast<gpu::MTLTexture *>(unwrap(src));
@@ -2223,6 +2224,10 @@ bool gpu::MTLTexture::texture_is_baked()
/* Prepare texture parameters after initialization, but before baking. */
void gpu::MTLTexture::prepare_internal()
{
/* Take a copy of the flags so that any modifications we make won't affect the texture
* cache/pool match finding test. */
internal_gpu_image_usage_flags_ = gpu_image_usage_flags_;
/* Metal: Texture clearing is done using frame-buffer clear. This has no performance impact or
* bandwidth implications for lossless compression and is considered best-practice.
*
@@ -2230,10 +2235,11 @@ void gpu::MTLTexture::prepare_internal()
* NOTE: Emulated atomic textures cannot support render-target usage. For clearing, the backing
* buffer is cleared instead.
*/
if (!((gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) &&
if (!((internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) &&
!MTLBackend::get_capabilities().supports_texture_atomics))
{
gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_ATTACHMENT;
/* Force attachment usage - see comment above. */
internal_gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_ATTACHMENT;
}
/* Derive maximum number of mip levels by default.
@@ -2277,7 +2283,7 @@ void gpu::MTLTexture::ensure_baked()
* disabled. Enabling the texture_view or texture_read usage flags disables lossless
* compression, so the situations in which it is used should be limited. */
if (format_ == GPU_SRGB8_A8) {
gpu_image_usage_flags_ = gpu_image_usage_flags_ | GPU_TEXTURE_USAGE_FORMAT_VIEW;
internal_gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_FORMAT_VIEW;
}
/* Create texture descriptor. */
@@ -2296,7 +2302,7 @@ void gpu::MTLTexture::ensure_baked()
texture_descriptor_.depth = 1;
texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_1D_ARRAY) ? h_ : 1;
texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
texture_descriptor_.usage = mtl_usage_from_gpu(gpu_image_usage_flags_);
texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
texture_descriptor_.storageMode = MTLStorageModePrivate;
texture_descriptor_.sampleCount = 1;
texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
@@ -2316,7 +2322,7 @@ void gpu::MTLTexture::ensure_baked()
texture_descriptor_.depth = 1;
texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_2D_ARRAY) ? d_ : 1;
texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
texture_descriptor_.usage = mtl_usage_from_gpu(gpu_image_usage_flags_);
texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
texture_descriptor_.storageMode = MTLStorageModePrivate;
texture_descriptor_.sampleCount = 1;
texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
@@ -2334,7 +2340,7 @@ void gpu::MTLTexture::ensure_baked()
texture_descriptor_.depth = d_;
texture_descriptor_.arrayLength = 1;
texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
texture_descriptor_.usage = mtl_usage_from_gpu(gpu_image_usage_flags_);
texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
texture_descriptor_.storageMode = MTLStorageModePrivate;
texture_descriptor_.sampleCount = 1;
texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
@@ -2357,7 +2363,7 @@ void gpu::MTLTexture::ensure_baked()
texture_descriptor_.depth = 1;
texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_CUBE_ARRAY) ? d_ / 6 : 1;
texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
texture_descriptor_.usage = mtl_usage_from_gpu(gpu_image_usage_flags_);
texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
texture_descriptor_.storageMode = MTLStorageModePrivate;
texture_descriptor_.sampleCount = 1;
texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
@@ -2374,7 +2380,7 @@ void gpu::MTLTexture::ensure_baked()
texture_descriptor_.depth = 1;
texture_descriptor_.arrayLength = 1;
texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
texture_descriptor_.usage = mtl_usage_from_gpu(gpu_image_usage_flags_);
texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
texture_descriptor_.storageMode = MTLStorageModePrivate;
texture_descriptor_.sampleCount = 1;
texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
@@ -2392,7 +2398,7 @@ void gpu::MTLTexture::ensure_baked()
/* Override storage mode if memoryless attachments are being used.
* NOTE: Memoryless textures can only be supported on TBDR GPUs. */
if (gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) {
if (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) {
const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
if (is_tile_based_arch) {
texture_descriptor_.storageMode = MTLStorageModeMemoryless;
@@ -2404,7 +2410,7 @@ void gpu::MTLTexture::ensure_baked()
* allocate a buffer-backed 2D texture and perform atomic operations on this instead. Support
* for 2D Array textures and 3D textures is achieved via packing layers into the 2D texture. */
bool native_texture_atomics = MTLBackend::get_capabilities().supports_texture_atomics;
if ((gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) && !native_texture_atomics) {
if ((internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) && !native_texture_atomics) {
/* Validate format support. */
BLI_assert_msg(ELEM(type_, GPU_TEXTURE_2D, GPU_TEXTURE_2D_ARRAY, GPU_TEXTURE_3D),
@@ -2447,7 +2453,7 @@ void gpu::MTLTexture::ensure_baked()
size_t total_bytes = bytes_per_row * texture_descriptor_.height;
backing_buffer_ = MTLContext::get_global_memory_manager()->allocate(
total_bytes, (gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_HOST_READ));
total_bytes, (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_HOST_READ));
BLI_assert(backing_buffer_ != nullptr);
/* NOTE: Fallback buffer-backed texture always set to Texture2D. */
@@ -2484,7 +2490,7 @@ void gpu::MTLTexture::ensure_baked()
texture_ = [ctx->device newTextureWithDescriptor:texture_descriptor_];
#ifndef NDEBUG
if (gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) {
if (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) {
texture_.label = [NSString stringWithFormat:@"MemorylessTexture_%s", this->get_name()];
}
else {