Fix #126364: Metal: modified texture usage flags causing cache misses
For Metal we can change the texture usage flags to get more optimal behaviour - one example is adding the attachment flag so we can utilise renders to do texture clears. However, these usage flags are also used as part of the match criteria when trying to reuse released textures in the texture pool. The modifications mean that a request for the same type of texture will fail to match, causing a cache miss. When we render to an image-view, the texture pool is not released until the final sample has been rendered, as we consider the entire render to be a single frame (as opposed to normal viewport rendering, where we present the intermediate results). This causes the texture pool to grow and grow, hence the large memory usage. This fix splits the usage flags into two sets: the internal ones we use to create the MTLTexture (which we may modify) and the originally requested ones. The originally requested ones are used for the texture pool matching. This fix also improves memory efficiency for normal viewport rendering. Mr Elephant Scene, Before -> After: Load scene in viewport: 13.04 GB -> 9.15 GB; Viewport Render Image: 78.69 GB -> 16.61 GB. Authored by Apple: James McCarthy. Pull Request: https://projects.blender.org/blender/blender/pulls/129951
This commit is contained in:
committed by
Clément Foucault
parent
d9748470fa
commit
658700ddff
@@ -173,7 +173,7 @@ bool MTLFrameBuffer::check(char err_out[256])
|
||||
for (int col_att = 0; col_att < this->get_attachment_count(); col_att++) {
|
||||
MTLAttachment att = this->get_color_attachment(col_att);
|
||||
if (att.used) {
|
||||
if (att.texture->gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT) {
|
||||
if (att.texture->internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT) {
|
||||
if (first) {
|
||||
dim_x = att.texture->width_get();
|
||||
dim_y = att.texture->height_get();
|
||||
@@ -217,7 +217,7 @@ bool MTLFrameBuffer::check(char err_out[256])
|
||||
dim_x = depth_att.texture->width_get();
|
||||
dim_y = depth_att.texture->height_get();
|
||||
first = false;
|
||||
valid = (depth_att.texture->gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT);
|
||||
valid = (depth_att.texture->internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT);
|
||||
|
||||
if (!valid) {
|
||||
const char *format =
|
||||
@@ -254,7 +254,8 @@ bool MTLFrameBuffer::check(char err_out[256])
|
||||
dim_x = stencil_att.texture->width_get();
|
||||
dim_y = stencil_att.texture->height_get();
|
||||
first = false;
|
||||
valid = (stencil_att.texture->gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT);
|
||||
valid = (stencil_att.texture->internal_gpu_image_usage_flags_ &
|
||||
GPU_TEXTURE_USAGE_ATTACHMENT);
|
||||
if (!valid) {
|
||||
const char *format =
|
||||
"Framebuffer %s: Stencil attachment does not have usage "
|
||||
|
||||
@@ -250,6 +250,9 @@ class MTLTexture : public Texture {
|
||||
int mtl_max_mips_ = 1;
|
||||
bool has_generated_mips_ = false;
|
||||
|
||||
/* We may modify the requested usage flags so store them separately. */
|
||||
eGPUTextureUsage internal_gpu_image_usage_flags_;
|
||||
|
||||
/* VBO. */
|
||||
MTLVertBuf *vert_buffer_;
|
||||
id<MTLBuffer> vert_buffer_mtl_;
|
||||
|
||||
@@ -88,7 +88,8 @@ gpu::MTLTexture::MTLTexture(const char *name,
|
||||
/* Assign MTLTexture. */
|
||||
texture_ = metal_texture;
|
||||
[texture_ retain];
|
||||
gpu_image_usage_flags_ = gpu_usage_from_mtl(metal_texture.usage);
|
||||
internal_gpu_image_usage_flags_ = gpu_usage_from_mtl(metal_texture.usage);
|
||||
gpu_image_usage_flags_ = internal_gpu_image_usage_flags_;
|
||||
|
||||
/* Flag as Baked. */
|
||||
is_baked_ = true;
|
||||
@@ -197,7 +198,7 @@ void gpu::MTLTexture::bake_mip_swizzle_view()
|
||||
* rendering. */
|
||||
BLI_assert_msg(
|
||||
(texture_view_pixel_format == texture_.pixelFormat) ||
|
||||
(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW),
|
||||
(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW),
|
||||
"Usage Flag GPU_TEXTURE_USAGE_FORMAT_VIEW must be specified if a texture view is "
|
||||
"created with a different format to its source texture.");
|
||||
|
||||
@@ -703,7 +704,7 @@ void gpu::MTLTexture::update_sub(
|
||||
* format is unwritable, if our texture has not been initialized with
|
||||
* texture view support, use a staging texture. */
|
||||
if ((compatible_write_format != destination_format) &&
|
||||
!(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW))
|
||||
!(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW))
|
||||
{
|
||||
use_staging_texture = true;
|
||||
}
|
||||
@@ -715,11 +716,11 @@ void gpu::MTLTexture::update_sub(
|
||||
/* For compute, we should use a staging texture to avoid texture write usage,
|
||||
* if it has not been specified for the texture. Using shader-write disables
|
||||
* lossless texture compression, so this is best to avoid where possible. */
|
||||
if (!(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_SHADER_WRITE)) {
|
||||
if (!(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_SHADER_WRITE)) {
|
||||
use_staging_texture = true;
|
||||
}
|
||||
if (compatible_write_format != destination_format) {
|
||||
if (!(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW)) {
|
||||
if (!(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW)) {
|
||||
use_staging_texture = true;
|
||||
}
|
||||
}
|
||||
@@ -744,7 +745,7 @@ void gpu::MTLTexture::update_sub(
|
||||
else {
|
||||
/* Use texture view. */
|
||||
if (compatible_write_format != destination_format) {
|
||||
BLI_assert(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW);
|
||||
BLI_assert(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW);
|
||||
texture_handle = [texture_ newTextureViewWithPixelFormat:compatible_write_format];
|
||||
}
|
||||
else {
|
||||
@@ -1726,7 +1727,7 @@ void gpu::MTLTexture::read_internal(int mip,
|
||||
}
|
||||
/* Create Texture View for SRGB special case to bypass internal type conversion. */
|
||||
if (format_ == GPU_SRGB8_A8) {
|
||||
BLI_assert(gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW);
|
||||
BLI_assert(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW);
|
||||
read_texture = [read_texture newTextureViewWithPixelFormat:MTLPixelFormatRGBA8Unorm];
|
||||
}
|
||||
|
||||
@@ -2184,7 +2185,7 @@ bool gpu::MTLTexture::init_internal(GPUTexture *src,
|
||||
texture_view_dirty_flags_ |= TEXTURE_VIEW_MIP_DIRTY;
|
||||
|
||||
/* Assign usage. */
|
||||
gpu_image_usage_flags_ = GPU_texture_usage(src);
|
||||
internal_gpu_image_usage_flags_ = GPU_texture_usage(src);
|
||||
|
||||
/* Assign texture as view. */
|
||||
gpu::MTLTexture *mtltex = static_cast<gpu::MTLTexture *>(unwrap(src));
|
||||
@@ -2223,6 +2224,10 @@ bool gpu::MTLTexture::texture_is_baked()
|
||||
/* Prepare texture parameters after initialization, but before baking. */
|
||||
void gpu::MTLTexture::prepare_internal()
|
||||
{
|
||||
/* Take a copy of the flags so that any modifications we make won't affect the texture
|
||||
* cache/pool match finding test. */
|
||||
internal_gpu_image_usage_flags_ = gpu_image_usage_flags_;
|
||||
|
||||
/* Metal: Texture clearing is done using frame-buffer clear. This has no performance impact or
|
||||
* bandwidth implications for lossless compression and is considered best-practice.
|
||||
*
|
||||
@@ -2230,10 +2235,11 @@ void gpu::MTLTexture::prepare_internal()
|
||||
* NOTE: Emulated atomic textures cannot support render-target usage. For clearing, the backing
|
||||
* buffer is cleared instead.
|
||||
*/
|
||||
if (!((gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) &&
|
||||
if (!((internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) &&
|
||||
!MTLBackend::get_capabilities().supports_texture_atomics))
|
||||
{
|
||||
gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_ATTACHMENT;
|
||||
/* Force attachment usage - see comment above. */
|
||||
internal_gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_ATTACHMENT;
|
||||
}
|
||||
|
||||
/* Derive maximum number of mip levels by default.
|
||||
@@ -2277,7 +2283,7 @@ void gpu::MTLTexture::ensure_baked()
|
||||
* disabled. Enabling the texture_view or texture_read usage flags disables lossless
|
||||
* compression, so the situations in which it is used should be limited. */
|
||||
if (format_ == GPU_SRGB8_A8) {
|
||||
gpu_image_usage_flags_ = gpu_image_usage_flags_ | GPU_TEXTURE_USAGE_FORMAT_VIEW;
|
||||
internal_gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_FORMAT_VIEW;
|
||||
}
|
||||
|
||||
/* Create texture descriptor. */
|
||||
@@ -2296,7 +2302,7 @@ void gpu::MTLTexture::ensure_baked()
|
||||
texture_descriptor_.depth = 1;
|
||||
texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_1D_ARRAY) ? h_ : 1;
|
||||
texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
|
||||
texture_descriptor_.usage = mtl_usage_from_gpu(gpu_image_usage_flags_);
|
||||
texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
|
||||
texture_descriptor_.storageMode = MTLStorageModePrivate;
|
||||
texture_descriptor_.sampleCount = 1;
|
||||
texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
|
||||
@@ -2316,7 +2322,7 @@ void gpu::MTLTexture::ensure_baked()
|
||||
texture_descriptor_.depth = 1;
|
||||
texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_2D_ARRAY) ? d_ : 1;
|
||||
texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
|
||||
texture_descriptor_.usage = mtl_usage_from_gpu(gpu_image_usage_flags_);
|
||||
texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
|
||||
texture_descriptor_.storageMode = MTLStorageModePrivate;
|
||||
texture_descriptor_.sampleCount = 1;
|
||||
texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
|
||||
@@ -2334,7 +2340,7 @@ void gpu::MTLTexture::ensure_baked()
|
||||
texture_descriptor_.depth = d_;
|
||||
texture_descriptor_.arrayLength = 1;
|
||||
texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
|
||||
texture_descriptor_.usage = mtl_usage_from_gpu(gpu_image_usage_flags_);
|
||||
texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
|
||||
texture_descriptor_.storageMode = MTLStorageModePrivate;
|
||||
texture_descriptor_.sampleCount = 1;
|
||||
texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
|
||||
@@ -2357,7 +2363,7 @@ void gpu::MTLTexture::ensure_baked()
|
||||
texture_descriptor_.depth = 1;
|
||||
texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_CUBE_ARRAY) ? d_ / 6 : 1;
|
||||
texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
|
||||
texture_descriptor_.usage = mtl_usage_from_gpu(gpu_image_usage_flags_);
|
||||
texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
|
||||
texture_descriptor_.storageMode = MTLStorageModePrivate;
|
||||
texture_descriptor_.sampleCount = 1;
|
||||
texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
|
||||
@@ -2374,7 +2380,7 @@ void gpu::MTLTexture::ensure_baked()
|
||||
texture_descriptor_.depth = 1;
|
||||
texture_descriptor_.arrayLength = 1;
|
||||
texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
|
||||
texture_descriptor_.usage = mtl_usage_from_gpu(gpu_image_usage_flags_);
|
||||
texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
|
||||
texture_descriptor_.storageMode = MTLStorageModePrivate;
|
||||
texture_descriptor_.sampleCount = 1;
|
||||
texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
|
||||
@@ -2392,7 +2398,7 @@ void gpu::MTLTexture::ensure_baked()
|
||||
|
||||
/* Override storage mode if memoryless attachments are being used.
|
||||
* NOTE: Memoryless textures can only be supported on TBDR GPUs. */
|
||||
if (gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) {
|
||||
if (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) {
|
||||
const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
|
||||
if (is_tile_based_arch) {
|
||||
texture_descriptor_.storageMode = MTLStorageModeMemoryless;
|
||||
@@ -2404,7 +2410,7 @@ void gpu::MTLTexture::ensure_baked()
|
||||
* allocate a buffer-backed 2D texture and perform atomic operations on this instead. Support
|
||||
* for 2D Array textures and 3D textures is achieved via packing layers into the 2D texture. */
|
||||
bool native_texture_atomics = MTLBackend::get_capabilities().supports_texture_atomics;
|
||||
if ((gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) && !native_texture_atomics) {
|
||||
if ((internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) && !native_texture_atomics) {
|
||||
|
||||
/* Validate format support. */
|
||||
BLI_assert_msg(ELEM(type_, GPU_TEXTURE_2D, GPU_TEXTURE_2D_ARRAY, GPU_TEXTURE_3D),
|
||||
@@ -2447,7 +2453,7 @@ void gpu::MTLTexture::ensure_baked()
|
||||
size_t total_bytes = bytes_per_row * texture_descriptor_.height;
|
||||
|
||||
backing_buffer_ = MTLContext::get_global_memory_manager()->allocate(
|
||||
total_bytes, (gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_HOST_READ));
|
||||
total_bytes, (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_HOST_READ));
|
||||
BLI_assert(backing_buffer_ != nullptr);
|
||||
|
||||
/* NOTE: Fallback buffer-backed texture always set to Texture2D. */
|
||||
@@ -2484,7 +2490,7 @@ void gpu::MTLTexture::ensure_baked()
|
||||
texture_ = [ctx->device newTextureWithDescriptor:texture_descriptor_];
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) {
|
||||
if (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) {
|
||||
texture_.label = [NSString stringWithFormat:@"MemorylessTexture_%s", this->get_name()];
|
||||
}
|
||||
else {
|
||||
|
||||
Reference in New Issue
Block a user