2023-06-14 16:52:36 +10:00
|
|
|
/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
|
|
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: Apache-2.0 */
|
2021-12-07 15:11:35 +00:00
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
|
|
#ifdef WITH_METAL
|
|
|
|
|
|
|
|
|
|
# include "bvh/bvh.h"
|
|
|
|
|
# include "device/device.h"
|
|
|
|
|
# include "device/metal/bvh.h"
|
|
|
|
|
# include "device/metal/device.h"
|
|
|
|
|
# include "device/metal/kernel.h"
|
|
|
|
|
# include "device/metal/queue.h"
|
|
|
|
|
# include "device/metal/util.h"
|
|
|
|
|
|
|
|
|
|
# include <Metal/Metal.h>
|
|
|
|
|
|
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
|
|
|
|
|
|
class DeviceQueue;
|
|
|
|
|
|
|
|
|
|
/* `Device` implementation backed by Apple's Metal API.
 *
 * Holds the Metal object handles (device, libraries, command queues, argument encoders), the
 * per-pipeline-type kernel source and hashes, the bookkeeping for device memory allocations and
 * bindless textures, and the optional MetalRT acceleration-structure state. */
class MetalDevice : public Device {
 public:
  /* Core Metal handles. */
  id<MTLDevice> mtlDevice = nil;
  /* One entry per pipeline type; presumably indexed by `MetalPipelineType` - confirm. */
  id<MTLLibrary> mtlLibrary[PSO_NUM] = {nil};
  id<MTLArgumentEncoder> mtlBufferKernelParamsEncoder =
      nil; /* encoder used for fetching device pointers from MTLBuffers */
  id<MTLCommandQueue> mtlComputeCommandQueue = nil;
  id<MTLCommandQueue> mtlGeneralCommandQueue = nil;
  id<MTLArgumentEncoder> mtlAncillaryArgEncoder =
      nil; /* encoder used for fetching device pointers from MTLBuffers */
  id<MTLCounterSampleBuffer> mtlCounterSampleBuffer = nil;

  /* Kernel source text and hashes, one entry per pipeline type (`PSO_NUM` entries). */
  string source[PSO_NUM];
  string kernels_md5[PSO_NUM];
  string global_defines_md5[PSO_NUM];

  /* Debugging & profiling. The Metal backend recognizes these environment variables:
   * - `CYCLES_METAL_PROFILING`: output a per-kernel timing report at the end of the render.
   * - `CYCLES_METAL_DEBUG`: enable per-dispatch tracing (very verbose).
   * - `CYCLES_DEBUG_METAL_CAPTURE_KERNEL`: enable programmatic .gputrace capture for a specified
   *   kernel index.
   * NOTE(review): `capture_enabled` presumably reflects the capture option - confirm against the
   * implementation. */
  bool capture_enabled = false;

  KernelParamsMetal launch_params = {nullptr};

  /* MetalRT members ----------------------------------*/
  bool use_metalrt = false;
  bool motion_blur = false;
  bool use_pcmi = false;
  id<MTLArgumentEncoder> mtlASArgEncoder =
      nil; /* encoder used for fetching device pointers from MTLAccelerationStructure */

  id<MTLArgumentEncoder> mtlBlasArgEncoder = nil;
  id<MTLBuffer> blas_buffer = nil;

  API_AVAILABLE(macos(11.0))
  vector<id<MTLAccelerationStructure>> unique_blas_array;

  API_AVAILABLE(macos(11.0))
  id<MTLAccelerationStructure> accel_struct = nil;
  /*---------------------------------------------------*/

  uint kernel_features = 0;
  bool using_nanovdb = false;
  /* Default-initialized like every other scalar member so it never holds an indeterminate value.
   * NOTE(review): the real limit is expected to be assigned during construction - confirm. */
  int max_threads_per_threadgroup = 0;

  int mtlDevId = 0;
  bool has_error = false;

  /* Per-allocation record; instances are owned by `metal_mem_map`, keyed by the `device_memory`
   * they describe. */
  struct MetalMem {
    device_memory *mem = nullptr;
    int pointer_index = -1;
    id<MTLBuffer> mtlBuffer = nil;
    id<MTLTexture> mtlTexture = nil;
    uint64_t offset = 0;
    uint64_t size = 0;
    void *hostPtr = nullptr;
  };
  using MetalMemMap = map<device_memory *, unique_ptr<MetalMem>>;
  MetalMemMap metal_mem_map;
  std::vector<id<MTLResource>> delayed_free_list;
  /* NOTE(review): presumably guards `metal_mem_map` (and the delayed free list) - confirm. */
  std::recursive_mutex metal_mem_map_mutex;

  /* Bindless Textures */
  bool is_texture(const TextureInfo &tex);
  device_vector<TextureInfo> texture_info;
  bool need_texture_info = false;
  id<MTLArgumentEncoder> mtlTextureArgEncoder = nil;
  id<MTLArgumentEncoder> mtlBufferArgEncoder = nil;
  id<MTLBuffer> buffer_bindings_1d = nil;
  id<MTLBuffer> texture_bindings_2d = nil;
  id<MTLBuffer> texture_bindings_3d = nil;
  std::vector<id<MTLTexture>> texture_slot_map;

  MetalPipelineType kernel_specialization_level = PSO_GENERIC;

  int device_id = 0;

  /* Process-wide registry of devices. NOTE(review): the map is presumably keyed by `device_id`
   * and protected by `existing_devices_mutex` - confirm. */
  static thread_mutex existing_devices_mutex;
  static std::map<int, MetalDevice *> active_device_ids;

  static bool is_device_cancelled(const int device_id);

  /* Look up a device by its integer ID. The lock parameter is taken by reference.
   * NOTE(review): presumably it must already hold `existing_devices_mutex` - confirm. */
  static MetalDevice *get_device_by_ID(const int device_id,
                                       thread_scoped_lock &existing_devices_mutex_lock);

  bool is_ready(string &status) const override;

  void cancel() override;

  BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;

  void set_error(const string &error) override;

  MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless);

  ~MetalDevice() override;

  bool support_device(const uint /*kernel_features*/);

  bool check_peer_access(Device *peer_device) override;

  bool use_adaptive_compilation();

  bool use_local_atomic_sort() const;

  /* `source` is optional and defaults to null. */
  string preprocess_source(MetalPipelineType pso_type,
                           const uint kernel_features,
                           string *source = nullptr);

  void refresh_source_and_kernels_md5(MetalPipelineType pso_type);

  void make_source(MetalPipelineType pso_type, const uint kernel_features);

  bool load_kernels(const uint kernel_features) override;

  void load_texture_info();

  void erase_allocation(device_memory &mem);

  bool should_use_graphics_interop(const GraphicsInteropDevice &interop_device,
                                   const bool log) override;

  void *get_native_buffer(device_ptr ptr) override;

  unique_ptr<DeviceQueue> gpu_queue_create() override;

  void build_bvh(BVH *bvh, Progress &progress, bool refit) override;

  void optimize_for_scene(Scene *scene) override;

  /* Static: the target device is identified by its integer ID rather than by `this`. */
  static void compile_and_load(const int device_id, MetalPipelineType pso_type);

  /* ------------------------------------------------------------------ */
  /* low-level memory management */

  /* `safety_margin` defaults to 8 * 1024 * 1024 (presumably bytes - confirm). */
  bool max_working_set_exceeded(const size_t safety_margin = 8 * 1024 * 1024) const;

  MetalMem *generic_alloc(device_memory &mem);

  void generic_copy_to(device_memory &mem);

  void generic_free(device_memory &mem);

  void mem_alloc(device_memory &mem) override;

  void mem_copy_to(device_memory &mem) override;

  void mem_move_to_host(device_memory &mem) override;

  /* Convenience overload forwarding to the five-argument version. The `-1` arguments convert to
   * the maximum `size_t` value. NOTE(review): presumably a "whole allocation" sentinel - confirm
   * in the implementation. */
  void mem_copy_from(device_memory &mem)
  {
    mem_copy_from(mem, -1, -1, -1, -1);
  }
  void mem_copy_from(
      device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem) override;

  void mem_zero(device_memory &mem) override;

  void mem_free(device_memory &mem) override;

  device_ptr mem_alloc_sub_ptr(device_memory &mem, const size_t offset, size_t /*size*/) override;

  void const_copy_to(const char *name, void *host, const size_t size) override;

  void global_alloc(device_memory &mem);
  void global_free(device_memory &mem);

  void tex_alloc(device_texture &mem);
  void tex_alloc_as_buffer(device_texture &mem);
  void tex_copy_to(device_texture &mem);
  void tex_free(device_texture &mem);

  void flush_delayed_free_list();

  void free_bvh();

  void update_bvh(BVHMetal *bvh_metal);
};
|
|
|
|
|
|
|
|
|
|
CCL_NAMESPACE_END
|
|
|
|
|
|
|
|
|
|
#endif
|