Files
test2/intern/cycles/device/metal/device_impl.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

200 lines
5.7 KiB
C
Raw Normal View History

/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
*
* SPDX-License-Identifier: Apache-2.0 */
#pragma once
#ifdef WITH_METAL
# include "bvh/bvh.h"
# include "device/device.h"
# include "device/metal/bvh.h"
# include "device/metal/device.h"
# include "device/metal/kernel.h"
# include "device/metal/queue.h"
# include "device/metal/util.h"
# include <Metal/Metal.h>
CCL_NAMESPACE_BEGIN
class DeviceQueue;
class MetalDevice : public Device {
public:
id<MTLDevice> mtlDevice = nil;
id<MTLLibrary> mtlLibrary[PSO_NUM] = {nil};
id<MTLArgumentEncoder> mtlBufferKernelParamsEncoder =
nil; /* encoder used for fetching device pointers from MTLBuffers */
id<MTLCommandQueue> mtlComputeCommandQueue = nil;
id<MTLCommandQueue> mtlGeneralCommandQueue = nil;
id<MTLArgumentEncoder> mtlAncillaryArgEncoder =
nil; /* encoder used for fetching device pointers from MTLBuffers */
id<MTLCounterSampleBuffer> mtlCounterSampleBuffer = nil;
Enable inlining on Apple Silicon. Use new process-wide ShaderCache in order to safely re-enable binary archives This patch is the same as D14763, but with a fix for unit test failures caused by ShaderCache fetch logic not working in the non-MetalRT case: ``` diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index ad268ae7057..6aa1a56056e 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -203,9 +203,12 @@ bool kernel_has_intersection(DeviceKernel device_kernel) /* metalrt options */ request.pipeline->use_metalrt = device->use_metalrt; - request.pipeline->metalrt_hair = device->kernel_features & KERNEL_FEATURE_HAIR; - request.pipeline->metalrt_hair_thick = device->kernel_features & KERNEL_FEATURE_HAIR_THICK; - request.pipeline->metalrt_pointcloud = device->kernel_features & KERNEL_FEATURE_POINTCLOUD; + request.pipeline->metalrt_hair = device->use_metalrt && + (device->kernel_features & KERNEL_FEATURE_HAIR); + request.pipeline->metalrt_hair_thick = device->use_metalrt && + (device->kernel_features & KERNEL_FEATURE_HAIR_THICK); + request.pipeline->metalrt_pointcloud = device->use_metalrt && + (device->kernel_features & KERNEL_FEATURE_POINTCLOUD); { thread_scoped_lock lock(cache_mutex); @@ -225,9 +228,9 @@ bool kernel_has_intersection(DeviceKernel device_kernel) /* metalrt options */ bool use_metalrt = device->use_metalrt; - bool metalrt_hair = device->kernel_features & KERNEL_FEATURE_HAIR; - bool metalrt_hair_thick = device->kernel_features & KERNEL_FEATURE_HAIR_THICK; - bool metalrt_pointcloud = device->kernel_features & KERNEL_FEATURE_POINTCLOUD; + bool metalrt_hair = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR); + bool metalrt_hair_thick = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR_THICK); + bool metalrt_pointcloud = use_metalrt && (device->kernel_features & KERNEL_FEATURE_POINTCLOUD); MetalKernelPipeline *best_pipeline = nullptr; for (auto &pipeline : collection) { ``` Reviewed By: brecht Differential Revision: https://developer.blender.org/D14923
2022-05-11 14:52:49 +01:00
string source[PSO_NUM];
string kernels_md5[PSO_NUM];
string global_defines_md5[PSO_NUM];
Cycles: Useful Metal backend debug & profiling functionality This patch adds some useful debugging & profiling env vars to the Metal backend: - `CYCLES_METAL_PROFILING`: output a per-kernel timing report at the end of the render - `CYCLES_METAL_DEBUG`: enable per-dispatch tracing (very verbose) - `CYCLES_DEBUG_METAL_CAPTURE_KERNEL`: enable programatic .gputrace capture for a specified kernel index Here's an example of the timing report with `CYCLES_METAL_PROFILING` enabled: ``` --------------------------------------------------------------------------------------------------- Kernel name Total threads Dispatches Avg. T/D Time Time% --------------------------------------------------------------------------------------------------- integrator_init_from_camera 657,407,232 161 4,083,274 0.24s 0.51% integrator_intersect_closest 1,629,288,440 681 2,392,494 15.18s 32.12% integrator_intersect_shadow 751,652,291 470 1,599,260 5.80s 12.28% integrator_shade_background 304,612,074 263 1,158,220 1.16s 2.45% integrator_shade_surface 1,159,764,041 676 1,715,627 20.57s 43.52% integrator_shade_shadow 598,885,847 418 1,432,741 1.27s 2.69% integrator_queued_paths_array 2,969,650,130 805 3,689,006 0.35s 0.74% integrator_queued_shadow_paths_array 593,936,619 379 1,567,115 0.14s 0.29% integrator_terminated_paths_array 22,205,417 155 143,260 0.05s 0.10% integrator_sorted_paths_array 2,517,140,043 676 3,723,579 1.65s 3.50% integrator_compact_paths_array 648,912,748 155 4,186,533 0.03s 0.07% integrator_compact_states 20,872,687 155 134,662 0.14s 0.29% integrator_terminated_shadow_paths_array 374,100,675 438 854,111 0.16s 0.33% integrator_compact_shadow_paths_array 503,768,657 438 1,150,156 0.05s 0.10% integrator_compact_shadow_states 37,664,941 202 186,460 0.23s 0.50% integrator_reset 25,165,824 6 4,194,304 0.06s 0.12% film_convert_combined_half_rgba 3,110,400 6 518,400 0.00s 0.01% prefix_sum 676 676 1 0.19s 0.40% --------------------------------------------------------------------------------------------------- 6,760 47.27s 100.00% --------------------------------------------------------------------------------------------------- ``` Reviewed By: brecht Differential Revision: https://developer.blender.org/D15044
2022-06-07 11:08:21 +01:00
bool capture_enabled = false;
KernelParamsMetal launch_params = {nullptr};
/* MetalRT members ----------------------------------*/
bool use_metalrt = false;
bool motion_blur = false;
bool use_pcmi = false;
id<MTLArgumentEncoder> mtlASArgEncoder =
nil; /* encoder used for fetching device pointers from MTLAccelerationStructure */
id<MTLArgumentEncoder> mtlBlasArgEncoder = nil;
id<MTLBuffer> blas_buffer = nil;
API_AVAILABLE(macos(11.0))
vector<id<MTLAccelerationStructure>> unique_blas_array;
API_AVAILABLE(macos(11.0))
id<MTLAccelerationStructure> accel_struct = nil;
/*---------------------------------------------------*/
uint kernel_features = 0;
bool using_nanovdb = false;
int max_threads_per_threadgroup;
int mtlDevId = 0;
bool has_error = false;
struct MetalMem {
device_memory *mem = nullptr;
int pointer_index = -1;
id<MTLBuffer> mtlBuffer = nil;
id<MTLTexture> mtlTexture = nil;
uint64_t offset = 0;
uint64_t size = 0;
void *hostPtr = nullptr;
};
using MetalMemMap = map<device_memory *, unique_ptr<MetalMem>>;
MetalMemMap metal_mem_map;
std::vector<id<MTLResource>> delayed_free_list;
std::recursive_mutex metal_mem_map_mutex;
/* Bindless Textures */
bool is_texture(const TextureInfo &tex);
device_vector<TextureInfo> texture_info;
bool need_texture_info = false;
id<MTLArgumentEncoder> mtlTextureArgEncoder = nil;
id<MTLArgumentEncoder> mtlBufferArgEncoder = nil;
id<MTLBuffer> buffer_bindings_1d = nil;
id<MTLBuffer> texture_bindings_2d = nil;
id<MTLBuffer> texture_bindings_3d = nil;
std::vector<id<MTLTexture>> texture_slot_map;
MetalPipelineType kernel_specialization_level = PSO_GENERIC;
int device_id = 0;
static thread_mutex existing_devices_mutex;
static std::map<int, MetalDevice *> active_device_ids;
static bool is_device_cancelled(const int device_id);
static MetalDevice *get_device_by_ID(const int device_idID,
thread_scoped_lock &existing_devices_mutex_lock);
bool is_ready(string &status) const override;
void cancel() override;
BVHLayoutMask get_bvh_layout_mask(uint /*kernel_features*/) const override;
void set_error(const string &error) override;
MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless);
~MetalDevice() override;
bool support_device(const uint /*kernel_features*/);
bool check_peer_access(Device *peer_device) override;
bool use_adaptive_compilation();
bool use_local_atomic_sort() const;
2023-02-26 11:55:22 +13:00
string preprocess_source(MetalPipelineType pso_type,
const uint kernel_features,
string *source = nullptr);
void refresh_source_and_kernels_md5(MetalPipelineType pso_type);
void make_source(MetalPipelineType pso_type, const uint kernel_features);
bool load_kernels(const uint kernel_features) override;
void load_texture_info();
void erase_allocation(device_memory &mem);
bool should_use_graphics_interop(const GraphicsInteropDevice &interop_device,
const bool log) override;
void *get_native_buffer(device_ptr ptr) override;
unique_ptr<DeviceQueue> gpu_queue_create() override;
void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
void optimize_for_scene(Scene *scene) override;
static void compile_and_load(const int device_id, MetalPipelineType pso_type);
Enable inlining on Apple Silicon. Use new process-wide ShaderCache in order to safely re-enable binary archives This patch is the same as D14763, but with a fix for unit test failures caused by ShaderCache fetch logic not working in the non-MetalRT case: ``` diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index ad268ae7057..6aa1a56056e 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -203,9 +203,12 @@ bool kernel_has_intersection(DeviceKernel device_kernel) /* metalrt options */ request.pipeline->use_metalrt = device->use_metalrt; - request.pipeline->metalrt_hair = device->kernel_features & KERNEL_FEATURE_HAIR; - request.pipeline->metalrt_hair_thick = device->kernel_features & KERNEL_FEATURE_HAIR_THICK; - request.pipeline->metalrt_pointcloud = device->kernel_features & KERNEL_FEATURE_POINTCLOUD; + request.pipeline->metalrt_hair = device->use_metalrt && + (device->kernel_features & KERNEL_FEATURE_HAIR); + request.pipeline->metalrt_hair_thick = device->use_metalrt && + (device->kernel_features & KERNEL_FEATURE_HAIR_THICK); + request.pipeline->metalrt_pointcloud = device->use_metalrt && + (device->kernel_features & KERNEL_FEATURE_POINTCLOUD); { thread_scoped_lock lock(cache_mutex); @@ -225,9 +228,9 @@ bool kernel_has_intersection(DeviceKernel device_kernel) /* metalrt options */ bool use_metalrt = device->use_metalrt; - bool metalrt_hair = device->kernel_features & KERNEL_FEATURE_HAIR; - bool metalrt_hair_thick = device->kernel_features & KERNEL_FEATURE_HAIR_THICK; - bool metalrt_pointcloud = device->kernel_features & KERNEL_FEATURE_POINTCLOUD; + bool metalrt_hair = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR); + bool metalrt_hair_thick = use_metalrt && (device->kernel_features & KERNEL_FEATURE_HAIR_THICK); + bool metalrt_pointcloud = use_metalrt && (device->kernel_features & KERNEL_FEATURE_POINTCLOUD); MetalKernelPipeline *best_pipeline = nullptr; for (auto &pipeline : collection) { ``` Reviewed By: brecht Differential Revision: https://developer.blender.org/D14923
2022-05-11 14:52:49 +01:00
/* ------------------------------------------------------------------ */
/* low-level memory management */
bool max_working_set_exceeded(const size_t safety_margin = 8 * 1024 * 1024) const;
MetalMem *generic_alloc(device_memory &mem);
void generic_copy_to(device_memory &mem);
void generic_free(device_memory &mem);
void mem_alloc(device_memory &mem) override;
void mem_copy_to(device_memory &mem) override;
void mem_move_to_host(device_memory &mem) override;
void mem_copy_from(device_memory &mem)
{
mem_copy_from(mem, -1, -1, -1, -1);
}
void mem_copy_from(
device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem) override;
void mem_zero(device_memory &mem) override;
void mem_free(device_memory &mem) override;
device_ptr mem_alloc_sub_ptr(device_memory &mem, const size_t offset, size_t /*size*/) override;
void const_copy_to(const char *name, void *host, const size_t size) override;
void global_alloc(device_memory &mem);
void global_free(device_memory &mem);
void tex_alloc(device_texture &mem);
void tex_alloc_as_buffer(device_texture &mem);
void tex_copy_to(device_texture &mem);
void tex_free(device_texture &mem);
void flush_delayed_free_list();
void free_bvh();
void update_bvh(BVHMetal *bvh_metal);
};
CCL_NAMESPACE_END
#endif