Vulkan: Add Viewport Array/Layer Workarounds

This PR adds workarounds for platforms that don't support `shaderOutputLayer`
or `shaderOutputViewportIndex`. Some NVIDIA laptop GPUs and ARM GPUs don't
have those device features.

The workaround uses the same approach as the OpenGL backend: a geometry shader is
injected to emulate the missing features.

For testing purposes, the workarounds have also been connected to the
`--debug-gpu-force-workarounds` command line argument.

Fixes #113475 by implementing #113529

Pull Request: https://projects.blender.org/blender/blender/pulls/113605
This commit is contained in:
Jeroen Bakker
2023-10-13 10:40:11 +02:00
parent 0d83083f29
commit 159e798cdb
6 changed files with 187 additions and 17 deletions

View File

@@ -231,30 +231,23 @@ class GHOST_DeviceVK {
device_features.drawIndirectFirstInstance = VK_TRUE;
device_features.fragmentStoresAndAtomics = VK_TRUE;
VkPhysicalDeviceVulkan12Features device_12_features = {};
device_12_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
device_12_features.shaderOutputLayer = VK_TRUE;
device_12_features.shaderOutputViewportIndex = VK_TRUE;
/* Enable shader draw parameters on logical device when supported on physical device. */
VkPhysicalDeviceShaderDrawParametersFeatures shader_draw_parameters = {};
shader_draw_parameters.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES;
shader_draw_parameters.shaderDrawParameters = features_11.shaderDrawParameters;
VkPhysicalDeviceMaintenance4FeaturesKHR maintenance_4 = {};
maintenance_4.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES_KHR;
maintenance_4.maintenance4 = VK_TRUE;
/* Maintenance4 is core in Vulkan 1.3, so we need to query for availability. */
if (has_extensions({VK_KHR_MAINTENANCE_4_EXTENSION_NAME})) {
maintenance_4.pNext = device_12_features.pNext;
device_12_features.pNext = &maintenance_4;
maintenance_4.pNext = shader_draw_parameters.pNext;
shader_draw_parameters.pNext = &maintenance_4;
}
/* Enable shader draw parameters on logical device when supported on physical device. */
VkPhysicalDeviceShaderDrawParametersFeatures shader_draw_parameters = {};
shader_draw_parameters.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES;
shader_draw_parameters.shaderDrawParameters = features_11.shaderDrawParameters;
shader_draw_parameters.pNext = device_12_features.pNext;
device_12_features.pNext = &shader_draw_parameters;
VkDeviceCreateInfo device_create_info = {};
device_create_info.pNext = &device_12_features;
device_create_info.pNext = &shader_draw_parameters;
device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
device_create_info.queueCreateInfoCount = uint32_t(queue_create_infos.size());
device_create_info.pQueueCreateInfos = queue_create_infos.data();

View File

@@ -81,6 +81,25 @@ void VKBackend::detect_workarounds(VKDevice &device)
{
VKWorkarounds workarounds;
if (G.debug & G_DEBUG_GPU_FORCE_WORKAROUNDS) {
printf("\n");
printf("VK: Forcing workaround usage and disabling features and extensions.\n");
printf(" Vendor: %s\n", device.vendor_name().c_str());
printf(" Device: %s\n", device.physical_device_properties_get().deviceName);
printf(" Driver: %s\n", device.driver_version().c_str());
/* Force workarounds. */
workarounds.not_aligned_pixel_formats = true;
workarounds.shader_output_layer = true;
workarounds.shader_output_viewport_index = true;
return;
}
workarounds.shader_output_layer =
!device.physical_device_vulkan_12_features_get().shaderOutputLayer;
workarounds.shader_output_viewport_index =
!device.physical_device_vulkan_12_features_get().shaderOutputViewportIndex;
/* AMD GPUs don't support texture formats that are aligned to 24 or 48 bits. */
if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_ANY, GPU_DRIVER_ANY)) {
workarounds.not_aligned_pixel_formats = true;

View File

@@ -89,11 +89,17 @@ void VKDevice::init_physical_device_properties()
/* Query and cache the feature support of the selected physical device, including the
 * Vulkan 1.1 and 1.2 core feature structs, so they can be inspected later
 * (e.g. by workaround detection). */
void VKDevice::init_physical_device_features()
{
BLI_assert(vk_physical_device_ != VK_NULL_HANDLE);
VkPhysicalDeviceFeatures2 features = {};
features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
vk_physical_device_vulkan_11_features_.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES;
vk_physical_device_vulkan_12_features_.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
/* Chain the 1.1 and 1.2 feature structs through pNext so a single
 * vkGetPhysicalDeviceFeatures2 call fills all three structs at once. */
features.pNext = &vk_physical_device_vulkan_11_features_;
vk_physical_device_vulkan_11_features_.pNext = &vk_physical_device_vulkan_12_features_;
vkGetPhysicalDeviceFeatures2(vk_physical_device_, &features);
/* Keep a copy of the core (1.0) features; the 1.1/1.2 structs are stored as members above. */
vk_physical_device_features_ = features.features;
}

View File

@@ -28,6 +28,18 @@ struct VKWorkarounds {
* If set to true we should work around this issue by using a different texture format.
*/
bool not_aligned_pixel_formats = false;
/**
* Is the workaround for devices that don't support
* #VkPhysicalDeviceVulkan12Features::shaderOutputViewportIndex enabled.
*/
bool shader_output_viewport_index = false;
/**
* Is the workaround for devices that don't support
* #VkPhysicalDeviceVulkan12Features::shaderOutputLayer enabled.
*/
bool shader_output_layer = false;
};
class VKDevice : public NonCopyable {
@@ -63,6 +75,7 @@ class VKDevice : public NonCopyable {
/** Features support. */
VkPhysicalDeviceFeatures vk_physical_device_features_ = {};
VkPhysicalDeviceVulkan11Features vk_physical_device_vulkan_11_features_ = {};
VkPhysicalDeviceVulkan12Features vk_physical_device_vulkan_12_features_ = {};
/** Functions of vk_ext_debugutils for this device/instance. */
debug::VKDebuggingTools debugging_tools_;
@@ -101,6 +114,11 @@ class VKDevice : public NonCopyable {
return vk_physical_device_vulkan_11_features_;
}
const VkPhysicalDeviceVulkan12Features &physical_device_vulkan_12_features_get() const
{
return vk_physical_device_vulkan_12_features_;
}
VkInstance instance_get() const
{
return vk_instance_;

View File

@@ -494,6 +494,8 @@ static char *glsl_patch_get()
return patch;
}
const VKWorkarounds &workarounds = VKBackend::get().device_get().workarounds_get();
size_t slen = 0;
/* Version need to go first. */
STR_CONCAT(patch, slen, "#version 450\n");
@@ -511,8 +513,12 @@ static char *glsl_patch_get()
/* TODO(fclem): This creates a validation error and should be already part of Vulkan 1.2. */
STR_CONCAT(patch, slen, "#extension GL_ARB_shader_viewport_layer_array: enable\n");
STR_CONCAT(patch, slen, "#define gpu_Layer gl_Layer\n");
STR_CONCAT(patch, slen, "#define gpu_ViewportIndex gl_ViewportIndex\n");
if (!workarounds.shader_output_layer) {
STR_CONCAT(patch, slen, "#define gpu_Layer gl_Layer\n");
}
if (!workarounds.shader_output_viewport_index) {
STR_CONCAT(patch, slen, "#define gpu_ViewportIndex gl_ViewportIndex\n");
}
STR_CONCAT(patch, slen, "#define DFDX_SIGN 1.0\n");
STR_CONCAT(patch, slen, "#define DFDY_SIGN 1.0\n");
@@ -670,6 +676,14 @@ bool VKShader::finalize(const shader::ShaderCreateInfo *info)
return false;
}
if (do_geometry_shader_injection(info)) {
std::string source = workaround_geometry_shader_source_create(*info);
Vector<const char *> sources;
sources.append("version");
sources.append(source.c_str());
geometry_shader_from_glsl(sources);
}
VKShaderInterface *vk_interface = new VKShaderInterface();
vk_interface->init(*info);
@@ -1042,6 +1056,7 @@ std::string VKShader::vertex_interface_declare(const shader::ShaderCreateInfo &i
{
std::stringstream ss;
std::string post_main;
const VKWorkarounds &workarounds = VKBackend::get().device_get().workarounds_get();
ss << "\n/* Inputs. */\n";
for (const ShaderCreateInfo::VertIn &attr : info.vertex_inputs_) {
@@ -1053,6 +1068,13 @@ std::string VKShader::vertex_interface_declare(const shader::ShaderCreateInfo &i
for (const StageInterfaceInfo *iface : info.vertex_out_interfaces_) {
print_interface(ss, "out", *iface, location);
}
if (workarounds.shader_output_layer && bool(info.builtins_ & BuiltinBits::LAYER)) {
ss << "layout(location=" << (location++) << ") out int gpu_Layer;\n ";
}
if (workarounds.shader_output_viewport_index &&
bool(info.builtins_ & BuiltinBits::VIEWPORT_INDEX)) {
ss << "layout(location=" << (location++) << ") out int gpu_ViewportIndex;\n";
}
if (bool(info.builtins_ & BuiltinBits::BARYCENTRIC_COORD)) {
/* Need this for stable barycentric. */
ss << "flat out vec4 gpu_pos_flat;\n";
@@ -1073,6 +1095,7 @@ std::string VKShader::fragment_interface_declare(const shader::ShaderCreateInfo
{
std::stringstream ss;
std::string pre_main;
const VKWorkarounds &workarounds = VKBackend::get().device_get().workarounds_get();
ss << "\n/* Interfaces. */\n";
const Vector<StageInterfaceInfo *> &in_interfaces = info.geometry_source_.is_empty() ?
@@ -1082,6 +1105,14 @@ std::string VKShader::fragment_interface_declare(const shader::ShaderCreateInfo
for (const StageInterfaceInfo *iface : in_interfaces) {
print_interface(ss, "in", *iface, location);
}
if (workarounds.shader_output_layer && bool(info.builtins_ & BuiltinBits::LAYER)) {
ss << "#define gpu_Layer gl_Layer\n";
}
if (workarounds.shader_output_viewport_index &&
bool(info.builtins_ & BuiltinBits::VIEWPORT_INDEX)) {
ss << "#define gpu_ViewportIndex gl_ViewportIndex\n";
}
if (bool(info.builtins_ & BuiltinBits::BARYCENTRIC_COORD)) {
std::cout << "native" << std::endl;
/* NOTE(fclem): This won't work with geometry shader. Hopefully, we don't need geometry
@@ -1223,6 +1254,102 @@ std::string VKShader::compute_layout_declare(const shader::ShaderCreateInfo &inf
return ss.str();
}
/* -------------------------------------------------------------------- */
/** \name Passthrough geometry shader emulation
*
* \{ */
/**
 * \brief Build the GLSL source of a pass-through geometry shader.
 *
 * The generated shader forwards all vertex-stage interface outputs unchanged and, depending on
 * the active workarounds and the shader's builtins, writes `gl_Layer`, `gl_ViewportIndex` and/or
 * the barycentric-coordinate outputs that the fragment stage expects.
 *
 * \param info: create info of the shader that needs the injected geometry stage.
 * \return the complete GLSL geometry shader source (layout + interfaces + main).
 */
std::string VKShader::workaround_geometry_shader_source_create(
const shader::ShaderCreateInfo &info)
{
std::stringstream ss;
/* Only emit the parts of the workaround that are both enabled for this device and actually
 * used by this shader (via its builtin bits). */
const VKWorkarounds &workarounds = VKBackend::get().device_get().workarounds_get();
const bool do_layer_workaround = workarounds.shader_output_layer &&
bool(info.builtins_ & BuiltinBits::LAYER);
const bool do_viewport_workaround = workarounds.shader_output_viewport_index &&
bool(info.builtins_ & BuiltinBits::VIEWPORT_INDEX);
const bool do_barycentric_workaround = bool(info.builtins_ & BuiltinBits::BARYCENTRIC_COORD);
/* The geometry stage passes the vertex outputs through unchanged, so its output interfaces
 * mirror the vertex output interfaces. */
shader::ShaderCreateInfo info_modified = info;
info_modified.geometry_out_interfaces_ = info_modified.vertex_out_interfaces_;
/**
* NOTE(@fclem): Assuming we will render TRIANGLES. This will not work with other primitive
* types. In this case, it might not trigger an error on some implementations.
*/
info_modified.geometry_layout(PrimitiveIn::TRIANGLES, PrimitiveOut::TRIANGLE_STRIP, 3);
ss << geometry_layout_declare(info_modified);
ss << geometry_interface_declare(info_modified);
/* Compute the first free location after the vertex-stage interfaces so the extra workaround
 * inputs don't clash with existing interface locations. Must match the locations assigned in
 * the vertex interface declaration. */
int location = 0;
for (const StageInterfaceInfo *iface : info.vertex_out_interfaces_) {
for (const StageInterfaceInfo::InOut &inout : iface->inouts) {
location += get_location_count(inout.type);
}
}
if (do_layer_workaround) {
ss << "layout(location=" << (location++) << ") in int gpu_Layer[];\n";
}
if (do_viewport_workaround) {
ss << "layout(location=" << (location++) << ") in int gpu_ViewportIndex[];\n";
}
if (do_barycentric_workaround) {
ss << "flat out vec4 gpu_pos[3];\n";
ss << "smooth out vec3 gpu_BaryCoord;\n";
ss << "noperspective out vec3 gpu_BaryCoordNoPersp;\n";
}
ss << "\n";
ss << "void main()\n";
ss << "{\n";
/* Layer/viewport are per-primitive: take the value from the provoking (first) vertex. */
if (do_layer_workaround) {
ss << " gl_Layer = gpu_Layer[0];\n";
}
if (do_viewport_workaround) {
ss << " gl_ViewportIndex = gpu_ViewportIndex[0];\n";
}
if (do_barycentric_workaround) {
ss << " gpu_pos[0] = gl_in[0].gl_Position;\n";
ss << " gpu_pos[1] = gl_in[1].gl_Position;\n";
ss << " gpu_pos[2] = gl_in[2].gl_Position;\n";
}
/* Emit the three vertices of the input triangle, copying every interface member through and
 * tagging each vertex with its barycentric corner when needed. */
for (auto i : IndexRange(3)) {
for (StageInterfaceInfo *iface : info_modified.vertex_out_interfaces_) {
for (auto &inout : iface->inouts) {
ss << " " << iface->instance_name << "_out." << inout.name;
ss << " = " << iface->instance_name << "_in[" << i << "]." << inout.name << ";\n";
}
}
if (do_barycentric_workaround) {
ss << " gpu_BaryCoordNoPersp = gpu_BaryCoord =";
ss << " vec3(" << int(i == 0) << ", " << int(i == 1) << ", " << int(i == 2) << ");\n";
}
ss << " gl_Position = gl_in[" << i << "].gl_Position;\n";
ss << " EmitVertex();\n";
}
ss << "}\n";
return ss.str();
}
/**
 * \brief Check if a pass-through geometry shader needs to be injected for this shader.
 *
 * Injection is required when the shader uses barycentric coordinates, or when it uses the
 * layer/viewport-index builtins on a device where the corresponding workaround is active.
 */
bool VKShader::do_geometry_shader_injection(const shader::ShaderCreateInfo *info)
{
const VKWorkarounds &workarounds = VKBackend::get().device_get().workarounds_get();
const BuiltinBits builtins = info->builtins_;
/* Barycentric coordinates always require the emulation pass. */
const bool needs_barycentric = bool(builtins & BuiltinBits::BARYCENTRIC_COORD);
/* Layer/viewport-index only require it when the device lacks native support. */
const bool needs_layer = workarounds.shader_output_layer && bool(builtins & BuiltinBits::LAYER);
const bool needs_viewport = workarounds.shader_output_viewport_index &&
bool(builtins & BuiltinBits::VIEWPORT_INDEX);
return needs_barycentric || needs_layer || needs_viewport;
}
/** \} */
int VKShader::program_handle_get() const
{
return -1;

View File

@@ -95,6 +95,13 @@ class VKShader : public Shader {
{
return compute_module_ != VK_NULL_HANDLE;
}
/**
* \brief features available on newer implementation such as native barycentric coordinates
* and layered rendering, necessitate a geometry shader to work on older hardware.
*/
std::string workaround_geometry_shader_source_create(const shader::ShaderCreateInfo &info);
bool do_geometry_shader_injection(const shader::ShaderCreateInfo *info);
};
static inline VKShader &unwrap(Shader &shader)