diff --git a/CMakeLists.txt b/CMakeLists.txt index 8cd3afc2e12..a6a4136a4cb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -525,6 +525,12 @@ if(NOT APPLE) set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for") mark_as_advanced(WITH_CYCLES_DEVICE_HIP) mark_as_advanced(CYCLES_HIP_BINARIES_ARCH) + + # HIPRT is only available on Windows for now. + if(WIN32) + option(WITH_CYCLES_DEVICE_HIPRT "Enable Cycles AMD HIPRT support" OFF) + mark_as_advanced(WITH_CYCLES_DEVICE_HIPRT) + endif() endif() # Apple Metal @@ -1981,10 +1987,13 @@ if(FIRST_RUN) info_cfg_option(WITH_CYCLES_DEVICE_OPTIX) info_cfg_option(WITH_CYCLES_DEVICE_CUDA) info_cfg_option(WITH_CYCLES_CUDA_BINARIES) - info_cfg_option(WITH_CYCLES_DEVICE_HIP) - info_cfg_option(WITH_CYCLES_HIP_BINARIES) info_cfg_option(WITH_CYCLES_DEVICE_ONEAPI) info_cfg_option(WITH_CYCLES_ONEAPI_BINARIES) + info_cfg_option(WITH_CYCLES_DEVICE_HIP) + info_cfg_option(WITH_CYCLES_HIP_BINARIES) + endif() + if(WIN32) + info_cfg_option(WITH_CYCLES_DEVICE_HIPRT) endif() endif() diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index f28c14ad4cc..68316e3cd9b 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -249,6 +249,13 @@ endif() if(WITH_CYCLES_DEVICE_HIP) add_definitions(-DWITH_HIP) + if(WITH_CYCLES_DEVICE_HIPRT) + include_directories( + ${HIPRT_INCLUDE_DIR} + ) + add_definitions(-DWITH_HIPRT) + endif() + if(WITH_HIP_DYNLOAD) include_directories( ../../extern/hipew/include diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 9d16b4983a7..0d236ee4b12 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -1507,7 +1507,7 @@ class CyclesPreferences(bpy.types.AddonPreferences): def get_device_types(self, context): import _cycles - has_cuda, 
has_optix, has_hip, has_metal, has_oneapi = _cycles.get_device_types() + has_cuda, has_optix, has_hip, has_metal, has_oneapi, has_hiprt = _cycles.get_device_types() list = [('NONE', "None", "Don't use compute device", 0)] if has_cuda: @@ -1544,6 +1544,13 @@ class CyclesPreferences(bpy.types.AddonPreferences): default=False, ) + use_hiprt: BoolProperty( + name="HIP RT (Experimental)", + description="HIP RT enables AMD hardware ray tracing on RDNA2 and above, with shader fallback on older cards. " + "This feature is experimental and some scenes may render incorrectly", + default=False, + ) + use_oneapirt: BoolProperty( name="Embree on GPU (Experimental)", description="Embree GPU execution will allow to use hardware ray tracing on Intel GPUs, which will provide better performance. " @@ -1770,7 +1777,13 @@ class CyclesPreferences(bpy.types.AddonPreferences): col.prop(self, "kernel_optimization_level") col.prop(self, "use_metalrt") - if compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu: + if compute_device_type == 'HIP': + has_cuda, has_optix, has_hip, has_metal, has_oneapi, has_hiprt = _cycles.get_device_types() + row = layout.row() + row.enabled = has_hiprt + row.prop(self, "use_hiprt") + + elif compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu: row = layout.row() row.use_property_split = True row.prop(self, "use_oneapirt") diff --git a/intern/cycles/blender/device.cpp b/intern/cycles/blender/device.cpp index b5fc5b0df44..4e1f77eb9cb 100644 --- a/intern/cycles/blender/device.cpp +++ b/intern/cycles/blender/device.cpp @@ -124,6 +124,10 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, info.use_hardware_raytracing = false; } + if (info.type == DEVICE_HIP && !get_boolean(cpreferences, "use_hiprt")) { + info.use_hardware_raytracing = false; + } + /* There is an accumulative logic here, because Multi-devices are support only for * the same backend + CPU in Blender right now, and both oneAPI and Metal have a * global boolean backend 
setting (see above) for enabling/disabling HW RT, diff --git a/intern/cycles/blender/python.cpp b/intern/cycles/blender/python.cpp index 682d7075455..1afd1e9bd1c 100644 --- a/intern/cycles/blender/python.cpp +++ b/intern/cycles/blender/python.cpp @@ -876,20 +876,23 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args* static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/) { vector device_types = Device::available_types(); - bool has_cuda = false, has_optix = false, has_hip = false, has_metal = false, has_oneapi = false; + bool has_cuda = false, has_optix = false, has_hip = false, has_metal = false, has_oneapi = false, + has_hiprt = false; foreach (DeviceType device_type, device_types) { has_cuda |= (device_type == DEVICE_CUDA); has_optix |= (device_type == DEVICE_OPTIX); has_hip |= (device_type == DEVICE_HIP); has_metal |= (device_type == DEVICE_METAL); has_oneapi |= (device_type == DEVICE_ONEAPI); + has_hiprt |= (device_type == DEVICE_HIPRT); } - PyObject *list = PyTuple_New(5); + PyObject *list = PyTuple_New(6); PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda)); PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix)); PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip)); PyTuple_SET_ITEM(list, 3, PyBool_FromLong(has_metal)); PyTuple_SET_ITEM(list, 4, PyBool_FromLong(has_oneapi)); + PyTuple_SET_ITEM(list, 5, PyBool_FromLong(has_hiprt)); return list; } diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt index 8ccc38660b3..f3242e6de97 100644 --- a/intern/cycles/bvh/CMakeLists.txt +++ b/intern/cycles/bvh/CMakeLists.txt @@ -14,6 +14,7 @@ set(SRC binning.cpp build.cpp embree.cpp + hiprt.cpp multi.cpp node.cpp optix.cpp @@ -39,6 +40,7 @@ set(SRC_HEADERS binning.h build.h embree.h + hiprt.h multi.h node.h optix.h diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp index 2a7bbfe968b..6db59e81a89 100644 --- a/intern/cycles/bvh/bvh.cpp +++ b/intern/cycles/bvh/bvh.cpp @@ -6,6 
+6,7 @@ #include "bvh/bvh2.h" #include "bvh/embree.h" +#include "bvh/hiprt.h" #include "bvh/metal.h" #include "bvh/multi.h" #include "bvh/optix.h" @@ -30,10 +31,14 @@ const char *bvh_layout_name(BVHLayout layout) return "OPTIX"; case BVH_LAYOUT_METAL: return "METAL"; + case BVH_LAYOUT_HIPRT: + return "HIPRT"; case BVH_LAYOUT_MULTI_OPTIX: case BVH_LAYOUT_MULTI_METAL: + case BVH_LAYOUT_MULTI_HIPRT: case BVH_LAYOUT_MULTI_OPTIX_EMBREE: case BVH_LAYOUT_MULTI_METAL_EMBREE: + case BVH_LAYOUT_MULTI_HIPRT_EMBREE: return "MULTI"; case BVH_LAYOUT_ALL: return "ALL"; @@ -101,11 +106,20 @@ BVH *BVH::create(const BVHParams ¶ms, #else (void)device; break; +#endif + case BVH_LAYOUT_HIPRT: +#ifdef WITH_HIPRT + return new BVHHIPRT(params, geometry, objects, device); +#else + (void)device; + break; #endif case BVH_LAYOUT_MULTI_OPTIX: case BVH_LAYOUT_MULTI_METAL: + case BVH_LAYOUT_MULTI_HIPRT: case BVH_LAYOUT_MULTI_OPTIX_EMBREE: case BVH_LAYOUT_MULTI_METAL_EMBREE: + case BVH_LAYOUT_MULTI_HIPRT_EMBREE: return new BVHMulti(params, geometry, objects); case BVH_LAYOUT_NONE: case BVH_LAYOUT_ALL: diff --git a/intern/cycles/bvh/hiprt.cpp b/intern/cycles/bvh/hiprt.cpp new file mode 100644 index 00000000000..05f317ee70e --- /dev/null +++ b/intern/cycles/bvh/hiprt.cpp @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2023 Blender Foundation */ + +#ifdef WITH_HIPRT + +# include "bvh/hiprt.h" + +# include "scene/mesh.h" +# include "scene/object.h" + +# include "util/foreach.h" +# include "util/progress.h" + +# include "device/hiprt/device_impl.h" + +CCL_NAMESPACE_BEGIN + +BVHHIPRT::BVHHIPRT(const BVHParams ¶ms, + const vector &geometry, + const vector &objects, + Device *in_device) + : BVH(params, geometry, objects), + hiprt_geom(0), + custom_primitive_bound(in_device, "Custom Primitive Bound", MEM_READ_ONLY), + triangle_index(in_device, "HIPRT Triangle Index", MEM_READ_ONLY), + vertex_data(in_device, "vertex_data", MEM_READ_ONLY), + device(in_device) +{ + triangle_mesh = 
{0}; + custom_prim_aabb = {0}; +} + +BVHHIPRT::~BVHHIPRT() +{ + HIPRTDevice *hiprt_device = static_cast(device); + hiprtContext hiprt_context = hiprt_device->get_hiprt_context(); + custom_primitive_bound.free(); + triangle_index.free(); + vertex_data.free(); + hiprtDestroyGeometry(hiprt_context, hiprt_geom); +} + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/bvh/hiprt.h b/intern/cycles/bvh/hiprt.h new file mode 100644 index 00000000000..fd155a1ae0c --- /dev/null +++ b/intern/cycles/bvh/hiprt.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2023 Blender Foundation */ + +#ifdef WITH_HIPRT + +# pragma once + +# include "bvh/bvh.h" +# include "bvh/params.h" + +# ifdef WITH_HIP_DYNLOAD +# include "hiprtew.h" +# else +# include +# endif + +# include "device/memory.h" + +CCL_NAMESPACE_BEGIN + +class BVHHIPRT : public BVH { + public: + friend class HIPDevice; + + bool is_tlas() + { + return params.top_level; + } + + hiprtGeometry hiprt_geom; + hiprtTriangleMeshPrimitive triangle_mesh; + hiprtAABBListPrimitive custom_prim_aabb; + hiprtGeometryBuildInput geom_input; + + vector custom_prim_info; /* x: prim_id, y: prim_type */ + vector prims_time; + + /* Custom primitives. 
*/ + device_vector custom_primitive_bound; + device_vector triangle_index; + device_vector vertex_data; + + protected: + friend class BVH; + BVHHIPRT(const BVHParams ¶ms, + const vector &geometry, + const vector &objects, + Device *in_device); + + virtual ~BVHHIPRT(); + + private: + Device *device; +}; + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/cmake/external_libs.cmake b/intern/cycles/cmake/external_libs.cmake index 536639b3154..e43275f2398 100644 --- a/intern/cycles/cmake/external_libs.cmake +++ b/intern/cycles/cmake/external_libs.cmake @@ -41,20 +41,32 @@ endif() # HIP ########################################################################### -if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) - if(UNIX) - # Disabled until there is a HIP 5.5 release for Linux. - set(WITH_CYCLES_HIP_BINARIES OFF) - message(STATUS "HIP temporarily disabled due to compiler bugs") - else() - # Need at least HIP 5.5 to solve compiler bug affecting the kernel. - find_package(HIP 5.5.0) - set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES) +if(WITH_CYCLES_DEVICE_HIP) + if(WITH_CYCLES_HIP_BINARIES) + if(UNIX) + # Disabled until there is a HIP 5.5 release for Linux. + set(WITH_CYCLES_HIP_BINARIES OFF) + message(STATUS "HIP temporarily disabled due to compiler bugs") + else() + # Need at least HIP 5.5 to solve compiler bug affecting the kernel. 
+ find_package(HIP 5.5.0) + set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES) - if(HIP_FOUND) - message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})") + if(HIP_FOUND) + message(STATUS "Found HIP ${HIP_HIPCC_EXECUTABLE} (${HIP_VERSION})") + endif() endif() endif() + + # HIP RT + if(WITH_CYCLES_DEVICE_HIP AND WITH_CYCLES_DEVICE_HIPRT) + find_package(HIPRT) + set_and_warn_library_found("HIP RT" HIPRT_FOUND WITH_CYCLES_DEVICE_HIPRT) + endif() +endif() + +if(NOT WITH_CYCLES_DEVICE_HIP) + set(WITH_CYCLES_DEVICE_HIPRT OFF) endif() if(NOT WITH_HIP_DYNLOAD) diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt index 127ad1d4d1e..aceaac5b2a6 100644 --- a/intern/cycles/device/CMakeLists.txt +++ b/intern/cycles/device/CMakeLists.txt @@ -66,6 +66,13 @@ set(SRC_HIP hip/util.h ) +set(SRC_HIPRT + hiprt/device_impl.cpp + hiprt/device_impl.h + hiprt/queue.cpp + hiprt/queue.h +) + set(SRC_ONEAPI oneapi/device_impl.cpp oneapi/device_impl.h @@ -124,6 +131,7 @@ set(SRC ${SRC_CPU} ${SRC_CUDA} ${SRC_HIP} + ${SRC_HIPRT} ${SRC_DUMMY} ${SRC_MULTI} ${SRC_OPTIX} @@ -209,6 +217,7 @@ source_group("cpu" FILES ${SRC_CPU}) source_group("cuda" FILES ${SRC_CUDA}) source_group("dummy" FILES ${SRC_DUMMY}) source_group("hip" FILES ${SRC_HIP}) +source_group("hiprt" FILES ${SRC_HIPRT}) source_group("multi" FILES ${SRC_MULTI}) source_group("metal" FILES ${SRC_METAL}) source_group("optix" FILES ${SRC_OPTIX}) diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 0ed03f1e94d..f36bb7d2fda 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -14,6 +14,7 @@ #include "device/cuda/device.h" #include "device/dummy/device.h" #include "device/hip/device.h" +#include "device/hiprt/device_impl.h" #include "device/metal/device.h" #include "device/multi/device.h" #include "device/oneapi/device.h" @@ -135,6 +136,8 @@ DeviceType Device::type_from_string(const char *name) return 
DEVICE_METAL; else if (strcmp(name, "ONEAPI") == 0) return DEVICE_ONEAPI; + else if (strcmp(name, "HIPRT") == 0) + return DEVICE_HIPRT; return DEVICE_NONE; } @@ -155,6 +158,8 @@ string Device::string_from_type(DeviceType type) return "METAL"; else if (type == DEVICE_ONEAPI) return "ONEAPI"; + else if (type == DEVICE_HIPRT) + return "HIPRT"; return ""; } @@ -177,6 +182,10 @@ vector Device::available_types() #endif #ifdef WITH_ONEAPI types.push_back(DEVICE_ONEAPI); +#endif +#ifdef WITH_HIPRT + if (hiprtewInit()) + types.push_back(DEVICE_HIPRT); #endif return types; } diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 1a665efcd2e..2b6b42f0822 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -40,6 +40,7 @@ enum DeviceType { DEVICE_MULTI, DEVICE_OPTIX, DEVICE_HIP, + DEVICE_HIPRT, DEVICE_METAL, DEVICE_ONEAPI, DEVICE_DUMMY, @@ -79,8 +80,7 @@ class DeviceInfo { bool has_profiling; /* Supports runtime collection of profiling info. */ bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */ bool has_gpu_queue; /* Device supports GPU queue. */ - bool use_hardware_raytracing; /* Use hardware ray tracing to accelerate ray queries in a backend. - */ + bool use_hardware_raytracing; /* Use hardware instructions to accelerate ray tracing. */ KernelOptimizationLevel kernel_optimization_level; /* Optimization level applied to path tracing * kernels (Metal only). */ DenoiserTypeMask denoisers; /* Supported denoiser types. 
*/ diff --git a/intern/cycles/device/hip/device.cpp b/intern/cycles/device/hip/device.cpp index c853114cea7..b982dd39ed3 100644 --- a/intern/cycles/device/hip/device.cpp +++ b/intern/cycles/device/hip/device.cpp @@ -13,6 +13,10 @@ # include "util/windows.h" #endif /* WITH_HIP */ +#ifdef WITH_HIPRT +# include "device/hiprt/device_impl.h" +#endif + CCL_NAMESPACE_BEGIN bool device_hip_init() @@ -65,7 +69,12 @@ bool device_hip_init() Device *device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler) { -#ifdef WITH_HIP +#ifdef WITH_HIPRT + if (info.use_hardware_raytracing) + return new HIPRTDevice(info, stats, profiler); + else + return new HIPDevice(info, stats, profiler); +#elif defined(WITH_HIP) return new HIPDevice(info, stats, profiler); #else (void)info; @@ -115,6 +124,12 @@ void device_hip_info(vector &devices) return; } +# ifdef WITH_HIPRT + const bool has_hardware_raytracing = hiprtewInit(); +# else + const bool has_hardware_raytracing = false; +# endif + vector display_devices; for (int num = 0; num < count; num++) { @@ -150,6 +165,8 @@ void device_hip_info(vector &devices) } } + info.use_hardware_raytracing = has_hardware_raytracing; + int pci_location[3] = {0, 0, 0}; hipDeviceGetAttribute(&pci_location[0], hipDeviceAttributePciDomainID, num); hipDeviceGetAttribute(&pci_location[1], hipDeviceAttributePciBusId, num); @@ -176,6 +193,7 @@ void device_hip_info(vector &devices) VLOG_INFO << "Device has compute preemption or is not used for display."; devices.push_back(info); } + VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\"."; } diff --git a/intern/cycles/device/hip/device_impl.h b/intern/cycles/device/hip/device_impl.h index 021d8d1651f..97b8ba29490 100644 --- a/intern/cycles/device/hip/device_impl.h +++ b/intern/cycles/device/hip/device_impl.h @@ -1,6 +1,8 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright 2011-2022 Blender Foundation */ +#pragma once + #ifdef WITH_HIP # include "device/device.h" @@ -49,9 +51,11 
@@ class HIPDevice : public GPUDevice { bool use_adaptive_compilation(); - string compile_kernel_get_common_cflags(const uint kernel_features); + virtual string compile_kernel_get_common_cflags(const uint kernel_features); - string compile_kernel(const uint kernel_features, const char *name, const char *base = "hip"); + virtual string compile_kernel(const uint kernel_features, + const char *name, + const char *base = "hip"); virtual bool load_kernels(const uint kernel_features) override; void reserve_local_memory(const uint kernel_features); diff --git a/intern/cycles/device/hiprt/device_impl.cpp b/intern/cycles/device/hiprt/device_impl.cpp new file mode 100644 index 00000000000..beb255deba4 --- /dev/null +++ b/intern/cycles/device/hiprt/device_impl.cpp @@ -0,0 +1,1054 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2023 Blender Foundation */ + +#ifdef WITH_HIPRT + +# include "device/hiprt/device_impl.h" + +# include "util/debug.h" +# include "util/foreach.h" +# include "util/log.h" +# include "util/map.h" +# include "util/md5.h" +# include "util/path.h" +# include "util/progress.h" +# include "util/string.h" +# include "util/system.h" +# include "util/time.h" +# include "util/types.h" +# include "util/windows.h" + +# include "bvh/hiprt.h" + +# include "scene/hair.h" +# include "scene/mesh.h" +# include "scene/object.h" +# include "scene/pointcloud.h" + +CCL_NAMESPACE_BEGIN + +static void get_hiprt_transform(float matrix[][4], Transform &tfm) +{ + int row = 0; + int col = 0; + matrix[row][col++] = tfm.x.x; + matrix[row][col++] = tfm.x.y; + matrix[row][col++] = tfm.x.z; + matrix[row][col++] = tfm.x.w; + row++; + col = 0; + matrix[row][col++] = tfm.y.x; + matrix[row][col++] = tfm.y.y; + matrix[row][col++] = tfm.y.z; + matrix[row][col++] = tfm.y.w; + row++; + col = 0; + matrix[row][col++] = tfm.z.x; + matrix[row][col++] = tfm.z.y; + matrix[row][col++] = tfm.z.z; + matrix[row][col++] = tfm.z.w; +} + +class HIPRTDevice; + +BVHLayoutMask 
HIPRTDevice::get_bvh_layout_mask(const uint /* kernel_features */) const +{ + return BVH_LAYOUT_HIPRT; +} + +HIPRTDevice::HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler) + : HIPDevice(info, stats, profiler), + global_stack_buffer(this, "global_stack_buffer", MEM_DEVICE_ONLY), + hiprt_context(NULL), + scene(NULL), + functions_table(NULL), + scratch_buffer_size(0), + scratch_buffer(this, "scratch_buffer", MEM_DEVICE_ONLY), + visibility(this, "visibility", MEM_READ_ONLY), + instance_transform_matrix(this, "instance_transform_matrix", MEM_READ_ONLY), + transform_headers(this, "transform_headers", MEM_READ_ONLY), + user_instance_id(this, "user_instance_id", MEM_GLOBAL), + hiprt_blas_ptr(this, "hiprt_blas_ptr", MEM_READ_WRITE), + blas_ptr(this, "blas_ptr", MEM_GLOBAL), + custom_prim_info(this, "custom_prim_info", MEM_GLOBAL), + custom_prim_info_offset(this, "custom_prim_info_offset", MEM_GLOBAL), + prims_time(this, "prims_time", MEM_GLOBAL), + prim_time_offset(this, "prim_time_offset", MEM_GLOBAL) +{ + HIPContextScope scope(this); + hiprtContextCreationInput hiprt_context_input = {0}; + hiprt_context_input.ctxt = hipContext; + hiprt_context_input.device = hipDevice; + hiprt_context_input.deviceType = hiprtDeviceAMD; + hiprtError rt_result = hiprtCreateContext( + HIPRT_API_VERSION, hiprt_context_input, &hiprt_context); + + if (rt_result != hiprtSuccess) { + set_error(string_printf("Failed to create HIPRT context")); + return; + } + + rt_result = hiprtCreateFuncTable( + hiprt_context, Max_Primitive_Type, Max_Intersect_Filter_Function, &functions_table); + + if (rt_result != hiprtSuccess) { + set_error(string_printf("Failed to create HIPRT Function Table")); + return; + } +} + +HIPRTDevice::~HIPRTDevice() +{ + HIPContextScope scope(this); + user_instance_id.free(); + visibility.free(); + hiprt_blas_ptr.free(); + blas_ptr.free(); + instance_transform_matrix.free(); + transform_headers.free(); + custom_prim_info_offset.free(); + custom_prim_info.free(); 
+ prim_time_offset.free(); + prims_time.free(); + global_stack_buffer.free(); + hiprtDestroyFuncTable(hiprt_context, functions_table); + hiprtDestroyScene(hiprt_context, scene); + hiprtDestroyContext(hiprt_context); +} + +unique_ptr HIPRTDevice::gpu_queue_create() +{ + return make_unique(this); +} + +string HIPRTDevice::compile_kernel_get_common_cflags(const uint kernel_features) +{ + string cflags = HIPDevice::compile_kernel_get_common_cflags(kernel_features); + + cflags += " -D __HIPRT__ "; + + return cflags; +} + +string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name, const char *base) +{ + int major, minor; + hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId); + hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId); + hipDeviceProp_t props; + hipGetDeviceProperties(&props, hipDevId); + + char *arch = strtok(props.gcnArchName, ":"); + if (arch == NULL) { + arch = props.gcnArchName; + } + + if (!use_adaptive_compilation()) { + const string fatbin = path_get(string_printf("lib/%s_rt_gfx.hipfb", name)); + VLOG(1) << "Testing for pre-compiled kernel " << fatbin << "."; + if (path_exists(fatbin)) { + VLOG(1) << "Using precompiled kernel."; + return fatbin; + } + } + + string source_path = path_get("source"); + const string source_md5 = path_files_md5_hash(source_path); + + string common_cflags = compile_kernel_get_common_cflags(kernel_features); + const string kernel_md5 = util_md5_string(source_md5 + common_cflags); + + const string include_path = source_path; + const string bitcode_file = string_printf("cycles_%s_%s_%s.bc", name, arch, kernel_md5.c_str()); + const string bitcode = path_cache_get(path_join("kernels", bitcode_file)); + const string fatbin_file = string_printf( + "cycles_%s_%s_%s.hipfb", name, arch, kernel_md5.c_str()); + const string fatbin = path_cache_get(path_join("kernels", fatbin_file)); + + VLOG(1) << "Testing for locally compiled kernel " << fatbin << "."; + 
if (path_exists(fatbin)) { + VLOG(1) << "Using locally compiled kernel."; + return fatbin; + } + +# ifdef _WIN32 + if (!use_adaptive_compilation() && have_precompiled_kernels()) { + if (!hipSupportsDevice(hipDevId)) { + set_error( + string_printf("HIP backend requires compute capability 10.1 or up, but found %d.%d. " + "Your GPU is not supported.", + major, + minor)); + } + else { + set_error( + string_printf("HIP binary kernel for this graphics card compute " + "capability (%d.%d) not found.", + major, + minor)); + } + return string(); + } +# endif + + const char *const hipcc = hipewCompilerPath(); + if (hipcc == NULL) { + set_error( + "HIP hipcc compiler not found. " + "Install HIP toolkit in default location."); + return string(); + } + + const int hipcc_hip_version = hipewCompilerVersion(); + VLOG_INFO << "Found hipcc " << hipcc << ", HIP version " << hipcc_hip_version << "."; + if (hipcc_hip_version < 40) { + printf( + "Unsupported HIP version %d.%d detected, " + "you need HIP 4.0 or newer.\n", + hipcc_hip_version / 10, + hipcc_hip_version % 10); + return string(); + } + + path_create_directories(fatbin); + + source_path = path_join(path_join(source_path, "kernel"), + path_join("device", path_join(base, string_printf("%s.cpp", name)))); + + printf("Compiling %s and caching to %s", source_path.c_str(), fatbin.c_str()); + + double starttime = time_dt(); + + const string hiprt_path = getenv("HIPRT_ROOT_DIR"); + // First, app kernels are compiled into bitcode, without access to implementation of HIP RT + // functions + if (!path_exists(bitcode)) { + + std::string rtc_options; + + rtc_options.append(" --offload-arch=").append(arch); + rtc_options.append(" -D __HIPRT__"); + rtc_options.append(" -ffast-math -O3 -std=c++17"); + rtc_options.append(" -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm"); + + string command = string_printf("%s %s -I %s -I %s %s -o \"%s\"", + hipcc, + rtc_options.c_str(), + include_path.c_str(), + hiprt_path.c_str(), + source_path.c_str(), + 
bitcode.c_str()); + + printf("Compiling %sHIP kernel ...\n%s\n", + (use_adaptive_compilation()) ? "adaptive " : "", + command.c_str()); + +# ifdef _WIN32 + command = "call " + command; +# endif + if (system(command.c_str()) != 0) { + set_error( + "Failed to execute compilation command, " + "see console for details."); + return string(); + } + } + + // After compilation, the bitcode produced is linked with HIP RT bitcode (containing + // implementations of HIP RT functions, e.g. traversal, to produce the final executable code + string linker_options; + linker_options.append(" --offload-arch=").append(arch); + linker_options.append(" -fgpu-rdc --hip-link --cuda-device-only "); + string hiprt_ver(HIPRT_VERSION_STR); + string hiprt_bc; + hiprt_bc = hiprt_path + "\\hiprt" + hiprt_ver + "_amd_lib_win.bc"; + + string linker_command = string_printf("clang++ %s \"%s\" %s -o \"%s\"", + linker_options.c_str(), + bitcode.c_str(), + hiprt_bc.c_str(), + fatbin.c_str()); + +# ifdef _WIN32 + linker_command = "call " + linker_command; +# endif + if (system(linker_command.c_str()) != 0) { + set_error( + "Failed to execute linking command, " + "see console for details."); + return string(); + } + + printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime); + + return fatbin; +} + +bool HIPRTDevice::load_kernels(const uint kernel_features) +{ + if (hipModule) { + if (use_adaptive_compilation()) { + VLOG(1) << "Skipping HIP kernel reload for adaptive compilation, not currently supported."; + } + return true; + } + + if (hipContext == 0) + return false; + + if (!support_device(kernel_features)) { + return false; + } + + /* get kernel */ + const char *kernel_name = "kernel"; + string fatbin = compile_kernel(kernel_features, kernel_name); + if (fatbin.empty()) + return false; + + /* open module */ + HIPContextScope scope(this); + + string fatbin_data; + hipError_t result; + + if (path_read_text(fatbin, fatbin_data)) { + + result = hipModuleLoadData(&hipModule, 
fatbin_data.c_str()); + } + else + result = hipErrorFileNotFound; + + if (result != hipSuccess) + set_error(string_printf( + "Failed to load HIP kernel from '%s' (%s)", fatbin.c_str(), hipewErrorString(result))); + + if (result == hipSuccess) { + kernels.load(this); + { + const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ? + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE : + (kernel_features & KERNEL_FEATURE_MNEE) ? + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE : + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE; + + HIPRTDeviceQueue queue(this); + + device_ptr d_path_index = 0; + device_ptr d_render_buffer = 0; + int d_work_size = 0; + DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size); + + queue.init_execution(); + queue.enqueue(test_kernel, 1, args); + queue.synchronize(); + } + } + + return (result == hipSuccess); +} + +void HIPRTDevice::const_copy_to(const char *name, void *host, size_t size) +{ + HIPContextScope scope(this); + hipDeviceptr_t mem; + size_t bytes; + + if (strcmp(name, "data") == 0) { + assert(size <= sizeof(KernelData)); + KernelData *const data = (KernelData *)host; + *(hiprtScene *)&data->device_bvh = scene; + } + + hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule, "kernel_params")); + assert(bytes == sizeof(KernelParamsHIPRT)); + +# define KERNEL_DATA_ARRAY(data_type, data_name) \ + if (strcmp(name, #data_name) == 0) { \ + hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIPRT, data_name), host, size)); \ + return; \ + } + KERNEL_DATA_ARRAY(KernelData, data) + KERNEL_DATA_ARRAY(IntegratorStateGPU, integrator_state) + KERNEL_DATA_ARRAY(int, user_instance_id) + KERNEL_DATA_ARRAY(uint64_t, blas_ptr) + KERNEL_DATA_ARRAY(int2, custom_prim_info_offset) + KERNEL_DATA_ARRAY(int2, custom_prim_info) + KERNEL_DATA_ARRAY(int, prim_time_offset) + KERNEL_DATA_ARRAY(float2, prims_time) + +# include "kernel/data_arrays.h" +# undef KERNEL_DATA_ARRAY +} + +hiprtGeometryBuildInput 
HIPRTDevice::prepare_triangle_blas(BVHHIPRT *bvh, Mesh *mesh) +{ + hiprtGeometryBuildInput geom_input; + geom_input.geomType = Triangle; + + if (mesh->has_motion_blur() && + !(bvh->params.num_motion_triangle_steps == 0 || bvh->params.use_spatial_split)) { + + const Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + const size_t num_triangles = mesh->num_triangles(); + + const int num_bvh_steps = bvh->params.num_motion_triangle_steps * 2 + 1; + const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1); + + int num_bounds = 0; + bvh->custom_primitive_bound.alloc(num_triangles * num_bvh_steps); + + for (uint j = 0; j < num_triangles; j++) { + Mesh::Triangle t = mesh->get_triangle(j); + const float3 *verts = mesh->get_verts().data(); + + const size_t num_verts = mesh->get_verts().size(); + const size_t num_steps = mesh->get_motion_steps(); + const float3 *vert_steps = attr_mP->data_float3(); + + float3 prev_verts[3]; + t.motion_verts(verts, vert_steps, num_verts, num_steps, 0.0f, prev_verts); + BoundBox prev_bounds = BoundBox::empty; + prev_bounds.grow(prev_verts[0]); + prev_bounds.grow(prev_verts[1]); + prev_bounds.grow(prev_verts[2]); + + for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) { + const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1; + float3 curr_verts[3]; + t.motion_verts(verts, vert_steps, num_verts, num_steps, curr_time, curr_verts); + BoundBox curr_bounds = BoundBox::empty; + curr_bounds.grow(curr_verts[0]); + curr_bounds.grow(curr_verts[1]); + curr_bounds.grow(curr_verts[2]); + BoundBox bounds = prev_bounds; + bounds.grow(curr_bounds); + if (bounds.valid()) { + const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1; + bvh->custom_primitive_bound[num_bounds] = bounds; + bvh->custom_prim_info[num_bounds].x = j; + bvh->custom_prim_info[num_bounds].y = mesh->primitive_type(); + bvh->prims_time[num_bounds].x = curr_time; + bvh->prims_time[num_bounds].y = prev_time; + num_bounds++; + } + 
prev_bounds = curr_bounds; + } + } + + bvh->custom_prim_aabb.aabbCount = bvh->custom_primitive_bound.size(); + bvh->custom_prim_aabb.aabbStride = sizeof(BoundBox); + bvh->custom_primitive_bound.copy_to_device(); + bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer; + + geom_input.type = hiprtPrimitiveTypeAABBList; + geom_input.aabbList.primitive = &bvh->custom_prim_aabb; + geom_input.geomType = Motion_Triangle; + } + else { + + size_t triangle_size = mesh->get_triangles().size(); + void *triangle_data = mesh->get_triangles().data(); + + size_t vertex_size = mesh->get_verts().size(); + void *vertex_data = mesh->get_verts().data(); + + bvh->triangle_mesh.triangleCount = mesh->num_triangles(); + bvh->triangle_mesh.triangleStride = 3 * sizeof(int); + bvh->triangle_mesh.vertexCount = vertex_size; + bvh->triangle_mesh.vertexStride = sizeof(float3); + + bvh->triangle_index.host_pointer = triangle_data; + bvh->triangle_index.data_elements = 1; + bvh->triangle_index.data_type = TYPE_INT; + bvh->triangle_index.data_size = triangle_size; + bvh->triangle_index.copy_to_device(); + bvh->triangle_mesh.triangleIndices = (void *)(bvh->triangle_index.device_pointer); + // either has to set the host pointer to zero, or increment the refcount on triangle_data + bvh->triangle_index.host_pointer = 0; + bvh->vertex_data.host_pointer = vertex_data; + bvh->vertex_data.data_elements = 4; + bvh->vertex_data.data_type = TYPE_FLOAT; + bvh->vertex_data.data_size = vertex_size; + bvh->vertex_data.copy_to_device(); + bvh->triangle_mesh.vertices = (void *)(bvh->vertex_data.device_pointer); + bvh->vertex_data.host_pointer = 0; + + geom_input.type = hiprtPrimitiveTypeTriangleMesh; + geom_input.triangleMesh.primitive = &(bvh->triangle_mesh); + } + return geom_input; +} + +hiprtGeometryBuildInput HIPRTDevice::prepare_curve_blas(BVHHIPRT *bvh, Hair *hair) +{ + hiprtGeometryBuildInput geom_input; + + const PrimitiveType primitive_type = hair->primitive_type(); + const 
size_t num_curves = hair->num_curves(); + const size_t num_segments = hair->num_segments(); + const Attribute *curve_attr_mP = NULL; + + if (curve_attr_mP == NULL || bvh->params.num_motion_curve_steps == 0) { + + bvh->custom_prim_info.resize(num_segments); + bvh->custom_primitive_bound.alloc(num_segments); + } + else { + size_t num_boxes = bvh->params.num_motion_curve_steps * 2 * num_segments; + bvh->custom_prim_info.resize(num_boxes); + bvh->custom_primitive_bound.alloc(num_boxes); + curve_attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + } + + int num_bounds = 0; + float3 *curve_keys = hair->get_curve_keys().data(); + + for (uint j = 0; j < num_curves; j++) { + const Hair::Curve curve = hair->get_curve(j); + const float *curve_radius = &hair->get_curve_radius()[0]; + int first_key = curve.first_key; + for (int k = 0; k < curve.num_keys - 1; k++) { + if (curve_attr_mP == NULL || bvh->params.num_motion_curve_steps == 0) { + float3 current_keys[4]; + current_keys[0] = curve_keys[max(first_key + k - 1, first_key)]; + current_keys[1] = curve_keys[first_key + k]; + current_keys[2] = curve_keys[first_key + k + 1]; + current_keys[3] = curve_keys[min(first_key + k + 2, first_key + curve.num_keys - 1)]; + + if (current_keys[0].x == current_keys[1].x && current_keys[1].x == current_keys[2].x && + current_keys[2].x == current_keys[3].x && current_keys[0].y == current_keys[1].y && + current_keys[1].y == current_keys[2].y && current_keys[2].y == current_keys[3].y && + current_keys[0].z == current_keys[1].z && current_keys[1].z == current_keys[2].z && + current_keys[2].z == current_keys[3].z) + continue; + + BoundBox bounds = BoundBox::empty; + curve.bounds_grow(k, &hair->get_curve_keys()[0], curve_radius, bounds); + if (bounds.valid()) { + int type = PRIMITIVE_PACK_SEGMENT(primitive_type, k); + bvh->custom_prim_info[num_bounds].x = j; + bvh->custom_prim_info[num_bounds].y = type; + bvh->custom_primitive_bound[num_bounds] = bounds; + num_bounds++; + } + } + 
else { + + const int num_bvh_steps = bvh->params.num_motion_curve_steps * 2 + 1; + const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1); + const size_t num_steps = hair->get_motion_steps(); + const float3 *curve_keys = &hair->get_curve_keys()[0]; + const float4 *key_steps = curve_attr_mP->data_float4(); + const size_t num_keys = hair->get_curve_keys().size(); + + float4 prev_keys[4]; + curve.cardinal_motion_keys(curve_keys, + curve_radius, + key_steps, + num_keys, + num_steps, + 0.0f, + k - 1, + k, + k + 1, + k + 2, + prev_keys); + BoundBox prev_bounds = BoundBox::empty; + curve.bounds_grow(prev_keys, prev_bounds); + + for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) { + const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1; + float4 curr_keys[4]; + curve.cardinal_motion_keys(curve_keys, + curve_radius, + key_steps, + num_keys, + num_steps, + curr_time, + k - 1, + k, + k + 1, + k + 2, + curr_keys); + BoundBox curr_bounds = BoundBox::empty; + curve.bounds_grow(curr_keys, curr_bounds); + BoundBox bounds = prev_bounds; + bounds.grow(curr_bounds); + if (bounds.valid()) { + const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1; + int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k); + bvh->custom_prim_info[num_bounds].x = j; + bvh->custom_prim_info[num_bounds].y = packed_type; // k + bvh->custom_primitive_bound[num_bounds] = bounds; + bvh->prims_time[num_bounds].x = curr_time; + bvh->prims_time[num_bounds].y = prev_time; + num_bounds++; + } + prev_bounds = curr_bounds; + } + } + } + } + + bvh->custom_prim_aabb.aabbCount = num_bounds; + bvh->custom_prim_aabb.aabbStride = sizeof(BoundBox); + bvh->custom_primitive_bound.copy_to_device(); + bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer; + + geom_input.type = hiprtPrimitiveTypeAABBList; + geom_input.aabbList.primitive = &bvh->custom_prim_aabb; + geom_input.geomType = Curve; + + return geom_input; +} + +hiprtGeometryBuildInput 
HIPRTDevice::prepare_point_blas(BVHHIPRT *bvh, PointCloud *pointcloud) +{ + hiprtGeometryBuildInput geom_input; + + const Attribute *point_attr_mP = NULL; + if (pointcloud->has_motion_blur()) { + point_attr_mP = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + } + + const float3 *points_data = pointcloud->get_points().data(); + const float *radius_data = pointcloud->get_radius().data(); + const size_t num_points = pointcloud->num_points(); + const float3 *motion_data = (point_attr_mP) ? point_attr_mP->data_float3() : NULL; + const size_t num_steps = pointcloud->get_motion_steps(); + + int num_bounds = 0; + + if (point_attr_mP == NULL) { + bvh->custom_primitive_bound.alloc(num_points); + for (uint j = 0; j < num_points; j++) { + const PointCloud::Point point = pointcloud->get_point(j); + BoundBox bounds = BoundBox::empty; + point.bounds_grow(points_data, radius_data, bounds); + if (bounds.valid()) { + bvh->custom_primitive_bound[num_bounds] = bounds; + bvh->custom_prim_info[num_bounds].x = j; + bvh->custom_prim_info[num_bounds].y = PRIMITIVE_POINT; + num_bounds++; + } + } + } + else if (bvh->params.num_motion_point_steps == 0) { + + bvh->custom_primitive_bound.alloc(num_points * num_steps); + + for (uint j = 0; j < num_points; j++) { + const PointCloud::Point point = pointcloud->get_point(j); + BoundBox bounds = BoundBox::empty; + point.bounds_grow(points_data, radius_data, bounds); + for (size_t step = 0; step < num_steps - 1; step++) { + point.bounds_grow(motion_data + step * num_points, radius_data, bounds); + } + if (bounds.valid()) { + bvh->custom_primitive_bound[num_bounds] = bounds; + bvh->custom_prim_info[num_bounds].x = j; + bvh->custom_prim_info[num_bounds].y = PRIMITIVE_POINT; + num_bounds++; + } + } + } + else { + + const int num_bvh_steps = bvh->params.num_motion_point_steps * 2 + 1; + const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1); + + bvh->custom_primitive_bound.alloc(num_points * num_bvh_steps); + + for (uint j = 0; j < 
num_points; j++) { + const PointCloud::Point point = pointcloud->get_point(j); + const size_t num_steps = pointcloud->get_motion_steps(); + const float3 *point_steps = point_attr_mP->data_float3(); + + float4 prev_key = point.motion_key( + points_data, radius_data, point_steps, num_points, num_steps, 0.0f, j); + BoundBox prev_bounds = BoundBox::empty; + point.bounds_grow(prev_key, prev_bounds); + + for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) { + const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1; + float4 curr_key = point.motion_key( + points_data, radius_data, point_steps, num_points, num_steps, curr_time, j); + BoundBox curr_bounds = BoundBox::empty; + point.bounds_grow(curr_key, curr_bounds); + BoundBox bounds = prev_bounds; + bounds.grow(curr_bounds); + if (bounds.valid()) { + const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1; + bvh->custom_primitive_bound[num_bounds] = bounds; + bvh->custom_prim_info[num_bounds].x = j; + bvh->custom_prim_info[num_bounds].y = PRIMITIVE_MOTION_POINT; + bvh->prims_time[num_bounds].x = curr_time; + bvh->prims_time[num_bounds].y = prev_time; + num_bounds++; + } + prev_bounds = curr_bounds; + } + } + } + + bvh->custom_prim_aabb.aabbCount = bvh->custom_primitive_bound.size(); + bvh->custom_prim_aabb.aabbStride = sizeof(BoundBox); + bvh->custom_primitive_bound.copy_to_device(); + bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer; + + geom_input.type = hiprtPrimitiveTypeAABBList; + geom_input.aabbList.primitive = &bvh->custom_prim_aabb; + geom_input.geomType = Point; + + return geom_input; +} + +void HIPRTDevice::build_blas(BVHHIPRT *bvh, Geometry *geom, hiprtBuildOptions options) +{ + hiprtGeometryBuildInput geom_input = {}; + + switch (geom->geometry_type) { + case Geometry::MESH: + case Geometry::VOLUME: { + Mesh *mesh = static_cast(geom); + + if (mesh->num_triangles() == 0) + return; + + geom_input = prepare_triangle_blas(bvh, mesh); + break; + } + + case 
Geometry::HAIR: { + Hair *const hair = static_cast(geom); + + if (hair->num_segments() == 0) + return; + + geom_input = prepare_curve_blas(bvh, hair); + break; + } + + case Geometry::POINTCLOUD: { + PointCloud *pointcloud = static_cast(geom); + if (pointcloud->num_points() == 0) + return; + + geom_input = prepare_point_blas(bvh, pointcloud); + break; + } + + default: + assert(geom_input.geomType != hiprtInvalidValue); + } + + size_t blas_scratch_buffer_size = 0; + hiprtError rt_err = hiprtGetGeometryBuildTemporaryBufferSize( + hiprt_context, &geom_input, &options, &blas_scratch_buffer_size); + + if (rt_err != hiprtSuccess) { + set_error(string_printf("Failed to get scratch buffer size for BLAS!")); + } + + rt_err = hiprtCreateGeometry(hiprt_context, &geom_input, &options, &bvh->hiprt_geom); + + if (rt_err != hiprtSuccess) { + set_error(string_printf("Failed to create BLAS!")); + } + bvh->geom_input = geom_input; + { + thread_scoped_lock lock(hiprt_mutex); + if (blas_scratch_buffer_size > scratch_buffer_size) { + scratch_buffer.alloc(blas_scratch_buffer_size); + scratch_buffer_size = blas_scratch_buffer_size; + scratch_buffer.zero_to_device(); + } + rt_err = hiprtBuildGeometry(hiprt_context, + hiprtBuildOperationBuild, + &bvh->geom_input, + &options, + (void *)(scratch_buffer.device_pointer), + 0, + bvh->hiprt_geom); + } + if (rt_err != hiprtSuccess) { + set_error(string_printf("Failed to build BLAS")); + } +} + +hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh, + vector objects, + hiprtBuildOptions options, + bool refit) +{ + hiprtBuildOperation build_operation = refit ? 
hiprtBuildOperationUpdate : + hiprtBuildOperationBuild; + + array transform_matrix; + + unordered_map prim_info_map; + size_t custom_prim_offset = 0; + + unordered_map prim_time_map; + + size_t num_instances = 0; + int blender_instance_id = 0; + + size_t num_object = objects.size(); + user_instance_id.alloc(num_object); + visibility.alloc(num_object); + hiprt_blas_ptr.alloc(num_object); + blas_ptr.alloc(num_object); + transform_headers.alloc(num_object); + custom_prim_info_offset.alloc(num_object); + prim_time_offset.alloc(num_object); + + foreach (Object *ob, objects) { + uint32_t mask = 0; + if (ob->is_traceable()) { + mask = ob->visibility_for_tracing(); + } + + Transform current_transform = ob->get_tfm(); + Geometry *geom = ob->get_geometry(); + bool transform_applied = geom->transform_applied; + + BVHHIPRT *current_bvh = static_cast(geom->bvh); + bool is_valid_geometry = current_bvh->geom_input.geomType != hiprtInvalidValue; + hiprtGeometry hiprt_geom_current = current_bvh->hiprt_geom; + + hiprtFrameMatrix hiprt_transform_matrix = {{{0}}}; + Transform identity_matrix = transform_identity(); + get_hiprt_transform(hiprt_transform_matrix.matrix, identity_matrix); + + if (is_valid_geometry) { + bool is_custom_prim = current_bvh->custom_prim_info.size() > 0; + + if (is_custom_prim) { + + bool has_motion_blur = current_bvh->prims_time.size() > 0; + + unordered_map::iterator it = prim_info_map.find(geom); + + if (prim_info_map.find(geom) != prim_info_map.end()) { + + custom_prim_info_offset[blender_instance_id] = it->second; + + if (has_motion_blur) { + + prim_time_offset[blender_instance_id] = prim_time_map[geom]; + } + } + else { + int offset = bvh->custom_prim_info.size(); + + prim_info_map[geom].x = offset; + prim_info_map[geom].y = custom_prim_offset; + + bvh->custom_prim_info.resize(offset + current_bvh->custom_prim_info.size()); + memcpy(bvh->custom_prim_info.data() + offset, + current_bvh->custom_prim_info.data(), + current_bvh->custom_prim_info.size() * 
sizeof(int2)); + + custom_prim_info_offset[blender_instance_id].x = offset; + custom_prim_info_offset[blender_instance_id].y = custom_prim_offset; + + if (geom->geometry_type == Geometry::HAIR) { + custom_prim_offset += ((Hair *)geom)->num_curves(); + } + else if (geom->geometry_type == Geometry::POINTCLOUD) { + custom_prim_offset += ((PointCloud *)geom)->num_points(); + } + else { + custom_prim_offset += ((Mesh *)geom)->num_triangles(); + } + + if (has_motion_blur) { + int time_offset = bvh->prims_time.size(); + prim_time_map[geom] = time_offset; + + memcpy(bvh->prims_time.data() + time_offset, + current_bvh->prims_time.data(), + current_bvh->prims_time.size() * sizeof(float2)); + + prim_time_offset[blender_instance_id] = time_offset; + } + else + prim_time_offset[blender_instance_id] = -1; + } + } + else + custom_prim_info_offset[blender_instance_id] = {-1, -1}; + + hiprtTransformHeader current_header = {0}; + current_header.frameCount = 1; + current_header.frameIndex = transform_matrix.size(); + if (ob->get_motion().size()) { + int motion_size = ob->get_motion().size(); + assert(motion_size == 1); + + array tfm_array = ob->get_motion(); + float time_iternval = 1 / (float)(motion_size - 1); + current_header.frameCount = motion_size; + + vector tfm_hiprt_mb; + tfm_hiprt_mb.resize(motion_size); + for (int i = 0; i < motion_size; i++) { + get_hiprt_transform(tfm_hiprt_mb[i].matrix, tfm_array[i]); + tfm_hiprt_mb[i].time = (float)i * time_iternval; + transform_matrix.push_back_slow(tfm_hiprt_mb[i]); + } + } + else { + if (transform_applied) + current_transform = identity_matrix; + get_hiprt_transform(hiprt_transform_matrix.matrix, current_transform); + transform_matrix.push_back_slow(hiprt_transform_matrix); + } + + transform_headers[num_instances] = current_header; + + user_instance_id[num_instances] = blender_instance_id; + visibility[num_instances] = mask; + hiprt_blas_ptr[num_instances] = (uint64_t)hiprt_geom_current; + num_instances++; + } + 
blas_ptr[blender_instance_id] = (uint64_t)hiprt_geom_current; + blender_instance_id++; + } + + int frame_count = transform_matrix.size(); + hiprtSceneBuildInput scene_input_ptr = {0}; + scene_input_ptr.instanceCount = num_instances; + scene_input_ptr.frameCount = frame_count; + scene_input_ptr.frameType = hiprtFrameTypeMatrix; + + user_instance_id.copy_to_device(); + visibility.copy_to_device(); + hiprt_blas_ptr.copy_to_device(); + blas_ptr.copy_to_device(); + transform_headers.copy_to_device(); + { + instance_transform_matrix.alloc(frame_count); + instance_transform_matrix.host_pointer = transform_matrix.data(); + instance_transform_matrix.data_elements = sizeof(hiprtFrameMatrix); + instance_transform_matrix.data_type = TYPE_UCHAR; + instance_transform_matrix.data_size = frame_count; + instance_transform_matrix.copy_to_device(); + instance_transform_matrix.host_pointer = 0; + } + + scene_input_ptr.instanceMasks = (void *)visibility.device_pointer; + scene_input_ptr.instanceGeometries = (void *)hiprt_blas_ptr.device_pointer; + scene_input_ptr.instanceTransformHeaders = (void *)transform_headers.device_pointer; + scene_input_ptr.instanceFrames = (void *)instance_transform_matrix.device_pointer; + + hiprtScene scene = 0; + + hiprtError rt_err = hiprtCreateScene(hiprt_context, &scene_input_ptr, &options, &scene); + + if (rt_err != hiprtSuccess) { + set_error(string_printf("Failed to create TLAS")); + } + + size_t tlas_scratch_buffer_size; + rt_err = hiprtGetSceneBuildTemporaryBufferSize( + hiprt_context, &scene_input_ptr, &options, &tlas_scratch_buffer_size); + + if (rt_err != hiprtSuccess) { + set_error(string_printf("Failed to get scratch buffer size for TLAS")); + } + + if (tlas_scratch_buffer_size > scratch_buffer_size) { + scratch_buffer.alloc(tlas_scratch_buffer_size); + scratch_buffer.zero_to_device(); + } + + rt_err = hiprtBuildScene(hiprt_context, + build_operation, + &scene_input_ptr, + &options, + (void *)scratch_buffer.device_pointer, + 0, + scene); + + if 
(rt_err != hiprtSuccess) { + set_error(string_printf("Failed to build TLAS")); + } + + scratch_buffer.free(); + scratch_buffer_size = 0; + + if (bvh->custom_prim_info.size()) { + size_t data_size = bvh->custom_prim_info.size(); + custom_prim_info.alloc(data_size); + custom_prim_info.host_pointer = bvh->custom_prim_info.data(); + custom_prim_info.data_elements = 2; + custom_prim_info.data_type = TYPE_INT; + custom_prim_info.data_size = data_size; + custom_prim_info.copy_to_device(); + custom_prim_info.host_pointer = 0; + + custom_prim_info_offset.copy_to_device(); + } + + if (bvh->prims_time.size()) { + size_t data_size = bvh->prims_time.size(); + prims_time.alloc(data_size); + prims_time.host_pointer = bvh->prims_time.data(); + prims_time.data_elements = 2; + prims_time.data_type = TYPE_FLOAT; + prims_time.data_size = data_size; + prims_time.copy_to_device(); + prims_time.host_pointer = 0; + + prim_time_offset.copy_to_device(); + } + + size_t table_ptr_size = 0; + hipDeviceptr_t table_device_ptr; + + hip_assert(hipModuleGetGlobal(&table_device_ptr, &table_ptr_size, hipModule, "kernel_params")); + + size_t kernel_param_offset[4]; + int table_index = 0; + kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_closest_intersect); + kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_shadow_intersect); + kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_local_intersect); + kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_volume_intersect); + + for (int index = 0; index < table_index; index++) { + + hip_assert(hipMemcpyHtoD( + table_device_ptr + kernel_param_offset[index], &functions_table, sizeof(device_ptr))); + } + + return scene; +} + +void HIPRTDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) +{ + progress.set_substatus("Building HIPRT acceleration structure"); + + hiprtBuildOptions options; + options.buildFlags = hiprtBuildFlagBitPreferHighQualityBuild; + + BVHHIPRT *bvh_rt = 
static_cast(bvh); + HIPContextScope scope(this); + + if (!bvh_rt->is_tlas()) { + vector geometry = bvh_rt->geometry; + assert(geometry.size() == 1); + Geometry *geom = geometry[0]; + build_blas(bvh_rt, geom, options); + } + else { + + const vector objects = bvh_rt->objects; + scene = build_tlas(bvh_rt, objects, options, refit); + } +} +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/device/hiprt/device_impl.h b/intern/cycles/device/hiprt/device_impl.h new file mode 100644 index 00000000000..b5005cbccb5 --- /dev/null +++ b/intern/cycles/device/hiprt/device_impl.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2023 Blender Foundation */ + +#pragma once + +#ifdef WITH_HIPRT + +# include "device/hip/device_impl.h" +# include "device/hip/kernel.h" +# include "device/hip/queue.h" +# include "device/hiprt/queue.h" + +# ifdef WITH_HIP_DYNLOAD +# include "hiprtew.h" +# else +# include +# endif + +# include "kernel/device/hiprt/globals.h" + +CCL_NAMESPACE_BEGIN + +class Mesh; +class Hair; +class PointCloud; +class Geometry; +class Object; +class BVHHIPRT; + +class HIPRTDevice : public HIPDevice { + + public: + virtual BVHLayoutMask get_bvh_layout_mask(const uint kernel_features) const override; + + HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler); + + virtual ~HIPRTDevice(); + virtual unique_ptr gpu_queue_create() override; + string compile_kernel_get_common_cflags(const uint kernel_features) override; + virtual string compile_kernel(const uint kernel_features, + const char *name, + const char *base = "hiprt") override; + + virtual bool load_kernels(const uint kernel_features) override; + + virtual void const_copy_to(const char *name, void *host, size_t size) override; + + virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override; + + hiprtContext get_hiprt_context() + { + return hiprt_context; + } + + device_vector global_stack_buffer; + + protected: + enum Filter_Function { Closest = 0, Shadows, 
Local, Volume, Max_Intersect_Filter_Function }; + enum Primitive_Type { Triangle = 0, Curve, Motion_Triangle, Point, Max_Primitive_Type }; + + hiprtGeometryBuildInput prepare_triangle_blas(BVHHIPRT *bvh, Mesh *mesh); + hiprtGeometryBuildInput prepare_curve_blas(BVHHIPRT *bvh, Hair *hair); + hiprtGeometryBuildInput prepare_point_blas(BVHHIPRT *bvh, PointCloud *pointcloud); + void build_blas(BVHHIPRT *bvh, Geometry *geom, hiprtBuildOptions options); + hiprtScene build_tlas(BVHHIPRT *bvh, + vector objects, + hiprtBuildOptions options, + bool refit); + + hiprtContext hiprt_context; + hiprtScene scene; + hiprtFuncTable functions_table; + + thread_mutex hiprt_mutex; + size_t scratch_buffer_size; + device_vector scratch_buffer; + + /* The following vectors are to transfer scene information available on the host to the GPU + * visibility, instance_transform_matrix, transform_headers, and hiprt_blas_ptr are passed to + * hiprt to build bvh the rest are directly used in traversal functions/intersection kernels and + * are defined on the GPU side as members of KernelParamsHIPRT struct the host memory is copied + * to GPU through const_copy_to() function. */ + + device_vector visibility; + + /* instance_transform_matrix passes transform matrix of instances converted from Cycles Transform + * format to instanceFrames member of hiprtSceneBuildInput. */ + device_vector instance_transform_matrix; + /* Movement over a time interval for motion blur is captured through multiple transform matrices. + * In this case transform matrix of an instance cannot be directly retrieved by looking up + * instance_transform_matrix give the instance id. transform_headers maps the instance id to the + * appropriate index to retrieve instance transform matrix (frameIndex member of + * hiprtTransformHeader). 
transform_headers also has the information on how many transform + * matrices are associated with an instance (frameCount member of hiprtTransformHeader) + * transform_headers is passed to hiprt through instanceTransformHeaders member of + * hiprtSceneBuildInput. */ + device_vector transform_headers; + + /* Instance/object ids are not explicitly passed to hiprt. + * HIP RT assigns the ids based on the order blas pointers are passed to it (through + * instanceGeometries member of hiprtSceneBuildInput). If blas is absent for a particular + * geometry (e.g. a plane), HIP RT removes that entry and in scenes with objects with no blas, + * the instance id that hiprt returns for a hit point will not necessarily match the instance id + * of the application. user_instance_id provides a map for retrieving original instance id from + * what HIP RT returns as instance id. hiprt_blas_ptr is the list of all the valid blas pointers. + * blas_ptr has all the valid pointers and null pointers and blas for any geometry can be + * directly retrieved from this array (used in subsurface scattering). */ + device_vector user_instance_id; + device_vector hiprt_blas_ptr; + device_vector blas_ptr; + + /* custom_prim_info stores custom information for custom primitives for all the primitives in a + * scene. Primitive id that HIP RT returns is local to the geometry that was hit. + * custom_prim_info_offset returns the offset required to add to the primitive id to retrieve + * primitive info from custom_prim_info. */ + device_vector custom_prim_info; + device_vector custom_prim_info_offset; + + /* prims_time stores primitive time for geometries with motion blur. + * prim_time_offset returns the offset to add to primitive id to retrieve primitive time. 
*/ + device_vector prims_time; + device_vector prim_time_offset; +}; +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/device/hiprt/queue.cpp b/intern/cycles/device/hiprt/queue.cpp new file mode 100644 index 00000000000..28cc6673c87 --- /dev/null +++ b/intern/cycles/device/hiprt/queue.cpp @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#ifdef WITH_HIPRT + +# include "device/hiprt/queue.h" + +# include "device/hip/graphics_interop.h" +# include "device/hip/kernel.h" +# include "device/hiprt/device_impl.h" + +CCL_NAMESPACE_BEGIN + +HIPRTDeviceQueue::HIPRTDeviceQueue(HIPRTDevice *device) + : HIPDeviceQueue((HIPDevice *)device), hiprt_device_(device) +{ +} + +bool HIPRTDeviceQueue::enqueue(DeviceKernel kernel, + const int work_size, + DeviceKernelArguments const &args) +{ + if (hiprt_device_->have_error()) { + return false; + } + + if (!device_kernel_has_intersection(kernel)) { + return HIPDeviceQueue::enqueue(kernel, work_size, args); + } + + debug_enqueue_begin(kernel, work_size); + + const HIPContextScope scope(hiprt_device_); + const HIPDeviceKernel &hip_kernel = hiprt_device_->kernels.get(kernel); + + if (!hiprt_device_->global_stack_buffer.device_pointer) { + int max_path = num_concurrent_states(0); + hiprt_device_->global_stack_buffer.alloc(max_path * HIPRT_SHARED_STACK_SIZE * sizeof(int)); + hiprt_device_->global_stack_buffer.zero_to_device(); + } + + DeviceKernelArguments args_copy = args; + args_copy.add(&hiprt_device_->global_stack_buffer.device_pointer); + + /* Compute kernel launch parameters. 
*/ + const int num_threads_per_block = HIPRT_THREAD_GROUP_SIZE; + const int num_blocks = divide_up(work_size, num_threads_per_block); + int shared_mem_bytes = 0; + + assert_success(hipModuleLaunchKernel(hip_kernel.function, + num_blocks, + 1, + 1, + num_threads_per_block, + 1, + 1, + shared_mem_bytes, + hip_stream_, + const_cast(args_copy.values), + 0), + "enqueue"); + + return !(hiprt_device_->have_error()); +} + +CCL_NAMESPACE_END + +#endif /* WITH_HIPRT */ diff --git a/intern/cycles/device/hiprt/queue.h b/intern/cycles/device/hiprt/queue.h new file mode 100644 index 00000000000..1318e2c15f0 --- /dev/null +++ b/intern/cycles/device/hiprt/queue.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +#ifdef WITH_HIPRT + +# include "device/kernel.h" +# include "device/memory.h" +# include "device/queue.h" + +# include "device/hip/queue.h" +# include "device/hip/util.h" + +CCL_NAMESPACE_BEGIN + +class HIPRTDevice; + +class HIPRTDeviceQueue : public HIPDeviceQueue { + public: + HIPRTDeviceQueue(HIPRTDevice *device); + ~HIPRTDeviceQueue() {} + virtual bool enqueue(DeviceKernel kernel, + const int work_size, + DeviceKernelArguments const &args) override; + + protected: + HIPRTDevice *hiprt_device_; +}; + +CCL_NAMESPACE_END + +#endif /* WITH_HIPRT */ diff --git a/intern/cycles/device/multi/device.cpp b/intern/cycles/device/multi/device.cpp index aae812604eb..fb5534ab9cc 100644 --- a/intern/cycles/device/multi/device.cpp +++ b/intern/cycles/device/multi/device.cpp @@ -117,6 +117,10 @@ class MultiDevice : public Device { return BVH_LAYOUT_MULTI_METAL; } + if (bvh_layout_mask == BVH_LAYOUT_HIPRT) { + return BVH_LAYOUT_MULTI_HIPRT; + } + /* When devices do not share a common BVH layout, fall back to creating one for each */ const BVHLayoutMask BVH_LAYOUT_OPTIX_EMBREE = (BVH_LAYOUT_OPTIX | BVH_LAYOUT_EMBREE); if ((bvh_layout_mask_all & BVH_LAYOUT_OPTIX_EMBREE) == BVH_LAYOUT_OPTIX_EMBREE) { @@ -158,8 +162,10 
@@ class MultiDevice : public Device { assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX || bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL || + bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT || bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE || - bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE); + bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE || + bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT_EMBREE); BVHMulti *const bvh_multi = static_cast(bvh); bvh_multi->sub_bvhs.resize(devices.size()); @@ -184,12 +190,17 @@ class MultiDevice : public Device { params.bvh_layout = BVH_LAYOUT_OPTIX; else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL) params.bvh_layout = BVH_LAYOUT_METAL; + else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT) + params.bvh_layout = BVH_LAYOUT_HIPRT; else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE) params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX : BVH_LAYOUT_EMBREE; else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE) params.bvh_layout = sub.device->info.type == DEVICE_METAL ? BVH_LAYOUT_METAL : BVH_LAYOUT_EMBREE; + else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_HIPRT_EMBREE) + params.bvh_layout = sub.device->info.type == DEVICE_HIPRT ? 
BVH_LAYOUT_HIPRT : + BVH_LAYOUT_EMBREE; /* Skip building a bottom level acceleration structure for non-instanced geometry on Embree * (since they are put into the top level directly, see bvh_embree.cpp) */ diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp index da2e38730e2..4812ff2614d 100644 --- a/intern/cycles/integrator/path_trace.cpp +++ b/intern/cycles/integrator/path_trace.cpp @@ -1149,6 +1149,8 @@ static const char *device_type_for_description(const DeviceType type) return "OptiX"; case DEVICE_HIP: return "HIP"; + case DEVICE_HIPRT: + return "HIPRT"; case DEVICE_ONEAPI: return "oneAPI"; case DEVICE_DUMMY: diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 71466ace54c..b22146eaf83 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -26,6 +26,10 @@ set(SRC_KERNEL_DEVICE_HIP device/hip/kernel.cpp ) +set(SRC_KERNEL_DEVICE_HIPRT + device/hiprt/kernel.cpp +) + set(SRC_KERNEL_DEVICE_METAL device/metal/kernel.metal ) @@ -77,6 +81,13 @@ set(SRC_KERNEL_DEVICE_HIP_HEADERS device/hip/globals.h ) +set(SRC_KERNEL_DEVICE_HIPRT_HEADERS + device/hiprt/bvh.h + device/hiprt/common.h + device/hiprt/globals.h + device/hiprt/hiprt_kernels.h +) + set(SRC_KERNEL_DEVICE_OPTIX_HEADERS device/optix/bvh.h device/optix/compat.h @@ -643,6 +654,86 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) cycles_set_solution_folder(cycles_kernel_hip) endif() +# HIP RT module + +if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES) + set(hiprt_sources device/hiprt/kernel.cpp + ${SRC_KERNEL_HEADERS} + ${SRC_KERNEL_DEVICE_GPU_HEADERS} + ${SRC_KERNEL_DEVICE_HIPRT_HEADERS} + ${SRC_UTIL_HEADERS}) + set(bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc) + set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.hipfb) + set(kernel_sources ${sources}) + set(hiprt_kernel_src "/device/hiprt/kernel.cpp") + if(WIN32) + set(hiprt_compile_command ${CMAKE_COMMAND}) 
+ set(hiprt_compile_flags + -E env "HIP_PATH=${HIP_ROOT_DIR}" + ${HIP_HIPCC_EXECUTABLE}.bat) + else() + set(hiprt_compile_command ${HIP_HIPCC_EXECUTABLE}) + set(hiprt_compile_flags) + endif() + set(target_gpus) + foreach(arch ${CYCLES_HIP_BINARIES_ARCH}) + list(APPEND target_gpus "--offload-arch=${arch}") + endforeach() + set(hiprt_compile_flags + ${hiprt_compile_flags} + ${target_gpus} + ${HIP_HIPCC_FLAGS} + ${CMAKE_CURRENT_SOURCE_DIR}${hiprt_kernel_src} + ${flags} + -D CCL_NAMESPACE_BEGIN= + -D CCL_NAMESPACE_END= + -D HIPCC + -D __HIPRT__ + -std=c++17 + -fgpu-rdc + -c + --gpu-bundle-output + -emit-llvm + -I ${CMAKE_CURRENT_SOURCE_DIR}/.. + -I ${CMAKE_CURRENT_SOURCE_DIR}/device/hiprt + -I ${HIPRT_INCLUDE_DIR} + -Wno-parentheses-equality + -Wno-unused-value + --hipcc-func-supp + -ffast-math + -o ${bitcode_file}) + if(WITH_CYCLES_DEBUG) + set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_CYCLES_DEBUG) + endif() + add_custom_command( + OUTPUT ${bitcode_file} + COMMAND ${hiprt_compile_command} ${hiprt_compile_flags} + DEPENDS ${kernel_sources}) + if(WIN32) + set(hiprt_link_command ${CMAKE_COMMAND}) + set(hiprt_link_flags -E env "HIP_PATH=${HIP_ROOT_DIR}" + ${HIP_LINKER_EXECUTABLE}) + else() + # not implemented yet + endif() + set(hiprt_link_flags + ${hiprt_link_flags} + ${target_gpus} + -fgpu-rdc + --hip-link + --cuda-device-only + ${bitcode_file} + ${HIPRT_BITCODE} + -o ${hiprt_file}) + add_custom_command( + OUTPUT ${hiprt_file} + COMMAND ${hiprt_link_command} ${hiprt_link_flags} + DEPENDS ${bitcode_file}) + delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file}" ${CYCLES_INSTALL_PATH}/lib) + add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file}) + cycles_set_solution_folder(cycles_kernel_hiprt) +endif() + # OptiX PTX modules if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) @@ -1033,6 +1124,7 @@ cycles_add_library(cycles_kernel "${LIB}" ${SRC_KERNEL_DEVICE_CPU} ${SRC_KERNEL_DEVICE_CUDA} ${SRC_KERNEL_DEVICE_HIP} + 
${SRC_KERNEL_DEVICE_HIPRT} ${SRC_KERNEL_DEVICE_OPTIX} ${SRC_KERNEL_DEVICE_METAL} ${SRC_KERNEL_HEADERS} @@ -1040,6 +1132,7 @@ cycles_add_library(cycles_kernel "${LIB}" ${SRC_KERNEL_DEVICE_GPU_HEADERS} ${SRC_KERNEL_DEVICE_CUDA_HEADERS} ${SRC_KERNEL_DEVICE_HIP_HEADERS} + ${SRC_KERNEL_DEVICE_HIPRT_HEADERS} ${SRC_KERNEL_DEVICE_OPTIX_HEADERS} ${SRC_KERNEL_DEVICE_METAL_HEADERS} ${SRC_KERNEL_DEVICE_ONEAPI_HEADERS} @@ -1053,6 +1146,7 @@ source_group("device\\cpu" FILES ${SRC_KERNEL_DEVICE_CPU} ${SRC_KERNEL_DEVICE_CP source_group("device\\cuda" FILES ${SRC_KERNEL_DEVICE_CUDA} ${SRC_KERNEL_DEVICE_CUDA_HEADERS}) source_group("device\\gpu" FILES ${SRC_KERNEL_DEVICE_GPU_HEADERS}) source_group("device\\hip" FILES ${SRC_KERNEL_DEVICE_HIP} ${SRC_KERNEL_DEVICE_HIP_HEADERS}) +source_group("device\\hiprt" FILES ${SRC_KERNEL_DEVICE_HIPRT} ${SRC_KERNEL_DEVICE_HIPRT_HEADERS}) source_group("device\\optix" FILES ${SRC_KERNEL_DEVICE_OPTIX} ${SRC_KERNEL_DEVICE_OPTIX_HEADERS}) source_group("device\\metal" FILES ${SRC_KERNEL_DEVICE_METAL} ${SRC_KERNEL_DEVICE_METAL_HEADERS}) source_group("device\\oneapi" FILES ${SRC_KERNEL_DEVICE_ONEAPI} ${SRC_KERNEL_DEVICE_ONEAPI_HEADERS}) @@ -1090,6 +1184,8 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_CUDA_HEADERS}" delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_GPU_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/gpu) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIPRT}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIPRT_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX}" 
${CYCLES_INSTALL_PATH}/source/kernel/device/optix) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL}" ${CYCLES_INSTALL_PATH}/source/kernel/device/metal) diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index ceee9aa39fb..b39731bd0ad 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -17,6 +17,8 @@ # include "kernel/device/metal/bvh.h" #elif defined(__KERNEL_OPTIX__) # include "kernel/device/optix/bvh.h" +#elif defined(__HIPRT__) +# include "kernel/device/hiprt/bvh.h" #else # define __BVH2__ #endif diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index b4c84fd0f0b..fcb99328950 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -48,6 +48,9 @@ #include "kernel/film/read.h" +#if defined(__HIPRT__) +# include "kernel/device/hiprt/hiprt_kernels.h" +#endif /* -------------------------------------------------------------------- * Integrator. */ @@ -128,11 +131,13 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) } ccl_gpu_kernel_postfix +#if !defined(__HIPRT__) + /* Intersection kernels need access to the kernel handler for specialization constants to work * properly. 
*/ -#ifdef __KERNEL_ONEAPI__ -# include "kernel/device/oneapi/context_intersect_begin.h" -#endif +# ifdef __KERNEL_ONEAPI__ +# include "kernel/device/oneapi/context_intersect_begin.h" +# endif ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_intersect_closest, @@ -191,8 +196,10 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) } ccl_gpu_kernel_postfix -#ifdef __KERNEL_ONEAPI__ -# include "kernel/device/oneapi/context_intersect_end.h" +# ifdef __KERNEL_ONEAPI__ +# include "kernel/device/oneapi/context_intersect_end.h" +# endif + #endif ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) @@ -259,11 +266,13 @@ ccl_gpu_kernel_postfix constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]]; #endif +#if !defined(__HIPRT__) + /* Kernels using intersections need access to the kernel handler for specialization constants to * work properly. */ -#ifdef __KERNEL_ONEAPI__ -# include "kernel/device/oneapi/context_intersect_begin.h" -#endif +# ifdef __KERNEL_ONEAPI__ +# include "kernel/device/oneapi/context_intersect_begin.h" +# endif ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_shade_surface_raytrace, @@ -276,15 +285,15 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) if (ccl_gpu_kernel_within_bounds(global_index, work_size)) { const int state = (path_index_array) ? path_index_array[global_index] : global_index; -#if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__) +# if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__) KernelGlobals kg = NULL; /* Workaround Ambient Occlusion and Bevel nodes not working with Metal. * Dummy offset should not affect result, but somehow fixes bug! 
*/ kg += __dummy_constant; ccl_gpu_kernel_call(integrator_shade_surface_raytrace(kg, state, render_buffer)); -#else +# else ccl_gpu_kernel_call(integrator_shade_surface_raytrace(NULL, state, render_buffer)); -#endif +# endif } } ccl_gpu_kernel_postfix @@ -303,8 +312,11 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) } } ccl_gpu_kernel_postfix -#ifdef __KERNEL_ONEAPI__ -# include "kernel/device/oneapi/context_intersect_end.h" + +# ifdef __KERNEL_ONEAPI__ +# include "kernel/device/oneapi/context_intersect_end.h" +# endif + #endif ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) diff --git a/intern/cycles/kernel/device/hip/config.h b/intern/cycles/kernel/device/hip/config.h index 6b1a9464b34..7239acb2a71 100644 --- a/intern/cycles/kernel/device/hip/config.h +++ b/intern/cycles/kernel/device/hip/config.h @@ -20,6 +20,10 @@ #define GPU_KERNEL_BLOCK_NUM_THREADS 1024 #define GPU_KERNEL_MAX_REGISTERS 64 +/* For performance tuning of hiprt kernels we might have to change the number + * that's why we don't use GPU_KERNEL_BLOCK_NUM_THREADS. */ +#define GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS 1024 + /* Compute number of threads per block and minimum blocks per multiprocessor * given the maximum number of registers per thread. 
*/ #define ccl_gpu_kernel(block_num_threads, thread_num_registers) \ diff --git a/intern/cycles/kernel/device/hiprt/bvh.h b/intern/cycles/kernel/device/hiprt/bvh.h new file mode 100644 index 00000000000..2c7daa2f2fa --- /dev/null +++ b/intern/cycles/kernel/device/hiprt/bvh.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +#include "kernel/device/hiprt/common.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray) +{ + return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f; +} + +ccl_device_intersect bool scene_intersect(KernelGlobals kg, + ccl_private const Ray *ray, + const uint visibility, + ccl_private Intersection *isect) +{ + isect->t = ray->tmax; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; + isect->type = PRIMITIVE_NONE; + if (!scene_intersect_valid(ray)) { + isect->t = ray->tmax; + isect->type = PRIMITIVE_NONE; + return false; + } + + hiprtRay ray_hip; + + SET_HIPRT_RAY(ray_hip, ray) + + RayPayload payload; + payload.self = ray->self; + payload.kg = kg; + payload.visibility = visibility; + payload.prim_type = PRIMITIVE_NONE; + payload.ray_time = ray->time; + + hiprtHit hit = {}; + + GET_TRAVERSAL_STACK() + + if (visibility & PATH_RAY_SHADOW_OPAQUE) { + GET_TRAVERSAL_ANY_HIT(table_closest_intersect, 0) + hit = traversal.getNextHit(); + } + else { + GET_TRAVERSAL_CLOSEST_HIT(table_closest_intersect, 0) + hit = traversal.getNextHit(); + } + if (hit.hasHit()) { + set_intersect_point(kg, hit, isect); + if (isect->type > 1) { // should be applied only for curves + isect->type = payload.prim_type; + isect->prim = hit.primID; + } + return true; + } + return false; +} + +#ifdef __BVH_LOCAL__ +ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private LocalIntersection *local_isect, + int local_object, + 
ccl_private uint *lcg_state, + int max_hits) +{ + if (!scene_intersect_valid(ray)) { + if (local_isect) { + local_isect->num_hits = 0; + } + return false; + } + + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + + if (local_isect != NULL) { + local_isect->num_hits = 0; + } + + const int object_flag = kernel_data_fetch(object_flag, local_object); + if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + +# if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir); +# else + bvh_instance_push(kg, local_object, ray, &P, &dir, &idir); +# endif + } + + hiprtRay ray_hip; + ray_hip.origin = P; + ray_hip.direction = dir; + ray_hip.maxT = ray->tmax; + ray_hip.minT = ray->tmin; + + LocalPayload payload = {0}; + payload.kg = kg; + payload.self = ray->self; + payload.local_object = local_object; + payload.max_hits = max_hits; + payload.lcg_state = lcg_state; + payload.local_isect = local_isect; + + GET_TRAVERSAL_STACK() + + void *local_geom = (void *)(kernel_data_fetch(blas_ptr, local_object)); + // we don't need custom intersection functions for SSR +# ifdef HIPRT_SHARED_STACK + hiprtGeomTraversalAnyHitCustomStack traversal(local_geom, + ray_hip, + stack, + hiprtTraversalHintDefault, + &payload, + kernel_params.table_local_intersect, + 2); +# else + hiprtGeomTraversalAnyHit traversal( + local_geom, ray_hip, table, hiprtTraversalHintDefault, &payload); +# endif + hiprtHit hit = traversal.getNextHit(); + return hit.hasHit(); +} +#endif //__BVH_LOCAL__ + +#ifdef __SHADOW_RECORD_ALL__ +ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg, + IntegratorShadowState state, + ccl_private const Ray *ray, + uint visibility, + uint max_hits, + ccl_private uint *num_recorded_hits, + ccl_private float *throughput) +{ + *throughput = 1.0f; + *num_recorded_hits = 0; + + if (!scene_intersect_valid(ray)) { + return false; + } + + hiprtRay ray_hip; + + SET_HIPRT_RAY(ray_hip, ray) + 
ShadowPayload payload; + payload.kg = kg; + payload.self = ray->self; + payload.in_state = state; + payload.max_hits = max_hits; + payload.visibility = visibility; + payload.prim_type = PRIMITIVE_TRIANGLE; + payload.ray_time = ray->time; + payload.num_hits = 0; + payload.r_num_recorded_hits = num_recorded_hits; + payload.r_throughput = throughput; + GET_TRAVERSAL_STACK() + GET_TRAVERSAL_ANY_HIT(table_shadow_intersect, 1) + hiprtHit hit = traversal.getNextHit(); + num_recorded_hits = payload.r_num_recorded_hits; + throughput = payload.r_throughput; + return hit.hasHit(); +} +#endif /* __SHADOW_RECORD_ALL__ */ + +#ifdef __VOLUME__ +ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private Intersection *isect, + const uint visibility) +{ + isect->t = ray->tmax; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; + isect->type = PRIMITIVE_NONE; + + if (!scene_intersect_valid(ray)) { + return false; + } + + hiprtRay ray_hip; + + SET_HIPRT_RAY(ray_hip, ray) + + RayPayload payload; + payload.self = ray->self; + payload.kg = kg; + payload.visibility = visibility; + payload.prim_type = PRIMITIVE_NONE; + payload.ray_time = ray->time; + + GET_TRAVERSAL_STACK() + + GET_TRAVERSAL_CLOSEST_HIT(table_volume_intersect, 3) + hiprtHit hit = traversal.getNextHit(); + // return hit.hasHit(); + if (hit.hasHit()) { + set_intersect_point(kg, hit, isect); + if (isect->type > 1) { // should be applied only for curves + isect->type = payload.prim_type; + isect->prim = hit.primID; + } + return true; + } + else + return false; +} +#endif /* __VOLUME__ */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/hiprt/common.h b/intern/cycles/kernel/device/hiprt/common.h new file mode 100644 index 00000000000..1017857d8f9 --- /dev/null +++ b/intern/cycles/kernel/device/hiprt/common.h @@ -0,0 +1,637 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#ifdef 
__HIPRT__ + +struct RayPayload { + KernelGlobals kg; + RaySelfPrimitives self; + uint visibility; + int prim_type; + float ray_time; +}; + +struct ShadowPayload { + KernelGlobals kg; + RaySelfPrimitives self; + uint visibility; + int prim_type; + float ray_time; + int in_state; + uint max_hits; + uint num_hits; + uint *r_num_recorded_hits; + float *r_throughput; +}; + +struct LocalPayload { + KernelGlobals kg; + RaySelfPrimitives self; + int prim_type; + float ray_time; + int local_object; + uint max_hits; + uint *lcg_state; + LocalIntersection *local_isect; +}; + +# define SET_HIPRT_RAY(RAY_RT, RAY) \ + RAY_RT.direction = RAY->D; \ + RAY_RT.origin = RAY->P; \ + RAY_RT.maxT = RAY->tmax; \ + RAY_RT.minT = RAY->tmin; + +# if defined(HIPRT_SHARED_STACK) +# define GET_TRAVERSAL_STACK() \ + Stack stack(&kg->global_stack_buffer[0], \ + HIPRT_THREAD_STACK_SIZE, \ + kg->shared_stack, \ + HIPRT_SHARED_STACK_SIZE); +# else +# define GET_TRAVERSAL_STACK() +# endif + +# ifdef HIPRT_SHARED_STACK +# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE, RAY_TYPE) \ + hiprtSceneTraversalAnyHitCustomStack traversal(kernel_data.device_bvh, \ + ray_hip, \ + stack, \ + visibility, \ + hiprtTraversalHintDefault, \ + &payload, \ + kernel_params.FUNCTION_TABLE, \ + RAY_TYPE); \ + hiprtSceneTraversalAnyHitCustomStack traversal_simple( \ + kernel_data.device_bvh, ray_hip, stack, visibility); +# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE, RAY_TYPE) \ + hiprtSceneTraversalClosestCustomStack traversal(kernel_data.device_bvh, \ + ray_hip, \ + stack, \ + visibility, \ + hiprtTraversalHintDefault, \ + &payload, \ + kernel_params.FUNCTION_TABLE, \ + RAY_TYPE); \ + hiprtSceneTraversalClosestCustomStack traversal_simple( \ + kernel_data.device_bvh, ray_hip, stack, visibility); +# else +# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE) \ + hiprtSceneTraversalAnyHit traversal(kernel_data.device_bvh, \ + ray_hip, \ + visibility, \ + FUNCTION_TABLE, \ + hiprtTraversalHintDefault, \ + &payload); \ + 
hiprtSceneTraversalAnyHit traversal_simple(kernel_data.device_bvh, ray_hip, visibility); +# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE) \ + hiprtSceneTraversalClosest traversal(kernel_data.device_bvh, \ + ray_hip, \ + visibility, \ + FUNCTION_TABLE, \ + hiprtTraversalHintDefault, \ + &payload); \ + hiprtSceneTraversalClosest traversal_simple(kernel_data.device_bvh, ray_hip, visibility); +# endif + +ccl_device_inline void set_intersect_point(KernelGlobals kg, + hiprtHit &hit, + ccl_private Intersection *isect) +{ + int prim_offset = 0; + int object_id = kernel_data_fetch(user_instance_id, hit.instanceID); + prim_offset = kernel_data_fetch(object_prim_offset, object_id); + isect->type = kernel_data_fetch(objects, object_id).primitive_type; + + isect->t = hit.t; + isect->prim = hit.primID + prim_offset; + isect->object = object_id; + isect->u = hit.uv.x; + isect->v = hit.uv.y; +} + +// custom intersection functions + +ccl_device_inline bool curve_custom_intersect(const hiprtRay &ray, + const void *userPtr, + void *payload, + hiprtHit &hit) + +{ + Intersection isect; + RayPayload *local_payload = (RayPayload *)payload; + // could also cast shadow payload to get the elements needed to do the intersection + // no need to write a separate function for shadow intersection + + KernelGlobals kg = local_payload->kg; + + int object_id = kernel_data_fetch(user_instance_id, hit.instanceID); + int2 data_offset = kernel_data_fetch(custom_prim_info_offset, object_id); + // data_offset.x: where the data (prim id, type )for the geometry of the current object begins + // the prim_id that is in hiprtHit hit is local to the partciular geometry so we add the above + // ofstream + // to map prim id in hiprtHit to the one compatible to what next stage expects + + // data_offset.y: the offset that has to be added to a local primitive to get the global + // primitive id = kernel_data_fetch(object_prim_offset, object_id); + + int prim_offset = data_offset.y; + + int curve_index = 
kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x).x; + int key_value = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x).y; + + if (intersection_skip_self_shadow(local_payload->self, object_id, curve_index + prim_offset)) + return false; + + float ray_time = local_payload->ray_time; + + if ((key_value & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) { + + int time_offset = kernel_data_fetch(prim_time_offset, object_id); + float2 prims_time = kernel_data_fetch(prims_time, hit.primID + time_offset); + + if (ray_time < prims_time.x || ray_time > prims_time.y) { + return false; + } + } + + bool b_hit = curve_intersect(kg, + &isect, + ray.origin, + ray.direction, + ray.minT, + ray.maxT, + object_id, + curve_index + prim_offset, + ray_time, + key_value); + if (b_hit) { + hit.uv.x = isect.u; + hit.uv.y = isect.v; + hit.t = isect.t; + hit.primID = isect.prim; + local_payload->prim_type = isect.type; // packed_curve_type; + } + return b_hit; +} + +ccl_device_inline bool motion_triangle_custom_intersect(const hiprtRay &ray, + const void *userPtr, + void *payload, + hiprtHit &hit) +{ +# ifdef MOTION_BLUR + RayPayload *local_payload = (RayPayload *)payload; + KernelGlobals kg = local_payload->kg; + int object_id = kernel_data_fetch(user_instance_id, hit.instanceID); + int2 data_offset = kernel_data_fetch(custom_prim_info_offset, object_id); + int prim_offset = kernel_data_fetch(object_prim_offset, object_id); + + int prim_id_local = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x).x; + int prim_id_global = prim_id_local + prim_offset; + + if (intersection_skip_self_shadow(local_payload->self, object_id, prim_id_global)) + return false; + + Intersection isect; + + bool b_hit = motion_triangle_intersect(kg, + &isect, + ray.origin, + ray.direction, + ray.minT, + ray.maxT, + local_payload->ray_time, + local_payload->visibility, + object_id, + prim_id_global, + prim_id_local); + + if (b_hit) { + hit.uv.x = isect.u; + hit.uv.y = 
isect.v; + hit.t = isect.t; + hit.primID = isect.prim; + local_payload->prim_type = isect.type; + } + return b_hit; +# else + return false; +# endif +} + +ccl_device_inline bool motion_triangle_custom_local_intersect(const hiprtRay &ray, + const void *userPtr, + void *payload, + hiprtHit &hit) +{ +# ifdef MOTION_BLUR + LocalPayload *local_payload = (LocalPayload *)payload; + KernelGlobals kg = local_payload->kg; + int object_id = local_payload->local_object; + + int prim_offset = kernel_data_fetch(object_prim_offset, object_id); + int2 data_offset = kernel_data_fetch(custom_prim_info_offset, object_id); + + int prim_id_local = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x).x; + int prim_id_global = prim_id_local + prim_offset; + + if (intersection_skip_self_local(local_payload->self, prim_id_global)) + return false; + + LocalIntersection *local_isect = local_payload->local_isect; + + bool b_hit = motion_triangle_intersect_local(kg, + local_isect, + ray.origin, + ray.direction, + local_payload->ray_time, + object_id, + prim_id_global, + prim_id_local, + ray.minT, + ray.maxT, + local_payload->lcg_state, + local_payload->max_hits); + + if (b_hit) { + local_payload->prim_type = PRIMITIVE_MOTION_TRIANGLE; + } + return b_hit; +# else + return false; +# endif +} + +ccl_device_inline bool motion_triangle_custom_volume_intersect(const hiprtRay &ray, + const void *userPtr, + void *payload, + hiprtHit &hit) +{ +# ifdef MOTION_BLUR + + RayPayload *local_payload = (RayPayload *)payload; + KernelGlobals kg = local_payload->kg; + int object_id = kernel_data_fetch(user_instance_id, hit.instanceID); + int object_flag = kernel_data_fetch(object_flag, object_id); + + if (!(object_flag & SD_OBJECT_HAS_VOLUME)) + return false; + + int2 data_offset = kernel_data_fetch(custom_prim_info_offset, object_id); + int prim_offset = kernel_data_fetch(object_prim_offset, object_id); + + int prim_id_local = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x).x; + int 
prim_id_global = prim_id_local + prim_offset; + + if (intersection_skip_self_shadow(local_payload->self, object_id, prim_id_global)) + return false; + + Intersection isect; + + bool b_hit = motion_triangle_intersect(kg, + &isect, + ray.origin, + ray.direction, + ray.minT, + ray.maxT, + local_payload->ray_time, + local_payload->visibility, + object_id, + prim_id_global, + prim_id_local); + + if (b_hit) { + hit.uv.x = isect.u; + hit.uv.y = isect.v; + hit.t = isect.t; + hit.primID = isect.prim; + local_payload->prim_type = isect.type; + } + return b_hit; +# else + return false; +# endif +} + +ccl_device_inline bool point_custom_intersect(const hiprtRay &ray, + const void *userPtr, + void *payload, + hiprtHit &hit) +{ +# ifdef POINT_CLOUD + RayPayload *local_payload = (RayPayload *)payload; + KernelGlobals kg = local_payload->kg; + int object_id = kernel_data_fetch(user_instance_id, hit.instanceID); + + int2 data_offset = kernel_data_fetch(custom_prim_info_offset, object_id); + int prim_offset = kernel_data_fetch(object_prim_offset, object_id); + + int2 prim_info = kernel_data_fetch(custom_prim_info, hit.primID + data_offset.x); + int prim_id_local = prim_info.x; + int prim_id_global = prim_id_local + prim_offset; + + int type = prim_info.y; + + if (intersection_skip_self_shadow(local_payload->self, object_id, prim_id_global)) + return false; + + float ray_time = local_payload->ray_time; + + if ((type & PRIMITIVE_MOTION) && kernel_data.bvh.use_bvh_steps) { + + int time_offset = kernel_data_fetch(prim_time_offset, object_id); + float2 prims_time = kernel_data_fetch(prims_time, hit.primID + time_offset); + + if (ray_time < prims_time.x || ray_time > prims_time.y) { + return false; + } + } + + Intersection isect; + + bool b_hit = point_intersect(kg, + &isect, + ray.origin, + ray.direction, + ray.minT, + ray.maxT, + object_id, + prim_id_global, + ray_time, + type); + + if (b_hit) { + hit.uv.x = isect.u; + hit.uv.y = isect.v; + hit.t = isect.t; + hit.primID = isect.prim; + 
local_payload->prim_type = isect.type; + } + return b_hit; +# else + return false; +# endif +} + +// intersection filters + +ccl_device_inline bool closest_intersection_filter(const hiprtRay &ray, + const void *data, + void *user_data, + const hiprtHit &hit) +{ + RayPayload *payload = (RayPayload *)user_data; + int object_id = kernel_data_fetch(user_instance_id, hit.instanceID); + int prim_offset = kernel_data_fetch(object_prim_offset, object_id); + int prim = hit.primID + prim_offset; + + if (intersection_skip_self_shadow(payload->self, object_id, prim)) + return true; + else + return false; +} + +ccl_device_inline bool shadow_intersection_filter(const hiprtRay &ray, + const void *data, + void *user_data, + const hiprtHit &hit) + +{ + ShadowPayload *payload = (ShadowPayload *)user_data; + + uint num_hits = payload->num_hits; + uint num_recorded_hits = *(payload->r_num_recorded_hits); + uint max_hits = payload->max_hits; + int state = payload->in_state; + KernelGlobals kg = payload->kg; + RaySelfPrimitives self = payload->self; + + int object = kernel_data_fetch(user_instance_id, hit.instanceID); + int prim_offset = kernel_data_fetch(object_prim_offset, object); + int prim = hit.primID + prim_offset; + + float ray_tmax = hit.t; + +# ifdef __VISIBILITY_FLAG__ + + if ((kernel_data_fetch(objects, object).visibility & payload->visibility) == 0) { + return true; // no hit - continue traversal + } +# endif + + if (intersection_skip_self_shadow(self, object, prim)) { + return true; // no hit -continue traversal + } + + float u = hit.uv.x; + float v = hit.uv.y; + int type = kernel_data_fetch(objects, object).primitive_type; +# ifdef __HAIR__ + if (type & (PRIMITIVE_CURVE_THICK | PRIMITIVE_CURVE_RIBBON)) { + + const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim); + type = segment.type; + prim = segment.prim; + } +# endif + +# ifndef __TRANSPARENT_SHADOWS__ + + return false; + +# else + + if (num_hits >= max_hits || + 
!(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) { + return false; + } + + if (type & PRIMITIVE_CURVE) { + float throughput = *payload->r_throughput; + throughput *= intersection_curve_shadow_transparency(kg, object, prim, type, u); + *payload->r_throughput = throughput; + payload->num_hits += 1; + + if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { + return false; + } + else { + return true; + } + } + + uint record_index = num_recorded_hits; + + num_hits += 1; + num_recorded_hits += 1; + payload->num_hits = num_hits; + *(payload->r_num_recorded_hits) = num_recorded_hits; + + const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE); + if (record_index >= max_record_hits) { + float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t); + uint max_recorded_hit = 0; + + for (int i = 1; i < max_record_hits; i++) { + const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t); + if (isect_t > max_recorded_t) { + max_recorded_t = isect_t; + max_recorded_hit = i; + } + } + + if (ray_tmax >= max_recorded_t) { + + return true; + } + + record_index = max_recorded_hit; + } + + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = ray_tmax; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type; + return true; + +# endif /* __TRANSPARENT_SHADOWS__ */ +} + +ccl_device_inline bool local_intersection_filter(const hiprtRay &ray, + const void *data, + void *user_data, + const hiprtHit &hit) +{ +# ifdef __BVH_LOCAL__ + LocalPayload *payload = (LocalPayload *)user_data; + KernelGlobals kg = payload->kg; + int object_id = payload->local_object; + int prim_offset = 
kernel_data_fetch(object_prim_offset, object_id); + int prim = hit.primID + prim_offset; +# ifndef __RAY_OFFSET__ + if (intersection_skip_self_local(payload->self, prim)) { + return true; // continue search + } +# endif + uint max_hits = payload->max_hits; + if (max_hits == 0) { + return false; // stop search + } + int hit_index = 0; + if (payload->lcg_state) { + for (int i = min(max_hits, payload->local_isect->num_hits) - 1; i >= 0; --i) { + if (hit.t == payload->local_isect->hits[i].t) { + return true; // continue search + } + } + hit_index = payload->local_isect->num_hits++; + if (payload->local_isect->num_hits > max_hits) { + hit_index = lcg_step_uint(payload->lcg_state) % payload->local_isect->num_hits; + if (hit_index >= max_hits) { + return true; // continue search + } + } + } + else { + if (payload->local_isect->num_hits && hit.t > payload->local_isect->hits[0].t) { + return true; + } + payload->local_isect->num_hits = 1; + } + Intersection *isect = &payload->local_isect->hits[hit_index]; + isect->t = hit.t; + isect->prim = prim; + isect->object = object_id; + isect->type = PRIMITIVE_TRIANGLE; // kernel_data_fetch(__objects, object_id).primitive_type; + + isect->u = hit.uv.x; + isect->v = hit.uv.y; + + payload->local_isect->Ng[hit_index] = hit.normal; + + return true; + +# endif +} + +ccl_device_inline bool volume_intersection_filter(const hiprtRay &ray, + const void *data, + void *user_data, + const hiprtHit &hit) +{ + RayPayload *payload = (RayPayload *)user_data; + int object_id = kernel_data_fetch(user_instance_id, hit.instanceID); + int prim_offset = kernel_data_fetch(object_prim_offset, object_id); + int prim = hit.primID + prim_offset; + int object_flag = kernel_data_fetch(object_flag, object_id); + + if (intersection_skip_self(payload->self, object_id, prim)) + return true; + else if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) + return true; + else + return false; +} + +HIPRT_DEVICE bool intersectFunc(u32 geomType, + u32 rayType, + const 
hiprtFuncTableHeader &tableHeader, + const hiprtRay &ray, + void *payload, + hiprtHit &hit) +{ + const u32 index = tableHeader.numGeomTypes * rayType + geomType; + const void *data = tableHeader.funcDataSets[index].filterFuncData; + switch (index) { + case Curve_Intersect_Function: + case Curve_Intersect_Shadow: + return curve_custom_intersect(ray, data, payload, hit); + case Motion_Triangle_Intersect_Function: + case Motion_Triangle_Intersect_Shadow: + return motion_triangle_custom_intersect(ray, data, payload, hit); + case Motion_Triangle_Intersect_Local: + return motion_triangle_custom_local_intersect(ray, data, payload, hit); + case Motion_Triangle_Intersect_Volume: + return motion_triangle_custom_volume_intersect(ray, data, payload, hit); + case Point_Intersect_Function: + case Point_Intersect_Shadow: + return point_custom_intersect(ray, data, payload, hit); + default: + break; + } + return false; +} + +HIPRT_DEVICE bool filterFunc(u32 geomType, + u32 rayType, + const hiprtFuncTableHeader &tableHeader, + const hiprtRay &ray, + void *payload, + const hiprtHit &hit) +{ + const u32 index = tableHeader.numGeomTypes * rayType + geomType; + const void *data = tableHeader.funcDataSets[index].intersectFuncData; + switch (index) { + case Triangle_Filter_Closest: + return closest_intersection_filter(ray, data, payload, hit); + case Triangle_Filter_Shadow: + case Curve_Filter_Shadow: + case Motion_Triangle_Filter_Shadow: + case Point_Filter_Shadow: + return shadow_intersection_filter(ray, data, payload, hit); + case Triangle_Filter_Local: + case Motion_Triangle_Filter_Local: + return local_intersection_filter(ray, data, payload, hit); + case Triangle_Filter_Volume: + case Motion_Triangle_Filter_Volume: + return volume_intersection_filter(ray, data, payload, hit); + default: + break; + } + + return false; +} + +#endif diff --git a/intern/cycles/kernel/device/hiprt/globals.h b/intern/cycles/kernel/device/hiprt/globals.h new file mode 100644 index 00000000000..7fff15b97b8 
--- /dev/null +++ b/intern/cycles/kernel/device/hiprt/globals.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#pragma once + +#include "kernel/types.h" + +#include "kernel/integrator/state.h" + +#include "kernel/util/profiling.h" + +#define HIPRT_SHARED_STACK + +/* The size of global stack available to each thread (memory reserved for each thread in + * global_stack_buffer). */ +#define HIPRT_THREAD_STACK_SIZE 64 + +/* LDS (Local Data Storage) allocation for each thread, the number is obtained empirically. */ +#define HIPRT_SHARED_STACK_SIZE 24 + +/* HIPRT_THREAD_GROUP_SIZE is the number of threads per work group for intersection kernels. + * The default number of threads per workgroup is 1024, however, since HIP RT intersection kernels + * use local memory, and the local memory size in those kernels scales up with the number of + * threads, the number of threads is scaled down to 256 to avoid going over maximum local memory + * and to strike a balance between memory access and the number of waves. + * + * Total local stack size would be number of threads * HIPRT_SHARED_STACK_SIZE. */ +#define HIPRT_THREAD_GROUP_SIZE 256 + +CCL_NAMESPACE_BEGIN + +struct KernelGlobalsGPU { + int *global_stack_buffer; +#ifdef HIPRT_SHARED_STACK + int *shared_stack; +#endif +}; + +typedef ccl_global KernelGlobalsGPU *ccl_restrict KernelGlobals; + +#if defined(HIPRT_SHARED_STACK) + +/* This macro allocates shared memory and to pass the shared memory down to intersection functions + * KernelGlobals is used. 
*/ +# define HIPRT_INIT_KERNEL_GLOBAL() \ + ccl_gpu_shared int shared_stack[HIPRT_SHARED_STACK_SIZE * HIPRT_THREAD_GROUP_SIZE]; \ + ccl_global KernelGlobalsGPU kg_gpu; \ + KernelGlobals kg = &kg_gpu; \ + kg->shared_stack = &shared_stack[0]; \ + kg->global_stack_buffer = stack_buffer; +#else +# define HIPRT_INIT_KERNEL_GLOBAL() \ + KernelGlobals kg = NULL; \ + kg->global_stack_buffer = stack_buffer; +#endif + +struct KernelParamsHIPRT { + KernelData data; +#define KERNEL_DATA_ARRAY(type, name) const type *name; + KERNEL_DATA_ARRAY(int, user_instance_id) + KERNEL_DATA_ARRAY(uint64_t, blas_ptr) + KERNEL_DATA_ARRAY(int2, custom_prim_info) + KERNEL_DATA_ARRAY(int2, custom_prim_info_offset) + KERNEL_DATA_ARRAY(float2, prims_time) + KERNEL_DATA_ARRAY(int, prim_time_offset) +#include "kernel/data_arrays.h" + + /* Integrator state */ + IntegratorStateGPU integrator_state; + + hiprtFuncTable table_closest_intersect; + hiprtFuncTable table_shadow_intersect; + hiprtFuncTable table_local_intersect; + hiprtFuncTable table_volume_intersect; +}; + +/* Intersection_Function_Table_Index defines index values to retrieve custom intersection + * functions from function table. */ + +enum Intersection_Function_Table_Index { + // Triangles use the intersection function provided by HIP RT and don't need custom intersection + // functions + // Custom intersection functions for closest intersect. + Curve_Intersect_Function = 1, // Custom intersection for curves + Motion_Triangle_Intersect_Function, // Custom intersection for triangles with vertex motion blur + // attributes. + Point_Intersect_Function, // Custom intersection for point cloud. + // Custom intersection functions for shadow rendering are the same as the function for closest + // intersect. + // However, the table indices are different + Triangle_Intersect_Shadow_None, + Curve_Intersect_Shadow, + Motion_Triangle_Intersect_Shadow, + Point_Intersect_Shadow, + // Custom intersection functions for subsurface scattering. 
+ // Only motion triangles have valid custom intersection function + Triangle_Intersect_Local_None, + Curve_Intersect_Local_None, + Motion_Triangle_Intersect_Local, + Point_Intersect_Local_None, + // Custom intersection functions for volume rendering. + // Only motion triangles have valid custom intersection function + Triangle_Intersect_Volume_None, + Curve_Intersect_Volume_None, + Motion_Triangle_Intersect_Volume, + Point_Intersect_Volume_None, +}; + +// Filter functions, filter hits, i.e. test whether a hit should be accepted or not, and whether +// traversal should stop or continue. +enum Filter_Function_Table_Index { + Triangle_Filter_Closest = 0, // Filter function for triangles for closest intersect, no custom + // intersection function is needed. + Curve_Filter_Opaque_None, // No filter function is needed and everything is handled in the + // intersection function. + Motion_Triangle_Filter_Opaque_None, // No filter function is needed and everything is handled in + // intersection function. + Point_Filter_Opaque_Non, // No filter function is needed. + // Filter function for all primitives for shadow intersection. + // All primitives use the same function but each has a different index in the table. + Triangle_Filter_Shadow, + Curve_Filter_Shadow, + Motion_Triangle_Filter_Shadow, + Point_Filter_Shadow, + // Filter functions for subsurface scattering. Triangles and motion triangles need function + // assignment. They indices for triangles and motion triangles point to the same function. Points + // and curves dont need any function since subsurface scattering is not applied on either. + Triangle_Filter_Local, // Filter functions for triangles + Curve_Filter_Local_None, // Subsurface scattering is not applied on curves, no filter function + // is + // needed. + Motion_Triangle_Filter_Local, + Point_Filter_Local_None, + // Filter functions for volume rendering. + // Volume rendering only applies to triangles and motion triangles. 
+ // Triangles and motion triangles use the same filter functions for volume rendering + Triangle_Filter_Volume, + Curve_Filter_Volume_None, + Motion_Triangle_Filter_Volume, + Point_Filter_Volume_None, +}; + +#ifdef __KERNEL_GPU__ +__constant__ KernelParamsHIPRT kernel_params; + +# ifdef HIPRT_SHARED_STACK +typedef hiprtGlobalStack Stack; +# endif + +#endif + +/* Abstraction macros */ +#define kernel_data kernel_params.data +#define kernel_data_fetch(name, index) kernel_params.name[(index)] +#define kernel_data_array(name) (kernel_params.name) +#define kernel_integrator_state kernel_params.integrator_state + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/hiprt/hiprt_kernels.h b/intern/cycles/kernel/device/hiprt/hiprt_kernels.h new file mode 100644 index 00000000000..cd92e9ff30c --- /dev/null +++ b/intern/cycles/kernel/device/hiprt/hiprt_kernels.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2022 Blender Foundation */ + +#ifdef __HIPRT__ + +ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS) + ccl_gpu_kernel_signature(integrator_intersect_closest, + ccl_global const int *path_index_array, + ccl_global float *render_buffer, + const int work_size, + ccl_global int *stack_buffer) +{ + const int global_index = ccl_gpu_global_id_x(); + + if (global_index < work_size) { + HIPRT_INIT_KERNEL_GLOBAL() + const int state = (path_index_array) ? path_index_array[global_index] : global_index; + ccl_gpu_kernel_call(integrator_intersect_closest(kg, state, render_buffer)); + } +} +ccl_gpu_kernel_postfix + +ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS) + ccl_gpu_kernel_signature(integrator_intersect_shadow, + ccl_global const int *path_index_array, + const int work_size, + ccl_global int *stack_buffer) +{ + const int global_index = ccl_gpu_global_id_x(); + + if (global_index < work_size) { + HIPRT_INIT_KERNEL_GLOBAL() + const int state = (path_index_array) ? 
path_index_array[global_index] : global_index; + ccl_gpu_kernel_call(integrator_intersect_shadow(kg, state)); + } +} +ccl_gpu_kernel_postfix + +ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS) + ccl_gpu_kernel_signature(integrator_intersect_subsurface, + ccl_global const int *path_index_array, + const int work_size, + ccl_global int *stack_buffer) +{ + const int global_index = ccl_gpu_global_id_x(); + + if (global_index < work_size) { + HIPRT_INIT_KERNEL_GLOBAL() + const int state = (path_index_array) ? path_index_array[global_index] : global_index; + ccl_gpu_kernel_call(integrator_intersect_subsurface(kg, state)); + } +} +ccl_gpu_kernel_postfix + +ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS) + ccl_gpu_kernel_signature(integrator_intersect_volume_stack, + ccl_global const int *path_index_array, + const int work_size, + ccl_global int *stack_buffer) +{ + const int global_index = ccl_gpu_global_id_x(); + + if (global_index < work_size) { + HIPRT_INIT_KERNEL_GLOBAL() + const int state = (path_index_array) ? path_index_array[global_index] : global_index; + ccl_gpu_kernel_call(integrator_intersect_volume_stack(kg, state)); + } +} +ccl_gpu_kernel_postfix +ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS) + ccl_gpu_kernel_signature(integrator_shade_surface_raytrace, + ccl_global const int *path_index_array, + ccl_global float *render_buffer, + const int work_size, + ccl_global int *stack_buffer) +{ + const int global_index = ccl_gpu_global_id_x(); + if (global_index < work_size) { + HIPRT_INIT_KERNEL_GLOBAL() + const int state = (path_index_array) ? 
path_index_array[global_index] : global_index; + ccl_gpu_kernel_call(integrator_shade_surface_raytrace(kg, state, render_buffer)); + } +} +ccl_gpu_kernel_postfix +ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS) + ccl_gpu_kernel_signature(integrator_shade_surface_mnee, + ccl_global const int *path_index_array, + ccl_global float *render_buffer, + const int work_size, + ccl_global int *stack_buffer) +{ + const int global_index = ccl_gpu_global_id_x(); + if (global_index < work_size) { + HIPRT_INIT_KERNEL_GLOBAL() + const int state = (path_index_array) ? path_index_array[global_index] : global_index; + ccl_gpu_kernel_call(integrator_shade_surface_mnee(kg, state, render_buffer)); + } +} +ccl_gpu_kernel_postfix + +#endif /* __HIPRT__ */ diff --git a/intern/cycles/kernel/device/hiprt/kernel.cpp b/intern/cycles/kernel/device/hiprt/kernel.cpp new file mode 100644 index 00000000000..34456e1d7c0 --- /dev/null +++ b/intern/cycles/kernel/device/hiprt/kernel.cpp @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2011-2023 Blender Foundation */ + +#ifdef __HIP_DEVICE_COMPILE__ + +# include "kernel/device/hip/compat.h" +# include "kernel/device/hip/config.h" + +# include + +# include "kernel/device/hiprt/globals.h" + +# include "kernel/device/gpu/image.h" +# include "kernel/device/gpu/kernel.h" + +#endif diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 4fa379e4b15..d833290a34d 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -1169,10 +1169,14 @@ typedef enum KernelBVHLayout { BVH_LAYOUT_METAL = (1 << 5), BVH_LAYOUT_MULTI_METAL = (1 << 6), BVH_LAYOUT_MULTI_METAL_EMBREE = (1 << 7), + BVH_LAYOUT_HIPRT = (1 << 8), + BVH_LAYOUT_MULTI_HIPRT = (1 << 9), + BVH_LAYOUT_MULTI_HIPRT_EMBREE = (1 << 10), /* Default BVH layout to use for CPU. 
*/ BVH_LAYOUT_AUTO = BVH_LAYOUT_EMBREE, - BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX | BVH_LAYOUT_METAL, + BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX | BVH_LAYOUT_METAL | + BVH_LAYOUT_HIPRT | BVH_LAYOUT_MULTI_HIPRT | BVH_LAYOUT_MULTI_HIPRT_EMBREE, } KernelBVHLayout; /* Specialized struct that can become constants in dynamic compilation. */ @@ -1225,6 +1229,8 @@ typedef struct KernelData { OptixTraversableHandle device_bvh; #elif defined __METALRT__ metalrt_as_type device_bvh; +#elif defined(__HIPRT__) + void *device_bvh; #else # ifdef __EMBREE__ RTCScene device_bvh; diff --git a/intern/cycles/scene/geometry.cpp b/intern/cycles/scene/geometry.cpp index 21d3b6a52a6..d268b5570f4 100644 --- a/intern/cycles/scene/geometry.cpp +++ b/intern/cycles/scene/geometry.cpp @@ -114,7 +114,9 @@ bool Geometry::need_build_bvh(BVHLayout layout) const { return is_instanced() || layout == BVH_LAYOUT_OPTIX || layout == BVH_LAYOUT_MULTI_OPTIX || layout == BVH_LAYOUT_METAL || layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE || - layout == BVH_LAYOUT_MULTI_METAL || layout == BVH_LAYOUT_MULTI_METAL_EMBREE; + layout == BVH_LAYOUT_MULTI_METAL || layout == BVH_LAYOUT_MULTI_METAL_EMBREE || + layout == BVH_LAYOUT_HIPRT || layout == BVH_LAYOUT_MULTI_HIPRT || + layout == BVH_LAYOUT_MULTI_HIPRT_EMBREE; } bool Geometry::is_instanced() const diff --git a/intern/cycles/scene/object.cpp b/intern/cycles/scene/object.cpp index c19a28583e9..8d17b0803a7 100644 --- a/intern/cycles/scene/object.cpp +++ b/intern/cycles/scene/object.cpp @@ -597,7 +597,8 @@ void ObjectManager::device_update_prim_offsets(Device *device, DeviceScene *dsce if (!scene->integrator->get_use_light_tree()) { BVHLayoutMask layout_mask = device->get_bvh_layout_mask(dscene->data.kernel_features); if (layout_mask != BVH_LAYOUT_METAL && layout_mask != BVH_LAYOUT_MULTI_METAL && - layout_mask != BVH_LAYOUT_MULTI_METAL_EMBREE) { + layout_mask != BVH_LAYOUT_MULTI_METAL_EMBREE && layout_mask != 
BVH_LAYOUT_HIPRT && + layout_mask != BVH_LAYOUT_MULTI_HIPRT && layout_mask != BVH_LAYOUT_MULTI_HIPRT_EMBREE) { return; } }