Cycles / CUDA: Change inline rules for BVH intersection functions.
* On sm_30 and above there is no change (these functions were already not inlined before); this just fixes a speed regression from yesterday (6359c36ba4).
* On sm_2x (tested with sm_21), I get a nice 8% speedup in the bmw scene with this. As a bonus, cubin compilation time and memory usage are significantly reduced. The regular cubin size went from 2.5MB to 2.0MB, and the experimental one from 3.8MB to 2.5MB.
This commit is contained in:
@@ -28,6 +28,13 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Don't inline intersect functions on GPU, this is faster */
|
||||
#ifdef __KERNEL_GPU__
|
||||
#define ccl_device_intersect ccl_device_noinline
|
||||
#else
|
||||
#define ccl_device_intersect ccl_device_inline
|
||||
#endif
|
||||
|
||||
/* BVH intersection function variations */
|
||||
|
||||
#define BVH_INSTANCING 1
|
||||
@@ -161,7 +168,7 @@ CCL_NAMESPACE_BEGIN
|
||||
#include "geom_bvh_volume.h"
|
||||
#endif
|
||||
|
||||
ccl_device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect,
|
||||
ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect,
|
||||
uint *lcg_state, float difl, float extmax)
|
||||
{
|
||||
#ifdef __OBJECT_MOTION__
|
||||
@@ -200,7 +207,7 @@ ccl_device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const
|
||||
}
|
||||
|
||||
#ifdef __SUBSURFACE__
|
||||
ccl_device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
|
||||
ccl_device_intersect uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
|
||||
{
|
||||
#ifdef __OBJECT_MOTION__
|
||||
if(kernel_data.bvh.have_motion) {
|
||||
@@ -239,7 +246,7 @@ ccl_device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *
|
||||
#endif
|
||||
|
||||
#ifdef __SHADOW_RECORD_ALL__
|
||||
ccl_device_inline bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
|
||||
ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
|
||||
{
|
||||
#ifdef __OBJECT_MOTION__
|
||||
if(kernel_data.bvh.have_motion) {
|
||||
@@ -267,7 +274,7 @@ ccl_device_inline bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *
|
||||
#endif
|
||||
|
||||
#ifdef __VOLUME__
|
||||
ccl_device_inline bool scene_intersect_volume(KernelGlobals *kg,
|
||||
ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
|
||||
const Ray *ray,
|
||||
Intersection *isect)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user