Cycles / CUDA: Change inline rules for BVH intersection functions.

* On sm_30 and above there is no change (the functions were already not inlined before); this just fixes a speed regression from yesterday (6359c36ba4).
* On sm_2x (tested with sm_21), I get a nice 8% speedup in the bmw scene with this. As a bonus, cubin compilation time and memory usage are significantly reduced. Regular cubin size went from 2.5MB to 2.0MB, the Experimental one from 3.8MB to 2.5MB.
This commit is contained in:
Thomas Dinges
2014-10-05 03:53:51 +02:00
parent 24ddfe0c63
commit dde740bcd7

View File

@@ -28,6 +28,13 @@
CCL_NAMESPACE_BEGIN
/* Function qualifier used for the scene_intersect*() BVH intersection
 * functions in this file.
 *
 * On GPU builds (__KERNEL_GPU__ defined) it expands to ccl_device_noinline:
 * the intersect functions are deliberately not inlined there, because this
 * is faster. On all other builds it expands to ccl_device_inline. */
#ifdef __KERNEL_GPU__
#define ccl_device_intersect ccl_device_noinline
#else
#define ccl_device_intersect ccl_device_inline
#endif
/* BVH intersection function variations */
#define BVH_INSTANCING 1
@@ -161,7 +168,7 @@ CCL_NAMESPACE_BEGIN
#include "geom_bvh_volume.h"
#endif
ccl_device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect,
ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect,
uint *lcg_state, float difl, float extmax)
{
#ifdef __OBJECT_MOTION__
@@ -200,7 +207,7 @@ ccl_device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const
}
#ifdef __SUBSURFACE__
ccl_device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
ccl_device_intersect uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
{
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
@@ -239,7 +246,7 @@ ccl_device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *
#endif
#ifdef __SHADOW_RECORD_ALL__
ccl_device_inline bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
{
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
@@ -267,7 +274,7 @@ ccl_device_inline bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *
#endif
#ifdef __VOLUME__
ccl_device_inline bool scene_intersect_volume(KernelGlobals *kg,
ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
const Ray *ray,
Intersection *isect)
{