Attempts to fix CUDA issues on sm 2.0 cards, still no luck getting motion blur
author Brecht Van Lommel <brechtvanlommel@pandora.be>
Wed, 17 Oct 2012 22:48:29 +0000 (22:48 +0000)
committer Brecht Van Lommel <brechtvanlommel@pandora.be>
Wed, 17 Oct 2012 22:48:29 +0000 (22:48 +0000)
working, but this should make it not crash.

Also fix for wrong shutter time, should have been shorter.

intern/cycles/kernel/kernel_bvh.h
intern/cycles/kernel/kernel_camera.h
intern/cycles/kernel/kernel_displace.h
intern/cycles/kernel/kernel_emission.h
intern/cycles/kernel/kernel_light.h
intern/cycles/kernel/kernel_object.h
intern/cycles/kernel/kernel_shader.h
intern/cycles/render/object.cpp

index 9d8ad6f30723e3963193a6af56d53c85918bf3da..d033fb1d14591c780a5497b415f2acd70e135d62 100644 (file)
@@ -87,7 +87,7 @@ __device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *
 __device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, Transform *tfm, const float tmax)
 {
        Transform itfm;
-       *tfm = object_fetch_transform_motion(kg, object, ray->time, &itfm);
+       *tfm = object_fetch_transform_motion_test(kg, object, ray->time, &itfm);
 
        *P = transform_point(&itfm, ray->P);
 
@@ -104,9 +104,8 @@ __device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, con
 
 __device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, Transform *tfm, const float tmax)
 {
-       if(*t != FLT_MAX) {
+       if(*t != FLT_MAX)
                *t *= len(transform_direction(tfm, 1.0f/(*idir)));
-       }
 
        *P = ray->P;
        *idir = bvh_inverse_direction(ray->D);
@@ -163,7 +162,7 @@ __device_inline void bvh_node_intersect(KernelGlobals *kg,
 
 /* Sven Woop's algorithm */
 __device_inline void bvh_triangle_intersect(KernelGlobals *kg, Intersection *isect,
-       float3 P, float3 idir, uint visibility, int object, int triAddr, Transform *tfm)
+       float3 P, float3 idir, uint visibility, int object, int triAddr)
 {
        /* compute and check intersection t-value */
        float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
@@ -285,7 +284,7 @@ __device_inline bool bvh_intersect(KernelGlobals *kg, const Ray *ray, const uint
                                        /* triangle intersection */
                                        while(primAddr < primAddr2) {
                                                /* intersect ray against triangle */
-                                               bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr, NULL);
+                                               bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr);
 
                                                /* shadow ray early termination */
                                                if(visibility == PATH_RAY_SHADOW_OPAQUE && isect->prim != ~0)
@@ -405,7 +404,7 @@ __device_inline bool bvh_intersect_motion(KernelGlobals *kg, const Ray *ray, con
                                        /* triangle intersection */
                                        while(primAddr < primAddr2) {
                                                /* intersect ray against triangle */
-                                               bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr, &ob_tfm);
+                                               bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr);
 
                                                /* shadow ray early termination */
                                                if(visibility == PATH_RAY_SHADOW_OPAQUE && isect->prim != ~0)
@@ -444,7 +443,8 @@ __device_inline bool bvh_intersect_motion(KernelGlobals *kg, const Ray *ray, con
 
 __device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect)
 {
-#ifdef __OBJECT_MOTION__
+       /* todo: fix cuda sm 2.0 motion blur */
+#if defined(__OBJECT_MOTION__) && (!defined(__KERNEL_CUDA) || (__CUDA_ARCH__ >= 210))
        if(kernel_data.bvh.have_motion)
                return bvh_intersect_motion(kg, ray, visibility, isect);
        else
index 08674d0e379e6f0813d3e7d540c3ff3529b40a3b..1b2fe8c56ee4889f70179df854edb5b63e605b68 100644 (file)
@@ -217,7 +217,7 @@ __device void camera_sample(KernelGlobals *kg, int x, int y, float filter_u, flo
        if(kernel_data.cam.shuttertime == 0.0f)
                ray->time = TIME_INVALID;
        else
-               ray->time = 0.5f + (time - 0.5f)*kernel_data.cam.shuttertime;
+               ray->time = 0.5f + 0.5f*(time - 0.5f)*kernel_data.cam.shuttertime;
 #endif
 
        /* sample */
index 6461a1eea389a1825d18ce77dbca81fac29ff97b..a55f7a7fd7510d21df7778cc87a243dc3c2226b0 100644 (file)
@@ -47,6 +47,9 @@ __device void kernel_shader_evaluate(KernelGlobals *kg, uint4 *input, float4 *ou
                ray.P = make_float3(0.0f, 0.0f, 0.0f);
                ray.D = equirectangular_to_direction(u, v);
                ray.t = 0.0f;
+#ifdef __CAMERA_MOTION__
+               ray.time = 0.5f;
+#endif
 
 #ifdef __RAY_DIFFERENTIALS__
                ray.dD.dx = make_float3(0.0f, 0.0f, 0.0f);
index 53d53b4bedd498b71aed4e21aaa56dec566fca45..75b6df5f08f39ee06deb95d3350831fd675e5128 100644 (file)
@@ -34,6 +34,9 @@ __device float3 direct_emissive_eval(KernelGlobals *kg, float rando,
                ray.P = ls->P;
                ray.dP.dx = make_float3(0.0f, 0.0f, 0.0f);
                ray.dP.dy = make_float3(0.0f, 0.0f, 0.0f);
+#ifdef __CAMERA_MOTION__
+               ray.time = time;
+#endif
                shader_setup_from_background(kg, &sd, &ray);
                eval = shader_eval_background(kg, &sd, 0);
        }
index 4bb17c0bd5ac3a22db13f07c3746e786ff88c729..2791b3abbb6b857f3cf15f627366d89d61d567f1 100644 (file)
@@ -303,7 +303,7 @@ __device void triangle_light_sample(KernelGlobals *kg, int prim, int object,
        if(ls->object >= 0) {
 #ifdef __OBJECT_MOTION__
                Transform itfm;
-               Transform tfm = object_fetch_transform_motion(kg, ls->object, time, &itfm);
+               Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm);
 #else
                Transform tfm = object_fetch_transform(kg, ls->object, OBJECT_TRANSFORM);
                Transform itfm = object_fetch_transform(kg, ls->object, OBJECT_INVERSE_TRANSFORM);
index 79ff7e2020afe7a6ddfc88a7282e7a37bae003a1..2fa9443766ea6e8297129bde90e5eb3bf49aa3d1 100644 (file)
@@ -25,7 +25,7 @@ enum ObjectTransform {
        OBJECT_TRANSFORM_MOTION_PRE = 8,
        OBJECT_TRANSFORM_MOTION_MID = 12,
        OBJECT_TRANSFORM_MOTION_POST = 16,
-       OBJECT_DUPLI = 18
+       OBJECT_DUPLI = 20
 };
 
 __device_inline Transform object_fetch_transform(KernelGlobals *kg, int object, enum ObjectTransform type)
@@ -42,48 +42,52 @@ __device_inline Transform object_fetch_transform(KernelGlobals *kg, int object,
 }
 
 #ifdef __OBJECT_MOTION__
-__device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time, Transform *itfm)
+__device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time)
 {
-       Transform tfm;
+       MotionTransform motion;
 
-       int object_flag = kernel_tex_fetch(__object_flag, object);
+       int offset = object*OBJECT_SIZE + (int)OBJECT_TRANSFORM_MOTION_PRE;
 
-       /* if we do motion blur */
-       if(object_flag & SD_OBJECT_MOTION) {
-               /* fetch motion transforms */
-               MotionTransform motion;
+       motion.pre.x = kernel_tex_fetch(__objects, offset + 0);
+       motion.pre.y = kernel_tex_fetch(__objects, offset + 1);
+       motion.pre.z = kernel_tex_fetch(__objects, offset + 2);
+       motion.pre.w = kernel_tex_fetch(__objects, offset + 3);
 
-               int offset = object*OBJECT_SIZE + (int)OBJECT_TRANSFORM_MOTION_PRE;
+       motion.mid.x = kernel_tex_fetch(__objects, offset + 4);
+       motion.mid.y = kernel_tex_fetch(__objects, offset + 5);
+       motion.mid.z = kernel_tex_fetch(__objects, offset + 6);
+       motion.mid.w = kernel_tex_fetch(__objects, offset + 7);
 
-               motion.pre.x = kernel_tex_fetch(__objects, offset + 0);
-               motion.pre.y = kernel_tex_fetch(__objects, offset + 1);
-               motion.pre.z = kernel_tex_fetch(__objects, offset + 2);
-               motion.pre.w = kernel_tex_fetch(__objects, offset + 3);
+       motion.post.x = kernel_tex_fetch(__objects, offset + 8);
+       motion.post.y = kernel_tex_fetch(__objects, offset + 9);
+       motion.post.z = kernel_tex_fetch(__objects, offset + 10);
+       motion.post.w = kernel_tex_fetch(__objects, offset + 11);
 
-               motion.mid.x = kernel_tex_fetch(__objects, offset + 4);
-               motion.mid.y = kernel_tex_fetch(__objects, offset + 5);
-               motion.mid.z = kernel_tex_fetch(__objects, offset + 6);
-               motion.mid.w = kernel_tex_fetch(__objects, offset + 7);
+       Transform tfm;
+       transform_motion_interpolate(&tfm, &motion, time);
 
-               motion.post.x = kernel_tex_fetch(__objects, offset + 8);
-               motion.post.y = kernel_tex_fetch(__objects, offset + 9);
-               motion.post.z = kernel_tex_fetch(__objects, offset + 10);
-               motion.post.w = kernel_tex_fetch(__objects, offset + 11);
+       return tfm;
+}
 
-               transform_motion_interpolate(&tfm, &motion, time);
+__device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg, int object, float time, Transform *itfm)
+{
+       int object_flag = kernel_tex_fetch(__object_flag, object);
+
+       if(object_flag & SD_OBJECT_MOTION) {
+               /* if we do motion blur */
+               Transform tfm = object_fetch_transform_motion(kg, object, time);
 
-               /* invert */
                if(itfm)
                        *itfm = transform_quick_inverse(tfm);
+
+               return tfm;
        }
        else {
-               tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+               Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+               *itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
 
-               if(itfm)
-                       *itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+               return tfm;
        }
-
-       return tfm;
 }
 #endif
 
@@ -271,6 +275,5 @@ __device float3 particle_angular_velocity(KernelGlobals *kg, int particle)
        return make_float3(f3.z, f3.w, f4.x);
 }
 
-
 CCL_NAMESPACE_END
 
index 36f7122a3803ac87d26e27166e4cca1380d62016..814c32dfbd3a8cfe310bf0a76eb2742e1ef307de 100644 (file)
@@ -43,6 +43,22 @@ CCL_NAMESPACE_BEGIN
 
 /* ShaderData setup from incoming ray */
 
+#ifdef __OBJECT_MOTION__
+__device_noinline void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
+{
+       /* note that this is a separate non-inlined function to work around crash
+        * on CUDA sm 2.0, otherwise kernel execution crashes (compiler bug?) */
+       if(sd->flag & SD_OBJECT_MOTION) {
+               sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
+               sd->ob_itfm= transform_quick_inverse(sd->ob_tfm);
+       }
+       else {
+               sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
+               sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
+       }
+}
+#endif
+
 __device_inline void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
        const Intersection *isect, const Ray *ray)
 {
@@ -72,14 +88,7 @@ __device_inline void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
 
        /* matrices and time */
 #ifdef __OBJECT_MOTION__
-       if(sd->flag & SD_OBJECT_MOTION) {
-               sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, ray->time, &sd->ob_itfm);
-       }
-       else {
-               sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
-               sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
-       }
-
+       shader_setup_object_transforms(kg, sd, ray->time);
        sd->time = ray->time;
 #endif
 
@@ -181,13 +190,7 @@ __device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
                sd->flag |= kernel_tex_fetch(__object_flag, sd->object);
 
 #ifdef __OBJECT_MOTION__
-               if(sd->flag & SD_OBJECT_MOTION) {
-                       sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time, &sd->ob_itfm);
-               }
-               else {
-                       sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
-                       sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
-               }
+               shader_setup_object_transforms(kg, sd, time);
        }
 
        sd->time = time;
index 0b87a5307255a19c231e8f62a7c34a354054effa..25b4d1f08cc7f874fd35b155d11edcff04c3e64d 100644 (file)
@@ -64,8 +64,8 @@ void Object::compute_bounds(bool motion_blur, float shuttertime)
                /* todo: this is really terrible. according to pbrt there is a better
                 * way to find this iteratively, but did not find implementation yet
                 * or try to implement myself */
-               float start_t = 0.5f - shuttertime*0.5f;
-               float end_t = 0.5f - shuttertime*0.5f;
+               float start_t = 0.5f - shuttertime*0.25f;
+               float end_t = 0.5f + shuttertime*0.25f;
 
                for(float t = start_t; t < end_t; t += (1.0f/128.0f)*shuttertime) {
                        Transform ttfm;