Cycles: avoid using float3 in kernel constant memory, just so we're sure alignment
author Brecht Van Lommel <brechtvanlommel@pandora.be>
Tue, 20 Dec 2011 12:25:45 +0000 (12:25 +0000)
committer Brecht Van Lommel <brechtvanlommel@pandora.be>
Tue, 20 Dec 2011 12:25:45 +0000 (12:25 +0000)
is compatible between CPU and GPU.

intern/cycles/kernel/kernel_camera.h
intern/cycles/kernel/kernel_types.h
intern/cycles/render/camera.cpp
intern/cycles/render/nodes.cpp
intern/cycles/util/util_math.h

index 9cdc2f1f8659f56cb6396175b531fafcca99847c..2dbdd07689190cea7262e065f6169369497a3580 100644 (file)
@@ -74,8 +74,8 @@ __device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float
        ray->dP.dx = make_float3(0.0f, 0.0f, 0.0f);
        ray->dP.dy = make_float3(0.0f, 0.0f, 0.0f);
 
-       ray->dD.dx = normalize(Ddiff + kernel_data.cam.dx) - normalize(Ddiff);
-       ray->dD.dy = normalize(Ddiff + kernel_data.cam.dy) - normalize(Ddiff);
+       ray->dD.dx = normalize(Ddiff + float4_to_float3(kernel_data.cam.dx)) - normalize(Ddiff);
+       ray->dD.dy = normalize(Ddiff + float4_to_float3(kernel_data.cam.dy)) - normalize(Ddiff);
 #endif
 
 #ifdef __CAMERA_CLIPPING__
@@ -107,8 +107,8 @@ __device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, floa
 
 #ifdef __RAY_DIFFERENTIALS__
        /* ray differential */
-       ray->dP.dx = kernel_data.cam.dx;
-       ray->dP.dy = kernel_data.cam.dy;
+       ray->dP.dx = float4_to_float3(kernel_data.cam.dx);
+       ray->dP.dy = float4_to_float3(kernel_data.cam.dy);
 
        ray->dD.dx = make_float3(0.0f, 0.0f, 0.0f);
        ray->dD.dy = make_float3(0.0f, 0.0f, 0.0f);
index 72ebfefbd9082b97f4a9662dad18c18178593e2c..ea73f87a8a56385e09e891f4a81e900612346b79 100644 (file)
@@ -295,7 +295,11 @@ typedef struct ShaderData {
 #endif
 } ShaderData;
 
-/* Constrant Kernel Data */
+/* Constant Kernel Data
+ *
+ * These structs are passed from CPU to various devices, and the struct layout
+ * must match exactly. Structs are padded to ensure 16-byte alignment, and we
+ * do not use float3 because its size may not be the same on all devices. */
 
 typedef struct KernelCamera {
        /* type */
@@ -307,14 +311,8 @@ typedef struct KernelCamera {
        Transform rastertocamera;
 
        /* differentials */
-       float3 dx;
-#ifndef WITH_OPENCL
-       float pad1;
-#endif
-       float3 dy;
-#ifndef WITH_OPENCL
-       float pad2;
-#endif
+       float4 dx;
+       float4 dy;
 
        /* depth of field */
        float aperturesize;
@@ -355,10 +353,6 @@ typedef struct KernelBackground {
 typedef struct KernelSunSky {
        /* sun direction in spherical and cartesian */
        float theta, phi, pad3, pad4;
-       float3 dir;
-#ifndef WITH_OPENCL
-       float pad;
-#endif
 
        /* perez function parameters */
        float zenith_Y, zenith_x, zenith_y, pad2;
index d5fca87491d3f2638cbcfe72278816306818de8e..a83ae81844cbcb805cc734a0c289897671e13080 100644 (file)
@@ -150,8 +150,8 @@ void Camera::device_update(Device *device, DeviceScene *dscene)
        kcam->ortho = ortho;
 
        /* store differentials */
-       kcam->dx = dx;
-       kcam->dy = dy;
+       kcam->dx = float3_to_float4(dx);
+       kcam->dy = float3_to_float4(dy);
 
        /* clipping */
        kcam->nearclip = nearclip;
index 7d873221cd6ece18cda70d8052db7cd979444442..81d156a079dad1f8eb4e08cde493921f4a0264d5 100644 (file)
@@ -273,7 +273,6 @@ static void sky_texture_precompute(KernelSunSky *ksunsky, float3 dir, float turb
 
        ksunsky->theta = theta;
        ksunsky->phi = phi;
-       ksunsky->dir = dir;
 
        float theta2 = theta*theta;
        float theta3 = theta*theta*theta;
index 7c56f0fbb124fcc10eb435766998044fe07842d7..0a1d8ff4555c48b5454b97c7b4eab719870c76b4 100644 (file)
@@ -536,6 +536,11 @@ __device_inline float3 float4_to_float3(const float4 a)
        return make_float3(a.x, a.y, a.z);
 }
 
+__device_inline float4 float3_to_float4(const float3 a)
+{
+       return make_float4(a.x, a.y, a.z, 1.0f); /* widen to float4: x, y, z are copied from a, w is set to 1.0f */
+}
+
 #ifndef __KERNEL_GPU__
 
 __device_inline void print_float3(const char *label, const float3& a)