Fix T40228: cycles CUDA multi GPU + world MIS giving error.
author Brecht Van Lommel <brechtvanlommel@gmail.com>
Thu, 5 Jun 2014 16:10:06 +0000 (18:10 +0200)
committer Brecht Van Lommel <brechtvanlommel@gmail.com>
Thu, 5 Jun 2014 16:10:32 +0000 (18:10 +0200)
intern/cycles/device/device_cuda.cpp
intern/cycles/kernel/kernel.cu

index 48d1c18555afd416d1e69cf5ba9f97992f9b4fa9..0429bfc6e971d45883296f1f180e07ec9b06ff03 100644 (file)
@@ -762,6 +762,8 @@ public:
                        if(task.get_cancel())
                                break;
 
+                       int shader_w = min(shader_chunk_size, end - shader_x);
+
                        /* pass in parameters */
                        int offset = 0;
 
@@ -780,13 +782,15 @@ public:
                        cuda_assert(cuParamSeti(cuShader, offset, shader_x));
                        offset += sizeof(shader_x);
 
+                       cuda_assert(cuParamSeti(cuShader, offset, shader_w));
+                       offset += sizeof(shader_w);
+
                        cuda_assert(cuParamSetSize(cuShader, offset));
 
                        /* launch kernel */
                        int threads_per_block;
                        cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
 
-                       int shader_w = min(shader_chunk_size, end - shader_x);
                        int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
 
                        cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
index 69c2600ea982c19cd93ffaed49d19fbd882493ac..12273cc2853f0231c7a1857d223c6092fd2d742b 100644 (file)
@@ -146,20 +146,22 @@ kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scal
 
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx)
+kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx, int sw)
 {
        int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
 
-       kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x);
+       if(x < sx + sw)
+               kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x);
 }
 
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx)
+kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx, int sw)
 {
        int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
 
-       kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x);
+       if(x < sx + sw)
+               kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x);
 }
 
 #endif