Cycles: make CUDA code a bit more robust to host/device alloc failures.
author: Brecht Van Lommel <brechtvanlommel@gmail.com>
Tue, 2 Jan 2018 21:56:07 +0000 (22:56 +0100)
committer: Brecht Van Lommel <brechtvanlommel@gmail.com>
Tue, 2 Jan 2018 22:46:19 +0000 (23:46 +0100)
Fixes a few corner cases found while stress testing host-mapped memory.

intern/cycles/device/device_cuda.cpp
intern/cycles/device/device_memory.cpp
intern/cycles/render/buffers.cpp
intern/cycles/render/image.cpp
intern/cycles/render/image.h
intern/cycles/render/object.cpp

index 391809e527807c746e7380fa27c2be69446c0845..29aabd3169c21cccddd7c8ceae11a83ef147c4ce 100644 (file)
@@ -650,7 +650,7 @@ public:
 
        void generic_copy_to(device_memory& mem)
        {
-               if(mem.device_pointer) {
+               if(mem.host_pointer && mem.device_pointer) {
                        CUDAContextScope scope(this);
                        cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size()));
                }
@@ -715,11 +715,11 @@ public:
                        size_t offset = elem*y*w;
                        size_t size = elem*w*h;
 
-                       if(mem.device_pointer) {
+                       if(mem.host_pointer && mem.device_pointer) {
                                cuda_assert(cuMemcpyDtoH((uchar*)mem.host_pointer + offset,
                                                                                 (CUdeviceptr)(mem.device_pointer + offset), size));
                        }
-                       else {
+                       else if(mem.host_pointer) {
                                memset((char*)mem.host_pointer + offset, 0, size);
                        }
                }
@@ -1118,13 +1118,17 @@ public:
 
                int shift_stride = stride*h;
                int num_shifts = (2*r+1)*(2*r+1);
-               int mem_size = sizeof(float)*shift_stride*2*num_shifts;
+               int mem_size = sizeof(float)*shift_stride*num_shifts;
                int channel_offset = 0;
 
-               CUdeviceptr temporary_mem;
-               cuda_assert(cuMemAlloc(&temporary_mem, mem_size));
-               CUdeviceptr difference     = temporary_mem;
-               CUdeviceptr blurDifference = temporary_mem + sizeof(float)*shift_stride * num_shifts;
+               device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem");
+               temporary_mem.alloc_to_device(2*mem_size);
+
+               if(have_error())
+                       return false;
+
+               CUdeviceptr difference     = cuda_device_ptr(temporary_mem.device_pointer);
+               CUdeviceptr blurDifference = difference + mem_size;
 
                CUdeviceptr weightAccum = task->nlm_state.temporary_3_ptr;
                cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float)*shift_stride));
@@ -1156,7 +1160,7 @@ public:
                        CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args);
                }
 
-               cuMemFree(temporary_mem);
+               temporary_mem.free();
 
                {
                        CUfunction cuNLMNormalize;
@@ -1225,10 +1229,14 @@ public:
                int num_shifts = (2*r+1)*(2*r+1);
                int mem_size = sizeof(float)*shift_stride*num_shifts;
 
-               CUdeviceptr temporary_mem;
-               cuda_assert(cuMemAlloc(&temporary_mem, 2*mem_size));
-               CUdeviceptr difference     = temporary_mem;
-               CUdeviceptr blurDifference = temporary_mem + mem_size;
+               device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem");
+               temporary_mem.alloc_to_device(2*mem_size);
+
+               if(have_error())
+                       return false;
+
+               CUdeviceptr difference     = cuda_device_ptr(temporary_mem.device_pointer);
+               CUdeviceptr blurDifference = difference + mem_size;
 
                {
                        CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
@@ -1268,7 +1276,7 @@ public:
                        CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args);
                }
 
-               cuMemFree(temporary_mem);
+               temporary_mem.free();
 
                {
                        CUfunction cuFinalize;
index 3ad0946330bc550e6694b1fc980af656f95ee136..b5db76bb3df36b7698122c338f216b36289f2fc8 100644 (file)
@@ -86,7 +86,7 @@ void device_memory::device_free()
 
 void device_memory::device_copy_to()
 {
-       if(data_size) {
+       if(host_pointer) {
                device->mem_copy_to(*this);
        }
 }
index 5c7729ec89fb9f517bfdf6a1c92bd3c103ea3c38..9899fa1c39c736a7131580c6d043195b30e8af17 100644 (file)
@@ -151,6 +151,10 @@ bool RenderBuffers::copy_from_device()
 
 bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels)
 {
+       if(buffer.data() == NULL) {
+               return false;
+       }
+
        float invsample = 1.0f/sample;
        float scale = invsample;
        bool variance = (offset == DENOISING_PASS_NORMAL_VAR) ||
@@ -218,6 +222,10 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp
 
 bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels)
 {
+       if(buffer.data() == NULL) {
+               return false;
+       }
+
        int pass_offset = 0;
 
        for(size_t j = 0; j < params.passes.size(); j++) {
index 482442cce29fe9bd0dab42355a710f0fad20eab2..feaa17148ee40db2e2eecbce34331aa11bc030c0 100644 (file)
@@ -703,7 +703,7 @@ void ImageManager::device_load_image(Device *device,
 
        /* Slot assignment */
        int flat_slot = type_index_to_flattened_slot(slot, type);
-       string name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot);
+       img->mem_name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot);
 
        /* Free previous texture in slot. */
        if(img->mem) {
@@ -715,7 +715,7 @@ void ImageManager::device_load_image(Device *device,
        /* Create new texture. */
        if(type == IMAGE_DATA_TYPE_FLOAT4) {
                device_vector<float4> *tex_img
-                       = new device_vector<float4>(device, name.c_str(), MEM_TEXTURE);
+                       = new device_vector<float4>(device, img->mem_name.c_str(), MEM_TEXTURE);
 
                if(!file_load_image<TypeDesc::FLOAT, float>(img,
                                                            type,
@@ -741,7 +741,7 @@ void ImageManager::device_load_image(Device *device,
        }
        else if(type == IMAGE_DATA_TYPE_FLOAT) {
                device_vector<float> *tex_img
-                       = new device_vector<float>(device, name.c_str(), MEM_TEXTURE);
+                       = new device_vector<float>(device, img->mem_name.c_str(), MEM_TEXTURE);
 
                if(!file_load_image<TypeDesc::FLOAT, float>(img,
                                                            type,
@@ -764,7 +764,7 @@ void ImageManager::device_load_image(Device *device,
        }
        else if(type == IMAGE_DATA_TYPE_BYTE4) {
                device_vector<uchar4> *tex_img
-                       = new device_vector<uchar4>(device, name.c_str(), MEM_TEXTURE);
+                       = new device_vector<uchar4>(device, img->mem_name.c_str(), MEM_TEXTURE);
 
                if(!file_load_image<TypeDesc::UINT8, uchar>(img,
                                                            type,
@@ -790,7 +790,7 @@ void ImageManager::device_load_image(Device *device,
        }
        else if(type == IMAGE_DATA_TYPE_BYTE) {
                device_vector<uchar> *tex_img
-                       = new device_vector<uchar>(device, name.c_str(), MEM_TEXTURE);
+                       = new device_vector<uchar>(device, img->mem_name.c_str(), MEM_TEXTURE);
 
                if(!file_load_image<TypeDesc::UINT8, uchar>(img,
                                                            type,
@@ -812,7 +812,7 @@ void ImageManager::device_load_image(Device *device,
        }
        else if(type == IMAGE_DATA_TYPE_HALF4) {
                device_vector<half4> *tex_img
-                       = new device_vector<half4>(device, name.c_str(), MEM_TEXTURE);
+                       = new device_vector<half4>(device, img->mem_name.c_str(), MEM_TEXTURE);
 
                if(!file_load_image<TypeDesc::HALF, half>(img,
                                                          type,
@@ -837,7 +837,7 @@ void ImageManager::device_load_image(Device *device,
        }
        else if(type == IMAGE_DATA_TYPE_HALF) {
                device_vector<half> *tex_img
-                       = new device_vector<half>(device, name.c_str(), MEM_TEXTURE);
+                       = new device_vector<half>(device, img->mem_name.c_str(), MEM_TEXTURE);
 
                if(!file_load_image<TypeDesc::HALF, half>(img,
                                                          type,
index cc7c8544bedc936e5ea62f3b0cf8c80efe28c1d8..3519a67bc0500a206811967132c6a8494411ba52 100644 (file)
@@ -111,6 +111,7 @@ public:
                InterpolationType interpolation;
                ExtensionType extension;
 
+               string mem_name;
                device_memory *mem;
 
                int users;
index aef7fc295733983932213b2c54b0255af43da3cc..d7143f248501e71a43c7a94a79c6e64811c75d22 100644 (file)
@@ -644,7 +644,7 @@ void ObjectManager::device_update_flags(Device *,
 
 void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Scene *scene)
 {
-       if(scene->objects.size() == 0) {
+       if(dscene->objects.size() == 0) {
                return;
        }