Cycles Denoising: Refactor denoiser tile handling
author Lukas Stockner <lukas.stockner@freenet.de>
Wed, 4 Jul 2018 12:26:15 +0000 (14:26 +0200)
committer Lukas Stockner <lukas.stockner@freenet.de>
Wed, 4 Jul 2018 12:36:01 +0000 (14:36 +0200)
This deduplicates the tile (un)mapping calls and allows the target buffer to differ from the source buffer (needed for baking and animation denoising).

intern/cycles/device/device_cpu.cpp
intern/cycles/device/device_cuda.cpp
intern/cycles/device/device_denoising.cpp
intern/cycles/device/device_denoising.h
intern/cycles/device/opencl/opencl.h
intern/cycles/device/opencl/opencl_base.cpp
intern/cycles/device/opencl/opencl_mega.cpp
intern/cycles/device/opencl/opencl_split.cpp
intern/cycles/render/session.cpp

index 81a084c03b2fb28315e07444d2cc47565efa36d0..b824cddd87b8a68506072f3a534a94dceaaf1dfd 100644 (file)
@@ -635,7 +635,7 @@ public:
                                                              (float*) buffer_variance_ptr,
                                                              &task->rect.x,
                                                              task->render_buffer.pass_stride,
-                                                             task->render_buffer.denoising_data_offset);
+                                                             task->render_buffer.offset);
                        }
                }
                return true;
@@ -658,7 +658,7 @@ public:
                                                            (float*) variance_ptr,
                                                            &task->rect.x,
                                                            task->render_buffer.pass_stride,
-                                                           task->render_buffer.denoising_data_offset);
+                                                           task->render_buffer.offset);
                        }
                }
                return true;
@@ -711,7 +711,7 @@ public:
                }
        }
 
-       void denoise(DeviceTask &task, DenoisingTask& denoising, RenderTile &tile)
+       void denoise(DenoisingTask& denoising, RenderTile &tile)
        {
                tile.sample = tile.start_sample + tile.num_samples;
 
@@ -727,16 +727,7 @@ public:
                denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h);
                denoising.render_buffer.samples = tile.sample;
 
-               RenderTile rtiles[9];
-               rtiles[4] = tile;
-               task.map_neighbor_tiles(rtiles, this);
-               denoising.tiles_from_rendertiles(rtiles);
-
-               denoising.run_denoising();
-
-               task.unmap_neighbor_tiles(rtiles, this);
-
-               task.update_progress(&tile, tile.w*tile.h);
+               denoising.run_denoising(&tile);
        }
 
        void thread_render(DeviceTask& task)
@@ -777,7 +768,9 @@ public:
                                }
                        }
                        else if(tile.task == RenderTile::DENOISE) {
-                               denoise(task, denoising, tile);
+                               denoise(denoising, tile);
+
+                               task.update_progress(&tile, tile.w*tile.h);
                        }
 
                        task.release_tile(tile);
index 34c64feb80abe84b7d1da4548dad13f04c8b289a..1e7883f612b1dc48b16c0f83112fe5b46170a2ac 100644 (file)
@@ -1542,7 +1542,7 @@ public:
                                &buffer_variance_ptr,
                                &task->rect,
                                &task->render_buffer.pass_stride,
-                               &task->render_buffer.denoising_data_offset};
+                               &task->render_buffer.offset};
                CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args);
                cuda_assert(cuCtxSynchronize());
 
@@ -1575,7 +1575,7 @@ public:
                                &variance_ptr,
                                &task->rect,
                                &task->render_buffer.pass_stride,
-                               &task->render_buffer.denoising_data_offset};
+                               &task->render_buffer.offset};
                CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args);
                cuda_assert(cuCtxSynchronize());
 
@@ -1613,7 +1613,7 @@ public:
                return !have_error();
        }
 
-       void denoise(RenderTile &rtile, DenoisingTask& denoising, const DeviceTask &task)
+       void denoise(RenderTile &rtile, DenoisingTask& denoising)
        {
                denoising.functions.construct_transform = function_bind(&CUDADevice::denoising_construct_transform, this, &denoising);
                denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, &denoising);
@@ -1627,14 +1627,7 @@ public:
                denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
                denoising.render_buffer.samples = rtile.sample;
 
-               RenderTile rtiles[9];
-               rtiles[4] = rtile;
-               task.map_neighbor_tiles(rtiles, this);
-               denoising.tiles_from_rendertiles(rtiles);
-
-               denoising.run_denoising();
-
-               task.unmap_neighbor_tiles(rtiles, this);
+               denoising.run_denoising(&rtile);
        }
 
        void path_trace(DeviceTask& task, RenderTile& rtile, device_vector<WorkTile>& work_tiles)
@@ -2087,7 +2080,7 @@ public:
                                else if(tile.task == RenderTile::DENOISE) {
                                        tile.sample = tile.start_sample + tile.num_samples;
 
-                                       denoise(tile, denoising, *task);
+                                       denoise(tile, denoising);
 
                                        task->update_progress(&tile, tile.w*tile.h);
                                }
index 4d2ba508aeca543c52eb0674b2a5914885df4b09..fe6b53fc3740b431d0d985697544fbd61b997fc6 100644 (file)
@@ -36,8 +36,13 @@ DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task)
        }
 
        render_buffer.pass_stride = task.pass_stride;
-       render_buffer.denoising_data_offset = task.pass_denoising_data;
-       render_buffer.denoising_clean_offset = task.pass_denoising_clean;
+       render_buffer.offset = task.pass_denoising_data;
+
+       target_buffer.pass_stride = task.pass_stride;
+       target_buffer.denoising_clean_offset = task.pass_denoising_clean;
+
+       functions.map_neighbor_tiles = function_bind(task.map_neighbor_tiles, _1, device);
+       functions.unmap_neighbor_tiles = function_bind(task.unmap_neighbor_tiles, _1, device);
 }
 
 DenoisingTask::~DenoisingTask()
@@ -53,8 +58,7 @@ DenoisingTask::~DenoisingTask()
        tiles_mem.free();
 }
 
-
-void DenoisingTask::tiles_from_rendertiles(RenderTile *rtiles)
+void DenoisingTask::set_render_buffer(RenderTile *rtiles)
 {
        tiles = (TilesInfo*) tiles_mem.alloc(sizeof(TilesInfo)/sizeof(int));
 
@@ -73,9 +77,9 @@ void DenoisingTask::tiles_from_rendertiles(RenderTile *rtiles)
        tiles->y[2] = rtiles[7].y;
        tiles->y[3] = rtiles[7].y + rtiles[7].h;
 
-       render_buffer.offset = rtiles[4].offset;
-       render_buffer.stride = rtiles[4].stride;
-       render_buffer.ptr    = rtiles[4].buffer;
+       target_buffer.offset = rtiles[9].offset;
+       target_buffer.stride = rtiles[9].stride;
+       target_buffer.ptr    = rtiles[9].buffer;
 
        functions.set_tiles(buffers);
 }
@@ -228,21 +232,26 @@ void DenoisingTask::reconstruct()
        storage.XtWY.alloc_to_device(storage.w*storage.h*XTWY_SIZE, false);
 
        reconstruction_state.filter_window = rect_from_shape(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h);
-       int tile_coordinate_offset = filter_area.y*render_buffer.stride + filter_area.x;
-       reconstruction_state.buffer_params = make_int4(render_buffer.offset + tile_coordinate_offset,
-                                                      render_buffer.stride,
-                                                      render_buffer.pass_stride,
-                                                      render_buffer.denoising_clean_offset);
+       int tile_coordinate_offset = filter_area.y*target_buffer.stride + filter_area.x;
+       reconstruction_state.buffer_params = make_int4(target_buffer.offset + tile_coordinate_offset,
+                                                      target_buffer.stride,
+                                                      target_buffer.pass_stride,
+                                                      target_buffer.denoising_clean_offset);
        reconstruction_state.source_w = rect.z-rect.x;
        reconstruction_state.source_h = rect.w-rect.y;
 
        device_sub_ptr color_ptr    (buffer.mem,  8*buffer.pass_stride, 3*buffer.pass_stride);
        device_sub_ptr color_var_ptr(buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride);
-       functions.reconstruct(*color_ptr, *color_var_ptr, render_buffer.ptr);
+       functions.reconstruct(*color_ptr, *color_var_ptr, target_buffer.ptr);
 }
 
-void DenoisingTask::run_denoising()
+void DenoisingTask::run_denoising(RenderTile *tile)
 {
+       RenderTile rtiles[10];
+       rtiles[4] = *tile;
+       functions.map_neighbor_tiles(rtiles);
+       set_render_buffer(rtiles);
+
        setup_denoising_buffer();
 
        prefilter_shadowing();
@@ -252,6 +261,7 @@ void DenoisingTask::run_denoising()
        construct_transform();
        reconstruct();
 
+       functions.unmap_neighbor_tiles(rtiles);
 }
 
 CCL_NAMESPACE_END
index a802151703bec8fc0f2223a155b3c383033b7d9d..21af1b10fc5d96de8b47d0eeaf9accf7917b7438 100644 (file)
@@ -32,20 +32,24 @@ public:
        float nlm_k_2;
        float pca_threshold;
 
-       /* Pointer and parameters of the RenderBuffers. */
+       /* Parameters of the RenderBuffers. */
        struct RenderBuffers {
-               int denoising_data_offset;
-               int denoising_clean_offset;
+               int offset;
                int pass_stride;
+               int samples;
+       } render_buffer;
+
+       /* Pointer and parameters of the target buffer. */
+       struct TargetBuffer {
                int offset;
                int stride;
+               int pass_stride;
+               int denoising_clean_offset;
                device_ptr ptr;
-               int samples;
-       } render_buffer;
+       } target_buffer;
 
        TilesInfo *tiles;
        device_vector<int> tiles_mem;
-       void tiles_from_rendertiles(RenderTile *rtiles);
 
        int4 rect;
        int4 filter_area;
@@ -86,6 +90,8 @@ public:
                              device_ptr output_ptr
                              )> detect_outliers;
                function<bool(device_ptr*)> set_tiles;
+               function<void(RenderTile *rtiles)> map_neighbor_tiles;
+               function<void(RenderTile *rtiles)> unmap_neighbor_tiles;
        } functions;
 
        /* Stores state of the current Reconstruction operation,
@@ -141,7 +147,7 @@ public:
        DenoisingTask(Device *device, const DeviceTask &task);
        ~DenoisingTask();
 
-       void run_denoising();
+       void run_denoising(RenderTile *tile);
 
        struct DenoiseBuffers {
                int pass_stride;
index 85ef14ee29ad489ad603e21fffbd93f80c90a2e1..7526f1e15a1a430d2fcc33a54159b0dae3a0882d 100644 (file)
@@ -362,7 +362,7 @@ public:
        void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half);
        void shader(DeviceTask& task);
 
-       void denoise(RenderTile& tile, DenoisingTask& denoising, const DeviceTask& task);
+       void denoise(RenderTile& tile, DenoisingTask& denoising);
 
        class OpenCLDeviceTask : public DeviceTask {
        public:
index ca7a23efb242c910970acc453378b36b5487be6c..4e49e0ef1669d98bb3692d4d3a1cb5e49c085fce 100644 (file)
@@ -991,7 +991,7 @@ bool OpenCLDeviceBase::denoising_divide_shadow(device_ptr a_ptr,
                        buffer_variance_mem,
                        task->rect,
                        task->render_buffer.pass_stride,
-                       task->render_buffer.denoising_data_offset);
+                       task->render_buffer.offset);
        enqueue_kernel(ckFilterDivideShadow,
                       task->rect.z-task->rect.x,
                       task->rect.w-task->rect.y);
@@ -1021,7 +1021,7 @@ bool OpenCLDeviceBase::denoising_get_feature(int mean_offset,
                        variance_mem,
                        task->rect,
                        task->render_buffer.pass_stride,
-                       task->render_buffer.denoising_data_offset);
+                       task->render_buffer.offset);
        enqueue_kernel(ckFilterGetFeature,
                       task->rect.z-task->rect.x,
                       task->rect.w-task->rect.y);
@@ -1076,7 +1076,7 @@ bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers,
        return true;
 }
 
-void OpenCLDeviceBase::denoise(RenderTile &rtile, DenoisingTask& denoising, const DeviceTask &task)
+void OpenCLDeviceBase::denoise(RenderTile &rtile, DenoisingTask& denoising)
 {
        denoising.functions.set_tiles = function_bind(&OpenCLDeviceBase::denoising_set_tiles, this, _1, &denoising);
        denoising.functions.construct_transform = function_bind(&OpenCLDeviceBase::denoising_construct_transform, this, &denoising);
@@ -1090,14 +1090,7 @@ void OpenCLDeviceBase::denoise(RenderTile &rtile, DenoisingTask& denoising, cons
        denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
        denoising.render_buffer.samples = rtile.sample;
 
-       RenderTile rtiles[9];
-       rtiles[4] = rtile;
-       task.map_neighbor_tiles(rtiles, this);
-       denoising.tiles_from_rendertiles(rtiles);
-
-       denoising.run_denoising();
-
-       task.unmap_neighbor_tiles(rtiles, this);
+       denoising.run_denoising(&rtile);
 }
 
 void OpenCLDeviceBase::shader(DeviceTask& task)
index 61c353d7a8aed19bdb2f86c383ab21a616ccfed7..e004c0b44f4822f0a7411f8c372d0c2e0184920d 100644 (file)
@@ -141,7 +141,7 @@ public:
                                }
                                else if(tile.task == RenderTile::DENOISE) {
                                        tile.sample = tile.start_sample + tile.num_samples;
-                                       denoise(tile, denoising, *task);
+                                       denoise(tile, denoising);
                                        task->update_progress(&tile, tile.w*tile.h);
                                }
 
index 86181af7fe0a96c7658f58bef853ba20d82ca865..66a4aa7e891b8d54740f912a1955c8de89f25b7e 100644 (file)
@@ -159,7 +159,7 @@ public:
                                }
                                else if(tile.task == RenderTile::DENOISE) {
                                        tile.sample = tile.start_sample + tile.num_samples;
-                                       denoise(tile, denoising, *task);
+                                       denoise(tile, denoising);
                                        task->update_progress(&tile, tile.w*tile.h);
                                }
 
index bb636dd962abbc5e9c1a26ad700ba14a67367bc5..3106ee538321d7ebf2cc828a1e64a66e35351e77 100644 (file)
@@ -502,6 +502,9 @@ void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device)
 
        assert(tiles[4].buffers);
        device->map_neighbor_tiles(tile_device, tiles);
+
+       /* The denoised result is written back to the original tile. */
+       tiles[9] = tiles[4];
 }
 
 void Session::unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device)