Merge branch 'master' into blender2.8
[blender.git] / intern / cycles / render / buffers.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <stdlib.h>
18
19 #include "render/buffers.h"
20 #include "device/device.h"
21
22 #include "util/util_foreach.h"
23 #include "util/util_hash.h"
24 #include "util/util_math.h"
25 #include "util/util_opengl.h"
26 #include "util/util_time.h"
27 #include "util/util_types.h"
28
29 CCL_NAMESPACE_BEGIN
30
31 /* Buffer Params */
32
33 BufferParams::BufferParams()
34 {
35         width = 0;
36         height = 0;
37
38         full_x = 0;
39         full_y = 0;
40         full_width = 0;
41         full_height = 0;
42
43         denoising_data_pass = false;
44         denoising_clean_pass = false;
45
46         Pass::add(PASS_COMBINED, passes);
47 }
48
49 void BufferParams::get_offset_stride(int& offset, int& stride)
50 {
51         offset = -(full_x + full_y*width);
52         stride = width;
53 }
54
55 bool BufferParams::modified(const BufferParams& params)
56 {
57         return !(full_x == params.full_x
58                 && full_y == params.full_y
59                 && width == params.width
60                 && height == params.height
61                 && full_width == params.full_width
62                 && full_height == params.full_height
63                 && Pass::equals(passes, params.passes));
64 }
65
66 int BufferParams::get_passes_size()
67 {
68         int size = 0;
69
70         for(size_t i = 0; i < passes.size(); i++)
71                 size += passes[i].components;
72
73         if(denoising_data_pass) {
74                 size += DENOISING_PASS_SIZE_BASE;
75                 if(denoising_clean_pass) size += DENOISING_PASS_SIZE_CLEAN;
76         }
77
78         return align_up(size, 4);
79 }
80
81 int BufferParams::get_denoising_offset()
82 {
83         int offset = 0;
84
85         for(size_t i = 0; i < passes.size(); i++)
86                 offset += passes[i].components;
87
88         return offset;
89 }
90
91 /* Render Buffer Task */
92
93 RenderTile::RenderTile()
94 {
95         x = 0;
96         y = 0;
97         w = 0;
98         h = 0;
99
100         sample = 0;
101         start_sample = 0;
102         num_samples = 0;
103         resolution = 0;
104
105         offset = 0;
106         stride = 0;
107
108         buffer = 0;
109
110         buffers = NULL;
111 }
112
113 /* Render Buffers */
114
115 RenderBuffers::RenderBuffers(Device *device)
116 : buffer(device, "RenderBuffers", MEM_READ_WRITE),
117   map_neighbor_copied(false), render_time(0.0f)
118 {
119 }
120
121 RenderBuffers::~RenderBuffers()
122 {
123         buffer.free();
124 }
125
126 void RenderBuffers::reset(BufferParams& params_)
127 {
128         params = params_;
129
130         /* re-allocate buffer */
131         buffer.alloc(params.width*params.height*params.get_passes_size());
132         buffer.zero_to_device();
133 }
134
135 void RenderBuffers::zero()
136 {
137         buffer.zero_to_device();
138 }
139
140 bool RenderBuffers::copy_from_device()
141 {
142         if(!buffer.device_pointer)
143                 return false;
144
145         buffer.copy_from_device(0, params.width * params.get_passes_size(), params.height);
146
147         return true;
148 }
149
150 bool RenderBuffers::get_denoising_pass_rect(int type, float exposure, int sample, int components, float *pixels)
151 {
152         if(buffer.data() == NULL) {
153                 return false;
154         }
155
156         float invsample = 1.0f/sample;
157         float scale = invsample;
158         bool variance = (type == DENOISING_PASS_NORMAL_VAR) ||
159                         (type == DENOISING_PASS_ALBEDO_VAR) ||
160                         (type == DENOISING_PASS_DEPTH_VAR) ||
161                         (type == DENOISING_PASS_COLOR_VAR);
162
163         float scale_exposure = scale;
164         if(type == DENOISING_PASS_COLOR || type == DENOISING_PASS_CLEAN) {
165                 scale_exposure *= exposure;
166         }
167         else if(type == DENOISING_PASS_COLOR_VAR) {
168                 scale_exposure *= exposure*exposure;
169         }
170
171         int offset = type + params.get_denoising_offset();
172         int pass_stride = params.get_passes_size();
173         int size = params.width*params.height;
174
175         if(variance) {
176                 /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance
177                  * update does not work efficiently with atomics in the kernel. */
178                 int mean_offset = offset - components;
179                 float *mean = buffer.data() + mean_offset;
180                 float *var = buffer.data() + offset;
181                 assert(mean_offset >= 0);
182
183                 if(components == 1) {
184                         for(int i = 0; i < size; i++, mean += pass_stride, var += pass_stride, pixels++) {
185                                 pixels[0] = max(0.0f, var[0] - mean[0]*mean[0]*invsample)*scale_exposure;
186                         }
187                 }
188                 else if(components == 3) {
189                         for(int i = 0; i < size; i++, mean += pass_stride, var += pass_stride, pixels += 3) {
190                                 pixels[0] = max(0.0f, var[0] - mean[0]*mean[0]*invsample)*scale_exposure;
191                                 pixels[1] = max(0.0f, var[1] - mean[1]*mean[1]*invsample)*scale_exposure;
192                                 pixels[2] = max(0.0f, var[2] - mean[2]*mean[2]*invsample)*scale_exposure;
193                         }
194                 }
195                 else {
196                         return false;
197                 }
198         }
199         else {
200                 float *in = buffer.data() + offset;
201
202                 if(components == 1) {
203                         for(int i = 0; i < size; i++, in += pass_stride, pixels++) {
204                                 pixels[0] = in[0]*scale_exposure;
205                         }
206                 }
207                 else if(components == 3) {
208                         for(int i = 0; i < size; i++, in += pass_stride, pixels += 3) {
209                                 pixels[0] = in[0]*scale_exposure;
210                                 pixels[1] = in[1]*scale_exposure;
211                                 pixels[2] = in[2]*scale_exposure;
212                         }
213                 }
214                 else if(components == 4) {
215                         assert(type == DENOISING_PASS_COLOR);
216
217                         /* Since the alpha channel is not involved in denoising, output the Combined alpha channel. */
218                         assert(params.passes[0].type == PASS_COMBINED);
219                         float *in_combined = buffer.data();
220
221                         for(int i = 0; i < size; i++, in += pass_stride, in_combined += pass_stride, pixels += 4) {
222                                 pixels[0] = in[0]*scale_exposure;
223                                 pixels[1] = in[1]*scale_exposure;
224                                 pixels[2] = in[2]*scale_exposure;
225                                 pixels[3] = saturate(in_combined[3]*scale);
226                         }
227                 }
228                 else {
229                         return false;
230                 }
231         }
232
233         return true;
234 }
235
236 bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels, const string &name)
237 {
238         if(buffer.data() == NULL) {
239                 return false;
240         }
241
242         int pass_offset = 0;
243
244         for(size_t j = 0; j < params.passes.size(); j++) {
245                 Pass& pass = params.passes[j];
246
247                 if(pass.type != type) {
248                         pass_offset += pass.components;
249                         continue;
250                 }
251
252                 /* Tell Cryptomatte passes apart by their name. */
253                 if(pass.type == PASS_CRYPTOMATTE) {
254                         if(pass.name != name) {
255                                 pass_offset += pass.components;
256                                 continue;
257                         }
258                 }
259
260                 float *in = buffer.data() + pass_offset;
261                 int pass_stride = params.get_passes_size();
262
263                 float scale = (pass.filter)? 1.0f/(float)sample: 1.0f;
264                 float scale_exposure = (pass.exposure)? scale*exposure: scale;
265
266                 int size = params.width*params.height;
267
268                 if(components == 1 && type == PASS_RENDER_TIME) {
269                         /* Render time is not stored by kernel, but measured per tile. */
270                         float val = (float) (1000.0 * render_time/(params.width * params.height * sample));
271                         for(int i = 0; i < size; i++, pixels++) {
272                                 pixels[0] = val;
273                         }
274                 }
275                 else if(components == 1) {
276                         assert(pass.components == components);
277
278                         /* Scalar */
279                         if(type == PASS_DEPTH) {
280                                 for(int i = 0; i < size; i++, in += pass_stride, pixels++) {
281                                         float f = *in;
282                                         pixels[0] = (f == 0.0f)? 1e10f: f*scale_exposure;
283                                 }
284                         }
285                         else if(type == PASS_MIST) {
286                                 for(int i = 0; i < size; i++, in += pass_stride, pixels++) {
287                                         float f = *in;
288                                         pixels[0] = saturate(f*scale_exposure);
289                                 }
290                         }
291 #ifdef WITH_CYCLES_DEBUG
292                         else if(type == PASS_BVH_TRAVERSED_NODES ||
293                                 type == PASS_BVH_TRAVERSED_INSTANCES ||
294                                 type == PASS_BVH_INTERSECTIONS ||
295                                 type == PASS_RAY_BOUNCES)
296                         {
297                                 for(int i = 0; i < size; i++, in += pass_stride, pixels++) {
298                                         float f = *in;
299                                         pixels[0] = f*scale;
300                                 }
301                         }
302 #endif
303                         else {
304                                 for(int i = 0; i < size; i++, in += pass_stride, pixels++) {
305                                         float f = *in;
306                                         pixels[0] = f*scale_exposure;
307                                 }
308                         }
309                 }
310                 else if(components == 3) {
311                         assert(pass.components == 4);
312
313                         /* RGBA */
314                         if(type == PASS_SHADOW) {
315                                 for(int i = 0; i < size; i++, in += pass_stride, pixels += 3) {
316                                         float4 f = make_float4(in[0], in[1], in[2], in[3]);
317                                         float invw = (f.w > 0.0f)? 1.0f/f.w: 1.0f;
318
319                                         pixels[0] = f.x*invw;
320                                         pixels[1] = f.y*invw;
321                                         pixels[2] = f.z*invw;
322                                 }
323                         }
324                         else if(pass.divide_type != PASS_NONE) {
325                                 /* RGB lighting passes that need to divide out color */
326                                 pass_offset = 0;
327                                 for(size_t k = 0; k < params.passes.size(); k++) {
328                                         Pass& color_pass = params.passes[k];
329                                         if(color_pass.type == pass.divide_type)
330                                                 break;
331                                         pass_offset += color_pass.components;
332                                 }
333
334                                 float *in_divide = buffer.data() + pass_offset;
335
336                                 for(int i = 0; i < size; i++, in += pass_stride, in_divide += pass_stride, pixels += 3) {
337                                         float3 f = make_float3(in[0], in[1], in[2]);
338                                         float3 f_divide = make_float3(in_divide[0], in_divide[1], in_divide[2]);
339
340                                         f = safe_divide_even_color(f*exposure, f_divide);
341
342                                         pixels[0] = f.x;
343                                         pixels[1] = f.y;
344                                         pixels[2] = f.z;
345                                 }
346                         }
347                         else {
348                                 /* RGB/vector */
349                                 for(int i = 0; i < size; i++, in += pass_stride, pixels += 3) {
350                                         float3 f = make_float3(in[0], in[1], in[2]);
351
352                                         pixels[0] = f.x*scale_exposure;
353                                         pixels[1] = f.y*scale_exposure;
354                                         pixels[2] = f.z*scale_exposure;
355                                 }
356                         }
357                 }
358                 else if(components == 4) {
359                         assert(pass.components == components);
360
361                         /* RGBA */
362                         if(type == PASS_SHADOW) {
363                                 for(int i = 0; i < size; i++, in += pass_stride, pixels += 4) {
364                                         float4 f = make_float4(in[0], in[1], in[2], in[3]);
365                                         float invw = (f.w > 0.0f)? 1.0f/f.w: 1.0f;
366
367                                         pixels[0] = f.x*invw;
368                                         pixels[1] = f.y*invw;
369                                         pixels[2] = f.z*invw;
370                                         pixels[3] = 1.0f;
371                                 }
372                         }
373                         else if(type == PASS_MOTION) {
374                                 /* need to normalize by number of samples accumulated for motion */
375                                 pass_offset = 0;
376                                 for(size_t k = 0; k < params.passes.size(); k++) {
377                                         Pass& color_pass = params.passes[k];
378                                         if(color_pass.type == PASS_MOTION_WEIGHT)
379                                                 break;
380                                         pass_offset += color_pass.components;
381                                 }
382
383                                 float *in_weight = buffer.data() + pass_offset;
384
385                                 for(int i = 0; i < size; i++, in += pass_stride, in_weight += pass_stride, pixels += 4) {
386                                         float4 f = make_float4(in[0], in[1], in[2], in[3]);
387                                         float w = in_weight[0];
388                                         float invw = (w > 0.0f)? 1.0f/w: 0.0f;
389
390                                         pixels[0] = f.x*invw;
391                                         pixels[1] = f.y*invw;
392                                         pixels[2] = f.z*invw;
393                                         pixels[3] = f.w*invw;
394                                 }
395                         }
396                         else if(type == PASS_CRYPTOMATTE) {
397                                 for(int i = 0; i < size; i++, in += pass_stride, pixels += 4) {
398                                         float4 f = make_float4(in[0], in[1], in[2], in[3]);
399                                         /* x and z contain integer IDs, don't rescale them.
400                                            y and w contain matte weights, they get scaled. */
401                                         pixels[0] = f.x;
402                                         pixels[1] = f.y * scale;
403                                         pixels[2] = f.z;
404                                         pixels[3] = f.w * scale;
405                                 }
406                         }
407                         else {
408                                 for(int i = 0; i < size; i++, in += pass_stride, pixels += 4) {
409                                         float4 f = make_float4(in[0], in[1], in[2], in[3]);
410
411                                         pixels[0] = f.x*scale_exposure;
412                                         pixels[1] = f.y*scale_exposure;
413                                         pixels[2] = f.z*scale_exposure;
414
415                                         /* clamp since alpha might be > 1.0 due to russian roulette */
416                                         pixels[3] = saturate(f.w*scale);
417                                 }
418                         }
419                 }
420
421                 return true;
422         }
423
424         return false;
425 }
426
427 /* Display Buffer */
428
429 DisplayBuffer::DisplayBuffer(Device *device, bool linear)
430 : draw_width(0),
431   draw_height(0),
432   transparent(true), /* todo: determine from background */
433   half_float(linear),
434   rgba_byte(device, "display buffer byte"),
435   rgba_half(device, "display buffer half")
436 {
437 }
438
439 DisplayBuffer::~DisplayBuffer()
440 {
441         rgba_byte.free();
442         rgba_half.free();
443 }
444
445 void DisplayBuffer::reset(BufferParams& params_)
446 {
447         draw_width = 0;
448         draw_height = 0;
449
450         params = params_;
451
452         /* allocate display pixels */
453         if(half_float) {
454                 rgba_half.alloc_to_device(params.width, params.height);
455         }
456         else {
457                 rgba_byte.alloc_to_device(params.width, params.height);
458         }
459 }
460
461 void DisplayBuffer::draw_set(int width, int height)
462 {
463         assert(width <= params.width && height <= params.height);
464
465         draw_width = width;
466         draw_height = height;
467 }
468
469 void DisplayBuffer::draw(Device *device, const DeviceDrawParams& draw_params)
470 {
471         if(draw_width != 0 && draw_height != 0) {
472                 device_memory& rgba = (half_float)? (device_memory&)rgba_half:
473                                                     (device_memory&)rgba_byte;
474
475                 device->draw_pixels(
476                             rgba, 0,
477                             draw_width, draw_height, params.width, params.height,
478                             params.full_x, params.full_y, params.full_width, params.full_height,
479                             transparent, draw_params);
480         }
481 }
482
483 bool DisplayBuffer::draw_ready()
484 {
485         return (draw_width != 0 && draw_height != 0);
486 }
487
488 CCL_NAMESPACE_END