8b527691bd973f355631715982d52fecad2c07ab
[blender.git] / intern / cycles / render / svm.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18
19 #include "device.h"
20 #include "graph.h"
21 #include "light.h"
22 #include "mesh.h"
23 #include "scene.h"
24 #include "shader.h"
25 #include "svm.h"
26
27 #include "util_debug.h"
28 #include "util_foreach.h"
29 #include "util_progress.h"
30
31 CCL_NAMESPACE_BEGIN
32
33 /* Shader Manager */
34
35 SVMShaderManager::SVMShaderManager()
36 {
37 }
38
39 SVMShaderManager::~SVMShaderManager()
40 {
41 }
42
43 void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
44 {
45         if(!need_update)
46                 return;
47
48         /* test if we need to update */
49         device_free(device, dscene);
50
51         /* svm_nodes */
52         vector<int4> svm_nodes;
53         size_t i;
54
55         for(i = 0; i < scene->shaders.size(); i++) {
56                 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
57                 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
58         }
59         
60         bool sunsky_done = false;
61
62         for(i = 0; i < scene->shaders.size(); i++) {
63                 Shader *shader = scene->shaders[i];
64
65                 if(progress.get_cancel()) return;
66
67                 assert(shader->graph);
68
69                 if(shader->has_surface_emission)
70                         scene->light_manager->need_update = true;
71
72                 SVMCompiler compiler(scene->shader_manager, scene->image_manager,
73                         scene->params.use_multi_closure);
74                 compiler.sunsky = (sunsky_done)? NULL: &dscene->data.sunsky;
75                 compiler.background = ((int)i == scene->default_background);
76                 compiler.compile(shader, svm_nodes, i);
77                 if(!compiler.sunsky)
78                         sunsky_done = true;
79         }
80
81         dscene->svm_nodes.copy((uint4*)&svm_nodes[0], svm_nodes.size());
82         device->tex_alloc("__svm_nodes", dscene->svm_nodes);
83
84         for(i = 0; i < scene->shaders.size(); i++) {
85                 Shader *shader = scene->shaders[i];
86                 shader->need_update = false;
87         }
88
89         need_update = false;
90 }
91
92 void SVMShaderManager::device_free(Device *device, DeviceScene *dscene)
93 {
94         device->tex_free(dscene->svm_nodes);
95         dscene->svm_nodes.clear();
96 }
97
98 /* Graph Compiler */
99
100 SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_manager_, bool use_multi_closure_)
101 {
102         shader_manager = shader_manager_;
103         image_manager = image_manager_;
104         sunsky = NULL;
105         max_stack_use = 0;
106         current_type = SHADER_TYPE_SURFACE;
107         current_shader = NULL;
108         background = false;
109         mix_weight_offset = SVM_STACK_INVALID;
110         use_multi_closure = use_multi_closure_;
111 }
112
113 int SVMCompiler::stack_size(ShaderSocketType type)
114 {
115         if(type == SHADER_SOCKET_FLOAT)
116                 return 1;
117         else if(type == SHADER_SOCKET_COLOR)
118                 return 3;
119         else if(type == SHADER_SOCKET_VECTOR)
120                 return 3;
121         else if(type == SHADER_SOCKET_NORMAL)
122                 return 3;
123         else if(type == SHADER_SOCKET_POINT)
124                 return 3;
125         else if(type == SHADER_SOCKET_CLOSURE)
126                 return 0;
127
128         assert(0);
129         return 0;
130 }
131
132 int SVMCompiler::stack_find_offset(ShaderSocketType type)
133 {
134         int size = stack_size(type);
135         int offset = -1;
136         
137         /* find free space in stack & mark as used */
138         for(int i = 0, num_unused = 0; i < SVM_STACK_SIZE; i++) {
139                 if(active_stack.users[i]) num_unused = 0;
140                 else num_unused++;
141
142                 if(num_unused == size) {
143                         offset = i+1 - size;
144                         max_stack_use = max(i+1, max_stack_use);
145
146                         while(i >= offset)
147                                 active_stack.users[i--] = 1;
148
149                         return offset;
150                 }
151         }
152
153         fprintf(stderr, "Out of SVM stack space.\n");
154         assert(0);
155
156         return offset;
157 }
158
159 void SVMCompiler::stack_backup(StackBackup& backup, set<ShaderNode*>& done)
160 {
161         backup.done = done;
162         backup.stack = active_stack;
163
164         foreach(ShaderNode *node, current_graph->nodes) {
165                 foreach(ShaderInput *input, node->inputs)
166                         backup.offsets.push_back(input->stack_offset);
167                 foreach(ShaderOutput *output, node->outputs)
168                         backup.offsets.push_back(output->stack_offset);
169         }
170 }
171
172 void SVMCompiler::stack_restore(StackBackup& backup, set<ShaderNode*>& done)
173 {
174         int i = 0;
175
176         done = backup.done;
177         active_stack = backup.stack;
178
179         foreach(ShaderNode *node, current_graph->nodes) {
180                 foreach(ShaderInput *input, node->inputs)
181                         input->stack_offset = backup.offsets[i++];
182                 foreach(ShaderOutput *output, node->outputs)
183                         output->stack_offset = backup.offsets[i++];
184         }
185 }
186
187 void SVMCompiler::stack_assign(ShaderInput *input)
188 {
189         /* stack offset assign? */
190         if(input->stack_offset == SVM_STACK_INVALID) {
191                 if(input->link) {
192                         /* linked to output -> use output offset */
193                         input->stack_offset = input->link->stack_offset;
194                 }
195                 else {
196                         /* not linked to output -> add nodes to load default value */
197                         input->stack_offset = stack_find_offset(input->type);
198
199                         if(input->type == SHADER_SOCKET_FLOAT) {
200                                 add_node(NODE_VALUE_F, __float_as_int(input->value.x), input->stack_offset);
201                         }
202                         else if(input->type == SHADER_SOCKET_VECTOR ||
203                                 input->type == SHADER_SOCKET_NORMAL ||
204                                 input->type == SHADER_SOCKET_POINT ||
205                                 input->type == SHADER_SOCKET_COLOR) {
206
207                                 add_node(NODE_VALUE_V, input->stack_offset);
208                                 add_node(NODE_VALUE_V, input->value);
209                         }
210                         else /* should not get called for closure */
211                                 assert(0);
212                 }
213         }
214 }
215
216 void SVMCompiler::stack_assign(ShaderOutput *output)
217 {
218         /* if no stack offset assigned yet, find one */
219         if(output->stack_offset == SVM_STACK_INVALID)
220                 output->stack_offset = stack_find_offset(output->type);
221 }
222
223 void SVMCompiler::stack_link(ShaderInput *input, ShaderOutput *output)
224 {
225         if(output->stack_offset == SVM_STACK_INVALID) {
226                 assert(input->link);
227                 assert(stack_size(output->type) == stack_size(input->link->type));
228
229                 output->stack_offset = input->link->stack_offset;
230
231                 int size = stack_size(output->type);
232
233                 for(int i = 0; i < size; i++)
234                         active_stack.users[output->stack_offset + i]++;
235         }
236 }
237
238 void SVMCompiler::stack_clear_users(ShaderNode *node, set<ShaderNode*>& done)
239 {
240         /* optimization we should add:
241            find and lower user counts for outputs for which all inputs are done.
242            this is done before the node is compiled, under the assumption that the
243            node will first load all inputs from the stack and then writes its
244            outputs. this used to work, but was disabled because it gave trouble
245            with inputs getting stack positions assigned */
246
247         foreach(ShaderInput *input, node->inputs) {
248                 ShaderOutput *output = input->link;
249
250                 if(output && output->stack_offset != SVM_STACK_INVALID) {
251                         bool all_done = true;
252
253                         /* optimization we should add: verify if in->parent is actually used */
254                         foreach(ShaderInput *in, output->links)
255                                 if(in->parent != node && done.find(in->parent) == done.end())
256                                         all_done = false;
257
258                         if(all_done) {
259                                 int size = stack_size(output->type);
260
261                                 for(int i = 0; i < size; i++)
262                                         active_stack.users[output->stack_offset + i]--;
263
264                                 output->stack_offset = SVM_STACK_INVALID;
265
266                                 foreach(ShaderInput *in, output->links)
267                                         in->stack_offset = SVM_STACK_INVALID;
268                         }
269                 }
270         }
271 }
272
273 void SVMCompiler::stack_clear_temporary(ShaderNode *node)
274 {
275         foreach(ShaderInput *input, node->inputs) {
276                 if(!input->link && input->stack_offset != SVM_STACK_INVALID) {
277                         int size = stack_size(input->type);
278
279                         for(int i = 0; i < size; i++)
280                                 active_stack.users[input->stack_offset + i]--;
281
282                         input->stack_offset = SVM_STACK_INVALID;
283                 }
284         }
285 }
286
287 uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
288 {
289         assert(x <= 255);
290         assert(y <= 255);
291         assert(z <= 255);
292         assert(w <= 255);
293
294         return (x) | (y << 8) | (z << 16) | (w << 24);
295 }
296
297 void SVMCompiler::add_node(int a, int b, int c, int d)
298 {
299         svm_nodes.push_back(make_int4(a, b, c, d));
300 }
301
302 void SVMCompiler::add_node(NodeType type, int a, int b, int c)
303 {
304         svm_nodes.push_back(make_int4(type, a, b, c));
305 }
306
307 void SVMCompiler::add_node(NodeType type, const float3& f)
308 {
309         svm_nodes.push_back(make_int4(type,
310                 __float_as_int(f.x),
311                 __float_as_int(f.y),
312                 __float_as_int(f.z)));
313 }
314
315 void SVMCompiler::add_node(const float4& f)
316 {
317         svm_nodes.push_back(make_int4(
318                 __float_as_int(f.x),
319                 __float_as_int(f.y),
320                 __float_as_int(f.z),
321                 __float_as_int(f.w)));
322 }
323
324 uint SVMCompiler::attribute(ustring name)
325 {
326         return shader_manager->get_attribute_id(name);
327 }
328
329 uint SVMCompiler::attribute(Attribute::Standard std)
330 {
331         return shader_manager->get_attribute_id(std);
332 }
333
334 bool SVMCompiler::node_skip_input(ShaderNode *node, ShaderInput *input)
335 {
336         /* nasty exception .. */
337         if(current_type == SHADER_TYPE_DISPLACEMENT && input->link && input->link->parent->name == ustring("bump"))
338                 return true;
339         
340         return false;
341 }
342
343 void SVMCompiler::find_dependencies(set<ShaderNode*>& dependencies, const set<ShaderNode*>& done, ShaderInput *input)
344 {
345         ShaderNode *node = (input->link)? input->link->parent: NULL;
346
347         if(node && done.find(node) == done.end()) {
348                 foreach(ShaderInput *in, node->inputs)
349                         if(!node_skip_input(node, in))
350                                 find_dependencies(dependencies, done, in);
351
352                 dependencies.insert(node);
353         }
354 }
355
356 void SVMCompiler::generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNode*>& done)
357 {
358         bool nodes_done;
359
360         do {
361                 nodes_done = true;
362
363                 foreach(ShaderNode *node, nodes) {
364                         if(done.find(node) == done.end()) {
365                                 bool inputs_done = true;
366
367                                 foreach(ShaderInput *input, node->inputs)
368                                         if(!node_skip_input(node, input))
369                                                 if(input->link && done.find(input->link->parent) == done.end())
370                                                         inputs_done = false;
371
372                                 if(inputs_done) {
373                                         node->compile(*this);
374                                         stack_clear_users(node, done);
375                                         stack_clear_temporary(node);
376                                         done.insert(node);
377                                 }
378                                 else
379                                         nodes_done = false;
380                         }
381                 }
382         } while(!nodes_done);
383 }
384
385 void SVMCompiler::generate_closure(ShaderNode *node, set<ShaderNode*>& done)
386 {
387         if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
388                 ShaderInput *fin = node->input("Fac");
389                 ShaderInput *cl1in = node->input("Closure1");
390                 ShaderInput *cl2in = node->input("Closure2");
391
392                 /* execute dependencies for mix weight */
393                 if(fin) {
394                         set<ShaderNode*> dependencies;
395                         find_dependencies(dependencies, done, fin);
396                         generate_svm_nodes(dependencies, done);
397
398                         /* add mix node */
399                         stack_assign(fin);
400                 }
401
402                 int mix_offset = svm_nodes.size();
403
404                 if(fin)
405                         add_node(NODE_MIX_CLOSURE, fin->stack_offset, 0, 0);
406                 else
407                         add_node(NODE_ADD_CLOSURE, 0, 0, 0);
408
409                 /* generate code for closure 1
410                    note we backup all compiler state and restore it afterwards, so one
411                    closure choice doesn't influence the other*/
412                 if(cl1in->link) {
413                         StackBackup backup;
414                         stack_backup(backup, done);
415
416                         generate_closure(cl1in->link->parent, done);
417                         add_node(NODE_END, 0, 0, 0);
418
419                         stack_restore(backup, done);
420                 }
421                 else
422                         add_node(NODE_END, 0, 0, 0);
423
424                 /* generate code for closure 2 */
425                 int cl2_offset = svm_nodes.size();
426
427                 if(cl2in->link) {
428                         StackBackup backup;
429                         stack_backup(backup, done);
430
431                         generate_closure(cl2in->link->parent, done);
432                         add_node(NODE_END, 0, 0, 0);
433
434                         stack_restore(backup, done);
435                 }
436                 else
437                         add_node(NODE_END, 0, 0, 0);
438
439                 /* set jump for mix node, -1 because offset is already
440                    incremented when this jump is added to it */
441                 svm_nodes[mix_offset].z = cl2_offset - mix_offset - 1;
442
443                 done.insert(node);
444                 stack_clear_users(node, done);
445                 stack_clear_temporary(node);
446         }
447         else {
448                 /* execute dependencies for closure */
449                 foreach(ShaderInput *in, node->inputs) {
450                         if(!node_skip_input(node, in) && in->link) {
451                                 set<ShaderNode*> dependencies;
452                                 find_dependencies(dependencies, done, in);
453                                 generate_svm_nodes(dependencies, done);
454                         }
455                 }
456
457                 /* compile closure itself */
458                 node->compile(*this);
459                 stack_clear_users(node, done);
460                 stack_clear_temporary(node);
461
462                 if(node->name == ustring("emission"))
463                         current_shader->has_surface_emission = true;
464
465                 /* end node is added outside of this */
466         }
467 }
468
469 void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& done, uint in_offset)
470 {
471         /* todo: the weaks point here is that unlike the single closure sampling 
472            we will evaluate all nodes even if they are used as input for closures
473            that are unused. it's not clear what would be the best way to skip such
474            nodes at runtime, especially if they are tangled up  */
475
476         if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
477                 ShaderInput *fin = node->input("Fac");
478                 ShaderInput *cl1in = node->input("Closure1");
479                 ShaderInput *cl2in = node->input("Closure2");
480
481                 uint out1_offset = SVM_STACK_INVALID;
482                 uint out2_offset = SVM_STACK_INVALID;
483
484                 if(fin) {
485                         /* mix closure */
486                         set<ShaderNode*> dependencies;
487                         find_dependencies(dependencies, done, fin);
488                         generate_svm_nodes(dependencies, done);
489
490                         stack_assign(fin);
491
492                         if(cl1in->link)
493                                 out1_offset = stack_find_offset(SHADER_SOCKET_FLOAT);
494                         if(cl2in->link)
495                                 out2_offset = stack_find_offset(SHADER_SOCKET_FLOAT);
496
497                         add_node(NODE_MIX_CLOSURE, 
498                                 encode_uchar4(fin->stack_offset, in_offset, out1_offset, out2_offset));
499                 }
500                 else {
501                         /* add closure */
502                         out1_offset = in_offset;
503                         out2_offset = in_offset;
504                 }
505
506                 if(cl1in->link) {
507                         generate_multi_closure(cl1in->link->parent, done, out1_offset);
508
509                         if(fin)
510                                 active_stack.users[out1_offset]--;
511                 }
512
513                 if(cl2in->link) {
514                         generate_multi_closure(cl2in->link->parent, done, out2_offset);
515
516                         if(fin)
517                                 active_stack.users[out2_offset]--;
518                 }
519         }
520         else {
521                 /* execute dependencies for closure */
522                 foreach(ShaderInput *in, node->inputs) {
523                         if(!node_skip_input(node, in) && in->link) {
524                                 set<ShaderNode*> dependencies;
525                                 find_dependencies(dependencies, done, in);
526                                 generate_svm_nodes(dependencies, done);
527                         }
528                 }
529
530                 mix_weight_offset = in_offset;
531
532                 /* compile closure itself */
533                 node->compile(*this);
534                 stack_clear_users(node, done);
535                 stack_clear_temporary(node);
536
537                 mix_weight_offset = SVM_STACK_INVALID;
538
539                 if(node->name == ustring("emission"))
540                         current_shader->has_surface_emission = true;
541
542                 /* end node is added outside of this */
543         }
544 }
545
546
547 void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType type)
548 {
549         /* Converting a shader graph into svm_nodes that can be executed
550          * sequentially on the virtual machine is fairly simple. We can keep
551          * looping over nodes and each time all the inputs of a node are
552          * ready, we add svm_nodes for it that read the inputs from the
553          * stack and write outputs back to the stack.
554          *
555          * With the SVM, we always sample only a single closure. We can think
556          * of all closures nodes as a binary tree with mix closures as inner
557          * nodes and other closures as leafs. The SVM will traverse that tree,
558          * each time deciding to go left or right depending on the mix weights,
559          * until a closure is found.
560          *
561          * We only execute nodes that are needed for the mix weights and chosen
562          * closure.
563          */
564
565         current_type = type;
566         current_graph = graph;
567
568         /* get input in output node */
569         ShaderNode *node = graph->output();
570         ShaderInput *clin = NULL;
571         
572         if(type == SHADER_TYPE_SURFACE)
573                 clin = node->input("Surface");
574         else if(type == SHADER_TYPE_VOLUME)
575                 clin = node->input("Volume");
576         else if(type == SHADER_TYPE_DISPLACEMENT)
577                 clin = node->input("Displacement");
578         else
579                 assert(0);
580
581         /* clear all compiler state */
582         memset(&active_stack, 0, sizeof(active_stack));
583         svm_nodes.clear();
584
585         foreach(ShaderNode *node, graph->nodes) {
586                 foreach(ShaderInput *input, node->inputs)
587                         input->stack_offset = SVM_STACK_INVALID;
588                 foreach(ShaderOutput *output, node->outputs)
589                         output->stack_offset = SVM_STACK_INVALID;
590         }
591
592         if(clin->link) {
593                 bool generate = false;
594                 if(type == SHADER_TYPE_SURFACE) {
595                         /* generate surface shader */
596                         generate = true;
597                         shader->has_surface = true;
598                 }
599                 else if(type == SHADER_TYPE_VOLUME) {
600                         /* generate volume shader */
601                         generate = true;
602                         shader->has_volume = true;
603                 }
604                 else if(type == SHADER_TYPE_DISPLACEMENT) {
605                         /* generate displacement shader */
606                         generate = true;
607                         shader->has_displacement = true;
608                 }
609
610                 if(generate) {
611                         set<ShaderNode*> done;
612
613                         if(use_multi_closure)
614                                 generate_multi_closure(clin->link->parent, done, SVM_STACK_INVALID);
615                         else
616                                 generate_closure(clin->link->parent, done);
617                 }
618         }
619
620         /* compile output node */
621         node->compile(*this);
622
623         add_node(NODE_END, 0, 0, 0);
624 }
625
626 void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int index)
627 {
628         /* copy graph for shader with bump mapping */
629         ShaderNode *node = shader->graph->output();
630
631         if(node->input("Surface")->link && node->input("Displacement")->link)
632                 if(!shader->graph_bump)
633                         shader->graph_bump = shader->graph->copy();
634
635         /* finalize */
636         shader->graph->finalize(false, false);
637         if(shader->graph_bump)
638                 shader->graph_bump->finalize(true, false);
639
640         current_shader = shader;
641
642         shader->has_surface = false;
643         shader->has_surface_emission = false;
644         shader->has_volume = false;
645         shader->has_displacement = false;
646
647         /* generate surface shader */
648         compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
649         global_svm_nodes[index*2 + 0].y = global_svm_nodes.size();
650         global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
651         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
652
653         if(shader->graph_bump) {
654                 compile_type(shader, shader->graph_bump, SHADER_TYPE_SURFACE);
655                 global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
656                 global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
657         }
658
659         /* generate volume shader */
660         compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
661         global_svm_nodes[index*2 + 0].z = global_svm_nodes.size();
662         global_svm_nodes[index*2 + 1].z = global_svm_nodes.size();
663         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
664
665         /* generate displacement shader */
666         compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
667         global_svm_nodes[index*2 + 0].w = global_svm_nodes.size();
668         global_svm_nodes[index*2 + 1].w = global_svm_nodes.size();
669         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
670 }
671
672 CCL_NAMESPACE_END
673