054989bbc283e1fdd6d9307c04a5c8df9df7b062
[blender.git] / intern / cycles / render / svm.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18
19 #include "device.h"
20 #include "graph.h"
21 #include "light.h"
22 #include "mesh.h"
23 #include "scene.h"
24 #include "shader.h"
25 #include "svm.h"
26
27 #include "util_debug.h"
28 #include "util_foreach.h"
29 #include "util_progress.h"
30
31 CCL_NAMESPACE_BEGIN
32
33 /* Shader Manager */
34
35 SVMShaderManager::SVMShaderManager()
36 {
37 }
38
39 SVMShaderManager::~SVMShaderManager()
40 {
41 }
42
43 void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
44 {
45         if(!need_update)
46                 return;
47
48         /* test if we need to update */
49         device_free(device, dscene);
50
51         /* svm_nodes */
52         vector<int4> svm_nodes;
53         size_t i;
54
55         for(i = 0; i < scene->shaders.size(); i++) {
56                 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
57                 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
58         }
59         
60         bool sunsky_done = false;
61
62         for(i = 0; i < scene->shaders.size(); i++) {
63                 Shader *shader = scene->shaders[i];
64
65                 if(progress.get_cancel()) return;
66
67                 assert(shader->graph);
68
69                 if(shader->sample_as_light && shader->has_surface_emission)
70                         scene->light_manager->need_update = true;
71
72                 SVMCompiler compiler(scene->shader_manager, scene->image_manager,
73                         scene->params.use_multi_closure);
74                 compiler.sunsky = (sunsky_done)? NULL: &dscene->data.sunsky;
75                 compiler.background = ((int)i == scene->default_background);
76                 compiler.compile(shader, svm_nodes, i);
77                 if(!compiler.sunsky)
78                         sunsky_done = true;
79         }
80
81         dscene->svm_nodes.copy((uint4*)&svm_nodes[0], svm_nodes.size());
82         device->tex_alloc("__svm_nodes", dscene->svm_nodes);
83
84         for(i = 0; i < scene->shaders.size(); i++) {
85                 Shader *shader = scene->shaders[i];
86                 shader->need_update = false;
87         }
88
89         device_update_common(device, dscene, scene, progress);
90
91         need_update = false;
92 }
93
94 void SVMShaderManager::device_free(Device *device, DeviceScene *dscene)
95 {
96         device_free_common(device, dscene);
97
98         device->tex_free(dscene->svm_nodes);
99         dscene->svm_nodes.clear();
100 }
101
102 /* Graph Compiler */
103
104 SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_manager_, bool use_multi_closure_)
105 {
106         shader_manager = shader_manager_;
107         image_manager = image_manager_;
108         sunsky = NULL;
109         max_stack_use = 0;
110         current_type = SHADER_TYPE_CLOSURE;
111         current_shader = NULL;
112         background = false;
113         mix_weight_offset = SVM_STACK_INVALID;
114         use_multi_closure = use_multi_closure_;
115 }
116
117 int SVMCompiler::stack_size(ShaderSocketType type)
118 {
119         if(type == SHADER_SOCKET_FLOAT)
120                 return 1;
121         else if(type == SHADER_SOCKET_COLOR)
122                 return 3;
123         else if(type == SHADER_SOCKET_VECTOR)
124                 return 3;
125         else if(type == SHADER_SOCKET_NORMAL)
126                 return 3;
127         else if(type == SHADER_SOCKET_POINT)
128                 return 3;
129         else if(type == SHADER_SOCKET_CLOSURE)
130                 return 0;
131
132         assert(0);
133         return 0;
134 }
135
136 int SVMCompiler::stack_find_offset(ShaderSocketType type)
137 {
138         int size = stack_size(type);
139         int offset = -1;
140         
141         /* find free space in stack & mark as used */
142         for(int i = 0, num_unused = 0; i < SVM_STACK_SIZE; i++) {
143                 if(active_stack.users[i]) num_unused = 0;
144                 else num_unused++;
145
146                 if(num_unused == size) {
147                         offset = i+1 - size;
148                         max_stack_use = max(i+1, max_stack_use);
149
150                         while(i >= offset)
151                                 active_stack.users[i--] = 1;
152
153                         return offset;
154                 }
155         }
156
157         fprintf(stderr, "Out of SVM stack space.\n");
158         assert(0);
159
160         return offset;
161 }
162
163 void SVMCompiler::stack_backup(StackBackup& backup, set<ShaderNode*>& done)
164 {
165         backup.done = done;
166         backup.stack = active_stack;
167
168         foreach(ShaderNode *node, current_graph->nodes) {
169                 foreach(ShaderInput *input, node->inputs)
170                         backup.offsets.push_back(input->stack_offset);
171                 foreach(ShaderOutput *output, node->outputs)
172                         backup.offsets.push_back(output->stack_offset);
173         }
174 }
175
176 void SVMCompiler::stack_restore(StackBackup& backup, set<ShaderNode*>& done)
177 {
178         int i = 0;
179
180         done = backup.done;
181         active_stack = backup.stack;
182
183         foreach(ShaderNode *node, current_graph->nodes) {
184                 foreach(ShaderInput *input, node->inputs)
185                         input->stack_offset = backup.offsets[i++];
186                 foreach(ShaderOutput *output, node->outputs)
187                         output->stack_offset = backup.offsets[i++];
188         }
189 }
190
191 void SVMCompiler::stack_assign(ShaderInput *input)
192 {
193         /* stack offset assign? */
194         if(input->stack_offset == SVM_STACK_INVALID) {
195                 if(input->link) {
196                         /* linked to output -> use output offset */
197                         input->stack_offset = input->link->stack_offset;
198                 }
199                 else {
200                         /* not linked to output -> add nodes to load default value */
201                         input->stack_offset = stack_find_offset(input->type);
202
203                         if(input->type == SHADER_SOCKET_FLOAT) {
204                                 add_node(NODE_VALUE_F, __float_as_int(input->value.x), input->stack_offset);
205                         }
206                         else if(input->type == SHADER_SOCKET_VECTOR ||
207                                 input->type == SHADER_SOCKET_NORMAL ||
208                                 input->type == SHADER_SOCKET_POINT ||
209                                 input->type == SHADER_SOCKET_COLOR) {
210
211                                 add_node(NODE_VALUE_V, input->stack_offset);
212                                 add_node(NODE_VALUE_V, input->value);
213                         }
214                         else /* should not get called for closure */
215                                 assert(0);
216                 }
217         }
218 }
219
220 void SVMCompiler::stack_assign(ShaderOutput *output)
221 {
222         /* if no stack offset assigned yet, find one */
223         if(output->stack_offset == SVM_STACK_INVALID)
224                 output->stack_offset = stack_find_offset(output->type);
225 }
226
227 void SVMCompiler::stack_link(ShaderInput *input, ShaderOutput *output)
228 {
229         if(output->stack_offset == SVM_STACK_INVALID) {
230                 assert(input->link);
231                 assert(stack_size(output->type) == stack_size(input->link->type));
232
233                 output->stack_offset = input->link->stack_offset;
234
235                 int size = stack_size(output->type);
236
237                 for(int i = 0; i < size; i++)
238                         active_stack.users[output->stack_offset + i]++;
239         }
240 }
241
242 void SVMCompiler::stack_clear_users(ShaderNode *node, set<ShaderNode*>& done)
243 {
244         /* optimization we should add:
245            find and lower user counts for outputs for which all inputs are done.
246            this is done before the node is compiled, under the assumption that the
247            node will first load all inputs from the stack and then writes its
248            outputs. this used to work, but was disabled because it gave trouble
249            with inputs getting stack positions assigned */
250
251         foreach(ShaderInput *input, node->inputs) {
252                 ShaderOutput *output = input->link;
253
254                 if(output && output->stack_offset != SVM_STACK_INVALID) {
255                         bool all_done = true;
256
257                         /* optimization we should add: verify if in->parent is actually used */
258                         foreach(ShaderInput *in, output->links)
259                                 if(in->parent != node && done.find(in->parent) == done.end())
260                                         all_done = false;
261
262                         if(all_done) {
263                                 int size = stack_size(output->type);
264
265                                 for(int i = 0; i < size; i++)
266                                         active_stack.users[output->stack_offset + i]--;
267
268                                 output->stack_offset = SVM_STACK_INVALID;
269
270                                 foreach(ShaderInput *in, output->links)
271                                         in->stack_offset = SVM_STACK_INVALID;
272                         }
273                 }
274         }
275 }
276
277 void SVMCompiler::stack_clear_temporary(ShaderNode *node)
278 {
279         foreach(ShaderInput *input, node->inputs) {
280                 if(!input->link && input->stack_offset != SVM_STACK_INVALID) {
281                         int size = stack_size(input->type);
282
283                         for(int i = 0; i < size; i++)
284                                 active_stack.users[input->stack_offset + i]--;
285
286                         input->stack_offset = SVM_STACK_INVALID;
287                 }
288         }
289 }
290
291 uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
292 {
293         assert(x <= 255);
294         assert(y <= 255);
295         assert(z <= 255);
296         assert(w <= 255);
297
298         return (x) | (y << 8) | (z << 16) | (w << 24);
299 }
300
301 void SVMCompiler::add_node(int a, int b, int c, int d)
302 {
303         svm_nodes.push_back(make_int4(a, b, c, d));
304 }
305
306 void SVMCompiler::add_node(NodeType type, int a, int b, int c)
307 {
308         svm_nodes.push_back(make_int4(type, a, b, c));
309 }
310
311 void SVMCompiler::add_node(NodeType type, const float3& f)
312 {
313         svm_nodes.push_back(make_int4(type,
314                 __float_as_int(f.x),
315                 __float_as_int(f.y),
316                 __float_as_int(f.z)));
317 }
318
319 void SVMCompiler::add_node(const float4& f)
320 {
321         svm_nodes.push_back(make_int4(
322                 __float_as_int(f.x),
323                 __float_as_int(f.y),
324                 __float_as_int(f.z),
325                 __float_as_int(f.w)));
326 }
327
328 uint SVMCompiler::attribute(ustring name)
329 {
330         return shader_manager->get_attribute_id(name);
331 }
332
333 uint SVMCompiler::attribute(Attribute::Standard std)
334 {
335         return shader_manager->get_attribute_id(std);
336 }
337
338 bool SVMCompiler::node_skip_input(ShaderNode *node, ShaderInput *input)
339 {
340         /* nasty exception .. */
341         if(current_type == SHADER_TYPE_DISPLACEMENT && input->link && input->link->parent->name == ustring("bump"))
342                 return true;
343         
344         return false;
345 }
346
347 void SVMCompiler::find_dependencies(set<ShaderNode*>& dependencies, const set<ShaderNode*>& done, ShaderInput *input)
348 {
349         ShaderNode *node = (input->link)? input->link->parent: NULL;
350
351         if(node && done.find(node) == done.end()) {
352                 foreach(ShaderInput *in, node->inputs)
353                         if(!node_skip_input(node, in))
354                                 find_dependencies(dependencies, done, in);
355
356                 dependencies.insert(node);
357         }
358 }
359
360 void SVMCompiler::generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNode*>& done)
361 {
362         bool nodes_done;
363
364         do {
365                 nodes_done = true;
366
367                 foreach(ShaderNode *node, nodes) {
368                         if(done.find(node) == done.end()) {
369                                 bool inputs_done = true;
370
371                                 foreach(ShaderInput *input, node->inputs)
372                                         if(!node_skip_input(node, input))
373                                                 if(input->link && done.find(input->link->parent) == done.end())
374                                                         inputs_done = false;
375
376                                 if(inputs_done) {
377                                         node->compile(*this);
378                                         stack_clear_users(node, done);
379                                         stack_clear_temporary(node);
380                                         done.insert(node);
381                                 }
382                                 else
383                                         nodes_done = false;
384                         }
385                 }
386         } while(!nodes_done);
387 }
388
389 void SVMCompiler::generate_closure(ShaderNode *node, set<ShaderNode*>& done)
390 {
391         if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
392                 ShaderInput *fin = node->input("Fac");
393                 ShaderInput *cl1in = node->input("Closure1");
394                 ShaderInput *cl2in = node->input("Closure2");
395
396                 /* execute dependencies for mix weight */
397                 if(fin) {
398                         set<ShaderNode*> dependencies;
399                         find_dependencies(dependencies, done, fin);
400                         generate_svm_nodes(dependencies, done);
401
402                         /* add mix node */
403                         stack_assign(fin);
404                 }
405
406                 int mix_offset = svm_nodes.size();
407
408                 if(fin)
409                         add_node(NODE_MIX_CLOSURE, fin->stack_offset, 0, 0);
410                 else
411                         add_node(NODE_ADD_CLOSURE, 0, 0, 0);
412
413                 /* generate code for closure 1
414                    note we backup all compiler state and restore it afterwards, so one
415                    closure choice doesn't influence the other*/
416                 if(cl1in->link) {
417                         StackBackup backup;
418                         stack_backup(backup, done);
419
420                         generate_closure(cl1in->link->parent, done);
421                         add_node(NODE_END, 0, 0, 0);
422
423                         stack_restore(backup, done);
424                 }
425                 else
426                         add_node(NODE_END, 0, 0, 0);
427
428                 /* generate code for closure 2 */
429                 int cl2_offset = svm_nodes.size();
430
431                 if(cl2in->link) {
432                         StackBackup backup;
433                         stack_backup(backup, done);
434
435                         generate_closure(cl2in->link->parent, done);
436                         add_node(NODE_END, 0, 0, 0);
437
438                         stack_restore(backup, done);
439                 }
440                 else
441                         add_node(NODE_END, 0, 0, 0);
442
443                 /* set jump for mix node, -1 because offset is already
444                    incremented when this jump is added to it */
445                 svm_nodes[mix_offset].z = cl2_offset - mix_offset - 1;
446
447                 done.insert(node);
448                 stack_clear_users(node, done);
449                 stack_clear_temporary(node);
450         }
451         else {
452                 /* execute dependencies for closure */
453                 foreach(ShaderInput *in, node->inputs) {
454                         if(!node_skip_input(node, in) && in->link) {
455                                 set<ShaderNode*> dependencies;
456                                 find_dependencies(dependencies, done, in);
457                                 generate_svm_nodes(dependencies, done);
458                         }
459                 }
460
461                 /* compile closure itself */
462                 node->compile(*this);
463                 stack_clear_users(node, done);
464                 stack_clear_temporary(node);
465
466                 if(node->name == ustring("emission"))
467                         current_shader->has_surface_emission = true;
468                 if(node->name == ustring("transparent"))
469                         current_shader->has_surface_transparent = true;
470                 if(node->name == ustring("volume"))
471                         current_shader->has_volume = true;
472
473                 /* end node is added outside of this */
474         }
475 }
476
477 void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& done, uint in_offset)
478 {
479         /* todo: the weaks point here is that unlike the single closure sampling 
480            we will evaluate all nodes even if they are used as input for closures
481            that are unused. it's not clear what would be the best way to skip such
482            nodes at runtime, especially if they are tangled up  */
483
484         if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
485                 ShaderInput *fin = node->input("Fac");
486                 ShaderInput *cl1in = node->input("Closure1");
487                 ShaderInput *cl2in = node->input("Closure2");
488
489                 uint out1_offset = SVM_STACK_INVALID;
490                 uint out2_offset = SVM_STACK_INVALID;
491
492                 if(fin) {
493                         /* mix closure */
494                         set<ShaderNode*> dependencies;
495                         find_dependencies(dependencies, done, fin);
496                         generate_svm_nodes(dependencies, done);
497
498                         stack_assign(fin);
499
500                         if(cl1in->link)
501                                 out1_offset = stack_find_offset(SHADER_SOCKET_FLOAT);
502                         if(cl2in->link)
503                                 out2_offset = stack_find_offset(SHADER_SOCKET_FLOAT);
504
505                         add_node(NODE_MIX_CLOSURE, 
506                                 encode_uchar4(fin->stack_offset, in_offset, out1_offset, out2_offset));
507                 }
508                 else {
509                         /* add closure */
510                         out1_offset = in_offset;
511                         out2_offset = in_offset;
512                 }
513
514                 if(cl1in->link) {
515                         generate_multi_closure(cl1in->link->parent, done, out1_offset);
516
517                         if(fin)
518                                 active_stack.users[out1_offset]--;
519                 }
520
521                 if(cl2in->link) {
522                         generate_multi_closure(cl2in->link->parent, done, out2_offset);
523
524                         if(fin)
525                                 active_stack.users[out2_offset]--;
526                 }
527         }
528         else {
529                 /* execute dependencies for closure */
530                 foreach(ShaderInput *in, node->inputs) {
531                         if(!node_skip_input(node, in) && in->link) {
532                                 set<ShaderNode*> dependencies;
533                                 find_dependencies(dependencies, done, in);
534                                 generate_svm_nodes(dependencies, done);
535                         }
536                 }
537
538                 mix_weight_offset = in_offset;
539
540                 /* compile closure itself */
541                 node->compile(*this);
542                 stack_clear_users(node, done);
543                 stack_clear_temporary(node);
544
545                 mix_weight_offset = SVM_STACK_INVALID;
546
547                 if(node->name == ustring("emission"))
548                         current_shader->has_surface_emission = true;
549                 if(node->name == ustring("transparent"))
550                         current_shader->has_surface_transparent = true;
551
552                 /* end node is added outside of this */
553         }
554 }
555
556
557 void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType type)
558 {
559         /* Converting a shader graph into svm_nodes that can be executed
560          * sequentially on the virtual machine is fairly simple. We can keep
561          * looping over nodes and each time all the inputs of a node are
562          * ready, we add svm_nodes for it that read the inputs from the
563          * stack and write outputs back to the stack.
564          *
565          * With the SVM, we always sample only a single closure. We can think
566          * of all closures nodes as a binary tree with mix closures as inner
567          * nodes and other closures as leafs. The SVM will traverse that tree,
568          * each time deciding to go left or right depending on the mix weights,
569          * until a closure is found.
570          *
571          * We only execute nodes that are needed for the mix weights and chosen
572          * closure.
573          */
574
575         current_type = type;
576         current_graph = graph;
577
578         /* get input in output node */
579         ShaderNode *node = graph->output();
580         ShaderInput *clin = NULL;
581         
582         if(type == SHADER_TYPE_CLOSURE)
583                 clin = node->input("Closure");
584         else if(type == SHADER_TYPE_DISPLACEMENT)
585                 clin = node->input("Displacement");
586         else
587                 assert(0);
588
589         /* clear all compiler state */
590         memset(&active_stack, 0, sizeof(active_stack));
591         svm_nodes.clear();
592
593         foreach(ShaderNode *node, graph->nodes) {
594                 foreach(ShaderInput *input, node->inputs)
595                         input->stack_offset = SVM_STACK_INVALID;
596                 foreach(ShaderOutput *output, node->outputs)
597                         output->stack_offset = SVM_STACK_INVALID;
598         }
599
600         if(clin->link) {
601                 bool generate = false;
602                 if(type == SHADER_TYPE_CLOSURE) {
603                         /* generate surface shader */
604                         generate = true;
605                 }
606                 else if(type == SHADER_TYPE_DISPLACEMENT) {
607                         /* generate displacement shader */
608                         generate = true;
609                         shader->has_displacement = true;
610                 }
611
612                 if(generate) {
613                         set<ShaderNode*> done;
614
615                         if(use_multi_closure)
616                                 generate_multi_closure(clin->link->parent, done, SVM_STACK_INVALID);
617                         else
618                                 generate_closure(clin->link->parent, done);
619                 }
620         }
621
622         /* compile output node */
623         node->compile(*this);
624
625         add_node(NODE_END, 0, 0, 0);
626 }
627
628 void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int index)
629 {
630         /* copy graph for shader with bump mapping */
631         ShaderNode *node = shader->graph->output();
632
633         if(node->input("Closure")->link && node->input("Displacement")->link)
634                 if(!shader->graph_bump)
635                         shader->graph_bump = shader->graph->copy();
636
637         /* finalize */
638         shader->graph->finalize(false, false);
639         if(shader->graph_bump)
640                 shader->graph_bump->finalize(true, false);
641
642         current_shader = shader;
643
644         shader->has_surface_emission = false;
645         shader->has_surface_transparent = false;
646         shader->has_volume = false;
647         shader->has_displacement = false;
648
649         /* generate surface shader */
650         compile_type(shader, shader->graph, SHADER_TYPE_CLOSURE);
651         global_svm_nodes[index*2 + 0].y = global_svm_nodes.size();
652         global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
653         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
654
655         if(shader->graph_bump) {
656                 compile_type(shader, shader->graph_bump, SHADER_TYPE_CLOSURE);
657                 global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
658                 global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
659         }
660
661         /* generate displacement shader */
662         compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
663         global_svm_nodes[index*2 + 0].w = global_svm_nodes.size();
664         global_svm_nodes[index*2 + 1].w = global_svm_nodes.size();
665         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
666 }
667
668 CCL_NAMESPACE_END
669