Merging r46111 through r46136 from trunk into soc-2011-tomato
[blender-staging.git] / intern / cycles / render / svm.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18
19 #include "device.h"
20 #include "graph.h"
21 #include "light.h"
22 #include "mesh.h"
23 #include "scene.h"
24 #include "shader.h"
25 #include "svm.h"
26
27 #include "util_debug.h"
28 #include "util_foreach.h"
29 #include "util_progress.h"
30
31 CCL_NAMESPACE_BEGIN
32
33 /* Shader Manager */
34
35 SVMShaderManager::SVMShaderManager()
36 {
37 }
38
39 SVMShaderManager::~SVMShaderManager()
40 {
41 }
42
43 void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
44 {
45         if(!need_update)
46                 return;
47
48         /* test if we need to update */
49         device_free(device, dscene);
50
51         /* svm_nodes */
52         vector<int4> svm_nodes;
53         size_t i;
54
55         for(i = 0; i < scene->shaders.size(); i++) {
56                 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
57                 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
58         }
59         
60         bool sunsky_done = false;
61         bool use_multi_closure = device->info.advanced_shading;
62
63         for(i = 0; i < scene->shaders.size(); i++) {
64                 Shader *shader = scene->shaders[i];
65
66                 if(progress.get_cancel()) return;
67
68                 assert(shader->graph);
69
70                 if(shader->sample_as_light && shader->has_surface_emission)
71                         scene->light_manager->need_update = true;
72
73                 SVMCompiler compiler(scene->shader_manager, scene->image_manager,
74                         use_multi_closure);
75                 compiler.sunsky = (sunsky_done)? NULL: &dscene->data.sunsky;
76                 compiler.background = ((int)i == scene->default_background);
77                 compiler.compile(shader, svm_nodes, i);
78                 if(!compiler.sunsky)
79                         sunsky_done = true;
80         }
81
82         dscene->svm_nodes.copy((uint4*)&svm_nodes[0], svm_nodes.size());
83         device->tex_alloc("__svm_nodes", dscene->svm_nodes);
84
85         for(i = 0; i < scene->shaders.size(); i++) {
86                 Shader *shader = scene->shaders[i];
87                 shader->need_update = false;
88         }
89
90         device_update_common(device, dscene, scene, progress);
91
92         need_update = false;
93 }
94
95 void SVMShaderManager::device_free(Device *device, DeviceScene *dscene)
96 {
97         device_free_common(device, dscene);
98
99         device->tex_free(dscene->svm_nodes);
100         dscene->svm_nodes.clear();
101 }
102
103 /* Graph Compiler */
104
105 SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_manager_, bool use_multi_closure_)
106 {
107         shader_manager = shader_manager_;
108         image_manager = image_manager_;
109         sunsky = NULL;
110         max_stack_use = 0;
111         current_type = SHADER_TYPE_SURFACE;
112         current_shader = NULL;
113         background = false;
114         mix_weight_offset = SVM_STACK_INVALID;
115         use_multi_closure = use_multi_closure_;
116 }
117
118 int SVMCompiler::stack_size(ShaderSocketType type)
119 {
120         if(type == SHADER_SOCKET_FLOAT)
121                 return 1;
122         else if(type == SHADER_SOCKET_COLOR)
123                 return 3;
124         else if(type == SHADER_SOCKET_VECTOR)
125                 return 3;
126         else if(type == SHADER_SOCKET_NORMAL)
127                 return 3;
128         else if(type == SHADER_SOCKET_POINT)
129                 return 3;
130         else if(type == SHADER_SOCKET_CLOSURE)
131                 return 0;
132
133         assert(0);
134         return 0;
135 }
136
137 int SVMCompiler::stack_find_offset(ShaderSocketType type)
138 {
139         int size = stack_size(type);
140         int offset = -1;
141         
142         /* find free space in stack & mark as used */
143         for(int i = 0, num_unused = 0; i < SVM_STACK_SIZE; i++) {
144                 if(active_stack.users[i]) num_unused = 0;
145                 else num_unused++;
146
147                 if(num_unused == size) {
148                         offset = i+1 - size;
149                         max_stack_use = max(i+1, max_stack_use);
150
151                         while(i >= offset)
152                                 active_stack.users[i--] = 1;
153
154                         return offset;
155                 }
156         }
157
158         fprintf(stderr, "Out of SVM stack space.\n");
159         assert(0);
160
161         return offset;
162 }
163
164 void SVMCompiler::stack_clear_offset(ShaderSocketType type, int offset)
165 {
166         int size = stack_size(type);
167
168         for(int i = 0; i < size; i++)
169                 active_stack.users[offset + i]--;
170 }
171
172 void SVMCompiler::stack_backup(StackBackup& backup, set<ShaderNode*>& done)
173 {
174         backup.done = done;
175         backup.stack = active_stack;
176
177         foreach(ShaderNode *node, current_graph->nodes) {
178                 foreach(ShaderInput *input, node->inputs)
179                         backup.offsets.push_back(input->stack_offset);
180                 foreach(ShaderOutput *output, node->outputs)
181                         backup.offsets.push_back(output->stack_offset);
182         }
183 }
184
185 void SVMCompiler::stack_restore(StackBackup& backup, set<ShaderNode*>& done)
186 {
187         int i = 0;
188
189         done = backup.done;
190         active_stack = backup.stack;
191
192         foreach(ShaderNode *node, current_graph->nodes) {
193                 foreach(ShaderInput *input, node->inputs)
194                         input->stack_offset = backup.offsets[i++];
195                 foreach(ShaderOutput *output, node->outputs)
196                         output->stack_offset = backup.offsets[i++];
197         }
198 }
199
200 void SVMCompiler::stack_assign(ShaderInput *input)
201 {
202         /* stack offset assign? */
203         if(input->stack_offset == SVM_STACK_INVALID) {
204                 if(input->link) {
205                         /* linked to output -> use output offset */
206                         input->stack_offset = input->link->stack_offset;
207                 }
208                 else {
209                         /* not linked to output -> add nodes to load default value */
210                         input->stack_offset = stack_find_offset(input->type);
211
212                         if(input->type == SHADER_SOCKET_FLOAT) {
213                                 add_node(NODE_VALUE_F, __float_as_int(input->value.x), input->stack_offset);
214                         }
215                         else if(input->type == SHADER_SOCKET_VECTOR ||
216                                 input->type == SHADER_SOCKET_NORMAL ||
217                                 input->type == SHADER_SOCKET_POINT ||
218                                 input->type == SHADER_SOCKET_COLOR) {
219
220                                 add_node(NODE_VALUE_V, input->stack_offset);
221                                 add_node(NODE_VALUE_V, input->value);
222                         }
223                         else /* should not get called for closure */
224                                 assert(0);
225                 }
226         }
227 }
228
229 void SVMCompiler::stack_assign(ShaderOutput *output)
230 {
231         /* if no stack offset assigned yet, find one */
232         if(output->stack_offset == SVM_STACK_INVALID)
233                 output->stack_offset = stack_find_offset(output->type);
234 }
235
236 void SVMCompiler::stack_link(ShaderInput *input, ShaderOutput *output)
237 {
238         if(output->stack_offset == SVM_STACK_INVALID) {
239                 assert(input->link);
240                 assert(stack_size(output->type) == stack_size(input->link->type));
241
242                 output->stack_offset = input->link->stack_offset;
243
244                 int size = stack_size(output->type);
245
246                 for(int i = 0; i < size; i++)
247                         active_stack.users[output->stack_offset + i]++;
248         }
249 }
250
251 void SVMCompiler::stack_clear_users(ShaderNode *node, set<ShaderNode*>& done)
252 {
253         /* optimization we should add:
254            find and lower user counts for outputs for which all inputs are done.
255            this is done before the node is compiled, under the assumption that the
256            node will first load all inputs from the stack and then writes its
257            outputs. this used to work, but was disabled because it gave trouble
258            with inputs getting stack positions assigned */
259
260         foreach(ShaderInput *input, node->inputs) {
261                 ShaderOutput *output = input->link;
262
263                 if(output && output->stack_offset != SVM_STACK_INVALID) {
264                         bool all_done = true;
265
266                         /* optimization we should add: verify if in->parent is actually used */
267                         foreach(ShaderInput *in, output->links)
268                                 if(in->parent != node && done.find(in->parent) == done.end())
269                                         all_done = false;
270
271                         if(all_done) {
272                                 stack_clear_offset(output->type, output->stack_offset);
273                                 output->stack_offset = SVM_STACK_INVALID;
274
275                                 foreach(ShaderInput *in, output->links)
276                                         in->stack_offset = SVM_STACK_INVALID;
277                         }
278                 }
279         }
280 }
281
282 void SVMCompiler::stack_clear_temporary(ShaderNode *node)
283 {
284         foreach(ShaderInput *input, node->inputs) {
285                 if(!input->link && input->stack_offset != SVM_STACK_INVALID) {
286                         stack_clear_offset(input->type, input->stack_offset);
287                         input->stack_offset = SVM_STACK_INVALID;
288                 }
289         }
290 }
291
292 uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
293 {
294         assert(x <= 255);
295         assert(y <= 255);
296         assert(z <= 255);
297         assert(w <= 255);
298
299         return (x) | (y << 8) | (z << 16) | (w << 24);
300 }
301
302 void SVMCompiler::add_node(int a, int b, int c, int d)
303 {
304         svm_nodes.push_back(make_int4(a, b, c, d));
305 }
306
307 void SVMCompiler::add_node(NodeType type, int a, int b, int c)
308 {
309         svm_nodes.push_back(make_int4(type, a, b, c));
310 }
311
312 void SVMCompiler::add_node(NodeType type, const float3& f)
313 {
314         svm_nodes.push_back(make_int4(type,
315                 __float_as_int(f.x),
316                 __float_as_int(f.y),
317                 __float_as_int(f.z)));
318 }
319
320 void SVMCompiler::add_node(const float4& f)
321 {
322         svm_nodes.push_back(make_int4(
323                 __float_as_int(f.x),
324                 __float_as_int(f.y),
325                 __float_as_int(f.z),
326                 __float_as_int(f.w)));
327 }
328
329 void SVMCompiler::add_array(float4 *f, int num)
330 {
331         for(int i = 0; i < num; i++)
332                 add_node(f[i]);
333 }
334
335 uint SVMCompiler::attribute(ustring name)
336 {
337         return shader_manager->get_attribute_id(name);
338 }
339
340 uint SVMCompiler::attribute(AttributeStandard std)
341 {
342         return shader_manager->get_attribute_id(std);
343 }
344
345 bool SVMCompiler::node_skip_input(ShaderNode *node, ShaderInput *input)
346 {
347         /* nasty exception .. */
348         if(current_type == SHADER_TYPE_DISPLACEMENT && input->link && input->link->parent->name == ustring("bump"))
349                 return true;
350         
351         return false;
352 }
353
354 void SVMCompiler::find_dependencies(set<ShaderNode*>& dependencies, const set<ShaderNode*>& done, ShaderInput *input)
355 {
356         ShaderNode *node = (input->link)? input->link->parent: NULL;
357
358         if(node && done.find(node) == done.end()) {
359                 foreach(ShaderInput *in, node->inputs)
360                         if(!node_skip_input(node, in))
361                                 find_dependencies(dependencies, done, in);
362
363                 dependencies.insert(node);
364         }
365 }
366
367 void SVMCompiler::generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNode*>& done)
368 {
369         bool nodes_done;
370
371         do {
372                 nodes_done = true;
373
374                 foreach(ShaderNode *node, nodes) {
375                         if(done.find(node) == done.end()) {
376                                 bool inputs_done = true;
377
378                                 foreach(ShaderInput *input, node->inputs)
379                                         if(!node_skip_input(node, input))
380                                                 if(input->link && done.find(input->link->parent) == done.end())
381                                                         inputs_done = false;
382
383                                 if(inputs_done) {
384                                         node->compile(*this);
385                                         stack_clear_users(node, done);
386                                         stack_clear_temporary(node);
387                                         done.insert(node);
388                                 }
389                                 else
390                                         nodes_done = false;
391                         }
392                 }
393         } while(!nodes_done);
394 }
395
396 void SVMCompiler::generate_closure(ShaderNode *node, set<ShaderNode*>& done)
397 {
398         if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
399                 ShaderInput *fin = node->input("Fac");
400                 ShaderInput *cl1in = node->input("Closure1");
401                 ShaderInput *cl2in = node->input("Closure2");
402
403                 /* execute dependencies for mix weight */
404                 if(fin) {
405                         set<ShaderNode*> dependencies;
406                         find_dependencies(dependencies, done, fin);
407                         generate_svm_nodes(dependencies, done);
408
409                         /* add mix node */
410                         stack_assign(fin);
411                 }
412
413                 int mix_offset = svm_nodes.size();
414
415                 if(fin)
416                         add_node(NODE_MIX_CLOSURE, fin->stack_offset, 0, 0);
417                 else
418                         add_node(NODE_ADD_CLOSURE, 0, 0, 0);
419
420                 /* generate code for closure 1
421                    note we backup all compiler state and restore it afterwards, so one
422                    closure choice doesn't influence the other*/
423                 if(cl1in->link) {
424                         StackBackup backup;
425                         stack_backup(backup, done);
426
427                         generate_closure(cl1in->link->parent, done);
428                         add_node(NODE_END, 0, 0, 0);
429
430                         stack_restore(backup, done);
431                 }
432                 else
433                         add_node(NODE_END, 0, 0, 0);
434
435                 /* generate code for closure 2 */
436                 int cl2_offset = svm_nodes.size();
437
438                 if(cl2in->link) {
439                         StackBackup backup;
440                         stack_backup(backup, done);
441
442                         generate_closure(cl2in->link->parent, done);
443                         add_node(NODE_END, 0, 0, 0);
444
445                         stack_restore(backup, done);
446                 }
447                 else
448                         add_node(NODE_END, 0, 0, 0);
449
450                 /* set jump for mix node, -1 because offset is already
451                    incremented when this jump is added to it */
452                 svm_nodes[mix_offset].z = cl2_offset - mix_offset - 1;
453
454                 done.insert(node);
455                 stack_clear_users(node, done);
456                 stack_clear_temporary(node);
457         }
458         else {
459                 /* execute dependencies for closure */
460                 foreach(ShaderInput *in, node->inputs) {
461                         if(!node_skip_input(node, in) && in->link) {
462                                 set<ShaderNode*> dependencies;
463                                 find_dependencies(dependencies, done, in);
464                                 generate_svm_nodes(dependencies, done);
465                         }
466                 }
467
468                 /* compile closure itself */
469                 node->compile(*this);
470                 stack_clear_users(node, done);
471                 stack_clear_temporary(node);
472
473                 if(node->name == ustring("emission"))
474                         current_shader->has_surface_emission = true;
475                 if(node->name == ustring("transparent"))
476                         current_shader->has_surface_transparent = true;
477
478                 /* end node is added outside of this */
479         }
480 }
481
482 void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& done, uint in_offset)
483 {
484         /* todo: the weaks point here is that unlike the single closure sampling 
485            we will evaluate all nodes even if they are used as input for closures
486            that are unused. it's not clear what would be the best way to skip such
487            nodes at runtime, especially if they are tangled up  */
488
489         if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
490                 ShaderInput *fin = node->input("Fac");
491                 ShaderInput *cl1in = node->input("Closure1");
492                 ShaderInput *cl2in = node->input("Closure2");
493
494                 uint out1_offset = SVM_STACK_INVALID;
495                 uint out2_offset = SVM_STACK_INVALID;
496
497                 if(fin) {
498                         /* mix closure */
499                         set<ShaderNode*> dependencies;
500                         find_dependencies(dependencies, done, fin);
501                         generate_svm_nodes(dependencies, done);
502
503                         stack_assign(fin);
504
505                         if(cl1in->link)
506                                 out1_offset = stack_find_offset(SHADER_SOCKET_FLOAT);
507                         if(cl2in->link)
508                                 out2_offset = stack_find_offset(SHADER_SOCKET_FLOAT);
509
510                         add_node(NODE_MIX_CLOSURE, 
511                                 encode_uchar4(fin->stack_offset, in_offset, out1_offset, out2_offset));
512                 }
513                 else {
514                         /* add closure */
515                         out1_offset = in_offset;
516                         out2_offset = in_offset;
517                 }
518
519                 if(cl1in->link) {
520                         generate_multi_closure(cl1in->link->parent, done, out1_offset);
521
522                         if(fin)
523                                 stack_clear_offset(SHADER_SOCKET_FLOAT, out1_offset);
524                 }
525
526                 if(cl2in->link) {
527                         generate_multi_closure(cl2in->link->parent, done, out2_offset);
528
529                         if(fin)
530                                 stack_clear_offset(SHADER_SOCKET_FLOAT, out2_offset);
531                 }
532         }
533         else {
534                 /* execute dependencies for closure */
535                 foreach(ShaderInput *in, node->inputs) {
536                         if(!node_skip_input(node, in) && in->link) {
537                                 set<ShaderNode*> dependencies;
538                                 find_dependencies(dependencies, done, in);
539                                 generate_svm_nodes(dependencies, done);
540                         }
541                 }
542
543                 mix_weight_offset = in_offset;
544
545                 /* compile closure itself */
546                 node->compile(*this);
547                 stack_clear_users(node, done);
548                 stack_clear_temporary(node);
549
550                 mix_weight_offset = SVM_STACK_INVALID;
551
552                 if(node->name == ustring("emission"))
553                         current_shader->has_surface_emission = true;
554                 if(node->name == ustring("transparent"))
555                         current_shader->has_surface_transparent = true;
556
557                 /* end node is added outside of this */
558         }
559 }
560
561
562 void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType type)
563 {
564         /* Converting a shader graph into svm_nodes that can be executed
565          * sequentially on the virtual machine is fairly simple. We can keep
566          * looping over nodes and each time all the inputs of a node are
567          * ready, we add svm_nodes for it that read the inputs from the
568          * stack and write outputs back to the stack.
569          *
570          * With the SVM, we always sample only a single closure. We can think
571          * of all closures nodes as a binary tree with mix closures as inner
572          * nodes and other closures as leafs. The SVM will traverse that tree,
573          * each time deciding to go left or right depending on the mix weights,
574          * until a closure is found.
575          *
576          * We only execute nodes that are needed for the mix weights and chosen
577          * closure.
578          */
579
580         current_type = type;
581         current_graph = graph;
582
583         /* get input in output node */
584         ShaderNode *node = graph->output();
585         ShaderInput *clin = NULL;
586         
587         if(type == SHADER_TYPE_SURFACE)
588                 clin = node->input("Surface");
589         else if(type == SHADER_TYPE_VOLUME)
590                 clin = node->input("Volume");
591         else if(type == SHADER_TYPE_DISPLACEMENT)
592                 clin = node->input("Displacement");
593         else
594                 assert(0);
595
596         /* clear all compiler state */
597         memset(&active_stack, 0, sizeof(active_stack));
598         svm_nodes.clear();
599
600         foreach(ShaderNode *node, graph->nodes) {
601                 foreach(ShaderInput *input, node->inputs)
602                         input->stack_offset = SVM_STACK_INVALID;
603                 foreach(ShaderOutput *output, node->outputs)
604                         output->stack_offset = SVM_STACK_INVALID;
605         }
606
607         if(clin->link) {
608                 bool generate = false;
609                 if(type == SHADER_TYPE_SURFACE) {
610                         /* generate surface shader */
611                         generate = true;
612                         shader->has_surface = true;
613                 }
614                 else if(type == SHADER_TYPE_VOLUME) {
615                         /* generate volume shader */
616                         generate = true;
617                         shader->has_volume = true;
618                 }
619                 else if(type == SHADER_TYPE_DISPLACEMENT) {
620                         /* generate displacement shader */
621                         generate = true;
622                         shader->has_displacement = true;
623                 }
624
625                 if(generate) {
626                         set<ShaderNode*> done;
627
628                         if(use_multi_closure)
629                                 generate_multi_closure(clin->link->parent, done, SVM_STACK_INVALID);
630                         else
631                                 generate_closure(clin->link->parent, done);
632                 }
633         }
634
635         /* compile output node */
636         node->compile(*this);
637
638         add_node(NODE_END, 0, 0, 0);
639 }
640
641 void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int index)
642 {
643         /* copy graph for shader with bump mapping */
644         ShaderNode *node = shader->graph->output();
645
646         if(node->input("Surface")->link && node->input("Displacement")->link)
647                 if(!shader->graph_bump)
648                         shader->graph_bump = shader->graph->copy();
649
650         /* finalize */
651         shader->graph->finalize(false, false);
652         if(shader->graph_bump)
653                 shader->graph_bump->finalize(true, false);
654
655         current_shader = shader;
656
657         shader->has_surface = false;
658         shader->has_surface_emission = false;
659         shader->has_surface_transparent = false;
660         shader->has_volume = false;
661         shader->has_displacement = false;
662
663         /* generate surface shader */
664         compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
665         global_svm_nodes[index*2 + 0].y = global_svm_nodes.size();
666         global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
667         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
668
669         if(shader->graph_bump) {
670                 compile_type(shader, shader->graph_bump, SHADER_TYPE_SURFACE);
671                 global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
672                 global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
673         }
674
675         /* generate volume shader */
676         compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
677         global_svm_nodes[index*2 + 0].z = global_svm_nodes.size();
678         global_svm_nodes[index*2 + 1].z = global_svm_nodes.size();
679         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
680
681         /* generate displacement shader */
682         compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
683         global_svm_nodes[index*2 + 0].w = global_svm_nodes.size();
684         global_svm_nodes[index*2 + 1].w = global_svm_nodes.size();
685         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
686 }
687
688 CCL_NAMESPACE_END
689