b9c4219f85201181ad9d0214780866ef43cd3381
[blender.git] / intern / cycles / render / svm.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18
19 #include "device.h"
20 #include "graph.h"
21 #include "light.h"
22 #include "mesh.h"
23 #include "scene.h"
24 #include "shader.h"
25 #include "svm.h"
26
27 #include "util_debug.h"
28 #include "util_foreach.h"
29 #include "util_progress.h"
30
31 CCL_NAMESPACE_BEGIN
32
33 /* Shader Manager */
34
35 SVMShaderManager::SVMShaderManager()
36 {
37 }
38
39 SVMShaderManager::~SVMShaderManager()
40 {
41 }
42
43 void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
44 {
45         if(!need_update)
46                 return;
47
48         /* test if we need to update */
49         device_free(device, dscene);
50
51         /* svm_nodes */
52         vector<int4> svm_nodes;
53         size_t i;
54
55         for(i = 0; i < scene->shaders.size(); i++) {
56                 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
57                 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
58         }
59         
60         bool sunsky_done = false;
61         bool use_multi_closure = (scene->params.use_multi_closure && device->type() != DEVICE_OPENCL);
62
63         for(i = 0; i < scene->shaders.size(); i++) {
64                 Shader *shader = scene->shaders[i];
65
66                 if(progress.get_cancel()) return;
67
68                 assert(shader->graph);
69
70                 if(shader->sample_as_light && shader->has_surface_emission)
71                         scene->light_manager->need_update = true;
72
73                 SVMCompiler compiler(scene->shader_manager, scene->image_manager,
74                         use_multi_closure);
75                 compiler.sunsky = (sunsky_done)? NULL: &dscene->data.sunsky;
76                 compiler.background = ((int)i == scene->default_background);
77                 compiler.compile(shader, svm_nodes, i);
78                 if(!compiler.sunsky)
79                         sunsky_done = true;
80         }
81
82         dscene->svm_nodes.copy((uint4*)&svm_nodes[0], svm_nodes.size());
83         device->tex_alloc("__svm_nodes", dscene->svm_nodes);
84
85         for(i = 0; i < scene->shaders.size(); i++) {
86                 Shader *shader = scene->shaders[i];
87                 shader->need_update = false;
88         }
89
90         device_update_common(device, dscene, scene, progress);
91
92         need_update = false;
93 }
94
95 void SVMShaderManager::device_free(Device *device, DeviceScene *dscene)
96 {
97         device_free_common(device, dscene);
98
99         device->tex_free(dscene->svm_nodes);
100         dscene->svm_nodes.clear();
101 }
102
103 /* Graph Compiler */
104
105 SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_manager_, bool use_multi_closure_)
106 {
107         shader_manager = shader_manager_;
108         image_manager = image_manager_;
109         sunsky = NULL;
110         max_stack_use = 0;
111         current_type = SHADER_TYPE_SURFACE;
112         current_shader = NULL;
113         background = false;
114         mix_weight_offset = SVM_STACK_INVALID;
115         use_multi_closure = use_multi_closure_;
116 }
117
118 int SVMCompiler::stack_size(ShaderSocketType type)
119 {
120         if(type == SHADER_SOCKET_FLOAT)
121                 return 1;
122         else if(type == SHADER_SOCKET_COLOR)
123                 return 3;
124         else if(type == SHADER_SOCKET_VECTOR)
125                 return 3;
126         else if(type == SHADER_SOCKET_NORMAL)
127                 return 3;
128         else if(type == SHADER_SOCKET_POINT)
129                 return 3;
130         else if(type == SHADER_SOCKET_CLOSURE)
131                 return 0;
132
133         assert(0);
134         return 0;
135 }
136
137 int SVMCompiler::stack_find_offset(ShaderSocketType type)
138 {
139         int size = stack_size(type);
140         int offset = -1;
141         
142         /* find free space in stack & mark as used */
143         for(int i = 0, num_unused = 0; i < SVM_STACK_SIZE; i++) {
144                 if(active_stack.users[i]) num_unused = 0;
145                 else num_unused++;
146
147                 if(num_unused == size) {
148                         offset = i+1 - size;
149                         max_stack_use = max(i+1, max_stack_use);
150
151                         while(i >= offset)
152                                 active_stack.users[i--] = 1;
153
154                         return offset;
155                 }
156         }
157
158         fprintf(stderr, "Out of SVM stack space.\n");
159         assert(0);
160
161         return offset;
162 }
163
164 void SVMCompiler::stack_backup(StackBackup& backup, set<ShaderNode*>& done)
165 {
166         backup.done = done;
167         backup.stack = active_stack;
168
169         foreach(ShaderNode *node, current_graph->nodes) {
170                 foreach(ShaderInput *input, node->inputs)
171                         backup.offsets.push_back(input->stack_offset);
172                 foreach(ShaderOutput *output, node->outputs)
173                         backup.offsets.push_back(output->stack_offset);
174         }
175 }
176
177 void SVMCompiler::stack_restore(StackBackup& backup, set<ShaderNode*>& done)
178 {
179         int i = 0;
180
181         done = backup.done;
182         active_stack = backup.stack;
183
184         foreach(ShaderNode *node, current_graph->nodes) {
185                 foreach(ShaderInput *input, node->inputs)
186                         input->stack_offset = backup.offsets[i++];
187                 foreach(ShaderOutput *output, node->outputs)
188                         output->stack_offset = backup.offsets[i++];
189         }
190 }
191
192 void SVMCompiler::stack_assign(ShaderInput *input)
193 {
194         /* stack offset assign? */
195         if(input->stack_offset == SVM_STACK_INVALID) {
196                 if(input->link) {
197                         /* linked to output -> use output offset */
198                         input->stack_offset = input->link->stack_offset;
199                 }
200                 else {
201                         /* not linked to output -> add nodes to load default value */
202                         input->stack_offset = stack_find_offset(input->type);
203
204                         if(input->type == SHADER_SOCKET_FLOAT) {
205                                 add_node(NODE_VALUE_F, __float_as_int(input->value.x), input->stack_offset);
206                         }
207                         else if(input->type == SHADER_SOCKET_VECTOR ||
208                                 input->type == SHADER_SOCKET_NORMAL ||
209                                 input->type == SHADER_SOCKET_POINT ||
210                                 input->type == SHADER_SOCKET_COLOR) {
211
212                                 add_node(NODE_VALUE_V, input->stack_offset);
213                                 add_node(NODE_VALUE_V, input->value);
214                         }
215                         else /* should not get called for closure */
216                                 assert(0);
217                 }
218         }
219 }
220
221 void SVMCompiler::stack_assign(ShaderOutput *output)
222 {
223         /* if no stack offset assigned yet, find one */
224         if(output->stack_offset == SVM_STACK_INVALID)
225                 output->stack_offset = stack_find_offset(output->type);
226 }
227
228 void SVMCompiler::stack_link(ShaderInput *input, ShaderOutput *output)
229 {
230         if(output->stack_offset == SVM_STACK_INVALID) {
231                 assert(input->link);
232                 assert(stack_size(output->type) == stack_size(input->link->type));
233
234                 output->stack_offset = input->link->stack_offset;
235
236                 int size = stack_size(output->type);
237
238                 for(int i = 0; i < size; i++)
239                         active_stack.users[output->stack_offset + i]++;
240         }
241 }
242
243 void SVMCompiler::stack_clear_users(ShaderNode *node, set<ShaderNode*>& done)
244 {
245         /* optimization we should add:
246            find and lower user counts for outputs for which all inputs are done.
247            this is done before the node is compiled, under the assumption that the
248            node will first load all inputs from the stack and then writes its
249            outputs. this used to work, but was disabled because it gave trouble
250            with inputs getting stack positions assigned */
251
252         foreach(ShaderInput *input, node->inputs) {
253                 ShaderOutput *output = input->link;
254
255                 if(output && output->stack_offset != SVM_STACK_INVALID) {
256                         bool all_done = true;
257
258                         /* optimization we should add: verify if in->parent is actually used */
259                         foreach(ShaderInput *in, output->links)
260                                 if(in->parent != node && done.find(in->parent) == done.end())
261                                         all_done = false;
262
263                         if(all_done) {
264                                 int size = stack_size(output->type);
265
266                                 for(int i = 0; i < size; i++)
267                                         active_stack.users[output->stack_offset + i]--;
268
269                                 output->stack_offset = SVM_STACK_INVALID;
270
271                                 foreach(ShaderInput *in, output->links)
272                                         in->stack_offset = SVM_STACK_INVALID;
273                         }
274                 }
275         }
276 }
277
278 void SVMCompiler::stack_clear_temporary(ShaderNode *node)
279 {
280         foreach(ShaderInput *input, node->inputs) {
281                 if(!input->link && input->stack_offset != SVM_STACK_INVALID) {
282                         int size = stack_size(input->type);
283
284                         for(int i = 0; i < size; i++)
285                                 active_stack.users[input->stack_offset + i]--;
286
287                         input->stack_offset = SVM_STACK_INVALID;
288                 }
289         }
290 }
291
292 uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
293 {
294         assert(x <= 255);
295         assert(y <= 255);
296         assert(z <= 255);
297         assert(w <= 255);
298
299         return (x) | (y << 8) | (z << 16) | (w << 24);
300 }
301
302 void SVMCompiler::add_node(int a, int b, int c, int d)
303 {
304         svm_nodes.push_back(make_int4(a, b, c, d));
305 }
306
307 void SVMCompiler::add_node(NodeType type, int a, int b, int c)
308 {
309         svm_nodes.push_back(make_int4(type, a, b, c));
310 }
311
312 void SVMCompiler::add_node(NodeType type, const float3& f)
313 {
314         svm_nodes.push_back(make_int4(type,
315                 __float_as_int(f.x),
316                 __float_as_int(f.y),
317                 __float_as_int(f.z)));
318 }
319
320 void SVMCompiler::add_node(const float4& f)
321 {
322         svm_nodes.push_back(make_int4(
323                 __float_as_int(f.x),
324                 __float_as_int(f.y),
325                 __float_as_int(f.z),
326                 __float_as_int(f.w)));
327 }
328
329 uint SVMCompiler::attribute(ustring name)
330 {
331         return shader_manager->get_attribute_id(name);
332 }
333
334 uint SVMCompiler::attribute(Attribute::Standard std)
335 {
336         return shader_manager->get_attribute_id(std);
337 }
338
339 bool SVMCompiler::node_skip_input(ShaderNode *node, ShaderInput *input)
340 {
341         /* nasty exception .. */
342         if(current_type == SHADER_TYPE_DISPLACEMENT && input->link && input->link->parent->name == ustring("bump"))
343                 return true;
344         
345         return false;
346 }
347
348 void SVMCompiler::find_dependencies(set<ShaderNode*>& dependencies, const set<ShaderNode*>& done, ShaderInput *input)
349 {
350         ShaderNode *node = (input->link)? input->link->parent: NULL;
351
352         if(node && done.find(node) == done.end()) {
353                 foreach(ShaderInput *in, node->inputs)
354                         if(!node_skip_input(node, in))
355                                 find_dependencies(dependencies, done, in);
356
357                 dependencies.insert(node);
358         }
359 }
360
361 void SVMCompiler::generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNode*>& done)
362 {
363         bool nodes_done;
364
365         do {
366                 nodes_done = true;
367
368                 foreach(ShaderNode *node, nodes) {
369                         if(done.find(node) == done.end()) {
370                                 bool inputs_done = true;
371
372                                 foreach(ShaderInput *input, node->inputs)
373                                         if(!node_skip_input(node, input))
374                                                 if(input->link && done.find(input->link->parent) == done.end())
375                                                         inputs_done = false;
376
377                                 if(inputs_done) {
378                                         node->compile(*this);
379                                         stack_clear_users(node, done);
380                                         stack_clear_temporary(node);
381                                         done.insert(node);
382                                 }
383                                 else
384                                         nodes_done = false;
385                         }
386                 }
387         } while(!nodes_done);
388 }
389
390 void SVMCompiler::generate_closure(ShaderNode *node, set<ShaderNode*>& done)
391 {
392         if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
393                 ShaderInput *fin = node->input("Fac");
394                 ShaderInput *cl1in = node->input("Closure1");
395                 ShaderInput *cl2in = node->input("Closure2");
396
397                 /* execute dependencies for mix weight */
398                 if(fin) {
399                         set<ShaderNode*> dependencies;
400                         find_dependencies(dependencies, done, fin);
401                         generate_svm_nodes(dependencies, done);
402
403                         /* add mix node */
404                         stack_assign(fin);
405                 }
406
407                 int mix_offset = svm_nodes.size();
408
409                 if(fin)
410                         add_node(NODE_MIX_CLOSURE, fin->stack_offset, 0, 0);
411                 else
412                         add_node(NODE_ADD_CLOSURE, 0, 0, 0);
413
414                 /* generate code for closure 1
415                    note we backup all compiler state and restore it afterwards, so one
416                    closure choice doesn't influence the other*/
417                 if(cl1in->link) {
418                         StackBackup backup;
419                         stack_backup(backup, done);
420
421                         generate_closure(cl1in->link->parent, done);
422                         add_node(NODE_END, 0, 0, 0);
423
424                         stack_restore(backup, done);
425                 }
426                 else
427                         add_node(NODE_END, 0, 0, 0);
428
429                 /* generate code for closure 2 */
430                 int cl2_offset = svm_nodes.size();
431
432                 if(cl2in->link) {
433                         StackBackup backup;
434                         stack_backup(backup, done);
435
436                         generate_closure(cl2in->link->parent, done);
437                         add_node(NODE_END, 0, 0, 0);
438
439                         stack_restore(backup, done);
440                 }
441                 else
442                         add_node(NODE_END, 0, 0, 0);
443
444                 /* set jump for mix node, -1 because offset is already
445                    incremented when this jump is added to it */
446                 svm_nodes[mix_offset].z = cl2_offset - mix_offset - 1;
447
448                 done.insert(node);
449                 stack_clear_users(node, done);
450                 stack_clear_temporary(node);
451         }
452         else {
453                 /* execute dependencies for closure */
454                 foreach(ShaderInput *in, node->inputs) {
455                         if(!node_skip_input(node, in) && in->link) {
456                                 set<ShaderNode*> dependencies;
457                                 find_dependencies(dependencies, done, in);
458                                 generate_svm_nodes(dependencies, done);
459                         }
460                 }
461
462                 /* compile closure itself */
463                 node->compile(*this);
464                 stack_clear_users(node, done);
465                 stack_clear_temporary(node);
466
467                 if(node->name == ustring("emission"))
468                         current_shader->has_surface_emission = true;
469                 if(node->name == ustring("transparent"))
470                         current_shader->has_surface_transparent = true;
471
472                 /* end node is added outside of this */
473         }
474 }
475
476 void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& done, uint in_offset)
477 {
478         /* todo: the weaks point here is that unlike the single closure sampling 
479            we will evaluate all nodes even if they are used as input for closures
480            that are unused. it's not clear what would be the best way to skip such
481            nodes at runtime, especially if they are tangled up  */
482
483         if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
484                 ShaderInput *fin = node->input("Fac");
485                 ShaderInput *cl1in = node->input("Closure1");
486                 ShaderInput *cl2in = node->input("Closure2");
487
488                 uint out1_offset = SVM_STACK_INVALID;
489                 uint out2_offset = SVM_STACK_INVALID;
490
491                 if(fin) {
492                         /* mix closure */
493                         set<ShaderNode*> dependencies;
494                         find_dependencies(dependencies, done, fin);
495                         generate_svm_nodes(dependencies, done);
496
497                         stack_assign(fin);
498
499                         if(cl1in->link)
500                                 out1_offset = stack_find_offset(SHADER_SOCKET_FLOAT);
501                         if(cl2in->link)
502                                 out2_offset = stack_find_offset(SHADER_SOCKET_FLOAT);
503
504                         add_node(NODE_MIX_CLOSURE, 
505                                 encode_uchar4(fin->stack_offset, in_offset, out1_offset, out2_offset));
506                 }
507                 else {
508                         /* add closure */
509                         out1_offset = in_offset;
510                         out2_offset = in_offset;
511                 }
512
513                 if(cl1in->link) {
514                         generate_multi_closure(cl1in->link->parent, done, out1_offset);
515
516                         if(fin)
517                                 active_stack.users[out1_offset]--;
518                 }
519
520                 if(cl2in->link) {
521                         generate_multi_closure(cl2in->link->parent, done, out2_offset);
522
523                         if(fin)
524                                 active_stack.users[out2_offset]--;
525                 }
526         }
527         else {
528                 /* execute dependencies for closure */
529                 foreach(ShaderInput *in, node->inputs) {
530                         if(!node_skip_input(node, in) && in->link) {
531                                 set<ShaderNode*> dependencies;
532                                 find_dependencies(dependencies, done, in);
533                                 generate_svm_nodes(dependencies, done);
534                         }
535                 }
536
537                 mix_weight_offset = in_offset;
538
539                 /* compile closure itself */
540                 node->compile(*this);
541                 stack_clear_users(node, done);
542                 stack_clear_temporary(node);
543
544                 mix_weight_offset = SVM_STACK_INVALID;
545
546                 if(node->name == ustring("emission"))
547                         current_shader->has_surface_emission = true;
548                 if(node->name == ustring("transparent"))
549                         current_shader->has_surface_transparent = true;
550
551                 /* end node is added outside of this */
552         }
553 }
554
555
556 void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType type)
557 {
558         /* Converting a shader graph into svm_nodes that can be executed
559          * sequentially on the virtual machine is fairly simple. We can keep
560          * looping over nodes and each time all the inputs of a node are
561          * ready, we add svm_nodes for it that read the inputs from the
562          * stack and write outputs back to the stack.
563          *
564          * With the SVM, we always sample only a single closure. We can think
565          * of all closures nodes as a binary tree with mix closures as inner
566          * nodes and other closures as leafs. The SVM will traverse that tree,
567          * each time deciding to go left or right depending on the mix weights,
568          * until a closure is found.
569          *
570          * We only execute nodes that are needed for the mix weights and chosen
571          * closure.
572          */
573
574         current_type = type;
575         current_graph = graph;
576
577         /* get input in output node */
578         ShaderNode *node = graph->output();
579         ShaderInput *clin = NULL;
580         
581         if(type == SHADER_TYPE_SURFACE)
582                 clin = node->input("Surface");
583         else if(type == SHADER_TYPE_VOLUME)
584                 clin = node->input("Volume");
585         else if(type == SHADER_TYPE_DISPLACEMENT)
586                 clin = node->input("Displacement");
587         else
588                 assert(0);
589
590         /* clear all compiler state */
591         memset(&active_stack, 0, sizeof(active_stack));
592         svm_nodes.clear();
593
594         foreach(ShaderNode *node, graph->nodes) {
595                 foreach(ShaderInput *input, node->inputs)
596                         input->stack_offset = SVM_STACK_INVALID;
597                 foreach(ShaderOutput *output, node->outputs)
598                         output->stack_offset = SVM_STACK_INVALID;
599         }
600
601         if(clin->link) {
602                 bool generate = false;
603                 if(type == SHADER_TYPE_SURFACE) {
604                         /* generate surface shader */
605                         generate = true;
606                         shader->has_surface = true;
607                 }
608                 else if(type == SHADER_TYPE_VOLUME) {
609                         /* generate volume shader */
610                         generate = true;
611                         shader->has_volume = true;
612                 }
613                 else if(type == SHADER_TYPE_DISPLACEMENT) {
614                         /* generate displacement shader */
615                         generate = true;
616                         shader->has_displacement = true;
617                 }
618
619                 if(generate) {
620                         set<ShaderNode*> done;
621
622                         if(use_multi_closure)
623                                 generate_multi_closure(clin->link->parent, done, SVM_STACK_INVALID);
624                         else
625                                 generate_closure(clin->link->parent, done);
626                 }
627         }
628
629         /* compile output node */
630         node->compile(*this);
631
632         add_node(NODE_END, 0, 0, 0);
633 }
634
635 void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int index)
636 {
637         /* copy graph for shader with bump mapping */
638         ShaderNode *node = shader->graph->output();
639
640         if(node->input("Surface")->link && node->input("Displacement")->link)
641                 if(!shader->graph_bump)
642                         shader->graph_bump = shader->graph->copy();
643
644         /* finalize */
645         shader->graph->finalize(false, false);
646         if(shader->graph_bump)
647                 shader->graph_bump->finalize(true, false);
648
649         current_shader = shader;
650
651         shader->has_surface = false;
652         shader->has_surface_emission = false;
653         shader->has_surface_transparent = false;
654         shader->has_volume = false;
655         shader->has_displacement = false;
656
657         /* generate surface shader */
658         compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
659         global_svm_nodes[index*2 + 0].y = global_svm_nodes.size();
660         global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
661         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
662
663         if(shader->graph_bump) {
664                 compile_type(shader, shader->graph_bump, SHADER_TYPE_SURFACE);
665                 global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
666                 global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
667         }
668
669         /* generate volume shader */
670         compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
671         global_svm_nodes[index*2 + 0].z = global_svm_nodes.size();
672         global_svm_nodes[index*2 + 1].z = global_svm_nodes.size();
673         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
674
675         /* generate displacement shader */
676         compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
677         global_svm_nodes[index*2 + 0].w = global_svm_nodes.size();
678         global_svm_nodes[index*2 + 1].w = global_svm_nodes.size();
679         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
680 }
681
682 CCL_NAMESPACE_END
683