2 * Copyright 2011, Blender Foundation.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software Foundation,
16 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
27 #include "util_debug.h"
28 #include "util_foreach.h"
29 #include "util_progress.h"
/* Constructor of the SVM shader manager. */
35 SVMShaderManager::SVMShaderManager()
/* Destructor of the SVM shader manager. */
39 SVMShaderManager::~SVMShaderManager()
/* Rebuild the SVM node array for all shaders in the scene and upload it to
   the device under the name "__svm_nodes".

   The array starts with two NODE_SHADER_JUMP entries per shader; the compiled
   nodes of each shader are appended behind them by SVMCompiler::compile(),
   which also writes the start offsets into those jump entries. Previously
   uploaded data is freed first. The function returns early, before anything
   is uploaded, when progress reports a cancel.

   NOTE(review): &svm_nodes[0] below assumes the array is not empty, i.e. that
   the scene holds at least one shader - confirm with callers. */
43 void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
48 /* test if we need to update */
49 device_free(device, dscene);
52 vector<int4> svm_nodes;
/* two jump entries per shader at the start of the array, filled in by compile() */
55 for(i = 0; i < scene->shaders.size(); i++) {
56 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
57 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
60 bool sunsky_done = false;
62 for(i = 0; i < scene->shaders.size(); i++) {
63 Shader *shader = scene->shaders[i];
65 if(progress.get_cancel()) return;
67 assert(shader->graph);
/* a shader that is sampled as light and has surface emission requests a
   light manager update */
69 if(shader->sample_as_light && shader->has_surface_emission)
70 scene->light_manager->need_update = true;
/* a new compiler is created for every shader; it gets the sunsky data
   pointer only while sunsky_done is still false */
72 SVMCompiler compiler(scene->shader_manager, scene->image_manager,
73 scene->params.use_multi_closure);
74 compiler.sunsky = (sunsky_done)? NULL: &dscene->data.sunsky;
75 compiler.background = ((int)i == scene->default_background);
76 compiler.compile(shader, svm_nodes, i);
/* copy the combined node array into the device scene and allocate it on the device */
81 dscene->svm_nodes.copy((uint4*)&svm_nodes[0], svm_nodes.size());
82 device->tex_alloc("__svm_nodes", dscene->svm_nodes);
/* all shaders are compiled now, clear their update flags */
84 for(i = 0; i < scene->shaders.size(); i++) {
85 Shader *shader = scene->shaders[i];
86 shader->need_update = false;
89 device_update_common(device, dscene, scene, progress);
/* Free shader data on the device: runs the common free step, then frees the
   SVM nodes texture on the device and clears dscene->svm_nodes. */
94 void SVMShaderManager::device_free(Device *device, DeviceScene *dscene)
96 device_free_common(device, dscene);
98 device->tex_free(dscene->svm_nodes);
99 dscene->svm_nodes.clear();
/* Create a compiler that uses the given shader and image managers.
   It starts out with the surface shader type, no current shader and no mix
   weight stack offset. use_multi_closure_ is stored and later picks between
   generate_multi_closure() and generate_closure() in compile_type(). */
104 SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_manager_, bool use_multi_closure_)
106 shader_manager = shader_manager_;
107 image_manager = image_manager_;
110 current_type = SHADER_TYPE_SURFACE;
111 current_shader = NULL;
113 mix_weight_offset = SVM_STACK_INVALID;
114 use_multi_closure = use_multi_closure_;
/* Return the SVM stack size needed for a socket of the given type.
   The type is matched against the float, color, vector, normal, point and
   closure socket types in turn. */
117 int SVMCompiler::stack_size(ShaderSocketType type)
119 if(type == SHADER_SOCKET_FLOAT)
121 else if(type == SHADER_SOCKET_COLOR)
123 else if(type == SHADER_SOCKET_VECTOR)
125 else if(type == SHADER_SOCKET_NORMAL)
127 else if(type == SHADER_SOCKET_POINT)
129 else if(type == SHADER_SOCKET_CLOSURE)
/* Find room on the SVM stack for a socket of the given type and mark it as
   used; callers store the result as the socket's stack offset.

   The scan walks active_stack.users from the start looking for a run of
   unused entries as long as stack_size(type); an entry that has users resets
   the run. max_stack_use is raised to cover the entries that get claimed.
   An error is printed to stderr when the loop ends without finding room. */
136 int SVMCompiler::stack_find_offset(ShaderSocketType type)
138 int size = stack_size(type);
141 /* find free space in stack & mark as used */
142 for(int i = 0, num_unused = 0; i < SVM_STACK_SIZE; i++) {
143 if(active_stack.users[i]) num_unused = 0;
146 if(num_unused == size) {
148 max_stack_use = max(i+1, max_stack_use);
/* mark the entries as used, walking backwards from i */
151 active_stack.users[i--] = 1;
157 fprintf(stderr, "Out of SVM stack space.\n");
/* Save the compiler's stack state into backup: a copy of the active stack
   plus the stack offset of every input and output socket in the current
   graph. Offsets are appended node by node, inputs before outputs;
   stack_restore() reads them back in the same order. */
163 void SVMCompiler::stack_backup(StackBackup& backup, set<ShaderNode*>& done)
166 backup.stack = active_stack;
168 foreach(ShaderNode *node, current_graph->nodes) {
169 foreach(ShaderInput *input, node->inputs)
170 backup.offsets.push_back(input->stack_offset);
171 foreach(ShaderOutput *output, node->outputs)
172 backup.offsets.push_back(output->stack_offset);
/* Restore the state saved by stack_backup(): the active stack is replaced by
   the saved copy and the socket stack offsets are read back from
   backup.offsets, node by node with inputs before outputs (the order in which
   stack_backup() stored them). */
176 void SVMCompiler::stack_restore(StackBackup& backup, set<ShaderNode*>& done)
181 active_stack = backup.stack;
183 foreach(ShaderNode *node, current_graph->nodes) {
184 foreach(ShaderInput *input, node->inputs)
185 input->stack_offset = backup.offsets[i++];
186 foreach(ShaderOutput *output, node->outputs)
187 output->stack_offset = backup.offsets[i++];
/* Make sure an input socket has a stack offset.
   Nothing is assigned when the input already has one. Otherwise an input that
   is linked to an output takes over that output's offset, and an input that
   is not linked gets a new stack location plus nodes that load its default
   value: NODE_VALUE_F for floats, a pair of NODE_VALUE_V nodes for vector,
   normal, point and color sockets. Closure inputs are not expected here. */
191 void SVMCompiler::stack_assign(ShaderInput *input)
193 /* stack offset assign? */
194 if(input->stack_offset == SVM_STACK_INVALID) {
196 /* linked to output -> use output offset */
197 input->stack_offset = input->link->stack_offset;
200 /* not linked to output -> add nodes to load default value */
201 input->stack_offset = stack_find_offset(input->type);
203 if(input->type == SHADER_SOCKET_FLOAT) {
204 add_node(NODE_VALUE_F, __float_as_int(input->value.x), input->stack_offset);
206 else if(input->type == SHADER_SOCKET_VECTOR ||
207 input->type == SHADER_SOCKET_NORMAL ||
208 input->type == SHADER_SOCKET_POINT ||
209 input->type == SHADER_SOCKET_COLOR) {
/* first node carries the destination offset, second one the value itself */
211 add_node(NODE_VALUE_V, input->stack_offset);
212 add_node(NODE_VALUE_V, input->value);
214 else /* should not get called for closure */
/* Make sure an output socket has a stack offset, allocating one with
   stack_find_offset() when none is assigned yet. */
220 void SVMCompiler::stack_assign(ShaderOutput *output)
222 /* if no stack offset assigned yet, find one */
223 if(output->stack_offset == SVM_STACK_INVALID)
224 output->stack_offset = stack_find_offset(output->type);
/* Let output reuse the stack location of the output that input is linked to.
   Applies to an output without a stack offset: it takes over the offset of
   input->link (both are asserted to have the same stack size), and the user
   count of every stack entry the output covers is incremented. */
227 void SVMCompiler::stack_link(ShaderInput *input, ShaderOutput *output)
229 if(output->stack_offset == SVM_STACK_INVALID) {
231 assert(stack_size(output->type) == stack_size(input->link->type));
233 output->stack_offset = input->link->stack_offset;
235 int size = stack_size(output->type);
237 for(int i = 0; i < size; i++)
238 active_stack.users[output->stack_offset + i]++;
/* Release the stack entries of outputs that feed the inputs of node.
   For every input of node that is linked to an output with a stack offset,
   the other inputs linked to that output are checked against the done set
   (inputs of node itself are not checked). Releasing means: the user count of
   each stack entry of the output is decremented and the stack offsets of the
   output and of all inputs linked to it are reset to SVM_STACK_INVALID.
   NOTE(review): presumably the release only happens when all_done stays true;
   confirm against the full function. */
242 void SVMCompiler::stack_clear_users(ShaderNode *node, set<ShaderNode*>& done)
244 /* optimization we should add:
245 find and lower user counts for outputs for which all inputs are done.
246 this is done before the node is compiled, under the assumption that the
247 node will first load all inputs from the stack and then writes its
248 outputs. this used to work, but was disabled because it gave trouble
249 with inputs getting stack positions assigned */
251 foreach(ShaderInput *input, node->inputs) {
252 ShaderOutput *output = input->link;
254 if(output && output->stack_offset != SVM_STACK_INVALID) {
255 bool all_done = true;
257 /* optimization we should add: verify if in->parent is actually used */
258 foreach(ShaderInput *in, output->links)
259 if(in->parent != node && done.find(in->parent) == done.end())
263 int size = stack_size(output->type);
265 for(int i = 0; i < size; i++)
266 active_stack.users[output->stack_offset + i]--;
268 output->stack_offset = SVM_STACK_INVALID;
270 foreach(ShaderInput *in, output->links)
271 in->stack_offset = SVM_STACK_INVALID;
/* Release stack entries held by inputs of node that are not linked to any
   output (the ones stack_assign() allocates for default values): the user
   count of each of their stack entries is decremented and the input's offset
   is reset to SVM_STACK_INVALID. */
277 void SVMCompiler::stack_clear_temporary(ShaderNode *node)
279 foreach(ShaderInput *input, node->inputs) {
280 if(!input->link && input->stack_offset != SVM_STACK_INVALID) {
281 int size = stack_size(input->type);
283 for(int i = 0; i < size; i++)
284 active_stack.users[input->stack_offset + i]--;
286 input->stack_offset = SVM_STACK_INVALID;
/* Pack four values into a single uint: x in bits 0-7, y in bits 8-15,
   z in bits 16-23 and w in bits 24-31. The return expression does not mask
   its arguments, so each value has to fit into 8 bits. */
291 uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
298 return (x) | (y << 8) | (z << 16) | (w << 24);
/* Append one node built from four raw ints to this compiler's svm_nodes. */
301 void SVMCompiler::add_node(int a, int b, int c, int d)
303 svm_nodes.push_back(make_int4(a, b, c, d));
/* Append one node to svm_nodes: the node type in the first component,
   followed by the three int arguments a, b and c. */
306 void SVMCompiler::add_node(NodeType type, int a, int b, int c)
308 svm_nodes.push_back(make_int4(type, a, b, c));
/* Append one node to svm_nodes: the node type in the first component,
   followed by components of the float3 passed through __float_as_int(). */
311 void SVMCompiler::add_node(NodeType type, const float3& f)
313 svm_nodes.push_back(make_int4(type,
316 __float_as_int(f.z)));
/* Append one node to svm_nodes that carries a float4, with components passed
   through __float_as_int(). No node type is stored in this variant. */
319 void SVMCompiler::add_node(const float4& f)
321 svm_nodes.push_back(make_int4(
325 __float_as_int(f.w)));
/* Return the attribute id the shader manager has for the attribute name. */
328 uint SVMCompiler::attribute(ustring name)
330 return shader_manager->get_attribute_id(name);
/* Return the attribute id the shader manager has for a standard attribute. */
333 uint SVMCompiler::attribute(Attribute::Standard std)
335 return shader_manager->get_attribute_id(std);
/* Tell whether an input of node is to be left out when collecting
   dependencies and when checking that a node's inputs are done.
   There is one special case: while the displacement shader type is being
   compiled, an input linked to a node named "bump" is singled out.
   NOTE(review): presumably that input is the one reported as skipped and
   everything else is kept - confirm against the return statements. */
338 bool SVMCompiler::node_skip_input(ShaderNode *node, ShaderInput *input)
340 /* nasty exception .. */
341 if(current_type == SHADER_TYPE_DISPLACEMENT && input->link && input->link->parent->name == ustring("bump"))
/* Collect the nodes that input depends on and that are not done yet.
   The node input is linked to (if any, and if not in done) is inserted into
   dependencies after recursing into each of its own inputs that
   node_skip_input() does not reject. Nodes already in done are not followed. */
347 void SVMCompiler::find_dependencies(set<ShaderNode*>& dependencies, const set<ShaderNode*>& done, ShaderInput *input)
349 ShaderNode *node = (input->link)? input->link->parent: NULL;
351 if(node && done.find(node) == done.end()) {
352 foreach(ShaderInput *in, node->inputs)
353 if(!node_skip_input(node, in))
354 find_dependencies(dependencies, done, in);
356 dependencies.insert(node);
/* Compile the given nodes in an order that respects their links.
   The set is swept repeatedly until nodes_done is set. In each sweep a node
   that is not in done yet is checked: its non-skipped, linked inputs must all
   come from nodes that are in done. A node that passes is compiled, and the
   stack entries of its finished inputs and of its temporaries are released. */
360 void SVMCompiler::generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNode*>& done)
367 foreach(ShaderNode *node, nodes) {
368 if(done.find(node) == done.end()) {
369 bool inputs_done = true;
371 foreach(ShaderInput *input, node->inputs)
372 if(!node_skip_input(node, input))
373 if(input->link && done.find(input->link->parent) == done.end())
377 node->compile(*this);
378 stack_clear_users(node, done);
379 stack_clear_temporary(node);
386 } while(!nodes_done);
/* Generate nodes for the closure tree rooted at node (single closure variant).

   For a node named "mix_closure" or "add_closure": the dependencies of the
   Fac input are compiled, a NODE_MIX_CLOSURE or NODE_ADD_CLOSURE node is
   emitted, and code for Closure1 and Closure2 is generated one after the
   other, each terminated by NODE_END. Compiler stack state is backed up
   before and restored after each branch. Finally the z component of the node
   emitted at mix_offset is patched with the relative jump to the start of the
   closure 2 code.

   For any other node: the dependencies of its linked, non-skipped inputs are
   compiled, then the node itself, and the current shader is flagged when the
   node is named "emission" or "transparent". The caller adds the end node. */
389 void SVMCompiler::generate_closure(ShaderNode *node, set<ShaderNode*>& done)
391 if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
392 ShaderInput *fin = node->input("Fac");
393 ShaderInput *cl1in = node->input("Closure1");
394 ShaderInput *cl2in = node->input("Closure2");
396 /* execute dependencies for mix weight */
398 set<ShaderNode*> dependencies;
399 find_dependencies(dependencies, done, fin);
400 generate_svm_nodes(dependencies, done);
/* remember where the mix/add node goes so its jump can be patched later */
406 int mix_offset = svm_nodes.size();
409 add_node(NODE_MIX_CLOSURE, fin->stack_offset, 0, 0);
411 add_node(NODE_ADD_CLOSURE, 0, 0, 0);
413 /* generate code for closure 1
414 note we backup all compiler state and restore it afterwards, so one
415 closure choice doesn't influence the other*/
418 stack_backup(backup, done);
420 generate_closure(cl1in->link->parent, done);
421 add_node(NODE_END, 0, 0, 0);
423 stack_restore(backup, done);
426 add_node(NODE_END, 0, 0, 0);
428 /* generate code for closure 2 */
429 int cl2_offset = svm_nodes.size();
433 stack_backup(backup, done);
435 generate_closure(cl2in->link->parent, done);
436 add_node(NODE_END, 0, 0, 0);
438 stack_restore(backup, done);
441 add_node(NODE_END, 0, 0, 0);
443 /* set jump for mix node, -1 because offset is already
444 incremented when this jump is added to it */
445 svm_nodes[mix_offset].z = cl2_offset - mix_offset - 1;
448 stack_clear_users(node, done);
449 stack_clear_temporary(node);
452 /* execute dependencies for closure */
453 foreach(ShaderInput *in, node->inputs) {
454 if(!node_skip_input(node, in) && in->link) {
455 set<ShaderNode*> dependencies;
456 find_dependencies(dependencies, done, in);
457 generate_svm_nodes(dependencies, done);
461 /* compile closure itself */
462 node->compile(*this);
463 stack_clear_users(node, done);
464 stack_clear_temporary(node);
/* record on the shader which kinds of closure it contains */
466 if(node->name == ustring("emission"))
467 current_shader->has_surface_emission = true;
468 if(node->name == ustring("transparent"))
469 current_shader->has_surface_transparent = true;
471 /* end node is added outside of this */
/* Generate nodes for the closure tree rooted at node (multi closure variant).

   in_offset is a stack offset handed down from the parent; compile_type()
   passes SVM_STACK_INVALID for the root. It is stored in mix_weight_offset
   while a non-mix closure node is compiled, so it presumably locates the
   accumulated mix weight for that closure.

   For a node named "mix_closure" or "add_closure": the dependencies of the
   Fac input are compiled, then either two float stack locations are allocated
   and a NODE_MIX_CLOSURE node is emitted with the Fac, incoming and two
   outgoing offsets packed by encode_uchar4(), or in_offset is passed on
   unchanged to both children. Both children are then generated recursively
   and the user counts at out1_offset / out2_offset are decremented.

   For any other node: the dependencies of its linked, non-skipped inputs are
   compiled, then the node itself, and the current shader is flagged when the
   node is named "emission" or "transparent". The caller adds the end node. */
475 void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& done, uint in_offset)
477 /* todo: the weak point here is that unlike the single closure sampling
478 we will evaluate all nodes even if they are used as input for closures
479 that are unused. it's not clear what would be the best way to skip such
480 nodes at runtime, especially if they are tangled up */
482 if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
483 ShaderInput *fin = node->input("Fac");
484 ShaderInput *cl1in = node->input("Closure1");
485 ShaderInput *cl2in = node->input("Closure2");
487 uint out1_offset = SVM_STACK_INVALID;
488 uint out2_offset = SVM_STACK_INVALID;
492 set<ShaderNode*> dependencies;
493 find_dependencies(dependencies, done, fin);
494 generate_svm_nodes(dependencies, done);
/* one float stack location per child for the outgoing offsets */
499 out1_offset = stack_find_offset(SHADER_SOCKET_FLOAT);
501 out2_offset = stack_find_offset(SHADER_SOCKET_FLOAT);
503 add_node(NODE_MIX_CLOSURE,
504 encode_uchar4(fin->stack_offset, in_offset, out1_offset, out2_offset));
/* alternative path: both children receive the incoming offset unchanged */
508 out1_offset = in_offset;
509 out2_offset = in_offset;
513 generate_multi_closure(cl1in->link->parent, done, out1_offset);
516 active_stack.users[out1_offset]--;
520 generate_multi_closure(cl2in->link->parent, done, out2_offset);
523 active_stack.users[out2_offset]--;
527 /* execute dependencies for closure */
528 foreach(ShaderInput *in, node->inputs) {
529 if(!node_skip_input(node, in) && in->link) {
530 set<ShaderNode*> dependencies;
531 find_dependencies(dependencies, done, in);
532 generate_svm_nodes(dependencies, done);
/* expose the incoming offset to the node while it compiles */
536 mix_weight_offset = in_offset;
538 /* compile closure itself */
539 node->compile(*this);
540 stack_clear_users(node, done);
541 stack_clear_temporary(node);
543 mix_weight_offset = SVM_STACK_INVALID;
545 if(node->name == ustring("emission"))
546 current_shader->has_surface_emission = true;
547 if(node->name == ustring("transparent"))
548 current_shader->has_surface_transparent = true;
550 /* end node is added outside of this */
/* Compile one shader type (surface, volume or displacement) of graph into
   this compiler's svm_nodes.

   The matching input ("Surface", "Volume" or "Displacement") of the graph's
   output node is looked up, the active stack is zeroed and the stack offset
   of every socket in the graph is reset to SVM_STACK_INVALID. The branch for
   the requested type sets has_surface, has_volume or has_displacement on
   shader. Closure code is generated from the node linked to that input, with
   generate_multi_closure() when use_multi_closure is set and with
   generate_closure() otherwise. The output node is compiled and the node list
   is terminated with NODE_END. */
555 void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType type)
557 /* Converting a shader graph into svm_nodes that can be executed
558 * sequentially on the virtual machine is fairly simple. We can keep
559 * looping over nodes and each time all the inputs of a node are
560 * ready, we add svm_nodes for it that read the inputs from the
561 * stack and write outputs back to the stack.
563 * With the SVM, we always sample only a single closure. We can think
564 * of all closures nodes as a binary tree with mix closures as inner
565 * nodes and other closures as leafs. The SVM will traverse that tree,
566 * each time deciding to go left or right depending on the mix weights,
567 * until a closure is found.
569 * We only execute nodes that are needed for the mix weights and chosen
574 current_graph = graph;
576 /* get input in output node */
577 ShaderNode *node = graph->output();
578 ShaderInput *clin = NULL;
580 if(type == SHADER_TYPE_SURFACE)
581 clin = node->input("Surface");
582 else if(type == SHADER_TYPE_VOLUME)
583 clin = node->input("Volume");
584 else if(type == SHADER_TYPE_DISPLACEMENT)
585 clin = node->input("Displacement");
589 /* clear all compiler state */
590 memset(&active_stack, 0, sizeof(active_stack));
593 foreach(ShaderNode *node, graph->nodes) {
594 foreach(ShaderInput *input, node->inputs)
595 input->stack_offset = SVM_STACK_INVALID;
596 foreach(ShaderOutput *output, node->outputs)
597 output->stack_offset = SVM_STACK_INVALID;
601 bool generate = false;
602 if(type == SHADER_TYPE_SURFACE) {
603 /* generate surface shader */
605 shader->has_surface = true;
607 else if(type == SHADER_TYPE_VOLUME) {
608 /* generate volume shader */
610 shader->has_volume = true;
612 else if(type == SHADER_TYPE_DISPLACEMENT) {
613 /* generate displacement shader */
615 shader->has_displacement = true;
/* set of nodes that have been compiled so far, shared by the generators */
619 set<ShaderNode*> done;
621 if(use_multi_closure)
622 generate_multi_closure(clin->link->parent, done, SVM_STACK_INVALID);
624 generate_closure(clin->link->parent, done);
628 /* compile output node */
629 node->compile(*this);
631 add_node(NODE_END, 0, 0, 0);
/* Compile every shader type of shader and append the result to
   global_svm_nodes.

   index selects the shader's two jump entries in global_svm_nodes, at
   index*2 + 0 and index*2 + 1. Before each batch of nodes is appended, the
   current size of global_svm_nodes (the position where the batch will start)
   is written into those entries: component y for the surface shader, z for
   the volume shader and w for the displacement shader.

   When both the "Surface" and the "Displacement" input of the output node are
   linked and no bump graph exists yet, a copy of the graph is made for bump
   mapping. If a bump graph exists, the surface shader is compiled a second
   time from it and only the y component of the second jump entry is pointed
   at that variant.

   The has_* flags of the shader are reset before compiling; compile_type()
   and the closure generators set them again. */
634 void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int index)
636 /* copy graph for shader with bump mapping */
637 ShaderNode *node = shader->graph->output();
639 if(node->input("Surface")->link && node->input("Displacement")->link)
640 if(!shader->graph_bump)
641 shader->graph_bump = shader->graph->copy();
644 shader->graph->finalize(false, false);
645 if(shader->graph_bump)
646 shader->graph_bump->finalize(true, false);
648 current_shader = shader;
650 shader->has_surface = false;
651 shader->has_surface_emission = false;
652 shader->has_surface_transparent = false;
653 shader->has_volume = false;
654 shader->has_displacement = false;
656 /* generate surface shader */
657 compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
658 global_svm_nodes[index*2 + 0].y = global_svm_nodes.size();
659 global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
660 global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
/* surface shader compiled from the bump graph, referenced by the second jump entry only */
662 if(shader->graph_bump) {
663 compile_type(shader, shader->graph_bump, SHADER_TYPE_SURFACE);
664 global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
665 global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
668 /* generate volume shader */
669 compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
670 global_svm_nodes[index*2 + 0].z = global_svm_nodes.size();
671 global_svm_nodes[index*2 + 1].z = global_svm_nodes.size();
672 global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
674 /* generate displacement shader */
675 compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
676 global_svm_nodes[index*2 + 0].w = global_svm_nodes.size();
677 global_svm_nodes[index*2 + 1].w = global_svm_nodes.size();
678 global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());