/*
 * Copyright 2011, Blender Foundation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
27 #include "util_debug.h"
28 #include "util_foreach.h"
29 #include "util_progress.h"
/* Default constructor of the SVM shader manager.
 * NOTE(review): only the signature line is visible in this view; the body
 * is not shown. */
35 SVMShaderManager::SVMShaderManager()
/* Destructor of the SVM shader manager.
 * NOTE(review): only the signature line is visible in this view; the body
 * is not shown. */
39 SVMShaderManager::~SVMShaderManager()
/* Compile every shader in the scene to SVM nodes and upload the result.
 *
 * Builds one flat int4 node array: first two NODE_SHADER_JUMP entries per
 * shader, then whatever SVMCompiler::compile() appends for each shader. The
 * array is copied into dscene->svm_nodes and allocated on the device under
 * the name "__svm_nodes".
 *
 * device:   target device; its type also decides whether multi closure is used
 * dscene:   device scene that receives the node array
 * scene:    supplies the shaders, managers and parameters
 * progress: polled for cancellation once per shader
 *
 * NOTE(review): the embedded line numbers skip 44-47, so any early-out at the
 * top of the function is not visible in this view. */
43 void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
48 /* test if we need to update */
/* release whatever a previous update allocated before rebuilding */
49 device_free(device, dscene);
/* host-side node array that is filled below and then uploaded */
52 vector<int4> svm_nodes;
/* reserve two jump entries per shader at the front of the array, at indices
 * 2*i and 2*i + 1 */
55 for(i = 0; i < scene->shaders.size(); i++) {
56 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
57 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
/* NOTE(review): the statement that sets sunsky_done is not visible here */
60 bool sunsky_done = false;
/* multi closure needs the scene parameter and is never used on DEVICE_OPENCL */
61 bool use_multi_closure = (scene->params.use_multi_closure && device->type() != DEVICE_OPENCL);
63 for(i = 0; i < scene->shaders.size(); i++) {
64 Shader *shader = scene->shaders[i];
/* stop as soon as the user cancels; nothing is uploaded in that case */
66 if(progress.get_cancel()) return;
68 assert(shader->graph);
/* NOTE(review): has_surface_emission is read before compile() below
 * recomputes it, so this test sees the value from an earlier compile -
 * confirm this is intended */
70 if(shader->sample_as_light && shader->has_surface_emission)
71 scene->light_manager->need_update = true;
/* NOTE(review): the rest of the constructor argument list (embedded line 74)
 * is not visible in this view */
73 SVMCompiler compiler(scene->shader_manager, scene->image_manager,
/* hand out the sunsky data pointer only while sunsky_done is false */
75 compiler.sunsky = (sunsky_done)? NULL: &dscene->data.sunsky;
/* mark the shader whose index equals scene->default_background */
76 compiler.background = ((int)i == scene->default_background);
/* appends this shader's code to svm_nodes and fills its jump entries */
77 compiler.compile(shader, svm_nodes, i);
/* NOTE(review): &svm_nodes[0] assumes the array is non-empty, i.e. that the
 * scene has at least one shader - confirm */
82 dscene->svm_nodes.copy((uint4*)&svm_nodes[0], svm_nodes.size());
83 device->tex_alloc("__svm_nodes", dscene->svm_nodes);
/* all shaders are compiled now, clear their update flags */
85 for(i = 0; i < scene->shaders.size(); i++) {
86 Shader *shader = scene->shaders[i];
87 shader->need_update = false;
90 device_update_common(device, dscene, scene, progress);
/* Release the data created by device_update(): runs the common free step,
 * frees the svm_nodes texture on the device and clears the array in dscene. */
95 void SVMShaderManager::device_free(Device *device, DeviceScene *dscene)
97 device_free_common(device, dscene);
99 device->tex_free(dscene->svm_nodes);
100 dscene->svm_nodes.clear();
/* Create a compiler bound to the given shader and image managers.
 *
 * shader_manager_:    stored; used by attribute() to look up attribute ids
 * image_manager_:     stored in image_manager
 * use_multi_closure_: stored; compile_type() uses it to choose between
 *                     generate_multi_closure() and generate_closure()
 *
 * NOTE(review): embedded lines 109-110 and 113 are not visible, so further
 * member initialization may exist. */
105 SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_manager_, bool use_multi_closure_)
107 shader_manager = shader_manager_;
108 image_manager = image_manager_;
/* start out as a surface shader compile with no shader selected yet */
111 current_type = SHADER_TYPE_SURFACE;
112 current_shader = NULL;
/* no mix weight stack slot is active outside of generate_multi_closure() */
114 mix_weight_offset = SVM_STACK_INVALID;
115 use_multi_closure = use_multi_closure_;
/* Return the number of stack entries used by a value of the given socket type.
 * The visible branches test FLOAT, COLOR, VECTOR, NORMAL, POINT and CLOSURE.
 * NOTE(review): the value returned by each branch is not visible in this
 * view. */
118 int SVMCompiler::stack_size(ShaderSocketType type)
120 if(type == SHADER_SOCKET_FLOAT)
122 else if(type == SHADER_SOCKET_COLOR)
124 else if(type == SHADER_SOCKET_VECTOR)
126 else if(type == SHADER_SOCKET_NORMAL)
128 else if(type == SHADER_SOCKET_POINT)
130 else if(type == SHADER_SOCKET_CLOSURE)
/* Reserve stack space for a value of the given socket type.
 *
 * Scans active_stack.users from index 0 up to SVM_STACK_SIZE, looking for
 * stack_size(type) free entries in a row; the counter is reset whenever an
 * entry already has users. The entries found are marked as used and
 * max_stack_use is raised to cover them. If the scan finds nothing,
 * "Out of SVM stack space." is printed to stderr.
 *
 * NOTE(review): the lines that increment the counter, compute the offset and
 * return it are not visible in this view. */
137 int SVMCompiler::stack_find_offset(ShaderSocketType type)
139 int size = stack_size(type);
142 /* find free space in stack & mark as used */
143 for(int i = 0, num_unused = 0; i < SVM_STACK_SIZE; i++) {
/* an occupied entry breaks the current run of free entries */
144 if(active_stack.users[i]) num_unused = 0;
/* a long enough run ends at index i */
147 if(num_unused == size) {
/* i+1 is the stack height needed to hold this run */
149 max_stack_use = max(i+1, max_stack_use);
/* walks i backwards giving each visited entry a user count of 1; the
 * enclosing loop header is not visible here */
152 active_stack.users[i--] = 1;
/* reached when no run was found in the whole stack */
158 fprintf(stderr, "Out of SVM stack space.\n");
/* Save the stack allocation state into backup.
 *
 * Copies active_stack and appends the stack_offset of every input and output
 * socket in current_graph, node by node, inputs before outputs.
 * stack_restore() reads the offsets back in the same order.
 *
 * NOTE(review): how the done set is saved (embedded lines 165-166) is not
 * visible in this view. */
164 void SVMCompiler::stack_backup(StackBackup& backup, set<ShaderNode*>& done)
167 backup.stack = active_stack;
169 foreach(ShaderNode *node, current_graph->nodes) {
170 foreach(ShaderInput *input, node->inputs)
171 backup.offsets.push_back(input->stack_offset);
172 foreach(ShaderOutput *output, node->outputs)
173 backup.offsets.push_back(output->stack_offset);
/* Restore the stack allocation state saved by stack_backup().
 *
 * Copies backup.stack back into active_stack and reassigns every socket's
 * stack_offset from backup.offsets. The traversal order (node by node, inputs
 * before outputs) must match stack_backup() so that the indices line up.
 *
 * NOTE(review): the declaration of the running index i and any handling of
 * the done set (embedded lines 178-181) are not visible in this view. */
177 void SVMCompiler::stack_restore(StackBackup& backup, set<ShaderNode*>& done)
182 active_stack = backup.stack;
184 foreach(ShaderNode *node, current_graph->nodes) {
185 foreach(ShaderInput *input, node->inputs)
186 input->stack_offset = backup.offsets[i++];
187 foreach(ShaderOutput *output, node->outputs)
188 output->stack_offset = backup.offsets[i++];
/* Make sure an input socket has a stack offset.
 *
 * Nothing is done when the input already has an offset. A linked input takes
 * over the offset of the output it is linked to. An unlinked input gets a
 * fresh stack slot, plus nodes that load its default value into that slot:
 * one NODE_VALUE_F node for a float, two NODE_VALUE_V nodes (offset, then
 * value) for vector, normal, point and color sockets.
 *
 * NOTE(review): the test that separates the linked from the unlinked case
 * (embedded line 196) and the body of the final else are not visible here. */
192 void SVMCompiler::stack_assign(ShaderInput *input)
194 /* stack offset assign? */
195 if(input->stack_offset == SVM_STACK_INVALID) {
197 /* linked to output -> use output offset */
198 input->stack_offset = input->link->stack_offset;
201 /* not linked to output -> add nodes to load default value */
202 input->stack_offset = stack_find_offset(input->type);
204 if(input->type == SHADER_SOCKET_FLOAT) {
/* the float default is stored as its raw bit pattern in the node */
205 add_node(NODE_VALUE_F, __float_as_int(input->value.x), input->stack_offset);
207 else if(input->type == SHADER_SOCKET_VECTOR ||
208 input->type == SHADER_SOCKET_NORMAL ||
209 input->type == SHADER_SOCKET_POINT ||
210 input->type == SHADER_SOCKET_COLOR) {
/* first node carries the destination offset, second node the value */
212 add_node(NODE_VALUE_V, input->stack_offset);
213 add_node(NODE_VALUE_V, input->value);
215 else /* should not get called for closure */
/* Make sure an output socket has a stack offset, reserving space for its
 * type with stack_find_offset() if it has none yet. */
221 void SVMCompiler::stack_assign(ShaderOutput *output)
223 /* if no stack offset assigned yet, find one */
224 if(output->stack_offset == SVM_STACK_INVALID)
225 output->stack_offset = stack_find_offset(output->type);
/* Let an output share the stack space of the value feeding the given input.
 *
 * When the output has no offset yet, it takes the offset of input->link and
 * the user count of every stack entry in that range is incremented. The
 * assert requires both sockets to have the same stack size.
 *
 * NOTE(review): embedded line 231 is not visible; input->link is dereferenced
 * here, so it is presumably checked there - confirm. */
228 void SVMCompiler::stack_link(ShaderInput *input, ShaderOutput *output)
230 if(output->stack_offset == SVM_STACK_INVALID) {
232 assert(stack_size(output->type) == stack_size(input->link->type));
234 output->stack_offset = input->link->stack_offset;
236 int size = stack_size(output->type);
/* register the output as one more user of each shared entry */
238 for(int i = 0; i < size; i++)
239 active_stack.users[output->stack_offset + i]++;
/* Release the stack space of outputs feeding this node once it is no longer
 * needed.
 *
 * For each linked input of node whose source output has a stack offset, the
 * other consumers of that output are checked against the done set. The visible
 * release code decrements the user count of the output's stack entries and
 * invalidates the offset of the output and of every input linked to it.
 *
 * NOTE(review): the lines that clear all_done and test it before releasing
 * (embedded lines 261-263) are not visible in this view. */
243 void SVMCompiler::stack_clear_users(ShaderNode *node, set<ShaderNode*>& done)
245 /* optimization we should add:
246 find and lower user counts for outputs for which all inputs are done.
247 this is done before the node is compiled, under the assumption that the
248 node will first load all inputs from the stack and then writes its
249 outputs. this used to work, but was disabled because it gave trouble
250 with inputs getting stack positions assigned */
252 foreach(ShaderInput *input, node->inputs) {
253 ShaderOutput *output = input->link;
255 if(output && output->stack_offset != SVM_STACK_INVALID) {
256 bool all_done = true;
258 /* optimization we should add: verify if in->parent is actually used */
/* look for a consumer other than this node that is not in done yet */
259 foreach(ShaderInput *in, output->links)
260 if(in->parent != node && done.find(in->parent) == done.end())
264 int size = stack_size(output->type);
266 for(int i = 0; i < size; i++)
267 active_stack.users[output->stack_offset + i]--;
269 output->stack_offset = SVM_STACK_INVALID;
/* inputs that copied this offset must not keep referring to it */
271 foreach(ShaderInput *in, output->links)
272 in->stack_offset = SVM_STACK_INVALID;
/* Release the stack space that was reserved for default values of this
 * node's unlinked inputs: every input without a link that has a stack offset
 * gets the user count of its entries decremented and its offset invalidated. */
278 void SVMCompiler::stack_clear_temporary(ShaderNode *node)
280 foreach(ShaderInput *input, node->inputs) {
281 if(!input->link && input->stack_offset != SVM_STACK_INVALID) {
282 int size = stack_size(input->type);
284 for(int i = 0; i < size; i++)
285 active_stack.users[input->stack_offset + i]--;
287 input->stack_offset = SVM_STACK_INVALID;
/* Pack four values into one uint: x in bits 0-7, y in bits 8-15, z in bits
 * 16-23 and w in bits 24-31.
 * NOTE(review): embedded lines 293-298 are not visible, so any range checking
 * is not shown; with this expression a value above 255 would spill into the
 * neighbouring fields. */
292 uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
299 return (x) | (y << 8) | (z << 16) | (w << 24);
/* Append one node made of four raw integers to this compiler's svm_nodes. */
302 void SVMCompiler::add_node(int a, int b, int c, int d)
304 svm_nodes.push_back(make_int4(a, b, c, d));
/* Append one node to svm_nodes: the node type in the first component,
 * followed by three integer arguments. */
307 void SVMCompiler::add_node(NodeType type, int a, int b, int c)
309 svm_nodes.push_back(make_int4(type, a, b, c));
/* Append one node to svm_nodes: the node type in the first component and
 * float data from f stored as raw bit patterns (__float_as_int).
 * NOTE(review): only the f.z argument is visible; the two arguments between
 * type and f.z (embedded lines 315-316) are not shown, presumably f.x and
 * f.y - confirm. */
312 void SVMCompiler::add_node(NodeType type, const float3& f)
314 svm_nodes.push_back(make_int4(type,
317 __float_as_int(f.z)));
/* Append one node to svm_nodes that holds float data from f stored as raw bit
 * patterns (__float_as_int); no node type is written.
 * NOTE(review): only the f.w argument is visible; the arguments before it
 * (embedded lines 323-325) are not shown, presumably f.x, f.y and f.z -
 * confirm. */
320 void SVMCompiler::add_node(const float4& f)
322 svm_nodes.push_back(make_int4(
326 __float_as_int(f.w)));
/* Return the attribute id for a named attribute, as given by the shader
 * manager's get_attribute_id(). */
329 uint SVMCompiler::attribute(ustring name)
331 return shader_manager->get_attribute_id(name);
/* Return the attribute id for a standard attribute, as given by the shader
 * manager's get_attribute_id(). */
334 uint SVMCompiler::attribute(Attribute::Standard std)
336 return shader_manager->get_attribute_id(std);
/* Tell whether an input should be left out when collecting and compiling
 * dependencies. The one visible special case is an input linked to a node
 * named "bump" while a displacement shader is being compiled.
 * The node parameter is not used on the visible lines.
 * NOTE(review): the return statements are not visible in this view; callers
 * treat a true result as "skip this input". */
339 bool SVMCompiler::node_skip_input(ShaderNode *node, ShaderInput *input)
341 /* nasty exception .. */
342 if(current_type == SHADER_TYPE_DISPLACEMENT && input->link && input->link->parent->name == ustring("bump"))
/* Collect the nodes that must be compiled before the given input can be read.
 *
 * If the input is linked to a node that is not in done, the function recurses
 * into that node's non-skipped inputs and then inserts the node itself into
 * dependencies. An unlinked input adds nothing.
 *
 * dependencies: set that receives the required nodes
 * done:         nodes already compiled; these end the recursion
 * input:        the input whose upstream nodes are wanted */
348 void SVMCompiler::find_dependencies(set<ShaderNode*>& dependencies, const set<ShaderNode*>& done, ShaderInput *input)
/* node feeding this input, or NULL when the input is not linked */
350 ShaderNode *node = (input->link)? input->link->parent: NULL;
352 if(node && done.find(node) == done.end()) {
353 foreach(ShaderInput *in, node->inputs)
354 if(!node_skip_input(node, in))
355 find_dependencies(dependencies, done, in);
357 dependencies.insert(node);
/* Compile the given set of nodes in dependency order.
 *
 * Repeatedly walks nodes; a node that is not in done has its non-skipped,
 * linked inputs checked against done and is then compiled, after which its
 * stack users and temporary input slots are released. The walk repeats until
 * nodes_done is set.
 *
 * NOTE(review): the loop opening, the lines that clear inputs_done, insert
 * into done and update nodes_done are not visible in this view. */
361 void SVMCompiler::generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNode*>& done)
368 foreach(ShaderNode *node, nodes) {
369 if(done.find(node) == done.end()) {
370 bool inputs_done = true;
/* check whether any upstream node of a relevant input is still pending */
372 foreach(ShaderInput *input, node->inputs)
373 if(!node_skip_input(node, input))
374 if(input->link && done.find(input->link->parent) == done.end())
/* emit the node's code, then release stack space nobody needs any more */
378 node->compile(*this);
379 stack_clear_users(node, done);
380 stack_clear_temporary(node);
387 } while(!nodes_done);
/* Generate code for a closure tree when a single closure is sampled.
 *
 * For a "mix_closure" or "add_closure" node: the dependencies of the Fac
 * input are compiled, a NODE_MIX_CLOSURE or NODE_ADD_CLOSURE node is emitted,
 * then the code for Closure1 and Closure2 is generated one after the other,
 * each followed by NODE_END. The stack state is backed up before and restored
 * after each branch. Finally the z component of the mix/add node is patched
 * with the relative jump to the code of closure 2.
 *
 * For any other node: the dependencies of all non-skipped linked inputs are
 * compiled, then the node itself; nodes named "emission" or "transparent" set
 * the matching flag on current_shader.
 *
 * NOTE(review): several conditions and else lines are not visible in this
 * view (e.g. which of the mix/add node lines runs, and whether each closure
 * input is linked). */
390 void SVMCompiler::generate_closure(ShaderNode *node, set<ShaderNode*>& done)
392 if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
393 ShaderInput *fin = node->input("Fac");
394 ShaderInput *cl1in = node->input("Closure1");
395 ShaderInput *cl2in = node->input("Closure2");
397 /* execute dependencies for mix weight */
399 set<ShaderNode*> dependencies;
400 find_dependencies(dependencies, done, fin);
401 generate_svm_nodes(dependencies, done);
/* index the mix/add node is about to get, needed to patch its jump later */
407 int mix_offset = svm_nodes.size();
/* jump fields are emitted as 0 here and filled in at the end */
410 add_node(NODE_MIX_CLOSURE, fin->stack_offset, 0, 0);
412 add_node(NODE_ADD_CLOSURE, 0, 0, 0);
414 /* generate code for closure 1
415 note we backup all compiler state and restore it afterwards, so one
416 closure choice doesn't influence the other*/
419 stack_backup(backup, done);
421 generate_closure(cl1in->link->parent, done);
422 add_node(NODE_END, 0, 0, 0);
424 stack_restore(backup, done);
/* NOTE(review): presumably the branch for an unlinked Closure1, which emits
 * only the end node - the condition is not visible */
427 add_node(NODE_END, 0, 0, 0);
429 /* generate code for closure 2 */
/* start index of the closure 2 code, the target of the jump */
430 int cl2_offset = svm_nodes.size();
434 stack_backup(backup, done);
436 generate_closure(cl2in->link->parent, done);
437 add_node(NODE_END, 0, 0, 0);
439 stack_restore(backup, done);
/* NOTE(review): presumably the branch for an unlinked Closure2 - the
 * condition is not visible */
442 add_node(NODE_END, 0, 0, 0);
444 /* set jump for mix node, -1 because offset is already
445 incremented when this jump is added to it */
446 svm_nodes[mix_offset].z = cl2_offset - mix_offset - 1;
449 stack_clear_users(node, done);
450 stack_clear_temporary(node);
453 /* execute dependencies for closure */
454 foreach(ShaderInput *in, node->inputs) {
455 if(!node_skip_input(node, in) && in->link) {
456 set<ShaderNode*> dependencies;
457 find_dependencies(dependencies, done, in);
458 generate_svm_nodes(dependencies, done);
462 /* compile closure itself */
463 node->compile(*this);
464 stack_clear_users(node, done);
465 stack_clear_temporary(node);
/* record on the shader which kinds of closures its surface contains */
467 if(node->name == ustring("emission"))
468 current_shader->has_surface_emission = true;
469 if(node->name == ustring("transparent"))
470 current_shader->has_surface_transparent = true;
472 /* end node is added outside of this */
/* Generate code for a closure tree when multi closure is used.
 *
 * in_offset is the stack offset of the weight that applies to this subtree;
 * compile_type() passes SVM_STACK_INVALID for the root.
 *
 * For a "mix_closure" or "add_closure" node: the dependencies of Fac are
 * compiled and both closure subtrees are generated recursively with their own
 * weight offsets. On the visible mix path two float stack slots are reserved
 * and a NODE_MIX_CLOSURE node is emitted with the Fac offset, in_offset and
 * both output offsets packed by encode_uchar4(); on the other path both
 * subtrees simply inherit in_offset.
 *
 * For any other node: its dependencies are compiled, mix_weight_offset is set
 * to in_offset while the node itself is compiled and reset afterwards, and
 * nodes named "emission" or "transparent" set the matching flag on
 * current_shader.
 *
 * NOTE(review): the conditions choosing between the mix and add paths, and
 * those guarding the recursion and the user count decrements, are not visible
 * in this view. */
476 void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& done, uint in_offset)
478 /* todo: the weak point here is that unlike the single closure sampling
479 we will evaluate all nodes even if they are used as input for closures
480 that are unused. it's not clear what would be the best way to skip such
481 nodes at runtime, especially if they are tangled up */
483 if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
484 ShaderInput *fin = node->input("Fac");
485 ShaderInput *cl1in = node->input("Closure1");
486 ShaderInput *cl2in = node->input("Closure2");
488 uint out1_offset = SVM_STACK_INVALID;
489 uint out2_offset = SVM_STACK_INVALID;
/* compile whatever the mix factor depends on */
493 set<ShaderNode*> dependencies;
494 find_dependencies(dependencies, done, fin);
495 generate_svm_nodes(dependencies, done);
/* one float slot per subtree to hold the weight produced by the mix node */
500 out1_offset = stack_find_offset(SHADER_SOCKET_FLOAT);
502 out2_offset = stack_find_offset(SHADER_SOCKET_FLOAT);
504 add_node(NODE_MIX_CLOSURE,
505 encode_uchar4(fin->stack_offset, in_offset, out1_offset, out2_offset));
/* path without a mix node: both subtrees keep the incoming weight */
509 out1_offset = in_offset;
510 out2_offset = in_offset;
514 generate_multi_closure(cl1in->link->parent, done, out1_offset);
/* release the weight slot of subtree 1 */
517 active_stack.users[out1_offset]--;
521 generate_multi_closure(cl2in->link->parent, done, out2_offset);
/* release the weight slot of subtree 2 */
524 active_stack.users[out2_offset]--;
528 /* execute dependencies for closure */
529 foreach(ShaderInput *in, node->inputs) {
530 if(!node_skip_input(node, in) && in->link) {
531 set<ShaderNode*> dependencies;
532 find_dependencies(dependencies, done, in);
533 generate_svm_nodes(dependencies, done);
/* expose the weight offset to the node while it compiles */
537 mix_weight_offset = in_offset;
539 /* compile closure itself */
540 node->compile(*this);
541 stack_clear_users(node, done);
542 stack_clear_temporary(node);
544 mix_weight_offset = SVM_STACK_INVALID;
/* record on the shader which kinds of closures its surface contains */
546 if(node->name == ustring("emission"))
547 current_shader->has_surface_emission = true;
548 if(node->name == ustring("transparent"))
549 current_shader->has_surface_transparent = true;
551 /* end node is added outside of this */
/* Compile one shader type (surface, volume or displacement) of a graph into
 * this compiler's svm_nodes.
 *
 * Picks the "Surface", "Volume" or "Displacement" input of the graph's output
 * node according to type, resets the stack state and every socket's stack
 * offset, sets the matching has_surface / has_volume / has_displacement flag
 * on the shader, generates the closure code starting at the node linked to
 * that input (multi closure or single closure, depending on
 * use_multi_closure), compiles the output node and ends with NODE_END.
 *
 * NOTE(review): the long block comment that opens inside this function has no
 * visible terminator before the next statement, and the conditions around the
 * flag assignments and the generation calls are not visible in this view. */
556 void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType type)
558 /* Converting a shader graph into svm_nodes that can be executed
559 * sequentially on the virtual machine is fairly simple. We can keep
560 * looping over nodes and each time all the inputs of a node are
561 * ready, we add svm_nodes for it that read the inputs from the
562 * stack and write outputs back to the stack.
564 * With the SVM, we always sample only a single closure. We can think
565 * of all closures nodes as a binary tree with mix closures as inner
566 * nodes and other closures as leafs. The SVM will traverse that tree,
567 * each time deciding to go left or right depending on the mix weights,
568 * until a closure is found.
570 * We only execute nodes that are needed for the mix weights and chosen
575 current_graph = graph;
577 /* get input in output node */
578 ShaderNode *node = graph->output();
579 ShaderInput *clin = NULL;
581 if(type == SHADER_TYPE_SURFACE)
582 clin = node->input("Surface");
583 else if(type == SHADER_TYPE_VOLUME)
584 clin = node->input("Volume");
585 else if(type == SHADER_TYPE_DISPLACEMENT)
586 clin = node->input("Displacement");
590 /* clear all compiler state */
591 memset(&active_stack, 0, sizeof(active_stack));
/* no socket owns stack space at the start of a compile */
594 foreach(ShaderNode *node, graph->nodes) {
595 foreach(ShaderInput *input, node->inputs)
596 input->stack_offset = SVM_STACK_INVALID;
597 foreach(ShaderOutput *output, node->outputs)
598 output->stack_offset = SVM_STACK_INVALID;
/* NOTE(review): the lines that set generate are not visible in this view */
602 bool generate = false;
603 if(type == SHADER_TYPE_SURFACE) {
604 /* generate surface shader */
606 shader->has_surface = true;
608 else if(type == SHADER_TYPE_VOLUME) {
609 /* generate volume shader */
611 shader->has_volume = true;
613 else if(type == SHADER_TYPE_DISPLACEMENT) {
614 /* generate displacement shader */
616 shader->has_displacement = true;
/* nodes compiled so far, shared by all generation steps below */
620 set<ShaderNode*> done;
/* the root of the closure tree has no incoming weight */
622 if(use_multi_closure)
623 generate_multi_closure(clin->link->parent, done, SVM_STACK_INVALID);
625 generate_closure(clin->link->parent, done);
629 /* compile output node */
630 node->compile(*this);
632 add_node(NODE_END, 0, 0, 0);
/* Compile all shader types of one shader and append them to the global node
 * array.
 *
 * shader:           shader to compile; its has_* flags are reset and then
 *                   recomputed during compilation
 * global_svm_nodes: shared node array; entries index*2 and index*2 + 1 are
 *                   this shader's two jump nodes
 * index:            index of the shader within the scene
 *
 * Each start offset is read from global_svm_nodes.size() before the new code
 * is appended, so it points at the first appended node. The y component
 * holds the surface offset, z the volume offset and w the displacement
 * offset. When a bump graph exists, entry index*2 + 1 gets the offset of the
 * surface code compiled from graph_bump instead.
 *
 * NOTE(review): this view ends with the displacement append; the remainder
 * of the function is not visible. The lines that clear this compiler's
 * svm_nodes between the compile_type() calls are not visible either. */
635 void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int index)
637 /* copy graph for shader with bump mapping */
638 ShaderNode *node = shader->graph->output();
/* a separate graph copy is only made when both surface and displacement are
 * linked and no copy exists yet */
640 if(node->input("Surface")->link && node->input("Displacement")->link)
641 if(!shader->graph_bump)
642 shader->graph_bump = shader->graph->copy();
/* the first finalize argument differs: false for the plain graph, true for
 * the bump graph */
645 shader->graph->finalize(false, false);
646 if(shader->graph_bump)
647 shader->graph_bump->finalize(true, false);
649 current_shader = shader;
/* flags are set again by compile_type() and the closure generators */
651 shader->has_surface = false;
652 shader->has_surface_emission = false;
653 shader->has_surface_transparent = false;
654 shader->has_volume = false;
655 shader->has_displacement = false;
657 /* generate surface shader */
658 compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
/* both jump entries point at the plain surface code by default */
659 global_svm_nodes[index*2 + 0].y = global_svm_nodes.size();
660 global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
661 global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
/* with a bump graph, the second jump entry is redirected to its surface code */
663 if(shader->graph_bump) {
664 compile_type(shader, shader->graph_bump, SHADER_TYPE_SURFACE);
665 global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
666 global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
669 /* generate volume shader */
670 compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
671 global_svm_nodes[index*2 + 0].z = global_svm_nodes.size();
672 global_svm_nodes[index*2 + 1].z = global_svm_nodes.size();
673 global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
675 /* generate displacement shader */
676 compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
677 global_svm_nodes[index*2 + 0].w = global_svm_nodes.size();
678 global_svm_nodes[index*2 + 1].w = global_svm_nodes.size();
679 global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());