Cycles / Sky Texture:
[blender.git] / intern / cycles / render / svm.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License
15  */
16
17 #include "device.h"
18 #include "graph.h"
19 #include "light.h"
20 #include "mesh.h"
21 #include "scene.h"
22 #include "shader.h"
23 #include "svm.h"
24
25 #include "util_debug.h"
26 #include "util_foreach.h"
27 #include "util_progress.h"
28
29 CCL_NAMESPACE_BEGIN
30
31 /* Shader Manager */
32
33 SVMShaderManager::SVMShaderManager()
34 {
35 }
36
37 SVMShaderManager::~SVMShaderManager()
38 {
39 }
40
41 void SVMShaderManager::reset(Scene *scene)
42 {
43 }
44
45 void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
46 {
47         if(!need_update)
48                 return;
49
50         /* test if we need to update */
51         device_free(device, dscene, scene);
52
53         /* determine which shaders are in use */
54         device_update_shaders_used(scene);
55
56         /* svm_nodes */
57         vector<int4> svm_nodes;
58         size_t i;
59
60         for(i = 0; i < scene->shaders.size(); i++) {
61                 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
62                 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
63         }
64         
65         bool use_multi_closure = device->info.advanced_shading;
66
67         for(i = 0; i < scene->shaders.size(); i++) {
68                 Shader *shader = scene->shaders[i];
69
70                 if(progress.get_cancel()) return;
71
72                 assert(shader->graph);
73
74                 if(shader->use_mis && shader->has_surface_emission)
75                         scene->light_manager->need_update = true;
76
77                 SVMCompiler compiler(scene->shader_manager, scene->image_manager,
78                         use_multi_closure);
79                 compiler.background = ((int)i == scene->default_background);
80                 compiler.compile(shader, svm_nodes, i);
81         }
82
83         dscene->svm_nodes.copy((uint4*)&svm_nodes[0], svm_nodes.size());
84         device->tex_alloc("__svm_nodes", dscene->svm_nodes);
85
86         for(i = 0; i < scene->shaders.size(); i++) {
87                 Shader *shader = scene->shaders[i];
88                 shader->need_update = false;
89         }
90
91         device_update_common(device, dscene, scene, progress);
92
93         need_update = false;
94 }
95
96 void SVMShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene)
97 {
98         device_free_common(device, dscene, scene);
99
100         device->tex_free(dscene->svm_nodes);
101         dscene->svm_nodes.clear();
102 }
103
104 /* Graph Compiler */
105
106 SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_manager_, bool use_multi_closure_)
107 {
108         shader_manager = shader_manager_;
109         image_manager = image_manager_;
110         max_stack_use = 0;
111         current_type = SHADER_TYPE_SURFACE;
112         current_shader = NULL;
113         background = false;
114         mix_weight_offset = SVM_STACK_INVALID;
115         use_multi_closure = use_multi_closure_;
116 }
117
118 int SVMCompiler::stack_size(ShaderSocketType type)
119 {
120         int size = 0;
121         
122         switch (type) {
123                 case SHADER_SOCKET_FLOAT:
124                 case SHADER_SOCKET_INT:
125                         size = 1;
126                         break;
127                 case SHADER_SOCKET_COLOR:
128                 case SHADER_SOCKET_VECTOR:
129                 case SHADER_SOCKET_NORMAL:
130                 case SHADER_SOCKET_POINT:
131                         size = 3;
132                         break;
133                 case SHADER_SOCKET_CLOSURE:
134                         size = 0;
135                         break;
136                 default:
137                         assert(0);
138                         break;
139         }
140         
141         return size;
142 }
143
144 int SVMCompiler::stack_find_offset(ShaderSocketType type)
145 {
146         int size = stack_size(type);
147         int offset = -1;
148         
149         /* find free space in stack & mark as used */
150         for(int i = 0, num_unused = 0; i < SVM_STACK_SIZE; i++) {
151                 if(active_stack.users[i]) num_unused = 0;
152                 else num_unused++;
153
154                 if(num_unused == size) {
155                         offset = i+1 - size;
156                         max_stack_use = max(i+1, max_stack_use);
157
158                         while(i >= offset)
159                                 active_stack.users[i--] = 1;
160
161                         return offset;
162                 }
163         }
164
165         fprintf(stderr, "Out of SVM stack space.\n");
166         assert(0);
167
168         return offset;
169 }
170
171 void SVMCompiler::stack_clear_offset(ShaderSocketType type, int offset)
172 {
173         int size = stack_size(type);
174
175         for(int i = 0; i < size; i++)
176                 active_stack.users[offset + i]--;
177 }
178
179 void SVMCompiler::stack_backup(StackBackup& backup, set<ShaderNode*>& done)
180 {
181         backup.done = done;
182         backup.stack = active_stack;
183
184         foreach(ShaderNode *node, current_graph->nodes) {
185                 foreach(ShaderInput *input, node->inputs)
186                         backup.offsets.push_back(input->stack_offset);
187                 foreach(ShaderOutput *output, node->outputs)
188                         backup.offsets.push_back(output->stack_offset);
189         }
190 }
191
192 void SVMCompiler::stack_restore(StackBackup& backup, set<ShaderNode*>& done)
193 {
194         int i = 0;
195
196         done = backup.done;
197         active_stack = backup.stack;
198
199         foreach(ShaderNode *node, current_graph->nodes) {
200                 foreach(ShaderInput *input, node->inputs)
201                         input->stack_offset = backup.offsets[i++];
202                 foreach(ShaderOutput *output, node->outputs)
203                         output->stack_offset = backup.offsets[i++];
204         }
205 }
206
207 void SVMCompiler::stack_assign(ShaderInput *input)
208 {
209         /* stack offset assign? */
210         if(input->stack_offset == SVM_STACK_INVALID) {
211                 if(input->link) {
212                         /* linked to output -> use output offset */
213                         input->stack_offset = input->link->stack_offset;
214                 }
215                 else {
216                         /* not linked to output -> add nodes to load default value */
217                         input->stack_offset = stack_find_offset(input->type);
218
219                         if(input->type == SHADER_SOCKET_FLOAT) {
220                                 add_node(NODE_VALUE_F, __float_as_int(input->value.x), input->stack_offset);
221                         }
222                         else if(input->type == SHADER_SOCKET_INT) {
223                                 add_node(NODE_VALUE_F, (int)input->value.x, input->stack_offset);
224                         }
225                         else if(input->type == SHADER_SOCKET_VECTOR ||
226                                 input->type == SHADER_SOCKET_NORMAL ||
227                                 input->type == SHADER_SOCKET_POINT ||
228                                 input->type == SHADER_SOCKET_COLOR) {
229
230                                 add_node(NODE_VALUE_V, input->stack_offset);
231                                 add_node(NODE_VALUE_V, input->value);
232                         }
233                         else /* should not get called for closure */
234                                 assert(0);
235                 }
236         }
237 }
238
239 void SVMCompiler::stack_assign(ShaderOutput *output)
240 {
241         /* if no stack offset assigned yet, find one */
242         if(output->stack_offset == SVM_STACK_INVALID)
243                 output->stack_offset = stack_find_offset(output->type);
244 }
245
246 void SVMCompiler::stack_link(ShaderInput *input, ShaderOutput *output)
247 {
248         if(output->stack_offset == SVM_STACK_INVALID) {
249                 assert(input->link);
250                 assert(stack_size(output->type) == stack_size(input->link->type));
251
252                 output->stack_offset = input->link->stack_offset;
253
254                 int size = stack_size(output->type);
255
256                 for(int i = 0; i < size; i++)
257                         active_stack.users[output->stack_offset + i]++;
258         }
259 }
260
261 void SVMCompiler::stack_clear_users(ShaderNode *node, set<ShaderNode*>& done)
262 {
263         /* optimization we should add:
264          * find and lower user counts for outputs for which all inputs are done.
265          * this is done before the node is compiled, under the assumption that the
266          * node will first load all inputs from the stack and then writes its
267          * outputs. this used to work, but was disabled because it gave trouble
268          * with inputs getting stack positions assigned */
269
270         foreach(ShaderInput *input, node->inputs) {
271                 ShaderOutput *output = input->link;
272
273                 if(output && output->stack_offset != SVM_STACK_INVALID) {
274                         bool all_done = true;
275
276                         /* optimization we should add: verify if in->parent is actually used */
277                         foreach(ShaderInput *in, output->links)
278                                 if(in->parent != node && done.find(in->parent) == done.end())
279                                         all_done = false;
280
281                         if(all_done) {
282                                 stack_clear_offset(output->type, output->stack_offset);
283                                 output->stack_offset = SVM_STACK_INVALID;
284
285                                 foreach(ShaderInput *in, output->links)
286                                         in->stack_offset = SVM_STACK_INVALID;
287                         }
288                 }
289         }
290 }
291
292 void SVMCompiler::stack_clear_temporary(ShaderNode *node)
293 {
294         foreach(ShaderInput *input, node->inputs) {
295                 if(!input->link && input->stack_offset != SVM_STACK_INVALID) {
296                         stack_clear_offset(input->type, input->stack_offset);
297                         input->stack_offset = SVM_STACK_INVALID;
298                 }
299         }
300 }
301
302 uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
303 {
304         assert(x <= 255);
305         assert(y <= 255);
306         assert(z <= 255);
307         assert(w <= 255);
308
309         return (x) | (y << 8) | (z << 16) | (w << 24);
310 }
311
312 void SVMCompiler::add_node(int a, int b, int c, int d)
313 {
314         svm_nodes.push_back(make_int4(a, b, c, d));
315 }
316
317 void SVMCompiler::add_node(NodeType type, int a, int b, int c)
318 {
319         svm_nodes.push_back(make_int4(type, a, b, c));
320 }
321
322 void SVMCompiler::add_node(NodeType type, const float3& f)
323 {
324         svm_nodes.push_back(make_int4(type,
325                 __float_as_int(f.x),
326                 __float_as_int(f.y),
327                 __float_as_int(f.z)));
328 }
329
330 void SVMCompiler::add_node(const float4& f)
331 {
332         svm_nodes.push_back(make_int4(
333                 __float_as_int(f.x),
334                 __float_as_int(f.y),
335                 __float_as_int(f.z),
336                 __float_as_int(f.w)));
337 }
338
339 void SVMCompiler::add_array(float4 *f, int num)
340 {
341         for(int i = 0; i < num; i++)
342                 add_node(f[i]);
343 }
344
345 uint SVMCompiler::attribute(ustring name)
346 {
347         return shader_manager->get_attribute_id(name);
348 }
349
350 uint SVMCompiler::attribute(AttributeStandard std)
351 {
352         return shader_manager->get_attribute_id(std);
353 }
354
355 bool SVMCompiler::node_skip_input(ShaderNode *node, ShaderInput *input)
356 {
357         /* nasty exception .. */
358         if(current_type == SHADER_TYPE_DISPLACEMENT && input->link && input->link->parent->name == ustring("bump"))
359                 return true;
360         
361         return false;
362 }
363
364 void SVMCompiler::find_dependencies(set<ShaderNode*>& dependencies, const set<ShaderNode*>& done, ShaderInput *input)
365 {
366         ShaderNode *node = (input->link)? input->link->parent: NULL;
367
368         if(node && done.find(node) == done.end()) {
369                 foreach(ShaderInput *in, node->inputs)
370                         if(!node_skip_input(node, in))
371                                 find_dependencies(dependencies, done, in);
372
373                 dependencies.insert(node);
374         }
375 }
376
377 void SVMCompiler::generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNode*>& done)
378 {
379         bool nodes_done;
380
381         do {
382                 nodes_done = true;
383
384                 foreach(ShaderNode *node, nodes) {
385                         if(done.find(node) == done.end()) {
386                                 bool inputs_done = true;
387
388                                 foreach(ShaderInput *input, node->inputs)
389                                         if(!node_skip_input(node, input))
390                                                 if(input->link && done.find(input->link->parent) == done.end())
391                                                         inputs_done = false;
392
393                                 if(inputs_done) {
394                                         /* Detect if we have a blackbody converter, to prepare lookup table */
395                                         if(node->has_converter_blackbody())
396                                         current_shader->has_converter_blackbody = true;
397
398                                         node->compile(*this);
399                                         stack_clear_users(node, done);
400                                         stack_clear_temporary(node);
401                                         done.insert(node);
402                                 }
403                                 else
404                                         nodes_done = false;
405                         }
406                 }
407         } while(!nodes_done);
408 }
409
410 void SVMCompiler::generate_closure(ShaderNode *node, set<ShaderNode*>& done)
411 {
412         if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
413                 ShaderInput *fin = node->input("Fac");
414                 ShaderInput *cl1in = node->input("Closure1");
415                 ShaderInput *cl2in = node->input("Closure2");
416
417                 /* execute dependencies for mix weight */
418                 if(fin) {
419                         set<ShaderNode*> dependencies;
420                         find_dependencies(dependencies, done, fin);
421                         generate_svm_nodes(dependencies, done);
422
423                         /* add mix node */
424                         stack_assign(fin);
425                 }
426
427                 int mix_offset = svm_nodes.size();
428
429                 if(fin)
430                         add_node(NODE_MIX_CLOSURE, fin->stack_offset, 0, 0);
431                 else
432                         add_node(NODE_ADD_CLOSURE, 0, 0, 0);
433
434                 /* generate code for closure 1
435                  * note we backup all compiler state and restore it afterwards, so one
436                  * closure choice doesn't influence the other*/
437                 if(cl1in->link) {
438                         StackBackup backup;
439                         stack_backup(backup, done);
440
441                         generate_closure(cl1in->link->parent, done);
442                         add_node(NODE_END, 0, 0, 0);
443
444                         stack_restore(backup, done);
445                 }
446                 else
447                         add_node(NODE_END, 0, 0, 0);
448
449                 /* generate code for closure 2 */
450                 int cl2_offset = svm_nodes.size();
451
452                 if(cl2in->link) {
453                         StackBackup backup;
454                         stack_backup(backup, done);
455
456                         generate_closure(cl2in->link->parent, done);
457                         add_node(NODE_END, 0, 0, 0);
458
459                         stack_restore(backup, done);
460                 }
461                 else
462                         add_node(NODE_END, 0, 0, 0);
463
464                 /* set jump for mix node, -1 because offset is already
465                  * incremented when this jump is added to it */
466                 svm_nodes[mix_offset].z = cl2_offset - mix_offset - 1;
467
468                 done.insert(node);
469                 stack_clear_users(node, done);
470                 stack_clear_temporary(node);
471         }
472         else {
473                 /* execute dependencies for closure */
474                 foreach(ShaderInput *in, node->inputs) {
475                         if(!node_skip_input(node, in) && in->link) {
476                                 set<ShaderNode*> dependencies;
477                                 find_dependencies(dependencies, done, in);
478                                 generate_svm_nodes(dependencies, done);
479                         }
480                 }
481
482                 /* compile closure itself */
483                 node->compile(*this);
484                 stack_clear_users(node, done);
485                 stack_clear_temporary(node);
486
487                 if(node->has_surface_emission())
488                         current_shader->has_surface_emission = true;
489                 if(node->has_surface_transparent())
490                         current_shader->has_surface_transparent = true;
491                 if(node->has_surface_bssrdf()) {
492                         current_shader->has_surface_bssrdf = true;
493                         if(node->has_bssrdf_bump())
494                                 current_shader->has_bssrdf_bump = true;
495                 }
496
497                 /* end node is added outside of this */
498         }
499 }
500
501 void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& done, set<ShaderNode*>& closure_done)
502 {
503         /* todo: the weak point here is that unlike the single closure sampling 
504          * we will evaluate all nodes even if they are used as input for closures
505          * that are unused. it's not clear what would be the best way to skip such
506          * nodes at runtime, especially if they are tangled up  */
507         
508         /* only generate once */
509         if(closure_done.find(node) != closure_done.end())
510                 return;
511
512         closure_done.insert(node);
513
514         if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
515                 /* weighting is already taken care of in ShaderGraph::transform_multi_closure */
516                 ShaderInput *cl1in = node->input("Closure1");
517                 ShaderInput *cl2in = node->input("Closure2");
518
519                 if(cl1in->link)
520                         generate_multi_closure(cl1in->link->parent, done, closure_done);
521                 if(cl2in->link)
522                         generate_multi_closure(cl2in->link->parent, done, closure_done);
523         }
524         else {
525                 /* execute dependencies for closure */
526                 foreach(ShaderInput *in, node->inputs) {
527                         if(!node_skip_input(node, in) && in->link) {
528                                 set<ShaderNode*> dependencies;
529                                 find_dependencies(dependencies, done, in);
530                                 generate_svm_nodes(dependencies, done);
531                         }
532                 }
533
534                 /* closure mix weight */
535                 const char *weight_name = (current_type == SHADER_TYPE_VOLUME)? "VolumeMixWeight": "SurfaceMixWeight";
536                 ShaderInput *weight_in = node->input(weight_name);
537
538                 if(weight_in && (weight_in->link || weight_in->value.x != 1.0f)) {
539                         stack_assign(weight_in);
540                         mix_weight_offset = weight_in->stack_offset;
541                 }
542                 else
543                         mix_weight_offset = SVM_STACK_INVALID;
544
545                 /* compile closure itself */
546                 node->compile(*this);
547                 stack_clear_users(node, done);
548                 stack_clear_temporary(node);
549
550                 mix_weight_offset = SVM_STACK_INVALID;
551
552                 if(node->has_surface_emission())
553                         current_shader->has_surface_emission = true;
554                 if(node->has_surface_transparent())
555                         current_shader->has_surface_transparent = true;
556                 if(node->has_surface_bssrdf()) {
557                         current_shader->has_surface_bssrdf = true;
558                         if(node->has_bssrdf_bump())
559                                 current_shader->has_bssrdf_bump = true;
560                 }
561         }
562
563         done.insert(node);
564 }
565
566
567 void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType type)
568 {
569         /* Converting a shader graph into svm_nodes that can be executed
570          * sequentially on the virtual machine is fairly simple. We can keep
571          * looping over nodes and each time all the inputs of a node are
572          * ready, we add svm_nodes for it that read the inputs from the
573          * stack and write outputs back to the stack.
574          *
575          * With the SVM, we always sample only a single closure. We can think
576          * of all closures nodes as a binary tree with mix closures as inner
577          * nodes and other closures as leafs. The SVM will traverse that tree,
578          * each time deciding to go left or right depending on the mix weights,
579          * until a closure is found.
580          *
581          * We only execute nodes that are needed for the mix weights and chosen
582          * closure.
583          */
584
585         current_type = type;
586         current_graph = graph;
587
588         /* get input in output node */
589         ShaderNode *node = graph->output();
590         ShaderInput *clin = NULL;
591         
592         switch (type) {
593                 case SHADER_TYPE_SURFACE:
594                         clin = node->input("Surface");
595                         break;
596                 case SHADER_TYPE_VOLUME:
597                         clin = node->input("Volume");
598                         break;
599                 case SHADER_TYPE_DISPLACEMENT:
600                         clin = node->input("Displacement");
601                         break;
602                 default:
603                         assert(0);
604                         break;
605         }
606
607         /* clear all compiler state */
608         memset(&active_stack, 0, sizeof(active_stack));
609         svm_nodes.clear();
610
611         foreach(ShaderNode *node_iter, graph->nodes) {
612                 foreach(ShaderInput *input, node_iter->inputs)
613                         input->stack_offset = SVM_STACK_INVALID;
614                 foreach(ShaderOutput *output, node_iter->outputs)
615                         output->stack_offset = SVM_STACK_INVALID;
616         }
617
618         if(shader->used) {
619                 if(clin->link) {
620                         bool generate = false;
621                         
622                         switch (type) {
623                                 case SHADER_TYPE_SURFACE: /* generate surface shader */         
624                                         generate = true;
625                                         shader->has_surface = true;
626                                         break;
627                                 case SHADER_TYPE_VOLUME: /* generate volume shader */
628                                         generate = true;
629                                         shader->has_volume = true;
630                                         break;
631                                 case SHADER_TYPE_DISPLACEMENT: /* generate displacement shader */
632                                         generate = true;
633                                         shader->has_displacement = true;
634                                         break;
635                                 default:
636                                         break;
637                         }
638
639                         if(generate) {
640                                 set<ShaderNode*> done;
641
642                                 if(use_multi_closure) {
643                                         set<ShaderNode*> closure_done;
644                                         generate_multi_closure(clin->link->parent, done, closure_done);
645                                 }
646                                 else
647                                         generate_closure(clin->link->parent, done);
648                         }
649                 }
650
651                 /* compile output node */
652                 node->compile(*this);
653         }
654
655         add_node(NODE_END, 0, 0, 0);
656 }
657
658 void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int index)
659 {
660         /* copy graph for shader with bump mapping */
661         ShaderNode *node = shader->graph->output();
662
663         if(node->input("Surface")->link && node->input("Displacement")->link)
664                 if(!shader->graph_bump)
665                         shader->graph_bump = shader->graph->copy();
666
667         /* finalize */
668         shader->graph->finalize(false, false, use_multi_closure);
669         if(shader->graph_bump)
670                 shader->graph_bump->finalize(true, false, use_multi_closure);
671
672         current_shader = shader;
673
674         shader->has_surface = false;
675         shader->has_surface_emission = false;
676         shader->has_surface_transparent = false;
677         shader->has_surface_bssrdf = false;
678         shader->has_bssrdf_bump = false;
679         shader->has_converter_blackbody = false;
680         shader->has_volume = false;
681         shader->has_displacement = false;
682
683         /* generate surface shader */
684         compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
685         global_svm_nodes[index*2 + 0].y = global_svm_nodes.size();
686         global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
687         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
688
689         if(shader->graph_bump) {
690                 compile_type(shader, shader->graph_bump, SHADER_TYPE_SURFACE);
691                 global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
692                 global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
693         }
694
695         /* generate volume shader */
696         compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
697         global_svm_nodes[index*2 + 0].z = global_svm_nodes.size();
698         global_svm_nodes[index*2 + 1].z = global_svm_nodes.size();
699         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
700
701         /* generate displacement shader */
702         compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
703         global_svm_nodes[index*2 + 0].w = global_svm_nodes.size();
704         global_svm_nodes[index*2 + 1].w = global_svm_nodes.size();
705         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
706 }
707
708 CCL_NAMESPACE_END
709