add BLI_strcpy_rlen, replace strcat, which was used in a misleading way.
[blender.git] / intern / cycles / render / svm.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18
19 #include "device.h"
20 #include "graph.h"
21 #include "light.h"
22 #include "mesh.h"
23 #include "scene.h"
24 #include "shader.h"
25 #include "svm.h"
26
27 #include "util_debug.h"
28 #include "util_foreach.h"
29 #include "util_progress.h"
30
31 CCL_NAMESPACE_BEGIN
32
33 /* Shader Manager */
34
35 SVMShaderManager::SVMShaderManager()
36 {
37 }
38
39 SVMShaderManager::~SVMShaderManager()
40 {
41 }
42
43 void SVMShaderManager::reset(Scene *scene)
44 {
45 }
46
47 void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
48 {
49         if(!need_update)
50                 return;
51
52         /* test if we need to update */
53         device_free(device, dscene, scene);
54
55         /* determine which shaders are in use */
56         device_update_shaders_used(scene);
57
58         /* svm_nodes */
59         vector<int4> svm_nodes;
60         size_t i;
61
62         for(i = 0; i < scene->shaders.size(); i++) {
63                 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
64                 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
65         }
66         
67         bool sunsky_done = false;
68         bool use_multi_closure = device->info.advanced_shading;
69
70         for(i = 0; i < scene->shaders.size(); i++) {
71                 Shader *shader = scene->shaders[i];
72
73                 if(progress.get_cancel()) return;
74
75                 assert(shader->graph);
76
77                 if(shader->sample_as_light && shader->has_surface_emission)
78                         scene->light_manager->need_update = true;
79
80                 SVMCompiler compiler(scene->shader_manager, scene->image_manager,
81                         use_multi_closure);
82                 compiler.sunsky = (sunsky_done)? NULL: &dscene->data.sunsky;
83                 compiler.background = ((int)i == scene->default_background);
84                 compiler.compile(shader, svm_nodes, i);
85                 if(!compiler.sunsky)
86                         sunsky_done = true;
87         }
88
89         dscene->svm_nodes.copy((uint4*)&svm_nodes[0], svm_nodes.size());
90         device->tex_alloc("__svm_nodes", dscene->svm_nodes);
91
92         for(i = 0; i < scene->shaders.size(); i++) {
93                 Shader *shader = scene->shaders[i];
94                 shader->need_update = false;
95         }
96
97         device_update_common(device, dscene, scene, progress);
98
99         need_update = false;
100 }
101
102 void SVMShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene)
103 {
104         device_free_common(device, dscene, scene);
105
106         device->tex_free(dscene->svm_nodes);
107         dscene->svm_nodes.clear();
108 }
109
110 /* Graph Compiler */
111
112 SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_manager_, bool use_multi_closure_)
113 {
114         shader_manager = shader_manager_;
115         image_manager = image_manager_;
116         sunsky = NULL;
117         max_stack_use = 0;
118         current_type = SHADER_TYPE_SURFACE;
119         current_shader = NULL;
120         background = false;
121         mix_weight_offset = SVM_STACK_INVALID;
122         use_multi_closure = use_multi_closure_;
123 }
124
125 int SVMCompiler::stack_size(ShaderSocketType type)
126 {
127         int size = 0;
128         
129         switch (type) {
130                 case SHADER_SOCKET_FLOAT:
131                 case SHADER_SOCKET_INT:
132                         size = 1;
133                         break;
134                 case SHADER_SOCKET_COLOR:
135                 case SHADER_SOCKET_VECTOR:
136                 case SHADER_SOCKET_NORMAL:
137                 case SHADER_SOCKET_POINT:
138                         size = 3;
139                         break;
140                 case SHADER_SOCKET_CLOSURE:
141                         size = 0;
142                         break;
143                 default:
144                         assert(0);
145                         break;
146         }
147         
148         return size;
149 }
150
151 int SVMCompiler::stack_find_offset(ShaderSocketType type)
152 {
153         int size = stack_size(type);
154         int offset = -1;
155         
156         /* find free space in stack & mark as used */
157         for(int i = 0, num_unused = 0; i < SVM_STACK_SIZE; i++) {
158                 if(active_stack.users[i]) num_unused = 0;
159                 else num_unused++;
160
161                 if(num_unused == size) {
162                         offset = i+1 - size;
163                         max_stack_use = max(i+1, max_stack_use);
164
165                         while(i >= offset)
166                                 active_stack.users[i--] = 1;
167
168                         return offset;
169                 }
170         }
171
172         fprintf(stderr, "Out of SVM stack space.\n");
173         assert(0);
174
175         return offset;
176 }
177
178 void SVMCompiler::stack_clear_offset(ShaderSocketType type, int offset)
179 {
180         int size = stack_size(type);
181
182         for(int i = 0; i < size; i++)
183                 active_stack.users[offset + i]--;
184 }
185
186 void SVMCompiler::stack_backup(StackBackup& backup, set<ShaderNode*>& done)
187 {
188         backup.done = done;
189         backup.stack = active_stack;
190
191         foreach(ShaderNode *node, current_graph->nodes) {
192                 foreach(ShaderInput *input, node->inputs)
193                         backup.offsets.push_back(input->stack_offset);
194                 foreach(ShaderOutput *output, node->outputs)
195                         backup.offsets.push_back(output->stack_offset);
196         }
197 }
198
199 void SVMCompiler::stack_restore(StackBackup& backup, set<ShaderNode*>& done)
200 {
201         int i = 0;
202
203         done = backup.done;
204         active_stack = backup.stack;
205
206         foreach(ShaderNode *node, current_graph->nodes) {
207                 foreach(ShaderInput *input, node->inputs)
208                         input->stack_offset = backup.offsets[i++];
209                 foreach(ShaderOutput *output, node->outputs)
210                         output->stack_offset = backup.offsets[i++];
211         }
212 }
213
214 void SVMCompiler::stack_assign(ShaderInput *input)
215 {
216         /* stack offset assign? */
217         if(input->stack_offset == SVM_STACK_INVALID) {
218                 if(input->link) {
219                         /* linked to output -> use output offset */
220                         input->stack_offset = input->link->stack_offset;
221                 }
222                 else {
223                         /* not linked to output -> add nodes to load default value */
224                         input->stack_offset = stack_find_offset(input->type);
225
226                         if(input->type == SHADER_SOCKET_FLOAT) {
227                                 add_node(NODE_VALUE_F, __float_as_int(input->value.x), input->stack_offset);
228                         }
229                         else if(input->type == SHADER_SOCKET_INT) {
230                                 add_node(NODE_VALUE_F, (int)input->value.x, input->stack_offset);
231                         }
232                         else if(input->type == SHADER_SOCKET_VECTOR ||
233                                 input->type == SHADER_SOCKET_NORMAL ||
234                                 input->type == SHADER_SOCKET_POINT ||
235                                 input->type == SHADER_SOCKET_COLOR) {
236
237                                 add_node(NODE_VALUE_V, input->stack_offset);
238                                 add_node(NODE_VALUE_V, input->value);
239                         }
240                         else /* should not get called for closure */
241                                 assert(0);
242                 }
243         }
244 }
245
246 void SVMCompiler::stack_assign(ShaderOutput *output)
247 {
248         /* if no stack offset assigned yet, find one */
249         if(output->stack_offset == SVM_STACK_INVALID)
250                 output->stack_offset = stack_find_offset(output->type);
251 }
252
253 void SVMCompiler::stack_link(ShaderInput *input, ShaderOutput *output)
254 {
255         if(output->stack_offset == SVM_STACK_INVALID) {
256                 assert(input->link);
257                 assert(stack_size(output->type) == stack_size(input->link->type));
258
259                 output->stack_offset = input->link->stack_offset;
260
261                 int size = stack_size(output->type);
262
263                 for(int i = 0; i < size; i++)
264                         active_stack.users[output->stack_offset + i]++;
265         }
266 }
267
268 void SVMCompiler::stack_clear_users(ShaderNode *node, set<ShaderNode*>& done)
269 {
270         /* optimization we should add:
271          * find and lower user counts for outputs for which all inputs are done.
272          * this is done before the node is compiled, under the assumption that the
273          * node will first load all inputs from the stack and then writes its
274          * outputs. this used to work, but was disabled because it gave trouble
275          * with inputs getting stack positions assigned */
276
277         foreach(ShaderInput *input, node->inputs) {
278                 ShaderOutput *output = input->link;
279
280                 if(output && output->stack_offset != SVM_STACK_INVALID) {
281                         bool all_done = true;
282
283                         /* optimization we should add: verify if in->parent is actually used */
284                         foreach(ShaderInput *in, output->links)
285                                 if(in->parent != node && done.find(in->parent) == done.end())
286                                         all_done = false;
287
288                         if(all_done) {
289                                 stack_clear_offset(output->type, output->stack_offset);
290                                 output->stack_offset = SVM_STACK_INVALID;
291
292                                 foreach(ShaderInput *in, output->links)
293                                         in->stack_offset = SVM_STACK_INVALID;
294                         }
295                 }
296         }
297 }
298
299 void SVMCompiler::stack_clear_temporary(ShaderNode *node)
300 {
301         foreach(ShaderInput *input, node->inputs) {
302                 if(!input->link && input->stack_offset != SVM_STACK_INVALID) {
303                         stack_clear_offset(input->type, input->stack_offset);
304                         input->stack_offset = SVM_STACK_INVALID;
305                 }
306         }
307 }
308
309 uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
310 {
311         assert(x <= 255);
312         assert(y <= 255);
313         assert(z <= 255);
314         assert(w <= 255);
315
316         return (x) | (y << 8) | (z << 16) | (w << 24);
317 }
318
319 void SVMCompiler::add_node(int a, int b, int c, int d)
320 {
321         svm_nodes.push_back(make_int4(a, b, c, d));
322 }
323
324 void SVMCompiler::add_node(NodeType type, int a, int b, int c)
325 {
326         svm_nodes.push_back(make_int4(type, a, b, c));
327 }
328
329 void SVMCompiler::add_node(NodeType type, const float3& f)
330 {
331         svm_nodes.push_back(make_int4(type,
332                 __float_as_int(f.x),
333                 __float_as_int(f.y),
334                 __float_as_int(f.z)));
335 }
336
337 void SVMCompiler::add_node(const float4& f)
338 {
339         svm_nodes.push_back(make_int4(
340                 __float_as_int(f.x),
341                 __float_as_int(f.y),
342                 __float_as_int(f.z),
343                 __float_as_int(f.w)));
344 }
345
346 void SVMCompiler::add_array(float4 *f, int num)
347 {
348         for(int i = 0; i < num; i++)
349                 add_node(f[i]);
350 }
351
352 uint SVMCompiler::attribute(ustring name)
353 {
354         return shader_manager->get_attribute_id(name);
355 }
356
357 uint SVMCompiler::attribute(AttributeStandard std)
358 {
359         return shader_manager->get_attribute_id(std);
360 }
361
362 bool SVMCompiler::node_skip_input(ShaderNode *node, ShaderInput *input)
363 {
364         /* nasty exception .. */
365         if(current_type == SHADER_TYPE_DISPLACEMENT && input->link && input->link->parent->name == ustring("bump"))
366                 return true;
367         
368         return false;
369 }
370
371 void SVMCompiler::find_dependencies(set<ShaderNode*>& dependencies, const set<ShaderNode*>& done, ShaderInput *input)
372 {
373         ShaderNode *node = (input->link)? input->link->parent: NULL;
374
375         if(node && done.find(node) == done.end()) {
376                 foreach(ShaderInput *in, node->inputs)
377                         if(!node_skip_input(node, in))
378                                 find_dependencies(dependencies, done, in);
379
380                 dependencies.insert(node);
381         }
382 }
383
384 void SVMCompiler::generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNode*>& done)
385 {
386         bool nodes_done;
387
388         do {
389                 nodes_done = true;
390
391                 foreach(ShaderNode *node, nodes) {
392                         if(done.find(node) == done.end()) {
393                                 bool inputs_done = true;
394
395                                 foreach(ShaderInput *input, node->inputs)
396                                         if(!node_skip_input(node, input))
397                                                 if(input->link && done.find(input->link->parent) == done.end())
398                                                         inputs_done = false;
399
400                                 if(inputs_done) {
401                                         node->compile(*this);
402                                         stack_clear_users(node, done);
403                                         stack_clear_temporary(node);
404                                         done.insert(node);
405                                 }
406                                 else
407                                         nodes_done = false;
408                         }
409                 }
410         } while(!nodes_done);
411 }
412
413 void SVMCompiler::generate_closure(ShaderNode *node, set<ShaderNode*>& done)
414 {
415         if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
416                 ShaderInput *fin = node->input("Fac");
417                 ShaderInput *cl1in = node->input("Closure1");
418                 ShaderInput *cl2in = node->input("Closure2");
419
420                 /* execute dependencies for mix weight */
421                 if(fin) {
422                         set<ShaderNode*> dependencies;
423                         find_dependencies(dependencies, done, fin);
424                         generate_svm_nodes(dependencies, done);
425
426                         /* add mix node */
427                         stack_assign(fin);
428                 }
429
430                 int mix_offset = svm_nodes.size();
431
432                 if(fin)
433                         add_node(NODE_MIX_CLOSURE, fin->stack_offset, 0, 0);
434                 else
435                         add_node(NODE_ADD_CLOSURE, 0, 0, 0);
436
437                 /* generate code for closure 1
438                  * note we backup all compiler state and restore it afterwards, so one
439                  * closure choice doesn't influence the other*/
440                 if(cl1in->link) {
441                         StackBackup backup;
442                         stack_backup(backup, done);
443
444                         generate_closure(cl1in->link->parent, done);
445                         add_node(NODE_END, 0, 0, 0);
446
447                         stack_restore(backup, done);
448                 }
449                 else
450                         add_node(NODE_END, 0, 0, 0);
451
452                 /* generate code for closure 2 */
453                 int cl2_offset = svm_nodes.size();
454
455                 if(cl2in->link) {
456                         StackBackup backup;
457                         stack_backup(backup, done);
458
459                         generate_closure(cl2in->link->parent, done);
460                         add_node(NODE_END, 0, 0, 0);
461
462                         stack_restore(backup, done);
463                 }
464                 else
465                         add_node(NODE_END, 0, 0, 0);
466
467                 /* set jump for mix node, -1 because offset is already
468                  * incremented when this jump is added to it */
469                 svm_nodes[mix_offset].z = cl2_offset - mix_offset - 1;
470
471                 done.insert(node);
472                 stack_clear_users(node, done);
473                 stack_clear_temporary(node);
474         }
475         else {
476                 /* execute dependencies for closure */
477                 foreach(ShaderInput *in, node->inputs) {
478                         if(!node_skip_input(node, in) && in->link) {
479                                 set<ShaderNode*> dependencies;
480                                 find_dependencies(dependencies, done, in);
481                                 generate_svm_nodes(dependencies, done);
482                         }
483                 }
484
485                 /* compile closure itself */
486                 node->compile(*this);
487                 stack_clear_users(node, done);
488                 stack_clear_temporary(node);
489
490                 if(node->has_surface_emission())
491                         current_shader->has_surface_emission = true;
492                 if(node->has_surface_transparent())
493                         current_shader->has_surface_transparent = true;
494                 if(node->has_surface_bssrdf())
495                         current_shader->has_surface_bssrdf = true;
496
497                 /* end node is added outside of this */
498         }
499 }
500
501 void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& done, set<ShaderNode*>& closure_done)
502 {
503         /* todo: the weaks point here is that unlike the single closure sampling 
504          * we will evaluate all nodes even if they are used as input for closures
505          * that are unused. it's not clear what would be the best way to skip such
506          * nodes at runtime, especially if they are tangled up  */
507         
508         /* only generate once */
509         if(closure_done.find(node) != closure_done.end())
510                 return;
511
512         closure_done.insert(node);
513
514         if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
515                 /* weighting is already taken care of in ShaderGraph::transform_multi_closure */
516                 ShaderInput *cl1in = node->input("Closure1");
517                 ShaderInput *cl2in = node->input("Closure2");
518
519                 if(cl1in->link)
520                         generate_multi_closure(cl1in->link->parent, done, closure_done);
521                 if(cl2in->link)
522                         generate_multi_closure(cl2in->link->parent, done, closure_done);
523         }
524         else {
525                 /* execute dependencies for closure */
526                 foreach(ShaderInput *in, node->inputs) {
527                         if(!node_skip_input(node, in) && in->link) {
528                                 set<ShaderNode*> dependencies;
529                                 find_dependencies(dependencies, done, in);
530                                 generate_svm_nodes(dependencies, done);
531                         }
532                 }
533
534                 /* closure mix weight */
535                 const char *weight_name = (current_type == SHADER_TYPE_VOLUME)? "VolumeMixWeight": "SurfaceMixWeight";
536                 ShaderInput *weight_in = node->input(weight_name);
537
538                 if(weight_in && (weight_in->link || weight_in->value.x != 1.0f)) {
539                         stack_assign(weight_in);
540                         mix_weight_offset = weight_in->stack_offset;
541                 }
542                 else
543                         mix_weight_offset = SVM_STACK_INVALID;
544
545                 /* compile closure itself */
546                 node->compile(*this);
547                 stack_clear_users(node, done);
548                 stack_clear_temporary(node);
549
550                 mix_weight_offset = SVM_STACK_INVALID;
551
552                 if(node->has_surface_emission())
553                         current_shader->has_surface_emission = true;
554                 if(node->has_surface_transparent())
555                         current_shader->has_surface_transparent = true;
556                 if(node->has_surface_bssrdf())
557                         current_shader->has_surface_bssrdf = true;
558         }
559
560         done.insert(node);
561 }
562
563
564 void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType type)
565 {
566         /* Converting a shader graph into svm_nodes that can be executed
567          * sequentially on the virtual machine is fairly simple. We can keep
568          * looping over nodes and each time all the inputs of a node are
569          * ready, we add svm_nodes for it that read the inputs from the
570          * stack and write outputs back to the stack.
571          *
572          * With the SVM, we always sample only a single closure. We can think
573          * of all closures nodes as a binary tree with mix closures as inner
574          * nodes and other closures as leafs. The SVM will traverse that tree,
575          * each time deciding to go left or right depending on the mix weights,
576          * until a closure is found.
577          *
578          * We only execute nodes that are needed for the mix weights and chosen
579          * closure.
580          */
581
582         current_type = type;
583         current_graph = graph;
584
585         /* get input in output node */
586         ShaderNode *node = graph->output();
587         ShaderInput *clin = NULL;
588         
589         switch (type) {
590                 case SHADER_TYPE_SURFACE:
591                         clin = node->input("Surface");
592                         break;
593                 case SHADER_TYPE_VOLUME:
594                         clin = node->input("Volume");
595                         break;
596                 case SHADER_TYPE_DISPLACEMENT:
597                         clin = node->input("Displacement");
598                         break;
599                 default:
600                         assert(0);
601                         break;
602         }
603
604         /* clear all compiler state */
605         memset(&active_stack, 0, sizeof(active_stack));
606         svm_nodes.clear();
607
608         foreach(ShaderNode *node_iter, graph->nodes) {
609                 foreach(ShaderInput *input, node_iter->inputs)
610                         input->stack_offset = SVM_STACK_INVALID;
611                 foreach(ShaderOutput *output, node_iter->outputs)
612                         output->stack_offset = SVM_STACK_INVALID;
613         }
614
615         if(shader->used) {
616                 if(clin->link) {
617                         bool generate = false;
618                         
619                         switch (type) {
620                                 case SHADER_TYPE_SURFACE: /* generate surface shader */         
621                                         generate = true;
622                                         shader->has_surface = true;
623                                         break;
624                                 case SHADER_TYPE_VOLUME: /* generate volume shader */
625                                         generate = true;
626                                         shader->has_volume = true;
627                                         break;
628                                 case SHADER_TYPE_DISPLACEMENT: /* generate displacement shader */
629                                         generate = true;
630                                         shader->has_displacement = true;
631                                         break;
632                                 default:
633                                         break;
634                         }
635
636                         if(generate) {
637                                 set<ShaderNode*> done;
638
639                                 if(use_multi_closure) {
640                                         set<ShaderNode*> closure_done;
641                                         generate_multi_closure(clin->link->parent, done, closure_done);
642                                 }
643                                 else
644                                         generate_closure(clin->link->parent, done);
645                         }
646                 }
647
648                 /* compile output node */
649                 node->compile(*this);
650         }
651
652         add_node(NODE_END, 0, 0, 0);
653 }
654
655 void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int index)
656 {
657         /* copy graph for shader with bump mapping */
658         ShaderNode *node = shader->graph->output();
659
660         if(node->input("Surface")->link && node->input("Displacement")->link)
661                 if(!shader->graph_bump)
662                         shader->graph_bump = shader->graph->copy();
663
664         /* finalize */
665         shader->graph->finalize(false, false, use_multi_closure);
666         if(shader->graph_bump)
667                 shader->graph_bump->finalize(true, false, use_multi_closure);
668
669         current_shader = shader;
670
671         shader->has_surface = false;
672         shader->has_surface_emission = false;
673         shader->has_surface_transparent = false;
674         shader->has_surface_bssrdf = false;
675         shader->has_volume = false;
676         shader->has_displacement = false;
677
678         /* generate surface shader */
679         compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
680         global_svm_nodes[index*2 + 0].y = global_svm_nodes.size();
681         global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
682         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
683
684         if(shader->graph_bump) {
685                 compile_type(shader, shader->graph_bump, SHADER_TYPE_SURFACE);
686                 global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
687                 global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
688         }
689
690         /* generate volume shader */
691         compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
692         global_svm_nodes[index*2 + 0].z = global_svm_nodes.size();
693         global_svm_nodes[index*2 + 1].z = global_svm_nodes.size();
694         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
695
696         /* generate displacement shader */
697         compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
698         global_svm_nodes[index*2 + 0].w = global_svm_nodes.size();
699         global_svm_nodes[index*2 + 1].w = global_svm_nodes.size();
700         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
701 }
702
703 CCL_NAMESPACE_END
704