eb8a35a271fd3532d48ca1da01615760630876d2
[blender.git] / intern / cycles / render / svm.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "device/device.h"
18 #include "render/graph.h"
19 #include "render/light.h"
20 #include "render/mesh.h"
21 #include "render/nodes.h"
22 #include "render/scene.h"
23 #include "render/shader.h"
24 #include "render/svm.h"
25
26 #include "util/util_logging.h"
27 #include "util/util_foreach.h"
28 #include "util/util_progress.h"
29 #include "util/util_task.h"
30
31 CCL_NAMESPACE_BEGIN
32
33 /* Shader Manager */
34
35 SVMShaderManager::SVMShaderManager()
36 {
37 }
38
39 SVMShaderManager::~SVMShaderManager()
40 {
41 }
42
43 void SVMShaderManager::reset(Scene * /*scene*/)
44 {
45 }
46
47 void SVMShaderManager::device_update_shader(Scene *scene,
48                                             Shader *shader,
49                                             Progress *progress,
50                                             array<int4> *global_svm_nodes)
51 {
52         if(progress->get_cancel()) {
53                 return;
54         }
55         assert(shader->graph);
56
57         array<int4> svm_nodes;
58         svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
59
60         SVMCompiler::Summary summary;
61         SVMCompiler compiler(scene->shader_manager, scene->image_manager, scene->light_manager);
62         compiler.background = (shader == scene->default_background);
63         compiler.compile(scene, shader, svm_nodes, 0, &summary);
64
65         VLOG(2) << "Compilation summary:\n"
66                 << "Shader name: " << shader->name << "\n"
67                 << summary.full_report();
68
69         nodes_lock_.lock();
70         if(shader->use_mis && shader->has_surface_emission) {
71                 scene->light_manager->need_update = true;
72         }
73
74         /* The copy needs to be done inside the lock, if another thread resizes the array 
75          * while memcpy is running, it'll be copying into possibly invalid/freed ram. 
76          */
77         size_t global_nodes_size = global_svm_nodes->size();
78         global_svm_nodes->resize(global_nodes_size + svm_nodes.size());
79         
80         /* Offset local SVM nodes to a global address space. */
81         int4& jump_node = (*global_svm_nodes)[shader->id];
82         jump_node.y = svm_nodes[0].y + global_nodes_size - 1;
83         jump_node.z = svm_nodes[0].z + global_nodes_size - 1;
84         jump_node.w = svm_nodes[0].w + global_nodes_size - 1;
85         /* Copy new nodes to global storage. */
86         memcpy(&(*global_svm_nodes)[global_nodes_size],
87                &svm_nodes[1],
88                sizeof(int4) * (svm_nodes.size() - 1));
89         nodes_lock_.unlock();
90 }
91
92 void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
93 {
94         if(!need_update)
95                 return;
96
97         VLOG(1) << "Total " << scene->shaders.size() << " shaders.";
98
99         double start_time = time_dt();
100
101         /* test if we need to update */
102         device_free(device, dscene, scene);
103
104         /* determine which shaders are in use */
105         device_update_shaders_used(scene);
106
107         /* svm_nodes */
108         array<int4> svm_nodes;
109         size_t i;
110
111         for(i = 0; i < scene->shaders.size(); i++) {
112                 svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
113         }
114
115         TaskPool task_pool;
116         foreach(Shader *shader, scene->shaders) {
117                 task_pool.push(function_bind(&SVMShaderManager::device_update_shader,
118                                              this,
119                                              scene,
120                                              shader,
121                                              &progress,
122                                              &svm_nodes),
123                                false);
124         }
125         task_pool.wait_work();
126
127         if(progress.get_cancel()) {
128                 return;
129         }
130
131         dscene->svm_nodes.steal_data(svm_nodes);
132         dscene->svm_nodes.copy_to_device();
133
134         for(i = 0; i < scene->shaders.size(); i++) {
135                 Shader *shader = scene->shaders[i];
136                 shader->need_update = false;
137         }
138
139         device_update_common(device, dscene, scene, progress);
140
141         need_update = false;
142
143         VLOG(1) << "Shader manager updated "
144                 << scene->shaders.size() << " shaders in "
145                 << time_dt() - start_time << " seconds.";
146 }
147
148 void SVMShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene)
149 {
150         device_free_common(device, dscene, scene);
151
152         dscene->svm_nodes.free();
153 }
154
155 /* Graph Compiler */
156
157 SVMCompiler::SVMCompiler(ShaderManager *shader_manager_,
158                          ImageManager *image_manager_,
159                          LightManager *light_manager_)
160 {
161         shader_manager = shader_manager_;
162         image_manager = image_manager_;
163         light_manager = light_manager_;
164         max_stack_use = 0;
165         current_type = SHADER_TYPE_SURFACE;
166         current_shader = NULL;
167         current_graph = NULL;
168         background = false;
169         mix_weight_offset = SVM_STACK_INVALID;
170         compile_failed = false;
171 }
172
173 int SVMCompiler::stack_size(SocketType::Type type)
174 {
175         int size = 0;
176         
177         switch(type) {
178                 case SocketType::FLOAT:
179                 case SocketType::INT:
180                         size = 1;
181                         break;
182                 case SocketType::COLOR:
183                 case SocketType::VECTOR:
184                 case SocketType::NORMAL:
185                 case SocketType::POINT:
186                         size = 3;
187                         break;
188                 case SocketType::CLOSURE:
189                         size = 0;
190                         break;
191                 default:
192                         assert(0);
193                         break;
194         }
195         
196         return size;
197 }
198
199 int SVMCompiler::stack_find_offset(int size)
200 {
201         int offset = -1;
202         
203         /* find free space in stack & mark as used */
204         for(int i = 0, num_unused = 0; i < SVM_STACK_SIZE; i++) {
205                 if(active_stack.users[i]) num_unused = 0;
206                 else num_unused++;
207
208                 if(num_unused == size) {
209                         offset = i+1 - size;
210                         max_stack_use = max(i+1, max_stack_use);
211
212                         while(i >= offset)
213                                 active_stack.users[i--] = 1;
214
215                         return offset;
216                 }
217         }
218
219         if(!compile_failed) {
220                 compile_failed = true;
221                 fprintf(stderr, "Cycles: out of SVM stack space, shader \"%s\" too big.\n", current_shader->name.c_str());
222         }
223
224         return 0;
225 }
226
227 int SVMCompiler::stack_find_offset(SocketType::Type type)
228 {
229         return stack_find_offset(stack_size(type));
230 }
231
232 void SVMCompiler::stack_clear_offset(SocketType::Type type, int offset)
233 {
234         int size = stack_size(type);
235
236         for(int i = 0; i < size; i++)
237                 active_stack.users[offset + i]--;
238 }
239
240 int SVMCompiler::stack_assign(ShaderInput *input)
241 {
242         /* stack offset assign? */
243         if(input->stack_offset == SVM_STACK_INVALID) {
244                 if(input->link) {
245                         /* linked to output -> use output offset */
246                         input->stack_offset = input->link->stack_offset;
247                 }
248                 else {
249                         Node *node = input->parent;
250
251                         /* not linked to output -> add nodes to load default value */
252                         input->stack_offset = stack_find_offset(input->type());
253
254                         if(input->type() == SocketType::FLOAT) {
255                                 add_node(NODE_VALUE_F, __float_as_int(node->get_float(input->socket_type)), input->stack_offset);
256                         }
257                         else if(input->type() == SocketType::INT) {
258                                 add_node(NODE_VALUE_F, node->get_int(input->socket_type), input->stack_offset);
259                         }
260                         else if(input->type() == SocketType::VECTOR ||
261                                 input->type() == SocketType::NORMAL ||
262                                 input->type() == SocketType::POINT ||
263                                 input->type() == SocketType::COLOR)
264                         {
265
266                                 add_node(NODE_VALUE_V, input->stack_offset);
267                                 add_node(NODE_VALUE_V, node->get_float3(input->socket_type));
268                         }
269                         else /* should not get called for closure */
270                                 assert(0);
271                 }
272         }
273
274         return input->stack_offset;
275 }
276
277 int SVMCompiler::stack_assign(ShaderOutput *output)
278 {
279         /* if no stack offset assigned yet, find one */
280         if(output->stack_offset == SVM_STACK_INVALID)
281                 output->stack_offset = stack_find_offset(output->type());
282
283         return output->stack_offset;
284 }
285
286 int SVMCompiler::stack_assign_if_linked(ShaderInput *input)
287 {
288         if(input->link)
289                 return stack_assign(input);
290
291         return SVM_STACK_INVALID;
292 }
293
294 int SVMCompiler::stack_assign_if_linked(ShaderOutput *output)
295 {
296         if(!output->links.empty())
297                 return stack_assign(output);
298
299         return SVM_STACK_INVALID;
300 }
301
302 void SVMCompiler::stack_link(ShaderInput *input, ShaderOutput *output)
303 {
304         if(output->stack_offset == SVM_STACK_INVALID) {
305                 assert(input->link);
306                 assert(stack_size(output->type()) == stack_size(input->link->type()));
307
308                 output->stack_offset = input->link->stack_offset;
309
310                 int size = stack_size(output->type());
311
312                 for(int i = 0; i < size; i++)
313                         active_stack.users[output->stack_offset + i]++;
314         }
315 }
316
317 void SVMCompiler::stack_clear_users(ShaderNode *node, ShaderNodeSet& done)
318 {
319         /* optimization we should add:
320          * find and lower user counts for outputs for which all inputs are done.
321          * this is done before the node is compiled, under the assumption that the
322          * node will first load all inputs from the stack and then writes its
323          * outputs. this used to work, but was disabled because it gave trouble
324          * with inputs getting stack positions assigned */
325
326         foreach(ShaderInput *input, node->inputs) {
327                 ShaderOutput *output = input->link;
328
329                 if(output && output->stack_offset != SVM_STACK_INVALID) {
330                         bool all_done = true;
331
332                         /* optimization we should add: verify if in->parent is actually used */
333                         foreach(ShaderInput *in, output->links)
334                                 if(in->parent != node && done.find(in->parent) == done.end())
335                                         all_done = false;
336
337                         if(all_done) {
338                                 stack_clear_offset(output->type(), output->stack_offset);
339                                 output->stack_offset = SVM_STACK_INVALID;
340
341                                 foreach(ShaderInput *in, output->links)
342                                         in->stack_offset = SVM_STACK_INVALID;
343                         }
344                 }
345         }
346 }
347
348 void SVMCompiler::stack_clear_temporary(ShaderNode *node)
349 {
350         foreach(ShaderInput *input, node->inputs) {
351                 if(!input->link && input->stack_offset != SVM_STACK_INVALID) {
352                         stack_clear_offset(input->type(), input->stack_offset);
353                         input->stack_offset = SVM_STACK_INVALID;
354                 }
355         }
356 }
357
358 uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
359 {
360         assert(x <= 255);
361         assert(y <= 255);
362         assert(z <= 255);
363         assert(w <= 255);
364
365         return (x) | (y << 8) | (z << 16) | (w << 24);
366 }
367
368 void SVMCompiler::add_node(int a, int b, int c, int d)
369 {
370         current_svm_nodes.push_back_slow(make_int4(a, b, c, d));
371 }
372
373 void SVMCompiler::add_node(ShaderNodeType type, int a, int b, int c)
374 {
375         current_svm_nodes.push_back_slow(make_int4(type, a, b, c));
376 }
377
378 void SVMCompiler::add_node(ShaderNodeType type, const float3& f)
379 {
380         current_svm_nodes.push_back_slow(make_int4(type,
381                 __float_as_int(f.x),
382                 __float_as_int(f.y),
383                 __float_as_int(f.z)));
384 }
385
386 void SVMCompiler::add_node(const float4& f)
387 {
388         current_svm_nodes.push_back_slow(make_int4(
389                 __float_as_int(f.x),
390                 __float_as_int(f.y),
391                 __float_as_int(f.z),
392                 __float_as_int(f.w)));
393 }
394
395 uint SVMCompiler::attribute(ustring name)
396 {
397         return shader_manager->get_attribute_id(name);
398 }
399
400 uint SVMCompiler::attribute(AttributeStandard std)
401 {
402         return shader_manager->get_attribute_id(std);
403 }
404
405 uint SVMCompiler::attribute_standard(ustring name)
406 {
407         AttributeStandard std = Attribute::name_standard(name.c_str());
408         return (std)? attribute(std): attribute(name);
409 }
410
411 bool SVMCompiler::node_skip_input(ShaderNode * /*node*/, ShaderInput *input)
412 {
413         /* nasty exception .. */
414         if(current_type == SHADER_TYPE_DISPLACEMENT && input->link && input->link->parent->special_type == SHADER_SPECIAL_TYPE_BUMP)
415                 return true;
416         
417         return false;
418 }
419
420 void SVMCompiler::find_dependencies(ShaderNodeSet& dependencies,
421                                     const ShaderNodeSet& done,
422                                     ShaderInput *input,
423                                     ShaderNode *skip_node)
424 {
425         ShaderNode *node = (input->link)? input->link->parent: NULL;
426
427         if(node != NULL &&
428            done.find(node) == done.end() &&
429            node != skip_node &&
430            dependencies.find(node) == dependencies.end())
431         {
432                 foreach(ShaderInput *in, node->inputs)
433                         if(!node_skip_input(node, in))
434                                 find_dependencies(dependencies, done, in, skip_node);
435
436                 dependencies.insert(node);
437         }
438 }
439
440 void SVMCompiler::generate_node(ShaderNode *node, ShaderNodeSet& done)
441 {
442         node->compile(*this);
443         stack_clear_users(node, done);
444         stack_clear_temporary(node);
445
446         if(current_type == SHADER_TYPE_SURFACE) {
447                 if(node->has_spatial_varying())
448                         current_shader->has_surface_spatial_varying = true;
449         }
450         else if(current_type == SHADER_TYPE_VOLUME) {
451                 if(node->has_spatial_varying())
452                         current_shader->has_volume_spatial_varying = true;
453         }
454
455         if(node->has_object_dependency()) {
456                 current_shader->has_object_dependency = true;
457         }
458
459         if(node->has_attribute_dependency()) {
460                 current_shader->has_attribute_dependency = true;
461         }
462
463         if(node->has_integrator_dependency()) {
464                 current_shader->has_integrator_dependency = true;
465         }
466 }
467
468 void SVMCompiler::generate_svm_nodes(const ShaderNodeSet& nodes,
469                                      CompilerState *state)
470 {
471         ShaderNodeSet& done = state->nodes_done;
472         vector<bool>& done_flag = state->nodes_done_flag;
473
474         bool nodes_done;
475         do {
476                 nodes_done = true;
477
478                 foreach(ShaderNode *node, nodes) {
479                         if(!done_flag[node->id]) {
480                                 bool inputs_done = true;
481
482                                 foreach(ShaderInput *input, node->inputs)
483                                         if(!node_skip_input(node, input))
484                                                 if(input->link && !done_flag[input->link->parent->id])
485                                                         inputs_done = false;
486
487                                 if(inputs_done) {
488                                         generate_node(node, done);
489                                         done.insert(node);
490                                         done_flag[node->id] = true;
491                                 }
492                                 else
493                                         nodes_done = false;
494                         }
495                 }
496         } while(!nodes_done);
497 }
498
499 void SVMCompiler::generate_closure_node(ShaderNode *node,
500                                         CompilerState *state)
501 {
502         /* execute dependencies for closure */
503         foreach(ShaderInput *in, node->inputs) {
504                 if(!node_skip_input(node, in) && in->link) {
505                         ShaderNodeSet dependencies;
506                         find_dependencies(dependencies, state->nodes_done, in);
507                         generate_svm_nodes(dependencies, state);
508                 }
509         }
510
511         /* closure mix weight */
512         const char *weight_name = (current_type == SHADER_TYPE_VOLUME)? "VolumeMixWeight": "SurfaceMixWeight";
513         ShaderInput *weight_in = node->input(weight_name);
514
515         if(weight_in && (weight_in->link || node->get_float(weight_in->socket_type) != 1.0f))
516                 mix_weight_offset = stack_assign(weight_in);
517         else
518                 mix_weight_offset = SVM_STACK_INVALID;
519
520         /* compile closure itself */
521         generate_node(node, state->nodes_done);
522
523         mix_weight_offset = SVM_STACK_INVALID;
524
525         if(current_type == SHADER_TYPE_SURFACE) {
526                 if(node->has_surface_emission())
527                         current_shader->has_surface_emission = true;
528                 if(node->has_surface_transparent())
529                         current_shader->has_surface_transparent = true;
530                 if(node->has_surface_bssrdf()) {
531                         current_shader->has_surface_bssrdf = true;
532                         if(node->has_bssrdf_bump())
533                                 current_shader->has_bssrdf_bump = true;
534                 }
535                 if(node->has_bump()) {
536                         current_shader->has_bump = true;
537                 }
538         }
539 }
540
541 void SVMCompiler::generated_shared_closure_nodes(ShaderNode *root_node,
542                                                  ShaderNode *node,
543                                                  CompilerState *state,
544                                                  const ShaderNodeSet& shared)
545 {
546         if(shared.find(node) != shared.end()) {
547                 generate_multi_closure(root_node, node, state);
548         }
549         else {
550                 foreach(ShaderInput *in, node->inputs) {
551                         if(in->type() == SocketType::CLOSURE && in->link)
552                                 generated_shared_closure_nodes(root_node,
553                                                                in->link->parent,
554                                                                state,
555                                                                shared);
556                 }
557         }
558 }
559
560 void SVMCompiler::generate_multi_closure(ShaderNode *root_node,
561                                          ShaderNode *node,
562                                          CompilerState *state)
563 {
564         /* only generate once */
565         if(state->closure_done.find(node) != state->closure_done.end())
566                 return;
567
568         state->closure_done.insert(node);
569
570         if(node->special_type == SHADER_SPECIAL_TYPE_COMBINE_CLOSURE) {
571                 /* weighting is already taken care of in ShaderGraph::transform_multi_closure */
572                 ShaderInput *cl1in = node->input("Closure1");
573                 ShaderInput *cl2in = node->input("Closure2");
574                 ShaderInput *facin = node->input("Fac");
575
576                 /* skip empty mix/add closure nodes */
577                 if(!cl1in->link && !cl2in->link)
578                         return;
579
580                 if(facin && facin->link) {
581                         /* mix closure: generate instructions to compute mix weight */
582                         ShaderNodeSet dependencies;
583                         find_dependencies(dependencies, state->nodes_done, facin);
584                         generate_svm_nodes(dependencies, state);
585
586                         /* execute shared dependencies. this is needed to allow skipping
587                          * of zero weight closures and their dependencies later, so we
588                          * ensure that they only skip dependencies that are unique to them */
589                         ShaderNodeSet cl1deps, cl2deps, shareddeps;
590
591                         find_dependencies(cl1deps, state->nodes_done, cl1in);
592                         find_dependencies(cl2deps, state->nodes_done, cl2in);
593
594                         ShaderNodeIDComparator node_id_comp;
595                         set_intersection(cl1deps.begin(), cl1deps.end(),
596                                          cl2deps.begin(), cl2deps.end(),
597                                          std::inserter(shareddeps, shareddeps.begin()),
598                                          node_id_comp);
599
600                         /* it's possible some nodes are not shared between this mix node
601                          * inputs, but still needed to be always executed, this mainly
602                          * happens when a node of current subbranch is used by a parent
603                          * node or so */
604                         if(root_node != node) {
605                                 foreach(ShaderInput *in, root_node->inputs) {
606                                         ShaderNodeSet rootdeps;
607                                         find_dependencies(rootdeps, state->nodes_done, in, node);
608                                         set_intersection(rootdeps.begin(), rootdeps.end(),
609                                                          cl1deps.begin(), cl1deps.end(),
610                                                          std::inserter(shareddeps, shareddeps.begin()),
611                                                          node_id_comp);
612                                         set_intersection(rootdeps.begin(), rootdeps.end(),
613                                                          cl2deps.begin(), cl2deps.end(),
614                                                          std::inserter(shareddeps, shareddeps.begin()),
615                                                          node_id_comp);
616                                 }
617                         }
618
619                         if(!shareddeps.empty()) {
620                                 if(cl1in->link) {
621                                         generated_shared_closure_nodes(root_node,
622                                                                        cl1in->link->parent,
623                                                                        state,
624                                                                        shareddeps);
625                                 }
626                                 if(cl2in->link) {
627                                         generated_shared_closure_nodes(root_node,
628                                                                        cl2in->link->parent,
629                                                                        state,
630                                                                        shareddeps);
631                                 }
632
633                                 generate_svm_nodes(shareddeps, state);
634                         }
635
636                         /* generate instructions for input closure 1 */
637                         if(cl1in->link) {
638                                 /* Add instruction to skip closure and its dependencies if mix
639                                  * weight is zero.
640                                  */
641                                 current_svm_nodes.push_back_slow(make_int4(NODE_JUMP_IF_ONE,
642                                                                       0,
643                                                                       stack_assign(facin),
644                                                                       0));
645                                 int node_jump_skip_index = current_svm_nodes.size() - 1;
646
647                                 generate_multi_closure(root_node, cl1in->link->parent, state);
648
649                                 /* Fill in jump instruction location to be after closure. */
650                                 current_svm_nodes[node_jump_skip_index].y =
651                                         current_svm_nodes.size() - node_jump_skip_index - 1;
652                         }
653
654                         /* generate instructions for input closure 2 */
655                         if(cl2in->link) {
656                                 /* Add instruction to skip closure and its dependencies if mix
657                                  * weight is zero.
658                                  */
659                                 current_svm_nodes.push_back_slow(make_int4(NODE_JUMP_IF_ZERO,
660                                                                       0,
661                                                                       stack_assign(facin),
662                                                                       0));
663                                 int node_jump_skip_index = current_svm_nodes.size() - 1;
664
665                                 generate_multi_closure(root_node, cl2in->link->parent, state);
666
667                                 /* Fill in jump instruction location to be after closure. */
668                                 current_svm_nodes[node_jump_skip_index].y =
669                                         current_svm_nodes.size() - node_jump_skip_index - 1;
670                         }
671
672                         /* unassign */
673                         facin->stack_offset = SVM_STACK_INVALID;
674                 }
675                 else {
676                         /* execute closures and their dependencies, no runtime checks
677                          * to skip closures here because was already optimized due to
678                          * fixed weight or add closure that always needs both */
679                         if(cl1in->link)
680                                 generate_multi_closure(root_node, cl1in->link->parent, state);
681                         if(cl2in->link)
682                                 generate_multi_closure(root_node, cl2in->link->parent, state);
683                 }
684         }
685         else {
686                 generate_closure_node(node, state);
687         }
688
689         state->nodes_done.insert(node);
690         state->nodes_done_flag[node->id] = true;
691 }
692
693
694 void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType type)
695 {
696         /* Converting a shader graph into svm_nodes that can be executed
697          * sequentially on the virtual machine is fairly simple. We can keep
698          * looping over nodes and each time all the inputs of a node are
699          * ready, we add svm_nodes for it that read the inputs from the
700          * stack and write outputs back to the stack.
701          *
702          * With the SVM, we always sample only a single closure. We can think
703          * of all closures nodes as a binary tree with mix closures as inner
704          * nodes and other closures as leafs. The SVM will traverse that tree,
705          * each time deciding to go left or right depending on the mix weights,
706          * until a closure is found.
707          *
708          * We only execute nodes that are needed for the mix weights and chosen
709          * closure.
710          */
711
712         current_type = type;
713         current_graph = graph;
714
715         /* get input in output node */
716         ShaderNode *node = graph->output();
717         ShaderInput *clin = NULL;
718         
719         switch(type) {
720                 case SHADER_TYPE_SURFACE:
721                         clin = node->input("Surface");
722                         break;
723                 case SHADER_TYPE_VOLUME:
724                         clin = node->input("Volume");
725                         break;
726                 case SHADER_TYPE_DISPLACEMENT:
727                         clin = node->input("Displacement");
728                         break;
729                 case SHADER_TYPE_BUMP:
730                         clin = node->input("Normal");
731                         break;
732                 default:
733                         assert(0);
734                         break;
735         }
736
737         /* clear all compiler state */
738         memset(&active_stack, 0, sizeof(active_stack));
739         current_svm_nodes.clear();
740
741         foreach(ShaderNode *node_iter, graph->nodes) {
742                 foreach(ShaderInput *input, node_iter->inputs)
743                         input->stack_offset = SVM_STACK_INVALID;
744                 foreach(ShaderOutput *output, node_iter->outputs)
745                         output->stack_offset = SVM_STACK_INVALID;
746         }
747
748         /* for the bump shader we need add a node to store the shader state */
749         bool need_bump_state = (type == SHADER_TYPE_BUMP) && (shader->displacement_method == DISPLACE_BOTH);
750         int bump_state_offset = SVM_STACK_INVALID;
751         if(need_bump_state) {
752                 bump_state_offset = stack_find_offset(SVM_BUMP_EVAL_STATE_SIZE);
753                 add_node(NODE_ENTER_BUMP_EVAL, bump_state_offset);
754         }
755
756         if(shader->used) {
757                 if(clin->link) {
758                         bool generate = false;
759                         
760                         switch(type) {
761                                 case SHADER_TYPE_SURFACE: /* generate surface shader */         
762                                         generate = true;
763                                         shader->has_surface = true;
764                                         break;
765                                 case SHADER_TYPE_VOLUME: /* generate volume shader */
766                                         generate = true;
767                                         shader->has_volume = true;
768                                         break;
769                                 case SHADER_TYPE_DISPLACEMENT: /* generate displacement shader */
770                                         generate = true;
771                                         shader->has_displacement = true;
772                                         break;
773                                 case SHADER_TYPE_BUMP: /* generate bump shader */
774                                         generate = true;
775                                         break;
776                                 default:
777                                         break;
778                         }
779
780                         if(generate) {
781                                 CompilerState state(graph);
782                                 generate_multi_closure(clin->link->parent,
783                                                        clin->link->parent,
784                                                        &state);
785                         }
786                 }
787
788                 /* compile output node */
789                 node->compile(*this);
790         }
791
792         /* add node to restore state after bump shader has finished */
793         if(need_bump_state) {
794                 add_node(NODE_LEAVE_BUMP_EVAL, bump_state_offset);
795         }
796
797         /* if compile failed, generate empty shader */
798         if(compile_failed) {
799                 current_svm_nodes.clear();
800                 compile_failed = false;
801         }
802
803         /* for bump shaders we fall thru to the surface shader, but if this is any other kind of shader it ends here */
804         if(type != SHADER_TYPE_BUMP) {
805                 add_node(NODE_END, 0, 0, 0);
806         }
807 }
808
809 void SVMCompiler::compile(Scene *scene,
810                           Shader *shader,
811                           array<int4>& svm_nodes,
812                           int index,
813                           Summary *summary)
814 {
815         /* copy graph for shader with bump mapping */
816         ShaderNode *output = shader->graph->output();
817         int start_num_svm_nodes = svm_nodes.size();
818
819         const double time_start = time_dt();
820
821         bool has_bump = (shader->displacement_method != DISPLACE_TRUE) &&
822                         output->input("Surface")->link && output->input("Displacement")->link;
823
824         /* finalize */
825         {
826                 scoped_timer timer((summary != NULL)? &summary->time_finalize: NULL);
827                 shader->graph->finalize(scene,
828                                         has_bump,
829                                         shader->has_integrator_dependency,
830                                         shader->displacement_method == DISPLACE_BOTH);
831         }
832
833         current_shader = shader;
834
835         shader->has_surface = false;
836         shader->has_surface_emission = false;
837         shader->has_surface_transparent = false;
838         shader->has_surface_bssrdf = false;
839         shader->has_bump = has_bump;
840         shader->has_bssrdf_bump = has_bump;
841         shader->has_volume = false;
842         shader->has_displacement = false;
843         shader->has_surface_spatial_varying = false;
844         shader->has_volume_spatial_varying = false;
845         shader->has_object_dependency = false;
846         shader->has_attribute_dependency = false;
847         shader->has_integrator_dependency = false;
848
849         /* generate bump shader */
850         if(has_bump) {
851                 scoped_timer timer((summary != NULL)? &summary->time_generate_bump: NULL);
852                 compile_type(shader, shader->graph, SHADER_TYPE_BUMP);
853                 svm_nodes[index].y = svm_nodes.size();
854                 svm_nodes.append(current_svm_nodes);
855         }
856
857         /* generate surface shader */
858         {
859                 scoped_timer timer((summary != NULL)? &summary->time_generate_surface: NULL);
860                 compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
861                 /* only set jump offset if there's no bump shader, as the bump shader will fall thru to this one if it exists */
862                 if(!has_bump) {
863                         svm_nodes[index].y = svm_nodes.size();
864                 }
865                 svm_nodes.append(current_svm_nodes);
866         }
867
868         /* generate volume shader */
869         {
870                 scoped_timer timer((summary != NULL)? &summary->time_generate_volume: NULL);
871                 compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
872                 svm_nodes[index].z = svm_nodes.size();
873                 svm_nodes.append(current_svm_nodes);
874         }
875
876         /* generate displacement shader */
877         {
878                 scoped_timer timer((summary != NULL)? &summary->time_generate_displacement: NULL);
879                 compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
880                 svm_nodes[index].w = svm_nodes.size();
881                 svm_nodes.append(current_svm_nodes);
882         }
883
884         /* Fill in summary information. */
885         if(summary != NULL) {
886                 summary->time_total = time_dt() - time_start;
887                 summary->peak_stack_usage = max_stack_use;
888                 summary->num_svm_nodes = svm_nodes.size() - start_num_svm_nodes;
889         }
890 }
891
892 /* Compiler summary implementation. */
893
894 SVMCompiler::Summary::Summary()
895         : num_svm_nodes(0),
896           peak_stack_usage(0),
897           time_finalize(0.0),
898           time_generate_surface(0.0),
899           time_generate_bump(0.0),
900           time_generate_volume(0.0),
901           time_generate_displacement(0.0),
902           time_total(0.0)
903 {
904 }
905
906 string SVMCompiler::Summary::full_report() const
907 {
908         string report = "";
909         report += string_printf("Number of SVM nodes: %d\n", num_svm_nodes);
910         report += string_printf("Peak stack usage:    %d\n", peak_stack_usage);
911
912         report += string_printf("Time (in seconds):\n");
913         report += string_printf("Finalize:            %f\n", time_finalize);
914         report += string_printf("  Surface:           %f\n", time_generate_surface);
915         report += string_printf("  Bump:              %f\n", time_generate_bump);
916         report += string_printf("  Volume:            %f\n", time_generate_volume);
917         report += string_printf("  Displacement:      %f\n", time_generate_displacement);
918         report += string_printf("Generate:            %f\n", time_generate_surface +
919                                                              time_generate_bump +
920                                                              time_generate_volume +
921                                                              time_generate_displacement);
922         report += string_printf("Total:               %f\n", time_total);
923
924         return report;
925 }
926
927 /* Global state of the compiler. */
928
929 SVMCompiler::CompilerState::CompilerState(ShaderGraph *graph)
930 {
931         int max_id = 0;
932         foreach(ShaderNode *node, graph->nodes) {
933                 max_id = max(node->id, max_id);
934         }
935         nodes_done_flag.resize(max_id + 1, false);
936 }
937
938 CCL_NAMESPACE_END
939