01fda0a9e019985f7f0b4a2170c1ec398c62de54
[blender.git] / intern / cycles / render / svm.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "device/device.h"
18 #include "render/graph.h"
19 #include "render/light.h"
20 #include "render/mesh.h"
21 #include "render/nodes.h"
22 #include "render/scene.h"
23 #include "render/shader.h"
24 #include "render/svm.h"
25
26 #include "util/util_logging.h"
27 #include "util/util_foreach.h"
28 #include "util/util_progress.h"
29 #include "util/util_task.h"
30
31 CCL_NAMESPACE_BEGIN
32
33 /* Shader Manager */
34
35 SVMShaderManager::SVMShaderManager()
36 {
37 }
38
39 SVMShaderManager::~SVMShaderManager()
40 {
41 }
42
43 void SVMShaderManager::reset(Scene * /*scene*/)
44 {
45 }
46
47 void SVMShaderManager::device_update_shader(Scene *scene,
48                                             Shader *shader,
49                                             Progress *progress,
50                                             array<int4> *global_svm_nodes)
51 {
52         if(progress->get_cancel()) {
53                 return;
54         }
55         assert(shader->graph);
56
57         array<int4> svm_nodes;
58         svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
59
60         SVMCompiler::Summary summary;
61         SVMCompiler compiler(scene->shader_manager, scene->image_manager, scene->light_manager);
62         compiler.background = (shader == scene->default_background);
63         compiler.compile(scene, shader, svm_nodes, 0, &summary);
64
65         VLOG(2) << "Compilation summary:\n"
66                 << "Shader name: " << shader->name << "\n"
67                 << summary.full_report();
68
69         nodes_lock_.lock();
70         if(shader->use_mis && shader->has_surface_emission) {
71                 scene->light_manager->need_update = true;
72         }
73
74         /* The copy needs to be done inside the lock, if another thread resizes the array
75          * while memcpy is running, it'll be copying into possibly invalid/freed ram.
76          */
77         size_t global_nodes_size = global_svm_nodes->size();
78         global_svm_nodes->resize(global_nodes_size + svm_nodes.size());
79
80         /* Offset local SVM nodes to a global address space. */
81         int4& jump_node = (*global_svm_nodes)[shader->id];
82         jump_node.y = svm_nodes[0].y + global_nodes_size - 1;
83         jump_node.z = svm_nodes[0].z + global_nodes_size - 1;
84         jump_node.w = svm_nodes[0].w + global_nodes_size - 1;
85         /* Copy new nodes to global storage. */
86         memcpy(&(*global_svm_nodes)[global_nodes_size],
87                &svm_nodes[1],
88                sizeof(int4) * (svm_nodes.size() - 1));
89         nodes_lock_.unlock();
90 }
91
92 void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
93 {
94         if(!need_update)
95                 return;
96
97         VLOG(1) << "Total " << scene->shaders.size() << " shaders.";
98
99         double start_time = time_dt();
100
101         /* test if we need to update */
102         device_free(device, dscene, scene);
103
104         /* determine which shaders are in use */
105         device_update_shaders_used(scene);
106
107         /* svm_nodes */
108         array<int4> svm_nodes;
109         size_t i;
110
111         for(i = 0; i < scene->shaders.size(); i++) {
112                 svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
113         }
114
115         TaskPool task_pool;
116         foreach(Shader *shader, scene->shaders) {
117                 task_pool.push(function_bind(&SVMShaderManager::device_update_shader,
118                                              this,
119                                              scene,
120                                              shader,
121                                              &progress,
122                                              &svm_nodes),
123                                false);
124         }
125         task_pool.wait_work();
126
127         if(progress.get_cancel()) {
128                 return;
129         }
130
131         dscene->svm_nodes.steal_data(svm_nodes);
132         dscene->svm_nodes.copy_to_device();
133
134         for(i = 0; i < scene->shaders.size(); i++) {
135                 Shader *shader = scene->shaders[i];
136                 shader->need_update = false;
137         }
138
139         device_update_common(device, dscene, scene, progress);
140
141         need_update = false;
142
143         VLOG(1) << "Shader manager updated "
144                 << scene->shaders.size() << " shaders in "
145                 << time_dt() - start_time << " seconds.";
146 }
147
148 void SVMShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene)
149 {
150         device_free_common(device, dscene, scene);
151
152         dscene->svm_nodes.free();
153 }
154
155 /* Graph Compiler */
156
157 SVMCompiler::SVMCompiler(ShaderManager *shader_manager_,
158                          ImageManager *image_manager_,
159                          LightManager *light_manager_)
160 {
161         shader_manager = shader_manager_;
162         image_manager = image_manager_;
163         light_manager = light_manager_;
164         max_stack_use = 0;
165         current_type = SHADER_TYPE_SURFACE;
166         current_shader = NULL;
167         current_graph = NULL;
168         background = false;
169         mix_weight_offset = SVM_STACK_INVALID;
170         compile_failed = false;
171 }
172
173 int SVMCompiler::stack_size(SocketType::Type type)
174 {
175         int size = 0;
176
177         switch(type) {
178                 case SocketType::FLOAT:
179                 case SocketType::INT:
180                         size = 1;
181                         break;
182                 case SocketType::COLOR:
183                 case SocketType::VECTOR:
184                 case SocketType::NORMAL:
185                 case SocketType::POINT:
186                         size = 3;
187                         break;
188                 case SocketType::CLOSURE:
189                         size = 0;
190                         break;
191                 default:
192                         assert(0);
193                         break;
194         }
195
196         return size;
197 }
198
199 int SVMCompiler::stack_find_offset(int size)
200 {
201         int offset = -1;
202
203         /* find free space in stack & mark as used */
204         for(int i = 0, num_unused = 0; i < SVM_STACK_SIZE; i++) {
205                 if(active_stack.users[i]) num_unused = 0;
206                 else num_unused++;
207
208                 if(num_unused == size) {
209                         offset = i+1 - size;
210                         max_stack_use = max(i+1, max_stack_use);
211
212                         while(i >= offset)
213                                 active_stack.users[i--] = 1;
214
215                         if (offset == 255) {
216                                 abort();
217                         }
218                         return offset;
219                 }
220         }
221
222         if(!compile_failed) {
223                 compile_failed = true;
224                 fprintf(stderr, "Cycles: out of SVM stack space, shader \"%s\" too big.\n", current_shader->name.c_str());
225         }
226
227         return 0;
228 }
229
230 int SVMCompiler::stack_find_offset(SocketType::Type type)
231 {
232         return stack_find_offset(stack_size(type));
233 }
234
235 void SVMCompiler::stack_clear_offset(SocketType::Type type, int offset)
236 {
237         int size = stack_size(type);
238
239         for(int i = 0; i < size; i++)
240                 active_stack.users[offset + i]--;
241 }
242
243 int SVMCompiler::stack_assign(ShaderInput *input)
244 {
245         /* stack offset assign? */
246         if(input->stack_offset == SVM_STACK_INVALID) {
247                 if(input->link) {
248                         /* linked to output -> use output offset */
249                         input->stack_offset = input->link->stack_offset;
250                 }
251                 else {
252                         Node *node = input->parent;
253
254                         /* not linked to output -> add nodes to load default value */
255                         input->stack_offset = stack_find_offset(input->type());
256
257                         if(input->type() == SocketType::FLOAT) {
258                                 add_node(NODE_VALUE_F, __float_as_int(node->get_float(input->socket_type)), input->stack_offset);
259                         }
260                         else if(input->type() == SocketType::INT) {
261                                 add_node(NODE_VALUE_F, node->get_int(input->socket_type), input->stack_offset);
262                         }
263                         else if(input->type() == SocketType::VECTOR ||
264                                 input->type() == SocketType::NORMAL ||
265                                 input->type() == SocketType::POINT ||
266                                 input->type() == SocketType::COLOR)
267                         {
268
269                                 add_node(NODE_VALUE_V, input->stack_offset);
270                                 add_node(NODE_VALUE_V, node->get_float3(input->socket_type));
271                         }
272                         else  /* should not get called for closure */
273                                 assert(0);
274                 }
275         }
276
277         return input->stack_offset;
278 }
279
280 int SVMCompiler::stack_assign(ShaderOutput *output)
281 {
282         /* if no stack offset assigned yet, find one */
283         if(output->stack_offset == SVM_STACK_INVALID)
284                 output->stack_offset = stack_find_offset(output->type());
285
286         return output->stack_offset;
287 }
288
289 int SVMCompiler::stack_assign_if_linked(ShaderInput *input)
290 {
291         if(input->link)
292                 return stack_assign(input);
293
294         return SVM_STACK_INVALID;
295 }
296
297 int SVMCompiler::stack_assign_if_linked(ShaderOutput *output)
298 {
299         if(!output->links.empty())
300                 return stack_assign(output);
301
302         return SVM_STACK_INVALID;
303 }
304
305 void SVMCompiler::stack_link(ShaderInput *input, ShaderOutput *output)
306 {
307         if(output->stack_offset == SVM_STACK_INVALID) {
308                 assert(input->link);
309                 assert(stack_size(output->type()) == stack_size(input->link->type()));
310
311                 output->stack_offset = input->link->stack_offset;
312
313                 int size = stack_size(output->type());
314
315                 for(int i = 0; i < size; i++)
316                         active_stack.users[output->stack_offset + i]++;
317         }
318 }
319
320 void SVMCompiler::stack_clear_users(ShaderNode *node, ShaderNodeSet& done)
321 {
322         /* optimization we should add:
323          * find and lower user counts for outputs for which all inputs are done.
324          * this is done before the node is compiled, under the assumption that the
325          * node will first load all inputs from the stack and then writes its
326          * outputs. this used to work, but was disabled because it gave trouble
327          * with inputs getting stack positions assigned */
328
329         foreach(ShaderInput *input, node->inputs) {
330                 ShaderOutput *output = input->link;
331
332                 if(output && output->stack_offset != SVM_STACK_INVALID) {
333                         bool all_done = true;
334
335                         /* optimization we should add: verify if in->parent is actually used */
336                         foreach(ShaderInput *in, output->links)
337                                 if(in->parent != node && done.find(in->parent) == done.end())
338                                         all_done = false;
339
340                         if(all_done) {
341                                 stack_clear_offset(output->type(), output->stack_offset);
342                                 output->stack_offset = SVM_STACK_INVALID;
343
344                                 foreach(ShaderInput *in, output->links)
345                                         in->stack_offset = SVM_STACK_INVALID;
346                         }
347                 }
348         }
349 }
350
351 void SVMCompiler::stack_clear_temporary(ShaderNode *node)
352 {
353         foreach(ShaderInput *input, node->inputs) {
354                 if(!input->link && input->stack_offset != SVM_STACK_INVALID) {
355                         stack_clear_offset(input->type(), input->stack_offset);
356                         input->stack_offset = SVM_STACK_INVALID;
357                 }
358         }
359 }
360
361 uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
362 {
363         assert(x <= 255);
364         assert(y <= 255);
365         assert(z <= 255);
366         assert(w <= 255);
367
368         return (x) | (y << 8) | (z << 16) | (w << 24);
369 }
370
371 void SVMCompiler::add_node(int a, int b, int c, int d)
372 {
373         current_svm_nodes.push_back_slow(make_int4(a, b, c, d));
374 }
375
376 void SVMCompiler::add_node(ShaderNodeType type, int a, int b, int c)
377 {
378         current_svm_nodes.push_back_slow(make_int4(type, a, b, c));
379 }
380
381 void SVMCompiler::add_node(ShaderNodeType type, const float3& f)
382 {
383         current_svm_nodes.push_back_slow(make_int4(type,
384                 __float_as_int(f.x),
385                 __float_as_int(f.y),
386                 __float_as_int(f.z)));
387 }
388
389 void SVMCompiler::add_node(const float4& f)
390 {
391         current_svm_nodes.push_back_slow(make_int4(
392                 __float_as_int(f.x),
393                 __float_as_int(f.y),
394                 __float_as_int(f.z),
395                 __float_as_int(f.w)));
396 }
397
398 uint SVMCompiler::attribute(ustring name)
399 {
400         return shader_manager->get_attribute_id(name);
401 }
402
403 uint SVMCompiler::attribute(AttributeStandard std)
404 {
405         return shader_manager->get_attribute_id(std);
406 }
407
408 uint SVMCompiler::attribute_standard(ustring name)
409 {
410         AttributeStandard std = Attribute::name_standard(name.c_str());
411         return (std)? attribute(std): attribute(name);
412 }
413
414 void SVMCompiler::find_dependencies(ShaderNodeSet& dependencies,
415                                     const ShaderNodeSet& done,
416                                     ShaderInput *input,
417                                     ShaderNode *skip_node)
418 {
419         ShaderNode *node = (input->link)? input->link->parent: NULL;
420         if(node != NULL &&
421            done.find(node) == done.end() &&
422            node != skip_node &&
423            dependencies.find(node) == dependencies.end())
424         {
425                 foreach(ShaderInput *in, node->inputs) {
426                         find_dependencies(dependencies, done, in, skip_node);
427                 }
428                 dependencies.insert(node);
429         }
430 }
431
432 void SVMCompiler::generate_node(ShaderNode *node, ShaderNodeSet& done)
433 {
434         node->compile(*this);
435         stack_clear_users(node, done);
436         stack_clear_temporary(node);
437
438         if(current_type == SHADER_TYPE_SURFACE) {
439                 if(node->has_spatial_varying())
440                         current_shader->has_surface_spatial_varying = true;
441         }
442         else if(current_type == SHADER_TYPE_VOLUME) {
443                 if(node->has_spatial_varying())
444                         current_shader->has_volume_spatial_varying = true;
445         }
446
447         if(node->has_object_dependency()) {
448                 current_shader->has_object_dependency = true;
449         }
450
451         if(node->has_attribute_dependency()) {
452                 current_shader->has_attribute_dependency = true;
453         }
454
455         if(node->has_integrator_dependency()) {
456                 current_shader->has_integrator_dependency = true;
457         }
458 }
459
460 void SVMCompiler::generate_svm_nodes(const ShaderNodeSet& nodes,
461                                      CompilerState *state)
462 {
463         ShaderNodeSet& done = state->nodes_done;
464         vector<bool>& done_flag = state->nodes_done_flag;
465
466         bool nodes_done;
467         do {
468                 nodes_done = true;
469
470                 foreach(ShaderNode *node, nodes) {
471                         if(!done_flag[node->id]) {
472                                 bool inputs_done = true;
473
474                                 foreach(ShaderInput *input, node->inputs) {
475                                         if(input->link && !done_flag[input->link->parent->id]) {
476                                                 inputs_done = false;
477                                         }
478                                 }
479                                 if(inputs_done) {
480                                         generate_node(node, done);
481                                         done.insert(node);
482                                         done_flag[node->id] = true;
483                                 }
484                                 else {
485                                         nodes_done = false;
486                                 }
487                         }
488                 }
489         } while(!nodes_done);
490 }
491
492 void SVMCompiler::generate_closure_node(ShaderNode *node,
493                                         CompilerState *state)
494 {
495         /* execute dependencies for closure */
496         foreach(ShaderInput *in, node->inputs) {
497                 if(in->link != NULL) {
498                         ShaderNodeSet dependencies;
499                         find_dependencies(dependencies, state->nodes_done, in);
500                         generate_svm_nodes(dependencies, state);
501                 }
502         }
503
504         /* closure mix weight */
505         const char *weight_name = (current_type == SHADER_TYPE_VOLUME)? "VolumeMixWeight": "SurfaceMixWeight";
506         ShaderInput *weight_in = node->input(weight_name);
507
508         if(weight_in && (weight_in->link || node->get_float(weight_in->socket_type) != 1.0f))
509                 mix_weight_offset = stack_assign(weight_in);
510         else
511                 mix_weight_offset = SVM_STACK_INVALID;
512
513         /* compile closure itself */
514         generate_node(node, state->nodes_done);
515
516         mix_weight_offset = SVM_STACK_INVALID;
517
518         if(current_type == SHADER_TYPE_SURFACE) {
519                 if(node->has_surface_emission())
520                         current_shader->has_surface_emission = true;
521                 if(node->has_surface_transparent())
522                         current_shader->has_surface_transparent = true;
523                 if(node->has_surface_bssrdf()) {
524                         current_shader->has_surface_bssrdf = true;
525                         if(node->has_bssrdf_bump())
526                                 current_shader->has_bssrdf_bump = true;
527                 }
528                 if(node->has_bump()) {
529                         current_shader->has_bump = true;
530                 }
531         }
532 }
533
534 void SVMCompiler::generated_shared_closure_nodes(ShaderNode *root_node,
535                                                  ShaderNode *node,
536                                                  CompilerState *state,
537                                                  const ShaderNodeSet& shared)
538 {
539         if(shared.find(node) != shared.end()) {
540                 generate_multi_closure(root_node, node, state);
541         }
542         else {
543                 foreach(ShaderInput *in, node->inputs) {
544                         if(in->type() == SocketType::CLOSURE && in->link)
545                                 generated_shared_closure_nodes(root_node,
546                                                                in->link->parent,
547                                                                state,
548                                                                shared);
549                 }
550         }
551 }
552
553 void SVMCompiler::generate_multi_closure(ShaderNode *root_node,
554                                          ShaderNode *node,
555                                          CompilerState *state)
556 {
557         /* only generate once */
558         if(state->closure_done.find(node) != state->closure_done.end())
559                 return;
560
561         state->closure_done.insert(node);
562
563         if(node->special_type == SHADER_SPECIAL_TYPE_COMBINE_CLOSURE) {
564                 /* weighting is already taken care of in ShaderGraph::transform_multi_closure */
565                 ShaderInput *cl1in = node->input("Closure1");
566                 ShaderInput *cl2in = node->input("Closure2");
567                 ShaderInput *facin = node->input("Fac");
568
569                 /* skip empty mix/add closure nodes */
570                 if(!cl1in->link && !cl2in->link)
571                         return;
572
573                 if(facin && facin->link) {
574                         /* mix closure: generate instructions to compute mix weight */
575                         ShaderNodeSet dependencies;
576                         find_dependencies(dependencies, state->nodes_done, facin);
577                         generate_svm_nodes(dependencies, state);
578
579                         /* execute shared dependencies. this is needed to allow skipping
580                          * of zero weight closures and their dependencies later, so we
581                          * ensure that they only skip dependencies that are unique to them */
582                         ShaderNodeSet cl1deps, cl2deps, shareddeps;
583
584                         find_dependencies(cl1deps, state->nodes_done, cl1in);
585                         find_dependencies(cl2deps, state->nodes_done, cl2in);
586
587                         ShaderNodeIDComparator node_id_comp;
588                         set_intersection(cl1deps.begin(), cl1deps.end(),
589                                          cl2deps.begin(), cl2deps.end(),
590                                          std::inserter(shareddeps, shareddeps.begin()),
591                                          node_id_comp);
592
593                         /* it's possible some nodes are not shared between this mix node
594                          * inputs, but still needed to be always executed, this mainly
595                          * happens when a node of current subbranch is used by a parent
596                          * node or so */
597                         if(root_node != node) {
598                                 foreach(ShaderInput *in, root_node->inputs) {
599                                         ShaderNodeSet rootdeps;
600                                         find_dependencies(rootdeps, state->nodes_done, in, node);
601                                         set_intersection(rootdeps.begin(), rootdeps.end(),
602                                                          cl1deps.begin(), cl1deps.end(),
603                                                          std::inserter(shareddeps, shareddeps.begin()),
604                                                          node_id_comp);
605                                         set_intersection(rootdeps.begin(), rootdeps.end(),
606                                                          cl2deps.begin(), cl2deps.end(),
607                                                          std::inserter(shareddeps, shareddeps.begin()),
608                                                          node_id_comp);
609                                 }
610                         }
611
612                         if(!shareddeps.empty()) {
613                                 if(cl1in->link) {
614                                         generated_shared_closure_nodes(root_node,
615                                                                        cl1in->link->parent,
616                                                                        state,
617                                                                        shareddeps);
618                                 }
619                                 if(cl2in->link) {
620                                         generated_shared_closure_nodes(root_node,
621                                                                        cl2in->link->parent,
622                                                                        state,
623                                                                        shareddeps);
624                                 }
625
626                                 generate_svm_nodes(shareddeps, state);
627                         }
628
629                         /* generate instructions for input closure 1 */
630                         if(cl1in->link) {
631                                 /* Add instruction to skip closure and its dependencies if mix
632                                  * weight is zero.
633                                  */
634                                 current_svm_nodes.push_back_slow(make_int4(NODE_JUMP_IF_ONE,
635                                                                       0,
636                                                                       stack_assign(facin),
637                                                                       0));
638                                 int node_jump_skip_index = current_svm_nodes.size() - 1;
639
640                                 generate_multi_closure(root_node, cl1in->link->parent, state);
641
642                                 /* Fill in jump instruction location to be after closure. */
643                                 current_svm_nodes[node_jump_skip_index].y =
644                                         current_svm_nodes.size() - node_jump_skip_index - 1;
645                         }
646
647                         /* generate instructions for input closure 2 */
648                         if(cl2in->link) {
649                                 /* Add instruction to skip closure and its dependencies if mix
650                                  * weight is zero.
651                                  */
652                                 current_svm_nodes.push_back_slow(make_int4(NODE_JUMP_IF_ZERO,
653                                                                       0,
654                                                                       stack_assign(facin),
655                                                                       0));
656                                 int node_jump_skip_index = current_svm_nodes.size() - 1;
657
658                                 generate_multi_closure(root_node, cl2in->link->parent, state);
659
660                                 /* Fill in jump instruction location to be after closure. */
661                                 current_svm_nodes[node_jump_skip_index].y =
662                                         current_svm_nodes.size() - node_jump_skip_index - 1;
663                         }
664
665                         /* unassign */
666                         facin->stack_offset = SVM_STACK_INVALID;
667                 }
668                 else {
669                         /* execute closures and their dependencies, no runtime checks
670                          * to skip closures here because was already optimized due to
671                          * fixed weight or add closure that always needs both */
672                         if(cl1in->link)
673                                 generate_multi_closure(root_node, cl1in->link->parent, state);
674                         if(cl2in->link)
675                                 generate_multi_closure(root_node, cl2in->link->parent, state);
676                 }
677         }
678         else {
679                 generate_closure_node(node, state);
680         }
681
682         state->nodes_done.insert(node);
683         state->nodes_done_flag[node->id] = true;
684 }
685
686
687 void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType type)
688 {
689         /* Converting a shader graph into svm_nodes that can be executed
690          * sequentially on the virtual machine is fairly simple. We can keep
691          * looping over nodes and each time all the inputs of a node are
692          * ready, we add svm_nodes for it that read the inputs from the
693          * stack and write outputs back to the stack.
694          *
695          * With the SVM, we always sample only a single closure. We can think
696          * of all closures nodes as a binary tree with mix closures as inner
697          * nodes and other closures as leafs. The SVM will traverse that tree,
698          * each time deciding to go left or right depending on the mix weights,
699          * until a closure is found.
700          *
701          * We only execute nodes that are needed for the mix weights and chosen
702          * closure.
703          */
704
705         current_type = type;
706         current_graph = graph;
707
708         /* get input in output node */
709         ShaderNode *node = graph->output();
710         ShaderInput *clin = NULL;
711
712         switch(type) {
713                 case SHADER_TYPE_SURFACE:
714                         clin = node->input("Surface");
715                         break;
716                 case SHADER_TYPE_VOLUME:
717                         clin = node->input("Volume");
718                         break;
719                 case SHADER_TYPE_DISPLACEMENT:
720                         clin = node->input("Displacement");
721                         break;
722                 case SHADER_TYPE_BUMP:
723                         clin = node->input("Normal");
724                         break;
725                 default:
726                         assert(0);
727                         break;
728         }
729
730         /* clear all compiler state */
731         memset((void *)&active_stack, 0, sizeof(active_stack));
732         current_svm_nodes.clear();
733
734         foreach(ShaderNode *node_iter, graph->nodes) {
735                 foreach(ShaderInput *input, node_iter->inputs)
736                         input->stack_offset = SVM_STACK_INVALID;
737                 foreach(ShaderOutput *output, node_iter->outputs)
738                         output->stack_offset = SVM_STACK_INVALID;
739         }
740
741         /* for the bump shader we need add a node to store the shader state */
742         bool need_bump_state = (type == SHADER_TYPE_BUMP) && (shader->displacement_method == DISPLACE_BOTH);
743         int bump_state_offset = SVM_STACK_INVALID;
744         if(need_bump_state) {
745                 bump_state_offset = stack_find_offset(SVM_BUMP_EVAL_STATE_SIZE);
746                 add_node(NODE_ENTER_BUMP_EVAL, bump_state_offset);
747         }
748
749         if(shader->used) {
750                 if(clin->link) {
751                         bool generate = false;
752
753                         switch(type) {
754                                 case SHADER_TYPE_SURFACE: /* generate surface shader */
755                                         generate = true;
756                                         shader->has_surface = true;
757                                         break;
758                                 case SHADER_TYPE_VOLUME: /* generate volume shader */
759                                         generate = true;
760                                         shader->has_volume = true;
761                                         break;
762                                 case SHADER_TYPE_DISPLACEMENT: /* generate displacement shader */
763                                         generate = true;
764                                         shader->has_displacement = true;
765                                         break;
766                                 case SHADER_TYPE_BUMP: /* generate bump shader */
767                                         generate = true;
768                                         break;
769                                 default:
770                                         break;
771                         }
772
773                         if(generate) {
774                                 CompilerState state(graph);
775                                 generate_multi_closure(clin->link->parent,
776                                                        clin->link->parent,
777                                                        &state);
778                         }
779                 }
780
781                 /* compile output node */
782                 node->compile(*this);
783         }
784
785         /* add node to restore state after bump shader has finished */
786         if(need_bump_state) {
787                 add_node(NODE_LEAVE_BUMP_EVAL, bump_state_offset);
788         }
789
790         /* if compile failed, generate empty shader */
791         if(compile_failed) {
792                 current_svm_nodes.clear();
793                 compile_failed = false;
794         }
795
796         /* for bump shaders we fall thru to the surface shader, but if this is any other kind of shader it ends here */
797         if(type != SHADER_TYPE_BUMP) {
798                 add_node(NODE_END, 0, 0, 0);
799         }
800 }
801
802 void SVMCompiler::compile(Scene *scene,
803                           Shader *shader,
804                           array<int4>& svm_nodes,
805                           int index,
806                           Summary *summary)
807 {
808         /* copy graph for shader with bump mapping */
809         ShaderNode *output = shader->graph->output();
810         int start_num_svm_nodes = svm_nodes.size();
811
812         const double time_start = time_dt();
813
814         bool has_bump = (shader->displacement_method != DISPLACE_TRUE) &&
815                         output->input("Surface")->link && output->input("Displacement")->link;
816
817         /* finalize */
818         {
819                 scoped_timer timer((summary != NULL)? &summary->time_finalize: NULL);
820                 shader->graph->finalize(scene,
821                                         has_bump,
822                                         shader->has_integrator_dependency,
823                                         shader->displacement_method == DISPLACE_BOTH);
824         }
825
826         current_shader = shader;
827
828         shader->has_surface = false;
829         shader->has_surface_emission = false;
830         shader->has_surface_transparent = false;
831         shader->has_surface_bssrdf = false;
832         shader->has_bump = has_bump;
833         shader->has_bssrdf_bump = has_bump;
834         shader->has_volume = false;
835         shader->has_displacement = false;
836         shader->has_surface_spatial_varying = false;
837         shader->has_volume_spatial_varying = false;
838         shader->has_object_dependency = false;
839         shader->has_attribute_dependency = false;
840         shader->has_integrator_dependency = false;
841
842         /* generate bump shader */
843         if(has_bump) {
844                 scoped_timer timer((summary != NULL)? &summary->time_generate_bump: NULL);
845                 compile_type(shader, shader->graph, SHADER_TYPE_BUMP);
846                 svm_nodes[index].y = svm_nodes.size();
847                 svm_nodes.append(current_svm_nodes);
848         }
849
850         /* generate surface shader */
851         {
852                 scoped_timer timer((summary != NULL)? &summary->time_generate_surface: NULL);
853                 compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
854                 /* only set jump offset if there's no bump shader, as the bump shader will fall thru to this one if it exists */
855                 if(!has_bump) {
856                         svm_nodes[index].y = svm_nodes.size();
857                 }
858                 svm_nodes.append(current_svm_nodes);
859         }
860
861         /* generate volume shader */
862         {
863                 scoped_timer timer((summary != NULL)? &summary->time_generate_volume: NULL);
864                 compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
865                 svm_nodes[index].z = svm_nodes.size();
866                 svm_nodes.append(current_svm_nodes);
867         }
868
869         /* generate displacement shader */
870         {
871                 scoped_timer timer((summary != NULL)? &summary->time_generate_displacement: NULL);
872                 compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
873                 svm_nodes[index].w = svm_nodes.size();
874                 svm_nodes.append(current_svm_nodes);
875         }
876
877         /* Fill in summary information. */
878         if(summary != NULL) {
879                 summary->time_total = time_dt() - time_start;
880                 summary->peak_stack_usage = max_stack_use;
881                 summary->num_svm_nodes = svm_nodes.size() - start_num_svm_nodes;
882         }
883 }
884
885 /* Compiler summary implementation. */
886
887 SVMCompiler::Summary::Summary()
888         : num_svm_nodes(0),
889           peak_stack_usage(0),
890           time_finalize(0.0),
891           time_generate_surface(0.0),
892           time_generate_bump(0.0),
893           time_generate_volume(0.0),
894           time_generate_displacement(0.0),
895           time_total(0.0)
896 {
897 }
898
899 string SVMCompiler::Summary::full_report() const
900 {
901         string report = "";
902         report += string_printf("Number of SVM nodes: %d\n", num_svm_nodes);
903         report += string_printf("Peak stack usage:    %d\n", peak_stack_usage);
904
905         report += string_printf("Time (in seconds):\n");
906         report += string_printf("Finalize:            %f\n", time_finalize);
907         report += string_printf("  Surface:           %f\n", time_generate_surface);
908         report += string_printf("  Bump:              %f\n", time_generate_bump);
909         report += string_printf("  Volume:            %f\n", time_generate_volume);
910         report += string_printf("  Displacement:      %f\n", time_generate_displacement);
911         report += string_printf("Generate:            %f\n", time_generate_surface +
912                                                              time_generate_bump +
913                                                              time_generate_volume +
914                                                              time_generate_displacement);
915         report += string_printf("Total:               %f\n", time_total);
916
917         return report;
918 }
919
920 /* Global state of the compiler. */
921
922 SVMCompiler::CompilerState::CompilerState(ShaderGraph *graph)
923 {
924         int max_id = 0;
925         foreach(ShaderNode *node, graph->nodes) {
926                 max_id = max(node->id, max_id);
927         }
928         nodes_done_flag.resize(max_id + 1, false);
929 }
930
931 CCL_NAMESPACE_END