Code refactor: move more memory allocation logic into device API.
[blender-staging.git] / intern / cycles / render / svm.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "device/device.h"
18 #include "render/graph.h"
19 #include "render/light.h"
20 #include "render/mesh.h"
21 #include "render/nodes.h"
22 #include "render/scene.h"
23 #include "render/shader.h"
24 #include "render/svm.h"
25
26 #include "util/util_debug.h"
27 #include "util/util_logging.h"
28 #include "util/util_foreach.h"
29 #include "util/util_progress.h"
30 #include "util/util_task.h"
31
32 CCL_NAMESPACE_BEGIN
33
34 /* Shader Manager */
35
36 SVMShaderManager::SVMShaderManager()
37 {
38 }
39
40 SVMShaderManager::~SVMShaderManager()
41 {
42 }
43
44 void SVMShaderManager::reset(Scene * /*scene*/)
45 {
46 }
47
48 void SVMShaderManager::device_update_shader(Scene *scene,
49                                             Shader *shader,
50                                             Progress *progress,
51                                             array<int4> *global_svm_nodes)
52 {
53         if(progress->get_cancel()) {
54                 return;
55         }
56         assert(shader->graph);
57
58         array<int4> svm_nodes;
59         svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
60
61         SVMCompiler::Summary summary;
62         SVMCompiler compiler(scene->shader_manager, scene->image_manager);
63         compiler.background = (shader == scene->default_background);
64         compiler.compile(scene, shader, svm_nodes, 0, &summary);
65
66         VLOG(2) << "Compilation summary:\n"
67                 << "Shader name: " << shader->name << "\n"
68                 << summary.full_report();
69
70         nodes_lock_.lock();
71         if(shader->use_mis && shader->has_surface_emission) {
72                 scene->light_manager->need_update = true;
73         }
74
75         /* The copy needs to be done inside the lock, if another thread resizes the array 
76          * while memcpy is running, it'll be copying into possibly invalid/freed ram. 
77          */
78         size_t global_nodes_size = global_svm_nodes->size();
79         global_svm_nodes->resize(global_nodes_size + svm_nodes.size());
80         
81         /* Offset local SVM nodes to a global address space. */
82         int4& jump_node = (*global_svm_nodes)[shader->id];
83         jump_node.y = svm_nodes[0].y + global_nodes_size - 1;
84         jump_node.z = svm_nodes[0].z + global_nodes_size - 1;
85         jump_node.w = svm_nodes[0].w + global_nodes_size - 1;
86         /* Copy new nodes to global storage. */
87         memcpy(&(*global_svm_nodes)[global_nodes_size],
88                &svm_nodes[1],
89                sizeof(int4) * (svm_nodes.size() - 1));
90         nodes_lock_.unlock();
91 }
92
93 void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
94 {
95         if(!need_update)
96                 return;
97
98         VLOG(1) << "Total " << scene->shaders.size() << " shaders.";
99
100         double start_time = time_dt();
101
102         /* test if we need to update */
103         device_free(device, dscene, scene);
104
105         /* determine which shaders are in use */
106         device_update_shaders_used(scene);
107
108         /* svm_nodes */
109         array<int4> svm_nodes;
110         size_t i;
111
112         for(i = 0; i < scene->shaders.size(); i++) {
113                 svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
114         }
115
116         TaskPool task_pool;
117         foreach(Shader *shader, scene->shaders) {
118                 task_pool.push(function_bind(&SVMShaderManager::device_update_shader,
119                                              this,
120                                              scene,
121                                              shader,
122                                              &progress,
123                                              &svm_nodes),
124                                false);
125         }
126         task_pool.wait_work();
127
128         if(progress.get_cancel()) {
129                 return;
130         }
131
132         dscene->svm_nodes.steal_data(svm_nodes);
133         dscene->svm_nodes.copy_to_device();
134
135         for(i = 0; i < scene->shaders.size(); i++) {
136                 Shader *shader = scene->shaders[i];
137                 shader->need_update = false;
138         }
139
140         device_update_common(device, dscene, scene, progress);
141
142         need_update = false;
143
144         VLOG(1) << "Shader manager updated "
145                 << scene->shaders.size() << " shaders in "
146                 << time_dt() - start_time << " seconds.";
147 }
148
149 void SVMShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene)
150 {
151         device_free_common(device, dscene, scene);
152
153         dscene->svm_nodes.free();
154 }
155
156 /* Graph Compiler */
157
158 SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_manager_)
159 {
160         shader_manager = shader_manager_;
161         image_manager = image_manager_;
162         max_stack_use = 0;
163         current_type = SHADER_TYPE_SURFACE;
164         current_shader = NULL;
165         current_graph = NULL;
166         background = false;
167         mix_weight_offset = SVM_STACK_INVALID;
168         compile_failed = false;
169 }
170
171 int SVMCompiler::stack_size(SocketType::Type type)
172 {
173         int size = 0;
174         
175         switch(type) {
176                 case SocketType::FLOAT:
177                 case SocketType::INT:
178                         size = 1;
179                         break;
180                 case SocketType::COLOR:
181                 case SocketType::VECTOR:
182                 case SocketType::NORMAL:
183                 case SocketType::POINT:
184                         size = 3;
185                         break;
186                 case SocketType::CLOSURE:
187                         size = 0;
188                         break;
189                 default:
190                         assert(0);
191                         break;
192         }
193         
194         return size;
195 }
196
197 int SVMCompiler::stack_find_offset(int size)
198 {
199         int offset = -1;
200         
201         /* find free space in stack & mark as used */
202         for(int i = 0, num_unused = 0; i < SVM_STACK_SIZE; i++) {
203                 if(active_stack.users[i]) num_unused = 0;
204                 else num_unused++;
205
206                 if(num_unused == size) {
207                         offset = i+1 - size;
208                         max_stack_use = max(i+1, max_stack_use);
209
210                         while(i >= offset)
211                                 active_stack.users[i--] = 1;
212
213                         return offset;
214                 }
215         }
216
217         if(!compile_failed) {
218                 compile_failed = true;
219                 fprintf(stderr, "Cycles: out of SVM stack space, shader \"%s\" too big.\n", current_shader->name.c_str());
220         }
221
222         return 0;
223 }
224
225 int SVMCompiler::stack_find_offset(SocketType::Type type)
226 {
227         return stack_find_offset(stack_size(type));
228 }
229
230 void SVMCompiler::stack_clear_offset(SocketType::Type type, int offset)
231 {
232         int size = stack_size(type);
233
234         for(int i = 0; i < size; i++)
235                 active_stack.users[offset + i]--;
236 }
237
238 int SVMCompiler::stack_assign(ShaderInput *input)
239 {
240         /* stack offset assign? */
241         if(input->stack_offset == SVM_STACK_INVALID) {
242                 if(input->link) {
243                         /* linked to output -> use output offset */
244                         input->stack_offset = input->link->stack_offset;
245                 }
246                 else {
247                         Node *node = input->parent;
248
249                         /* not linked to output -> add nodes to load default value */
250                         input->stack_offset = stack_find_offset(input->type());
251
252                         if(input->type() == SocketType::FLOAT) {
253                                 add_node(NODE_VALUE_F, __float_as_int(node->get_float(input->socket_type)), input->stack_offset);
254                         }
255                         else if(input->type() == SocketType::INT) {
256                                 add_node(NODE_VALUE_F, node->get_int(input->socket_type), input->stack_offset);
257                         }
258                         else if(input->type() == SocketType::VECTOR ||
259                                 input->type() == SocketType::NORMAL ||
260                                 input->type() == SocketType::POINT ||
261                                 input->type() == SocketType::COLOR)
262                         {
263
264                                 add_node(NODE_VALUE_V, input->stack_offset);
265                                 add_node(NODE_VALUE_V, node->get_float3(input->socket_type));
266                         }
267                         else /* should not get called for closure */
268                                 assert(0);
269                 }
270         }
271
272         return input->stack_offset;
273 }
274
275 int SVMCompiler::stack_assign(ShaderOutput *output)
276 {
277         /* if no stack offset assigned yet, find one */
278         if(output->stack_offset == SVM_STACK_INVALID)
279                 output->stack_offset = stack_find_offset(output->type());
280
281         return output->stack_offset;
282 }
283
284 int SVMCompiler::stack_assign_if_linked(ShaderInput *input)
285 {
286         if(input->link)
287                 return stack_assign(input);
288
289         return SVM_STACK_INVALID;
290 }
291
292 int SVMCompiler::stack_assign_if_linked(ShaderOutput *output)
293 {
294         if(!output->links.empty())
295                 return stack_assign(output);
296
297         return SVM_STACK_INVALID;
298 }
299
300 void SVMCompiler::stack_link(ShaderInput *input, ShaderOutput *output)
301 {
302         if(output->stack_offset == SVM_STACK_INVALID) {
303                 assert(input->link);
304                 assert(stack_size(output->type()) == stack_size(input->link->type()));
305
306                 output->stack_offset = input->link->stack_offset;
307
308                 int size = stack_size(output->type());
309
310                 for(int i = 0; i < size; i++)
311                         active_stack.users[output->stack_offset + i]++;
312         }
313 }
314
315 void SVMCompiler::stack_clear_users(ShaderNode *node, ShaderNodeSet& done)
316 {
317         /* optimization we should add:
318          * find and lower user counts for outputs for which all inputs are done.
319          * this is done before the node is compiled, under the assumption that the
320          * node will first load all inputs from the stack and then writes its
321          * outputs. this used to work, but was disabled because it gave trouble
322          * with inputs getting stack positions assigned */
323
324         foreach(ShaderInput *input, node->inputs) {
325                 ShaderOutput *output = input->link;
326
327                 if(output && output->stack_offset != SVM_STACK_INVALID) {
328                         bool all_done = true;
329
330                         /* optimization we should add: verify if in->parent is actually used */
331                         foreach(ShaderInput *in, output->links)
332                                 if(in->parent != node && done.find(in->parent) == done.end())
333                                         all_done = false;
334
335                         if(all_done) {
336                                 stack_clear_offset(output->type(), output->stack_offset);
337                                 output->stack_offset = SVM_STACK_INVALID;
338
339                                 foreach(ShaderInput *in, output->links)
340                                         in->stack_offset = SVM_STACK_INVALID;
341                         }
342                 }
343         }
344 }
345
346 void SVMCompiler::stack_clear_temporary(ShaderNode *node)
347 {
348         foreach(ShaderInput *input, node->inputs) {
349                 if(!input->link && input->stack_offset != SVM_STACK_INVALID) {
350                         stack_clear_offset(input->type(), input->stack_offset);
351                         input->stack_offset = SVM_STACK_INVALID;
352                 }
353         }
354 }
355
356 uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
357 {
358         assert(x <= 255);
359         assert(y <= 255);
360         assert(z <= 255);
361         assert(w <= 255);
362
363         return (x) | (y << 8) | (z << 16) | (w << 24);
364 }
365
366 void SVMCompiler::add_node(int a, int b, int c, int d)
367 {
368         current_svm_nodes.push_back_slow(make_int4(a, b, c, d));
369 }
370
371 void SVMCompiler::add_node(ShaderNodeType type, int a, int b, int c)
372 {
373         current_svm_nodes.push_back_slow(make_int4(type, a, b, c));
374 }
375
376 void SVMCompiler::add_node(ShaderNodeType type, const float3& f)
377 {
378         current_svm_nodes.push_back_slow(make_int4(type,
379                 __float_as_int(f.x),
380                 __float_as_int(f.y),
381                 __float_as_int(f.z)));
382 }
383
384 void SVMCompiler::add_node(const float4& f)
385 {
386         current_svm_nodes.push_back_slow(make_int4(
387                 __float_as_int(f.x),
388                 __float_as_int(f.y),
389                 __float_as_int(f.z),
390                 __float_as_int(f.w)));
391 }
392
393 uint SVMCompiler::attribute(ustring name)
394 {
395         return shader_manager->get_attribute_id(name);
396 }
397
398 uint SVMCompiler::attribute(AttributeStandard std)
399 {
400         return shader_manager->get_attribute_id(std);
401 }
402
403 bool SVMCompiler::node_skip_input(ShaderNode * /*node*/, ShaderInput *input)
404 {
405         /* nasty exception .. */
406         if(current_type == SHADER_TYPE_DISPLACEMENT && input->link && input->link->parent->special_type == SHADER_SPECIAL_TYPE_BUMP)
407                 return true;
408         
409         return false;
410 }
411
412 void SVMCompiler::find_dependencies(ShaderNodeSet& dependencies,
413                                     const ShaderNodeSet& done,
414                                     ShaderInput *input,
415                                     ShaderNode *skip_node)
416 {
417         ShaderNode *node = (input->link)? input->link->parent: NULL;
418
419         if(node != NULL &&
420            done.find(node) == done.end() &&
421            node != skip_node &&
422            dependencies.find(node) == dependencies.end())
423         {
424                 foreach(ShaderInput *in, node->inputs)
425                         if(!node_skip_input(node, in))
426                                 find_dependencies(dependencies, done, in, skip_node);
427
428                 dependencies.insert(node);
429         }
430 }
431
432 void SVMCompiler::generate_node(ShaderNode *node, ShaderNodeSet& done)
433 {
434         node->compile(*this);
435         stack_clear_users(node, done);
436         stack_clear_temporary(node);
437
438         if(current_type == SHADER_TYPE_SURFACE) {
439                 if(node->has_spatial_varying())
440                         current_shader->has_surface_spatial_varying = true;
441         }
442         else if(current_type == SHADER_TYPE_VOLUME) {
443                 if(node->has_spatial_varying())
444                         current_shader->has_volume_spatial_varying = true;
445         }
446
447         if(node->has_object_dependency()) {
448                 current_shader->has_object_dependency = true;
449         }
450
451         if(node->has_integrator_dependency()) {
452                 current_shader->has_integrator_dependency = true;
453         }
454 }
455
456 void SVMCompiler::generate_svm_nodes(const ShaderNodeSet& nodes,
457                                      CompilerState *state)
458 {
459         ShaderNodeSet& done = state->nodes_done;
460         vector<bool>& done_flag = state->nodes_done_flag;
461
462         bool nodes_done;
463         do {
464                 nodes_done = true;
465
466                 foreach(ShaderNode *node, nodes) {
467                         if(!done_flag[node->id]) {
468                                 bool inputs_done = true;
469
470                                 foreach(ShaderInput *input, node->inputs)
471                                         if(!node_skip_input(node, input))
472                                                 if(input->link && !done_flag[input->link->parent->id])
473                                                         inputs_done = false;
474
475                                 if(inputs_done) {
476                                         generate_node(node, done);
477                                         done.insert(node);
478                                         done_flag[node->id] = true;
479                                 }
480                                 else
481                                         nodes_done = false;
482                         }
483                 }
484         } while(!nodes_done);
485 }
486
487 void SVMCompiler::generate_closure_node(ShaderNode *node,
488                                         CompilerState *state)
489 {
490         /* execute dependencies for closure */
491         foreach(ShaderInput *in, node->inputs) {
492                 if(!node_skip_input(node, in) && in->link) {
493                         ShaderNodeSet dependencies;
494                         find_dependencies(dependencies, state->nodes_done, in);
495                         generate_svm_nodes(dependencies, state);
496                 }
497         }
498
499         /* closure mix weight */
500         const char *weight_name = (current_type == SHADER_TYPE_VOLUME)? "VolumeMixWeight": "SurfaceMixWeight";
501         ShaderInput *weight_in = node->input(weight_name);
502
503         if(weight_in && (weight_in->link || node->get_float(weight_in->socket_type) != 1.0f))
504                 mix_weight_offset = stack_assign(weight_in);
505         else
506                 mix_weight_offset = SVM_STACK_INVALID;
507
508         /* compile closure itself */
509         generate_node(node, state->nodes_done);
510
511         mix_weight_offset = SVM_STACK_INVALID;
512
513         if(current_type == SHADER_TYPE_SURFACE) {
514                 if(node->has_surface_emission())
515                         current_shader->has_surface_emission = true;
516                 if(node->has_surface_transparent())
517                         current_shader->has_surface_transparent = true;
518                 if(node->has_surface_bssrdf()) {
519                         current_shader->has_surface_bssrdf = true;
520                         if(node->has_bssrdf_bump())
521                                 current_shader->has_bssrdf_bump = true;
522                 }
523                 if(node->has_bump()) {
524                         current_shader->has_bump = true;
525                 }
526         }
527 }
528
529 void SVMCompiler::generated_shared_closure_nodes(ShaderNode *root_node,
530                                                  ShaderNode *node,
531                                                  CompilerState *state,
532                                                  const ShaderNodeSet& shared)
533 {
534         if(shared.find(node) != shared.end()) {
535                 generate_multi_closure(root_node, node, state);
536         }
537         else {
538                 foreach(ShaderInput *in, node->inputs) {
539                         if(in->type() == SocketType::CLOSURE && in->link)
540                                 generated_shared_closure_nodes(root_node,
541                                                                in->link->parent,
542                                                                state,
543                                                                shared);
544                 }
545         }
546 }
547
548 void SVMCompiler::generate_multi_closure(ShaderNode *root_node,
549                                          ShaderNode *node,
550                                          CompilerState *state)
551 {
552         /* only generate once */
553         if(state->closure_done.find(node) != state->closure_done.end())
554                 return;
555
556         state->closure_done.insert(node);
557
558         if(node->special_type == SHADER_SPECIAL_TYPE_COMBINE_CLOSURE) {
559                 /* weighting is already taken care of in ShaderGraph::transform_multi_closure */
560                 ShaderInput *cl1in = node->input("Closure1");
561                 ShaderInput *cl2in = node->input("Closure2");
562                 ShaderInput *facin = node->input("Fac");
563
564                 /* skip empty mix/add closure nodes */
565                 if(!cl1in->link && !cl2in->link)
566                         return;
567
568                 if(facin && facin->link) {
569                         /* mix closure: generate instructions to compute mix weight */
570                         ShaderNodeSet dependencies;
571                         find_dependencies(dependencies, state->nodes_done, facin);
572                         generate_svm_nodes(dependencies, state);
573
574                         /* execute shared dependencies. this is needed to allow skipping
575                          * of zero weight closures and their dependencies later, so we
576                          * ensure that they only skip dependencies that are unique to them */
577                         ShaderNodeSet cl1deps, cl2deps, shareddeps;
578
579                         find_dependencies(cl1deps, state->nodes_done, cl1in);
580                         find_dependencies(cl2deps, state->nodes_done, cl2in);
581
582                         ShaderNodeIDComparator node_id_comp;
583                         set_intersection(cl1deps.begin(), cl1deps.end(),
584                                          cl2deps.begin(), cl2deps.end(),
585                                          std::inserter(shareddeps, shareddeps.begin()),
586                                          node_id_comp);
587
588                         /* it's possible some nodes are not shared between this mix node
589                          * inputs, but still needed to be always executed, this mainly
590                          * happens when a node of current subbranch is used by a parent
591                          * node or so */
592                         if(root_node != node) {
593                                 foreach(ShaderInput *in, root_node->inputs) {
594                                         ShaderNodeSet rootdeps;
595                                         find_dependencies(rootdeps, state->nodes_done, in, node);
596                                         set_intersection(rootdeps.begin(), rootdeps.end(),
597                                                          cl1deps.begin(), cl1deps.end(),
598                                                          std::inserter(shareddeps, shareddeps.begin()),
599                                                          node_id_comp);
600                                         set_intersection(rootdeps.begin(), rootdeps.end(),
601                                                          cl2deps.begin(), cl2deps.end(),
602                                                          std::inserter(shareddeps, shareddeps.begin()),
603                                                          node_id_comp);
604                                 }
605                         }
606
607                         if(!shareddeps.empty()) {
608                                 if(cl1in->link) {
609                                         generated_shared_closure_nodes(root_node,
610                                                                        cl1in->link->parent,
611                                                                        state,
612                                                                        shareddeps);
613                                 }
614                                 if(cl2in->link) {
615                                         generated_shared_closure_nodes(root_node,
616                                                                        cl2in->link->parent,
617                                                                        state,
618                                                                        shareddeps);
619                                 }
620
621                                 generate_svm_nodes(shareddeps, state);
622                         }
623
624                         /* generate instructions for input closure 1 */
625                         if(cl1in->link) {
626                                 /* Add instruction to skip closure and its dependencies if mix
627                                  * weight is zero.
628                                  */
629                                 current_svm_nodes.push_back_slow(make_int4(NODE_JUMP_IF_ONE,
630                                                                       0,
631                                                                       stack_assign(facin),
632                                                                       0));
633                                 int node_jump_skip_index = current_svm_nodes.size() - 1;
634
635                                 generate_multi_closure(root_node, cl1in->link->parent, state);
636
637                                 /* Fill in jump instruction location to be after closure. */
638                                 current_svm_nodes[node_jump_skip_index].y =
639                                         current_svm_nodes.size() - node_jump_skip_index - 1;
640                         }
641
642                         /* generate instructions for input closure 2 */
643                         if(cl2in->link) {
644                                 /* Add instruction to skip closure and its dependencies if mix
645                                  * weight is zero.
646                                  */
647                                 current_svm_nodes.push_back_slow(make_int4(NODE_JUMP_IF_ZERO,
648                                                                       0,
649                                                                       stack_assign(facin),
650                                                                       0));
651                                 int node_jump_skip_index = current_svm_nodes.size() - 1;
652
653                                 generate_multi_closure(root_node, cl2in->link->parent, state);
654
655                                 /* Fill in jump instruction location to be after closure. */
656                                 current_svm_nodes[node_jump_skip_index].y =
657                                         current_svm_nodes.size() - node_jump_skip_index - 1;
658                         }
659
660                         /* unassign */
661                         facin->stack_offset = SVM_STACK_INVALID;
662                 }
663                 else {
664                         /* execute closures and their dependencies, no runtime checks
665                          * to skip closures here because was already optimized due to
666                          * fixed weight or add closure that always needs both */
667                         if(cl1in->link)
668                                 generate_multi_closure(root_node, cl1in->link->parent, state);
669                         if(cl2in->link)
670                                 generate_multi_closure(root_node, cl2in->link->parent, state);
671                 }
672         }
673         else {
674                 generate_closure_node(node, state);
675         }
676
677         state->nodes_done.insert(node);
678         state->nodes_done_flag[node->id] = true;
679 }
680
681
682 void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType type)
683 {
684         /* Converting a shader graph into svm_nodes that can be executed
685          * sequentially on the virtual machine is fairly simple. We can keep
686          * looping over nodes and each time all the inputs of a node are
687          * ready, we add svm_nodes for it that read the inputs from the
688          * stack and write outputs back to the stack.
689          *
690          * With the SVM, we always sample only a single closure. We can think
691          * of all closures nodes as a binary tree with mix closures as inner
692          * nodes and other closures as leafs. The SVM will traverse that tree,
693          * each time deciding to go left or right depending on the mix weights,
694          * until a closure is found.
695          *
696          * We only execute nodes that are needed for the mix weights and chosen
697          * closure.
698          */
699
700         current_type = type;
701         current_graph = graph;
702
703         /* get input in output node */
704         ShaderNode *node = graph->output();
705         ShaderInput *clin = NULL;
706         
707         switch(type) {
708                 case SHADER_TYPE_SURFACE:
709                         clin = node->input("Surface");
710                         break;
711                 case SHADER_TYPE_VOLUME:
712                         clin = node->input("Volume");
713                         break;
714                 case SHADER_TYPE_DISPLACEMENT:
715                         clin = node->input("Displacement");
716                         break;
717                 case SHADER_TYPE_BUMP:
718                         clin = node->input("Normal");
719                         break;
720                 default:
721                         assert(0);
722                         break;
723         }
724
725         /* clear all compiler state */
726         memset(&active_stack, 0, sizeof(active_stack));
727         current_svm_nodes.clear();
728
729         foreach(ShaderNode *node_iter, graph->nodes) {
730                 foreach(ShaderInput *input, node_iter->inputs)
731                         input->stack_offset = SVM_STACK_INVALID;
732                 foreach(ShaderOutput *output, node_iter->outputs)
733                         output->stack_offset = SVM_STACK_INVALID;
734         }
735
736         /* for the bump shader we need add a node to store the shader state */
737         bool need_bump_state = (type == SHADER_TYPE_BUMP) && (shader->displacement_method == DISPLACE_BOTH);
738         int bump_state_offset = SVM_STACK_INVALID;
739         if(need_bump_state) {
740                 bump_state_offset = stack_find_offset(SVM_BUMP_EVAL_STATE_SIZE);
741                 add_node(NODE_ENTER_BUMP_EVAL, bump_state_offset);
742         }
743
744         if(shader->used) {
745                 if(clin->link) {
746                         bool generate = false;
747                         
748                         switch(type) {
749                                 case SHADER_TYPE_SURFACE: /* generate surface shader */         
750                                         generate = true;
751                                         shader->has_surface = true;
752                                         break;
753                                 case SHADER_TYPE_VOLUME: /* generate volume shader */
754                                         generate = true;
755                                         shader->has_volume = true;
756                                         break;
757                                 case SHADER_TYPE_DISPLACEMENT: /* generate displacement shader */
758                                         generate = true;
759                                         shader->has_displacement = true;
760                                         break;
761                                 case SHADER_TYPE_BUMP: /* generate bump shader */
762                                         generate = true;
763                                         break;
764                                 default:
765                                         break;
766                         }
767
768                         if(generate) {
769                                 CompilerState state(graph);
770                                 generate_multi_closure(clin->link->parent,
771                                                        clin->link->parent,
772                                                        &state);
773                         }
774                 }
775
776                 /* compile output node */
777                 node->compile(*this);
778         }
779
780         /* add node to restore state after bump shader has finished */
781         if(need_bump_state) {
782                 add_node(NODE_LEAVE_BUMP_EVAL, bump_state_offset);
783         }
784
785         /* if compile failed, generate empty shader */
786         if(compile_failed) {
787                 current_svm_nodes.clear();
788                 compile_failed = false;
789         }
790
791         /* for bump shaders we fall thru to the surface shader, but if this is any other kind of shader it ends here */
792         if(type != SHADER_TYPE_BUMP) {
793                 add_node(NODE_END, 0, 0, 0);
794         }
795 }
796
797 void SVMCompiler::compile(Scene *scene,
798                           Shader *shader,
799                           array<int4>& svm_nodes,
800                           int index,
801                           Summary *summary)
802 {
803         /* copy graph for shader with bump mapping */
804         ShaderNode *output = shader->graph->output();
805         int start_num_svm_nodes = svm_nodes.size();
806
807         const double time_start = time_dt();
808
809         bool has_bump = (shader->displacement_method != DISPLACE_TRUE) &&
810                         output->input("Surface")->link && output->input("Displacement")->link;
811
812         /* finalize */
813         {
814                 scoped_timer timer((summary != NULL)? &summary->time_finalize: NULL);
815                 shader->graph->finalize(scene,
816                                         has_bump,
817                                         shader->has_integrator_dependency,
818                                         shader->displacement_method == DISPLACE_BOTH);
819         }
820
821         current_shader = shader;
822
823         shader->has_surface = false;
824         shader->has_surface_emission = false;
825         shader->has_surface_transparent = false;
826         shader->has_surface_bssrdf = false;
827         shader->has_bump = has_bump;
828         shader->has_bssrdf_bump = has_bump;
829         shader->has_volume = false;
830         shader->has_displacement = false;
831         shader->has_surface_spatial_varying = false;
832         shader->has_volume_spatial_varying = false;
833         shader->has_object_dependency = false;
834         shader->has_integrator_dependency = false;
835
836         /* generate bump shader */
837         if(has_bump) {
838                 scoped_timer timer((summary != NULL)? &summary->time_generate_bump: NULL);
839                 compile_type(shader, shader->graph, SHADER_TYPE_BUMP);
840                 svm_nodes[index].y = svm_nodes.size();
841                 svm_nodes.append(current_svm_nodes);
842         }
843
844         /* generate surface shader */
845         {
846                 scoped_timer timer((summary != NULL)? &summary->time_generate_surface: NULL);
847                 compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
848                 /* only set jump offset if there's no bump shader, as the bump shader will fall thru to this one if it exists */
849                 if(!has_bump) {
850                         svm_nodes[index].y = svm_nodes.size();
851                 }
852                 svm_nodes.append(current_svm_nodes);
853         }
854
855         /* generate volume shader */
856         {
857                 scoped_timer timer((summary != NULL)? &summary->time_generate_volume: NULL);
858                 compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
859                 svm_nodes[index].z = svm_nodes.size();
860                 svm_nodes.append(current_svm_nodes);
861         }
862
863         /* generate displacement shader */
864         {
865                 scoped_timer timer((summary != NULL)? &summary->time_generate_displacement: NULL);
866                 compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
867                 svm_nodes[index].w = svm_nodes.size();
868                 svm_nodes.append(current_svm_nodes);
869         }
870
871         /* Fill in summary information. */
872         if(summary != NULL) {
873                 summary->time_total = time_dt() - time_start;
874                 summary->peak_stack_usage = max_stack_use;
875                 summary->num_svm_nodes = svm_nodes.size() - start_num_svm_nodes;
876         }
877 }
878
879 /* Compiler summary implementation. */
880
881 SVMCompiler::Summary::Summary()
882         : num_svm_nodes(0),
883           peak_stack_usage(0),
884           time_finalize(0.0),
885           time_generate_surface(0.0),
886           time_generate_bump(0.0),
887           time_generate_volume(0.0),
888           time_generate_displacement(0.0),
889           time_total(0.0)
890 {
891 }
892
893 string SVMCompiler::Summary::full_report() const
894 {
895         string report = "";
896         report += string_printf("Number of SVM nodes: %d\n", num_svm_nodes);
897         report += string_printf("Peak stack usage:    %d\n", peak_stack_usage);
898
899         report += string_printf("Time (in seconds):\n");
900         report += string_printf("Finalize:            %f\n", time_finalize);
901         report += string_printf("  Surface:           %f\n", time_generate_surface);
902         report += string_printf("  Bump:              %f\n", time_generate_bump);
903         report += string_printf("  Volume:            %f\n", time_generate_volume);
904         report += string_printf("  Displacement:      %f\n", time_generate_displacement);
905         report += string_printf("Generate:            %f\n", time_generate_surface +
906                                                              time_generate_bump +
907                                                              time_generate_volume +
908                                                              time_generate_displacement);
909         report += string_printf("Total:               %f\n", time_total);
910
911         return report;
912 }
913
914 /* Global state of the compiler. */
915
916 SVMCompiler::CompilerState::CompilerState(ShaderGraph *graph)
917 {
918         int max_id = 0;
919         foreach(ShaderNode *node, graph->nodes) {
920                 max_id = max(node->id, max_id);
921         }
922         nodes_done_flag.resize(max_id + 1, false);
923 }
924
925 CCL_NAMESPACE_END
926