Cycles: Add strict assert when assigning input socket stack offset
[blender.git] / intern / cycles / render / svm.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "device/device.h"
18 #include "render/graph.h"
19 #include "render/light.h"
20 #include "render/mesh.h"
21 #include "render/nodes.h"
22 #include "render/scene.h"
23 #include "render/shader.h"
24 #include "render/svm.h"
25
26 #include "util/util_logging.h"
27 #include "util/util_foreach.h"
28 #include "util/util_progress.h"
29 #include "util/util_task.h"
30
31 CCL_NAMESPACE_BEGIN
32
33 /* Shader Manager */
34
35 SVMShaderManager::SVMShaderManager()
36 {
37 }
38
39 SVMShaderManager::~SVMShaderManager()
40 {
41 }
42
43 void SVMShaderManager::reset(Scene * /*scene*/)
44 {
45 }
46
47 void SVMShaderManager::device_update_shader(Scene *scene,
48                                             Shader *shader,
49                                             Progress *progress,
50                                             array<int4> *global_svm_nodes)
51 {
52         if(progress->get_cancel()) {
53                 return;
54         }
55         assert(shader->graph);
56
57         array<int4> svm_nodes;
58         svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
59
60         SVMCompiler::Summary summary;
61         SVMCompiler compiler(scene->shader_manager, scene->image_manager, scene->light_manager);
62         compiler.background = (shader == scene->default_background);
63         compiler.compile(scene, shader, svm_nodes, 0, &summary);
64
65         VLOG(2) << "Compilation summary:\n"
66                 << "Shader name: " << shader->name << "\n"
67                 << summary.full_report();
68
69         nodes_lock_.lock();
70         if(shader->use_mis && shader->has_surface_emission) {
71                 scene->light_manager->need_update = true;
72         }
73
74         /* The copy needs to be done inside the lock, if another thread resizes the array
75          * while memcpy is running, it'll be copying into possibly invalid/freed ram.
76          */
77         size_t global_nodes_size = global_svm_nodes->size();
78         global_svm_nodes->resize(global_nodes_size + svm_nodes.size());
79
80         /* Offset local SVM nodes to a global address space. */
81         int4& jump_node = (*global_svm_nodes)[shader->id];
82         jump_node.y = svm_nodes[0].y + global_nodes_size - 1;
83         jump_node.z = svm_nodes[0].z + global_nodes_size - 1;
84         jump_node.w = svm_nodes[0].w + global_nodes_size - 1;
85         /* Copy new nodes to global storage. */
86         memcpy(&(*global_svm_nodes)[global_nodes_size],
87                &svm_nodes[1],
88                sizeof(int4) * (svm_nodes.size() - 1));
89         nodes_lock_.unlock();
90 }
91
92 void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
93 {
94         if(!need_update)
95                 return;
96
97         VLOG(1) << "Total " << scene->shaders.size() << " shaders.";
98
99         double start_time = time_dt();
100
101         /* test if we need to update */
102         device_free(device, dscene, scene);
103
104         /* determine which shaders are in use */
105         device_update_shaders_used(scene);
106
107         /* svm_nodes */
108         array<int4> svm_nodes;
109         size_t i;
110
111         for(i = 0; i < scene->shaders.size(); i++) {
112                 svm_nodes.push_back_slow(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
113         }
114
115         TaskPool task_pool;
116         foreach(Shader *shader, scene->shaders) {
117                 task_pool.push(function_bind(&SVMShaderManager::device_update_shader,
118                                              this,
119                                              scene,
120                                              shader,
121                                              &progress,
122                                              &svm_nodes),
123                                false);
124         }
125         task_pool.wait_work();
126
127         if(progress.get_cancel()) {
128                 return;
129         }
130
131         dscene->svm_nodes.steal_data(svm_nodes);
132         dscene->svm_nodes.copy_to_device();
133
134         for(i = 0; i < scene->shaders.size(); i++) {
135                 Shader *shader = scene->shaders[i];
136                 shader->need_update = false;
137         }
138
139         device_update_common(device, dscene, scene, progress);
140
141         need_update = false;
142
143         VLOG(1) << "Shader manager updated "
144                 << scene->shaders.size() << " shaders in "
145                 << time_dt() - start_time << " seconds.";
146 }
147
148 void SVMShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene)
149 {
150         device_free_common(device, dscene, scene);
151
152         dscene->svm_nodes.free();
153 }
154
155 /* Graph Compiler */
156
157 SVMCompiler::SVMCompiler(ShaderManager *shader_manager_,
158                          ImageManager *image_manager_,
159                          LightManager *light_manager_)
160 {
161         shader_manager = shader_manager_;
162         image_manager = image_manager_;
163         light_manager = light_manager_;
164         max_stack_use = 0;
165         current_type = SHADER_TYPE_SURFACE;
166         current_shader = NULL;
167         current_graph = NULL;
168         background = false;
169         mix_weight_offset = SVM_STACK_INVALID;
170         compile_failed = false;
171 }
172
173 int SVMCompiler::stack_size(SocketType::Type type)
174 {
175         int size = 0;
176
177         switch(type) {
178                 case SocketType::FLOAT:
179                 case SocketType::INT:
180                         size = 1;
181                         break;
182                 case SocketType::COLOR:
183                 case SocketType::VECTOR:
184                 case SocketType::NORMAL:
185                 case SocketType::POINT:
186                         size = 3;
187                         break;
188                 case SocketType::CLOSURE:
189                         size = 0;
190                         break;
191                 default:
192                         assert(0);
193                         break;
194         }
195
196         return size;
197 }
198
199 int SVMCompiler::stack_find_offset(int size)
200 {
201         int offset = -1;
202
203         /* find free space in stack & mark as used */
204         for(int i = 0, num_unused = 0; i < SVM_STACK_SIZE; i++) {
205                 if(active_stack.users[i]) num_unused = 0;
206                 else num_unused++;
207
208                 if(num_unused == size) {
209                         offset = i+1 - size;
210                         max_stack_use = max(i+1, max_stack_use);
211
212                         while(i >= offset)
213                                 active_stack.users[i--] = 1;
214
215                         if (offset == 255) {
216                                 abort();
217                         }
218                         return offset;
219                 }
220         }
221
222         if(!compile_failed) {
223                 compile_failed = true;
224                 fprintf(stderr, "Cycles: out of SVM stack space, shader \"%s\" too big.\n", current_shader->name.c_str());
225         }
226
227         return 0;
228 }
229
230 int SVMCompiler::stack_find_offset(SocketType::Type type)
231 {
232         return stack_find_offset(stack_size(type));
233 }
234
235 void SVMCompiler::stack_clear_offset(SocketType::Type type, int offset)
236 {
237         int size = stack_size(type);
238
239         for(int i = 0; i < size; i++)
240                 active_stack.users[offset + i]--;
241 }
242
243 int SVMCompiler::stack_assign(ShaderInput *input)
244 {
245         /* stack offset assign? */
246         if(input->stack_offset == SVM_STACK_INVALID) {
247                 if(input->link) {
248                         /* linked to output -> use output offset */
249                         assert(input->link->stack_offset != SVM_STACK_INVALID);
250                         input->stack_offset = input->link->stack_offset;
251                 }
252                 else {
253                         Node *node = input->parent;
254
255                         /* not linked to output -> add nodes to load default value */
256                         input->stack_offset = stack_find_offset(input->type());
257
258                         if(input->type() == SocketType::FLOAT) {
259                                 add_node(NODE_VALUE_F, __float_as_int(node->get_float(input->socket_type)), input->stack_offset);
260                         }
261                         else if(input->type() == SocketType::INT) {
262                                 add_node(NODE_VALUE_F, node->get_int(input->socket_type), input->stack_offset);
263                         }
264                         else if(input->type() == SocketType::VECTOR ||
265                                 input->type() == SocketType::NORMAL ||
266                                 input->type() == SocketType::POINT ||
267                                 input->type() == SocketType::COLOR)
268                         {
269
270                                 add_node(NODE_VALUE_V, input->stack_offset);
271                                 add_node(NODE_VALUE_V, node->get_float3(input->socket_type));
272                         }
273                         else  /* should not get called for closure */
274                                 assert(0);
275                 }
276         }
277
278         return input->stack_offset;
279 }
280
281 int SVMCompiler::stack_assign(ShaderOutput *output)
282 {
283         /* if no stack offset assigned yet, find one */
284         if(output->stack_offset == SVM_STACK_INVALID)
285                 output->stack_offset = stack_find_offset(output->type());
286
287         return output->stack_offset;
288 }
289
290 int SVMCompiler::stack_assign_if_linked(ShaderInput *input)
291 {
292         if(input->link)
293                 return stack_assign(input);
294
295         return SVM_STACK_INVALID;
296 }
297
298 int SVMCompiler::stack_assign_if_linked(ShaderOutput *output)
299 {
300         if(!output->links.empty())
301                 return stack_assign(output);
302
303         return SVM_STACK_INVALID;
304 }
305
306 void SVMCompiler::stack_link(ShaderInput *input, ShaderOutput *output)
307 {
308         if(output->stack_offset == SVM_STACK_INVALID) {
309                 assert(input->link);
310                 assert(stack_size(output->type()) == stack_size(input->link->type()));
311
312                 output->stack_offset = input->link->stack_offset;
313
314                 int size = stack_size(output->type());
315
316                 for(int i = 0; i < size; i++)
317                         active_stack.users[output->stack_offset + i]++;
318         }
319 }
320
321 void SVMCompiler::stack_clear_users(ShaderNode *node, ShaderNodeSet& done)
322 {
323         /* optimization we should add:
324          * find and lower user counts for outputs for which all inputs are done.
325          * this is done before the node is compiled, under the assumption that the
326          * node will first load all inputs from the stack and then writes its
327          * outputs. this used to work, but was disabled because it gave trouble
328          * with inputs getting stack positions assigned */
329
330         foreach(ShaderInput *input, node->inputs) {
331                 ShaderOutput *output = input->link;
332
333                 if(output && output->stack_offset != SVM_STACK_INVALID) {
334                         bool all_done = true;
335
336                         /* optimization we should add: verify if in->parent is actually used */
337                         foreach(ShaderInput *in, output->links)
338                                 if(in->parent != node && done.find(in->parent) == done.end())
339                                         all_done = false;
340
341                         if(all_done) {
342                                 stack_clear_offset(output->type(), output->stack_offset);
343                                 output->stack_offset = SVM_STACK_INVALID;
344
345                                 foreach(ShaderInput *in, output->links)
346                                         in->stack_offset = SVM_STACK_INVALID;
347                         }
348                 }
349         }
350 }
351
352 void SVMCompiler::stack_clear_temporary(ShaderNode *node)
353 {
354         foreach(ShaderInput *input, node->inputs) {
355                 if(!input->link && input->stack_offset != SVM_STACK_INVALID) {
356                         stack_clear_offset(input->type(), input->stack_offset);
357                         input->stack_offset = SVM_STACK_INVALID;
358                 }
359         }
360 }
361
362 uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
363 {
364         assert(x <= 255);
365         assert(y <= 255);
366         assert(z <= 255);
367         assert(w <= 255);
368
369         return (x) | (y << 8) | (z << 16) | (w << 24);
370 }
371
372 void SVMCompiler::add_node(int a, int b, int c, int d)
373 {
374         current_svm_nodes.push_back_slow(make_int4(a, b, c, d));
375 }
376
377 void SVMCompiler::add_node(ShaderNodeType type, int a, int b, int c)
378 {
379         current_svm_nodes.push_back_slow(make_int4(type, a, b, c));
380 }
381
382 void SVMCompiler::add_node(ShaderNodeType type, const float3& f)
383 {
384         current_svm_nodes.push_back_slow(make_int4(type,
385                 __float_as_int(f.x),
386                 __float_as_int(f.y),
387                 __float_as_int(f.z)));
388 }
389
390 void SVMCompiler::add_node(const float4& f)
391 {
392         current_svm_nodes.push_back_slow(make_int4(
393                 __float_as_int(f.x),
394                 __float_as_int(f.y),
395                 __float_as_int(f.z),
396                 __float_as_int(f.w)));
397 }
398
399 uint SVMCompiler::attribute(ustring name)
400 {
401         return shader_manager->get_attribute_id(name);
402 }
403
404 uint SVMCompiler::attribute(AttributeStandard std)
405 {
406         return shader_manager->get_attribute_id(std);
407 }
408
409 uint SVMCompiler::attribute_standard(ustring name)
410 {
411         AttributeStandard std = Attribute::name_standard(name.c_str());
412         return (std)? attribute(std): attribute(name);
413 }
414
415 void SVMCompiler::find_dependencies(ShaderNodeSet& dependencies,
416                                     const ShaderNodeSet& done,
417                                     ShaderInput *input,
418                                     ShaderNode *skip_node)
419 {
420         ShaderNode *node = (input->link)? input->link->parent: NULL;
421         if(node != NULL &&
422            done.find(node) == done.end() &&
423            node != skip_node &&
424            dependencies.find(node) == dependencies.end())
425         {
426                 foreach(ShaderInput *in, node->inputs) {
427                         find_dependencies(dependencies, done, in, skip_node);
428                 }
429                 dependencies.insert(node);
430         }
431 }
432
433 void SVMCompiler::generate_node(ShaderNode *node, ShaderNodeSet& done)
434 {
435         node->compile(*this);
436         stack_clear_users(node, done);
437         stack_clear_temporary(node);
438
439         if(current_type == SHADER_TYPE_SURFACE) {
440                 if(node->has_spatial_varying())
441                         current_shader->has_surface_spatial_varying = true;
442         }
443         else if(current_type == SHADER_TYPE_VOLUME) {
444                 if(node->has_spatial_varying())
445                         current_shader->has_volume_spatial_varying = true;
446         }
447
448         if(node->has_object_dependency()) {
449                 current_shader->has_object_dependency = true;
450         }
451
452         if(node->has_attribute_dependency()) {
453                 current_shader->has_attribute_dependency = true;
454         }
455
456         if(node->has_integrator_dependency()) {
457                 current_shader->has_integrator_dependency = true;
458         }
459 }
460
461 void SVMCompiler::generate_svm_nodes(const ShaderNodeSet& nodes,
462                                      CompilerState *state)
463 {
464         ShaderNodeSet& done = state->nodes_done;
465         vector<bool>& done_flag = state->nodes_done_flag;
466
467         bool nodes_done;
468         do {
469                 nodes_done = true;
470
471                 foreach(ShaderNode *node, nodes) {
472                         if(!done_flag[node->id]) {
473                                 bool inputs_done = true;
474
475                                 foreach(ShaderInput *input, node->inputs) {
476                                         if(input->link && !done_flag[input->link->parent->id]) {
477                                                 inputs_done = false;
478                                         }
479                                 }
480                                 if(inputs_done) {
481                                         generate_node(node, done);
482                                         done.insert(node);
483                                         done_flag[node->id] = true;
484                                 }
485                                 else {
486                                         nodes_done = false;
487                                 }
488                         }
489                 }
490         } while(!nodes_done);
491 }
492
493 void SVMCompiler::generate_closure_node(ShaderNode *node,
494                                         CompilerState *state)
495 {
496         /* execute dependencies for closure */
497         foreach(ShaderInput *in, node->inputs) {
498                 if(in->link != NULL) {
499                         ShaderNodeSet dependencies;
500                         find_dependencies(dependencies, state->nodes_done, in);
501                         generate_svm_nodes(dependencies, state);
502                 }
503         }
504
505         /* closure mix weight */
506         const char *weight_name = (current_type == SHADER_TYPE_VOLUME)? "VolumeMixWeight": "SurfaceMixWeight";
507         ShaderInput *weight_in = node->input(weight_name);
508
509         if(weight_in && (weight_in->link || node->get_float(weight_in->socket_type) != 1.0f))
510                 mix_weight_offset = stack_assign(weight_in);
511         else
512                 mix_weight_offset = SVM_STACK_INVALID;
513
514         /* compile closure itself */
515         generate_node(node, state->nodes_done);
516
517         mix_weight_offset = SVM_STACK_INVALID;
518
519         if(current_type == SHADER_TYPE_SURFACE) {
520                 if(node->has_surface_emission())
521                         current_shader->has_surface_emission = true;
522                 if(node->has_surface_transparent())
523                         current_shader->has_surface_transparent = true;
524                 if(node->has_surface_bssrdf()) {
525                         current_shader->has_surface_bssrdf = true;
526                         if(node->has_bssrdf_bump())
527                                 current_shader->has_bssrdf_bump = true;
528                 }
529                 if(node->has_bump()) {
530                         current_shader->has_bump = true;
531                 }
532         }
533 }
534
535 void SVMCompiler::generated_shared_closure_nodes(ShaderNode *root_node,
536                                                  ShaderNode *node,
537                                                  CompilerState *state,
538                                                  const ShaderNodeSet& shared)
539 {
540         if(shared.find(node) != shared.end()) {
541                 generate_multi_closure(root_node, node, state);
542         }
543         else {
544                 foreach(ShaderInput *in, node->inputs) {
545                         if(in->type() == SocketType::CLOSURE && in->link)
546                                 generated_shared_closure_nodes(root_node,
547                                                                in->link->parent,
548                                                                state,
549                                                                shared);
550                 }
551         }
552 }
553
554 void SVMCompiler::generate_multi_closure(ShaderNode *root_node,
555                                          ShaderNode *node,
556                                          CompilerState *state)
557 {
558         /* only generate once */
559         if(state->closure_done.find(node) != state->closure_done.end())
560                 return;
561
562         state->closure_done.insert(node);
563
564         if(node->special_type == SHADER_SPECIAL_TYPE_COMBINE_CLOSURE) {
565                 /* weighting is already taken care of in ShaderGraph::transform_multi_closure */
566                 ShaderInput *cl1in = node->input("Closure1");
567                 ShaderInput *cl2in = node->input("Closure2");
568                 ShaderInput *facin = node->input("Fac");
569
570                 /* skip empty mix/add closure nodes */
571                 if(!cl1in->link && !cl2in->link)
572                         return;
573
574                 if(facin && facin->link) {
575                         /* mix closure: generate instructions to compute mix weight */
576                         ShaderNodeSet dependencies;
577                         find_dependencies(dependencies, state->nodes_done, facin);
578                         generate_svm_nodes(dependencies, state);
579
580                         /* execute shared dependencies. this is needed to allow skipping
581                          * of zero weight closures and their dependencies later, so we
582                          * ensure that they only skip dependencies that are unique to them */
583                         ShaderNodeSet cl1deps, cl2deps, shareddeps;
584
585                         find_dependencies(cl1deps, state->nodes_done, cl1in);
586                         find_dependencies(cl2deps, state->nodes_done, cl2in);
587
588                         ShaderNodeIDComparator node_id_comp;
589                         set_intersection(cl1deps.begin(), cl1deps.end(),
590                                          cl2deps.begin(), cl2deps.end(),
591                                          std::inserter(shareddeps, shareddeps.begin()),
592                                          node_id_comp);
593
594                         /* it's possible some nodes are not shared between this mix node
595                          * inputs, but still needed to be always executed, this mainly
596                          * happens when a node of current subbranch is used by a parent
597                          * node or so */
598                         if(root_node != node) {
599                                 foreach(ShaderInput *in, root_node->inputs) {
600                                         ShaderNodeSet rootdeps;
601                                         find_dependencies(rootdeps, state->nodes_done, in, node);
602                                         set_intersection(rootdeps.begin(), rootdeps.end(),
603                                                          cl1deps.begin(), cl1deps.end(),
604                                                          std::inserter(shareddeps, shareddeps.begin()),
605                                                          node_id_comp);
606                                         set_intersection(rootdeps.begin(), rootdeps.end(),
607                                                          cl2deps.begin(), cl2deps.end(),
608                                                          std::inserter(shareddeps, shareddeps.begin()),
609                                                          node_id_comp);
610                                 }
611                         }
612
613                         if(!shareddeps.empty()) {
614                                 if(cl1in->link) {
615                                         generated_shared_closure_nodes(root_node,
616                                                                        cl1in->link->parent,
617                                                                        state,
618                                                                        shareddeps);
619                                 }
620                                 if(cl2in->link) {
621                                         generated_shared_closure_nodes(root_node,
622                                                                        cl2in->link->parent,
623                                                                        state,
624                                                                        shareddeps);
625                                 }
626
627                                 generate_svm_nodes(shareddeps, state);
628                         }
629
630                         /* generate instructions for input closure 1 */
631                         if(cl1in->link) {
632                                 /* Add instruction to skip closure and its dependencies if mix
633                                  * weight is zero.
634                                  */
635                                 current_svm_nodes.push_back_slow(make_int4(NODE_JUMP_IF_ONE,
636                                                                       0,
637                                                                       stack_assign(facin),
638                                                                       0));
639                                 int node_jump_skip_index = current_svm_nodes.size() - 1;
640
641                                 generate_multi_closure(root_node, cl1in->link->parent, state);
642
643                                 /* Fill in jump instruction location to be after closure. */
644                                 current_svm_nodes[node_jump_skip_index].y =
645                                         current_svm_nodes.size() - node_jump_skip_index - 1;
646                         }
647
648                         /* generate instructions for input closure 2 */
649                         if(cl2in->link) {
650                                 /* Add instruction to skip closure and its dependencies if mix
651                                  * weight is zero.
652                                  */
653                                 current_svm_nodes.push_back_slow(make_int4(NODE_JUMP_IF_ZERO,
654                                                                       0,
655                                                                       stack_assign(facin),
656                                                                       0));
657                                 int node_jump_skip_index = current_svm_nodes.size() - 1;
658
659                                 generate_multi_closure(root_node, cl2in->link->parent, state);
660
661                                 /* Fill in jump instruction location to be after closure. */
662                                 current_svm_nodes[node_jump_skip_index].y =
663                                         current_svm_nodes.size() - node_jump_skip_index - 1;
664                         }
665
666                         /* unassign */
667                         facin->stack_offset = SVM_STACK_INVALID;
668                 }
669                 else {
670                         /* execute closures and their dependencies, no runtime checks
671                          * to skip closures here because was already optimized due to
672                          * fixed weight or add closure that always needs both */
673                         if(cl1in->link)
674                                 generate_multi_closure(root_node, cl1in->link->parent, state);
675                         if(cl2in->link)
676                                 generate_multi_closure(root_node, cl2in->link->parent, state);
677                 }
678         }
679         else {
680                 generate_closure_node(node, state);
681         }
682
683         state->nodes_done.insert(node);
684         state->nodes_done_flag[node->id] = true;
685 }
686
687
688 void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType type)
689 {
690         /* Converting a shader graph into svm_nodes that can be executed
691          * sequentially on the virtual machine is fairly simple. We can keep
692          * looping over nodes and each time all the inputs of a node are
693          * ready, we add svm_nodes for it that read the inputs from the
694          * stack and write outputs back to the stack.
695          *
696          * With the SVM, we always sample only a single closure. We can think
697          * of all closures nodes as a binary tree with mix closures as inner
698          * nodes and other closures as leafs. The SVM will traverse that tree,
699          * each time deciding to go left or right depending on the mix weights,
700          * until a closure is found.
701          *
702          * We only execute nodes that are needed for the mix weights and chosen
703          * closure.
704          */
705
706         current_type = type;
707         current_graph = graph;
708
709         /* get input in output node */
710         ShaderNode *node = graph->output();
711         ShaderInput *clin = NULL;
712
713         switch(type) {
714                 case SHADER_TYPE_SURFACE:
715                         clin = node->input("Surface");
716                         break;
717                 case SHADER_TYPE_VOLUME:
718                         clin = node->input("Volume");
719                         break;
720                 case SHADER_TYPE_DISPLACEMENT:
721                         clin = node->input("Displacement");
722                         break;
723                 case SHADER_TYPE_BUMP:
724                         clin = node->input("Normal");
725                         break;
726                 default:
727                         assert(0);
728                         break;
729         }
730
731         /* clear all compiler state */
732         memset((void *)&active_stack, 0, sizeof(active_stack));
733         current_svm_nodes.clear();
734
735         foreach(ShaderNode *node_iter, graph->nodes) {
736                 foreach(ShaderInput *input, node_iter->inputs)
737                         input->stack_offset = SVM_STACK_INVALID;
738                 foreach(ShaderOutput *output, node_iter->outputs)
739                         output->stack_offset = SVM_STACK_INVALID;
740         }
741
742         /* for the bump shader we need add a node to store the shader state */
743         bool need_bump_state = (type == SHADER_TYPE_BUMP) && (shader->displacement_method == DISPLACE_BOTH);
744         int bump_state_offset = SVM_STACK_INVALID;
745         if(need_bump_state) {
746                 bump_state_offset = stack_find_offset(SVM_BUMP_EVAL_STATE_SIZE);
747                 add_node(NODE_ENTER_BUMP_EVAL, bump_state_offset);
748         }
749
750         if(shader->used) {
751                 if(clin->link) {
752                         bool generate = false;
753
754                         switch(type) {
755                                 case SHADER_TYPE_SURFACE: /* generate surface shader */
756                                         generate = true;
757                                         shader->has_surface = true;
758                                         break;
759                                 case SHADER_TYPE_VOLUME: /* generate volume shader */
760                                         generate = true;
761                                         shader->has_volume = true;
762                                         break;
763                                 case SHADER_TYPE_DISPLACEMENT: /* generate displacement shader */
764                                         generate = true;
765                                         shader->has_displacement = true;
766                                         break;
767                                 case SHADER_TYPE_BUMP: /* generate bump shader */
768                                         generate = true;
769                                         break;
770                                 default:
771                                         break;
772                         }
773
774                         if(generate) {
775                                 CompilerState state(graph);
776                                 generate_multi_closure(clin->link->parent,
777                                                        clin->link->parent,
778                                                        &state);
779                         }
780                 }
781
782                 /* compile output node */
783                 node->compile(*this);
784         }
785
786         /* add node to restore state after bump shader has finished */
787         if(need_bump_state) {
788                 add_node(NODE_LEAVE_BUMP_EVAL, bump_state_offset);
789         }
790
791         /* if compile failed, generate empty shader */
792         if(compile_failed) {
793                 current_svm_nodes.clear();
794                 compile_failed = false;
795         }
796
797         /* for bump shaders we fall thru to the surface shader, but if this is any other kind of shader it ends here */
798         if(type != SHADER_TYPE_BUMP) {
799                 add_node(NODE_END, 0, 0, 0);
800         }
801 }
802
803 void SVMCompiler::compile(Scene *scene,
804                           Shader *shader,
805                           array<int4>& svm_nodes,
806                           int index,
807                           Summary *summary)
808 {
809         /* copy graph for shader with bump mapping */
810         ShaderNode *output = shader->graph->output();
811         int start_num_svm_nodes = svm_nodes.size();
812
813         const double time_start = time_dt();
814
815         bool has_bump = (shader->displacement_method != DISPLACE_TRUE) &&
816                         output->input("Surface")->link && output->input("Displacement")->link;
817
818         /* finalize */
819         {
820                 scoped_timer timer((summary != NULL)? &summary->time_finalize: NULL);
821                 shader->graph->finalize(scene,
822                                         has_bump,
823                                         shader->has_integrator_dependency,
824                                         shader->displacement_method == DISPLACE_BOTH);
825         }
826
827         current_shader = shader;
828
829         shader->has_surface = false;
830         shader->has_surface_emission = false;
831         shader->has_surface_transparent = false;
832         shader->has_surface_bssrdf = false;
833         shader->has_bump = has_bump;
834         shader->has_bssrdf_bump = has_bump;
835         shader->has_volume = false;
836         shader->has_displacement = false;
837         shader->has_surface_spatial_varying = false;
838         shader->has_volume_spatial_varying = false;
839         shader->has_object_dependency = false;
840         shader->has_attribute_dependency = false;
841         shader->has_integrator_dependency = false;
842
843         /* generate bump shader */
844         if(has_bump) {
845                 scoped_timer timer((summary != NULL)? &summary->time_generate_bump: NULL);
846                 compile_type(shader, shader->graph, SHADER_TYPE_BUMP);
847                 svm_nodes[index].y = svm_nodes.size();
848                 svm_nodes.append(current_svm_nodes);
849         }
850
851         /* generate surface shader */
852         {
853                 scoped_timer timer((summary != NULL)? &summary->time_generate_surface: NULL);
854                 compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
855                 /* only set jump offset if there's no bump shader, as the bump shader will fall thru to this one if it exists */
856                 if(!has_bump) {
857                         svm_nodes[index].y = svm_nodes.size();
858                 }
859                 svm_nodes.append(current_svm_nodes);
860         }
861
862         /* generate volume shader */
863         {
864                 scoped_timer timer((summary != NULL)? &summary->time_generate_volume: NULL);
865                 compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
866                 svm_nodes[index].z = svm_nodes.size();
867                 svm_nodes.append(current_svm_nodes);
868         }
869
870         /* generate displacement shader */
871         {
872                 scoped_timer timer((summary != NULL)? &summary->time_generate_displacement: NULL);
873                 compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
874                 svm_nodes[index].w = svm_nodes.size();
875                 svm_nodes.append(current_svm_nodes);
876         }
877
878         /* Fill in summary information. */
879         if(summary != NULL) {
880                 summary->time_total = time_dt() - time_start;
881                 summary->peak_stack_usage = max_stack_use;
882                 summary->num_svm_nodes = svm_nodes.size() - start_num_svm_nodes;
883         }
884 }
885
886 /* Compiler summary implementation. */
887
888 SVMCompiler::Summary::Summary()
889         : num_svm_nodes(0),
890           peak_stack_usage(0),
891           time_finalize(0.0),
892           time_generate_surface(0.0),
893           time_generate_bump(0.0),
894           time_generate_volume(0.0),
895           time_generate_displacement(0.0),
896           time_total(0.0)
897 {
898 }
899
900 string SVMCompiler::Summary::full_report() const
901 {
902         string report = "";
903         report += string_printf("Number of SVM nodes: %d\n", num_svm_nodes);
904         report += string_printf("Peak stack usage:    %d\n", peak_stack_usage);
905
906         report += string_printf("Time (in seconds):\n");
907         report += string_printf("Finalize:            %f\n", time_finalize);
908         report += string_printf("  Surface:           %f\n", time_generate_surface);
909         report += string_printf("  Bump:              %f\n", time_generate_bump);
910         report += string_printf("  Volume:            %f\n", time_generate_volume);
911         report += string_printf("  Displacement:      %f\n", time_generate_displacement);
912         report += string_printf("Generate:            %f\n", time_generate_surface +
913                                                              time_generate_bump +
914                                                              time_generate_volume +
915                                                              time_generate_displacement);
916         report += string_printf("Total:               %f\n", time_total);
917
918         return report;
919 }
920
921 /* Global state of the compiler. */
922
923 SVMCompiler::CompilerState::CompilerState(ShaderGraph *graph)
924 {
925         int max_id = 0;
926         foreach(ShaderNode *node, graph->nodes) {
927                 max_id = max(node->id, max_id);
928         }
929         nodes_done_flag.resize(max_id + 1, false);
930 }
931
932 CCL_NAMESPACE_END