Cycles: Make all #include statements relative to cycles source directory
[blender.git] / intern / cycles / render / svm.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "device/device.h"
18 #include "render/graph.h"
19 #include "render/light.h"
20 #include "render/mesh.h"
21 #include "render/nodes.h"
22 #include "render/scene.h"
23 #include "render/shader.h"
24 #include "render/svm.h"
25
26 #include "util/util_debug.h"
27 #include "util/util_logging.h"
28 #include "util/util_foreach.h"
29 #include "util/util_progress.h"
30 #include "util/util_task.h"
31
32 CCL_NAMESPACE_BEGIN
33
34 /* Shader Manager */
35
36 SVMShaderManager::SVMShaderManager()
37 {
38 }
39
40 SVMShaderManager::~SVMShaderManager()
41 {
42 }
43
44 void SVMShaderManager::reset(Scene * /*scene*/)
45 {
46 }
47
48 void SVMShaderManager::device_update_shader(Scene *scene,
49                                             Shader *shader,
50                                             Progress *progress,
51                                             vector<int4> *global_svm_nodes)
52 {
53         if(progress->get_cancel()) {
54                 return;
55         }
56         assert(shader->graph);
57
58         vector<int4> svm_nodes;
59         svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
60
61         SVMCompiler::Summary summary;
62         SVMCompiler compiler(scene->shader_manager, scene->image_manager);
63         compiler.background = (shader == scene->default_background);
64         compiler.compile(scene, shader, svm_nodes, 0, &summary);
65
66         VLOG(2) << "Compilation summary:\n"
67                 << "Shader name: " << shader->name << "\n"
68                 << summary.full_report();
69
70         if(shader->use_mis && shader->has_surface_emission) {
71                 scene->light_manager->need_update = true;
72         }
73
74         /* The copy needs to be done inside the lock, if another thread resizes the array 
75          * while memcpy is running, it'll be copying into possibly invalid/freed ram. 
76          */
77         nodes_lock_.lock();
78         size_t global_nodes_size = global_svm_nodes->size();
79         global_svm_nodes->resize(global_nodes_size + svm_nodes.size());
80         
81         /* Offset local SVM nodes to a global address space. */
82         int4& jump_node = global_svm_nodes->at(shader->id);
83         jump_node.y = svm_nodes[0].y + global_nodes_size - 1;
84         jump_node.z = svm_nodes[0].z + global_nodes_size - 1;
85         jump_node.w = svm_nodes[0].w + global_nodes_size - 1;
86         /* Copy new nodes to global storage. */
87         memcpy(&global_svm_nodes->at(global_nodes_size),
88                &svm_nodes[1],
89                sizeof(int4) * (svm_nodes.size() - 1));
90         nodes_lock_.unlock();
91 }
92
93 void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
94 {
95         if(!need_update)
96                 return;
97
98         VLOG(1) << "Total " << scene->shaders.size() << " shaders.";
99
100         double start_time = time_dt();
101
102         /* test if we need to update */
103         device_free(device, dscene, scene);
104
105         /* determine which shaders are in use */
106         device_update_shaders_used(scene);
107
108         /* svm_nodes */
109         vector<int4> svm_nodes;
110         size_t i;
111
112         for(i = 0; i < scene->shaders.size(); i++) {
113                 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
114         }
115
116         TaskPool task_pool;
117         foreach(Shader *shader, scene->shaders) {
118                 task_pool.push(function_bind(&SVMShaderManager::device_update_shader,
119                                              this,
120                                              scene,
121                                              shader,
122                                              &progress,
123                                              &svm_nodes),
124                                false);
125         }
126         task_pool.wait_work();
127
128         if(progress.get_cancel()) {
129                 return;
130         }
131
132         dscene->svm_nodes.copy((uint4*)&svm_nodes[0], svm_nodes.size());
133         device->tex_alloc("__svm_nodes", dscene->svm_nodes);
134
135         for(i = 0; i < scene->shaders.size(); i++) {
136                 Shader *shader = scene->shaders[i];
137                 shader->need_update = false;
138         }
139
140         device_update_common(device, dscene, scene, progress);
141
142         need_update = false;
143
144         VLOG(1) << "Shader manager updated "
145                 << scene->shaders.size() << " shaders in "
146                 << time_dt() - start_time << " seconds.";
147 }
148
149 void SVMShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene)
150 {
151         device_free_common(device, dscene, scene);
152
153         device->tex_free(dscene->svm_nodes);
154         dscene->svm_nodes.clear();
155 }
156
157 /* Graph Compiler */
158
159 SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_manager_)
160 {
161         shader_manager = shader_manager_;
162         image_manager = image_manager_;
163         max_stack_use = 0;
164         current_type = SHADER_TYPE_SURFACE;
165         current_shader = NULL;
166         current_graph = NULL;
167         background = false;
168         mix_weight_offset = SVM_STACK_INVALID;
169         compile_failed = false;
170 }
171
172 int SVMCompiler::stack_size(SocketType::Type type)
173 {
174         int size = 0;
175         
176         switch(type) {
177                 case SocketType::FLOAT:
178                 case SocketType::INT:
179                         size = 1;
180                         break;
181                 case SocketType::COLOR:
182                 case SocketType::VECTOR:
183                 case SocketType::NORMAL:
184                 case SocketType::POINT:
185                         size = 3;
186                         break;
187                 case SocketType::CLOSURE:
188                         size = 0;
189                         break;
190                 default:
191                         assert(0);
192                         break;
193         }
194         
195         return size;
196 }
197
198 int SVMCompiler::stack_find_offset(int size)
199 {
200         int offset = -1;
201         
202         /* find free space in stack & mark as used */
203         for(int i = 0, num_unused = 0; i < SVM_STACK_SIZE; i++) {
204                 if(active_stack.users[i]) num_unused = 0;
205                 else num_unused++;
206
207                 if(num_unused == size) {
208                         offset = i+1 - size;
209                         max_stack_use = max(i+1, max_stack_use);
210
211                         while(i >= offset)
212                                 active_stack.users[i--] = 1;
213
214                         return offset;
215                 }
216         }
217
218         if(!compile_failed) {
219                 compile_failed = true;
220                 fprintf(stderr, "Cycles: out of SVM stack space, shader \"%s\" too big.\n", current_shader->name.c_str());
221         }
222
223         return 0;
224 }
225
226 int SVMCompiler::stack_find_offset(SocketType::Type type)
227 {
228         return stack_find_offset(stack_size(type));
229 }
230
231 void SVMCompiler::stack_clear_offset(SocketType::Type type, int offset)
232 {
233         int size = stack_size(type);
234
235         for(int i = 0; i < size; i++)
236                 active_stack.users[offset + i]--;
237 }
238
239 int SVMCompiler::stack_assign(ShaderInput *input)
240 {
241         /* stack offset assign? */
242         if(input->stack_offset == SVM_STACK_INVALID) {
243                 if(input->link) {
244                         /* linked to output -> use output offset */
245                         input->stack_offset = input->link->stack_offset;
246                 }
247                 else {
248                         Node *node = input->parent;
249
250                         /* not linked to output -> add nodes to load default value */
251                         input->stack_offset = stack_find_offset(input->type());
252
253                         if(input->type() == SocketType::FLOAT) {
254                                 add_node(NODE_VALUE_F, __float_as_int(node->get_float(input->socket_type)), input->stack_offset);
255                         }
256                         else if(input->type() == SocketType::INT) {
257                                 add_node(NODE_VALUE_F, node->get_int(input->socket_type), input->stack_offset);
258                         }
259                         else if(input->type() == SocketType::VECTOR ||
260                                 input->type() == SocketType::NORMAL ||
261                                 input->type() == SocketType::POINT ||
262                                 input->type() == SocketType::COLOR)
263                         {
264
265                                 add_node(NODE_VALUE_V, input->stack_offset);
266                                 add_node(NODE_VALUE_V, node->get_float3(input->socket_type));
267                         }
268                         else /* should not get called for closure */
269                                 assert(0);
270                 }
271         }
272
273         return input->stack_offset;
274 }
275
276 int SVMCompiler::stack_assign(ShaderOutput *output)
277 {
278         /* if no stack offset assigned yet, find one */
279         if(output->stack_offset == SVM_STACK_INVALID)
280                 output->stack_offset = stack_find_offset(output->type());
281
282         return output->stack_offset;
283 }
284
285 int SVMCompiler::stack_assign_if_linked(ShaderInput *input)
286 {
287         if(input->link)
288                 return stack_assign(input);
289
290         return SVM_STACK_INVALID;
291 }
292
293 int SVMCompiler::stack_assign_if_linked(ShaderOutput *output)
294 {
295         if(!output->links.empty())
296                 return stack_assign(output);
297
298         return SVM_STACK_INVALID;
299 }
300
301 void SVMCompiler::stack_link(ShaderInput *input, ShaderOutput *output)
302 {
303         if(output->stack_offset == SVM_STACK_INVALID) {
304                 assert(input->link);
305                 assert(stack_size(output->type()) == stack_size(input->link->type()));
306
307                 output->stack_offset = input->link->stack_offset;
308
309                 int size = stack_size(output->type());
310
311                 for(int i = 0; i < size; i++)
312                         active_stack.users[output->stack_offset + i]++;
313         }
314 }
315
316 void SVMCompiler::stack_clear_users(ShaderNode *node, ShaderNodeSet& done)
317 {
318         /* optimization we should add:
319          * find and lower user counts for outputs for which all inputs are done.
320          * this is done before the node is compiled, under the assumption that the
321          * node will first load all inputs from the stack and then writes its
322          * outputs. this used to work, but was disabled because it gave trouble
323          * with inputs getting stack positions assigned */
324
325         foreach(ShaderInput *input, node->inputs) {
326                 ShaderOutput *output = input->link;
327
328                 if(output && output->stack_offset != SVM_STACK_INVALID) {
329                         bool all_done = true;
330
331                         /* optimization we should add: verify if in->parent is actually used */
332                         foreach(ShaderInput *in, output->links)
333                                 if(in->parent != node && done.find(in->parent) == done.end())
334                                         all_done = false;
335
336                         if(all_done) {
337                                 stack_clear_offset(output->type(), output->stack_offset);
338                                 output->stack_offset = SVM_STACK_INVALID;
339
340                                 foreach(ShaderInput *in, output->links)
341                                         in->stack_offset = SVM_STACK_INVALID;
342                         }
343                 }
344         }
345 }
346
347 void SVMCompiler::stack_clear_temporary(ShaderNode *node)
348 {
349         foreach(ShaderInput *input, node->inputs) {
350                 if(!input->link && input->stack_offset != SVM_STACK_INVALID) {
351                         stack_clear_offset(input->type(), input->stack_offset);
352                         input->stack_offset = SVM_STACK_INVALID;
353                 }
354         }
355 }
356
357 uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
358 {
359         assert(x <= 255);
360         assert(y <= 255);
361         assert(z <= 255);
362         assert(w <= 255);
363
364         return (x) | (y << 8) | (z << 16) | (w << 24);
365 }
366
367 void SVMCompiler::add_node(int a, int b, int c, int d)
368 {
369         current_svm_nodes.push_back(make_int4(a, b, c, d));
370 }
371
372 void SVMCompiler::add_node(ShaderNodeType type, int a, int b, int c)
373 {
374         current_svm_nodes.push_back(make_int4(type, a, b, c));
375 }
376
377 void SVMCompiler::add_node(ShaderNodeType type, const float3& f)
378 {
379         current_svm_nodes.push_back(make_int4(type,
380                 __float_as_int(f.x),
381                 __float_as_int(f.y),
382                 __float_as_int(f.z)));
383 }
384
385 void SVMCompiler::add_node(const float4& f)
386 {
387         current_svm_nodes.push_back(make_int4(
388                 __float_as_int(f.x),
389                 __float_as_int(f.y),
390                 __float_as_int(f.z),
391                 __float_as_int(f.w)));
392 }
393
394 uint SVMCompiler::attribute(ustring name)
395 {
396         return shader_manager->get_attribute_id(name);
397 }
398
399 uint SVMCompiler::attribute(AttributeStandard std)
400 {
401         return shader_manager->get_attribute_id(std);
402 }
403
404 bool SVMCompiler::node_skip_input(ShaderNode * /*node*/, ShaderInput *input)
405 {
406         /* nasty exception .. */
407         if(current_type == SHADER_TYPE_DISPLACEMENT && input->link && input->link->parent->special_type == SHADER_SPECIAL_TYPE_BUMP)
408                 return true;
409         
410         return false;
411 }
412
413 void SVMCompiler::find_dependencies(ShaderNodeSet& dependencies,
414                                     const ShaderNodeSet& done,
415                                     ShaderInput *input,
416                                     ShaderNode *skip_node)
417 {
418         ShaderNode *node = (input->link)? input->link->parent: NULL;
419
420         if(node != NULL &&
421            done.find(node) == done.end() &&
422            node != skip_node &&
423            dependencies.find(node) == dependencies.end())
424         {
425                 foreach(ShaderInput *in, node->inputs)
426                         if(!node_skip_input(node, in))
427                                 find_dependencies(dependencies, done, in, skip_node);
428
429                 dependencies.insert(node);
430         }
431 }
432
433 void SVMCompiler::generate_node(ShaderNode *node, ShaderNodeSet& done)
434 {
435         node->compile(*this);
436         stack_clear_users(node, done);
437         stack_clear_temporary(node);
438
439         if(current_type == SHADER_TYPE_SURFACE) {
440                 if(node->has_spatial_varying())
441                         current_shader->has_surface_spatial_varying = true;
442         }
443         else if(current_type == SHADER_TYPE_VOLUME) {
444                 if(node->has_spatial_varying())
445                         current_shader->has_volume_spatial_varying = true;
446         }
447
448         if(node->has_object_dependency()) {
449                 current_shader->has_object_dependency = true;
450         }
451
452         if(node->has_integrator_dependency()) {
453                 current_shader->has_integrator_dependency = true;
454         }
455 }
456
457 void SVMCompiler::generate_svm_nodes(const ShaderNodeSet& nodes,
458                                      CompilerState *state)
459 {
460         ShaderNodeSet& done = state->nodes_done;
461         vector<bool>& done_flag = state->nodes_done_flag;
462
463         bool nodes_done;
464         do {
465                 nodes_done = true;
466
467                 foreach(ShaderNode *node, nodes) {
468                         if(!done_flag[node->id]) {
469                                 bool inputs_done = true;
470
471                                 foreach(ShaderInput *input, node->inputs)
472                                         if(!node_skip_input(node, input))
473                                                 if(input->link && !done_flag[input->link->parent->id])
474                                                         inputs_done = false;
475
476                                 if(inputs_done) {
477                                         generate_node(node, done);
478                                         done.insert(node);
479                                         done_flag[node->id] = true;
480                                 }
481                                 else
482                                         nodes_done = false;
483                         }
484                 }
485         } while(!nodes_done);
486 }
487
488 void SVMCompiler::generate_closure_node(ShaderNode *node,
489                                         CompilerState *state)
490 {
491         /* execute dependencies for closure */
492         foreach(ShaderInput *in, node->inputs) {
493                 if(!node_skip_input(node, in) && in->link) {
494                         ShaderNodeSet dependencies;
495                         find_dependencies(dependencies, state->nodes_done, in);
496                         generate_svm_nodes(dependencies, state);
497                 }
498         }
499
500         /* closure mix weight */
501         const char *weight_name = (current_type == SHADER_TYPE_VOLUME)? "VolumeMixWeight": "SurfaceMixWeight";
502         ShaderInput *weight_in = node->input(weight_name);
503
504         if(weight_in && (weight_in->link || node->get_float(weight_in->socket_type) != 1.0f))
505                 mix_weight_offset = stack_assign(weight_in);
506         else
507                 mix_weight_offset = SVM_STACK_INVALID;
508
509         /* compile closure itself */
510         generate_node(node, state->nodes_done);
511
512         mix_weight_offset = SVM_STACK_INVALID;
513
514         if(current_type == SHADER_TYPE_SURFACE) {
515                 if(node->has_surface_emission())
516                         current_shader->has_surface_emission = true;
517                 if(node->has_surface_transparent())
518                         current_shader->has_surface_transparent = true;
519                 if(node->has_surface_bssrdf()) {
520                         current_shader->has_surface_bssrdf = true;
521                         if(node->has_bssrdf_bump())
522                                 current_shader->has_bssrdf_bump = true;
523                 }
524         }
525 }
526
527 void SVMCompiler::generated_shared_closure_nodes(ShaderNode *root_node,
528                                                  ShaderNode *node,
529                                                  CompilerState *state,
530                                                  const ShaderNodeSet& shared)
531 {
532         if(shared.find(node) != shared.end()) {
533                 generate_multi_closure(root_node, node, state);
534         }
535         else {
536                 foreach(ShaderInput *in, node->inputs) {
537                         if(in->type() == SocketType::CLOSURE && in->link)
538                                 generated_shared_closure_nodes(root_node,
539                                                                in->link->parent,
540                                                                state,
541                                                                shared);
542                 }
543         }
544 }
545
546 void SVMCompiler::generate_multi_closure(ShaderNode *root_node,
547                                          ShaderNode *node,
548                                          CompilerState *state)
549 {
550         /* only generate once */
551         if(state->closure_done.find(node) != state->closure_done.end())
552                 return;
553
554         state->closure_done.insert(node);
555
556         if(node->special_type == SHADER_SPECIAL_TYPE_COMBINE_CLOSURE) {
557                 /* weighting is already taken care of in ShaderGraph::transform_multi_closure */
558                 ShaderInput *cl1in = node->input("Closure1");
559                 ShaderInput *cl2in = node->input("Closure2");
560                 ShaderInput *facin = node->input("Fac");
561
562                 /* skip empty mix/add closure nodes */
563                 if(!cl1in->link && !cl2in->link)
564                         return;
565
566                 if(facin && facin->link) {
567                         /* mix closure: generate instructions to compute mix weight */
568                         ShaderNodeSet dependencies;
569                         find_dependencies(dependencies, state->nodes_done, facin);
570                         generate_svm_nodes(dependencies, state);
571
572                         /* execute shared dependencies. this is needed to allow skipping
573                          * of zero weight closures and their dependencies later, so we
574                          * ensure that they only skip dependencies that are unique to them */
575                         ShaderNodeSet cl1deps, cl2deps, shareddeps;
576
577                         find_dependencies(cl1deps, state->nodes_done, cl1in);
578                         find_dependencies(cl2deps, state->nodes_done, cl2in);
579
580                         ShaderNodeIDComparator node_id_comp;
581                         set_intersection(cl1deps.begin(), cl1deps.end(),
582                                          cl2deps.begin(), cl2deps.end(),
583                                          std::inserter(shareddeps, shareddeps.begin()),
584                                          node_id_comp);
585
586                         /* it's possible some nodes are not shared between this mix node
587                          * inputs, but still needed to be always executed, this mainly
588                          * happens when a node of current subbranch is used by a parent
589                          * node or so */
590                         if(root_node != node) {
591                                 foreach(ShaderInput *in, root_node->inputs) {
592                                         ShaderNodeSet rootdeps;
593                                         find_dependencies(rootdeps, state->nodes_done, in, node);
594                                         set_intersection(rootdeps.begin(), rootdeps.end(),
595                                                          cl1deps.begin(), cl1deps.end(),
596                                                          std::inserter(shareddeps, shareddeps.begin()),
597                                                          node_id_comp);
598                                         set_intersection(rootdeps.begin(), rootdeps.end(),
599                                                          cl2deps.begin(), cl2deps.end(),
600                                                          std::inserter(shareddeps, shareddeps.begin()),
601                                                          node_id_comp);
602                                 }
603                         }
604
605                         if(!shareddeps.empty()) {
606                                 if(cl1in->link) {
607                                         generated_shared_closure_nodes(root_node,
608                                                                        cl1in->link->parent,
609                                                                        state,
610                                                                        shareddeps);
611                                 }
612                                 if(cl2in->link) {
613                                         generated_shared_closure_nodes(root_node,
614                                                                        cl2in->link->parent,
615                                                                        state,
616                                                                        shareddeps);
617                                 }
618
619                                 generate_svm_nodes(shareddeps, state);
620                         }
621
622                         /* generate instructions for input closure 1 */
623                         if(cl1in->link) {
624                                 /* Add instruction to skip closure and its dependencies if mix
625                                  * weight is zero.
626                                  */
627                                 current_svm_nodes.push_back(make_int4(NODE_JUMP_IF_ONE,
628                                                                       0,
629                                                                       stack_assign(facin),
630                                                                       0));
631                                 int node_jump_skip_index = current_svm_nodes.size() - 1;
632
633                                 generate_multi_closure(root_node, cl1in->link->parent, state);
634
635                                 /* Fill in jump instruction location to be after closure. */
636                                 current_svm_nodes[node_jump_skip_index].y =
637                                         current_svm_nodes.size() - node_jump_skip_index - 1;
638                         }
639
640                         /* generate instructions for input closure 2 */
641                         if(cl2in->link) {
642                                 /* Add instruction to skip closure and its dependencies if mix
643                                  * weight is zero.
644                                  */
645                                 current_svm_nodes.push_back(make_int4(NODE_JUMP_IF_ZERO,
646                                                                       0,
647                                                                       stack_assign(facin),
648                                                                       0));
649                                 int node_jump_skip_index = current_svm_nodes.size() - 1;
650
651                                 generate_multi_closure(root_node, cl2in->link->parent, state);
652
653                                 /* Fill in jump instruction location to be after closure. */
654                                 current_svm_nodes[node_jump_skip_index].y =
655                                         current_svm_nodes.size() - node_jump_skip_index - 1;
656                         }
657
658                         /* unassign */
659                         facin->stack_offset = SVM_STACK_INVALID;
660                 }
661                 else {
662                         /* execute closures and their dependencies, no runtime checks
663                          * to skip closures here because was already optimized due to
664                          * fixed weight or add closure that always needs both */
665                         if(cl1in->link)
666                                 generate_multi_closure(root_node, cl1in->link->parent, state);
667                         if(cl2in->link)
668                                 generate_multi_closure(root_node, cl2in->link->parent, state);
669                 }
670         }
671         else {
672                 generate_closure_node(node, state);
673         }
674
675         state->nodes_done.insert(node);
676         state->nodes_done_flag[node->id] = true;
677 }
678
679
680 void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType type)
681 {
682         /* Converting a shader graph into svm_nodes that can be executed
683          * sequentially on the virtual machine is fairly simple. We can keep
684          * looping over nodes and each time all the inputs of a node are
685          * ready, we add svm_nodes for it that read the inputs from the
686          * stack and write outputs back to the stack.
687          *
688          * With the SVM, we always sample only a single closure. We can think
689          * of all closures nodes as a binary tree with mix closures as inner
690          * nodes and other closures as leafs. The SVM will traverse that tree,
691          * each time deciding to go left or right depending on the mix weights,
692          * until a closure is found.
693          *
694          * We only execute nodes that are needed for the mix weights and chosen
695          * closure.
696          */
697
698         current_type = type;
699         current_graph = graph;
700
701         /* get input in output node */
702         ShaderNode *node = graph->output();
703         ShaderInput *clin = NULL;
704         
705         switch(type) {
706                 case SHADER_TYPE_SURFACE:
707                         clin = node->input("Surface");
708                         break;
709                 case SHADER_TYPE_VOLUME:
710                         clin = node->input("Volume");
711                         break;
712                 case SHADER_TYPE_DISPLACEMENT:
713                         clin = node->input("Displacement");
714                         break;
715                 case SHADER_TYPE_BUMP:
716                         clin = node->input("Normal");
717                         break;
718                 default:
719                         assert(0);
720                         break;
721         }
722
723         /* clear all compiler state */
724         memset(&active_stack, 0, sizeof(active_stack));
725         current_svm_nodes.clear();
726
727         foreach(ShaderNode *node_iter, graph->nodes) {
728                 foreach(ShaderInput *input, node_iter->inputs)
729                         input->stack_offset = SVM_STACK_INVALID;
730                 foreach(ShaderOutput *output, node_iter->outputs)
731                         output->stack_offset = SVM_STACK_INVALID;
732         }
733
734         /* for the bump shader we need add a node to store the shader state */
735         bool need_bump_state = (type == SHADER_TYPE_BUMP) && (shader->displacement_method == DISPLACE_BOTH);
736         int bump_state_offset = SVM_STACK_INVALID;
737         if(need_bump_state) {
738                 bump_state_offset = stack_find_offset(SVM_BUMP_EVAL_STATE_SIZE);
739                 add_node(NODE_ENTER_BUMP_EVAL, bump_state_offset);
740         }
741
742         if(shader->used) {
743                 if(clin->link) {
744                         bool generate = false;
745                         
746                         switch(type) {
747                                 case SHADER_TYPE_SURFACE: /* generate surface shader */         
748                                         generate = true;
749                                         shader->has_surface = true;
750                                         break;
751                                 case SHADER_TYPE_VOLUME: /* generate volume shader */
752                                         generate = true;
753                                         shader->has_volume = true;
754                                         break;
755                                 case SHADER_TYPE_DISPLACEMENT: /* generate displacement shader */
756                                         generate = true;
757                                         shader->has_displacement = true;
758                                         break;
759                                 case SHADER_TYPE_BUMP: /* generate bump shader */
760                                         generate = true;
761                                         break;
762                                 default:
763                                         break;
764                         }
765
766                         if(generate) {
767                                 CompilerState state(graph);
768                                 generate_multi_closure(clin->link->parent,
769                                                        clin->link->parent,
770                                                        &state);
771                         }
772                 }
773
774                 /* compile output node */
775                 node->compile(*this);
776         }
777
778         /* add node to restore state after bump shader has finished */
779         if(need_bump_state) {
780                 add_node(NODE_LEAVE_BUMP_EVAL, bump_state_offset);
781         }
782
783         /* if compile failed, generate empty shader */
784         if(compile_failed) {
785                 current_svm_nodes.clear();
786                 compile_failed = false;
787         }
788
789         /* for bump shaders we fall thru to the surface shader, but if this is any other kind of shader it ends here */
790         if(type != SHADER_TYPE_BUMP) {
791                 add_node(NODE_END, 0, 0, 0);
792         }
793 }
794
795 void SVMCompiler::compile(Scene *scene,
796                           Shader *shader,
797                           vector<int4>& svm_nodes,
798                           int index,
799                           Summary *summary)
800 {
801         /* copy graph for shader with bump mapping */
802         ShaderNode *node = shader->graph->output();
803         int start_num_svm_nodes = svm_nodes.size();
804
805         const double time_start = time_dt();
806
807         if(node->input("Surface")->link && node->input("Displacement")->link)
808                 if(!shader->graph_bump)
809                         shader->graph_bump = shader->graph->copy();
810
811         /* finalize */
812         {
813                 scoped_timer timer((summary != NULL)? &summary->time_finalize: NULL);
814                 shader->graph->finalize(scene,
815                                         false,
816                                         shader->has_integrator_dependency);
817         }
818
819         if(shader->graph_bump) {
820                 scoped_timer timer((summary != NULL)? &summary->time_finalize_bump: NULL);
821                 shader->graph_bump->finalize(scene,
822                                              true,
823                                              shader->has_integrator_dependency,
824                                              shader->displacement_method == DISPLACE_BOTH);
825         }
826
827         current_shader = shader;
828
829         shader->has_surface = false;
830         shader->has_surface_emission = false;
831         shader->has_surface_transparent = false;
832         shader->has_surface_bssrdf = false;
833         shader->has_bssrdf_bump = false;
834         shader->has_volume = false;
835         shader->has_displacement = false;
836         shader->has_surface_spatial_varying = false;
837         shader->has_volume_spatial_varying = false;
838         shader->has_object_dependency = false;
839         shader->has_integrator_dependency = false;
840
841         /* generate bump shader */
842         if(shader->displacement_method != DISPLACE_TRUE && shader->graph_bump) {
843                 scoped_timer timer((summary != NULL)? &summary->time_generate_bump: NULL);
844                 compile_type(shader, shader->graph_bump, SHADER_TYPE_BUMP);
845                 svm_nodes[index].y = svm_nodes.size();
846                 svm_nodes.insert(svm_nodes.end(),
847                                  current_svm_nodes.begin(),
848                                  current_svm_nodes.end());
849         }
850
851         /* generate surface shader */
852         {
853                 scoped_timer timer((summary != NULL)? &summary->time_generate_surface: NULL);
854                 compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
855                 /* only set jump offset if there's no bump shader, as the bump shader will fall thru to this one if it exists */
856                 if(shader->displacement_method == DISPLACE_TRUE || !shader->graph_bump) {
857                         svm_nodes[index].y = svm_nodes.size();
858                 }
859                 svm_nodes.insert(svm_nodes.end(),
860                                  current_svm_nodes.begin(),
861                                  current_svm_nodes.end());
862         }
863
864         /* generate volume shader */
865         {
866                 scoped_timer timer((summary != NULL)? &summary->time_generate_volume: NULL);
867                 compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
868                 svm_nodes[index].z = svm_nodes.size();
869                 svm_nodes.insert(svm_nodes.end(),
870                                  current_svm_nodes.begin(),
871                                  current_svm_nodes.end());
872         }
873
874         /* generate displacement shader */
875         {
876                 scoped_timer timer((summary != NULL)? &summary->time_generate_displacement: NULL);
877                 compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
878                 svm_nodes[index].w = svm_nodes.size();
879                 svm_nodes.insert(svm_nodes.end(),
880                                  current_svm_nodes.begin(),
881                                  current_svm_nodes.end());
882         }
883
884         /* Fill in summary information. */
885         if(summary != NULL) {
886                 summary->time_total = time_dt() - time_start;
887                 summary->peak_stack_usage = max_stack_use;
888                 summary->num_svm_nodes = svm_nodes.size() - start_num_svm_nodes;
889         }
890 }
891
892 /* Compiler summary implementation. */
893
894 SVMCompiler::Summary::Summary()
895         : num_svm_nodes(0),
896           peak_stack_usage(0),
897           time_finalize(0.0),
898           time_finalize_bump(0.0),
899           time_generate_surface(0.0),
900           time_generate_bump(0.0),
901           time_generate_volume(0.0),
902           time_generate_displacement(0.0),
903           time_total(0.0)
904 {
905 }
906
907 string SVMCompiler::Summary::full_report() const
908 {
909         string report = "";
910         report += string_printf("Number of SVM nodes: %d\n", num_svm_nodes);
911         report += string_printf("Peak stack usage:    %d\n", peak_stack_usage);
912
913         report += string_printf("Time (in seconds):\n");
914         report += string_printf("  Finalize:          %f\n", time_finalize);
915         report += string_printf("  Bump finalize:     %f\n", time_finalize_bump);
916         report += string_printf("Finalize:            %f\n", time_finalize +
917                                                              time_finalize_bump);
918         report += string_printf("  Surface:           %f\n", time_generate_surface);
919         report += string_printf("  Bump:              %f\n", time_generate_bump);
920         report += string_printf("  Volume:            %f\n", time_generate_volume);
921         report += string_printf("  Displacement:      %f\n", time_generate_displacement);
922         report += string_printf("Generate:            %f\n", time_generate_surface +
923                                                              time_generate_bump +
924                                                              time_generate_volume +
925                                                              time_generate_displacement);
926         report += string_printf("Total:               %f\n", time_total);
927
928         return report;
929 }
930
931 /* Global state of the compiler. */
932
933 SVMCompiler::CompilerState::CompilerState(ShaderGraph *graph)
934 {
935         int max_id = 0;
936         foreach(ShaderNode *node, graph->nodes) {
937                 max_id = max(node->id, max_id);
938         }
939         nodes_done_flag.resize(max_id + 1, false);
940 }
941
942 CCL_NAMESPACE_END
943