Fix #34121: OSL + persistent images option was not freeing shader memory properly,
[blender.git] / intern / cycles / render / svm.cpp
1 /*
2  * Copyright 2011, Blender Foundation.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18
19 #include "device.h"
20 #include "graph.h"
21 #include "light.h"
22 #include "mesh.h"
23 #include "scene.h"
24 #include "shader.h"
25 #include "svm.h"
26
27 #include "util_debug.h"
28 #include "util_foreach.h"
29 #include "util_progress.h"
30
31 CCL_NAMESPACE_BEGIN
32
33 /* Shader Manager */
34
35 SVMShaderManager::SVMShaderManager()
36 {
37 }
38
39 SVMShaderManager::~SVMShaderManager()
40 {
41 }
42
43 void SVMShaderManager::reset(Scene *scene)
44 {
45 }
46
47 void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
48 {
49         if(!need_update)
50                 return;
51
52         /* test if we need to update */
53         device_free(device, dscene);
54
55         /* determine which shaders are in use */
56         device_update_shaders_used(scene);
57
58         /* svm_nodes */
59         vector<int4> svm_nodes;
60         size_t i;
61
62         for(i = 0; i < scene->shaders.size(); i++) {
63                 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
64                 svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
65         }
66         
67         bool sunsky_done = false;
68         bool use_multi_closure = device->info.advanced_shading;
69
70         for(i = 0; i < scene->shaders.size(); i++) {
71                 Shader *shader = scene->shaders[i];
72
73                 if(progress.get_cancel()) return;
74
75                 assert(shader->graph);
76
77                 if(shader->sample_as_light && shader->has_surface_emission)
78                         scene->light_manager->need_update = true;
79
80                 SVMCompiler compiler(scene->shader_manager, scene->image_manager,
81                         use_multi_closure);
82                 compiler.sunsky = (sunsky_done)? NULL: &dscene->data.sunsky;
83                 compiler.background = ((int)i == scene->default_background);
84                 compiler.compile(shader, svm_nodes, i);
85                 if(!compiler.sunsky)
86                         sunsky_done = true;
87         }
88
89         dscene->svm_nodes.copy((uint4*)&svm_nodes[0], svm_nodes.size());
90         device->tex_alloc("__svm_nodes", dscene->svm_nodes);
91
92         for(i = 0; i < scene->shaders.size(); i++) {
93                 Shader *shader = scene->shaders[i];
94                 shader->need_update = false;
95         }
96
97         device_update_common(device, dscene, scene, progress);
98
99         need_update = false;
100 }
101
102 void SVMShaderManager::device_free(Device *device, DeviceScene *dscene)
103 {
104         device_free_common(device, dscene);
105
106         device->tex_free(dscene->svm_nodes);
107         dscene->svm_nodes.clear();
108 }
109
110 /* Graph Compiler */
111
112 SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_manager_, bool use_multi_closure_)
113 {
114         shader_manager = shader_manager_;
115         image_manager = image_manager_;
116         sunsky = NULL;
117         max_stack_use = 0;
118         current_type = SHADER_TYPE_SURFACE;
119         current_shader = NULL;
120         background = false;
121         mix_weight_offset = SVM_STACK_INVALID;
122         use_multi_closure = use_multi_closure_;
123 }
124
125 int SVMCompiler::stack_size(ShaderSocketType type)
126 {
127         if(type == SHADER_SOCKET_FLOAT)
128                 return 1;
129         else if(type == SHADER_SOCKET_INT)
130                 return 1;
131         else if(type == SHADER_SOCKET_COLOR)
132                 return 3;
133         else if(type == SHADER_SOCKET_VECTOR)
134                 return 3;
135         else if(type == SHADER_SOCKET_NORMAL)
136                 return 3;
137         else if(type == SHADER_SOCKET_POINT)
138                 return 3;
139         else if(type == SHADER_SOCKET_CLOSURE)
140                 return 0;
141
142         assert(0);
143         return 0;
144 }
145
146 int SVMCompiler::stack_find_offset(ShaderSocketType type)
147 {
148         int size = stack_size(type);
149         int offset = -1;
150         
151         /* find free space in stack & mark as used */
152         for(int i = 0, num_unused = 0; i < SVM_STACK_SIZE; i++) {
153                 if(active_stack.users[i]) num_unused = 0;
154                 else num_unused++;
155
156                 if(num_unused == size) {
157                         offset = i+1 - size;
158                         max_stack_use = max(i+1, max_stack_use);
159
160                         while(i >= offset)
161                                 active_stack.users[i--] = 1;
162
163                         return offset;
164                 }
165         }
166
167         fprintf(stderr, "Out of SVM stack space.\n");
168         assert(0);
169
170         return offset;
171 }
172
173 void SVMCompiler::stack_clear_offset(ShaderSocketType type, int offset)
174 {
175         int size = stack_size(type);
176
177         for(int i = 0; i < size; i++)
178                 active_stack.users[offset + i]--;
179 }
180
181 void SVMCompiler::stack_backup(StackBackup& backup, set<ShaderNode*>& done)
182 {
183         backup.done = done;
184         backup.stack = active_stack;
185
186         foreach(ShaderNode *node, current_graph->nodes) {
187                 foreach(ShaderInput *input, node->inputs)
188                         backup.offsets.push_back(input->stack_offset);
189                 foreach(ShaderOutput *output, node->outputs)
190                         backup.offsets.push_back(output->stack_offset);
191         }
192 }
193
194 void SVMCompiler::stack_restore(StackBackup& backup, set<ShaderNode*>& done)
195 {
196         int i = 0;
197
198         done = backup.done;
199         active_stack = backup.stack;
200
201         foreach(ShaderNode *node, current_graph->nodes) {
202                 foreach(ShaderInput *input, node->inputs)
203                         input->stack_offset = backup.offsets[i++];
204                 foreach(ShaderOutput *output, node->outputs)
205                         output->stack_offset = backup.offsets[i++];
206         }
207 }
208
209 void SVMCompiler::stack_assign(ShaderInput *input)
210 {
211         /* stack offset assign? */
212         if(input->stack_offset == SVM_STACK_INVALID) {
213                 if(input->link) {
214                         /* linked to output -> use output offset */
215                         input->stack_offset = input->link->stack_offset;
216                 }
217                 else {
218                         /* not linked to output -> add nodes to load default value */
219                         input->stack_offset = stack_find_offset(input->type);
220
221                         if(input->type == SHADER_SOCKET_FLOAT) {
222                                 add_node(NODE_VALUE_F, __float_as_int(input->value.x), input->stack_offset);
223                         }
224                         else if(input->type == SHADER_SOCKET_INT) {
225                                 add_node(NODE_VALUE_F, (int)input->value.x, input->stack_offset);
226                         }
227                         else if(input->type == SHADER_SOCKET_VECTOR ||
228                                 input->type == SHADER_SOCKET_NORMAL ||
229                                 input->type == SHADER_SOCKET_POINT ||
230                                 input->type == SHADER_SOCKET_COLOR) {
231
232                                 add_node(NODE_VALUE_V, input->stack_offset);
233                                 add_node(NODE_VALUE_V, input->value);
234                         }
235                         else /* should not get called for closure */
236                                 assert(0);
237                 }
238         }
239 }
240
241 void SVMCompiler::stack_assign(ShaderOutput *output)
242 {
243         /* if no stack offset assigned yet, find one */
244         if(output->stack_offset == SVM_STACK_INVALID)
245                 output->stack_offset = stack_find_offset(output->type);
246 }
247
248 void SVMCompiler::stack_link(ShaderInput *input, ShaderOutput *output)
249 {
250         if(output->stack_offset == SVM_STACK_INVALID) {
251                 assert(input->link);
252                 assert(stack_size(output->type) == stack_size(input->link->type));
253
254                 output->stack_offset = input->link->stack_offset;
255
256                 int size = stack_size(output->type);
257
258                 for(int i = 0; i < size; i++)
259                         active_stack.users[output->stack_offset + i]++;
260         }
261 }
262
263 void SVMCompiler::stack_clear_users(ShaderNode *node, set<ShaderNode*>& done)
264 {
265         /* optimization we should add:
266          * find and lower user counts for outputs for which all inputs are done.
267          * this is done before the node is compiled, under the assumption that the
268          * node will first load all inputs from the stack and then writes its
269          * outputs. this used to work, but was disabled because it gave trouble
270          * with inputs getting stack positions assigned */
271
272         foreach(ShaderInput *input, node->inputs) {
273                 ShaderOutput *output = input->link;
274
275                 if(output && output->stack_offset != SVM_STACK_INVALID) {
276                         bool all_done = true;
277
278                         /* optimization we should add: verify if in->parent is actually used */
279                         foreach(ShaderInput *in, output->links)
280                                 if(in->parent != node && done.find(in->parent) == done.end())
281                                         all_done = false;
282
283                         if(all_done) {
284                                 stack_clear_offset(output->type, output->stack_offset);
285                                 output->stack_offset = SVM_STACK_INVALID;
286
287                                 foreach(ShaderInput *in, output->links)
288                                         in->stack_offset = SVM_STACK_INVALID;
289                         }
290                 }
291         }
292 }
293
294 void SVMCompiler::stack_clear_temporary(ShaderNode *node)
295 {
296         foreach(ShaderInput *input, node->inputs) {
297                 if(!input->link && input->stack_offset != SVM_STACK_INVALID) {
298                         stack_clear_offset(input->type, input->stack_offset);
299                         input->stack_offset = SVM_STACK_INVALID;
300                 }
301         }
302 }
303
304 uint SVMCompiler::encode_uchar4(uint x, uint y, uint z, uint w)
305 {
306         assert(x <= 255);
307         assert(y <= 255);
308         assert(z <= 255);
309         assert(w <= 255);
310
311         return (x) | (y << 8) | (z << 16) | (w << 24);
312 }
313
314 void SVMCompiler::add_node(int a, int b, int c, int d)
315 {
316         svm_nodes.push_back(make_int4(a, b, c, d));
317 }
318
319 void SVMCompiler::add_node(NodeType type, int a, int b, int c)
320 {
321         svm_nodes.push_back(make_int4(type, a, b, c));
322 }
323
324 void SVMCompiler::add_node(NodeType type, const float3& f)
325 {
326         svm_nodes.push_back(make_int4(type,
327                 __float_as_int(f.x),
328                 __float_as_int(f.y),
329                 __float_as_int(f.z)));
330 }
331
332 void SVMCompiler::add_node(const float4& f)
333 {
334         svm_nodes.push_back(make_int4(
335                 __float_as_int(f.x),
336                 __float_as_int(f.y),
337                 __float_as_int(f.z),
338                 __float_as_int(f.w)));
339 }
340
341 void SVMCompiler::add_array(float4 *f, int num)
342 {
343         for(int i = 0; i < num; i++)
344                 add_node(f[i]);
345 }
346
347 uint SVMCompiler::attribute(ustring name)
348 {
349         return shader_manager->get_attribute_id(name);
350 }
351
352 uint SVMCompiler::attribute(AttributeStandard std)
353 {
354         return shader_manager->get_attribute_id(std);
355 }
356
357 bool SVMCompiler::node_skip_input(ShaderNode *node, ShaderInput *input)
358 {
359         /* nasty exception .. */
360         if(current_type == SHADER_TYPE_DISPLACEMENT && input->link && input->link->parent->name == ustring("bump"))
361                 return true;
362         
363         return false;
364 }
365
366 void SVMCompiler::find_dependencies(set<ShaderNode*>& dependencies, const set<ShaderNode*>& done, ShaderInput *input)
367 {
368         ShaderNode *node = (input->link)? input->link->parent: NULL;
369
370         if(node && done.find(node) == done.end()) {
371                 foreach(ShaderInput *in, node->inputs)
372                         if(!node_skip_input(node, in))
373                                 find_dependencies(dependencies, done, in);
374
375                 dependencies.insert(node);
376         }
377 }
378
379 void SVMCompiler::generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNode*>& done)
380 {
381         bool nodes_done;
382
383         do {
384                 nodes_done = true;
385
386                 foreach(ShaderNode *node, nodes) {
387                         if(done.find(node) == done.end()) {
388                                 bool inputs_done = true;
389
390                                 foreach(ShaderInput *input, node->inputs)
391                                         if(!node_skip_input(node, input))
392                                                 if(input->link && done.find(input->link->parent) == done.end())
393                                                         inputs_done = false;
394
395                                 if(inputs_done) {
396                                         node->compile(*this);
397                                         stack_clear_users(node, done);
398                                         stack_clear_temporary(node);
399                                         done.insert(node);
400                                 }
401                                 else
402                                         nodes_done = false;
403                         }
404                 }
405         } while(!nodes_done);
406 }
407
408 void SVMCompiler::generate_closure(ShaderNode *node, set<ShaderNode*>& done)
409 {
410         if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
411                 ShaderInput *fin = node->input("Fac");
412                 ShaderInput *cl1in = node->input("Closure1");
413                 ShaderInput *cl2in = node->input("Closure2");
414
415                 /* execute dependencies for mix weight */
416                 if(fin) {
417                         set<ShaderNode*> dependencies;
418                         find_dependencies(dependencies, done, fin);
419                         generate_svm_nodes(dependencies, done);
420
421                         /* add mix node */
422                         stack_assign(fin);
423                 }
424
425                 int mix_offset = svm_nodes.size();
426
427                 if(fin)
428                         add_node(NODE_MIX_CLOSURE, fin->stack_offset, 0, 0);
429                 else
430                         add_node(NODE_ADD_CLOSURE, 0, 0, 0);
431
432                 /* generate code for closure 1
433                  * note we backup all compiler state and restore it afterwards, so one
434                  * closure choice doesn't influence the other*/
435                 if(cl1in->link) {
436                         StackBackup backup;
437                         stack_backup(backup, done);
438
439                         generate_closure(cl1in->link->parent, done);
440                         add_node(NODE_END, 0, 0, 0);
441
442                         stack_restore(backup, done);
443                 }
444                 else
445                         add_node(NODE_END, 0, 0, 0);
446
447                 /* generate code for closure 2 */
448                 int cl2_offset = svm_nodes.size();
449
450                 if(cl2in->link) {
451                         StackBackup backup;
452                         stack_backup(backup, done);
453
454                         generate_closure(cl2in->link->parent, done);
455                         add_node(NODE_END, 0, 0, 0);
456
457                         stack_restore(backup, done);
458                 }
459                 else
460                         add_node(NODE_END, 0, 0, 0);
461
462                 /* set jump for mix node, -1 because offset is already
463                  * incremented when this jump is added to it */
464                 svm_nodes[mix_offset].z = cl2_offset - mix_offset - 1;
465
466                 done.insert(node);
467                 stack_clear_users(node, done);
468                 stack_clear_temporary(node);
469         }
470         else {
471                 /* execute dependencies for closure */
472                 foreach(ShaderInput *in, node->inputs) {
473                         if(!node_skip_input(node, in) && in->link) {
474                                 set<ShaderNode*> dependencies;
475                                 find_dependencies(dependencies, done, in);
476                                 generate_svm_nodes(dependencies, done);
477                         }
478                 }
479
480                 /* compile closure itself */
481                 node->compile(*this);
482                 stack_clear_users(node, done);
483                 stack_clear_temporary(node);
484
485                 if(node->has_surface_emission())
486                         current_shader->has_surface_emission = true;
487                 if(node->has_surface_transparent())
488                         current_shader->has_surface_transparent = true;
489
490                 /* end node is added outside of this */
491         }
492 }
493
494 void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& done, set<ShaderNode*>& closure_done)
495 {
496         /* todo: the weaks point here is that unlike the single closure sampling 
497          * we will evaluate all nodes even if they are used as input for closures
498          * that are unused. it's not clear what would be the best way to skip such
499          * nodes at runtime, especially if they are tangled up  */
500         
501         /* only generate once */
502         if(closure_done.find(node) != closure_done.end())
503                 return;
504
505         closure_done.insert(node);
506
507         if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
508                 /* weighting is already taken care of in ShaderGraph::transform_multi_closure */
509                 ShaderInput *cl1in = node->input("Closure1");
510                 ShaderInput *cl2in = node->input("Closure2");
511
512                 if(cl1in->link)
513                         generate_multi_closure(cl1in->link->parent, done, closure_done);
514                 if(cl2in->link)
515                         generate_multi_closure(cl2in->link->parent, done, closure_done);
516         }
517         else {
518                 /* execute dependencies for closure */
519                 foreach(ShaderInput *in, node->inputs) {
520                         if(!node_skip_input(node, in) && in->link) {
521                                 set<ShaderNode*> dependencies;
522                                 find_dependencies(dependencies, done, in);
523                                 generate_svm_nodes(dependencies, done);
524                         }
525                 }
526
527                 /* closure mix weight */
528                 const char *weight_name = (current_type == SHADER_TYPE_VOLUME)? "VolumeMixWeight": "SurfaceMixWeight";
529                 ShaderInput *weight_in = node->input(weight_name);
530
531                 if(weight_in && (weight_in->link || weight_in->value.x != 1.0f)) {
532                         stack_assign(weight_in);
533                         mix_weight_offset = weight_in->stack_offset;
534                 }
535                 else
536                         mix_weight_offset = SVM_STACK_INVALID;
537
538                 /* compile closure itself */
539                 node->compile(*this);
540                 stack_clear_users(node, done);
541                 stack_clear_temporary(node);
542
543                 mix_weight_offset = SVM_STACK_INVALID;
544
545                 if(node->has_surface_emission())
546                         current_shader->has_surface_emission = true;
547                 if(node->has_surface_transparent())
548                         current_shader->has_surface_transparent = true;
549         }
550
551         done.insert(node);
552 }
553
554
555 void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType type)
556 {
557         /* Converting a shader graph into svm_nodes that can be executed
558          * sequentially on the virtual machine is fairly simple. We can keep
559          * looping over nodes and each time all the inputs of a node are
560          * ready, we add svm_nodes for it that read the inputs from the
561          * stack and write outputs back to the stack.
562          *
563          * With the SVM, we always sample only a single closure. We can think
564          * of all closures nodes as a binary tree with mix closures as inner
565          * nodes and other closures as leafs. The SVM will traverse that tree,
566          * each time deciding to go left or right depending on the mix weights,
567          * until a closure is found.
568          *
569          * We only execute nodes that are needed for the mix weights and chosen
570          * closure.
571          */
572
573         current_type = type;
574         current_graph = graph;
575
576         /* get input in output node */
577         ShaderNode *node = graph->output();
578         ShaderInput *clin = NULL;
579         
580         if(type == SHADER_TYPE_SURFACE)
581                 clin = node->input("Surface");
582         else if(type == SHADER_TYPE_VOLUME)
583                 clin = node->input("Volume");
584         else if(type == SHADER_TYPE_DISPLACEMENT)
585                 clin = node->input("Displacement");
586         else
587                 assert(0);
588
589         /* clear all compiler state */
590         memset(&active_stack, 0, sizeof(active_stack));
591         svm_nodes.clear();
592
593         foreach(ShaderNode *node_iter, graph->nodes) {
594                 foreach(ShaderInput *input, node_iter->inputs)
595                         input->stack_offset = SVM_STACK_INVALID;
596                 foreach(ShaderOutput *output, node_iter->outputs)
597                         output->stack_offset = SVM_STACK_INVALID;
598         }
599
600         if(shader->used) {
601                 if(clin->link) {
602                         bool generate = false;
603                         if(type == SHADER_TYPE_SURFACE) {
604                                 /* generate surface shader */
605                                 generate = true;
606                                 shader->has_surface = true;
607                         }
608                         else if(type == SHADER_TYPE_VOLUME) {
609                                 /* generate volume shader */
610                                 generate = true;
611                                 shader->has_volume = true;
612                         }
613                         else if(type == SHADER_TYPE_DISPLACEMENT) {
614                                 /* generate displacement shader */
615                                 generate = true;
616                                 shader->has_displacement = true;
617                         }
618
619                         if(generate) {
620                                 set<ShaderNode*> done;
621
622                                 if(use_multi_closure) {
623                                         set<ShaderNode*> closure_done;
624                                         generate_multi_closure(clin->link->parent, done, closure_done);
625                                 }
626                                 else
627                                         generate_closure(clin->link->parent, done);
628                         }
629                 }
630
631                 /* compile output node */
632                 node->compile(*this);
633         }
634
635         add_node(NODE_END, 0, 0, 0);
636 }
637
638 void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int index)
639 {
640         /* copy graph for shader with bump mapping */
641         ShaderNode *node = shader->graph->output();
642
643         if(node->input("Surface")->link && node->input("Displacement")->link)
644                 if(!shader->graph_bump)
645                         shader->graph_bump = shader->graph->copy();
646
647         /* finalize */
648         shader->graph->finalize(false, false, use_multi_closure);
649         if(shader->graph_bump)
650                 shader->graph_bump->finalize(true, false, use_multi_closure);
651
652         current_shader = shader;
653
654         shader->has_surface = false;
655         shader->has_surface_emission = false;
656         shader->has_surface_transparent = false;
657         shader->has_volume = false;
658         shader->has_displacement = false;
659
660         /* generate surface shader */
661         compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
662         global_svm_nodes[index*2 + 0].y = global_svm_nodes.size();
663         global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
664         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
665
666         if(shader->graph_bump) {
667                 compile_type(shader, shader->graph_bump, SHADER_TYPE_SURFACE);
668                 global_svm_nodes[index*2 + 1].y = global_svm_nodes.size();
669                 global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
670         }
671
672         /* generate volume shader */
673         compile_type(shader, shader->graph, SHADER_TYPE_VOLUME);
674         global_svm_nodes[index*2 + 0].z = global_svm_nodes.size();
675         global_svm_nodes[index*2 + 1].z = global_svm_nodes.size();
676         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
677
678         /* generate displacement shader */
679         compile_type(shader, shader->graph, SHADER_TYPE_DISPLACEMENT);
680         global_svm_nodes[index*2 + 0].w = global_svm_nodes.size();
681         global_svm_nodes[index*2 + 1].w = global_svm_nodes.size();
682         global_svm_nodes.insert(global_svm_nodes.end(), svm_nodes.begin(), svm_nodes.end());
683 }
684
685 CCL_NAMESPACE_END
686