2 * Copyright 2011, Blender Foundation.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software Foundation,
16 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 /* Shader Virtual Machine
24 * A shader is a list of nodes to be executed. These are simply read one after
25 * the other and executed, using a node counter. Each node and its associated
26 * data is encoded as one or more uint4's in a 1D texture. If the data is larger
27 * than a uint4, the node can increase the node counter to compensate for this.
28 * Floats are encoded as int and then converted to float again.
30 * Nodes write their output into a stack. All data in the stack is stored as
31 * floats, since it's all factors, colors and vectors. The stack will be stored
32 * in local memory on the GPU, as it would take too many registers and is indexed
33 * in ways not known at compile time. This seems the only solution even though it
34 * may be slow, with two mitigating factors. If the same shader is being executed,
35 * memory access will be coalesced, and on Fermi cards, memory will actually be cached.
38 * The result of shader execution will be a single closure. This means the
39 * closure type, associated label, data and weight. Sampling from multiple
40 * closures is supported through the mix closure node, the logic for that is
41 * mostly taken care of in the SVM compiler.
44 #include "svm_types.h"
/* Read three consecutive stack slots, starting at index a, and return them
 * as the x, y and z components of a float3. */
__device_inline float3 stack_load_float3(float *stack, uint a)
{
	/* the last component lives at a+2, so that is the slot to bounds check */
	kernel_assert(a+2 < SVM_STACK_SIZE);

	const float *slot = stack + a;

	return make_float3(slot[0], slot[1], slot[2]);
}
/* Write the x, y and z components of f into stack slots a, a+1 and a+2,
 * the same layout that stack_load_float3() reads back. */
__device_inline void stack_store_float3(float *stack, uint a, float3 f)
{
	/* the last component lands at a+2, so that is the slot to bounds check */
	kernel_assert(a+2 < SVM_STACK_SIZE);

	stack[a+0] = f.x;
	stack[a+1] = f.y;
	stack[a+2] = f.z;
}
/* Return the float stored in stack slot a. */
__device_inline float stack_load_float(float *stack, uint a)
{
	kernel_assert(a < SVM_STACK_SIZE);

	return stack[a];
}
/* Load a float from stack slot a, or fall back to a default when a equals
 * SVM_STACK_INVALID. The default is passed in value as raw bits and is
 * reinterpreted as a float with __int_as_float(). */
__device_inline float stack_load_float_default(float *stack, uint a, uint value)
{
	if(a == (uint)SVM_STACK_INVALID)
		return __int_as_float(value);

	return stack_load_float(stack, a);
}
/* Store the float f into stack slot a. */
__device_inline void stack_store_float(float *stack, uint a, float f)
{
	kernel_assert(a < SVM_STACK_SIZE);

	stack[a] = f;
}
/* Tell whether a is a usable stack offset, i.e. anything other than the
 * SVM_STACK_INVALID marker. */
__device_inline bool stack_valid(uint a)
{
	const uint invalid_offset = (uint)SVM_STACK_INVALID;

	return !(a == invalid_offset);
}
/* Fetch the uint4 at *offset from the __svm_nodes texture and advance the
 * node counter by one, so that successive calls walk the node list in order.
 *
 * kg is not referenced by name here; presumably kernel_tex_fetch() picks it
 * up implicitly - verify against the macro definition. */
__device_inline uint4 read_node(KernelGlobals *kg, int *offset)
{
	uint4 node = kernel_tex_fetch(__svm_nodes, *offset);

	/* step past the entry that was just consumed */
	(*offset)++;

	return node;
}
/* Fetch the uint4 at *offset from the __svm_nodes texture, reinterpret each
 * of its four components as a float, and advance the node counter by one.
 *
 * kg is not referenced by name here; presumably kernel_tex_fetch() picks it
 * up implicitly - verify against the macro definition. */
__device_inline float4 read_node_float(KernelGlobals *kg, int *offset)
{
	uint4 node = kernel_tex_fetch(__svm_nodes, *offset);

	/* floats are stored as their integer bit pattern, so this is a
	 * reinterpretation of the bits rather than a numeric conversion */
	float4 f = make_float4(
		__int_as_float(node.x),
		__int_as_float(node.y),
		__int_as_float(node.z),
		__int_as_float(node.w));

	/* step past the entry that was just consumed */
	(*offset)++;

	return f;
}
/* Split the 32 bit value i into its four bytes: x receives bits 0-7,
 * y bits 8-15, z bits 16-23 and w bits 24-31. Any of the output pointers
 * may be null, in which case that byte is skipped. */
__device_inline void decode_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w)
{
	const uint byte_mask = 0xFF;

	if(x) {
		*x = i & byte_mask;
	}
	if(y) {
		*y = (i >> 8) & byte_mask;
	}
	if(z) {
		*z = (i >> 16) & byte_mask;
	}
	if(w) {
		*w = (i >> 24) & byte_mask;
	}
}
119 #include "svm_noise.h"
120 #include "svm_texture.h"
122 #include "svm_attribute.h"
123 #include "svm_gradient.h"
124 #include "svm_closure.h"
125 #include "svm_noisetex.h"
126 #include "svm_convert.h"
127 #include "svm_displace.h"
128 #include "svm_fresnel.h"
129 #include "svm_camera.h"
130 #include "svm_geometry.h"
132 #include "svm_image.h"
133 #include "svm_light_path.h"
134 #include "svm_magic.h"
135 #include "svm_mapping.h"
136 #include "svm_wave.h"
137 #include "svm_math.h"
139 #include "svm_sepcomb_rgb.h"
140 #include "svm_musgrave.h"
142 #include "svm_tex_coord.h"
143 #include "svm_value.h"
144 #include "svm_voronoi.h"
148 /* Main Interpreter Loop */
/* Interpreter entry point: fetches nodes with read_node() starting at
 * (sd->shader & SHADER_MASK) and hands each one to its svm_node_* handler.
 *
 * kg        - kernel globals, forwarded to read_node() and to most handlers
 * sd        - shader data; supplies the start offset and is written to
 *             (sd->randb_closure, sd->closure.type, sd->closure.weight)
 * type      - selects which jump target NODE_SHADER_JUMP takes
 *             (surface, volume or displacement)
 * randb     - random number; stored in sd->randb_closure and passed to the
 *             BSDF, mix closure and add closure handlers
 * path_flag - forwarded to the BSDF, volume and light path handlers */
150 __device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderType type, float randb, int path_flag)
/* scratch stack shared by all node handlers during this evaluation */
152 float stack[SVM_STACK_SIZE];
/* starts at 1 and is handed by pointer to svm_node_add_closure() */
153 float closure_weight = 1.0f;
/* node counter: index of the next entry to fetch */
154 int offset = sd->shader & SHADER_MASK;
/* reset the closure state kept on sd before running any node */
156 #ifdef __MULTI_CLOSURE__
158 sd->randb_closure = randb;
160 sd->closure.type = NBUILTIN_CLOSURES;
/* fetch the node at the current offset; read_node() gets &offset so the counter can move on */
164 uint4 node = read_node(kg, &offset);
/* the jump node carries one target offset per shader type in node.y/z/w */
167 case NODE_SHADER_JUMP: {
168 if(type == SHADER_TYPE_SURFACE) offset = node.y;
169 else if(type == SHADER_TYPE_VOLUME) offset = node.z;
170 else if(type == SHADER_TYPE_DISPLACEMENT) offset = node.w;
/* closure nodes */
174 case NODE_CLOSURE_BSDF:
175 svm_node_closure_bsdf(kg, sd, stack, node, randb, path_flag);
177 case NODE_CLOSURE_EMISSION:
178 svm_node_closure_emission(sd, stack, node);
180 case NODE_CLOSURE_BACKGROUND:
181 svm_node_closure_background(sd, stack, node);
183 case NODE_CLOSURE_HOLDOUT:
184 svm_node_closure_holdout(sd, stack, node);
186 case NODE_CLOSURE_VOLUME:
187 svm_node_closure_volume(kg, sd, stack, node, path_flag);
189 case NODE_CLOSURE_SET_WEIGHT:
190 svm_node_closure_set_weight(sd, node.y, node.z, node.w);
192 case NODE_CLOSURE_WEIGHT:
193 svm_node_closure_weight(sd, stack, node.y);
195 case NODE_EMISSION_WEIGHT:
196 svm_node_emission_weight(kg, sd, stack, node);
/* mix/add receive &offset and &randb, so they may change both */
198 case NODE_MIX_CLOSURE:
199 svm_node_mix_closure(sd, stack, node, &offset, &randb);
201 case NODE_ADD_CLOSURE:
202 svm_node_add_closure(sd, stack, node.y, node.z, &offset, &randb, &closure_weight);
/* texture nodes; handlers that take &offset can consume extra node entries */
209 svm_node_tex_image(kg, sd, stack, node);
211 case NODE_TEX_ENVIRONMENT:
212 svm_node_tex_environment(kg, sd, stack, node);
215 svm_node_tex_sky(kg, sd, stack, node.y, node.z);
217 case NODE_TEX_GRADIENT:
218 svm_node_tex_gradient(sd, stack, node);
221 svm_node_tex_noise(kg, sd, stack, node, &offset);
223 case NODE_TEX_VORONOI:
224 svm_node_tex_voronoi(kg, sd, stack, node, &offset);
226 case NODE_TEX_MUSGRAVE:
227 svm_node_tex_musgrave(kg, sd, stack, node, &offset);
230 svm_node_tex_wave(kg, sd, stack, node, &offset);
233 svm_node_tex_magic(kg, sd, stack, node, &offset);
/* camera, geometry and light path nodes */
237 svm_node_camera(kg, sd, stack, node.y, node.z, node.w);
240 svm_node_geometry(sd, stack, node.y, node.z);
242 case NODE_GEOMETRY_BUMP_DX:
243 svm_node_geometry_bump_dx(sd, stack, node.y, node.z);
245 case NODE_GEOMETRY_BUMP_DY:
246 svm_node_geometry_bump_dy(sd, stack, node.y, node.z);
248 case NODE_LIGHT_PATH:
249 svm_node_light_path(sd, stack, node.y, node.z, path_flag);
/* conversion, value, mix, RGB and HSV nodes */
252 svm_node_convert(sd, stack, node.y, node.z, node.w);
255 svm_node_value_f(kg, sd, stack, node.y, node.z);
258 svm_node_value_v(kg, sd, stack, node.y, &offset);
261 svm_node_mix(kg, sd, stack, node.y, node.z, node.w, &offset);
263 case NODE_SEPARATE_RGB:
264 svm_node_separate_rgb(sd, stack, node.y, node.z, node.w);
266 case NODE_COMBINE_RGB:
267 svm_node_combine_rgb(sd, stack, node.y, node.z, node.w);
270 svm_node_hsv(kg, sd, stack, node.y, node.z, node.w, &offset);
/* attribute nodes */
273 svm_node_attr(kg, sd, stack, node);
275 case NODE_ATTR_BUMP_DX:
276 svm_node_attr_bump_dx(kg, sd, stack, node);
278 case NODE_ATTR_BUMP_DY:
279 svm_node_attr_bump_dy(kg, sd, stack, node);
/* fresnel, layer weight, displacement and bump nodes */
282 svm_node_fresnel(sd, stack, node.y, node.z, node.w);
284 case NODE_LAYER_WEIGHT:
285 svm_node_layer_weight(sd, stack, node);
287 case NODE_SET_DISPLACEMENT:
288 svm_node_set_displacement(sd, stack, node.y);
291 svm_node_set_bump(sd, stack, node.y, node.z, node.w);
/* math, mapping and texture coordinate nodes */
294 svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset);
296 case NODE_VECTOR_MATH:
297 svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset);
300 svm_node_mapping(kg, sd, stack, node.y, node.z, &offset);
303 svm_node_tex_coord(kg, sd, stack, node.y, node.z);
305 case NODE_TEX_COORD_BUMP_DX:
306 svm_node_tex_coord_bump_dx(kg, sd, stack, node.y, node.z);
308 case NODE_TEX_COORD_BUMP_DY:
309 svm_node_tex_coord_bump_dy(kg, sd, stack, node.y, node.z);
311 case NODE_EMISSION_SET_WEIGHT_TOTAL:
312 svm_node_emission_set_weight_total(kg, sd, node.y, node.z, node.w);
/* without __MULTI_CLOSURE__, scale the closure weight by closure_weight */
316 #ifndef __MULTI_CLOSURE__
317 sd->closure.weight *= closure_weight;
326 #endif /* __SVM_H__ */