Cycles: Delay shooting SSS indirect rays
[blender-staging.git] / intern / cycles / kernel / kernel_types.h
index 551c2dd596e13519865d3d22f698599b3c772840..bef00355c5e1b8cc0e8bee0bdd143bbdcf700d94 100644 (file)
@@ -11,7 +11,7 @@
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
  */
 
 #ifndef __KERNEL_TYPES_H__
 #define __KERNEL_CPU__
 #endif
 
+/* TODO(sergey): This is only to make it possible to include this header
+ * from outside of the kernel. but this could be done somewhat cleaner?
+ */
+#ifndef ccl_addr_space
+#define ccl_addr_space
+#endif
+
 CCL_NAMESPACE_BEGIN
 
 /* constants */
 #define OBJECT_SIZE            11
 #define OBJECT_VECTOR_SIZE     6
-#define LIGHT_SIZE                     4
-#define FILTER_TABLE_SIZE      256
+#define LIGHT_SIZE                     5
+#define FILTER_TABLE_SIZE      1024
 #define RAMP_TABLE_SIZE                256
+#define SHUTTER_TABLE_SIZE             256
 #define PARTICLE_SIZE          5
 #define TIME_INVALID           FLT_MAX
 
 #define BSSRDF_MIN_RADIUS                      1e-8f
 #define BSSRDF_MAX_HITS                                4
 
-#define BB_DRAPPER                             800.0f
-#define BB_MAX_TABLE_RANGE             12000.0f
-#define BB_TABLE_XPOWER                        1.5f
-#define BB_TABLE_YPOWER                        5.0f
-#define BB_TABLE_SPACING               2.0f
+#define BECKMANN_TABLE_SIZE            256
 
 #define TEX_NUM_FLOAT_IMAGES   5
 
-#define SHADER_NO_ID                   -1
+#define SHADER_NONE                            (~0)
+#define OBJECT_NONE                            (~0)
+#define PRIM_NONE                              (~0)
+#define LAMP_NONE                              (~0)
 
 #define VOLUME_STACK_SIZE              16
 
 /* device capabilities */
 #ifdef __KERNEL_CPU__
+#ifdef __KERNEL_SSE2__
+#  define __QBVH__
+#endif
 #define __KERNEL_SHADING__
 #define __KERNEL_ADV_SHADING__
 #define __BRANCHED_PATH__
@@ -61,15 +71,25 @@ CCL_NAMESPACE_BEGIN
 #define __SUBSURFACE__
 #define __CMJ__
 #define __VOLUME__
+#define __VOLUME_DECOUPLED__
+#define __VOLUME_SCATTER__
+#define __SHADOW_RECORD_ALL__
+#define __VOLUME_RECORD_ALL__
 #endif
 
 #ifdef __KERNEL_CUDA__
 #define __KERNEL_SHADING__
 #define __KERNEL_ADV_SHADING__
-#if __CUDA_ARCH__ != 300
 #define __BRANCHED_PATH__
+#define __VOLUME__
+#define __VOLUME_SCATTER__
+
+/* Experimental on GPU */
+#ifdef __KERNEL_EXPERIMENTAL__
+#define __SUBSURFACE__
+#define __CMJ__
 #endif
-//#define __VOLUME__
+
 #endif
 
 #ifdef __KERNEL_OPENCL__
@@ -77,41 +97,51 @@ CCL_NAMESPACE_BEGIN
 /* keep __KERNEL_ADV_SHADING__ in sync with opencl_kernel_use_advanced_shading! */
 
 #ifdef __KERNEL_OPENCL_NVIDIA__
-#define __KERNEL_SHADING__
-#define __KERNEL_ADV_SHADING__
+#  define __KERNEL_SHADING__
+#  define __KERNEL_ADV_SHADING__
+#  ifdef __KERNEL_EXPERIMENTAL__
+#    define __CMJ__
+#  endif
 #endif
 
 #ifdef __KERNEL_OPENCL_APPLE__
-#define __KERNEL_SHADING__
-//#define __KERNEL_ADV_SHADING__
+#  define __KERNEL_SHADING__
+#  define __KERNEL_ADV_SHADING__
+/* TODO(sergey): Currently experimental section is ignored here,
+ * this is because megakernel in device_opencl does not support
+ * custom cflags depending on the scene features.
+ */
+#  ifdef __KERNEL_EXPERIMENTAL__
+#    define __CMJ__
+#  endif
 #endif
 
 #ifdef __KERNEL_OPENCL_AMD__
-#define __SVM__
-#define __EMISSION__
-#define __IMAGE_TEXTURES__
-#define __PROCEDURAL_TEXTURES__
-#define __EXTRA_NODES__
-#define __HOLDOUT__
-#define __NORMAL_MAP__
-//#define __BACKGROUND_MIS__
-//#define __LAMP_MIS__
-//#define __AO__
-//#define __ANISOTROPIC__
-//#define __CAMERA_MOTION__
-//#define __OBJECT_MOTION__
-//#define __HAIR__
-//#define __MULTI_CLOSURE__
-//#define __TRANSPARENT_SHADOWS__
-//#define __PASSES__
+#  define __CL_USE_NATIVE__
+#  define __KERNEL_SHADING__
+#  define __MULTI_CLOSURE__
+#  define __PASSES__
+#  define __BACKGROUND_MIS__
+#  define __LAMP_MIS__
+#  define __AO__
+#  define __CAMERA_MOTION__
+#  define __OBJECT_MOTION__
+#  define __HAIR__
+#  ifdef __KERNEL_EXPERIMENTAL__
+#    define __TRANSPARENT_SHADOWS__
+#  endif
 #endif
 
 #ifdef __KERNEL_OPENCL_INTEL_CPU__
-#define __KERNEL_SHADING__
-#define __KERNEL_ADV_SHADING__
+#  define __CL_USE_NATIVE__
+#  define __KERNEL_SHADING__
+#  define __KERNEL_ADV_SHADING__
+#  ifdef __KERNEL_EXPERIMENTAL__
+#    define __CMJ__
+#  endif
 #endif
 
-#endif
+#endif // __KERNEL_OPENCL__
 
 /* kernel features */
 #define __SOBOL__
@@ -129,11 +159,9 @@ CCL_NAMESPACE_BEGIN
 #ifdef __KERNEL_SHADING__
 #define __SVM__
 #define __EMISSION__
-#define __PROCEDURAL_TEXTURES__
-#define __IMAGE_TEXTURES__
+#define __TEXTURES__
 #define __EXTRA_NODES__
 #define __HOLDOUT__
-#define __NORMAL_MAP__
 #endif
 
 #ifdef __KERNEL_ADV_SHADING__
@@ -143,16 +171,30 @@ CCL_NAMESPACE_BEGIN
 #define __BACKGROUND_MIS__
 #define __LAMP_MIS__
 #define __AO__
-#define __ANISOTROPIC__
 #define __CAMERA_MOTION__
 #define __OBJECT_MOTION__
 #define __HAIR__
 #endif
 
-/* Sanity check */
+#ifdef WITH_CYCLES_DEBUG
+#  define __KERNEL_DEBUG__
+#endif
 
-#if defined(__KERNEL_OPENCL_NEED_ADVANCED_SHADING__) && !defined(__MULTI_CLOSURE__)
-#error "OpenCL: mismatch between advanced shading flags in device_opencl.cpp and kernel_types.h"
+/* Scene-based selective featrues compilation. */
+#ifdef __NO_CAMERA_MOTION__
+#  undef __CAMERA_MOTION__
+#endif
+#ifdef __NO_OBJECT_MOTION__
+#  undef __OBJECT_MOTION__
+#endif
+#ifdef __NO_HAIR__
+#  undef __HAIR__
+#endif
+#ifdef __NO_SUBSURFACE__
+#  undef __SUBSURFACE__
+#endif
+#ifdef __NO_BRANCHED_PATH__
+#  undef __BRANCHED_PATH__
 #endif
 
 /* Random Numbers */
@@ -163,7 +205,35 @@ typedef uint RNG;
 
 typedef enum ShaderEvalType {
        SHADER_EVAL_DISPLACE,
-       SHADER_EVAL_BACKGROUND
+       SHADER_EVAL_BACKGROUND,
+       /* bake types */
+       SHADER_EVAL_BAKE, /* no real shade, it's used in the code to
+                          * differentiate the type of shader eval from the above
+                          */
+       /* data passes */
+       SHADER_EVAL_NORMAL,
+       SHADER_EVAL_UV,
+       SHADER_EVAL_DIFFUSE_COLOR,
+       SHADER_EVAL_GLOSSY_COLOR,
+       SHADER_EVAL_TRANSMISSION_COLOR,
+       SHADER_EVAL_SUBSURFACE_COLOR,
+       SHADER_EVAL_EMISSION,
+
+       /* light passes */
+       SHADER_EVAL_AO,
+       SHADER_EVAL_COMBINED,
+       SHADER_EVAL_SHADOW,
+       SHADER_EVAL_DIFFUSE_DIRECT,
+       SHADER_EVAL_GLOSSY_DIRECT,
+       SHADER_EVAL_TRANSMISSION_DIRECT,
+       SHADER_EVAL_SUBSURFACE_DIRECT,
+       SHADER_EVAL_DIFFUSE_INDIRECT,
+       SHADER_EVAL_GLOSSY_INDIRECT,
+       SHADER_EVAL_TRANSMISSION_INDIRECT,
+       SHADER_EVAL_SUBSURFACE_INDIRECT,
+
+       /* extra */
+       SHADER_EVAL_ENVIRONMENT,
 } ShaderEvalType;
 
 /* Path Tracing
@@ -179,10 +249,8 @@ enum PathTraceDimension {
        PRNG_UNUSED_0 = 5,
        PRNG_UNUSED_1 = 6,      /* for some reason (6, 7) is a bad sobol pattern */
        PRNG_UNUSED_2 = 7,  /* with a low number of samples (< 64) */
-       PRNG_BASE_NUM = 8,
-#else
-       PRNG_BASE_NUM = 4,
 #endif
+       PRNG_BASE_NUM = 8,
 
        PRNG_BSDF_U = 0,
        PRNG_BSDF_V = 1,
@@ -190,7 +258,7 @@ enum PathTraceDimension {
        PRNG_LIGHT = 3,
        PRNG_LIGHT_U = 4,
        PRNG_LIGHT_V = 5,
-       PRNG_LIGHT_F = 6,
+       PRNG_UNUSED_3 = 6,
        PRNG_TERMINATE = 7,
 
 #ifdef __VOLUME__
@@ -198,10 +266,9 @@ enum PathTraceDimension {
        PRNG_PHASE_V = 9,
        PRNG_PHASE = 10,
        PRNG_SCATTER_DISTANCE = 11,
-       PRNG_BOUNCE_NUM = 12,
-#else
-       PRNG_BOUNCE_NUM = 8,
 #endif
+
+       PRNG_BOUNCE_NUM = 12,
 };
 
 enum SamplingPattern {
@@ -227,17 +294,15 @@ enum PathRayFlag {
        PATH_RAY_SHADOW_TRANSPARENT = 256,
        PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE|PATH_RAY_SHADOW_TRANSPARENT),
 
-       PATH_RAY_CURVE = 512, /* visibility flag to define curve segments*/
+       PATH_RAY_CURVE = 512, /* visibility flag to define curve segments */
+       PATH_RAY_VOLUME_SCATTER = 1024, /* volume scattering */
 
        /* note that these can use maximum 12 bits, the other are for layers */
-       PATH_RAY_ALL_VISIBILITY = (1|2|4|8|16|32|64|128|256|512),
+       PATH_RAY_ALL_VISIBILITY = (1|2|4|8|16|32|64|128|256|512|1024),
 
-       PATH_RAY_MIS_SKIP = 1024,
-       PATH_RAY_DIFFUSE_ANCESTOR = 2048,
-       PATH_RAY_GLOSSY_ANCESTOR = 4096,
-       PATH_RAY_BSSRDF_ANCESTOR = 8192,
-       PATH_RAY_SINGLE_PASS_DONE = 16384,
-       PATH_RAY_VOLUME_SCATTER = 32768,
+       PATH_RAY_MIS_SKIP = 2048,
+       PATH_RAY_DIFFUSE_ANCESTOR = 4096,
+       PATH_RAY_SINGLE_PASS_DONE = 8192,
 
        /* we need layer member flags to be the 20 upper bits */
        PATH_RAY_LAYER_SHIFT = (32-20)
@@ -260,38 +325,44 @@ typedef enum ClosureLabel {
 
 typedef enum PassType {
        PASS_NONE = 0,
-       PASS_COMBINED = 1,
-       PASS_DEPTH = 2,
-       PASS_NORMAL = 4,
-       PASS_UV = 8,
-       PASS_OBJECT_ID = 16,
-       PASS_MATERIAL_ID = 32,
-       PASS_DIFFUSE_COLOR = 64,
-       PASS_GLOSSY_COLOR = 128,
-       PASS_TRANSMISSION_COLOR = 256,
-       PASS_DIFFUSE_INDIRECT = 512,
-       PASS_GLOSSY_INDIRECT = 1024,
-       PASS_TRANSMISSION_INDIRECT = 2048,
-       PASS_DIFFUSE_DIRECT = 4096,
-       PASS_GLOSSY_DIRECT = 8192,
-       PASS_TRANSMISSION_DIRECT = 16384,
-       PASS_EMISSION = 32768,
-       PASS_BACKGROUND = 65536,
-       PASS_AO = 131072,
-       PASS_SHADOW = 262144,
-       PASS_MOTION = 524288,
-       PASS_MOTION_WEIGHT = 1048576,
-       PASS_MIST = 2097152,
-       PASS_SUBSURFACE_DIRECT = 4194304,
-       PASS_SUBSURFACE_INDIRECT = 8388608,
-       PASS_SUBSURFACE_COLOR = 16777216
+       PASS_COMBINED = (1 << 0),
+       PASS_DEPTH = (1 << 1),
+       PASS_NORMAL = (1 << 2),
+       PASS_UV = (1 << 3),
+       PASS_OBJECT_ID = (1 << 4),
+       PASS_MATERIAL_ID = (1 << 5),
+       PASS_DIFFUSE_COLOR = (1 << 6),
+       PASS_GLOSSY_COLOR = (1 << 7),
+       PASS_TRANSMISSION_COLOR = (1 << 8),
+       PASS_DIFFUSE_INDIRECT = (1 << 9),
+       PASS_GLOSSY_INDIRECT = (1 << 10),
+       PASS_TRANSMISSION_INDIRECT = (1 << 11),
+       PASS_DIFFUSE_DIRECT = (1 << 12),
+       PASS_GLOSSY_DIRECT = (1 << 13),
+       PASS_TRANSMISSION_DIRECT = (1 << 14),
+       PASS_EMISSION = (1 << 15),
+       PASS_BACKGROUND = (1 << 16),
+       PASS_AO = (1 << 17),
+       PASS_SHADOW = (1 << 18),
+       PASS_MOTION = (1 << 19),
+       PASS_MOTION_WEIGHT = (1 << 20),
+       PASS_MIST = (1 << 21),
+       PASS_SUBSURFACE_DIRECT = (1 << 22),
+       PASS_SUBSURFACE_INDIRECT = (1 << 23),
+       PASS_SUBSURFACE_COLOR = (1 << 24),
+       PASS_LIGHT = (1 << 25), /* no real pass, used to force use_light_pass */
+#ifdef __KERNEL_DEBUG__
+       PASS_BVH_TRAVERSAL_STEPS = (1 << 26),
+       PASS_BVH_TRAVERSED_INSTANCES = (1 << 27),
+       PASS_RAY_BOUNCES = (1 << 28),
+#endif
 } PassType;
 
 #define PASS_ALL (~0)
 
 #ifdef __PASSES__
 
-typedef struct PathRadiance {
+typedef ccl_addr_space struct PathRadiance {
        int use_light_pass;
 
        float3 emission;
@@ -306,21 +377,25 @@ typedef struct PathRadiance {
        float3 color_glossy;
        float3 color_transmission;
        float3 color_subsurface;
+       float3 color_scatter;
 
        float3 direct_diffuse;
        float3 direct_glossy;
        float3 direct_transmission;
        float3 direct_subsurface;
+       float3 direct_scatter;
 
        float3 indirect_diffuse;
        float3 indirect_glossy;
        float3 indirect_transmission;
        float3 indirect_subsurface;
+       float3 indirect_scatter;
 
        float3 path_diffuse;
        float3 path_glossy;
        float3 path_transmission;
        float3 path_subsurface;
+       float3 path_scatter;
 
        float4 shadow;
        float mist;
@@ -334,11 +409,12 @@ typedef struct BsdfEval {
        float3 transmission;
        float3 transparent;
        float3 subsurface;
+       float3 scatter;
 } BsdfEval;
 
 #else
 
-typedef float3 PathRadiance;
+typedef ccl_addr_space float3 PathRadiance;
 typedef float3 BsdfEval;
 
 #endif
@@ -354,7 +430,8 @@ typedef enum ShaderFlag {
        SHADER_EXCLUDE_GLOSSY = (1 << 26),
        SHADER_EXCLUDE_TRANSMIT = (1 << 25),
        SHADER_EXCLUDE_CAMERA = (1 << 24),
-       SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE|SHADER_EXCLUDE_GLOSSY|SHADER_EXCLUDE_TRANSMIT|SHADER_EXCLUDE_CAMERA),
+       SHADER_EXCLUDE_SCATTER = (1 << 23),
+       SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE|SHADER_EXCLUDE_GLOSSY|SHADER_EXCLUDE_TRANSMIT|SHADER_EXCLUDE_CAMERA|SHADER_EXCLUDE_SCATTER),
 
        SHADER_MASK = ~(SHADER_SMOOTH_NORMAL|SHADER_CAST_SHADOW|SHADER_AREA_LIGHT|SHADER_USE_MIS|SHADER_EXCLUDE_ANY)
 } ShaderFlag;
@@ -366,10 +443,8 @@ typedef enum LightType {
        LIGHT_DISTANT,
        LIGHT_BACKGROUND,
        LIGHT_AREA,
-       LIGHT_AO,
        LIGHT_SPOT,
-       LIGHT_TRIANGLE,
-       LIGHT_STRAND
+       LIGHT_TRIANGLE
 } LightType;
 
 /* Camera Type */
@@ -384,6 +459,7 @@ enum CameraType {
 
 enum PanoramaType {
        PANORAMA_EQUIRECTANGULAR,
+       PANORAMA_MIRRORBALL,
        PANORAMA_FISHEYE_EQUIDISTANT,
        PANORAMA_FISHEYE_EQUISOLID
 };
@@ -403,10 +479,26 @@ typedef struct differential {
 /* Ray */
 
 typedef struct Ray {
+/* TODO(sergey): This is only needed because current AMD
+ * compiler has hard time building the kernel with this
+ * reshuffle. And at the same time reshuffle will cause
+ * less optimal CPU code in certain places.
+ *
+ * We'll get rid of this nasty exception once AMD compiler
+ * is fixed.
+ */
+#ifndef __KERNEL_OPENCL_AMD__
        float3 P;               /* origin */
        float3 D;               /* direction */
+
        float t;                /* length of the ray */
        float time;             /* time (for motion blur) */
+#else
+       float t;                /* length of the ray */
+       float time;             /* time (for motion blur) */
+       float3 P;               /* origin */
+       float3 D;               /* direction */
+#endif
 
 #ifdef __RAY_DIFFERENTIALS__
        differential3 dP;
@@ -416,13 +508,41 @@ typedef struct Ray {
 
 /* Intersection */
 
-typedef struct Intersection {
+typedef ccl_addr_space struct Intersection {
        float t, u, v;
        int prim;
        int object;
-       int segment;
+       int type;
+
+#ifdef __KERNEL_DEBUG__
+       int num_traversal_steps;
+       int num_traversed_instances;
+#endif
 } Intersection;
 
+/* Primitives */
+
+typedef enum PrimitiveType {
+       PRIMITIVE_NONE = 0,
+       PRIMITIVE_TRIANGLE = 1,
+       PRIMITIVE_MOTION_TRIANGLE = 2,
+       PRIMITIVE_CURVE = 4,
+       PRIMITIVE_MOTION_CURVE = 8,
+
+       PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE|PRIMITIVE_MOTION_TRIANGLE),
+       PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE|PRIMITIVE_MOTION_CURVE),
+       PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE|PRIMITIVE_MOTION_CURVE),
+       PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE|PRIMITIVE_ALL_CURVE),
+
+       /* Total number of different primitives.
+        * NOTE: This is an actual value, not a bitflag.
+        */
+       PRIMITIVE_NUM_TOTAL = 4,
+} PrimitiveType;
+
+#define PRIMITIVE_PACK_SEGMENT(type, segment) ((segment << 16) | type)
+#define PRIMITIVE_UNPACK_SEGMENT(type) (type >> 16)
+
 /* Attributes */
 
 #define ATTR_PRIM_TYPES                2
@@ -434,9 +554,13 @@ typedef enum AttributeElement {
        ATTR_ELEMENT_MESH,
        ATTR_ELEMENT_FACE,
        ATTR_ELEMENT_VERTEX,
+       ATTR_ELEMENT_VERTEX_MOTION,
        ATTR_ELEMENT_CORNER,
+       ATTR_ELEMENT_CORNER_BYTE,
        ATTR_ELEMENT_CURVE,
-       ATTR_ELEMENT_CURVE_KEY
+       ATTR_ELEMENT_CURVE_KEY,
+       ATTR_ELEMENT_CURVE_KEY_MOTION,
+       ATTR_ELEMENT_VOXEL
 } AttributeElement;
 
 typedef enum AttributeStandard {
@@ -450,12 +574,18 @@ typedef enum AttributeStandard {
        ATTR_STD_GENERATED_TRANSFORM,
        ATTR_STD_POSITION_UNDEFORMED,
        ATTR_STD_POSITION_UNDISPLACED,
-       ATTR_STD_MOTION_PRE,
-       ATTR_STD_MOTION_POST,
+       ATTR_STD_MOTION_VERTEX_POSITION,
+       ATTR_STD_MOTION_VERTEX_NORMAL,
        ATTR_STD_PARTICLE,
        ATTR_STD_CURVE_INTERCEPT,
        ATTR_STD_PTEX_FACE_ID,
        ATTR_STD_PTEX_UV,
+       ATTR_STD_VOLUME_DENSITY,
+       ATTR_STD_VOLUME_COLOR,
+       ATTR_STD_VOLUME_FLAME,
+       ATTR_STD_VOLUME_HEAT,
+       ATTR_STD_VOLUME_VELOCITY,
+       ATTR_STD_POINTINESS,
        ATTR_STD_NUM,
 
        ATTR_STD_NOT_FOUND = ~0
@@ -463,30 +593,35 @@ typedef enum AttributeStandard {
 
 /* Closure data */
 
-#define MAX_CLOSURE 64
-
-typedef struct ShaderClosure {
-       ClosureType type;
-       float3 weight;
-
 #ifdef __MULTI_CLOSURE__
-       float sample_weight;
+#  ifndef __MAX_CLOSURE__
+#     define MAX_CLOSURE 64
+#  else
+#    define MAX_CLOSURE __MAX_CLOSURE__
+#  endif
+#else
+#define MAX_CLOSURE 1
 #endif
 
-       float data0;
-       float data1;
-
+/* This struct is to be 16 bytes aligned, we also keep some extra precautions:
+ * - All the float3 members are in the beginning of the struct, so compiler
+ *   does not put own padding trying to align this members.
+ * - We make sure OSL pointer is also 16 bytes aligned.
+ */
+typedef ccl_addr_space struct ShaderClosure {
+       float3 weight;
        float3 N;
-#if defined(__ANISOTROPIC__) || defined(__SUBSURFACE__) || defined(__HAIR__)
        float3 T;
-#endif
 
-#ifdef __HAIR__
-       float offset;
-#endif
+       ClosureType type;
+       float sample_weight;
+       float data0;
+       float data1;
+       float data2;
+       int pad1, pad2, pad3;
 
 #ifdef __OSL__
-       void *prim;
+       void *prim, *pad4;
 #endif
 } ShaderClosure;
 
@@ -511,119 +646,70 @@ typedef enum ShaderContext {
 
 enum ShaderDataFlag {
        /* runtime flags */
-       SD_BACKFACING = 1,              /* backside of surface? */
-       SD_EMISSION = 2,                /* have emissive closure? */
-       SD_BSDF = 4,                    /* have bsdf closure? */
-       SD_BSDF_HAS_EVAL = 8,   /* have non-singular bsdf closure? */
-       SD_PHASE_HAS_EVAL = 8,  /* have non-singular phase closure? */
-       SD_BSDF_GLOSSY = 16,    /* have glossy bsdf */
-       SD_BSSRDF = 32,                 /* have bssrdf */
-       SD_HOLDOUT = 64,                /* have holdout closure? */
-       SD_ABSORPTION = 128,    /* have volume absorption closure? */
-       SD_SCATTER = 256,               /* have volume phase closure? */
-       SD_AO = 512,                    /* have ao closure? */
-       SD_TRANSPARENT = 1024,  /* have transparent closure? */
-
-       SD_CLOSURE_FLAGS = (SD_EMISSION|SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY|SD_BSSRDF|SD_HOLDOUT|SD_ABSORPTION|SD_SCATTER|SD_AO),
+       SD_BACKFACING     = (1 << 0),   /* backside of surface? */
+       SD_EMISSION       = (1 << 1),   /* have emissive closure? */
+       SD_BSDF           = (1 << 2),   /* have bsdf closure? */
+       SD_BSDF_HAS_EVAL  = (1 << 3),   /* have non-singular bsdf closure? */
+       SD_BSSRDF         = (1 << 4),   /* have bssrdf */
+       SD_HOLDOUT        = (1 << 5),   /* have holdout closure? */
+       SD_ABSORPTION     = (1 << 6),   /* have volume absorption closure? */
+       SD_SCATTER        = (1 << 7),   /* have volume phase closure? */
+       SD_AO             = (1 << 8),   /* have ao closure? */
+       SD_TRANSPARENT    = (1 << 9),  /* have transparent closure? */
+
+       SD_CLOSURE_FLAGS = (SD_EMISSION|SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSSRDF|
+                           SD_HOLDOUT|SD_ABSORPTION|SD_SCATTER|SD_AO),
 
        /* shader flags */
-       SD_USE_MIS = 2048,                                      /* direct light sample */
-       SD_HAS_TRANSPARENT_SHADOW = 4096,       /* has transparent shadow */
-       SD_HAS_VOLUME = 8192,                           /* has volume shader */
-       SD_HAS_ONLY_VOLUME = 16384,                     /* has only volume shader, no surface */
-       SD_HETEROGENEOUS_VOLUME = 32768,        /* has heterogeneous volume */
-       SD_HAS_BSSRDF_BUMP = 65536,                     /* bssrdf normal uses bump */
-
-       SD_SHADER_FLAGS = (SD_USE_MIS|SD_HAS_TRANSPARENT_SHADOW|SD_HAS_VOLUME|SD_HAS_ONLY_VOLUME|SD_HETEROGENEOUS_VOLUME|SD_HAS_BSSRDF_BUMP),
+       SD_USE_MIS                = (1 << 10),  /* direct light sample */
+       SD_HAS_TRANSPARENT_SHADOW = (1 << 11),  /* has transparent shadow */
+       SD_HAS_VOLUME             = (1 << 12),  /* has volume shader */
+       SD_HAS_ONLY_VOLUME        = (1 << 13),  /* has only volume shader, no surface */
+       SD_HETEROGENEOUS_VOLUME   = (1 << 14),  /* has heterogeneous volume */
+       SD_HAS_BSSRDF_BUMP        = (1 << 15),  /* bssrdf normal uses bump */
+       SD_VOLUME_EQUIANGULAR     = (1 << 16),  /* use equiangular sampling */
+       SD_VOLUME_MIS             = (1 << 17),  /* use multiple importance sampling */
+       SD_VOLUME_CUBIC           = (1 << 18),  /* use cubic interpolation for voxels */
+       SD_HAS_BUMP               = (1 << 19),  /* has data connected to the displacement input */
+
+       SD_SHADER_FLAGS = (SD_USE_MIS|SD_HAS_TRANSPARENT_SHADOW|SD_HAS_VOLUME|
+                          SD_HAS_ONLY_VOLUME|SD_HETEROGENEOUS_VOLUME|
+                          SD_HAS_BSSRDF_BUMP|SD_VOLUME_EQUIANGULAR|SD_VOLUME_MIS|
+                          SD_VOLUME_CUBIC|SD_HAS_BUMP),
 
        /* object flags */
-       SD_HOLDOUT_MASK = 131072,                       /* holdout for camera rays */
-       SD_OBJECT_MOTION = 262144,                      /* has object motion blur */
-       SD_TRANSFORM_APPLIED = 524288,          /* vertices have transform applied */
-
-       SD_OBJECT_FLAGS = (SD_HOLDOUT_MASK|SD_OBJECT_MOTION|SD_TRANSFORM_APPLIED)
+       SD_HOLDOUT_MASK             = (1 << 20),  /* holdout for camera rays */
+       SD_OBJECT_MOTION            = (1 << 21),  /* has object motion blur */
+       SD_TRANSFORM_APPLIED        = (1 << 22),  /* vertices have transform applied */
+       SD_NEGATIVE_SCALE_APPLIED   = (1 << 23),  /* vertices have negative scale applied */
+       SD_OBJECT_HAS_VOLUME        = (1 << 24),  /* object has a volume shader */
+       SD_OBJECT_INTERSECTS_VOLUME = (1 << 25),  /* object intersects AABB of an object with volume shader */
+       SD_OBJECT_HAS_VERTEX_MOTION = (1 << 26),  /* has position for motion vertices */
+
+       SD_OBJECT_FLAGS = (SD_HOLDOUT_MASK|SD_OBJECT_MOTION|SD_TRANSFORM_APPLIED|
+                          SD_NEGATIVE_SCALE_APPLIED|SD_OBJECT_HAS_VOLUME|
+                          SD_OBJECT_INTERSECTS_VOLUME)
 };
 
 struct KernelGlobals;
 
-typedef struct ShaderData {
-       /* position */
-       float3 P;
-       /* smooth normal for shading */
-       float3 N;
-       /* true geometric normal */
-       float3 Ng;
-       /* view/incoming direction */
-       float3 I;
-       /* shader id */
-       int shader;
-       /* booleans describing shader, see ShaderDataFlag */
-       int flag;
-
-       /* primitive id if there is one, ~0 otherwise */
-       int prim;
-
-#ifdef __HAIR__
-       /* for curves, segment number in curve, ~0 for triangles */
-       int segment;
-       /* variables for minimum hair width using transparency bsdf */
-       /*float curve_transparency; */
-       /*float curve_radius; */
-#endif
-       /* parametric coordinates
-        * - barycentric weights for triangles */
-       float u, v;
-       /* object id if there is one, ~0 otherwise */
-       int object;
-
-       /* motion blur sample time */
-       float time;
-       
-       /* length of the ray being shaded */
-       float ray_length;
-       
-       /* ray bounce depth */
-       int ray_depth;
-
-#ifdef __RAY_DIFFERENTIALS__
-       /* differential of P. these are orthogonal to Ng, not N */
-       differential3 dP;
-       /* differential of I */
-       differential3 dI;
-       /* differential of u, v */
-       differential du;
-       differential dv;
-#endif
-#ifdef __DPDU__
-       /* differential of P w.r.t. parametric coordinates. note that dPdu is
-        * not readily suitable as a tangent for shading on triangles. */
-       float3 dPdu, dPdv;
-#endif
-
-#ifdef __OBJECT_MOTION__
-       /* object <-> world space transformations, cached to avoid
-        * re-interpolating them constantly for shading */
-       Transform ob_tfm;
-       Transform ob_itfm;
-#endif
-
-#ifdef __MULTI_CLOSURE__
-       /* Closure data, we store a fixed array of closures */
-       ShaderClosure closure[MAX_CLOSURE];
-       int num_closure;
-       float randb_closure;
+#ifdef __SPLIT_KERNEL__
+#define SD_VAR(type, what) ccl_global type *what;
+#define SD_CLOSURE_VAR(type, what, max_closure) type *what;
+#define TIDX (get_global_id(1) * get_global_size(0) + get_global_id(0))
+#define ccl_fetch(s, t) (s->t[TIDX])
+#define ccl_fetch_array(s, t, index) (&s->t[TIDX * MAX_CLOSURE + index])
 #else
-       /* Closure data, with a single sampled closure for low memory usage */
-       ShaderClosure closure;
+#define SD_VAR(type, what) type what;
+#define SD_CLOSURE_VAR(type, what, max_closure) type what[max_closure];
+#define ccl_fetch(s, t) (s->t)
+#define ccl_fetch_array(s, t, index) (&s->t[index])
 #endif
 
-       /* ray start position, only set for backgrounds */
-       float3 ray_P;
-       differential3 ray_dP;
+typedef ccl_addr_space struct ShaderData {
+
+#include "kernel_shaderdata_vars.h"
 
-#ifdef __OSL__
-       struct KernelGlobals *osl_globals;
-#endif
 } ShaderData;
 
 /* Path State */
@@ -640,9 +726,9 @@ typedef struct PathState {
        int flag;          
 
        /* random number generator state */
-       int rng_offset;    /* dimension offset */
-       int sample;        /* path sample number */
-       int num_samples;   /* total number of times this path will be sampled */
+       int rng_offset;                 /* dimension offset */
+       int sample;                     /* path sample number */
+       int num_samples;                /* total number of times this path will be sampled */
 
        /* bounce counting */
        int bounce;
@@ -666,6 +752,30 @@ typedef struct PathState {
 #endif
 } PathState;
 
+/* Subsurface */
+
+/* Struct to gather multiple SSS hits. */
+struct SubsurfaceIntersection
+{
+       Ray ray;
+       float3 weight[BSSRDF_MAX_HITS];
+
+       int num_hits;
+       Intersection hits[BSSRDF_MAX_HITS];
+       float3 Ng[BSSRDF_MAX_HITS];
+};
+
+/* Struct to gather SSS indirect rays and delay tracing them. */
+struct SubsurfaceIndirectRays
+{
+       bool need_update_volume_stack;
+       PathState state;
+
+       int num_rays;
+       Ray rays[BSSRDF_MAX_HITS];
+       float3 throughputs[BSSRDF_MAX_HITS];
+};
+
 /* Constant Kernel Data
  *
  * These structs are passed from CPU to various devices, and the struct layout
@@ -680,6 +790,7 @@ typedef struct KernelCamera {
        int panorama_type;
        float fisheye_fov;
        float fisheye_lens;
+       float4 equirectangular_range;
 
        /* matrices */
        Transform cameratoworld;
@@ -697,7 +808,7 @@ typedef struct KernelCamera {
 
        /* motion blur */
        float shuttertime;
-       int have_motion;
+       int have_motion, have_perspective_motion;
 
        /* clipping */
        float nearclip;
@@ -710,9 +821,11 @@ typedef struct KernelCamera {
        /* render size */
        float width, height;
        int resolution;
-       int pad1;
-       int pad2;
-       int pad3;
+
+       /* anamorphic lens bokeh */
+       float inv_aperture_ratio;
+
+       int is_inside_volume;
 
        /* more matrices */
        Transform screentoworld;
@@ -726,6 +839,14 @@ typedef struct KernelCamera {
        Transform worldtocamera;
 
        MotionTransform motion;
+
+       /* Denotes changes in the projective matrix, namely in rastertocamera.
+        * Used for camera zoom motion blur,
+        */
+       PerspectiveMotionTransform perspective_motion;
+
+       int shutter_table_offset;
+       int pad;
 } KernelCamera;
 
 typedef struct KernelFilm {
@@ -773,6 +894,13 @@ typedef struct KernelFilm {
        float mist_start;
        float mist_inv_depth;
        float mist_falloff;
+
+#ifdef __KERNEL_DEBUG__
+       int pass_bvh_traversal_steps;
+       int pass_bvh_traversed_instances;
+       int pass_ray_bounces;
+       int pass_pad3;
+#endif
 } KernelFilm;
 
 typedef struct KernelBackground {
@@ -799,6 +927,11 @@ typedef struct KernelIntegrator {
        float inv_pdf_lights;
        int pdf_background_res;
 
+       /* light portals */
+       float portal_pdf;
+       int num_portals;
+       int portal_offset;
+
        /* bounces */
        int min_bounce;
        int max_bounce;
@@ -814,7 +947,8 @@ typedef struct KernelIntegrator {
        int transparent_shadows;
 
        /* caustics */
-       int no_caustics;
+       int caustics_reflective;
+       int caustics_refractive;
        float filter_glossy;
 
        /* seed */
@@ -829,28 +963,29 @@ typedef struct KernelIntegrator {
 
        /* branched path */
        int branched;
-       int aa_samples;
        int diffuse_samples;
        int glossy_samples;
        int transmission_samples;
        int ao_samples;
        int mesh_light_samples;
        int subsurface_samples;
+       int sample_all_lights_direct;
        int sample_all_lights_indirect;
-       
+
        /* mis */
        int use_lamp_mis;
 
        /* sampler */
        int sampling_pattern;
+       int aa_samples;
 
        /* volume render */
-       int volume_homogeneous_sampling;
        int use_volumes;
        int volume_max_steps;
        float volume_step_size;
        int volume_samples;
-       int pad1;
+
+       int pad;
 } KernelIntegrator;
 
 typedef struct KernelBVH {
@@ -860,8 +995,8 @@ typedef struct KernelBVH {
        int have_motion;
        int have_curves;
        int have_instancing;
-
-       int pad1, pad2, pad3;
+       int use_qbvh;
+       int pad1, pad2;
 } KernelBVH;
 
 typedef enum CurveFlag {
@@ -876,7 +1011,6 @@ typedef enum CurveFlag {
 } CurveFlag;
 
 typedef struct KernelCurves {
-       /* strand intersect and normal parameters - many can be changed to flags */
        int curveflags;
        int subdivisions;
 
@@ -884,11 +1018,10 @@ typedef struct KernelCurves {
        float maximum_width;
 } KernelCurves;
 
-typedef struct KernelBlackbody {
-       int table_offset;
+typedef struct KernelTables {
+       int beckmann_offset;
        int pad1, pad2, pad3;
-} KernelBlackbody;
-
+} KernelTables;
 
 typedef struct KernelData {
        KernelCamera cam;
@@ -897,9 +1030,71 @@ typedef struct KernelData {
        KernelIntegrator integrator;
        KernelBVH bvh;
        KernelCurves curve;
-       KernelBlackbody blackbody;
+       KernelTables tables;
 } KernelData;
 
+#ifdef __KERNEL_DEBUG__
+typedef ccl_addr_space struct DebugData {
+       // Total number of BVH node traversal steps and primitives intersections
+       // for the camera rays.
+       int num_bvh_traversal_steps;
+       int num_bvh_traversed_instances;
+       int num_ray_bounces;
+} DebugData;
+#endif
+
+/* Declarations required for split kernel */
+
+/* Macro for queues */
+/* Value marking queue's empty slot */
+#define QUEUE_EMPTY_SLOT -1
+
+/*
+* Queue 1 - Active rays
+* Queue 2 - Background queue
+* Queue 3 - Shadow ray cast kernel - AO
+* Queeu 4 - Shadow ray cast kernel - direct lighting
+*/
+#define NUM_QUEUES 4
+
+/* Queue names */
+enum QueueNumber {
+       QUEUE_ACTIVE_AND_REGENERATED_RAYS = 0,     /* All active rays and regenerated rays are enqueued here. */
+       QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS = 1,  /* All
+                                                   * 1. Background-hit rays,
+                                                   * 2. Rays that has exited path-iteration but needs to update output buffer
+                                                   * 3. Rays to be regenerated
+                                                   * are enqueued here.
+                                                   */
+       QUEUE_SHADOW_RAY_CAST_AO_RAYS = 2,         /* All rays for which a shadow ray should be cast to determine radiance
+                                                   * contribution for AO are enqueued here.
+                                                   */
+       QUEUE_SHADOW_RAY_CAST_DL_RAYS = 3,         /* All rays for which a shadow ray should be cast to determine radiance
+                                                   * contributing for direct lighting are enqueued here.
+                                                   */
+};
+
+/* We use RAY_STATE_MASK to get ray_state (enums 0 to 5) */
+#define RAY_STATE_MASK 0x007
+#define RAY_FLAG_MASK 0x0F8
+enum RayState {
+       RAY_ACTIVE = 0,             // Denotes ray is actively involved in path-iteration
+       RAY_INACTIVE = 1,           // Denotes ray has completed processing all samples and is inactive
+       RAY_UPDATE_BUFFER = 2,      // Denoted ray has exited path-iteration and needs to update output buffer
+       RAY_HIT_BACKGROUND = 3,     // Donotes ray has hit background
+       RAY_TO_REGENERATE = 4,      // Denotes ray has to be regenerated
+       RAY_REGENERATED = 5,        // Denotes ray has been regenerated
+       RAY_SKIP_DL = 6,            // Denotes ray should skip direct lighting
+       RAY_SHADOW_RAY_CAST_AO = 16, // Flag's ray has to execute shadow blocked function in AO part
+       RAY_SHADOW_RAY_CAST_DL = 32 // Flag's ray has to execute shadow blocked function in direct lighting part
+};
+
+#define ASSIGN_RAY_STATE(ray_state, ray_index, state) (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state))
+#define IS_STATE(ray_state, ray_index, state) ((ray_state[ray_index] & RAY_STATE_MASK) == state)
+#define ADD_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] | flag))
+#define REMOVE_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] & (~flag)))
+#define IS_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] & flag)
+
 CCL_NAMESPACE_END
 
 #endif /*  __KERNEL_TYPES_H__ */