Cycles: Also support the constant emission speedup for mesh lights
author Lukas Stockner <lukas.stockner@freenet.de>
Wed, 14 Sep 2016 16:53:35 +0000 (18:53 +0200)
committer Lukas Stockner <lukas.stockner@freenet.de>
Wed, 14 Sep 2016 16:53:35 +0000 (18:53 +0200)
Reviewers: brecht, sergey, dingto, juicyfruit

Differential Revision: https://developer.blender.org/D2220

intern/cycles/kernel/bvh/bvh_shadow_all.h
intern/cycles/kernel/bvh/qbvh_shadow_all.h
intern/cycles/kernel/geom/geom_object.h
intern/cycles/kernel/kernel_emission.h
intern/cycles/kernel/kernel_shader.h
intern/cycles/kernel/kernel_types.h
intern/cycles/kernel/kernel_volume.h
intern/cycles/render/light.cpp
intern/cycles/render/shader.cpp

index e9eeff3..f9da2a3 100644 (file)
@@ -278,7 +278,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
                                                                shader = __float_as_int(str.z);
                                                        }
 #endif
-                                                       int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2);
+                                                       int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE);
 
                                                        /* if no transparent shadows, all light is blocked */
                                                        if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
index 3a728b3..5043965 100644 (file)
@@ -356,7 +356,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
                                                                shader = __float_as_int(str.z);
                                                        }
 #endif
-                                                       int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2);
+                                                       int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE);
 
                                                        /* if no transparent shadows, all light is blocked */
                                                        if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
index 4f72c5b..c2ec774 100644 (file)
@@ -308,7 +308,7 @@ ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object)
 
 ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd)
 {
-       return kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2 + 1);
+       return kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE + 1);
 }
 
 /* Particle data from which object was instanced */
index 2b52581..ac498ba 100644 (file)
@@ -29,6 +29,8 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
        /* setup shading at emitter */
        float3 eval;
 
+       int shader_flag = kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE);
+
 #ifdef __BACKGROUND_MIS__
        if(ls->type == LIGHT_BACKGROUND) {
                Ray ray;
@@ -49,10 +51,14 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
        }
        else
 #endif
-       if(ls->lamp != LAMP_NONE && (ls->shader & SHADER_FIXED_EMISSION))
+       if(shader_flag & SD_HAS_CONSTANT_EMISSION)
        {
-               float4 L = kernel_tex_fetch(__light_data, ls->lamp*LIGHT_SIZE + 4);
-               eval = make_float3(L.y, L.z, L.w);
+               eval.x = __int_as_float(kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE + 2));
+               eval.y = __int_as_float(kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE + 3));
+               eval.z = __int_as_float(kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE + 4));
+               if((ls->prim != PRIM_NONE) && dot(ls->Ng, I) < 0.0f) {
+                       ls->Ng = -ls->Ng;
+               }
        }
        else
        {
index e59f9a6..c36d940 100644 (file)
@@ -111,7 +111,7 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
 
        ccl_fetch(sd, I) = -ray->D;
 
-       ccl_fetch(sd, flag) |= kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
+       ccl_fetch(sd, flag) |= kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
 
 #ifdef __INSTANCING__
        if(isect->object != OBJECT_NONE) {
@@ -195,7 +195,7 @@ void shader_setup_from_subsurface(
                motion_triangle_shader_setup(kg, sd, isect, ray, true);
        }
 
-       sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
+       sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
 
 #  ifdef __INSTANCING__
        if(isect->object != OBJECT_NONE) {
@@ -264,7 +264,7 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
 #endif
        ccl_fetch(sd, ray_length) = t;
 
-       ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
+       ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
        if(ccl_fetch(sd, object) != OBJECT_NONE) {
                ccl_fetch(sd, flag) |= kernel_tex_fetch(__object_flag, ccl_fetch(sd, object));
 
@@ -370,7 +370,7 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat
        ccl_fetch(sd, Ng) = -ray->D;
        ccl_fetch(sd, I) = -ray->D;
        ccl_fetch(sd, shader) = kernel_data.background.surface_shader;
-       ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*2);
+       ccl_fetch(sd, flag) = kernel_tex_fetch(__shader_flag, (ccl_fetch(sd, shader) & SHADER_MASK)*SHADER_SIZE);
 #ifdef __OBJECT_MOTION__
        ccl_fetch(sd, time) = ray->time;
 #endif
@@ -1027,7 +1027,7 @@ ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
                sd->shader = stack[i].shader;
 
                sd->flag &= ~(SD_SHADER_FLAGS|SD_OBJECT_FLAGS);
-               sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
+               sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
 
                if(sd->object != OBJECT_NONE) {
                        sd->flag |= kernel_tex_fetch(__object_flag, sd->object);
@@ -1100,7 +1100,7 @@ ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect
                shader = __float_as_int(str.z);
        }
 #endif
-       int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2);
+       int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE);
 
        return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
 }
index 18ca55f..548bc1c 100644 (file)
@@ -42,6 +42,7 @@ CCL_NAMESPACE_BEGIN
 #define RAMP_TABLE_SIZE                256
 #define SHUTTER_TABLE_SIZE             256
 #define PARTICLE_SIZE          5
+#define SHADER_SIZE            5
 
 #define BSSRDF_MIN_RADIUS                      1e-8f
 #define BSSRDF_MAX_HITS                                4
@@ -453,9 +454,8 @@ typedef enum ShaderFlag {
        SHADER_EXCLUDE_CAMERA = (1 << 24),
        SHADER_EXCLUDE_SCATTER = (1 << 23),
        SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE|SHADER_EXCLUDE_GLOSSY|SHADER_EXCLUDE_TRANSMIT|SHADER_EXCLUDE_CAMERA|SHADER_EXCLUDE_SCATTER),
-       SHADER_FIXED_EMISSION = (1 << 22),
 
-       SHADER_MASK = ~(SHADER_SMOOTH_NORMAL|SHADER_CAST_SHADOW|SHADER_AREA_LIGHT|SHADER_USE_MIS|SHADER_EXCLUDE_ANY|SHADER_FIXED_EMISSION)
+       SHADER_MASK = ~(SHADER_SMOOTH_NORMAL|SHADER_CAST_SHADOW|SHADER_AREA_LIGHT|SHADER_USE_MIS|SHADER_EXCLUDE_ANY)
 } ShaderFlag;
 
 /* Light Type */
@@ -715,20 +715,21 @@ enum ShaderDataFlag {
        SD_VOLUME_CUBIC           = (1 << 20),  /* use cubic interpolation for voxels */
        SD_HAS_BUMP               = (1 << 21),  /* has data connected to the displacement input */
        SD_HAS_DISPLACEMENT       = (1 << 22),  /* has true displacement */
+       SD_HAS_CONSTANT_EMISSION  = (1 << 23),  /* has constant emission (value stored in __shader_flag) */
 
        SD_SHADER_FLAGS = (SD_USE_MIS|SD_HAS_TRANSPARENT_SHADOW|SD_HAS_VOLUME|
                           SD_HAS_ONLY_VOLUME|SD_HETEROGENEOUS_VOLUME|
                           SD_HAS_BSSRDF_BUMP|SD_VOLUME_EQUIANGULAR|SD_VOLUME_MIS|
-                          SD_VOLUME_CUBIC|SD_HAS_BUMP|SD_HAS_DISPLACEMENT),
+                          SD_VOLUME_CUBIC|SD_HAS_BUMP|SD_HAS_DISPLACEMENT|SD_HAS_CONSTANT_EMISSION),
 
        /* object flags */
-       SD_HOLDOUT_MASK             = (1 << 23),  /* holdout for camera rays */
-       SD_OBJECT_MOTION            = (1 << 24),  /* has object motion blur */
-       SD_TRANSFORM_APPLIED        = (1 << 25),  /* vertices have transform applied */
-       SD_NEGATIVE_SCALE_APPLIED   = (1 << 26),  /* vertices have negative scale applied */
-       SD_OBJECT_HAS_VOLUME        = (1 << 27),  /* object has a volume shader */
-       SD_OBJECT_INTERSECTS_VOLUME = (1 << 28),  /* object intersects AABB of an object with volume shader */
-       SD_OBJECT_HAS_VERTEX_MOTION = (1 << 29),  /* has position for motion vertices */
+       SD_HOLDOUT_MASK             = (1 << 24),  /* holdout for camera rays */
+       SD_OBJECT_MOTION            = (1 << 25),  /* has object motion blur */
+       SD_TRANSFORM_APPLIED        = (1 << 26),  /* vertices have transform applied */
+       SD_NEGATIVE_SCALE_APPLIED   = (1 << 27),  /* vertices have negative scale applied */
+       SD_OBJECT_HAS_VOLUME        = (1 << 28),  /* object has a volume shader */
+       SD_OBJECT_INTERSECTS_VOLUME = (1 << 29),  /* object intersects AABB of an object with volume shader */
+       SD_OBJECT_HAS_VERTEX_MOTION = (1 << 30),  /* has position for motion vertices */
 
        SD_OBJECT_FLAGS = (SD_HOLDOUT_MASK|SD_OBJECT_MOTION|SD_TRANSFORM_APPLIED|
                           SD_NEGATIVE_SCALE_APPLIED|SD_OBJECT_HAS_VOLUME|
index 4ab51b8..0f45b0e 100644 (file)
@@ -115,7 +115,7 @@ ccl_device float kernel_volume_channel_get(float3 value, int channel)
 ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, VolumeStack *stack)
 {
        for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
-               int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*2);
+               int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
 
                if(shader_flag & SD_HETEROGENEOUS_VOLUME)
                        return true;
@@ -132,7 +132,7 @@ ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stac
        int method = -1;
 
        for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
-               int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*2);
+               int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
 
                if(shader_flag & SD_VOLUME_MIS) {
                        return SD_VOLUME_MIS;
index 3b79358..93f6d79 100644 (file)
@@ -661,11 +661,6 @@ void LightManager::device_update_points(Device *device,
                        use_light_visibility = true;
                }
 
-               float3 fixed_emission = make_float3(0.0f, 0.0f, 0.0f);
-               if(shader->is_constant_emission(&fixed_emission)) {
-                       shader_id |= SHADER_FIXED_EMISSION;
-               }
-
                if(light->type == LIGHT_POINT) {
                        shader_id &= ~SHADER_AREA_LIGHT;
 
@@ -765,7 +760,7 @@ void LightManager::device_update_points(Device *device,
                        light_data[light_index*LIGHT_SIZE + 3] = make_float4(samples, 0.0f, 0.0f, 0.0f);
                }
 
-               light_data[light_index*LIGHT_SIZE + 4] = make_float4(max_bounces, fixed_emission.x, fixed_emission.y, fixed_emission.z);
+               light_data[light_index*LIGHT_SIZE + 4] = make_float4(max_bounces, 0.0f, 0.0f, 0.0f);
 
                light_index++;
        }
index 1849161..06b6dd9 100644 (file)
@@ -401,7 +401,7 @@ void ShaderManager::device_update_common(Device *device,
        if(scene->shaders.size() == 0)
                return;
 
-       uint shader_flag_size = scene->shaders.size()*2;
+       uint shader_flag_size = scene->shaders.size()*SHADER_SIZE;
        uint *shader_flag = dscene->shader_flag.resize(shader_flag_size);
        uint i = 0;
        bool has_volumes = false;
@@ -446,9 +446,17 @@ void ShaderManager::device_update_common(Device *device,
                if(shader->displacement_method != DISPLACE_TRUE && shader->graph_bump)
                        flag |= SD_HAS_BSSRDF_BUMP;
 
+               /* constant emission check */
+               float3 constant_emission = make_float3(0.0f, 0.0f, 0.0f);
+               if(shader->is_constant_emission(&constant_emission))
+                       flag |= SD_HAS_CONSTANT_EMISSION;
+
                /* regular shader */
                shader_flag[i++] = flag;
                shader_flag[i++] = shader->pass_id;
+               shader_flag[i++] = __float_as_int(constant_emission.x);
+               shader_flag[i++] = __float_as_int(constant_emission.y);
+               shader_flag[i++] = __float_as_int(constant_emission.z);
 
                has_transparent_shadow |= (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
        }