Eevee: SSR: Making ray count a define rather than a uniform.
author Clément Foucault <foucault.clem@gmail.com>
Wed, 13 Sep 2017 13:29:13 +0000 (15:29 +0200)
committer Clément Foucault <foucault.clem@gmail.com>
Wed, 13 Sep 2017 13:29:38 +0000 (15:29 +0200)
The branching introduced by the uniform caused problems on Mesa + AMD in the resolve stage.
This patch creates one shader per sample count, without branching.
This improves performance in the single-ray-per-pixel case (3.0ms against 3.6ms in my testing).

source/blender/draw/engines/eevee/eevee_effects.c
source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl

index b18d6455893c2cbe0cd94c1ffb39e710f2da6617..bcc9986d671eb6fb3cff0e3793f8135d5159eccd 100644 (file)
@@ -60,9 +60,10 @@ typedef struct EEVEE_LightProbeData {
 
 /* SSR shader variations */
 enum {
-       SSR_RESOLVE      = (1 << 0),
-       SSR_FULL_TRACE   = (1 << 1),
-       SSR_MAX_SHADER   = (1 << 2),
+       SSR_SAMPLES      = (1 << 0) | (1 << 1),
+       SSR_RESOLVE      = (1 << 2),
+       SSR_FULL_TRACE   = (1 << 3),
+       SSR_MAX_SHADER   = (1 << 4),
 };
 
 static struct {
@@ -198,8 +199,11 @@ static struct GPUShader *eevee_effects_ssr_shader_get(int options)
                char *ssr_shader_str = BLI_dynstr_get_cstring(ds_frag);
                BLI_dynstr_free(ds_frag);
 
+               int samples = (SSR_SAMPLES & options) + 1;
+
                DynStr *ds_defines = BLI_dynstr_new();
                BLI_dynstr_appendf(ds_defines, SHADER_DEFINES);
+               BLI_dynstr_appendf(ds_defines, "#define RAY_COUNT %d\n", samples);
                if (options & SSR_RESOLVE) {
                        BLI_dynstr_appendf(ds_defines, "#define STEP_RESOLVE\n");
                }
@@ -856,6 +860,7 @@ void EEVEE_effects_cache_init(EEVEE_SceneLayerData *sldata, EEVEE_Data *vedata)
 
        if ((effects->enabled_effects & EFFECT_SSR) != 0) {
                int options = (effects->reflection_trace_full) ? SSR_FULL_TRACE : 0;
+               options |= (effects->ssr_ray_count - 1);
 
                struct GPUShader *trace_shader = eevee_effects_ssr_shader_get(options);
                struct GPUShader *resolve_shader = eevee_effects_ssr_shader_get(SSR_RESOLVE | options);
@@ -871,7 +876,6 @@ void EEVEE_effects_cache_init(EEVEE_SceneLayerData *sldata, EEVEE_Data *vedata)
                DRW_shgroup_uniform_vec4(grp, "viewvecs[0]", (float *)stl->g_data->viewvecs, 2);
                DRW_shgroup_uniform_vec2(grp, "mipRatio[0]", (float *)stl->g_data->mip_ratio, 10);
                DRW_shgroup_uniform_vec4(grp, "ssrParameters", &effects->ssr_quality, 1);
-               DRW_shgroup_uniform_int(grp, "rayCount", &effects->ssr_ray_count, 1);
                DRW_shgroup_uniform_int(grp, "planar_count", &sldata->probes->num_planar, 1);
                DRW_shgroup_uniform_float(grp, "maxRoughness", &effects->ssr_max_roughness, 1);
                DRW_shgroup_uniform_buffer(grp, "planarDepth", &vedata->txl->planar_depth);
@@ -900,10 +904,15 @@ void EEVEE_effects_cache_init(EEVEE_SceneLayerData *sldata, EEVEE_Data *vedata)
                DRW_shgroup_uniform_buffer(grp, "probeCubes", &sldata->probe_pool);
                DRW_shgroup_uniform_buffer(grp, "probePlanars", &vedata->txl->planar_pool);
                DRW_shgroup_uniform_buffer(grp, "hitBuffer0", &stl->g_data->ssr_hit_output[0]);
-               DRW_shgroup_uniform_buffer(grp, "hitBuffer1", (effects->ssr_ray_count < 2) ? &stl->g_data->ssr_hit_output[0] : &stl->g_data->ssr_hit_output[1]);
-               DRW_shgroup_uniform_buffer(grp, "hitBuffer2", (effects->ssr_ray_count < 3) ? &stl->g_data->ssr_hit_output[0] : &stl->g_data->ssr_hit_output[2]);
-               DRW_shgroup_uniform_buffer(grp, "hitBuffer3", (effects->ssr_ray_count < 4) ? &stl->g_data->ssr_hit_output[0] : &stl->g_data->ssr_hit_output[3]);
-               DRW_shgroup_uniform_int(grp, "rayCount", &effects->ssr_ray_count, 1);
+               if (effects->ssr_ray_count > 1) {
+                       DRW_shgroup_uniform_buffer(grp, "hitBuffer1", &stl->g_data->ssr_hit_output[1]);
+               }
+               if (effects->ssr_ray_count > 2) {
+                       DRW_shgroup_uniform_buffer(grp, "hitBuffer2", &stl->g_data->ssr_hit_output[2]);
+               }
+               if (effects->ssr_ray_count > 3) {
+                       DRW_shgroup_uniform_buffer(grp, "hitBuffer3", &stl->g_data->ssr_hit_output[3]);
+               }
                DRW_shgroup_call_add(grp, quad, NULL);
        }
 
index 0a9584043858ea5722f12d06469d9f632e4b4f8a..1f3c782212436df753ed4d6901b65f09f3a1995b 100644 (file)
@@ -111,7 +111,6 @@ void main()
        if (dot(speccol_roughness.rgb, vec3(1.0)) == 0.0)
                discard;
 
-
        float roughness = speccol_roughness.a;
        float roughnessSquared = max(1e-3, roughness * roughness);
        float a2 = roughnessSquared * roughnessSquared;
@@ -129,8 +128,6 @@ void main()
        vec3 T, B;
        make_orthonormal_basis(N, T, B); /* Generate tangent space */
 
-       float ray_ofs = 1.0 / float(rayCount);
-
        /* Planar Reflections */
        for (int i = 0; i < MAX_PLANAR && i < planar_count; ++i) {
                PlanarData pd = planars_data[i];
@@ -144,20 +141,31 @@ void main()
                        tracePosition = transform_point(ViewMatrix, tracePosition);
                        vec3 planeNormal = transform_direction(ViewMatrix, pd.pl_normal);
 
-                       /* TODO : Raytrace together if textureGather is supported. */
                        hitData0 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand, 0.0);
-                       if (rayCount > 1) hitData1 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xyz * vec3(1.0, -1.0, -1.0), 1.0 * ray_ofs);
-                       if (rayCount > 2) hitData2 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xzy * vec3(1.0,  1.0, -1.0), 2.0 * ray_ofs);
-                       if (rayCount > 3) hitData3 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xzy * vec3(1.0, -1.0,  1.0), 3.0 * ray_ofs);
+#if (RAY_COUNT > 1)
+                       hitData1 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xyz * vec3(1.0, -1.0, -1.0), 1.0 / float(RAY_COUNT));
+#endif
+#if (RAY_COUNT > 2)
+                       hitData2 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xzy * vec3(1.0,  1.0, -1.0), 2.0 / float(RAY_COUNT));
+#endif
+#if (RAY_COUNT > 3)
+                       hitData3 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xzy * vec3(1.0, -1.0,  1.0), 3.0 / float(RAY_COUNT));
+#endif
                        return;
                }
        }
 
        /* TODO : Raytrace together if textureGather is supported. */
        hitData0 = do_ssr(V, N, T, B, viewPosition, a2, rand, 0.0);
-       if (rayCount > 1) hitData1 = do_ssr(V, N, T, B, viewPosition, a2, rand.xyz * vec3(1.0, -1.0, -1.0), 1.0 * ray_ofs);
-       if (rayCount > 2) hitData2 = do_ssr(V, N, T, B, viewPosition, a2, rand.xzy * vec3(1.0,  1.0, -1.0), 2.0 * ray_ofs);
-       if (rayCount > 3) hitData3 = do_ssr(V, N, T, B, viewPosition, a2, rand.xzy * vec3(1.0, -1.0,  1.0), 3.0 * ray_ofs);
+#if (RAY_COUNT > 1)
+       hitData1 = do_ssr(V, N, T, B, viewPosition, a2, rand.xyz * vec3(1.0, -1.0, -1.0), 1.0 / float(RAY_COUNT));
+#endif
+#if (RAY_COUNT > 2)
+       hitData2 = do_ssr(V, N, T, B, viewPosition, a2, rand.xzy * vec3(1.0,  1.0, -1.0), 2.0 / float(RAY_COUNT));
+#endif
+#if (RAY_COUNT > 3)
+       hitData3 = do_ssr(V, N, T, B, viewPosition, a2, rand.xzy * vec3(1.0, -1.0,  1.0), 3.0 / float(RAY_COUNT));
+#endif
 }
 
 #else /* STEP_RESOLVE */
@@ -395,21 +403,21 @@ void main()
                        ssr_accum += get_ssr_sample(hitBuffer0, pd, planar_index, worldPosition, N, V,
                                                    roughnessSquared, cone_tan, source_uvs,
                                                    texture_size, target_texel, weight_acc);
-                       if (rayCount > 1) {
-                               ssr_accum += get_ssr_sample(hitBuffer1, pd, planar_index, worldPosition, N, V,
-                                                           roughnessSquared, cone_tan, source_uvs,
-                                                           texture_size, target_texel, weight_acc);
-                       }
-                       if (rayCount > 2) {
-                               ssr_accum += get_ssr_sample(hitBuffer2, pd, planar_index, worldPosition, N, V,
-                                                           roughnessSquared, cone_tan, source_uvs,
-                                                           texture_size, target_texel, weight_acc);
-                       }
-                       if (rayCount > 3) {
-                               ssr_accum += get_ssr_sample(hitBuffer3, pd, planar_index, worldPosition, N, V,
-                                                           roughnessSquared, cone_tan, source_uvs,
-                                                           texture_size, target_texel, weight_acc);
-                       }
+#if (RAY_COUNT > 1)
+                       ssr_accum += get_ssr_sample(hitBuffer1, pd, planar_index, worldPosition, N, V,
+                                                   roughnessSquared, cone_tan, source_uvs,
+                                                   texture_size, target_texel, weight_acc);
+#endif
+#if (RAY_COUNT > 2)
+                       ssr_accum += get_ssr_sample(hitBuffer2, pd, planar_index, worldPosition, N, V,
+                                                   roughnessSquared, cone_tan, source_uvs,
+                                                   texture_size, target_texel, weight_acc);
+#endif
+#if (RAY_COUNT > 3)
+                       ssr_accum += get_ssr_sample(hitBuffer3, pd, planar_index, worldPosition, N, V,
+                                                   roughnessSquared, cone_tan, source_uvs,
+                                                   texture_size, target_texel, weight_acc);
+#endif
                }
        }