Fix T53001: more workarounds for crash in AMD compiler with recent drivers.
author Brecht Van Lommel <brechtvanlommel@gmail.com>
Thu, 5 Oct 2017 14:41:41 +0000 (16:41 +0200)
committer Brecht Van Lommel <brechtvanlommel@gmail.com>
Thu, 5 Oct 2017 15:57:58 +0000 (17:57 +0200)
intern/cycles/kernel/kernel_compat_cuda.h
intern/cycles/kernel/kernel_compat_opencl.h
intern/cycles/kernel/svm/svm_blackbody.h
intern/cycles/kernel/svm/svm_math_util.h
intern/cycles/kernel/svm/svm_wavelength.h
intern/cycles/util/util_defines.h

index 1e2af9de8b3c09e950930863c0e357dbfe8b81d2..2e8ca48c41303fef4d612a590351ce1b4c86b52f 100644 (file)
@@ -50,7 +50,8 @@
 #endif
 #define ccl_device_noinline  __device__ __noinline__
 #define ccl_global
-#define ccl_constant
+#define ccl_static_constant __constant__
+#define ccl_constant const
 #define ccl_local __shared__
 #define ccl_local_param
 #define ccl_private
index 36d6031d042823bd346747fa0fbfcb63539c075c..7f81523791be0c300adea4cf790fbbab8977b2dc 100644 (file)
@@ -36,6 +36,7 @@
 #define ccl_device_forceinline ccl_device
 #define ccl_device_noinline ccl_device ccl_noinline
 #define ccl_may_alias
+#define ccl_static_constant static __constant
 #define ccl_constant __constant
 #define ccl_global __global
 #define ccl_local __local
index b750ad87b7fbcba36cb966f1bc3f5ddd03823c7d..51590b18505ee6a836122e355469fdfef8a796a9 100644 (file)
@@ -41,8 +41,7 @@ ccl_device void svm_node_blackbody(KernelGlobals *kg, ShaderData *sd, float *sta
 
        float3 color_rgb = svm_math_blackbody_color(temperature);
 
-       if(stack_valid(col_offset))
-               stack_store_float3(stack, col_offset, color_rgb);
+       stack_store_float3(stack, col_offset, color_rgb);
 }
 
 CCL_NAMESPACE_END
index f8aeeba4a212e91e0a2d204acbe55187c290ecf3..1ce7777aac3ff99ac0f11dc0b46c3587aaa20ebf 100644 (file)
@@ -100,40 +100,42 @@ ccl_device float svm_math(NodeMath type, float Fac1, float Fac2)
        return Fac;
 }
 
-ccl_device float3 svm_math_blackbody_color(float t) {
-       /* Calculate color in range 800..12000 using an approximation
-        * a/x+bx+c for R and G and ((at + b)t + c)t + d) for B
-        * Max absolute error for RGB is (0.00095, 0.00077, 0.00057),
-        * which is enough to get the same 8 bit/channel color.
-        */
-
-       const float rc[6][3] = {
-               {  2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f },
-               {  3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f },
-               {  4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f },
-               {  4.66849800e+03f,  2.85655028e-05f, 1.29075375e-01f },
-               {  4.60124770e+03f,  2.89727618e-05f, 1.48001316e-01f },
-               {  3.78765709e+03f,  9.36026367e-06f, 3.98995841e-01f },
-       };
-
-       const float gc[6][3] = {
-               { -7.50343014e+02f,  3.15679613e-04f, 4.73464526e-01f },
-               { -1.00402363e+03f,  1.29189794e-04f, 9.08181524e-01f },
-               { -1.22075471e+03f,  2.56245413e-05f, 1.20753416e+00f },
-               { -1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f },
-               { -1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f },
-               { -5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f },
-       };
-
-       const float bc[6][4] = {
-               { 0.0f, 0.0f, 0.0f, 0.0f }, /* zeros should be optimized by compiler */
-               { 0.0f, 0.0f, 0.0f, 0.0f },
-               { 0.0f, 0.0f, 0.0f, 0.0f },
-               { -2.02524603e-11f,  1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f },
-               { -2.22463426e-13f, -1.55078698e-08f,  3.81675160e-04f, -7.30646033e-01f },
-               {  6.72595954e-13f, -2.73059993e-08f,  4.24068546e-04f, -7.52204323e-01f },
-       };
+/* Calculate color in range 800..12000 using an approximation
+ * a/t + b*t + c for R and G and ((a*t + b)*t + c)*t + d for B
+ * Max absolute error for RGB is (0.00095, 0.00077, 0.00057),
+ * which is enough to get the same 8 bit/channel color.
+ */
 
+ccl_static_constant float blackbody_table_r[6][3] = {
+       {  2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f },
+       {  3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f },
+       {  4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f },
+       {  4.66849800e+03f,  2.85655028e-05f, 1.29075375e-01f },
+       {  4.60124770e+03f,  2.89727618e-05f, 1.48001316e-01f },
+       {  3.78765709e+03f,  9.36026367e-06f, 3.98995841e-01f },
+};
+
+ccl_static_constant float blackbody_table_g[6][3] = {
+       { -7.50343014e+02f,  3.15679613e-04f, 4.73464526e-01f },
+       { -1.00402363e+03f,  1.29189794e-04f, 9.08181524e-01f },
+       { -1.22075471e+03f,  2.56245413e-05f, 1.20753416e+00f },
+       { -1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f },
+       { -1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f },
+       { -5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f },
+};
+
+ccl_static_constant float blackbody_table_b[6][4] = {
+       { 0.0f, 0.0f, 0.0f, 0.0f }, /* zeros should be optimized by compiler */
+       { 0.0f, 0.0f, 0.0f, 0.0f },
+       { 0.0f, 0.0f, 0.0f, 0.0f },
+       { -2.02524603e-11f,  1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f },
+       { -2.22463426e-13f, -1.55078698e-08f,  3.81675160e-04f, -7.30646033e-01f },
+       {  6.72595954e-13f, -2.73059993e-08f,  4.24068546e-04f, -7.52204323e-01f },
+};
+
+
+ccl_device float3 svm_math_blackbody_color(float t)
+{
        if(t >= 12000.0f) {
                return make_float3(0.826270103f, 0.994478524f, 1.56626022f);
        }
@@ -148,10 +150,14 @@ ccl_device float3 svm_math_blackbody_color(float t) {
                (t >= 1449.0f)? 2:
                (t >= 1167.0f)? 1: 0;
 
+       ccl_constant float *r = blackbody_table_r[i];
+       ccl_constant float *g = blackbody_table_g[i];
+       ccl_constant float *b = blackbody_table_b[i];
+
        const float t_inv = 1.0f / t;
-       return make_float3(rc[i][0] * t_inv + rc[i][1] * t + rc[i][2],
-                          gc[i][0] * t_inv + gc[i][1] * t + gc[i][2],
-                          ((bc[i][0] * t + bc[i][1]) * t + bc[i][2]) * t + bc[i][3]);
+       return make_float3(r[0] * t_inv + r[1] * t + r[2],
+                          g[0] * t_inv + g[1] * t + g[2],
+                          ((b[0] * t + b[1]) * t + b[2]) * t + b[3]);
 }
 
 ccl_device_inline float3 svm_math_gamma_color(float3 color, float gamma)
index 57030f3979d9a49503af0c069d828e66a8d2b515..855b356b397717e9dbb692b7e9f4710496dd1368 100644 (file)
@@ -34,44 +34,44 @@ CCL_NAMESPACE_BEGIN
 
 /* Wavelength to RGB */
 
+// CIE colour matching functions xBar, yBar, and zBar for
+//      wavelengths from 380 through 780 nanometers, every 5
+//      nanometers.  For a wavelength lambda in this range:
+//               cie_colour_match[(lambda - 380) / 5][0] = xBar
+//               cie_colour_match[(lambda - 380) / 5][1] = yBar
+//               cie_colour_match[(lambda - 380) / 5][2] = zBar
+ccl_static_constant float cie_colour_match[81][3] = {
+       {0.0014f,0.0000f,0.0065f}, {0.0022f,0.0001f,0.0105f}, {0.0042f,0.0001f,0.0201f},
+       {0.0076f,0.0002f,0.0362f}, {0.0143f,0.0004f,0.0679f}, {0.0232f,0.0006f,0.1102f},
+       {0.0435f,0.0012f,0.2074f}, {0.0776f,0.0022f,0.3713f}, {0.1344f,0.0040f,0.6456f},
+       {0.2148f,0.0073f,1.0391f}, {0.2839f,0.0116f,1.3856f}, {0.3285f,0.0168f,1.6230f},
+       {0.3483f,0.0230f,1.7471f}, {0.3481f,0.0298f,1.7826f}, {0.3362f,0.0380f,1.7721f},
+       {0.3187f,0.0480f,1.7441f}, {0.2908f,0.0600f,1.6692f}, {0.2511f,0.0739f,1.5281f},
+       {0.1954f,0.0910f,1.2876f}, {0.1421f,0.1126f,1.0419f}, {0.0956f,0.1390f,0.8130f},
+       {0.0580f,0.1693f,0.6162f}, {0.0320f,0.2080f,0.4652f}, {0.0147f,0.2586f,0.3533f},
+       {0.0049f,0.3230f,0.2720f}, {0.0024f,0.4073f,0.2123f}, {0.0093f,0.5030f,0.1582f},
+       {0.0291f,0.6082f,0.1117f}, {0.0633f,0.7100f,0.0782f}, {0.1096f,0.7932f,0.0573f},
+       {0.1655f,0.8620f,0.0422f}, {0.2257f,0.9149f,0.0298f}, {0.2904f,0.9540f,0.0203f},
+       {0.3597f,0.9803f,0.0134f}, {0.4334f,0.9950f,0.0087f}, {0.5121f,1.0000f,0.0057f},
+       {0.5945f,0.9950f,0.0039f}, {0.6784f,0.9786f,0.0027f}, {0.7621f,0.9520f,0.0021f},
+       {0.8425f,0.9154f,0.0018f}, {0.9163f,0.8700f,0.0017f}, {0.9786f,0.8163f,0.0014f},
+       {1.0263f,0.7570f,0.0011f}, {1.0567f,0.6949f,0.0010f}, {1.0622f,0.6310f,0.0008f},
+       {1.0456f,0.5668f,0.0006f}, {1.0026f,0.5030f,0.0003f}, {0.9384f,0.4412f,0.0002f},
+       {0.8544f,0.3810f,0.0002f}, {0.7514f,0.3210f,0.0001f}, {0.6424f,0.2650f,0.0000f},
+       {0.5419f,0.2170f,0.0000f}, {0.4479f,0.1750f,0.0000f}, {0.3608f,0.1382f,0.0000f},
+       {0.2835f,0.1070f,0.0000f}, {0.2187f,0.0816f,0.0000f}, {0.1649f,0.0610f,0.0000f},
+       {0.1212f,0.0446f,0.0000f}, {0.0874f,0.0320f,0.0000f}, {0.0636f,0.0232f,0.0000f},
+       {0.0468f,0.0170f,0.0000f}, {0.0329f,0.0119f,0.0000f}, {0.0227f,0.0082f,0.0000f},
+       {0.0158f,0.0057f,0.0000f}, {0.0114f,0.0041f,0.0000f}, {0.0081f,0.0029f,0.0000f},
+       {0.0058f,0.0021f,0.0000f}, {0.0041f,0.0015f,0.0000f}, {0.0029f,0.0010f,0.0000f},
+       {0.0020f,0.0007f,0.0000f}, {0.0014f,0.0005f,0.0000f}, {0.0010f,0.0004f,0.0000f},
+       {0.0007f,0.0002f,0.0000f}, {0.0005f,0.0002f,0.0000f}, {0.0003f,0.0001f,0.0000f},
+       {0.0002f,0.0001f,0.0000f}, {0.0002f,0.0001f,0.0000f}, {0.0001f,0.0000f,0.0000f},
+       {0.0001f,0.0000f,0.0000f}, {0.0001f,0.0000f,0.0000f}, {0.0000f,0.0000f,0.0000f}
+};
+
 ccl_device void svm_node_wavelength(ShaderData *sd, float *stack, uint wavelength, uint color_out)
 {      
-       // CIE colour matching functions xBar, yBar, and zBar for
-       //       wavelengths from 380 through 780 nanometers, every 5
-       //       nanometers.  For a wavelength lambda in this range:
-       //                cie_colour_match[(lambda - 380) / 5][0] = xBar
-       //                cie_colour_match[(lambda - 380) / 5][1] = yBar
-       //                cie_colour_match[(lambda - 380) / 5][2] = zBar
-       const float cie_colour_match[81][3] = {
-               {0.0014f,0.0000f,0.0065f}, {0.0022f,0.0001f,0.0105f}, {0.0042f,0.0001f,0.0201f},
-               {0.0076f,0.0002f,0.0362f}, {0.0143f,0.0004f,0.0679f}, {0.0232f,0.0006f,0.1102f},
-               {0.0435f,0.0012f,0.2074f}, {0.0776f,0.0022f,0.3713f}, {0.1344f,0.0040f,0.6456f},
-               {0.2148f,0.0073f,1.0391f}, {0.2839f,0.0116f,1.3856f}, {0.3285f,0.0168f,1.6230f},
-               {0.3483f,0.0230f,1.7471f}, {0.3481f,0.0298f,1.7826f}, {0.3362f,0.0380f,1.7721f},
-               {0.3187f,0.0480f,1.7441f}, {0.2908f,0.0600f,1.6692f}, {0.2511f,0.0739f,1.5281f},
-               {0.1954f,0.0910f,1.2876f}, {0.1421f,0.1126f,1.0419f}, {0.0956f,0.1390f,0.8130f},
-               {0.0580f,0.1693f,0.6162f}, {0.0320f,0.2080f,0.4652f}, {0.0147f,0.2586f,0.3533f},
-               {0.0049f,0.3230f,0.2720f}, {0.0024f,0.4073f,0.2123f}, {0.0093f,0.5030f,0.1582f},
-               {0.0291f,0.6082f,0.1117f}, {0.0633f,0.7100f,0.0782f}, {0.1096f,0.7932f,0.0573f},
-               {0.1655f,0.8620f,0.0422f}, {0.2257f,0.9149f,0.0298f}, {0.2904f,0.9540f,0.0203f},
-               {0.3597f,0.9803f,0.0134f}, {0.4334f,0.9950f,0.0087f}, {0.5121f,1.0000f,0.0057f},
-               {0.5945f,0.9950f,0.0039f}, {0.6784f,0.9786f,0.0027f}, {0.7621f,0.9520f,0.0021f},
-               {0.8425f,0.9154f,0.0018f}, {0.9163f,0.8700f,0.0017f}, {0.9786f,0.8163f,0.0014f},
-               {1.0263f,0.7570f,0.0011f}, {1.0567f,0.6949f,0.0010f}, {1.0622f,0.6310f,0.0008f},
-               {1.0456f,0.5668f,0.0006f}, {1.0026f,0.5030f,0.0003f}, {0.9384f,0.4412f,0.0002f},
-               {0.8544f,0.3810f,0.0002f}, {0.7514f,0.3210f,0.0001f}, {0.6424f,0.2650f,0.0000f},
-               {0.5419f,0.2170f,0.0000f}, {0.4479f,0.1750f,0.0000f}, {0.3608f,0.1382f,0.0000f},
-               {0.2835f,0.1070f,0.0000f}, {0.2187f,0.0816f,0.0000f}, {0.1649f,0.0610f,0.0000f},
-               {0.1212f,0.0446f,0.0000f}, {0.0874f,0.0320f,0.0000f}, {0.0636f,0.0232f,0.0000f},
-               {0.0468f,0.0170f,0.0000f}, {0.0329f,0.0119f,0.0000f}, {0.0227f,0.0082f,0.0000f},
-               {0.0158f,0.0057f,0.0000f}, {0.0114f,0.0041f,0.0000f}, {0.0081f,0.0029f,0.0000f},
-               {0.0058f,0.0021f,0.0000f}, {0.0041f,0.0015f,0.0000f}, {0.0029f,0.0010f,0.0000f},
-               {0.0020f,0.0007f,0.0000f}, {0.0014f,0.0005f,0.0000f}, {0.0010f,0.0004f,0.0000f},
-               {0.0007f,0.0002f,0.0000f}, {0.0005f,0.0002f,0.0000f}, {0.0003f,0.0001f,0.0000f},
-               {0.0002f,0.0001f,0.0000f}, {0.0002f,0.0001f,0.0000f}, {0.0001f,0.0000f,0.0000f},
-               {0.0001f,0.0000f,0.0000f}, {0.0001f,0.0000f,0.0000f}, {0.0000f,0.0000f,0.0000f}
-       };
-
        float lambda_nm = stack_load_float(stack, wavelength);
        float ii = (lambda_nm-380.0f) * (1.0f/5.0f);  // scaled 0..80
        int i = float_to_int(ii);
@@ -82,7 +82,7 @@ ccl_device void svm_node_wavelength(ShaderData *sd, float *stack, uint wavelengt
        }
        else {
                ii -= i;
-               const float *c = cie_colour_match[i];
+               ccl_constant float *c = cie_colour_match[i];
                color = interp(make_float3(c[0], c[1], c[2]), make_float3(c[3], c[4], c[5]), ii);
        }
        
@@ -92,8 +92,7 @@ ccl_device void svm_node_wavelength(ShaderData *sd, float *stack, uint wavelengt
        /* Clamp to zero if values are smaller */
        color = max(color, make_float3(0.0f, 0.0f, 0.0f));
 
-       if(stack_valid(color_out))
-               stack_store_float3(stack, color_out, color);
+       stack_store_float3(stack, color_out, color);
 }
 
 CCL_NAMESPACE_END
index ae654092c87208d65c8c08a6cbf504b30d041465..98944a19022be38a9bb92dc0d1915f660be1ca35 100644 (file)
@@ -30,7 +30,8 @@
 #  define ccl_device static inline
 #  define ccl_device_noinline static
 #  define ccl_global
-#  define ccl_constant
+#  define ccl_static_constant static const
+#  define ccl_constant const
 #  define ccl_local
 #  define ccl_local_param
 #  define ccl_private