Merge branch 'master' into blender2.8
author Bastien Montagne <montagne29@wanadoo.fr>
Wed, 26 Apr 2017 08:11:03 +0000 (10:11 +0200)
committer Bastien Montagne <montagne29@wanadoo.fr>
Wed, 26 Apr 2017 08:11:03 +0000 (10:11 +0200)
intern/cycles/device/opencl/opencl_base.cpp
intern/cycles/kernel/kernel_subsurface.h
intern/cycles/kernel/split/kernel_split_data_types.h
intern/cycles/render/shader.cpp

index 52d0662a8e3d67a2cbdc2cf860a4a25f237f2523..22aeaddcde85e410ef67df1db842cc8fa59b5006 100644 (file)
@@ -612,7 +612,7 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
 
 string OpenCLDeviceBase::kernel_build_options(const string *debug_src)
 {
-       string build_options = "-cl-fast-relaxed-math ";
+       string build_options = "-cl-no-signed-zeros -cl-mad-enable ";
 
        if(platform_name == "NVIDIA CUDA") {
                build_options += "-D__KERNEL_OPENCL_NVIDIA__ "
index f58eef7a368fdbb58e6586918b79fcf9fa1fd527..baf629342b99366baa3a3184b93e2acb2d7407af 100644 (file)
@@ -399,6 +399,12 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
 #else
        Ray *ray = &ss_isect->ray;
 #endif
+
+       /* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */
+#if defined(__SPLIT_KERNEL__) && defined(__KERNEL_OPENCL_AMD__) && defined(__KERNEL_GPU__)
+       kernel_split_params.dummy_sd_flag = sd->flag;
+#endif
+
        /* Setup new shading point. */
        shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray);
 
index 748197b718315c187095c7cfef73d79cabf0484f..0af8bfc89d5a5136152eac51f5ad9456ae29877f 100644 (file)
@@ -43,6 +43,9 @@ typedef struct SplitParams {
        ccl_global char *use_queues_flag;
 
        ccl_global float *buffer;
+
+       /* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */
+       int dummy_sd_flag;
 } SplitParams;
 
 /* Global memory variables [porting]; These memory is used for
index a7d42a4b4a029748c4fd4cea723f702e013bc5b4..ef89288f16799263bd732b896bcf540d398b97fb 100644 (file)
@@ -49,6 +49,16 @@ static float beckmann_table_slope_max()
        return 6.0;
 }
 
+
+/* MSVC 2015 needs this ugly hack to prevent a codegen bug on x86
+ * see T50176 for details
+ */
+#if defined(_MSC_VER) && (_MSC_VER == 1900)
+#  define MSVC_VOLATILE volatile
+#else
+#  define MSVC_VOLATILE
+#endif
+
 /* Paper used: Importance Sampling Microfacet-Based BSDFs with the
  * Distribution of Visible Normals. Supplemental Material 2/2.
  *
@@ -72,7 +82,7 @@ static void beckmann_table_rows(float *table, int row_from, int row_to)
                slope_x[0] = (double)-beckmann_table_slope_max();
                CDF_P22_omega_i[0] = 0;
 
-               for(int index_slope_x = 1; index_slope_x < DATA_TMP_SIZE; ++index_slope_x) {
+               for(MSVC_VOLATILE int index_slope_x = 1; index_slope_x < DATA_TMP_SIZE; ++index_slope_x) {
                        /* slope_x */
                        slope_x[index_slope_x] = (double)(-beckmann_table_slope_max() + 2.0f * beckmann_table_slope_max() * index_slope_x/(DATA_TMP_SIZE - 1.0f));
 
@@ -116,6 +126,8 @@ static void beckmann_table_rows(float *table, int row_from, int row_to)
        }
 }
 
+#undef MSVC_VOLATILE
+
 static void beckmann_table_build(vector<float>& table)
 {
        table.resize(BECKMANN_TABLE_SIZE*BECKMANN_TABLE_SIZE);