string OpenCLDeviceBase::kernel_build_options(const string *debug_src)
{
- string build_options = "-cl-fast-relaxed-math ";
+ string build_options = "-cl-no-signed-zeros -cl-mad-enable ";
if(platform_name == "NVIDIA CUDA") {
build_options += "-D__KERNEL_OPENCL_NVIDIA__ "
#else
Ray *ray = &ss_isect->ray;
#endif
+
+ /* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */
+#if defined(__SPLIT_KERNEL__) && defined(__KERNEL_OPENCL_AMD__) && defined(__KERNEL_GPU__)
+ kernel_split_params.dummy_sd_flag = sd->flag;
+#endif
+
/* Setup new shading point. */
shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray);
ccl_global char *use_queues_flag;
ccl_global float *buffer;
+
+ /* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */
+ int dummy_sd_flag;
} SplitParams;
/* Global memory variables [porting]; These memory is used for
return 6.0;
}
+
+/* MSVC 2015 needs this ugly hack to prevent a codegen bug on x86
+ * see T50176 for details
+ */
+#if defined(_MSC_VER) && (_MSC_VER == 1900)
+# define MSVC_VOLATILE volatile
+#else
+# define MSVC_VOLATILE
+#endif
+
/* Paper used: Importance Sampling Microfacet-Based BSDFs with the
* Distribution of Visible Normals. Supplemental Material 2/2.
*
slope_x[0] = (double)-beckmann_table_slope_max();
CDF_P22_omega_i[0] = 0;
- for(int index_slope_x = 1; index_slope_x < DATA_TMP_SIZE; ++index_slope_x) {
+ for(MSVC_VOLATILE int index_slope_x = 1; index_slope_x < DATA_TMP_SIZE; ++index_slope_x) {
/* slope_x */
slope_x[index_slope_x] = (double)(-beckmann_table_slope_max() + 2.0f * beckmann_table_slope_max() * index_slope_x/(DATA_TMP_SIZE - 1.0f));
}
}
+#undef MSVC_VOLATILE
+
static void beckmann_table_build(vector<float>& table)
{
table.resize(BECKMANN_TABLE_SIZE*BECKMANN_TABLE_SIZE);