Merge branch 'blender2.7'
author Brecht Van Lommel <brechtvanlommel@gmail.com>
Thu, 14 Feb 2019 19:00:37 +0000 (20:00 +0100)
committer Brecht Van Lommel <brechtvanlommel@gmail.com>
Thu, 14 Feb 2019 19:00:37 +0000 (20:00 +0100)
12 files changed:
intern/cycles/app/cycles_cubin_cc.cpp
intern/cycles/kernel/kernel_bake.h
intern/cycles/kernel/kernel_volume.h
intern/cycles/kernel/shaders/node_hsv.osl
intern/cycles/kernel/svm/svm_ao.h
intern/cycles/kernel/svm/svm_hsv.h
intern/cycles/render/denoising.cpp
intern/cycles/render/denoising.h
intern/cycles/util/util_thread.cpp
intern/cycles/util/util_thread.h
source/blender/gpu/shaders/gpu_shader_material.glsl
source/blender/nodes/shader/nodes/node_shader_hueSatVal.c

index da8ca53..e6eb0be 100644 (file)
@@ -63,7 +63,7 @@ public:
        bool fast_math;
 };
 
-bool compile_cuda(CompilationSettings &settings)
+static bool compile_cuda(CompilationSettings &settings)
 {
        const char* headers[] = {"stdlib.h" , "float.h", "math.h", "stdio.h"};
        const char* header_content[] = {"\n", "\n", "\n", "\n"};
@@ -99,7 +99,7 @@ bool compile_cuda(CompilationSettings &settings)
                headers);                        // includeNames
 
        if(result != NVRTC_SUCCESS) {
-               fprintf(stderr, "Error: nvrtcCreateProgram failed (%x)\n\n", result);
+               fprintf(stderr, "Error: nvrtcCreateProgram failed (%d)\n\n", (int)result);
                return false;
        }
 
@@ -112,7 +112,7 @@ bool compile_cuda(CompilationSettings &settings)
        result = nvrtcCompileProgram(prog, options.size(), &opts[0]);
 
        if(result != NVRTC_SUCCESS) {
-               fprintf(stderr, "Error: nvrtcCompileProgram failed (%x)\n\n", result);
+               fprintf(stderr, "Error: nvrtcCompileProgram failed (%d)\n\n", (int)result);
 
                size_t log_size;
                nvrtcGetProgramLogSize(prog, &log_size);
@@ -128,14 +128,14 @@ bool compile_cuda(CompilationSettings &settings)
        size_t ptx_size;
        result = nvrtcGetPTXSize(prog, &ptx_size);
        if(result != NVRTC_SUCCESS) {
-               fprintf(stderr, "Error: nvrtcGetPTXSize failed (%x)\n\n", result);
+               fprintf(stderr, "Error: nvrtcGetPTXSize failed (%d)\n\n", (int)result);
                return false;
        }
 
        vector<char> ptx_code(ptx_size);
        result = nvrtcGetPTX(prog, &ptx_code[0]);
        if(result != NVRTC_SUCCESS) {
-               fprintf(stderr, "Error: nvrtcGetPTX failed (%x)\n\n", result);
+               fprintf(stderr, "Error: nvrtcGetPTX failed (%d)\n\n", (int)result);
                return false;
        }
 
@@ -148,7 +148,7 @@ bool compile_cuda(CompilationSettings &settings)
        return true;
 }
 
-bool link_ptxas(CompilationSettings &settings)
+static bool link_ptxas(CompilationSettings &settings)
 {
        string cudapath = "";
        if(settings.cuda_toolkit_dir.size())
@@ -166,7 +166,7 @@ bool link_ptxas(CompilationSettings &settings)
 
        int pxresult = system(ptx.c_str());
        if(pxresult) {
-               fprintf(stderr, "Error: ptxas failed (%x)\n\n", pxresult);
+               fprintf(stderr, "Error: ptxas failed (%d)\n\n", pxresult);
                return false;
        }
 
@@ -177,17 +177,19 @@ bool link_ptxas(CompilationSettings &settings)
        return true;
 }
 
-bool init(CompilationSettings &settings)
+static bool init(CompilationSettings &settings)
 {
 #ifdef _MSC_VER
        if(settings.cuda_toolkit_dir.size()) {
                SetDllDirectory((settings.cuda_toolkit_dir + "/bin").c_str());
        }
+#else
+       (void)settings;
 #endif
 
        int cuewresult = cuewInit(CUEW_INIT_NVRTC);
        if(cuewresult != CUEW_SUCCESS) {
-               fprintf(stderr, "Error: cuew init fialed (0x%x)\n\n", cuewresult);
+               fprintf(stderr, "Error: cuew init fialed (0x%d)\n\n", cuewresult);
                return false;
        }
 
@@ -229,7 +231,7 @@ bool init(CompilationSettings &settings)
        return true;
 }
 
-bool parse_parameters(int argc, const char **argv, CompilationSettings &settings)
+static bool parse_parameters(int argc, const char **argv, CompilationSettings &settings)
 {
        OIIO::ArgParse ap;
        ap.options("Usage: cycles_cubin_cc [options]",
index afb6315..920b100 100644 (file)
@@ -25,9 +25,7 @@ ccl_device_inline void compute_light_pass(KernelGlobals *kg,
                                           int pass_filter,
                                           int sample)
 {
-       /* initialize master radiance accumulator */
        kernel_assert(kernel_data.film.use_light_pass);
-       path_radiance_init(L, kernel_data.film.use_light_pass);
 
        PathRadiance L_sample;
        PathState state;
@@ -299,6 +297,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
 
        /* light passes */
        PathRadiance L;
+       path_radiance_init(&L, kernel_data.film.use_light_pass);
 
        shader_setup_from_sample(kg, &sd,
                                 P, Ng, Ng,
index 1df5050..44c8f79 100644 (file)
@@ -486,6 +486,9 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(
                float3 transmittance = volume_color_transmittance(coeff.sigma_t, t);
                new_tp = *throughput * transmittance;
        }
+       else {
+               new_tp = *throughput;
+       }
 
        /* integrate emission attenuated by extinction */
        if(L && (closure_flag & SD_EMISSION)) {
@@ -607,6 +610,9 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
                                transmittance = volume_color_transmittance(coeff.sigma_t, dt);
                                new_tp = tp * transmittance;
                        }
+                       else {
+                               new_tp = tp;
+                       }
 
                        /* integrate emission attenuated by absorption */
                        if(L && (closure_flag & SD_EMISSION)) {
index 9d7b7cd..d72a87a 100644 (file)
@@ -29,7 +29,7 @@ shader node_hsv(
 
        // remember: fmod doesn't work for negative numbers
        Color[0] = fmod(Color[0] + Hue + 0.5, 1.0);
-       Color[1] *= clamp(Saturation, 0.0, 1.0);
+       Color[1] = clamp(Color[1] * Saturation, 0.0, 1.0);
        Color[2] *= Value;
 
        Color = hsv_to_rgb(Color);
index 15d0747..0744ec1 100644 (file)
@@ -60,6 +60,8 @@ ccl_device_noinline float svm_ao(KernelGlobals *kg,
                ray.D = D.x*T + D.y*B + D.z*N;
                ray.t = max_dist;
                ray.time = sd->time;
+               ray.dP = sd->dP;
+               ray.dD = differential3_zero();
 
                if(flags & NODE_AO_ONLY_LOCAL) {
                        if(!scene_intersect_local(kg,
index e69a4ee..6f3efa6 100644 (file)
@@ -38,7 +38,7 @@ ccl_device void svm_node_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, ui
 
        /* remember: fmod doesn't work for negative numbers here */
        color.x = fmodf(color.x + hue + 0.5f, 1.0f);
-       color.y *= saturate(sat);
+       color.y = saturate(color.y * sat);
        color.z *= val;
 
        color = hsv_to_rgb(color);
index bbc9f61..2fceee0 100644 (file)
@@ -536,8 +536,6 @@ void DenoiseTask::free()
 
 DenoiseImage::DenoiseImage()
 {
-       in = NULL;
-
        width = 0;
        height = 0;
        num_channels = 0;
@@ -552,7 +550,6 @@ DenoiseImage::~DenoiseImage()
 void DenoiseImage::close_input()
 {
        in_neighbors.clear();
-       in.reset();
 }
 
 void DenoiseImage::free()
@@ -662,13 +659,13 @@ bool DenoiseImage::load(const string& in_filepath, string& error)
                return false;
        }
 
-       in.reset(ImageInput::open(in_filepath));
+       unique_ptr<ImageInput> in(ImageInput::open(in_filepath));
        if(!in) {
                error = "Couldn't open file: " + in_filepath;
                return false;
        }
 
-       const ImageSpec &in_spec = in->spec();
+       in_spec = in->spec();
        width = in_spec.width;
        height = in_spec.height;
        num_channels = in_spec.nchannels;
@@ -725,7 +722,7 @@ bool DenoiseImage::load_neighbors(const vector<string>& filepaths, const vector<
 
                foreach(DenoiseImageLayer& layer, layers) {
                        if(!layer.match_channels(neighbor,
-                                                in->spec().channelnames,
+                                                in_spec.channelnames,
                                                 neighbor_spec.channelnames))
                        {
                                error = "Neighbor frame misses denoising data passes: " + filepath;
@@ -742,7 +739,7 @@ bool DenoiseImage::load_neighbors(const vector<string>& filepaths, const vector<
 bool DenoiseImage::save_output(const string& out_filepath, string& error)
 {
        /* Save image with identical dimensions, channels and metadata. */
-       ImageSpec out_spec = in->spec();
+       ImageSpec out_spec = in_spec;
 
        /* Ensure that the output frame contains sample information even if the input didn't. */
        for(int i = 0; i < layers.size(); i++) {
index 85a1c7d..5bf1a8d 100644 (file)
@@ -117,7 +117,7 @@ public:
        array<float> pixels;
 
        /* Image file handles */
-       unique_ptr<ImageInput> in;
+       ImageSpec in_spec;
        vector<unique_ptr<ImageInput>> in_neighbors;
 
        /* Render layers */
index 4d30e3f..f3c6077 100644 (file)
@@ -26,7 +26,17 @@ thread::thread(function<void()> run_cb, int node)
     joined_(false),
        node_(node)
 {
-       thread_ = std::thread(&thread::run, this);
+#ifdef __APPLE__
+       /* Set the stack size to 2MB to match Linux. The default 512KB on macOS is
+        * too small for Embree, and consistent stack size also makes things more
+        * predictable in general. */
+       pthread_attr_t attribute;
+       pthread_attr_init(&attribute);
+       pthread_attr_setstacksize(&attribute, 1024*1024*2);
+       pthread_create(&pthread_id, &attribute, run, (void*)this);
+#else
+       std_thread = std::thread(&thread::run, this);
+#endif
 }
 
 thread::~thread()
@@ -49,13 +59,17 @@ void *thread::run(void *arg)
 bool thread::join()
 {
        joined_ = true;
+#ifdef __APPLE__
+       return pthread_join(pthread_id, NULL) == 0;
+#else
        try {
-               thread_.join();
+               std_thread.join();
                return true;
        }
        catch (const std::system_error&) {
                return false;
        }
+#endif
 }
 
 CCL_NAMESPACE_END
index 9ae9af2..793d441 100644 (file)
@@ -41,8 +41,8 @@ typedef std::mutex thread_mutex;
 typedef std::unique_lock<std::mutex> thread_scoped_lock;
 typedef std::condition_variable thread_condition_variable;
 
-/* own pthread based implementation, to avoid boost version conflicts with
- * dynamically loaded blender plugins */
+/* Own thread implementation similar to std::thread, so we can set a
+ * custom stack size on macOS. */
 
 class thread {
 public:
@@ -56,7 +56,11 @@ public:
 
 protected:
        function<void()> run_cb_;
-       std::thread thread_;
+#ifdef __APPLE__
+       pthread_t pthread_id;
+#else
+       std::thread std_thread;
+#endif
        bool joined_;
        int node_;
 };
index d5bb157..fc7878d 100644 (file)
@@ -857,7 +857,7 @@ void hue_sat(float hue, float sat, float value, float fac, vec4 col, out vec4 ou
        rgb_to_hsv(col, hsv);
 
        hsv[0] = fract(hsv[0] + hue + 0.5);
-       hsv[1] = hsv[1] * clamp(sat, 0.0, 1.0);
+       hsv[1] = clamp(hsv[1] * sat, 0.0, 1.0);
        hsv[2] = hsv[2] * value;
 
        hsv_to_rgb(hsv, outcol);
index af802e9..392830d 100644 (file)
@@ -46,7 +46,7 @@ static void do_hue_sat_fac(bNode *UNUSED(node), float *out, float hue, float sat
 
                rgb_to_hsv(in[0], in[1], in[2], hsv, hsv + 1, hsv + 2);
                hsv[0] = fmodf(hsv[0] + hue + 0.5f, 1.0f);
-               hsv[1] *= clamp_f(sat, 0.0f, 1.0f);
+               hsv[1] = clamp_f(hsv[1] * sat, 0.0f, 1.0f);
                hsv[2] *= val;
                hsv_to_rgb(hsv[0], hsv[1], hsv[2], col, col + 1, col + 2);