Cycles: render passes for CUDA cards with compute model >= 2.x.
author: Brecht Van Lommel <brechtvanlommel@pandora.be>
Thu, 26 Jan 2012 19:07:01 +0000 (19:07 +0000)
committer: Brecht Van Lommel <brechtvanlommel@pandora.be>
Thu, 26 Jan 2012 19:07:01 +0000 (19:07 +0000)
12 files changed:
intern/cycles/blender/blender_session.cpp
intern/cycles/device/device.cpp
intern/cycles/device/device.h
intern/cycles/device/device_cpu.cpp
intern/cycles/device/device_cuda.cpp
intern/cycles/device/device_multi.cpp
intern/cycles/device/device_network.cpp
intern/cycles/device/device_opencl.cpp
intern/cycles/kernel/kernel_passes.h
intern/cycles/kernel/kernel_types.h
intern/cycles/render/buffers.cpp
intern/cycles/render/svm.cpp

index 5e3102fd7c707010e1207e6d33670f8965c1f277..d9adc5480dc473c35c941316e0dc5d4c6f3bc77f 100644 (file)
@@ -206,7 +206,7 @@ void BlenderSession::render()
                vector<Pass> passes;
                Pass::add(PASS_COMBINED, passes);
 
-               if(session_params.device.type == DEVICE_CPU) { /* todo */
+               if(session_params.device.advanced_shading) {
                        BL::RenderLayer::passes_iterator b_pass_iter;
                        
                        for(b_rlay.passes.begin(b_pass_iter); b_pass_iter != b_rlay.passes.end(); ++b_pass_iter) {
index e4beb4d7d8cfd9a9bf732b122e58fa1e0b7717fd..cceec8b8e5c73d5c7a3dc04874321c049dfd6553 100644 (file)
@@ -183,6 +183,9 @@ Device *Device::create(DeviceInfo& info, bool background, int threads)
                        return NULL;
        }
 
+       if(device)
+               device->info = info;
+
        return device;
 }
 
index b8fea4c4c6900f512d5ca4592d2cef981a8ce34b..af2567498d920a072bdbc1cc45c0308bf9fb33bf 100644 (file)
@@ -51,6 +51,7 @@ public:
        string id;
        int num;
        bool display_device;
+       bool advanced_shading;
        vector<DeviceInfo> multi_devices;
 
        DeviceInfo()
@@ -59,6 +60,7 @@ public:
                id = "CPU";
                num = 0;
                display_device = false;
+               advanced_shading = true;
        }
 };
 
@@ -101,10 +103,8 @@ protected:
 public:
        virtual ~Device() {}
 
-       virtual bool support_full_kernel() = 0;
-
        /* info */
-       virtual string description() = 0;
+       DeviceInfo info;
        virtual const string& error_message() { return error_msg; }
 
        /* regular memory */
index 2ca599f6c674c07bb30fdd8a5e227e645a54dffc..da977ed8472e6cf17368457d0c2bfc20e272e9f5 100644 (file)
@@ -72,16 +72,11 @@ public:
                kernel_globals_free(kg);
        }
 
-       bool support_full_kernel()
+       bool support_advanced_shading()
        {
                return true;
        }
 
-       string description()
-       {
-               return system_cpu_brand_string();
-       }
-
        void mem_alloc(device_memory& mem, MemoryType type)
        {
                mem.device_pointer = mem.data_pointer;
@@ -271,6 +266,7 @@ void device_cpu_info(vector<DeviceInfo>& devices)
        info.description = system_cpu_brand_string();
        info.id = "CPU";
        info.num = 0;
+       info.advanced_shading = true;
 
        devices.insert(devices.begin(), info);
 }
index 55b467fc856ab98a76c47952a2bd5517e42a37c0..14bcaa94130651a70f7ee053e9e6cc7abcc3d8e0 100644 (file)
@@ -194,26 +194,6 @@ public:
                cuda_assert(cuCtxDetach(cuContext))
        }
 
-       bool support_full_kernel()
-       {
-               int major, minor;
-               cuDeviceComputeCapability(&major, &minor, cuDevId);
-
-               return (major >= 2);
-       }
-
-       string description()
-       {
-               /* print device information */
-               char deviceName[256];
-
-               cuda_push_context();
-               cuDeviceGetName(deviceName, 256, cuDevId);
-               cuda_pop_context();
-
-               return string("CUDA ") + deviceName;
-       }
-
        bool support_device(bool experimental)
        {
                if(!experimental) {
@@ -881,6 +861,10 @@ void device_cuda_info(vector<DeviceInfo>& devices)
                info.id = string_printf("CUDA_%d", num);
                info.num = num;
 
+               int major, minor;
+               cuDeviceComputeCapability(&major, &minor, num);
+               info.advanced_shading = (major >= 2);
+
                /* if device has a kernel timeout, assume it is used for display */
                if(cuDeviceGetAttribute(&attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num) == CUDA_SUCCESS && attr == 1) {
                        info.display_device = true;
index 41d0e268526e1ba4bb7132331c075a51db28ca50..375719133b85432b39e2df27a5c0107d76cc8bf3 100644 (file)
@@ -76,16 +76,6 @@ public:
                        delete sub.device;
        }
 
-       bool support_full_kernel()
-       {
-               foreach(SubDevice& sub, devices) {
-                       if(!sub.device->support_full_kernel())
-                               return false;
-               }
-
-               return true;
-       }
-
        const string& error_message()
        {
                foreach(SubDevice& sub, devices) {
@@ -99,38 +89,6 @@ public:
                return error_msg;
        }
 
-       string description()
-       {
-               /* create map to find duplicate descriptions */
-               map<string, int> dupli_map;
-               map<string, int>::iterator dt;
-
-               foreach(SubDevice& sub, devices) {
-                       string key = sub.device->description();
-
-                       if(dupli_map.find(key) == dupli_map.end())
-                               dupli_map[key] = 1;
-                       else
-                               dupli_map[key]++;
-               }
-
-               /* generate string */
-               stringstream desc;
-               bool first = true;
-
-               for(dt = dupli_map.begin(); dt != dupli_map.end(); dt++) {
-                       if(!first) desc << ", ";
-                       first = false;
-
-                       if(dt->second > 1)
-                               desc << dt->second << "x " << dt->first;
-                       else
-                               desc << dt->first;
-               }
-
-               return desc.str();
-       }
-
        bool load_kernels(bool experimental)
        {
                foreach(SubDevice& sub, devices)
@@ -344,6 +302,8 @@ static void device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool
        map<string, int>::iterator dt;
        int num_added = 0, num_display = 0;
 
+       info.advanced_shading = true;
+
        foreach(DeviceInfo& subinfo, devices) {
                if(subinfo.type == type) {
                        if(subinfo.display_device) {
@@ -363,6 +323,8 @@ static void device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool
                        info.multi_devices.push_back(subinfo);
                        if(subinfo.display_device)
                                info.display_device = true;
+                       if(!subinfo.advanced_shading)
+                               info.advanced_shading = false;
                        num_added++;
                }
        }
index 14518b1507ec82b84f1271f846f3dd6541596ad9..931890b5859a951f47695637160202aa9d9da801 100644 (file)
@@ -57,24 +57,6 @@ public:
        {
        }
 
-       bool support_full_kernel()
-       {
-               return false;
-       }
-
-       string description()
-       {
-               RPCSend snd(socket, "description");
-               snd.write();
-
-               RPCReceive rcv(socket);
-               string desc_string;
-
-               *rcv.archive & desc_string;
-
-               return desc_string + " (remote)";
-       }
-
        void mem_alloc(device_memory& mem, MemoryType type)
        {
 #if 0
index ccfd854436261f0de34c2ebdb4d82a937ba61da9..9a55f95789507f1fd92a71f78751fc7c8d87c756 100644 (file)
@@ -453,20 +453,6 @@ public:
                        clReleaseContext(cxContext);
        }
 
-       bool support_full_kernel()
-       {
-               return false;
-       }
-
-       string description()
-       {
-               char name[1024];
-
-               clGetDeviceInfo(cdDevice, CL_DEVICE_NAME, sizeof(name), &name, NULL);
-
-               return string("OpenCL ") + name;
-       }
-
        void mem_alloc(device_memory& mem, MemoryType type)
        {
                size_t size = mem.memory_size();
@@ -750,6 +736,7 @@ void device_opencl_info(vector<DeviceInfo>& devices)
                info.num = num;
                /* we don't know if it's used for display, but assume it is */
                info.display_device = true;
+               info.advanced_shading = false;
 
                devices.push_back(info);
        }
index cfd73c98bade11e6b4521f367b0ee56852e799b1..a1b3b0e9038bca992a5ba0b6fb03241f59ca1caf 100644 (file)
@@ -36,7 +36,7 @@ __device_inline void kernel_write_pass_float4(__global float *buffer, int sample
        *buf = (sample == 0)? value: *buf + value;
 }
 
-__device void kernel_write_data_passes(KernelGlobals *kg, __global float *buffer, PathRadiance *L,
+__device_inline void kernel_write_data_passes(KernelGlobals *kg, __global float *buffer, PathRadiance *L,
        ShaderData *sd, int sample, int path_flag, float3 throughput)
 {
 #ifdef __PASSES__
@@ -86,7 +86,7 @@ __device void kernel_write_data_passes(KernelGlobals *kg, __global float *buffer
 #endif
 }
 
-__device void kernel_write_light_passes(KernelGlobals *kg, __global float *buffer, PathRadiance *L, int sample)
+__device_inline void kernel_write_light_passes(KernelGlobals *kg, __global float *buffer, PathRadiance *L, int sample)
 {
 #ifdef __PASSES__
        int flag = kernel_data.film.pass_flag;
index b4b1da831627bcd93e040cbc2c25cd2dd011881b..9ebe4120e3620528aa76878808a3ca9440c1fbc1 100644 (file)
@@ -185,6 +185,9 @@ typedef float3 PathThroughput;
 struct PathRadiance {
        int use_light_pass;
 
+       float3 emission;
+       float3 background;
+
        float3 indirect;
        float3 direct_throughput;
        float3 direct_emission;
@@ -200,9 +203,6 @@ struct PathRadiance {
        float3 indirect_diffuse;
        float3 indirect_glossy;
        float3 indirect_transmission;
-
-       float3 emission;
-       float3 background;
 };
 
 struct BsdfEval {
index 56219482ef0716cd8e94da07a88802f710013227..361ead3cd24e903bd9eacc22c51b88fd8665f428 100644 (file)
@@ -130,7 +130,7 @@ bool RenderBuffers::copy_from_device()
        if(!buffer.device_pointer)
                return false;
 
-       device->mem_copy_from(buffer, 0, params.width, params.height, sizeof(float4));
+       device->mem_copy_from(buffer, 0, params.width, params.height, params.get_passes_size()*sizeof(float));
 
        return true;
 }
index f088a8143cc46157520f33bca3b577b0d288b30f..ae666ddfe68aed39c4af005ea74090a5f99b7777 100644 (file)
@@ -58,7 +58,7 @@ void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene
        }
        
        bool sunsky_done = false;
-       bool use_multi_closure = device->support_full_kernel();
+       bool use_multi_closure = device->info.advanced_shading;
 
        for(i = 0; i < scene->shaders.size(); i++) {
                Shader *shader = scene->shaders[i];