Cycles: Add single channel texture support for OpenCL.
author Thomas Dinges <blender@dingto.org>
Sun, 14 Aug 2016 18:21:08 +0000 (20:21 +0200)
committer Thomas Dinges <blender@dingto.org>
Sun, 14 Aug 2016 18:21:08 +0000 (20:21 +0200)
This way, OpenCL devices can also benefit from a smaller memory footprint when using single-channel images, e.g. bump maps (greyscale, 1 channel).

Additional target for my GSoC 2016.

intern/cycles/kernel/kernel_compat_cpu.h
intern/cycles/kernel/kernel_textures.h
intern/cycles/kernel/svm/svm_image.h
intern/cycles/render/image.cpp
intern/cycles/render/scene.h
intern/cycles/util/util_texture.h

index c882b477c350f2a7a877d61e6349de2f408977ea..3775934f293966f53c3918376b750916f03c2e73 100644 (file)
@@ -495,6 +495,7 @@ typedef texture<uint> texture_uint;
 typedef texture<int> texture_int;
 typedef texture<uint4> texture_uint4;
 typedef texture<uchar4> texture_uchar4;
+typedef texture<uchar> texture_uchar;
 typedef texture_image<float> texture_image_float;
 typedef texture_image<uchar> texture_image_uchar;
 typedef texture_image<half> texture_image_half;
index 7d6fec023312d2a11fd27ba16087bb22a3946c38..8d5bb75a428fdb4a542d6818a1ea4ac9e790ad84 100644 (file)
@@ -188,6 +188,8 @@ KERNEL_TEX(uint, texture_uint, __bindless_mapping)
 /* packed image (opencl) */
 KERNEL_TEX(uchar4, texture_uchar4, __tex_image_byte4_packed)
 KERNEL_TEX(float4, texture_float4, __tex_image_float4_packed)
+KERNEL_TEX(uchar, texture_uchar, __tex_image_byte_packed)
+KERNEL_TEX(float, texture_float, __tex_image_float_packed)
 KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info)
 
 #undef KERNEL_TEX
index 9050ce9395178e2da5b7db611359a323de5dbf80..5d02be1fa2f58437558dcd2912e069c3591860e6 100644 (file)
@@ -36,13 +36,26 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
 {
-       if(id >= TEX_NUM_FLOAT4_IMAGES) {
+       /* Float4 */
+       if(id < TEX_START_BYTE4_OPENCL) {
+               return kernel_tex_fetch(__tex_image_float4_packed, offset);
+       }
+       /* Byte4 */
+       else if(id < TEX_START_FLOAT_OPENCL) {
                uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset);
                float f = 1.0f/255.0f;
                return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
        }
+       /* Float */
+       else if(id < TEX_START_BYTE_OPENCL) {
+               float f = kernel_tex_fetch(__tex_image_float_packed, offset);
+               return make_float4(f, f, f, 1.0f);
+       }
+       /* Byte */
        else {
-               return kernel_tex_fetch(__tex_image_float4_packed, offset);
+               uchar r = kernel_tex_fetch(__tex_image_byte_packed, offset);
+               float f = r * (1.0f/255.0f);
+               return make_float4(f, f, f, 1.0f);
        }
 }
 
index 284af5f90f71daeec8a2e56a58db963aade5f7c5..24543601ef9bacc2ea8ac4b4bc330860857a859d 100644 (file)
@@ -284,7 +284,7 @@ int ImageManager::add_image(const string& filename,
        if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4)
                is_float = true;
 
-       /* No single channel and half textures on CUDA (Fermi) and OpenCL, use available slots */
+       /* No single channel and half textures on CUDA (Fermi) and no half on OpenCL, use available slots */
        if((type == IMAGE_DATA_TYPE_FLOAT ||
            type == IMAGE_DATA_TYPE_HALF4 ||
            type == IMAGE_DATA_TYPE_HALF) &&
@@ -1105,10 +1105,11 @@ void ImageManager::device_pack_images(Device *device,
        size_t size = 0, offset = 0;
        ImageDataType type;
 
-       int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4];
+       int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4]
+                       + tex_num_images[IMAGE_DATA_TYPE_FLOAT] + tex_num_images[IMAGE_DATA_TYPE_BYTE];
        uint4 *info = dscene->tex_image_packed_info.resize(info_size);
 
-       /* Byte Textures*/
+       /* Byte4 Textures*/
        type = IMAGE_DATA_TYPE_BYTE4;
 
        for(size_t slot = 0; slot < images[type].size(); slot++) {
@@ -1119,7 +1120,7 @@ void ImageManager::device_pack_images(Device *device,
                size += tex_img.size();
        }
 
-       uchar4 *pixels_byte = dscene->tex_image_byte4_packed.resize(size);
+       uchar4 *pixels_byte4 = dscene->tex_image_byte4_packed.resize(size);
 
        for(size_t slot = 0; slot < images[type].size(); slot++) {
                if(!images[type][slot])
@@ -1131,11 +1132,11 @@ void ImageManager::device_pack_images(Device *device,
 
                info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
 
-               memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
+               memcpy(pixels_byte4+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
                offset += tex_img.size();
        }
 
-       /* Float Textures*/
+       /* Float4 Textures*/
        type = IMAGE_DATA_TYPE_FLOAT4;
        size = 0, offset = 0;
 
@@ -1147,7 +1148,7 @@ void ImageManager::device_pack_images(Device *device,
                size += tex_img.size();
        }
 
-       float4 *pixels_float = dscene->tex_image_float4_packed.resize(size);
+       float4 *pixels_float4 = dscene->tex_image_float4_packed.resize(size);
 
        for(size_t slot = 0; slot < images[type].size(); slot++) {
                if(!images[type][slot])
@@ -1160,6 +1161,63 @@ void ImageManager::device_pack_images(Device *device,
                uint8_t options = pack_image_options(type, slot);
                info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
 
+               memcpy(pixels_float4+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
+               offset += tex_img.size();
+       }
+
+       /* Byte Textures*/
+       type = IMAGE_DATA_TYPE_BYTE;
+       size = 0, offset = 0;
+
+       for(size_t slot = 0; slot < images[type].size(); slot++) {
+               if(!images[type][slot])
+                       continue;
+
+               device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
+               size += tex_img.size();
+       }
+
+       uchar *pixels_byte = dscene->tex_image_byte_packed.resize(size);
+
+       for(size_t slot = 0; slot < images[type].size(); slot++) {
+               if(!images[type][slot])
+                       continue;
+
+               device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
+
+               uint8_t options = pack_image_options(type, slot);
+
+               info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
+
+               memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
+               offset += tex_img.size();
+       }
+
+       /* Float Textures*/
+       type = IMAGE_DATA_TYPE_FLOAT;
+       size = 0, offset = 0;
+
+       for(size_t slot = 0; slot < images[type].size(); slot++) {
+               if(!images[type][slot])
+                       continue;
+
+               device_vector<float>& tex_img = dscene->tex_float_image[slot];
+               size += tex_img.size();
+       }
+
+       float *pixels_float = dscene->tex_image_float_packed.resize(size);
+
+       for(size_t slot = 0; slot < images[type].size(); slot++) {
+               if(!images[type][slot])
+                       continue;
+
+               device_vector<float>& tex_img = dscene->tex_float_image[slot];
+
+               /* todo: support 3D textures, only CPU for now */
+
+               uint8_t options = pack_image_options(type, slot);
+               info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
+
                memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
                offset += tex_img.size();
        }
@@ -1178,6 +1236,20 @@ void ImageManager::device_pack_images(Device *device,
                }
                device->tex_alloc("__tex_image_float4_packed", dscene->tex_image_float4_packed);
        }
+       if(dscene->tex_image_byte_packed.size()) {
+               if(dscene->tex_image_byte_packed.device_pointer) {
+                       thread_scoped_lock device_lock(device_mutex);
+                       device->tex_free(dscene->tex_image_byte_packed);
+               }
+               device->tex_alloc("__tex_image_byte_packed", dscene->tex_image_byte_packed);
+       }
+       if(dscene->tex_image_float_packed.size()) {
+               if(dscene->tex_image_float_packed.device_pointer) {
+                       thread_scoped_lock device_lock(device_mutex);
+                       device->tex_free(dscene->tex_image_float_packed);
+               }
+               device->tex_alloc("__tex_image_float_packed", dscene->tex_image_float_packed);
+       }
        if(dscene->tex_image_packed_info.size()) {
                if(dscene->tex_image_packed_info.device_pointer) {
                        thread_scoped_lock device_lock(device_mutex);
@@ -1208,10 +1280,14 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene)
 
        device->tex_free(dscene->tex_image_byte4_packed);
        device->tex_free(dscene->tex_image_float4_packed);
+       device->tex_free(dscene->tex_image_byte_packed);
+       device->tex_free(dscene->tex_image_float_packed);
        device->tex_free(dscene->tex_image_packed_info);
 
        dscene->tex_image_byte4_packed.clear();
        dscene->tex_image_float4_packed.clear();
+       dscene->tex_image_byte_packed.clear();
+       dscene->tex_image_float_packed.clear();
        dscene->tex_image_packed_info.clear();
 }
 
index 9e72f197ccebf5c51914c7095c42e344dc1878c4..8fec171b6fbc7a57ddcf128bd3bb818de4f1b1d5 100644 (file)
@@ -123,6 +123,8 @@ public:
        /* opencl images */
        device_vector<uchar4> tex_image_byte4_packed;
        device_vector<float4> tex_image_float4_packed;
+       device_vector<uchar> tex_image_byte_packed;
+       device_vector<float> tex_image_float_packed;
        device_vector<uint4> tex_image_packed_info;
 
        KernelData data;
index be1177d3be92fa48e33a8be19b2ce7adbd764d12..aff928ea2ee0c8635df66b468a53ab50b6a1ab11 100644 (file)
@@ -67,8 +67,8 @@ CCL_NAMESPACE_BEGIN
 #define TEX_NUM_FLOAT4_OPENCL  1024
 #define TEX_NUM_BYTE4_OPENCL   1024
 #define TEX_NUM_HALF4_OPENCL   0
-#define TEX_NUM_FLOAT_OPENCL   0
-#define TEX_NUM_BYTE_OPENCL            0
+#define TEX_NUM_FLOAT_OPENCL   1024
+#define TEX_NUM_BYTE_OPENCL            1024
 #define TEX_NUM_HALF_OPENCL            0
 #define TEX_START_FLOAT4_OPENCL        0
 #define TEX_START_BYTE4_OPENCL TEX_NUM_FLOAT4_OPENCL