Cycles: Change code order for Image Data Types.
author Thomas Dinges <blender@dingto.org>
Thu, 11 Aug 2016 20:30:03 +0000 (22:30 +0200)
committer Thomas Dinges <blender@dingto.org>
Thu, 11 Aug 2016 20:30:03 +0000 (22:30 +0200)
The 4-component types now come first (float4, byte4, half4), followed by the 1-component types (float, byte, half).
This makes the code a bit more consistent and also reduces the amount of code needed when enabling half support on the GPU in the next commit.

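This also exposed a typo in the half CPU image lookup for 3D textures: the half and half4 image arrays were swapped in kernel_tex_image_interp_3d_ex_impl. That code path wasn't used yet, but it is good to have it fixed anyway.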

intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
intern/cycles/kernel/svm/svm_image.h
intern/cycles/render/image.cpp
intern/cycles/render/image.h
intern/cycles/util/util_texture.h

index 47383140170229bdb3031fdff26ad2b9575fcf3c..af68907a5c2ff3da081fb03fbfe49f8868e86ef0 100644 (file)
@@ -25,12 +25,12 @@ ccl_device float4 kernel_tex_image_interp_impl(KernelGlobals *kg, int tex, float
 {
        if(tex >= TEX_START_HALF_CPU)
                return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp(x, y);
-       else if(tex >= TEX_START_HALF4_CPU)
-               return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp(x, y);
        else if(tex >= TEX_START_BYTE_CPU)
                return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp(x, y);
        else if(tex >= TEX_START_FLOAT_CPU)
                return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp(x, y);
+       else if(tex >= TEX_START_HALF4_CPU)
+               return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp(x, y);
        else if(tex >= TEX_START_BYTE4_CPU)
                return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp(x, y);
        else
@@ -41,12 +41,12 @@ ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, fl
 {
        if(tex >= TEX_START_HALF_CPU)
                return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d(x, y, z);
-       else if(tex >= TEX_START_HALF4_CPU)
-               return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d(x, y, z);
        else if(tex >= TEX_START_BYTE_CPU)
                return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d(x, y, z);
        else if(tex >= TEX_START_FLOAT_CPU)
                return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d(x, y, z);
+       else if(tex >= TEX_START_HALF4_CPU)
+               return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d(x, y, z);
        else if(tex >= TEX_START_BYTE4_CPU)
                return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d(x, y, z);
        else
@@ -57,13 +57,13 @@ ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, fl
 ccl_device float4 kernel_tex_image_interp_3d_ex_impl(KernelGlobals *kg, int tex, float x, float y, float z, int interpolation)
 {
        if(tex >= TEX_START_HALF_CPU)
-               return kg->texture_half4_images[tex - TEX_START_HALF_CPU].interp_3d_ex(x, y, z, interpolation);
-       else if(tex >= TEX_START_HALF4_CPU)
-               return kg->texture_half_images[tex - TEX_START_HALF4_CPU].interp_3d_ex(x, y, z, interpolation);
+               return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d_ex(x, y, z, interpolation);
        else if(tex >= TEX_START_BYTE_CPU)
                return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d_ex(x, y, z, interpolation);
        else if(tex >= TEX_START_FLOAT_CPU)
                return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d_ex(x, y, z, interpolation);
+       else if(tex >= TEX_START_HALF4_CPU)
+               return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d_ex(x, y, z, interpolation);
        else if(tex >= TEX_START_BYTE4_CPU)
                return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d_ex(x, y, z, interpolation);
        else
index b6b90dfff81fc93e8b8f336c717a1097edddd9dc..f359829374d784040d3c408f0b485e914ae1102f 100644 (file)
@@ -277,7 +277,7 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
        }
 #  else
        CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
-       if(id < 2048) /* TODO(dingto): Make this a variable */
+       if(id < TEX_START_FLOAT_CUDA_KEPLER)
                r = kernel_tex_image_interp_float4(tex, x, y);
        else {
                float f = kernel_tex_image_interp_float(tex, x, y);
index 614620c14af343208442752196d0b8c82f691bf2..284af5f90f71daeec8a2e56a58db963aade5f7c5 100644 (file)
@@ -52,15 +52,15 @@ ImageManager::ImageManager(const DeviceInfo& info)
        { \
                tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_ ## ARCH; \
                tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_ ## ARCH; \
+               tex_num_images[IMAGE_DATA_TYPE_HALF4] = TEX_NUM_HALF4_ ## ARCH; \
                tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_ ## ARCH; \
                tex_num_images[IMAGE_DATA_TYPE_BYTE] = TEX_NUM_BYTE_ ## ARCH; \
-               tex_num_images[IMAGE_DATA_TYPE_HALF4] = TEX_NUM_HALF4_ ## ARCH; \
                tex_num_images[IMAGE_DATA_TYPE_HALF] = TEX_NUM_HALF_ ## ARCH; \
                tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_START_FLOAT4_ ## ARCH; \
                tex_start_images[IMAGE_DATA_TYPE_BYTE4] = TEX_START_BYTE4_ ## ARCH; \
+               tex_start_images[IMAGE_DATA_TYPE_HALF4] = TEX_START_HALF4_ ## ARCH; \
                tex_start_images[IMAGE_DATA_TYPE_FLOAT] = TEX_START_FLOAT_ ## ARCH; \
                tex_start_images[IMAGE_DATA_TYPE_BYTE] = TEX_START_BYTE_ ## ARCH; \
-               tex_start_images[IMAGE_DATA_TYPE_HALF4] = TEX_START_HALF4_ ## ARCH; \
                tex_start_images[IMAGE_DATA_TYPE_HALF] = TEX_START_HALF_ ## ARCH; \
        }
 
@@ -82,15 +82,15 @@ ImageManager::ImageManager(const DeviceInfo& info)
                /* Should not happen. */
                tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = 0;
                tex_num_images[IMAGE_DATA_TYPE_BYTE4] = 0;
+               tex_num_images[IMAGE_DATA_TYPE_HALF4] = 0;
                tex_num_images[IMAGE_DATA_TYPE_FLOAT] = 0;
                tex_num_images[IMAGE_DATA_TYPE_BYTE] = 0;
-               tex_num_images[IMAGE_DATA_TYPE_HALF4] = 0;
                tex_num_images[IMAGE_DATA_TYPE_HALF] = 0;
                tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = 0;
                tex_start_images[IMAGE_DATA_TYPE_BYTE4] = 0;
+               tex_start_images[IMAGE_DATA_TYPE_HALF4] = 0;
                tex_start_images[IMAGE_DATA_TYPE_FLOAT] = 0;
                tex_start_images[IMAGE_DATA_TYPE_BYTE] = 0;
-               tex_start_images[IMAGE_DATA_TYPE_HALF4] = 0;
                tex_start_images[IMAGE_DATA_TYPE_HALF] = 0;
                assert(0);
        }
@@ -216,7 +216,7 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen
 }
 
 /* We use a consecutive slot counting scheme on the devices, in order
- * float4, byte4, float, byte.
+ * float4, byte4, half4, float, byte, half.
  * These functions convert the slot ids from ImageManager "images" ones
  * to device ones and vice versa. */
 int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type)
index 07998684b2346ca27bb04f594d84aa3f4629ec4e..cca71a6bb93b8665834dec9e3ee229305b9205f5 100644 (file)
@@ -39,9 +39,9 @@ public:
        enum ImageDataType {
                IMAGE_DATA_TYPE_FLOAT4 = 0,
                IMAGE_DATA_TYPE_BYTE4 = 1,
-               IMAGE_DATA_TYPE_FLOAT = 2,
-               IMAGE_DATA_TYPE_BYTE = 3,
-               IMAGE_DATA_TYPE_HALF4 = 4,
+               IMAGE_DATA_TYPE_HALF4 = 2,
+               IMAGE_DATA_TYPE_FLOAT = 3,
+               IMAGE_DATA_TYPE_BYTE = 4,
                IMAGE_DATA_TYPE_HALF = 5,
 
                IMAGE_DATA_NUM_TYPES
index 0cecfe91ea9317ffa385b85183b314f27fc4e1d3..ec3ee2b81917d7b0760ad58053afdaadaa0284e1 100644 (file)
@@ -24,58 +24,58 @@ CCL_NAMESPACE_BEGIN
 /* CPU */
 #define TEX_NUM_FLOAT4_CPU             1024
 #define TEX_NUM_BYTE4_CPU              1024
+#define TEX_NUM_HALF4_CPU              1024
 #define TEX_NUM_FLOAT_CPU              1024
 #define TEX_NUM_BYTE_CPU               1024
-#define TEX_NUM_HALF4_CPU              1024
 #define TEX_NUM_HALF_CPU               1024
 #define TEX_START_FLOAT4_CPU   0
 #define TEX_START_BYTE4_CPU            TEX_NUM_FLOAT4_CPU
-#define TEX_START_FLOAT_CPU            (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU)
-#define TEX_START_BYTE_CPU             (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU)
-#define TEX_START_HALF4_CPU            (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU)
-#define TEX_START_HALF_CPU             (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU + TEX_NUM_HALF4_CPU)
+#define TEX_START_HALF4_CPU            (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU)
+#define TEX_START_FLOAT_CPU            (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU)
+#define TEX_START_BYTE_CPU             (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU + TEX_NUM_FLOAT_CPU)
+#define TEX_START_HALF_CPU             (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU)
 
 /* CUDA (Geforce 4xx and 5xx) */
 #define TEX_NUM_FLOAT4_CUDA            5
 #define TEX_NUM_BYTE4_CUDA             85
+#define TEX_NUM_HALF4_CUDA             0
 #define TEX_NUM_FLOAT_CUDA             0
 #define TEX_NUM_BYTE_CUDA              0
-#define TEX_NUM_HALF4_CUDA             0
 #define TEX_NUM_HALF_CUDA              0
 #define TEX_START_FLOAT4_CUDA  0
 #define TEX_START_BYTE4_CUDA   TEX_NUM_FLOAT4_CUDA
-#define TEX_START_FLOAT_CUDA   (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA)
-#define TEX_START_BYTE_CUDA            (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA)
-#define TEX_START_HALF4_CUDA   (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA)
-#define TEX_START_HALF_CUDA            (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA + TEX_NUM_HALF4_CUDA)
+#define TEX_START_HALF4_CUDA   (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA)
+#define TEX_START_FLOAT_CUDA   (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA)
+#define TEX_START_BYTE_CUDA            (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA)
+#define TEX_START_HALF_CUDA            (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA)
 
 /* CUDA (Kepler, Geforce 6xx and above) */
 #define TEX_NUM_FLOAT4_CUDA_KEPLER             1024
 #define TEX_NUM_BYTE4_CUDA_KEPLER              1024
+#define TEX_NUM_HALF4_CUDA_KEPLER              0
 #define TEX_NUM_FLOAT_CUDA_KEPLER              1024
 #define TEX_NUM_BYTE_CUDA_KEPLER               1024
-#define TEX_NUM_HALF4_CUDA_KEPLER              0
 #define TEX_NUM_HALF_CUDA_KEPLER               0
 #define TEX_START_FLOAT4_CUDA_KEPLER   0
 #define TEX_START_BYTE4_CUDA_KEPLER            TEX_NUM_FLOAT4_CUDA_KEPLER
-#define TEX_START_FLOAT_CUDA_KEPLER            (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER)
-#define TEX_START_BYTE_CUDA_KEPLER             (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER)
-#define TEX_START_HALF4_CUDA_KEPLER            (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER)
-#define TEX_START_HALF_CUDA_KEPLER             (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER)
+#define TEX_START_HALF4_CUDA_KEPLER            (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER)
+#define TEX_START_FLOAT_CUDA_KEPLER            (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER)
+#define TEX_START_BYTE_CUDA_KEPLER             (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER)
+#define TEX_START_HALF_CUDA_KEPLER             (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER)
 
 /* OpenCL */
 #define TEX_NUM_FLOAT4_OPENCL  1024
 #define TEX_NUM_BYTE4_OPENCL   1024
+#define TEX_NUM_HALF4_OPENCL   0
 #define TEX_NUM_FLOAT_OPENCL   0
 #define TEX_NUM_BYTE_OPENCL            0
-#define TEX_NUM_HALF4_OPENCL   0
 #define TEX_NUM_HALF_OPENCL            0
 #define TEX_START_FLOAT4_OPENCL        0
 #define TEX_START_BYTE4_OPENCL TEX_NUM_FLOAT4_OPENCL
-#define TEX_START_FLOAT_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL)
-#define TEX_START_BYTE_OPENCL  (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL)
-#define TEX_START_HALF4_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL)
-#define TEX_START_HALF_OPENCL  (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL + TEX_NUM_HALF4_OPENCL)
+#define TEX_START_HALF4_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL)
+#define TEX_START_FLOAT_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL)
+#define TEX_START_BYTE_OPENCL  (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL + TEX_NUM_FLOAT_OPENCL)
+#define TEX_START_HALF_OPENCL  (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL)
 
 
 /* Color to use when textures are not found. */