\0;115;0cCycles: Cleanup, use ccl_restrict instead of ccl_restrict_ptr
authorSergey Sharybin <sergey.vfx@gmail.com>
Fri, 19 May 2017 10:33:28 +0000 (12:33 +0200)
committerSergey Sharybin <sergey.vfx@gmail.com>
Fri, 19 May 2017 10:41:03 +0000 (12:41 +0200)
There were following issues with ccl_restrict_ptr:

- We already had ccl_restrict for all platforms.

- It was secretly adding `const` qualifier to the declaration,
  which is quite weird since non-const pointer can also be
  declared as restricted.

- We never in Blender are using foo_ptr or FooPtr type definitions,
  so not sure why we should introduce such a thing here.

- It is absolutely wrong from semantic point of view to put pointer
  into the restrict macro -- const is a part of type, not part of
  hint for compiler that some pointer is never aliased.

15 files changed:
intern/cycles/kernel/filter/filter_features.h
intern/cycles/kernel/filter/filter_features_sse.h
intern/cycles/kernel/filter/filter_nlm_cpu.h
intern/cycles/kernel/filter/filter_nlm_gpu.h
intern/cycles/kernel/filter/filter_prefilter.h
intern/cycles/kernel/filter/filter_reconstruction.h
intern/cycles/kernel/filter/filter_transform.h
intern/cycles/kernel/filter/filter_transform_gpu.h
intern/cycles/kernel/filter/filter_transform_sse.h
intern/cycles/kernel/kernel_compat_cpu.h
intern/cycles/kernel/kernel_compat_cuda.h
intern/cycles/kernel/kernel_compat_opencl.h
intern/cycles/kernel/kernels/cuda/filter.cu
intern/cycles/kernel/kernels/opencl/filter.cl
intern/cycles/util/util_math_matrix.h

index 41998c792b69c8c32e2d5593c5728fc48add7448..53d703de1430ca7fcaf6e57e7d352706731c7b46 100644 (file)
                                  pixel_buffer += buffer_w - (high.x - low.x); \
                              }
 
-ccl_device_inline void filter_get_features(int2 pixel, ccl_global float ccl_restrict_ptr buffer, float *features, float ccl_restrict_ptr mean, int pass_stride)
+ccl_device_inline void filter_get_features(int2 pixel,
+                                           const ccl_global float *ccl_restrict buffer,
+                                           float *features,
+                                           const float *ccl_restrict mean,
+                                           int pass_stride)
 {
        features[0] = pixel.x;
        features[1] = pixel.y;
@@ -46,7 +50,11 @@ ccl_device_inline void filter_get_features(int2 pixel, ccl_global float ccl_rest
        }
 }
 
-ccl_device_inline void filter_get_feature_scales(int2 pixel, ccl_global float ccl_restrict_ptr buffer, float *scales, float ccl_restrict_ptr mean, int pass_stride)
+ccl_device_inline void filter_get_feature_scales(int2 pixel,
+                                                 const ccl_global float *ccl_restrict buffer,
+                                                 float *scales,
+                                                 const float *ccl_restrict mean,
+                                                 int pass_stride)
 {
        scales[0] = fabsf(pixel.x - mean[0]);
        scales[1] = fabsf(pixel.y - mean[1]);
@@ -70,19 +78,21 @@ ccl_device_inline void filter_calculate_scale(float *scale)
        scale[3] = scale[4] = scale[5] = 1.0f/max(sqrtf(scale[3]), 0.01f);
 }
 
-ccl_device_inline float3 filter_get_pixel_color(ccl_global float ccl_restrict_ptr buffer, int pass_stride)
+ccl_device_inline float3 filter_get_pixel_color(const ccl_global float *ccl_restrict buffer,
+                                                int pass_stride)
 {
        return make_float3(ccl_get_feature(buffer, 0), ccl_get_feature(buffer, 1), ccl_get_feature(buffer, 2));
 }
 
-ccl_device_inline float filter_get_pixel_variance(ccl_global float ccl_restrict_ptr buffer, int pass_stride)
+ccl_device_inline float filter_get_pixel_variance(const ccl_global float *ccl_restrict buffer,
+                                                  int pass_stride)
 {
        return average(make_float3(ccl_get_feature(buffer, 0), ccl_get_feature(buffer, 1), ccl_get_feature(buffer, 2)));
 }
 
 ccl_device_inline void design_row_add(float *design_row,
                                       int rank,
-                                      ccl_global float ccl_restrict_ptr transform,
+                                      const ccl_global float *ccl_restrict transform,
                                       int stride,
                                       int row,
                                       float feature)
@@ -94,13 +104,13 @@ ccl_device_inline void design_row_add(float *design_row,
 
 /* Fill the design row. */
 ccl_device_inline void filter_get_design_row_transform(int2 p_pixel,
-                                                       ccl_global float ccl_restrict_ptr p_buffer,
+                                                       const ccl_global float *ccl_restrict p_buffer,
                                                        int2 q_pixel,
-                                                       ccl_global float ccl_restrict_ptr q_buffer,
+                                                       const ccl_global float *ccl_restrict q_buffer,
                                                        int pass_stride,
                                                        int rank,
                                                        float *design_row,
-                                                       ccl_global float ccl_restrict_ptr transform,
+                                                       const ccl_global float *ccl_restrict transform,
                                                        int stride)
 {
        design_row[0] = 1.0f;
index a242a8ed0a118246ffe67662b289e58f8dce5e1a..ad0978d0c1de17d80e08fa86106bc4953272a55f 100644 (file)
@@ -33,7 +33,12 @@ CCL_NAMESPACE_BEGIN
                                      pixel_buffer += buffer_w - (pixel.x - low.x); \
                                  }
 
-ccl_device_inline void filter_get_features_sse(__m128 x, __m128 y, __m128 active_pixels, float ccl_restrict_ptr buffer, __m128 *features, __m128 ccl_restrict_ptr mean, int pass_stride)
+ccl_device_inline void filter_get_features_sse(__m128 x, __m128 y,
+                                               __m128 active_pixels,
+                                               const float *ccl_restrict buffer,
+                                               __m128 *features,
+                                               const __m128 ccl_restrict *mean,
+                                               int pass_stride)
 {
        features[0] = x;
        features[1] = y;
@@ -53,7 +58,12 @@ ccl_device_inline void filter_get_features_sse(__m128 x, __m128 y, __m128 active
                features[i] = _mm_mask_ps(features[i], active_pixels);
 }
 
-ccl_device_inline void filter_get_feature_scales_sse(__m128 x, __m128 y, __m128 active_pixels, float ccl_restrict_ptr buffer, __m128 *scales, __m128 ccl_restrict_ptr mean, int pass_stride)
+ccl_device_inline void filter_get_feature_scales_sse(__m128 x, __m128 y,
+                                                     __m128 active_pixels,
+                                                     const float *ccl_restrict buffer,
+                                                     __m128 *scales,
+                                                     const __m128 *ccl_restrict mean,
+                                                     int pass_stride)
 {
        scales[0] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(x, mean[0])), active_pixels);
        scales[1] = _mm_mask_ps(_mm_fabs_ps(_mm_sub_ps(y, mean[1])), active_pixels);
index 1a314b100be07f3b83bbf9cb744ef3d5016d1412..572228119921eac0c83201f0b91062fd132c393d 100644 (file)
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy, float ccl_restrict_ptr weightImage, float ccl_restrict_ptr varianceImage, float *differenceImage, int4 rect, int w, int channel_offset, float a, float k_2)
+ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy,
+                                                         const float *ccl_restrict weightImage,
+                                                         const float *ccl_restrict varianceImage,
+                                                         float *differenceImage,
+                                                         int4 rect,
+                                                         int w,
+                                                         int channel_offset,
+                                                         float a,
+                                                         float k_2)
 {
        for(int y = rect.y; y < rect.w; y++) {
                for(int x = rect.x; x < rect.z; x++) {
@@ -36,7 +44,11 @@ ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy, float c
        }
 }
 
-ccl_device_inline void kernel_filter_nlm_blur(float ccl_restrict_ptr differenceImage, float *outImage, int4 rect, int w, int f)
+ccl_device_inline void kernel_filter_nlm_blur(const float *ccl_restrict differenceImage,
+                                              float *outImage,
+                                              int4 rect,
+                                              int w,
+                                              int f)
 {
 #ifdef __KERNEL_SSE3__
        int aligned_lowx = (rect.x & ~(3));
@@ -65,7 +77,11 @@ ccl_device_inline void kernel_filter_nlm_blur(float ccl_restrict_ptr differenceI
        }
 }
 
-ccl_device_inline void kernel_filter_nlm_calc_weight(float ccl_restrict_ptr differenceImage, float *outImage, int4 rect, int w, int f)
+ccl_device_inline void kernel_filter_nlm_calc_weight(const float *ccl_restrict differenceImage,
+                                                     float *outImage,
+                                                     int4 rect,
+                                                     int w,
+                                                     int f)
 {
        for(int y = rect.y; y < rect.w; y++) {
                for(int x = rect.x; x < rect.z; x++) {
@@ -90,7 +106,14 @@ ccl_device_inline void kernel_filter_nlm_calc_weight(float ccl_restrict_ptr diff
        }
 }
 
-ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy, float ccl_restrict_ptr differenceImage, float ccl_restrict_ptr image, float *outImage, float *accumImage, int4 rect, int w, int f)
+ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy,
+                                                       const float *ccl_restrict differenceImage,
+                                                       const float *ccl_restrict image,
+                                                       float *outImage,
+                                                       float *accumImage,
+                                                       int4 rect,
+                                                       int w,
+                                                       int f)
 {
        for(int y = rect.y; y < rect.w; y++) {
                for(int x = rect.x; x < rect.z; x++) {
@@ -108,8 +131,8 @@ ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy, float ccl
 }
 
 ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy,
-                                                           float ccl_restrict_ptr differenceImage,
-                                                           float ccl_restrict_ptr buffer,
+                                                           const float *ccl_restrict differenceImage,
+                                                           const float *ccl_restrict buffer,
                                                            float *color_pass,
                                                            float *variance_pass,
                                                            float *transform,
@@ -151,7 +174,10 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy,
        }
 }
 
-ccl_device_inline void kernel_filter_nlm_normalize(float *outImage, float ccl_restrict_ptr accumImage, int4 rect, int w)
+ccl_device_inline void kernel_filter_nlm_normalize(float *outImage,
+                                                   const float *ccl_restrict accumImage,
+                                                   int4 rect,
+                                                   int w)
 {
        for(int y = rect.y; y < rect.w; y++) {
                for(int x = rect.x; x < rect.z; x++) {
index b5ba7cf51a5b22c45a0243b79895a98eacf2966f..fd0a88340ea77a72d5fa6362deb8c3c6faa8432b 100644 (file)
@@ -18,8 +18,8 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_inline void kernel_filter_nlm_calc_difference(int x, int y,
                                                          int dx, int dy,
-                                                         ccl_global float ccl_restrict_ptr weightImage,
-                                                         ccl_global float ccl_restrict_ptr varianceImage,
+                                                         const ccl_global float *ccl_restrict weightImage,
+                                                         const ccl_global float *ccl_restrict varianceImage,
                                                          ccl_global float *differenceImage,
                                                          int4 rect, int w,
                                                          int channel_offset,
@@ -40,7 +40,7 @@ ccl_device_inline void kernel_filter_nlm_calc_difference(int x, int y,
 }
 
 ccl_device_inline void kernel_filter_nlm_blur(int x, int y,
-                                              ccl_global float ccl_restrict_ptr differenceImage,
+                                              const ccl_global float *ccl_restrict differenceImage,
                                               ccl_global float *outImage,
                                               int4 rect, int w, int f)
 {
@@ -55,7 +55,7 @@ ccl_device_inline void kernel_filter_nlm_blur(int x, int y,
 }
 
 ccl_device_inline void kernel_filter_nlm_calc_weight(int x, int y,
-                                                     ccl_global float ccl_restrict_ptr differenceImage,
+                                                     const ccl_global float *ccl_restrict differenceImage,
                                                      ccl_global float *outImage,
                                                      int4 rect, int w, int f)
 {
@@ -71,8 +71,8 @@ ccl_device_inline void kernel_filter_nlm_calc_weight(int x, int y,
 
 ccl_device_inline void kernel_filter_nlm_update_output(int x, int y,
                                                        int dx, int dy,
-                                                       ccl_global float ccl_restrict_ptr differenceImage,
-                                                       ccl_global float ccl_restrict_ptr image,
+                                                       const ccl_global float *ccl_restrict differenceImage,
+                                                       const ccl_global float *ccl_restrict image,
                                                        ccl_global float *outImage,
                                                        ccl_global float *accumImage,
                                                        int4 rect, int w, int f)
@@ -95,11 +95,11 @@ ccl_device_inline void kernel_filter_nlm_update_output(int x, int y,
 
 ccl_device_inline void kernel_filter_nlm_construct_gramian(int fx, int fy,
                                                            int dx, int dy,
-                                                           ccl_global float ccl_restrict_ptr differenceImage,
-                                                           ccl_global float ccl_restrict_ptr buffer,
+                                                           const ccl_global float *ccl_restrict differenceImage,
+                                                           const ccl_global float *ccl_restrict buffer,
                                                            ccl_global float *color_pass,
                                                            ccl_global float *variance_pass,
-                                                           ccl_global float ccl_restrict_ptr transform,
+                                                           const ccl_global float *ccl_restrict transform,
                                                            ccl_global int *rank,
                                                            ccl_global float *XtWX,
                                                            ccl_global float3 *XtWY,
@@ -138,7 +138,7 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int fx, int fy,
 
 ccl_device_inline void kernel_filter_nlm_normalize(int x, int y,
                                                    ccl_global float *outImage,
-                                                   ccl_global float ccl_restrict_ptr accumImage,
+                                                   const ccl_global float *ccl_restrict accumImage,
                                                    int4 rect, int w)
 {
        outImage[y*w+x] /= accumImage[y*w+x];
index 252bcc5e675ee88f1322de78d80611476a62bde6..82cc36625ec26c356d06c7a3592c7bbcd4e8c36c 100644 (file)
@@ -44,7 +44,7 @@ ccl_device void kernel_filter_divide_shadow(int sample,
 
        int offset = tiles->offsets[tile];
        int stride = tiles->strides[tile];
-       ccl_global float ccl_restrict_ptr center_buffer = (ccl_global float*) tiles->buffers[tile];
+       const ccl_global float *ccl_restrict center_buffer = (ccl_global float*) tiles->buffers[tile];
        center_buffer += (y*stride + x + offset)*buffer_pass_stride;
        center_buffer += buffer_denoising_offset + 14;
 
index 6a7c86e401299a01519022fd310aa0425a751af5..13e90f8233bd03dddba80b21529146818f096497 100644 (file)
@@ -21,10 +21,10 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y,
                                                        int dx, int dy,
                                                        int w, int h,
                                                        int pass_stride,
-                                                       ccl_global float ccl_restrict_ptr buffer,
+                                                       const ccl_global float *ccl_restrict buffer,
                                                        ccl_global float *color_pass,
                                                        ccl_global float *variance_pass,
-                                                       ccl_global float ccl_restrict_ptr transform,
+                                                       const ccl_global float *ccl_restrict transform,
                                                        ccl_global int *rank,
                                                        float weight,
                                                        ccl_global float *XtWX,
index 139dc402d216f81d72096a292a8b4ee783089589..4766f225fb1df3db0ccb254b8b5982571f32331e 100644 (file)
@@ -16,7 +16,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void kernel_filter_construct_transform(float ccl_restrict_ptr buffer,
+ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buffer,
                                                   int x, int y, int4 rect,
                                                   int pass_stride,
                                                   float *transform, int *rank,
@@ -29,7 +29,7 @@ ccl_device void kernel_filter_construct_transform(float ccl_restrict_ptr buffer,
        /* Temporary storage, used in different steps of the algorithm. */
        float tempmatrix[DENOISE_FEATURES*DENOISE_FEATURES];
        float tempvector[2*DENOISE_FEATURES];
-       float ccl_restrict_ptr pixel_buffer;
+       const float *ccl_restrict pixel_buffer;
        int2 pixel;
 
 
index 68304e14143a2ecbb05289554f0723c4171e09f2..2cd21224762e77d6b6770f36234f2ae92362a80b 100644 (file)
@@ -16,7 +16,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void kernel_filter_construct_transform(ccl_global float ccl_restrict_ptr buffer,
+ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_restrict buffer,
                                                   int x, int y, int4 rect,
                                                   int pass_stride,
                                                   ccl_global float *transform,
@@ -38,7 +38,7 @@ ccl_device void kernel_filter_construct_transform(ccl_global float ccl_restrict_
                              max(rect.y, y - radius));
        int2 high = make_int2(min(rect.z, x + radius + 1),
                              min(rect.w, y + radius + 1));
-       ccl_global float ccl_restrict_ptr pixel_buffer;
+       const ccl_global float *ccl_restrict pixel_buffer;
        int2 pixel;
 
 
index ed3a92f6241611c07699acf16770d193bbcf8fc6..9de51e2d86cf998ac89492e130db40562eb68fc2 100644 (file)
@@ -16,7 +16,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device void kernel_filter_construct_transform(float ccl_restrict_ptr buffer,
+ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buffer,
                                                   int x, int y, int4 rect,
                                                   int pass_stride,
                                                   float *transform, int *rank,
@@ -25,7 +25,7 @@ ccl_device void kernel_filter_construct_transform(float ccl_restrict_ptr buffer,
        int buffer_w = align_up(rect.z - rect.x, 4);
 
        __m128 features[DENOISE_FEATURES];
-       float ccl_restrict_ptr pixel_buffer;
+       const float *ccl_restrict pixel_buffer;
        int2 pixel;
 
        int2 low  = make_int2(max(rect.x, x - radius),
index 7595e74e2d569873541d23eefe2a87b714a14ace..21da180bb8efdec846e9562bf88881907a35483e 100644 (file)
@@ -42,8 +42,6 @@
 #include "util/util_types.h"
 #include "util/util_texture.h"
 
-#define ccl_restrict_ptr const * __restrict
-
 #define ccl_addr_space
 
 #define ccl_local_id(d) 0
index 80d7401fbcfefe6de44709bb025a1aacc1fa0bbb..988126f90e1cd8e668bed82c59430a43c50244cc 100644 (file)
@@ -55,7 +55,6 @@
 #define ccl_restrict __restrict__
 #define ccl_align(n) __align__(n)
 
-#define ccl_restrict_ptr const * __restrict__
 #define CCL_MAX_LOCAL_SIZE (CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH)
 
 
index 15cf4b81b21a210d5886c46bf68f3708387d1d65..c2263ac0d49cd215386dee7b98ad058d2fd13293 100644 (file)
@@ -50,8 +50,6 @@
 #  define ccl_addr_space
 #endif
 
-#define ccl_restrict_ptr const * __restrict__
-
 #define ccl_local_id(d) get_local_id(d)
 #define ccl_global_id(d) get_global_id(d)
 
index f812a6601c6c93891313038b69b4a8d10a99ab5d..30e1414f1e94bc369f2b815c3c259cffbb66d52b 100644 (file)
@@ -139,8 +139,8 @@ kernel_cuda_filter_construct_transform(float const* __restrict__ buffer,
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
 kernel_cuda_filter_nlm_calc_difference(int dx, int dy,
-                                       float ccl_restrict_ptr weightImage,
-                                       float ccl_restrict_ptr varianceImage,
+                                       const float *ccl_restrict weightImage,
+                                       const float *ccl_restrict varianceImage,
                                        float *differenceImage,
                                        int4 rect, int w,
                                        int channel_offset,
@@ -154,7 +154,7 @@ kernel_cuda_filter_nlm_calc_difference(int dx, int dy,
 
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_nlm_blur(float ccl_restrict_ptr differenceImage, float *outImage, int4 rect, int w, int f) {
+kernel_cuda_filter_nlm_blur(const float *ccl_restrict differenceImage, float *outImage, int4 rect, int w, int f) {
        int x = blockDim.x*blockIdx.x + threadIdx.x + rect.x;
        int y = blockDim.y*blockIdx.y + threadIdx.y + rect.y;
        if(x < rect.z && y < rect.w) {
@@ -164,7 +164,7 @@ kernel_cuda_filter_nlm_blur(float ccl_restrict_ptr differenceImage, float *outIm
 
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_nlm_calc_weight(float ccl_restrict_ptr differenceImage, float *outImage, int4 rect, int w, int f) {
+kernel_cuda_filter_nlm_calc_weight(const float *ccl_restrict differenceImage, float *outImage, int4 rect, int w, int f) {
        int x = blockDim.x*blockIdx.x + threadIdx.x + rect.x;
        int y = blockDim.y*blockIdx.y + threadIdx.y + rect.y;
        if(x < rect.z && y < rect.w) {
@@ -175,8 +175,8 @@ kernel_cuda_filter_nlm_calc_weight(float ccl_restrict_ptr differenceImage, float
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
 kernel_cuda_filter_nlm_update_output(int dx, int dy,
-                                     float ccl_restrict_ptr differenceImage,
-                                     float ccl_restrict_ptr image,
+                                     const float *ccl_restrict differenceImage,
+                                     const float *ccl_restrict image,
                                      float *outImage, float *accumImage,
                                      int4 rect, int w,
                                      int f) {
@@ -189,7 +189,7 @@ kernel_cuda_filter_nlm_update_output(int dx, int dy,
 
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_filter_nlm_normalize(float *outImage, float ccl_restrict_ptr accumImage, int4 rect, int w) {
+kernel_cuda_filter_nlm_normalize(float *outImage, const float *ccl_restrict accumImage, int4 rect, int w) {
        int x = blockDim.x*blockIdx.x + threadIdx.x + rect.x;
        int y = blockDim.y*blockIdx.y + threadIdx.y + rect.y;
        if(x < rect.z && y < rect.w) {
@@ -200,8 +200,8 @@ kernel_cuda_filter_nlm_normalize(float *outImage, float ccl_restrict_ptr accumIm
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
 kernel_cuda_filter_nlm_construct_gramian(int dx, int dy,
-                                         float ccl_restrict_ptr differenceImage,
-                                         float ccl_restrict_ptr buffer,
+                                         const float *ccl_restrict differenceImage,
+                                         const float *ccl_restrict buffer,
                                          float *color_pass,
                                          float *variance_pass,
                                          float const* __restrict__ transform,
index fbc3daa62b9eb5aa3f8a83a41e246fd65b779121..f7d177b45b04a64bfb759f5d20a9a3a6d9c26959 100644 (file)
@@ -106,7 +106,7 @@ __kernel void kernel_ocl_filter_combine_halves(ccl_global float *mean,
        }
 }
 
-__kernel void kernel_ocl_filter_construct_transform(ccl_global float ccl_restrict_ptr buffer,
+__kernel void kernel_ocl_filter_construct_transform(const ccl_global float *ccl_restrict buffer,
                                                     ccl_global float *transform,
                                                     ccl_global int *rank,
                                                     int4 filter_area,
@@ -132,8 +132,8 @@ __kernel void kernel_ocl_filter_construct_transform(ccl_global float ccl_restric
 
 __kernel void kernel_ocl_filter_nlm_calc_difference(int dx,
                                                     int dy,
-                                                    ccl_global float ccl_restrict_ptr weightImage,
-                                                    ccl_global float ccl_restrict_ptr varianceImage,
+                                                    const ccl_global float *ccl_restrict weightImage,
+                                                    const ccl_global float *ccl_restrict varianceImage,
                                                     ccl_global float *differenceImage,
                                                     int4 rect,
                                                     int w,
@@ -147,7 +147,7 @@ __kernel void kernel_ocl_filter_nlm_calc_difference(int dx,
        }
 }
 
-__kernel void kernel_ocl_filter_nlm_blur(ccl_global float ccl_restrict_ptr differenceImage,
+__kernel void kernel_ocl_filter_nlm_blur(const ccl_global float *ccl_restrict differenceImage,
                                          ccl_global float *outImage,
                                          int4 rect,
                                          int w,
@@ -159,7 +159,7 @@ __kernel void kernel_ocl_filter_nlm_blur(ccl_global float ccl_restrict_ptr diffe
        }
 }
 
-__kernel void kernel_ocl_filter_nlm_calc_weight(ccl_global float ccl_restrict_ptr differenceImage,
+__kernel void kernel_ocl_filter_nlm_calc_weight(const ccl_global float *ccl_restrict differenceImage,
                                                 ccl_global float *outImage,
                                                 int4 rect,
                                                 int w,
@@ -173,8 +173,8 @@ __kernel void kernel_ocl_filter_nlm_calc_weight(ccl_global float ccl_restrict_pt
 
 __kernel void kernel_ocl_filter_nlm_update_output(int dx,
                                                   int dy,
-                                                  ccl_global float ccl_restrict_ptr differenceImage,
-                                                  ccl_global float ccl_restrict_ptr image,
+                                                  const ccl_global float *ccl_restrict differenceImage,
+                                                  const ccl_global float *ccl_restrict image,
                                                   ccl_global float *outImage,
                                                   ccl_global float *accumImage,
                                                   int4 rect,
@@ -188,7 +188,7 @@ __kernel void kernel_ocl_filter_nlm_update_output(int dx,
 }
 
 __kernel void kernel_ocl_filter_nlm_normalize(ccl_global float *outImage,
-                                              ccl_global float ccl_restrict_ptr accumImage,
+                                              const ccl_global float *ccl_restrict accumImage,
                                               int4 rect,
                                               int w) {
        int x = get_global_id(0) + rect.x;
@@ -200,11 +200,11 @@ __kernel void kernel_ocl_filter_nlm_normalize(ccl_global float *outImage,
 
 __kernel void kernel_ocl_filter_nlm_construct_gramian(int dx,
                                                       int dy,
-                                                      ccl_global float ccl_restrict_ptr differenceImage,
-                                                      ccl_global float ccl_restrict_ptr buffer,
+                                                      const ccl_global float *ccl_restrict differenceImage,
+                                                      const ccl_global float *ccl_restrict buffer,
                                                       ccl_global float *color_pass,
                                                       ccl_global float *variance_pass,
-                                                      ccl_global float ccl_restrict_ptr transform,
+                                                      const ccl_global float *ccl_restrict transform,
                                                       ccl_global int *rank,
                                                       ccl_global float *XtWX,
                                                       ccl_global float3 *XtWY,
index 2172e94a14f2ae3f716461852527035282663af8..0c58ae5058cdfe8e3a90a253f055d31f65fabc62 100644 (file)
@@ -50,19 +50,19 @@ ccl_device_inline void math_matrix_zero(float *A, int n)
 
 /* Elementary vector operations. */
 
-ccl_device_inline void math_vector_add(float *a, float ccl_restrict_ptr b, int n)
+ccl_device_inline void math_vector_add(float *a, const float *ccl_restrict b, int n)
 {
        for(int i = 0; i < n; i++)
                a[i] += b[i];
 }
 
-ccl_device_inline void math_vector_mul(float *a, float ccl_restrict_ptr b, int n)
+ccl_device_inline void math_vector_mul(float *a, const float *ccl_restrict b, int n)
 {
        for(int i = 0; i < n; i++)
                a[i] *= b[i];
 }
 
-ccl_device_inline void math_vector_mul_strided(ccl_global float *a, float ccl_restrict_ptr b, int astride, int n)
+ccl_device_inline void math_vector_mul_strided(ccl_global float *a, const float *ccl_restrict b, int astride, int n)
 {
        for(int i = 0; i < n; i++)
                a[i*astride] *= b[i];
@@ -74,7 +74,7 @@ ccl_device_inline void math_vector_scale(float *a, float b, int n)
                a[i] *= b;
 }
 
-ccl_device_inline void math_vector_max(float *a, float ccl_restrict_ptr b, int n)
+ccl_device_inline void math_vector_max(float *a, const float *ccl_restrict b, int n)
 {
        for(int i = 0; i < n; i++)
                a[i] = max(a[i], b[i]);
@@ -105,7 +105,7 @@ ccl_device_inline void math_trimatrix_add_diagonal(ccl_global float *A, int n, f
  * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */
 ccl_device_inline void math_matrix_add_gramian(float *A,
                                                   int n,
-                                                  float ccl_restrict_ptr v,
+                                                  const float *ccl_restrict v,
                                                   float weight)
 {
        for(int row = 0; row < n; row++)
@@ -117,7 +117,7 @@ ccl_device_inline void math_matrix_add_gramian(float *A,
  * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */
 ccl_device_inline void math_trimatrix_add_gramian_strided(ccl_global float *A,
                                                           int n,
-                                                          float ccl_restrict_ptr v,
+                                                          const float *ccl_restrict v,
                                                           float weight,
                                                           int stride)
 {
@@ -342,32 +342,32 @@ ccl_device_inline void math_matrix_zero_sse(__m128 *A, int n)
 
 /* Add Gramian matrix of v to A.
  * The Gramian matrix of v is v^T*v, so element (i,j) is v[i]*v[j]. */
-ccl_device_inline void math_matrix_add_gramian_sse(__m128 *A, int n, __m128 ccl_restrict_ptr v, __m128 weight)
+ccl_device_inline void math_matrix_add_gramian_sse(__m128 *A, int n, const __m128 *ccl_restrict v, __m128 weight)
 {
        for(int row = 0; row < n; row++)
                for(int col = 0; col <= row; col++)
                        MAT(A, n, row, col) = _mm_add_ps(MAT(A, n, row, col), _mm_mul_ps(_mm_mul_ps(v[row], v[col]), weight));
 }
 
-ccl_device_inline void math_vector_add_sse(__m128 *V, int n, __m128 ccl_restrict_ptr a)
+ccl_device_inline void math_vector_add_sse(__m128 *V, int n, const __m128 *ccl_restrict a)
 {
        for(int i = 0; i < n; i++)
                V[i] = _mm_add_ps(V[i], a[i]);
 }
 
-ccl_device_inline void math_vector_mul_sse(__m128 *V, int n, __m128 ccl_restrict_ptr a)
+ccl_device_inline void math_vector_mul_sse(__m128 *V, int n, const __m128 *ccl_restrict a)
 {
        for(int i = 0; i < n; i++)
                V[i] = _mm_mul_ps(V[i], a[i]);
 }
 
-ccl_device_inline void math_vector_max_sse(__m128 *a, __m128 ccl_restrict_ptr b, int n)
+ccl_device_inline void math_vector_max_sse(__m128 *a, const __m128 *ccl_restrict b, int n)
 {
        for(int i = 0; i < n; i++)
                a[i] = _mm_max_ps(a[i], b[i]);
 }
 
-ccl_device_inline void math_matrix_hsum(float *A, int n, __m128 ccl_restrict_ptr B)
+ccl_device_inline void math_matrix_hsum(float *A, int n, const __m128 *ccl_restrict B)
 {
        for(int row = 0; row < n; row++)
                for(int col = 0; col <= row; col++)