Fix T52645, T52645: AMD OpenCL compiler crash with recent drivers.
[blender-staging.git] / intern / cycles / kernel / kernels / cuda / kernel.cu
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* CUDA kernel entry points */
18
19 #ifdef __CUDA_ARCH__
20
21 #include "kernel/kernel_compat_cuda.h"
22 #include "kernel_config.h"
23 #include "kernel/kernel_math.h"
24 #include "kernel/kernel_types.h"
25 #include "kernel/kernel_globals.h"
26 #include "kernel/kernel_film.h"
27 #include "kernel/kernel_path.h"
28 #include "kernel/kernel_path_branched.h"
29 #include "kernel/kernel_bake.h"
30
31 /* kernels */
32 extern "C" __global__ void
33 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
34 kernel_cuda_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
35 {
36         int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
37         int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
38
39         if(x < sx + sw && y < sy + sh) {
40                 KernelGlobals kg;
41                 kernel_path_trace(&kg, buffer, rng_state, sample, x, y, offset, stride);
42         }
43 }
44
45 #ifdef __BRANCHED_PATH__
46 extern "C" __global__ void
47 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS)
48 kernel_cuda_branched_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
49 {
50         int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
51         int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
52
53         if(x < sx + sw && y < sy + sh) {
54                 KernelGlobals kg;
55                 kernel_branched_path_trace(&kg, buffer, rng_state, sample, x, y, offset, stride);
56         }
57 }
58 #endif
59
60 extern "C" __global__ void
61 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
62 kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
63 {
64         int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
65         int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
66
67         if(x < sx + sw && y < sy + sh) {
68                 kernel_film_convert_to_byte(NULL, rgba, buffer, sample_scale, x, y, offset, stride);
69         }
70 }
71
72 extern "C" __global__ void
73 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
74 kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
75 {
76         int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
77         int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
78
79         if(x < sx + sw && y < sy + sh) {
80                 kernel_film_convert_to_half_float(NULL, rgba, buffer, sample_scale, x, y, offset, stride);
81         }
82 }
83
84 extern "C" __global__ void
85 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
86 kernel_cuda_shader(uint4 *input,
87                    float4 *output,
88                    float *output_luma,
89                    int type,
90                    int sx,
91                    int sw,
92                    int offset,
93                    int sample)
94 {
95         int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
96
97         if(x < sx + sw) {
98                 KernelGlobals kg;
99                 kernel_shader_evaluate(&kg,
100                                        input,
101                                        output,
102                                        output_luma,
103                                        (ShaderEvalType)type, 
104                                        x,
105                                        sample);
106         }
107 }
108
109 #ifdef __BAKING__
110 extern "C" __global__ void
111 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
112 kernel_cuda_bake(uint4 *input, float4 *output, int type, int filter, int sx, int sw, int offset, int sample)
113 {
114         int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
115
116         if(x < sx + sw) {
117                 KernelGlobals kg;
118                 kernel_bake_evaluate(&kg, input, output, (ShaderEvalType)type, filter, x, offset, sample);
119         }
120 }
121 #endif
122
123 #endif
124