CUDA: use streams and async to avoid busy-waiting
[blender.git] / intern / opencolorio / ocio_impl_glsl.cc
1 /*
2  * Adapted from OpenColorIO with this license:
3  *
4  * Copyright (c) 2003-2010 Sony Pictures Imageworks Inc., et al.
5  * All Rights Reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  * * Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12  * * Redistributions in binary form must reproduce the above copyright
13  *   notice, this list of conditions and the following disclaimer in the
14  *   documentation and/or other materials provided with the distribution.
15  * * Neither the name of Sony Pictures Imageworks nor the names of its
16  *   contributors may be used to endorse or promote products derived from
17  *   this software without specific prior written permission.
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * Modifications Copyright 2013, Blender Foundation.
31  *
32  * Contributor(s): Sergey Sharybin
33  *
34  */
35
36 #include <limits>
37 #include <sstream>
38 #include <string.h>
39
40 #include <GL/glew.h>
41
42 #include <OpenColorIO/OpenColorIO.h>
43
44 using namespace OCIO_NAMESPACE;
45
46 #include "MEM_guardedalloc.h"
47
48 #include "ocio_impl.h"
49
/* Number of samples along each edge of the cubic 3D LUT. The same value is
 * handed to the OCIO shader description and used for all three dimensions
 * of the GL 3D texture.
 */
static const int LUT3D_EDGE_SIZE = 64;

/* GLSL source text that is appended after the OCIO-generated shader text
 * when the fragment shader is assembled. Declared with C linkage; the
 * definition lives outside this file.
 */
extern "C" {
extern char datatoc_gpu_shader_display_transform_glsl[];
}
53
54 /* **** OpenGL drawing routines using GLSL for color space transform ***** */
55
56 typedef struct OCIO_GLSLDrawState {
57         bool lut3d_texture_allocated;  /* boolean flag indicating whether
58                                         * lut texture is allocated
59                                         */
60         bool lut3d_texture_valid;
61
62         GLuint lut3d_texture;  /* OGL texture ID for 3D LUT */
63
64         float *lut3d;  /* 3D LUT table */
65
66         bool dither_used;
67
68         bool curve_mapping_used;
69         bool curve_mapping_texture_allocated;
70         bool curve_mapping_texture_valid;
71         GLuint curve_mapping_texture;
72         size_t curve_mapping_cache_id;
73
74         bool predivide_used;
75
76         bool texture_size_used;
77
78         /* Cache */
79         std::string lut3dcacheid;
80         std::string shadercacheid;
81
82         /* GLSL stuff */
83         GLuint ocio_shader;
84         GLuint program;
85
86         /* Previous OpenGL state. */
87         GLint last_texture, last_texture_unit;
88 } OCIO_GLSLDrawState;
89
90 static GLuint compileShaderText(GLenum shaderType, const char *text)
91 {
92         GLuint shader;
93         GLint stat;
94
95         shader = glCreateShader(shaderType);
96         glShaderSource(shader, 1, (const GLchar **) &text, NULL);
97         glCompileShader(shader);
98         glGetShaderiv(shader, GL_COMPILE_STATUS, &stat);
99
100         if (!stat) {
101                 GLchar log[1000];
102                 GLsizei len;
103                 glGetShaderInfoLog(shader, 1000, &len, log);
104                 fprintf(stderr, "Shader compile error:\n%s\n", log);
105                 return 0;
106         }
107
108         return shader;
109 }
110
111 static GLuint linkShaders(GLuint ocio_shader)
112 {
113         if (!ocio_shader)
114                 return 0;
115
116         GLuint program = glCreateProgram();
117
118         glAttachShader(program, ocio_shader);
119
120         glLinkProgram(program);
121
122         /* check link */
123         {
124                 GLint stat;
125                 glGetProgramiv(program, GL_LINK_STATUS, &stat);
126                 if (!stat) {
127                         GLchar log[1000];
128                         GLsizei len;
129                         glGetProgramInfoLog(program, 1000, &len, log);
130                         fprintf(stderr, "Shader link error:\n%s\n", log);
131                         return 0;
132                 }
133         }
134
135         return program;
136 }
137
138 static OCIO_GLSLDrawState *allocateOpenGLState(void)
139 {
140         OCIO_GLSLDrawState *state;
141
142         /* Allocate memory for state. */
143         state = (OCIO_GLSLDrawState *) MEM_callocN(sizeof(OCIO_GLSLDrawState),
144                                                    "OCIO OpenGL State struct");
145
146         /* Call constructors on new memory. */
147         new (&state->lut3dcacheid) std::string("");
148         new (&state->shadercacheid) std::string("");
149
150         return state;
151 }
152
153 /* Ensure LUT texture and array are allocated */
154 static bool ensureLUT3DAllocated(OCIO_GLSLDrawState *state)
155 {
156         int num_3d_entries = 3 * LUT3D_EDGE_SIZE * LUT3D_EDGE_SIZE * LUT3D_EDGE_SIZE;
157
158         if (state->lut3d_texture_allocated)
159                 return state->lut3d_texture_valid;
160
161         glGenTextures(1, &state->lut3d_texture);
162
163         state->lut3d = (float *) MEM_callocN(sizeof(float) * num_3d_entries, "OCIO GPU 3D LUT");
164
165         glActiveTexture(GL_TEXTURE1);
166         glBindTexture(GL_TEXTURE_3D, state->lut3d_texture);
167         glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
168         glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
169         glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
170         glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
171         glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
172
173         /* clean glError buffer */
174         while (glGetError() != GL_NO_ERROR) {}
175
176         glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB16F_ARB,
177                      LUT3D_EDGE_SIZE, LUT3D_EDGE_SIZE, LUT3D_EDGE_SIZE,
178                      0, GL_RGB, GL_FLOAT, state->lut3d);
179
180         state->lut3d_texture_allocated = true;
181
182         /* GL_RGB16F_ARB could be not supported at some drivers
183          * in this case we could not use GLSL display
184          */
185         state->lut3d_texture_valid = glGetError() == GL_NO_ERROR;
186
187         return state->lut3d_texture_valid;
188 }
189
190 static bool ensureCurveMappingAllocated(OCIO_GLSLDrawState *state, OCIO_CurveMappingSettings *curve_mapping_settings)
191 {
192         if (state->curve_mapping_texture_allocated)
193                 return state->curve_mapping_texture_valid;
194
195         glGenTextures(1, &state->curve_mapping_texture);
196
197         glActiveTexture(GL_TEXTURE2);
198         glBindTexture(GL_TEXTURE_1D, state->curve_mapping_texture);
199         glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
200         glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
201         glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
202         glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
203         glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
204
205         /* clean glError buffer */
206         while (glGetError() != GL_NO_ERROR) {}
207
208         glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA16F_ARB, curve_mapping_settings->lut_size,
209                      0, GL_RGBA, GL_FLOAT, curve_mapping_settings->lut);
210
211         state->curve_mapping_texture_allocated = true;
212
213         /* GL_RGB16F_ARB could be not supported at some drivers
214          * in this case we could not use GLSL display
215          */
216         state->curve_mapping_texture_valid = glGetError() == GL_NO_ERROR;
217
218         return state->curve_mapping_texture_valid;
219 }
220
221 /* Detect if we can support GLSL drawing */
222 bool OCIOImpl::supportGLSLDraw()
223 {
224         /* GLSL and GL_RGB16F_ARB */
225         return GLEW_VERSION_2_0 && (GLEW_VERSION_3_0 || GLEW_ARB_texture_float);
226 }
227
228 static bool supportGLSL13()
229 {
230         const char *version = (const char*)glGetString(GL_SHADING_LANGUAGE_VERSION);
231         int major = 1, minor = 0;
232
233         if (version && sscanf(version, "%d.%d", &major, &minor) == 2)
234                 return (major > 1 || (major == 1 && minor >= 30));
235
236         return false;
237 }
238
239 /**
240  * Setup OpenGL contexts for a transform defined by processor using GLSL
241  * All LUT allocating baking and shader compilation happens here.
242  *
243  * Once this function is called, callee could start drawing images
244  * using regular 2D texture.
245  *
246  * When all drawing is finished, finishGLSLDraw shall be called to
247  * restore OpenGL context to it's pre-GLSL draw state.
248  */
249 bool OCIOImpl::setupGLSLDraw(OCIO_GLSLDrawState **state_r, OCIO_ConstProcessorRcPtr *processor,
250                              OCIO_CurveMappingSettings *curve_mapping_settings,
251                              float dither, bool use_predivide)
252 {
253         ConstProcessorRcPtr ocio_processor = *(ConstProcessorRcPtr *) processor;
254         bool use_curve_mapping = curve_mapping_settings != NULL;
255         bool use_dither = dither > std::numeric_limits<float>::epsilon();
256
257         /* Create state if needed. */
258         OCIO_GLSLDrawState *state;
259         if (!*state_r)
260                 *state_r = allocateOpenGLState();
261         state = *state_r;
262
263         glGetIntegerv(GL_TEXTURE_2D, &state->last_texture);
264         glGetIntegerv(GL_ACTIVE_TEXTURE, &state->last_texture_unit);
265
266         if (!ensureLUT3DAllocated(state)) {
267                 glActiveTexture(state->last_texture_unit);
268                 glBindTexture(GL_TEXTURE_2D, state->last_texture);
269
270                 return false;
271         }
272
273         if (use_curve_mapping) {
274                 if (!ensureCurveMappingAllocated(state, curve_mapping_settings)) {
275                         glActiveTexture(state->last_texture_unit);
276                         glBindTexture(GL_TEXTURE_2D, state->last_texture);
277
278                         return false;
279                 }
280         }
281         else {
282                 if (state->curve_mapping_texture_allocated) {
283                         glDeleteTextures(1, &state->curve_mapping_texture);
284                         state->curve_mapping_texture_allocated = false;
285                 }
286         }
287
288         /* Step 1: Create a GPU Shader Description */
289         GpuShaderDesc shaderDesc;
290         shaderDesc.setLanguage(GPU_LANGUAGE_GLSL_1_3);
291         shaderDesc.setFunctionName("OCIODisplay");
292         shaderDesc.setLut3DEdgeLen(LUT3D_EDGE_SIZE);
293
294         if (use_curve_mapping) {
295                 if (state->curve_mapping_cache_id != curve_mapping_settings->cache_id) {
296                         glActiveTexture(GL_TEXTURE2);
297                         glBindTexture(GL_TEXTURE_1D, state->curve_mapping_texture);
298                         glTexSubImage1D(GL_TEXTURE_1D, 0, 0, curve_mapping_settings->lut_size,
299                                         GL_RGBA, GL_FLOAT, curve_mapping_settings->lut);
300                 }
301         }
302
303         /* Step 2: Compute the 3D LUT */
304         std::string lut3dCacheID = ocio_processor->getGpuLut3DCacheID(shaderDesc);
305         if (lut3dCacheID != state->lut3dcacheid) {
306                 state->lut3dcacheid = lut3dCacheID;
307                 ocio_processor->getGpuLut3D(state->lut3d, shaderDesc);
308
309                 glActiveTexture(GL_TEXTURE1);
310                 glBindTexture(GL_TEXTURE_3D, state->lut3d_texture);
311                 glTexSubImage3D(GL_TEXTURE_3D, 0, 0, 0, 0,
312                                 LUT3D_EDGE_SIZE, LUT3D_EDGE_SIZE, LUT3D_EDGE_SIZE,
313                                 GL_RGB, GL_FLOAT, state->lut3d);
314         }
315
316         /* Step 3: Compute the Shader */
317         std::string shaderCacheID = ocio_processor->getGpuShaderTextCacheID(shaderDesc);
318         if (state->program == 0 ||
319             shaderCacheID != state->shadercacheid ||
320             use_predivide != state->predivide_used ||
321             use_curve_mapping != state->curve_mapping_used ||
322             use_dither != state->dither_used)
323         {
324                 state->shadercacheid = shaderCacheID;
325
326                 if (state->program) {
327                         glDeleteProgram(state->program);
328                 }
329
330                 if (state->ocio_shader) {
331                         glDeleteShader(state->ocio_shader);
332                 }
333
334                 std::ostringstream os;
335
336                 if (supportGLSL13()) {
337                         os << "#version 130\n";
338                 }
339                 else {
340                         os << "#define USE_TEXTURE_SIZE\n";
341                         state->texture_size_used = use_dither;
342                 }
343
344                 if (use_predivide) {
345                         os << "#define USE_PREDIVIDE\n";
346                 }
347
348                 if (use_dither) {
349                         os << "#define USE_DITHER\n";
350                 }
351
352                 if (use_curve_mapping) {
353                         os << "#define USE_CURVE_MAPPING\n";
354                 }
355
356                 os << ocio_processor->getGpuShaderText(shaderDesc) << "\n";
357                 os << datatoc_gpu_shader_display_transform_glsl;
358
359                 state->ocio_shader = compileShaderText(GL_FRAGMENT_SHADER, os.str().c_str());
360
361                 if (state->ocio_shader) {
362                         state->program = linkShaders(state->ocio_shader);
363                 }
364
365                 state->curve_mapping_used = use_curve_mapping;
366                 state->dither_used = use_dither;
367                 state->predivide_used = use_predivide;
368         }
369
370         if (state->program) {
371                 glActiveTexture(GL_TEXTURE1);
372                 glBindTexture(GL_TEXTURE_3D, state->lut3d_texture);
373
374                 if (use_curve_mapping) {
375                         glActiveTexture(GL_TEXTURE2);
376                         glBindTexture(GL_TEXTURE_1D, state->curve_mapping_texture);
377                 }
378
379                 glActiveTexture(GL_TEXTURE0);
380
381                 glUseProgram(state->program);
382
383                 glUniform1i(glGetUniformLocation(state->program, "image_texture"), 0);
384                 glUniform1i(glGetUniformLocation(state->program, "lut3d_texture"), 1);
385
386                 if (state->texture_size_used) {
387                         /* we use textureSize() if possible for best performance, if not
388                          * supported we query the size and pass it as uniform variables */
389                         GLint width, height;
390
391                         glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &width);
392                         glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &height);
393
394                         glUniform1f(glGetUniformLocation(state->program, "image_texture_width"), (float)width);
395                         glUniform1f(glGetUniformLocation(state->program, "image_texture_height"), (float)height);
396                 }
397
398                 if (use_dither) {
399                         glUniform1f(glGetUniformLocation(state->program, "dither"), dither);
400                 }
401
402                 if (use_curve_mapping) {
403                         glUniform1i(glGetUniformLocation(state->program, "curve_mapping_texture"), 2);
404                         glUniform1i(glGetUniformLocation(state->program, "curve_mapping_lut_size"), curve_mapping_settings->lut_size);
405                         glUniform4iv(glGetUniformLocation(state->program, "use_curve_mapping_extend_extrapolate"), 1, curve_mapping_settings->use_extend_extrapolate);
406                         glUniform4fv(glGetUniformLocation(state->program, "curve_mapping_mintable"), 1, curve_mapping_settings->mintable);
407                         glUniform4fv(glGetUniformLocation(state->program, "curve_mapping_range"), 1, curve_mapping_settings->range);
408                         glUniform4fv(glGetUniformLocation(state->program, "curve_mapping_ext_in_x"), 1, curve_mapping_settings->ext_in_x);
409                         glUniform4fv(glGetUniformLocation(state->program, "curve_mapping_ext_in_y"), 1, curve_mapping_settings->ext_in_y);
410                         glUniform4fv(glGetUniformLocation(state->program, "curve_mapping_ext_out_x"), 1, curve_mapping_settings->ext_out_x);
411                         glUniform4fv(glGetUniformLocation(state->program, "curve_mapping_ext_out_y"), 1, curve_mapping_settings->ext_out_y);
412                         glUniform4fv(glGetUniformLocation(state->program, "curve_mapping_first_x"), 1, curve_mapping_settings->first_x);
413                         glUniform4fv(glGetUniformLocation(state->program, "curve_mapping_first_y"), 1, curve_mapping_settings->first_y);
414                         glUniform4fv(glGetUniformLocation(state->program, "curve_mapping_last_x"), 1, curve_mapping_settings->last_x);
415                         glUniform4fv(glGetUniformLocation(state->program, "curve_mapping_last_y"), 1, curve_mapping_settings->last_y);
416                         glUniform3fv(glGetUniformLocation(state->program, "curve_mapping_black"), 1, curve_mapping_settings->black);
417                         glUniform3fv(glGetUniformLocation(state->program, "curve_mapping_bwmul"), 1, curve_mapping_settings->bwmul);
418                 }
419
420                 return true;
421         }
422         else {
423                 glActiveTexture(state->last_texture_unit);
424                 glBindTexture(GL_TEXTURE_2D, state->last_texture);
425
426                 return false;
427         }
428 }
429
430 void OCIOImpl::finishGLSLDraw(OCIO_GLSLDrawState *state)
431 {
432         glActiveTexture(state->last_texture_unit);
433         glBindTexture(GL_TEXTURE_2D, state->last_texture);
434         glUseProgram(0);
435 }
436
437 void OCIOImpl::freeGLState(struct OCIO_GLSLDrawState *state)
438 {
439         using std::string;
440
441         if (state->lut3d_texture_allocated)
442                 glDeleteTextures(1, &state->lut3d_texture);
443
444         if (state->lut3d)
445                 MEM_freeN(state->lut3d);
446
447         if (state->program)
448                 glDeleteProgram(state->program);
449
450         if (state->ocio_shader)
451                 glDeleteShader(state->ocio_shader);
452
453         state->lut3dcacheid.~string();
454         state->shadercacheid.~string();
455
456         MEM_freeN(state);
457 }