Fix Cycles OpenCL not taking Extend and Clip extension types into account.
[blender.git] / intern / cycles / render / image.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "device.h"
18 #include "image.h"
19 #include "scene.h"
20
21 #include "util_foreach.h"
22 #include "util_path.h"
23 #include "util_progress.h"
24 #include "util_texture.h"
25
26 #ifdef WITH_OSL
27 #include <OSL/oslexec.h>
28 #endif
29
30 CCL_NAMESPACE_BEGIN
31
32 ImageManager::ImageManager(const DeviceInfo& info)
33 {
34         need_update = true;
35         pack_images = false;
36         osl_texture_system = NULL;
37         animation_frame = 0;
38
39         /* In case of multiple devices used we need to know type of an actual
40          * compute device.
41          *
42          * NOTE: We assume that all the devices are same type, otherwise we'll
43          * be screwed on so many levels..
44          */
45         DeviceType device_type = info.type;
46         if (device_type == DEVICE_MULTI) {
47                 device_type = info.multi_devices[0].type;
48         }
49
50         /* Set image limits */
51 #define SET_TEX_IMAGES_LIMITS(ARCH) \
52         { \
53                 tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_ ## ARCH; \
54                 tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_ ## ARCH; \
55                 tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_ ## ARCH; \
56                 tex_num_images[IMAGE_DATA_TYPE_BYTE] = TEX_NUM_BYTE_ ## ARCH; \
57                 tex_num_images[IMAGE_DATA_TYPE_HALF4] = TEX_NUM_HALF4_ ## ARCH; \
58                 tex_num_images[IMAGE_DATA_TYPE_HALF] = TEX_NUM_HALF_ ## ARCH; \
59                 tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_START_FLOAT4_ ## ARCH; \
60                 tex_start_images[IMAGE_DATA_TYPE_BYTE4] = TEX_START_BYTE4_ ## ARCH; \
61                 tex_start_images[IMAGE_DATA_TYPE_FLOAT] = TEX_START_FLOAT_ ## ARCH; \
62                 tex_start_images[IMAGE_DATA_TYPE_BYTE] = TEX_START_BYTE_ ## ARCH; \
63                 tex_start_images[IMAGE_DATA_TYPE_HALF4] = TEX_START_HALF4_ ## ARCH; \
64                 tex_start_images[IMAGE_DATA_TYPE_HALF] = TEX_START_HALF_ ## ARCH; \
65         }
66
67         if(device_type == DEVICE_CPU) {
68                 SET_TEX_IMAGES_LIMITS(CPU);
69         }
70         else if(device_type == DEVICE_CUDA) {
71                 if(info.has_bindless_textures) {
72                         SET_TEX_IMAGES_LIMITS(CUDA_KEPLER);
73                 }
74                 else {
75                         SET_TEX_IMAGES_LIMITS(CUDA);
76                 }
77         }
78         else if(device_type == DEVICE_OPENCL) {
79                 SET_TEX_IMAGES_LIMITS(OPENCL);
80         }
81         else {
82                 /* Should not happen. */
83                 tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = 0;
84                 tex_num_images[IMAGE_DATA_TYPE_BYTE4] = 0;
85                 tex_num_images[IMAGE_DATA_TYPE_FLOAT] = 0;
86                 tex_num_images[IMAGE_DATA_TYPE_BYTE] = 0;
87                 tex_num_images[IMAGE_DATA_TYPE_HALF4] = 0;
88                 tex_num_images[IMAGE_DATA_TYPE_HALF] = 0;
89                 tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = 0;
90                 tex_start_images[IMAGE_DATA_TYPE_BYTE4] = 0;
91                 tex_start_images[IMAGE_DATA_TYPE_FLOAT] = 0;
92                 tex_start_images[IMAGE_DATA_TYPE_BYTE] = 0;
93                 tex_start_images[IMAGE_DATA_TYPE_HALF4] = 0;
94                 tex_start_images[IMAGE_DATA_TYPE_HALF] = 0;
95                 assert(0);
96         }
97
98 #undef SET_TEX_IMAGES_LIMITS
99 }
100
101 ImageManager::~ImageManager()
102 {
103         for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
104                 for(size_t slot = 0; slot < images[type].size(); slot++)
105                         assert(!images[type][slot]);
106         }
107 }
108
109 void ImageManager::set_pack_images(bool pack_images_)
110 {
111         pack_images = pack_images_;
112 }
113
114 void ImageManager::set_osl_texture_system(void *texture_system)
115 {
116         osl_texture_system = texture_system;
117 }
118
119 bool ImageManager::set_animation_frame_update(int frame)
120 {
121         if(frame != animation_frame) {
122                 animation_frame = frame;
123
124                 for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
125                         for(size_t slot = 0; slot < images[type].size(); slot++) {
126                                 if(images[type][slot] && images[type][slot]->animated)
127                                         return true;
128                         }
129                 }
130         }
131
132         return false;
133 }
134
135 ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filename,
136                                                              void *builtin_data,
137                                                              bool& is_linear)
138 {
139         bool is_float = false, is_half = false;
140         is_linear = false;
141         int channels = 4;
142
143         if(builtin_data) {
144                 if(builtin_image_info_cb) {
145                         int width, height, depth;
146                         builtin_image_info_cb(filename, builtin_data, is_float, width, height, depth, channels);
147                 }
148
149                 if(is_float) {
150                         is_linear = true;
151                         return (channels > 1) ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_FLOAT;
152                 }
153                 else {
154                         return (channels > 1) ? IMAGE_DATA_TYPE_BYTE4 : IMAGE_DATA_TYPE_BYTE;
155                 }
156         }
157
158         ImageInput *in = ImageInput::create(filename);
159
160         if(in) {
161                 ImageSpec spec;
162
163                 if(in->open(filename, spec)) {
164                         /* check the main format, and channel formats;
165                          * if any take up more than one byte, we'll need a float texture slot */
166                         if(spec.format.basesize() > 1) {
167                                 is_float = true;
168                                 is_linear = true;
169                         }
170
171                         for(size_t channel = 0; channel < spec.channelformats.size(); channel++) {
172                                 if(spec.channelformats[channel].basesize() > 1) {
173                                         is_float = true;
174                                         is_linear = true;
175                                 }
176                         }
177
178                         /* check if it's half float */
179                         if(spec.format == TypeDesc::HALF)
180                                 is_half = true;
181
182                         channels = spec.nchannels;
183
184                         /* basic color space detection, not great but better than nothing
185                          * before we do OpenColorIO integration */
186                         if(is_float) {
187                                 string colorspace = spec.get_string_attribute("oiio:ColorSpace");
188
189                                 is_linear = !(colorspace == "sRGB" ||
190                                               colorspace == "GammaCorrected" ||
191                                               (colorspace == "" &&
192                                                   (strcmp(in->format_name(), "png") == 0 ||
193                                                    strcmp(in->format_name(), "tiff") == 0 ||
194                                                    strcmp(in->format_name(), "dpx") == 0 ||
195                                                    strcmp(in->format_name(), "jpeg2000") == 0)));
196                         }
197                         else {
198                                 is_linear = false;
199                         }
200
201                         in->close();
202                 }
203
204                 delete in;
205         }
206
207         if(is_half) {
208                 return (channels > 1) ? IMAGE_DATA_TYPE_HALF4 : IMAGE_DATA_TYPE_HALF;
209         }
210         else if(is_float) {
211                 return (channels > 1) ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_FLOAT;
212         }
213         else {
214                 return (channels > 1) ? IMAGE_DATA_TYPE_BYTE4 : IMAGE_DATA_TYPE_BYTE;
215         }
216 }
217
218 /* We use a consecutive slot counting scheme on the devices, in order
219  * float4, byte4, float, byte.
220  * These functions convert the slot ids from ImageManager "images" ones
221  * to device ones and vice versa. */
222 int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type)
223 {
224         return slot + tex_start_images[type];
225 }
226
227 int ImageManager::flattened_slot_to_type_index(int flat_slot, ImageDataType *type)
228 {
229         for(int i = IMAGE_DATA_NUM_TYPES - 1; i >= 0; i--) {
230                 if(flat_slot >= tex_start_images[i]) {
231                         *type = (ImageDataType)i;
232                         return flat_slot - tex_start_images[i];
233                 }
234         }
235
236         /* Should not happen. */
237         return flat_slot;
238 }
239
240 string ImageManager::name_from_type(int type)
241 {
242         if(type == IMAGE_DATA_TYPE_FLOAT4)
243                 return "float4";
244         else if(type == IMAGE_DATA_TYPE_FLOAT)
245                 return "float";
246         else if(type == IMAGE_DATA_TYPE_BYTE)
247                 return "byte";
248         else if(type == IMAGE_DATA_TYPE_HALF4)
249                 return "half4";
250         else if(type == IMAGE_DATA_TYPE_HALF)
251                 return "half";
252         else
253                 return "byte4";
254 }
255
256 static bool image_equals(ImageManager::Image *image,
257                          const string& filename,
258                          void *builtin_data,
259                          InterpolationType interpolation,
260                          ExtensionType extension)
261 {
262         return image->filename == filename &&
263                image->builtin_data == builtin_data &&
264                image->interpolation == interpolation &&
265                image->extension == extension;
266 }
267
268 int ImageManager::add_image(const string& filename,
269                             void *builtin_data,
270                             bool animated,
271                             float frame,
272                             bool& is_float,
273                             bool& is_linear,
274                             InterpolationType interpolation,
275                             ExtensionType extension,
276                             bool use_alpha)
277 {
278         Image *img;
279         size_t slot;
280
281         ImageDataType type = get_image_metadata(filename, builtin_data, is_linear);
282
283         /* Do we have a float? */
284         if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4)
285                 is_float = true;
286
287         /* No single channel and half textures on CUDA (Fermi) and OpenCL, use available slots */
288         if((type == IMAGE_DATA_TYPE_FLOAT ||
289             type == IMAGE_DATA_TYPE_HALF4 ||
290             type == IMAGE_DATA_TYPE_HALF) &&
291             tex_num_images[type] == 0) {
292                 type = IMAGE_DATA_TYPE_FLOAT4;
293         }
294         if(type == IMAGE_DATA_TYPE_BYTE && tex_num_images[type] == 0) {
295                 type = IMAGE_DATA_TYPE_BYTE4;
296         }
297
298         /* Fnd existing image. */
299         for(slot = 0; slot < images[type].size(); slot++) {
300                 img = images[type][slot];
301                 if(img && image_equals(img,
302                                        filename,
303                                        builtin_data,
304                                        interpolation,
305                                        extension))
306                 {
307                         if(img->frame != frame) {
308                                 img->frame = frame;
309                                 img->need_load = true;
310                         }
311                         if(img->use_alpha != use_alpha) {
312                                 img->use_alpha = use_alpha;
313                                 img->need_load = true;
314                         }
315                         img->users++;
316                         return type_index_to_flattened_slot(slot, type);
317                 }
318         }
319
320         /* Find free slot. */
321         for(slot = 0; slot < images[type].size(); slot++) {
322                 if(!images[type][slot])
323                         break;
324         }
325
326         if(slot == images[type].size()) {
327                 /* Max images limit reached. */
328                 if(images[type].size() == tex_num_images[type]) {
329                         printf("ImageManager::add_image: Reached %s image limit (%d), skipping '%s'\n",
330                                name_from_type(type).c_str(), tex_num_images[type], filename.c_str());
331                         return -1;
332                 }
333
334                 images[type].resize(images[type].size() + 1);
335         }
336
337         /* Add new image. */
338         img = new Image();
339         img->filename = filename;
340         img->builtin_data = builtin_data;
341         img->need_load = true;
342         img->animated = animated;
343         img->frame = frame;
344         img->interpolation = interpolation;
345         img->extension = extension;
346         img->users = 1;
347         img->use_alpha = use_alpha;
348
349         images[type][slot] = img;
350
351         need_update = true;
352
353         return type_index_to_flattened_slot(slot, type);
354 }
355
356 void ImageManager::remove_image(int flat_slot)
357 {
358         ImageDataType type;
359         int slot = flattened_slot_to_type_index(flat_slot, &type);
360
361         Image *image = images[type][slot];
362         assert(image && image->users >= 1);
363
364         /* decrement user count */
365         image->users--;
366
367         /* don't remove immediately, rather do it all together later on. one of
368          * the reasons for this is that on shader changes we add and remove nodes
369          * that use them, but we do not want to reload the image all the time. */
370         if(image->users == 0)
371                 need_update = true;
372 }
373
374 void ImageManager::remove_image(const string& filename,
375                                 void *builtin_data,
376                                 InterpolationType interpolation,
377                                 ExtensionType extension)
378 {
379         size_t slot;
380
381         for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
382                 for(slot = 0; slot < images[type].size(); slot++) {
383                         if(images[type][slot] && image_equals(images[type][slot],
384                                                               filename,
385                                                               builtin_data,
386                                                               interpolation,
387                                                               extension))
388                         {
389                                 remove_image(type_index_to_flattened_slot(slot, (ImageDataType)type));
390                                 return;
391                         }
392                 }
393         }
394 }
395
396 /* TODO(sergey): Deduplicate with the iteration above, but make it pretty,
397  * without bunch of arguments passing around making code readability even
398  * more cluttered.
399  */
400 void ImageManager::tag_reload_image(const string& filename,
401                                     void *builtin_data,
402                                     InterpolationType interpolation,
403                                     ExtensionType extension)
404 {
405         for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
406                 for(size_t slot = 0; slot < images[type].size(); slot++) {
407                         if(images[type][slot] && image_equals(images[type][slot],
408                                                               filename,
409                                                               builtin_data,
410                                                               interpolation,
411                                                               extension))
412                         {
413                                 images[type][slot]->need_load = true;
414                                 break;
415                         }
416                 }
417         }
418 }
419
420 bool ImageManager::file_load_image_generic(Image *img, ImageInput **in, int &width, int &height, int &depth, int &components)
421 {
422         if(img->filename == "")
423                 return false;
424
425         if(!img->builtin_data) {
426                 /* load image from file through OIIO */
427                 *in = ImageInput::create(img->filename);
428
429                 if(!*in)
430                         return false;
431
432                 ImageSpec spec = ImageSpec();
433                 ImageSpec config = ImageSpec();
434
435                 if(img->use_alpha == false)
436                         config.attribute("oiio:UnassociatedAlpha", 1);
437
438                 if(!(*in)->open(img->filename, spec, config)) {
439                         delete *in;
440                         *in = NULL;
441                         return false;
442                 }
443
444                 width = spec.width;
445                 height = spec.height;
446                 depth = spec.depth;
447                 components = spec.nchannels;
448         }
449         else {
450                 /* load image using builtin images callbacks */
451                 if(!builtin_image_info_cb || !builtin_image_pixels_cb)
452                         return false;
453
454                 bool is_float;
455                 builtin_image_info_cb(img->filename, img->builtin_data, is_float, width, height, depth, components);
456         }
457
458         /* we only handle certain number of components */
459         if(!(components >= 1 && components <= 4)) {
460                 if(*in) {
461                         (*in)->close();
462                         delete *in;
463                         *in = NULL;
464                 }
465
466                 return false;
467         }
468
469         return true;
470 }
471
472 template<typename T>
473 bool ImageManager::file_load_byte_image(Image *img, ImageDataType type, device_vector<T>& tex_img)
474 {
475         ImageInput *in = NULL;
476         int width, height, depth, components;
477
478         if(!file_load_image_generic(img, &in, width, height, depth, components))
479                 return false;
480
481         /* read RGBA pixels */
482         uchar *pixels = (uchar*)tex_img.resize(width, height, depth);
483         if(pixels == NULL) {
484                 return false;
485         }
486         bool cmyk = false;
487
488         if(in) {
489                 if(depth <= 1) {
490                         int scanlinesize = width*components*sizeof(uchar);
491
492                         in->read_image(TypeDesc::UINT8,
493                                        (uchar*)pixels + (((size_t)height)-1)*scanlinesize,
494                                        AutoStride,
495                                        -scanlinesize,
496                                        AutoStride);
497                 }
498                 else {
499                         in->read_image(TypeDesc::UINT8, (uchar*)pixels);
500                 }
501
502                 cmyk = strcmp(in->format_name(), "jpeg") == 0 && components == 4;
503
504                 in->close();
505                 delete in;
506         }
507         else {
508                 builtin_image_pixels_cb(img->filename, img->builtin_data, pixels);
509         }
510
511         /* Check if we actually have a byte4 slot, in case components == 1, but device
512          * doesn't support single channel textures. */
513         if(type == IMAGE_DATA_TYPE_BYTE4) {
514                 size_t num_pixels = ((size_t)width) * height * depth;
515                 if(cmyk) {
516                         /* CMYK */
517                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
518                                 pixels[i*4+2] = (pixels[i*4+2]*pixels[i*4+3])/255;
519                                 pixels[i*4+1] = (pixels[i*4+1]*pixels[i*4+3])/255;
520                                 pixels[i*4+0] = (pixels[i*4+0]*pixels[i*4+3])/255;
521                                 pixels[i*4+3] = 255;
522                         }
523                 }
524                 else if(components == 2) {
525                         /* grayscale + alpha */
526                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
527                                 pixels[i*4+3] = pixels[i*2+1];
528                                 pixels[i*4+2] = pixels[i*2+0];
529                                 pixels[i*4+1] = pixels[i*2+0];
530                                 pixels[i*4+0] = pixels[i*2+0];
531                         }
532                 }
533                 else if(components == 3) {
534                         /* RGB */
535                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
536                                 pixels[i*4+3] = 255;
537                                 pixels[i*4+2] = pixels[i*3+2];
538                                 pixels[i*4+1] = pixels[i*3+1];
539                                 pixels[i*4+0] = pixels[i*3+0];
540                         }
541                 }
542                 else if(components == 1) {
543                         /* grayscale */
544                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
545                                 pixels[i*4+3] = 255;
546                                 pixels[i*4+2] = pixels[i];
547                                 pixels[i*4+1] = pixels[i];
548                                 pixels[i*4+0] = pixels[i];
549                         }
550                 }
551
552                 if(img->use_alpha == false) {
553                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
554                                 pixels[i*4+3] = 255;
555                         }
556                 }
557         }
558
559         return true;
560 }
561
562 template<typename T>
563 bool ImageManager::file_load_float_image(Image *img, ImageDataType type, device_vector<T>& tex_img)
564 {
565         ImageInput *in = NULL;
566         int width, height, depth, components;
567
568         if(!file_load_image_generic(img, &in, width, height, depth, components))
569                 return false;
570
571         /* read RGBA pixels */
572         float *pixels = (float*)tex_img.resize(width, height, depth);
573         if(pixels == NULL) {
574                 return false;
575         }
576         bool cmyk = false;
577
578         if(in) {
579                 float *readpixels = pixels;
580                 vector<float> tmppixels;
581
582                 if(components > 4) {
583                         tmppixels.resize(((size_t)width)*height*components);
584                         readpixels = &tmppixels[0];
585                 }
586
587                 if(depth <= 1) {
588                         int scanlinesize = width*components*sizeof(float);
589
590                         in->read_image(TypeDesc::FLOAT,
591                                        (uchar*)readpixels + (height-1)*scanlinesize,
592                                        AutoStride,
593                                        -scanlinesize,
594                                        AutoStride);
595                 }
596                 else {
597                         in->read_image(TypeDesc::FLOAT, (uchar*)readpixels);
598                 }
599
600                 if(components > 4) {
601                         size_t dimensions = ((size_t)width)*height;
602                         for(size_t i = dimensions-1, pixel = 0; pixel < dimensions; pixel++, i--) {
603                                 pixels[i*4+3] = tmppixels[i*components+3];
604                                 pixels[i*4+2] = tmppixels[i*components+2];
605                                 pixels[i*4+1] = tmppixels[i*components+1];
606                                 pixels[i*4+0] = tmppixels[i*components+0];
607                         }
608
609                         tmppixels.clear();
610                 }
611
612                 cmyk = strcmp(in->format_name(), "jpeg") == 0 && components == 4;
613
614                 in->close();
615                 delete in;
616         }
617         else {
618                 builtin_image_float_pixels_cb(img->filename, img->builtin_data, pixels);
619         }
620
621         /* Check if we actually have a float4 slot, in case components == 1, but device
622          * doesn't support single channel textures. */
623         if(type == IMAGE_DATA_TYPE_FLOAT4) {
624                 size_t num_pixels = ((size_t)width) * height * depth;
625                 if(cmyk) {
626                         /* CMYK */
627                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
628                                 pixels[i*4+3] = 255;
629                                 pixels[i*4+2] = (pixels[i*4+2]*pixels[i*4+3])/255;
630                                 pixels[i*4+1] = (pixels[i*4+1]*pixels[i*4+3])/255;
631                                 pixels[i*4+0] = (pixels[i*4+0]*pixels[i*4+3])/255;
632                         }
633                 }
634                 else if(components == 2) {
635                         /* grayscale + alpha */
636                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
637                                 pixels[i*4+3] = pixels[i*2+1];
638                                 pixels[i*4+2] = pixels[i*2+0];
639                                 pixels[i*4+1] = pixels[i*2+0];
640                                 pixels[i*4+0] = pixels[i*2+0];
641                         }
642                 }
643                 else if(components == 3) {
644                         /* RGB */
645                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
646                                 pixels[i*4+3] = 1.0f;
647                                 pixels[i*4+2] = pixels[i*3+2];
648                                 pixels[i*4+1] = pixels[i*3+1];
649                                 pixels[i*4+0] = pixels[i*3+0];
650                         }
651                 }
652                 else if(components == 1) {
653                         /* grayscale */
654                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
655                                 pixels[i*4+3] = 1.0f;
656                                 pixels[i*4+2] = pixels[i];
657                                 pixels[i*4+1] = pixels[i];
658                                 pixels[i*4+0] = pixels[i];
659                         }
660                 }
661
662                 if(img->use_alpha == false) {
663                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
664                                 pixels[i*4+3] = 1.0f;
665                         }
666                 }
667         }
668
669         return true;
670 }
671
672 template<typename T>
673 bool ImageManager::file_load_half_image(Image *img, ImageDataType type, device_vector<T>& tex_img)
674 {
675         ImageInput *in = NULL;
676         int width, height, depth, components;
677
678         if(!file_load_image_generic(img, &in, width, height, depth, components))
679                 return false;
680
681         /* read RGBA pixels */
682         half *pixels = (half*)tex_img.resize(width, height, depth);
683         if(pixels == NULL) {
684                 return false;
685         }
686
687         if(in) {
688                 half *readpixels = pixels;
689                 vector<half> tmppixels;
690
691                 if(components > 4) {
692                         tmppixels.resize(((size_t)width)*height*components);
693                         readpixels = &tmppixels[0];
694                 }
695
696                 if(depth <= 1) {
697                         int scanlinesize = width*components*sizeof(half);
698
699                         in->read_image(TypeDesc::HALF,
700                                        (uchar*)readpixels + (height-1)*scanlinesize,
701                                        AutoStride,
702                                        -scanlinesize,
703                                        AutoStride);
704                 }
705                 else {
706                         in->read_image(TypeDesc::HALF, (uchar*)readpixels);
707                 }
708
709                 if(components > 4) {
710                         size_t dimensions = ((size_t)width)*height;
711                         for(size_t i = dimensions-1, pixel = 0; pixel < dimensions; pixel++, i--) {
712                                 pixels[i*4+3] = tmppixels[i*components+3];
713                                 pixels[i*4+2] = tmppixels[i*components+2];
714                                 pixels[i*4+1] = tmppixels[i*components+1];
715                                 pixels[i*4+0] = tmppixels[i*components+0];
716                         }
717
718                         tmppixels.clear();
719                 }
720
721                 in->close();
722                 delete in;
723         }
724 #if 0
725         /* TODO(dingto): Support half for ImBuf. */
726         else {
727                 builtin_image_float_pixels_cb(img->filename, img->builtin_data, pixels);
728         }
729 #endif
730
731         /* Check if we actually have a half4 slot, in case components == 1, but device
732          * doesn't support single channel textures. */
733         if(type == IMAGE_DATA_TYPE_HALF4) {
734                 size_t num_pixels = ((size_t)width) * height * depth;
735                 if(components == 2) {
736                         /* grayscale + alpha */
737                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
738                                 pixels[i*4+3] = pixels[i*2+1];
739                                 pixels[i*4+2] = pixels[i*2+0];
740                                 pixels[i*4+1] = pixels[i*2+0];
741                                 pixels[i*4+0] = pixels[i*2+0];
742                         }
743                 }
744                 else if(components == 3) {
745                         /* RGB */
746                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
747                                 pixels[i*4+3] = 1.0f;
748                                 pixels[i*4+2] = pixels[i*3+2];
749                                 pixels[i*4+1] = pixels[i*3+1];
750                                 pixels[i*4+0] = pixels[i*3+0];
751                         }
752                 }
753                 else if(components == 1) {
754                         /* grayscale */
755                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
756                                 pixels[i*4+3] = 1.0f;
757                                 pixels[i*4+2] = pixels[i];
758                                 pixels[i*4+1] = pixels[i];
759                                 pixels[i*4+0] = pixels[i];
760                         }
761                 }
762
763                 if(img->use_alpha == false) {
764                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
765                                 pixels[i*4+3] = 1.0f;
766                         }
767                 }
768         }
769
770         return true;
771 }
772
773 void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageDataType type, int slot, Progress *progress)
774 {
775         if(progress->get_cancel())
776                 return;
777         
778         Image *img = images[type][slot];
779
780         if(osl_texture_system && !img->builtin_data)
781                 return;
782
783         string filename = path_filename(images[type][slot]->filename);
784         progress->set_status("Updating Images", "Loading " + filename);
785
786         /* Slot assignment */
787         int flat_slot = type_index_to_flattened_slot(slot, type);
788
789         string name;
790         if(flat_slot >= 100)
791                 name = string_printf("__tex_image_%s_%d", name_from_type(type).c_str(), flat_slot);
792         else if(flat_slot >= 10)
793                 name = string_printf("__tex_image_%s_0%d", name_from_type(type).c_str(), flat_slot);
794         else
795                 name = string_printf("__tex_image_%s_00%d", name_from_type(type).c_str(), flat_slot);
796
797         if(type == IMAGE_DATA_TYPE_FLOAT4) {
798                 device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
799
800                 if(tex_img.device_pointer) {
801                         thread_scoped_lock device_lock(device_mutex);
802                         device->tex_free(tex_img);
803                 }
804
805                 if(!file_load_float_image(img, type, tex_img)) {
806                         /* on failure to load, we set a 1x1 pixels pink image */
807                         float *pixels = (float*)tex_img.resize(1, 1);
808
809                         pixels[0] = TEX_IMAGE_MISSING_R;
810                         pixels[1] = TEX_IMAGE_MISSING_G;
811                         pixels[2] = TEX_IMAGE_MISSING_B;
812                         pixels[3] = TEX_IMAGE_MISSING_A;
813                 }
814
815                 if(!pack_images) {
816                         thread_scoped_lock device_lock(device_mutex);
817                         device->tex_alloc(name.c_str(),
818                                           tex_img,
819                                           img->interpolation,
820                                           img->extension);
821                 }
822         }
823         else if(type == IMAGE_DATA_TYPE_FLOAT) {
824                 device_vector<float>& tex_img = dscene->tex_float_image[slot];
825
826                 if(tex_img.device_pointer) {
827                         thread_scoped_lock device_lock(device_mutex);
828                         device->tex_free(tex_img);
829                 }
830
831                 if(!file_load_float_image(img, type, tex_img)) {
832                         /* on failure to load, we set a 1x1 pixels pink image */
833                         float *pixels = (float*)tex_img.resize(1, 1);
834
835                         pixels[0] = TEX_IMAGE_MISSING_R;
836                 }
837
838                 if(!pack_images) {
839                         thread_scoped_lock device_lock(device_mutex);
840                         device->tex_alloc(name.c_str(),
841                                           tex_img,
842                                           img->interpolation,
843                                           img->extension);
844                 }
845         }
846         else if(type == IMAGE_DATA_TYPE_BYTE4) {
847                 device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
848
849                 if(tex_img.device_pointer) {
850                         thread_scoped_lock device_lock(device_mutex);
851                         device->tex_free(tex_img);
852                 }
853
854                 if(!file_load_byte_image(img, type, tex_img)) {
855                         /* on failure to load, we set a 1x1 pixels pink image */
856                         uchar *pixels = (uchar*)tex_img.resize(1, 1);
857
858                         pixels[0] = (TEX_IMAGE_MISSING_R * 255);
859                         pixels[1] = (TEX_IMAGE_MISSING_G * 255);
860                         pixels[2] = (TEX_IMAGE_MISSING_B * 255);
861                         pixels[3] = (TEX_IMAGE_MISSING_A * 255);
862                 }
863
864                 if(!pack_images) {
865                         thread_scoped_lock device_lock(device_mutex);
866                         device->tex_alloc(name.c_str(),
867                                           tex_img,
868                                           img->interpolation,
869                                           img->extension);
870                 }
871         }
872         else if(type == IMAGE_DATA_TYPE_BYTE){
873                 device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
874
875                 if(tex_img.device_pointer) {
876                         thread_scoped_lock device_lock(device_mutex);
877                         device->tex_free(tex_img);
878                 }
879
880                 if(!file_load_byte_image(img, type, tex_img)) {
881                         /* on failure to load, we set a 1x1 pixels pink image */
882                         uchar *pixels = (uchar*)tex_img.resize(1, 1);
883
884                         pixels[0] = (TEX_IMAGE_MISSING_R * 255);
885                 }
886
887                 if(!pack_images) {
888                         thread_scoped_lock device_lock(device_mutex);
889                         device->tex_alloc(name.c_str(),
890                                           tex_img,
891                                           img->interpolation,
892                                           img->extension);
893                 }
894         }
895         else if(type == IMAGE_DATA_TYPE_HALF4){
896                 device_vector<half4>& tex_img = dscene->tex_half4_image[slot];
897
898                 if(tex_img.device_pointer) {
899                         thread_scoped_lock device_lock(device_mutex);
900                         device->tex_free(tex_img);
901                 }
902
903                 if(!file_load_half_image(img, type, tex_img)) {
904                         /* on failure to load, we set a 1x1 pixels pink image */
905                         half *pixels = (half*)tex_img.resize(1, 1);
906
907                         pixels[0] = TEX_IMAGE_MISSING_R;
908                         pixels[1] = TEX_IMAGE_MISSING_G;
909                         pixels[2] = TEX_IMAGE_MISSING_B;
910                         pixels[3] = TEX_IMAGE_MISSING_A;
911                 }
912
913                 if(!pack_images) {
914                         thread_scoped_lock device_lock(device_mutex);
915                         device->tex_alloc(name.c_str(),
916                                           tex_img,
917                                           img->interpolation,
918                                           img->extension);
919                 }
920         }
921         else if(type == IMAGE_DATA_TYPE_HALF){
922                 device_vector<half>& tex_img = dscene->tex_half_image[slot];
923
924                 if(tex_img.device_pointer) {
925                         thread_scoped_lock device_lock(device_mutex);
926                         device->tex_free(tex_img);
927                 }
928
929                 if(!file_load_half_image(img, type, tex_img)) {
930                         /* on failure to load, we set a 1x1 pixels pink image */
931                         half *pixels = (half*)tex_img.resize(1, 1);
932
933                         pixels[0] = TEX_IMAGE_MISSING_R;
934                 }
935
936                 if(!pack_images) {
937                         thread_scoped_lock device_lock(device_mutex);
938                         device->tex_alloc(name.c_str(),
939                                           tex_img,
940                                           img->interpolation,
941                                           img->extension);
942                 }
943         }
944
945         img->need_load = false;
946 }
947
948 void ImageManager::device_free_image(Device *device, DeviceScene *dscene, ImageDataType type, int slot)
949 {
950         Image *img = images[type][slot];
951
952         if(img) {
953                 if(osl_texture_system && !img->builtin_data) {
954 #ifdef WITH_OSL
955                         ustring filename(images[type][slot]->filename);
956                         ((OSL::TextureSystem*)osl_texture_system)->invalidate(filename);
957 #endif
958                 }
959                 else if(type == IMAGE_DATA_TYPE_FLOAT4) {
960                         device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
961
962                         if(tex_img.device_pointer) {
963                                 thread_scoped_lock device_lock(device_mutex);
964                                 device->tex_free(tex_img);
965                         }
966
967                         tex_img.clear();
968                 }
969                 else if(type == IMAGE_DATA_TYPE_FLOAT) {
970                         device_vector<float>& tex_img = dscene->tex_float_image[slot];
971
972                         if(tex_img.device_pointer) {
973                                 thread_scoped_lock device_lock(device_mutex);
974                                 device->tex_free(tex_img);
975                         }
976
977                         tex_img.clear();
978                 }
979                 else if(type == IMAGE_DATA_TYPE_BYTE4) {
980                         device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
981
982                         if(tex_img.device_pointer) {
983                                 thread_scoped_lock device_lock(device_mutex);
984                                 device->tex_free(tex_img);
985                         }
986
987                         tex_img.clear();
988                 }
989                 else if(type == IMAGE_DATA_TYPE_BYTE){
990                         device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
991
992                         if(tex_img.device_pointer) {
993                                 thread_scoped_lock device_lock(device_mutex);
994                                 device->tex_free(tex_img);
995                         }
996
997                         tex_img.clear();
998                 }
999                 else if(type == IMAGE_DATA_TYPE_HALF4){
1000                         device_vector<half4>& tex_img = dscene->tex_half4_image[slot];
1001
1002                         if(tex_img.device_pointer) {
1003                                 thread_scoped_lock device_lock(device_mutex);
1004                                 device->tex_free(tex_img);
1005                         }
1006
1007                         tex_img.clear();
1008                 }
1009                 else if(type == IMAGE_DATA_TYPE_HALF){
1010                         device_vector<half>& tex_img = dscene->tex_half_image[slot];
1011
1012                         if(tex_img.device_pointer) {
1013                                 thread_scoped_lock device_lock(device_mutex);
1014                                 device->tex_free(tex_img);
1015                         }
1016
1017                         tex_img.clear();
1018                 }
1019
1020                 delete images[type][slot];
1021                 images[type][slot] = NULL;
1022         }
1023 }
1024
1025 void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress& progress)
1026 {
1027         if(!need_update)
1028                 return;
1029
1030         TaskPool pool;
1031
1032         for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
1033                 for(size_t slot = 0; slot < images[type].size(); slot++) {
1034                         if(!images[type][slot])
1035                                 continue;
1036
1037                         if(images[type][slot]->users == 0) {
1038                                 device_free_image(device, dscene, (ImageDataType)type, slot);
1039                         }
1040                         else if(images[type][slot]->need_load) {
1041                                 if(!osl_texture_system || images[type][slot]->builtin_data)
1042                                         pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, (ImageDataType)type, slot, &progress));
1043                         }
1044                 }
1045         }
1046
1047         pool.wait_work();
1048
1049         if(pack_images)
1050                 device_pack_images(device, dscene, progress);
1051
1052         need_update = false;
1053 }
1054
1055 void ImageManager::device_update_slot(Device *device,
1056                                       DeviceScene *dscene,
1057                                       int flat_slot,
1058                                       Progress *progress)
1059 {
1060         ImageDataType type;
1061         int slot = flattened_slot_to_type_index(flat_slot, &type);
1062
1063         Image *image = images[type][slot];
1064         assert(image != NULL);
1065
1066         if(image->users == 0) {
1067                 device_free_image(device, dscene, type, slot);
1068         }
1069         else if(image->need_load) {
1070                 if(!osl_texture_system || image->builtin_data)
1071                         device_load_image(device,
1072                                           dscene,
1073                                           type,
1074                                           slot,
1075                                           progress);
1076         }
1077 }
1078
1079 uint8_t ImageManager::pack_image_options(ImageDataType type, size_t slot)
1080 {
1081         uint8_t options;
1082         /* Image Options are packed into one uint:
1083          * bit 0 -> Interpolation
1084          * bit 1 + 2  + 3-> Extension */
1085         if(images[type][slot]->interpolation == INTERPOLATION_CLOSEST)
1086                 options |= (1 << 0);
1087
1088         if(images[type][slot]->extension == EXTENSION_REPEAT)
1089                 options |= (1 << 1);
1090         else if(images[type][slot]->extension == EXTENSION_EXTEND)
1091                 options |= (1 << 2);
1092         else /* EXTENSION_CLIP */
1093                 options |= (1 << 3);
1094
1095         return options;
1096 }
1097
1098 void ImageManager::device_pack_images(Device *device,
1099                                       DeviceScene *dscene,
1100                                       Progress& /*progess*/)
1101 {
1102         /* For OpenCL, we pack all image textures into a single large texture, and
1103          * do our own interpolation in the kernel. */
1104         size_t size = 0, offset = 0;
1105         ImageDataType type;
1106
1107         int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4];
1108         uint4 *info = dscene->tex_image_packed_info.resize(info_size);
1109
1110         /* Byte Textures*/
1111         type = IMAGE_DATA_TYPE_BYTE4;
1112
1113         for(size_t slot = 0; slot < images[type].size(); slot++) {
1114                 if(!images[type][slot])
1115                         continue;
1116
1117                 device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
1118                 size += tex_img.size();
1119         }
1120
1121         uchar4 *pixels_byte = dscene->tex_image_byte4_packed.resize(size);
1122
1123         for(size_t slot = 0; slot < images[type].size(); slot++) {
1124                 if(!images[type][slot])
1125                         continue;
1126
1127                 device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
1128
1129                 uint8_t options = pack_image_options(type, slot);
1130
1131                 info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
1132
1133                 memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
1134                 offset += tex_img.size();
1135         }
1136
1137         /* Float Textures*/
1138         type = IMAGE_DATA_TYPE_FLOAT4;
1139         size = 0, offset = 0;
1140
1141         for(size_t slot = 0; slot < images[type].size(); slot++) {
1142                 if(!images[type][slot])
1143                         continue;
1144
1145                 device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
1146                 size += tex_img.size();
1147         }
1148
1149         float4 *pixels_float = dscene->tex_image_float4_packed.resize(size);
1150
1151         for(size_t slot = 0; slot < images[type].size(); slot++) {
1152                 if(!images[type][slot])
1153                         continue;
1154
1155                 device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
1156
1157                 /* todo: support 3D textures, only CPU for now */
1158
1159                 uint8_t options = pack_image_options(type, slot);
1160                 info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
1161
1162                 memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
1163                 offset += tex_img.size();
1164         }
1165
1166         if(dscene->tex_image_byte4_packed.size()) {
1167                 if(dscene->tex_image_byte4_packed.device_pointer) {
1168                         thread_scoped_lock device_lock(device_mutex);
1169                         device->tex_free(dscene->tex_image_byte4_packed);
1170                 }
1171                 device->tex_alloc("__tex_image_byte4_packed", dscene->tex_image_byte4_packed);
1172         }
1173         if(dscene->tex_image_float4_packed.size()) {
1174                 if(dscene->tex_image_float4_packed.device_pointer) {
1175                         thread_scoped_lock device_lock(device_mutex);
1176                         device->tex_free(dscene->tex_image_float4_packed);
1177                 }
1178                 device->tex_alloc("__tex_image_float4_packed", dscene->tex_image_float4_packed);
1179         }
1180         if(dscene->tex_image_packed_info.size()) {
1181                 if(dscene->tex_image_packed_info.device_pointer) {
1182                         thread_scoped_lock device_lock(device_mutex);
1183                         device->tex_free(dscene->tex_image_packed_info);
1184                 }
1185                 device->tex_alloc("__tex_image_packed_info", dscene->tex_image_packed_info);
1186         }
1187 }
1188
1189 void ImageManager::device_free_builtin(Device *device, DeviceScene *dscene)
1190 {
1191         for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
1192                 for(size_t slot = 0; slot < images[type].size(); slot++) {
1193                         if(images[type][slot] && images[type][slot]->builtin_data)
1194                                 device_free_image(device, dscene, (ImageDataType)type, slot);
1195                 }
1196         }
1197 }
1198
1199 void ImageManager::device_free(Device *device, DeviceScene *dscene)
1200 {
1201         for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
1202                 for(size_t slot = 0; slot < images[type].size(); slot++) {
1203                         device_free_image(device, dscene, (ImageDataType)type, slot);
1204                 }
1205                 images[type].clear();
1206         }
1207
1208         device->tex_free(dscene->tex_image_byte4_packed);
1209         device->tex_free(dscene->tex_image_float4_packed);
1210         device->tex_free(dscene->tex_image_packed_info);
1211
1212         dscene->tex_image_byte4_packed.clear();
1213         dscene->tex_image_float4_packed.clear();
1214         dscene->tex_image_packed_info.clear();
1215 }
1216
1217 CCL_NAMESPACE_END
1218