Merge branch 'master' into blender2.8
[blender.git] / intern / cycles / render / image.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "device/device.h"
18 #include "render/image.h"
19 #include "render/scene.h"
20
21 #include "util/util_foreach.h"
22 #include "util/util_logging.h"
23 #include "util/util_path.h"
24 #include "util/util_progress.h"
25 #include "util/util_texture.h"
26
27 #ifdef WITH_OSL
28 #include <OSL/oslexec.h>
29 #endif
30
31 CCL_NAMESPACE_BEGIN
32
33 ImageManager::ImageManager(const DeviceInfo& info)
34 {
35         need_update = true;
36         pack_images = false;
37         osl_texture_system = NULL;
38         animation_frame = 0;
39
40         /* In case of multiple devices used we need to know type of an actual
41          * compute device.
42          *
43          * NOTE: We assume that all the devices are same type, otherwise we'll
44          * be screwed on so many levels..
45          */
46         DeviceType device_type = info.type;
47         if(device_type == DEVICE_MULTI) {
48                 device_type = info.multi_devices[0].type;
49         }
50
51         /* Set image limits */
52         max_num_images = TEX_NUM_MAX;
53         has_half_images = true;
54         cuda_fermi_limits = false;
55
56         if(device_type == DEVICE_CUDA) {
57                 if(!info.has_bindless_textures) {
58                         /* CUDA Fermi hardware (SM 2.x) has a hard limit on the number of textures */
59                         cuda_fermi_limits = true;
60                         has_half_images = false;
61                 }
62         }
63         else if(device_type == DEVICE_OPENCL) {
64                 has_half_images = false;
65         }
66
67         for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
68                 tex_num_images[type] = 0;
69         }
70 }
71
72 ImageManager::~ImageManager()
73 {
74         for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
75                 for(size_t slot = 0; slot < images[type].size(); slot++)
76                         assert(!images[type][slot]);
77         }
78 }
79
80 void ImageManager::set_pack_images(bool pack_images_)
81 {
82         pack_images = pack_images_;
83 }
84
85 void ImageManager::set_osl_texture_system(void *texture_system)
86 {
87         osl_texture_system = texture_system;
88 }
89
90 bool ImageManager::set_animation_frame_update(int frame)
91 {
92         if(frame != animation_frame) {
93                 animation_frame = frame;
94
95                 for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
96                         for(size_t slot = 0; slot < images[type].size(); slot++) {
97                                 if(images[type][slot] && images[type][slot]->animated)
98                                         return true;
99                         }
100                 }
101         }
102
103         return false;
104 }
105
106 ImageDataType ImageManager::get_image_metadata(const string& filename,
107                                                void *builtin_data,
108                                                bool& is_linear)
109 {
110         bool is_float = false, is_half = false;
111         is_linear = false;
112         int channels = 4;
113
114         if(builtin_data) {
115                 if(builtin_image_info_cb) {
116                         int width, height, depth;
117                         builtin_image_info_cb(filename, builtin_data, is_float, width, height, depth, channels);
118                 }
119
120                 if(is_float) {
121                         is_linear = true;
122                         return (channels > 1) ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_FLOAT;
123                 }
124                 else {
125                         return (channels > 1) ? IMAGE_DATA_TYPE_BYTE4 : IMAGE_DATA_TYPE_BYTE;
126                 }
127         }
128
129         /* Perform preliminary checks, with meaningful logging. */
130         if(!path_exists(filename)) {
131                 VLOG(1) << "File '" << filename << "' does not exist.";
132                 return IMAGE_DATA_TYPE_BYTE4;
133         }
134         if(path_is_directory(filename)) {
135                 VLOG(1) << "File '" << filename << "' is a directory, can't use as image.";
136                 return IMAGE_DATA_TYPE_BYTE4;
137         }
138
139         ImageInput *in = ImageInput::create(filename);
140
141         if(in) {
142                 ImageSpec spec;
143
144                 if(in->open(filename, spec)) {
145                         /* check the main format, and channel formats;
146                          * if any take up more than one byte, we'll need a float texture slot */
147                         if(spec.format.basesize() > 1) {
148                                 is_float = true;
149                                 is_linear = true;
150                         }
151
152                         for(size_t channel = 0; channel < spec.channelformats.size(); channel++) {
153                                 if(spec.channelformats[channel].basesize() > 1) {
154                                         is_float = true;
155                                         is_linear = true;
156                                 }
157                         }
158
159                         /* check if it's half float */
160                         if(spec.format == TypeDesc::HALF)
161                                 is_half = true;
162
163                         channels = spec.nchannels;
164
165                         /* basic color space detection, not great but better than nothing
166                          * before we do OpenColorIO integration */
167                         if(is_float) {
168                                 string colorspace = spec.get_string_attribute("oiio:ColorSpace");
169
170                                 is_linear = !(colorspace == "sRGB" ||
171                                               colorspace == "GammaCorrected" ||
172                                               (colorspace == "" &&
173                                                   (strcmp(in->format_name(), "png") == 0 ||
174                                                    strcmp(in->format_name(), "tiff") == 0 ||
175                                                    strcmp(in->format_name(), "dpx") == 0 ||
176                                                    strcmp(in->format_name(), "jpeg2000") == 0)));
177                         }
178                         else {
179                                 is_linear = false;
180                         }
181
182                         in->close();
183                 }
184
185                 delete in;
186         }
187
188         if(is_half) {
189                 return (channels > 1) ? IMAGE_DATA_TYPE_HALF4 : IMAGE_DATA_TYPE_HALF;
190         }
191         else if(is_float) {
192                 return (channels > 1) ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_FLOAT;
193         }
194         else {
195                 return (channels > 1) ? IMAGE_DATA_TYPE_BYTE4 : IMAGE_DATA_TYPE_BYTE;
196         }
197 }
198
199 int ImageManager::max_flattened_slot(ImageDataType type)
200 {
201         if(tex_num_images[type] == 0) {
202                 /* No textures for the type, no slots needs allocation. */
203                 return 0;
204         }
205         return type_index_to_flattened_slot(tex_num_images[type], type);
206 }
207
208 /* The lower three bits of a device texture slot number indicate its type.
209  * These functions convert the slot ids from ImageManager "images" ones
210  * to device ones and vice verse.
211  *
212  * There are special cases for CUDA Fermi, since there we have only 90 image texture
213  * slots available and should keep the flattended numbers in the 0-89 range.
214  */
215 int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type)
216 {
217         if(cuda_fermi_limits) {
218                 if(type == IMAGE_DATA_TYPE_BYTE4) {
219                         return slot + TEX_START_BYTE4_CUDA;
220                 }
221                 else {
222                         return slot;
223                 }
224         }
225
226         return (slot << IMAGE_DATA_TYPE_SHIFT) | (type);
227 }
228
229 int ImageManager::flattened_slot_to_type_index(int flat_slot, ImageDataType *type)
230 {
231         if(cuda_fermi_limits) {
232                 if(flat_slot >= 4) {
233                         *type = IMAGE_DATA_TYPE_BYTE4;
234                         return flat_slot - TEX_START_BYTE4_CUDA;
235                 }
236                 else {
237                         *type = IMAGE_DATA_TYPE_FLOAT4;
238                         return flat_slot;
239                 }
240         }
241
242         *type = (ImageDataType)(flat_slot & IMAGE_DATA_TYPE_MASK);
243         return flat_slot >> IMAGE_DATA_TYPE_SHIFT;
244 }
245
246 string ImageManager::name_from_type(int type)
247 {
248         if(type == IMAGE_DATA_TYPE_FLOAT4)
249                 return "float4";
250         else if(type == IMAGE_DATA_TYPE_FLOAT)
251                 return "float";
252         else if(type == IMAGE_DATA_TYPE_BYTE)
253                 return "byte";
254         else if(type == IMAGE_DATA_TYPE_HALF4)
255                 return "half4";
256         else if(type == IMAGE_DATA_TYPE_HALF)
257                 return "half";
258         else
259                 return "byte4";
260 }
261
262 static bool image_equals(ImageManager::Image *image,
263                          const string& filename,
264                          void *builtin_data,
265                          InterpolationType interpolation,
266                          ExtensionType extension,
267                          bool use_alpha)
268 {
269         return image->filename == filename &&
270                image->builtin_data == builtin_data &&
271                image->interpolation == interpolation &&
272                image->extension == extension &&
273                image->use_alpha == use_alpha;
274 }
275
276 int ImageManager::add_image(const string& filename,
277                             void *builtin_data,
278                             bool animated,
279                             float frame,
280                             bool& is_float,
281                             bool& is_linear,
282                             InterpolationType interpolation,
283                             ExtensionType extension,
284                             bool use_alpha)
285 {
286         Image *img;
287         size_t slot;
288
289         ImageDataType type = get_image_metadata(filename, builtin_data, is_linear);
290
291         thread_scoped_lock device_lock(device_mutex);
292
293         /* Check whether it's a float texture. */
294         is_float = (type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4);
295
296         /* No single channel and half textures on CUDA (Fermi) and no half on OpenCL, use available slots */
297         if(!has_half_images) {
298                 if(type == IMAGE_DATA_TYPE_HALF4) {
299                         type = IMAGE_DATA_TYPE_FLOAT4;
300                 }
301                 else if(type == IMAGE_DATA_TYPE_HALF) {
302                         type = IMAGE_DATA_TYPE_FLOAT;
303                 }
304         }
305
306         if(cuda_fermi_limits) {
307                 if(type == IMAGE_DATA_TYPE_FLOAT) {
308                         type = IMAGE_DATA_TYPE_FLOAT4;
309                 }
310                 else if(type == IMAGE_DATA_TYPE_BYTE) {
311                         type = IMAGE_DATA_TYPE_BYTE4;
312                 }
313         }
314
315         /* Fnd existing image. */
316         for(slot = 0; slot < images[type].size(); slot++) {
317                 img = images[type][slot];
318                 if(img && image_equals(img,
319                                        filename,
320                                        builtin_data,
321                                        interpolation,
322                                        extension,
323                                        use_alpha))
324                 {
325                         if(img->frame != frame) {
326                                 img->frame = frame;
327                                 img->need_load = true;
328                         }
329                         if(img->use_alpha != use_alpha) {
330                                 img->use_alpha = use_alpha;
331                                 img->need_load = true;
332                         }
333                         img->users++;
334                         return type_index_to_flattened_slot(slot, type);
335                 }
336         }
337
338         /* Find free slot. */
339         for(slot = 0; slot < images[type].size(); slot++) {
340                 if(!images[type][slot])
341                         break;
342         }
343
344         /* Count if we're over the limit */
345         if(cuda_fermi_limits) {
346                 if(tex_num_images[IMAGE_DATA_TYPE_BYTE4] == TEX_NUM_BYTE4_CUDA
347                         || tex_num_images[IMAGE_DATA_TYPE_FLOAT4] == TEX_NUM_FLOAT4_CUDA)
348                 {
349                         printf("ImageManager::add_image: Reached %s image limit (%d), skipping '%s'\n",
350                                 name_from_type(type).c_str(), tex_num_images[type], filename.c_str());
351                         return -1;
352                 }
353         }
354         else {
355                 /* Very unlikely, since max_num_images is insanely big. But better safe than sorry. */
356                 int tex_count = 0;
357                 for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
358                         tex_count += tex_num_images[type];
359                 }
360                 if(tex_count > max_num_images) {
361                         printf("ImageManager::add_image: Reached image limit (%d), skipping '%s'\n",
362                                 max_num_images, filename.c_str());
363                         return -1;
364                 }
365         }
366
367         if(slot == images[type].size()) {
368                 images[type].resize(images[type].size() + 1);
369         }
370
371         /* Add new image. */
372         img = new Image();
373         img->filename = filename;
374         img->builtin_data = builtin_data;
375         img->need_load = true;
376         img->animated = animated;
377         img->frame = frame;
378         img->interpolation = interpolation;
379         img->extension = extension;
380         img->users = 1;
381         img->use_alpha = use_alpha;
382
383         images[type][slot] = img;
384
385         ++tex_num_images[type];
386
387         need_update = true;
388
389         return type_index_to_flattened_slot(slot, type);
390 }
391
392 void ImageManager::remove_image(int flat_slot)
393 {
394         ImageDataType type;
395         int slot = flattened_slot_to_type_index(flat_slot, &type);
396
397         Image *image = images[type][slot];
398         assert(image && image->users >= 1);
399
400         /* decrement user count */
401         image->users--;
402
403         /* don't remove immediately, rather do it all together later on. one of
404          * the reasons for this is that on shader changes we add and remove nodes
405          * that use them, but we do not want to reload the image all the time. */
406         if(image->users == 0)
407                 need_update = true;
408 }
409
410 void ImageManager::remove_image(const string& filename,
411                                 void *builtin_data,
412                                 InterpolationType interpolation,
413                                 ExtensionType extension,
414                                 bool use_alpha)
415 {
416         size_t slot;
417
418         for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
419                 for(slot = 0; slot < images[type].size(); slot++) {
420                         if(images[type][slot] && image_equals(images[type][slot],
421                                                               filename,
422                                                               builtin_data,
423                                                               interpolation,
424                                                               extension,
425                                                               use_alpha))
426                         {
427                                 remove_image(type_index_to_flattened_slot(slot, (ImageDataType)type));
428                                 return;
429                         }
430                 }
431         }
432 }
433
434 /* TODO(sergey): Deduplicate with the iteration above, but make it pretty,
435  * without bunch of arguments passing around making code readability even
436  * more cluttered.
437  */
438 void ImageManager::tag_reload_image(const string& filename,
439                                     void *builtin_data,
440                                     InterpolationType interpolation,
441                                     ExtensionType extension,
442                                     bool use_alpha)
443 {
444         for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
445                 for(size_t slot = 0; slot < images[type].size(); slot++) {
446                         if(images[type][slot] && image_equals(images[type][slot],
447                                                               filename,
448                                                               builtin_data,
449                                                               interpolation,
450                                                               extension,
451                                                               use_alpha))
452                         {
453                                 images[type][slot]->need_load = true;
454                                 break;
455                         }
456                 }
457         }
458 }
459
460 bool ImageManager::file_load_image_generic(Image *img, ImageInput **in, int &width, int &height, int &depth, int &components)
461 {
462         if(img->filename == "")
463                 return false;
464
465         if(!img->builtin_data) {
466                 /* NOTE: Error logging is done in meta data acquisition. */
467                 if(!path_exists(img->filename) || path_is_directory(img->filename)) {
468                         return false;
469                 }
470
471                 /* load image from file through OIIO */
472                 *in = ImageInput::create(img->filename);
473
474                 if(!*in)
475                         return false;
476
477                 ImageSpec spec = ImageSpec();
478                 ImageSpec config = ImageSpec();
479
480                 if(img->use_alpha == false)
481                         config.attribute("oiio:UnassociatedAlpha", 1);
482
483                 if(!(*in)->open(img->filename, spec, config)) {
484                         delete *in;
485                         *in = NULL;
486                         return false;
487                 }
488
489                 width = spec.width;
490                 height = spec.height;
491                 depth = spec.depth;
492                 components = spec.nchannels;
493         }
494         else {
495                 /* load image using builtin images callbacks */
496                 if(!builtin_image_info_cb || !builtin_image_pixels_cb)
497                         return false;
498
499                 bool is_float;
500                 builtin_image_info_cb(img->filename, img->builtin_data, is_float, width, height, depth, components);
501         }
502
503         /* we only handle certain number of components */
504         if(!(components >= 1 && components <= 4)) {
505                 if(*in) {
506                         (*in)->close();
507                         delete *in;
508                         *in = NULL;
509                 }
510
511                 return false;
512         }
513
514         return true;
515 }
516
517 template<TypeDesc::BASETYPE FileFormat,
518          typename StorageType,
519          typename DeviceType>
520 bool ImageManager::file_load_image(Image *img,
521                                    ImageDataType type,
522                                    int texture_limit,
523                                    device_vector<DeviceType>& tex_img)
524 {
525         const StorageType alpha_one = (FileFormat == TypeDesc::UINT8)? 255 : 1;
526         ImageInput *in = NULL;
527         int width, height, depth, components;
528         if(!file_load_image_generic(img, &in, width, height, depth, components)) {
529                 return false;
530         }
531         /* Read RGBA pixels. */
532         vector<StorageType> pixels_storage;
533         StorageType *pixels;
534         const size_t max_size = max(max(width, height), depth);
535         if(texture_limit > 0 && max_size > texture_limit) {
536                 pixels_storage.resize(((size_t)width)*height*depth*4);
537                 pixels = &pixels_storage[0];
538         }
539         else {
540                 pixels = (StorageType*)tex_img.resize(width, height, depth);
541         }
542         bool cmyk = false;
543         const size_t num_pixels = ((size_t)width) * height * depth;
544         if(in) {
545                 StorageType *readpixels = pixels;
546                 vector<StorageType> tmppixels;
547                 if(components > 4) {
548                         tmppixels.resize(((size_t)width)*height*components);
549                         readpixels = &tmppixels[0];
550                 }
551                 if(depth <= 1) {
552                         size_t scanlinesize = ((size_t)width)*components*sizeof(StorageType);
553                         in->read_image(FileFormat,
554                                        (uchar*)readpixels + (height-1)*scanlinesize,
555                                        AutoStride,
556                                        -scanlinesize,
557                                        AutoStride);
558                 }
559                 else {
560                         in->read_image(FileFormat, (uchar*)readpixels);
561                 }
562                 if(components > 4) {
563                         size_t dimensions = ((size_t)width)*height;
564                         for(size_t i = dimensions-1, pixel = 0; pixel < dimensions; pixel++, i--) {
565                                 pixels[i*4+3] = tmppixels[i*components+3];
566                                 pixels[i*4+2] = tmppixels[i*components+2];
567                                 pixels[i*4+1] = tmppixels[i*components+1];
568                                 pixels[i*4+0] = tmppixels[i*components+0];
569                         }
570                         tmppixels.clear();
571                 }
572                 cmyk = strcmp(in->format_name(), "jpeg") == 0 && components == 4;
573                 in->close();
574                 delete in;
575         }
576         else {
577                 if(FileFormat == TypeDesc::FLOAT) {
578                         builtin_image_float_pixels_cb(img->filename,
579                                                       img->builtin_data,
580                                                       (float*)&pixels[0],
581                                                       num_pixels * components);
582                 }
583                 else if(FileFormat == TypeDesc::UINT8) {
584                         builtin_image_pixels_cb(img->filename,
585                                                 img->builtin_data,
586                                                 (uchar*)&pixels[0],
587                                                 num_pixels * components);
588                 }
589                 else {
590                         /* TODO(dingto): Support half for ImBuf. */
591                 }
592         }
593         /* Check if we actually have a float4 slot, in case components == 1,
594          * but device doesn't support single channel textures.
595          */
596         bool is_rgba = (type == IMAGE_DATA_TYPE_FLOAT4 ||
597                         type == IMAGE_DATA_TYPE_HALF4 ||
598                         type == IMAGE_DATA_TYPE_BYTE4);
599         if(is_rgba) {
600                 if(cmyk) {
601                         /* CMYK */
602                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
603                                 pixels[i*4+2] = (pixels[i*4+2]*pixels[i*4+3])/255;
604                                 pixels[i*4+1] = (pixels[i*4+1]*pixels[i*4+3])/255;
605                                 pixels[i*4+0] = (pixels[i*4+0]*pixels[i*4+3])/255;
606                                 pixels[i*4+3] = alpha_one;
607                         }
608                 }
609                 else if(components == 2) {
610                         /* grayscale + alpha */
611                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
612                                 pixels[i*4+3] = pixels[i*2+1];
613                                 pixels[i*4+2] = pixels[i*2+0];
614                                 pixels[i*4+1] = pixels[i*2+0];
615                                 pixels[i*4+0] = pixels[i*2+0];
616                         }
617                 }
618                 else if(components == 3) {
619                         /* RGB */
620                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
621                                 pixels[i*4+3] = alpha_one;
622                                 pixels[i*4+2] = pixels[i*3+2];
623                                 pixels[i*4+1] = pixels[i*3+1];
624                                 pixels[i*4+0] = pixels[i*3+0];
625                         }
626                 }
627                 else if(components == 1) {
628                         /* grayscale */
629                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
630                                 pixels[i*4+3] = alpha_one;
631                                 pixels[i*4+2] = pixels[i];
632                                 pixels[i*4+1] = pixels[i];
633                                 pixels[i*4+0] = pixels[i];
634                         }
635                 }
636                 if(img->use_alpha == false) {
637                         for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
638                                 pixels[i*4+3] = alpha_one;
639                         }
640                 }
641         }
642         if(pixels_storage.size() > 0) {
643                 float scale_factor = 1.0f;
644                 while(max_size * scale_factor > texture_limit) {
645                         scale_factor *= 0.5f;
646                 }
647                 VLOG(1) << "Scaling image " << img->filename
648                         << " by a factor of " << scale_factor << ".";
649                 vector<StorageType> scaled_pixels;
650                 size_t scaled_width, scaled_height, scaled_depth;
651                 util_image_resize_pixels(pixels_storage,
652                                          width, height, depth,
653                                          is_rgba ? 4 : 1,
654                                          scale_factor,
655                                          &scaled_pixels,
656                                          &scaled_width, &scaled_height, &scaled_depth);
657                 StorageType *texture_pixels = (StorageType*)tex_img.resize(scaled_width,
658                                                                            scaled_height,
659                                                                            scaled_depth);
660                 memcpy(texture_pixels,
661                        &scaled_pixels[0],
662                        scaled_pixels.size() * sizeof(StorageType));
663         }
664         return true;
665 }
666
667 void ImageManager::device_load_image(Device *device,
668                                      DeviceScene *dscene,
669                                      Scene *scene,
670                                      ImageDataType type,
671                                      int slot,
672                                      Progress *progress)
673 {
674         if(progress->get_cancel())
675                 return;
676
677         Image *img = images[type][slot];
678
679         if(osl_texture_system && !img->builtin_data)
680                 return;
681
682         string filename = path_filename(images[type][slot]->filename);
683         progress->set_status("Updating Images", "Loading " + filename);
684
685         const int texture_limit = scene->params.texture_limit;
686
687         /* Slot assignment */
688         int flat_slot = type_index_to_flattened_slot(slot, type);
689
690         string name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot);
691
692         if(type == IMAGE_DATA_TYPE_FLOAT4) {
693                 if(dscene->tex_float4_image[slot] == NULL)
694                         dscene->tex_float4_image[slot] = new device_vector<float4>();
695                 device_vector<float4>& tex_img = *dscene->tex_float4_image[slot];
696
697                 if(tex_img.device_pointer) {
698                         thread_scoped_lock device_lock(device_mutex);
699                         device->tex_free(tex_img);
700                 }
701
702                 if(!file_load_image<TypeDesc::FLOAT, float>(img,
703                                                             type,
704                                                             texture_limit,
705                                                             tex_img))
706                 {
707                         /* on failure to load, we set a 1x1 pixels pink image */
708                         float *pixels = (float*)tex_img.resize(1, 1);
709
710                         pixels[0] = TEX_IMAGE_MISSING_R;
711                         pixels[1] = TEX_IMAGE_MISSING_G;
712                         pixels[2] = TEX_IMAGE_MISSING_B;
713                         pixels[3] = TEX_IMAGE_MISSING_A;
714                 }
715
716                 if(!pack_images) {
717                         thread_scoped_lock device_lock(device_mutex);
718                         device->tex_alloc(name.c_str(),
719                                           tex_img,
720                                           img->interpolation,
721                                           img->extension);
722                 }
723         }
724         else if(type == IMAGE_DATA_TYPE_FLOAT) {
725                 if(dscene->tex_float_image[slot] == NULL)
726                         dscene->tex_float_image[slot] = new device_vector<float>();
727                 device_vector<float>& tex_img = *dscene->tex_float_image[slot];
728
729                 if(tex_img.device_pointer) {
730                         thread_scoped_lock device_lock(device_mutex);
731                         device->tex_free(tex_img);
732                 }
733
734                 if(!file_load_image<TypeDesc::FLOAT, float>(img,
735                                                             type,
736                                                             texture_limit,
737                                                             tex_img))
738                 {
739                         /* on failure to load, we set a 1x1 pixels pink image */
740                         float *pixels = (float*)tex_img.resize(1, 1);
741
742                         pixels[0] = TEX_IMAGE_MISSING_R;
743                 }
744
745                 if(!pack_images) {
746                         thread_scoped_lock device_lock(device_mutex);
747                         device->tex_alloc(name.c_str(),
748                                           tex_img,
749                                           img->interpolation,
750                                           img->extension);
751                 }
752         }
753         else if(type == IMAGE_DATA_TYPE_BYTE4) {
754                 if(dscene->tex_byte4_image[slot] == NULL)
755                         dscene->tex_byte4_image[slot] = new device_vector<uchar4>();
756                 device_vector<uchar4>& tex_img = *dscene->tex_byte4_image[slot];
757
758                 if(tex_img.device_pointer) {
759                         thread_scoped_lock device_lock(device_mutex);
760                         device->tex_free(tex_img);
761                 }
762
763                 if(!file_load_image<TypeDesc::UINT8, uchar>(img,
764                                                             type,
765                                                             texture_limit,
766                                                             tex_img))
767                 {
768                         /* on failure to load, we set a 1x1 pixels pink image */
769                         uchar *pixels = (uchar*)tex_img.resize(1, 1);
770
771                         pixels[0] = (TEX_IMAGE_MISSING_R * 255);
772                         pixels[1] = (TEX_IMAGE_MISSING_G * 255);
773                         pixels[2] = (TEX_IMAGE_MISSING_B * 255);
774                         pixels[3] = (TEX_IMAGE_MISSING_A * 255);
775                 }
776
777                 if(!pack_images) {
778                         thread_scoped_lock device_lock(device_mutex);
779                         device->tex_alloc(name.c_str(),
780                                           tex_img,
781                                           img->interpolation,
782                                           img->extension);
783                 }
784         }
785         else if(type == IMAGE_DATA_TYPE_BYTE){
786                 if(dscene->tex_byte_image[slot] == NULL)
787                         dscene->tex_byte_image[slot] = new device_vector<uchar>();
788                 device_vector<uchar>& tex_img = *dscene->tex_byte_image[slot];
789
790                 if(tex_img.device_pointer) {
791                         thread_scoped_lock device_lock(device_mutex);
792                         device->tex_free(tex_img);
793                 }
794
795                 if(!file_load_image<TypeDesc::UINT8, uchar>(img,
796                                                             type,
797                                                             texture_limit,
798                                                             tex_img)) {
799                         /* on failure to load, we set a 1x1 pixels pink image */
800                         uchar *pixels = (uchar*)tex_img.resize(1, 1);
801
802                         pixels[0] = (TEX_IMAGE_MISSING_R * 255);
803                 }
804
805                 if(!pack_images) {
806                         thread_scoped_lock device_lock(device_mutex);
807                         device->tex_alloc(name.c_str(),
808                                           tex_img,
809                                           img->interpolation,
810                                           img->extension);
811                 }
812         }
813         else if(type == IMAGE_DATA_TYPE_HALF4){
814                 if(dscene->tex_half4_image[slot] == NULL)
815                         dscene->tex_half4_image[slot] = new device_vector<half4>();
816                 device_vector<half4>& tex_img = *dscene->tex_half4_image[slot];
817
818                 if(tex_img.device_pointer) {
819                         thread_scoped_lock device_lock(device_mutex);
820                         device->tex_free(tex_img);
821                 }
822
823                 if(!file_load_image<TypeDesc::HALF, half>(img,
824                                                           type,
825                                                           texture_limit,
826                                                           tex_img)) {
827                         /* on failure to load, we set a 1x1 pixels pink image */
828                         half *pixels = (half*)tex_img.resize(1, 1);
829
830                         pixels[0] = TEX_IMAGE_MISSING_R;
831                         pixels[1] = TEX_IMAGE_MISSING_G;
832                         pixels[2] = TEX_IMAGE_MISSING_B;
833                         pixels[3] = TEX_IMAGE_MISSING_A;
834                 }
835
836                 if(!pack_images) {
837                         thread_scoped_lock device_lock(device_mutex);
838                         device->tex_alloc(name.c_str(),
839                                           tex_img,
840                                           img->interpolation,
841                                           img->extension);
842                 }
843         }
844         else if(type == IMAGE_DATA_TYPE_HALF){
845                 if(dscene->tex_half_image[slot] == NULL)
846                         dscene->tex_half_image[slot] = new device_vector<half>();
847                 device_vector<half>& tex_img = *dscene->tex_half_image[slot];
848
849                 if(tex_img.device_pointer) {
850                         thread_scoped_lock device_lock(device_mutex);
851                         device->tex_free(tex_img);
852                 }
853
854                 if(!file_load_image<TypeDesc::HALF, half>(img,
855                                                           type,
856                                                           texture_limit,
857                                                           tex_img)) {
858                         /* on failure to load, we set a 1x1 pixels pink image */
859                         half *pixels = (half*)tex_img.resize(1, 1);
860
861                         pixels[0] = TEX_IMAGE_MISSING_R;
862                 }
863
864                 if(!pack_images) {
865                         thread_scoped_lock device_lock(device_mutex);
866                         device->tex_alloc(name.c_str(),
867                                           tex_img,
868                                           img->interpolation,
869                                           img->extension);
870                 }
871         }
872
873         img->need_load = false;
874 }
875
876 void ImageManager::device_free_image(Device *device, DeviceScene *dscene, ImageDataType type, int slot)
877 {
878         Image *img = images[type][slot];
879
880         if(img) {
881                 if(osl_texture_system && !img->builtin_data) {
882 #ifdef WITH_OSL
883                         ustring filename(images[type][slot]->filename);
884                         ((OSL::TextureSystem*)osl_texture_system)->invalidate(filename);
885 #endif
886                 }
887                 else {
888                         device_memory *tex_img = NULL;
889                         switch(type) {
890                                 case IMAGE_DATA_TYPE_FLOAT4:
891                                         if(slot >= dscene->tex_float4_image.size()) {
892                                                 break;
893                                         }
894                                         tex_img = dscene->tex_float4_image[slot];
895                                         dscene->tex_float4_image[slot] = NULL;
896                                         break;
897                                 case IMAGE_DATA_TYPE_BYTE4:
898                                         if(slot >= dscene->tex_byte4_image.size()) {
899                                                 break;
900                                         }
901                                         tex_img = dscene->tex_byte4_image[slot];
902                                         dscene->tex_byte4_image[slot]= NULL;
903                                         break;
904                                 case IMAGE_DATA_TYPE_HALF4:
905                                         if(slot >= dscene->tex_half4_image.size()) {
906                                                 break;
907                                         }
908                                         tex_img = dscene->tex_half4_image[slot];
909                                         dscene->tex_half4_image[slot]= NULL;
910                                         break;
911                                 case IMAGE_DATA_TYPE_FLOAT:
912                                         if(slot >= dscene->tex_float_image.size()) {
913                                                 break;
914                                         }
915                                         tex_img = dscene->tex_float_image[slot];
916                                         dscene->tex_float_image[slot] = NULL;
917                                         break;
918                                 case IMAGE_DATA_TYPE_BYTE:
919                                         if(slot >= dscene->tex_byte_image.size()) {
920                                                 break;
921                                         }
922                                         tex_img = dscene->tex_byte_image[slot];
923                                         dscene->tex_byte_image[slot]= NULL;
924                                         break;
925                                 case IMAGE_DATA_TYPE_HALF:
926                                         if(slot >= dscene->tex_half_image.size()) {
927                                                 break;
928                                         }
929                                         tex_img = dscene->tex_half_image[slot];
930                                         dscene->tex_half_image[slot]= NULL;
931                                         break;
932                                 default:
933                                         assert(0);
934                                         tex_img = NULL;
935                         }
936                         if(tex_img) {
937                                 if(tex_img->device_pointer) {
938                                         thread_scoped_lock device_lock(device_mutex);
939                                         device->tex_free(*tex_img);
940                                 }
941
942                                 delete tex_img;
943                         }
944                 }
945
946                 delete images[type][slot];
947                 images[type][slot] = NULL;
948                 --tex_num_images[type];
949         }
950 }
951
952 void ImageManager::device_prepare_update(DeviceScene *dscene)
953 {
954         for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
955                 switch(type) {
956                         case IMAGE_DATA_TYPE_FLOAT4:
957                                 if(dscene->tex_float4_image.size() <= tex_num_images[IMAGE_DATA_TYPE_FLOAT4])
958                                         dscene->tex_float4_image.resize(tex_num_images[IMAGE_DATA_TYPE_FLOAT4]);
959                                 break;
960                         case IMAGE_DATA_TYPE_BYTE4:
961                                 if(dscene->tex_byte4_image.size() <= tex_num_images[IMAGE_DATA_TYPE_BYTE4])
962                                         dscene->tex_byte4_image.resize(tex_num_images[IMAGE_DATA_TYPE_BYTE4]);
963                                 break;
964                         case IMAGE_DATA_TYPE_HALF4:
965                                 if(dscene->tex_half4_image.size() <= tex_num_images[IMAGE_DATA_TYPE_HALF4])
966                                         dscene->tex_half4_image.resize(tex_num_images[IMAGE_DATA_TYPE_HALF4]);
967                                 break;
968                         case IMAGE_DATA_TYPE_BYTE:
969                                 if(dscene->tex_byte_image.size() <= tex_num_images[IMAGE_DATA_TYPE_BYTE])
970                                         dscene->tex_byte_image.resize(tex_num_images[IMAGE_DATA_TYPE_BYTE]);
971                                 break;
972                         case IMAGE_DATA_TYPE_FLOAT:
973                                 if(dscene->tex_float_image.size() <= tex_num_images[IMAGE_DATA_TYPE_FLOAT])
974                                         dscene->tex_float_image.resize(tex_num_images[IMAGE_DATA_TYPE_FLOAT]);
975                                 break;
976                         case IMAGE_DATA_TYPE_HALF:
977                                 if(dscene->tex_half_image.size() <= tex_num_images[IMAGE_DATA_TYPE_HALF])
978                                         dscene->tex_half_image.resize(tex_num_images[IMAGE_DATA_TYPE_HALF]);
979                                 break;
980                 }
981         }
982 }
983
984 void ImageManager::device_update(Device *device,
985                                  DeviceScene *dscene,
986                                  Scene *scene,
987                                  Progress& progress)
988 {
989         if(!need_update) {
990                 return;
991         }
992
993         /* Make sure arrays are proper size. */
994         device_prepare_update(dscene);
995
996         TaskPool pool;
997         for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
998                 for(size_t slot = 0; slot < images[type].size(); slot++) {
999                         if(!images[type][slot])
1000                                 continue;
1001
1002                         if(images[type][slot]->users == 0) {
1003                                 device_free_image(device, dscene, (ImageDataType)type, slot);
1004                         }
1005                         else if(images[type][slot]->need_load) {
1006                                 if(!osl_texture_system || images[type][slot]->builtin_data)
1007                                         pool.push(function_bind(&ImageManager::device_load_image,
1008                                                                 this,
1009                                                                 device,
1010                                                                 dscene,
1011                                                                 scene,
1012                                                                 (ImageDataType)type,
1013                                                                 slot,
1014                                                                 &progress));
1015                         }
1016                 }
1017         }
1018
1019         pool.wait_work();
1020
1021         if(pack_images)
1022                 device_pack_images(device, dscene, progress);
1023
1024         need_update = false;
1025 }
1026
1027 void ImageManager::device_update_slot(Device *device,
1028                                       DeviceScene *dscene,
1029                                       Scene *scene,
1030                                       int flat_slot,
1031                                       Progress *progress)
1032 {
1033         ImageDataType type;
1034         int slot = flattened_slot_to_type_index(flat_slot, &type);
1035
1036         Image *image = images[type][slot];
1037         assert(image != NULL);
1038
1039         if(image->users == 0) {
1040                 device_free_image(device, dscene, type, slot);
1041         }
1042         else if(image->need_load) {
1043                 if(!osl_texture_system || image->builtin_data)
1044                         device_load_image(device,
1045                                           dscene,
1046                                           scene,
1047                                           type,
1048                                           slot,
1049                                           progress);
1050         }
1051 }
1052
1053 uint8_t ImageManager::pack_image_options(ImageDataType type, size_t slot)
1054 {
1055         uint8_t options = 0;
1056         /* Image Options are packed into one uint:
1057          * bit 0 -> Interpolation
1058          * bit 1 + 2 + 3 -> Extension
1059          */
1060         if(images[type][slot]->interpolation == INTERPOLATION_CLOSEST) {
1061                 options |= (1 << 0);
1062         }
1063         if(images[type][slot]->extension == EXTENSION_REPEAT) {
1064                 options |= (1 << 1);
1065         }
1066         else if(images[type][slot]->extension == EXTENSION_EXTEND) {
1067                 options |= (1 << 2);
1068         }
1069         else /* EXTENSION_CLIP */ {
1070                 options |= (1 << 3);
1071         }
1072         return options;
1073 }
1074
1075 template<typename T>
1076 void ImageManager::device_pack_images_type(
1077         ImageDataType type,
1078         const vector<device_vector<T>*>& cpu_textures,
1079         device_vector<T> *device_image,
1080         uint4 *info)
1081 {
1082         size_t size = 0, offset = 0;
1083         /* First step is to calculate size of the texture we need. */
1084         for(size_t slot = 0; slot < images[type].size(); slot++) {
1085                 if(images[type][slot] == NULL) {
1086                         continue;
1087                 }
1088                 device_vector<T>& tex_img = *cpu_textures[slot];
1089                 size += tex_img.size();
1090         }
1091         /* Now we know how much memory we need, so we can allocate and fill. */
1092         T *pixels = device_image->resize(size);
1093         for(size_t slot = 0; slot < images[type].size(); slot++) {
1094                 if(images[type][slot] == NULL) {
1095                         continue;
1096                 }
1097                 device_vector<T>& tex_img = *cpu_textures[slot];
1098                 uint8_t options = pack_image_options(type, slot);
1099                 const int index = type_index_to_flattened_slot(slot, type) * 2;
1100                 info[index] = make_uint4(tex_img.data_width,
1101                                          tex_img.data_height,
1102                                          offset,
1103                                          options);
1104                 info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0);
1105                 memcpy(pixels + offset,
1106                        (void*)tex_img.data_pointer,
1107                        tex_img.memory_size());
1108                 offset += tex_img.size();
1109         }
1110 }
1111
1112 void ImageManager::device_pack_images(Device *device,
1113                                       DeviceScene *dscene,
1114                                       Progress& /*progess*/)
1115 {
1116         /* For OpenCL, we pack all image textures into a single large texture, and
1117          * do our own interpolation in the kernel.
1118          */
1119
1120         /* TODO(sergey): This will over-allocate a bit, but this is constant memory
1121          * so should be fine for a short term.
1122          */
1123         const size_t info_size = max4(max_flattened_slot(IMAGE_DATA_TYPE_FLOAT4),
1124                                       max_flattened_slot(IMAGE_DATA_TYPE_BYTE4),
1125                                       max_flattened_slot(IMAGE_DATA_TYPE_FLOAT),
1126                                       max_flattened_slot(IMAGE_DATA_TYPE_BYTE));
1127         uint4 *info = dscene->tex_image_packed_info.resize(info_size*2);
1128
1129         /* Pack byte4 textures. */
1130         device_pack_images_type(IMAGE_DATA_TYPE_BYTE4,
1131                                 dscene->tex_byte4_image,
1132                                 &dscene->tex_image_byte4_packed,
1133                                 info);
1134         /* Pack float4 textures. */
1135         device_pack_images_type(IMAGE_DATA_TYPE_FLOAT4,
1136                                 dscene->tex_float4_image,
1137                                 &dscene->tex_image_float4_packed,
1138                                 info);
1139         /* Pack byte textures. */
1140         device_pack_images_type(IMAGE_DATA_TYPE_BYTE,
1141                                 dscene->tex_byte_image,
1142                                 &dscene->tex_image_byte_packed,
1143                                 info);
1144         /* Pack float textures. */
1145         device_pack_images_type(IMAGE_DATA_TYPE_FLOAT,
1146                                 dscene->tex_float_image,
1147                                 &dscene->tex_image_float_packed,
1148                                 info);
1149
1150         /* Push textures to the device. */
1151         if(dscene->tex_image_byte4_packed.size()) {
1152                 if(dscene->tex_image_byte4_packed.device_pointer) {
1153                         thread_scoped_lock device_lock(device_mutex);
1154                         device->tex_free(dscene->tex_image_byte4_packed);
1155                 }
1156                 device->tex_alloc("__tex_image_byte4_packed", dscene->tex_image_byte4_packed);
1157         }
1158         if(dscene->tex_image_float4_packed.size()) {
1159                 if(dscene->tex_image_float4_packed.device_pointer) {
1160                         thread_scoped_lock device_lock(device_mutex);
1161                         device->tex_free(dscene->tex_image_float4_packed);
1162                 }
1163                 device->tex_alloc("__tex_image_float4_packed", dscene->tex_image_float4_packed);
1164         }
1165         if(dscene->tex_image_byte_packed.size()) {
1166                 if(dscene->tex_image_byte_packed.device_pointer) {
1167                         thread_scoped_lock device_lock(device_mutex);
1168                         device->tex_free(dscene->tex_image_byte_packed);
1169                 }
1170                 device->tex_alloc("__tex_image_byte_packed", dscene->tex_image_byte_packed);
1171         }
1172         if(dscene->tex_image_float_packed.size()) {
1173                 if(dscene->tex_image_float_packed.device_pointer) {
1174                         thread_scoped_lock device_lock(device_mutex);
1175                         device->tex_free(dscene->tex_image_float_packed);
1176                 }
1177                 device->tex_alloc("__tex_image_float_packed", dscene->tex_image_float_packed);
1178         }
1179         if(dscene->tex_image_packed_info.size()) {
1180                 if(dscene->tex_image_packed_info.device_pointer) {
1181                         thread_scoped_lock device_lock(device_mutex);
1182                         device->tex_free(dscene->tex_image_packed_info);
1183                 }
1184                 device->tex_alloc("__tex_image_packed_info", dscene->tex_image_packed_info);
1185         }
1186 }
1187
1188 void ImageManager::device_free_builtin(Device *device, DeviceScene *dscene)
1189 {
1190         for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
1191                 for(size_t slot = 0; slot < images[type].size(); slot++) {
1192                         if(images[type][slot] && images[type][slot]->builtin_data)
1193                                 device_free_image(device, dscene, (ImageDataType)type, slot);
1194                 }
1195         }
1196 }
1197
1198 void ImageManager::device_free(Device *device, DeviceScene *dscene)
1199 {
1200         for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
1201                 for(size_t slot = 0; slot < images[type].size(); slot++) {
1202                         device_free_image(device, dscene, (ImageDataType)type, slot);
1203                 }
1204                 images[type].clear();
1205         }
1206
1207         dscene->tex_float4_image.clear();
1208         dscene->tex_byte4_image.clear();
1209         dscene->tex_half4_image.clear();
1210         dscene->tex_float_image.clear();
1211         dscene->tex_byte_image.clear();
1212         dscene->tex_half_image.clear();
1213
1214         device->tex_free(dscene->tex_image_float4_packed);
1215         device->tex_free(dscene->tex_image_byte4_packed);
1216         device->tex_free(dscene->tex_image_float_packed);
1217         device->tex_free(dscene->tex_image_byte_packed);
1218         device->tex_free(dscene->tex_image_packed_info);
1219
1220         dscene->tex_image_float4_packed.clear();
1221         dscene->tex_image_byte4_packed.clear();
1222         dscene->tex_image_float_packed.clear();
1223         dscene->tex_image_byte_packed.clear();
1224         dscene->tex_image_packed_info.clear();
1225 }
1226
1227 CCL_NAMESPACE_END
1228