Cycles Bake
[blender-staging.git] / intern / cycles / device / device_cpu.cpp
index ea632b744dcfeaeabb03a745d9ac8f889526a973..c9cc7592028bdedc1fcfeb15d7eb667e2e6135d1 100644 (file)
@@ -61,6 +61,7 @@ public:
                system_cpu_support_sse2();
                system_cpu_support_sse3();
                system_cpu_support_sse41();
+               system_cpu_support_avx();
        }
 
        ~CPUDevice()
@@ -102,9 +103,9 @@ public:
                kernel_const_copy(&kernel_globals, name, host, size);
        }
 
-       void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
+       void tex_alloc(const char *name, device_memory& mem, InterpolationType interpolation, bool periodic)
        {
-               kernel_tex_copy(&kernel_globals, name, mem.data_pointer, mem.data_width, mem.data_height);
+               kernel_tex_copy(&kernel_globals, name, mem.data_pointer, mem.data_width, mem.data_height, mem.data_depth, interpolation);
                mem.device_pointer = mem.data_pointer;
 
                stats.mem_alloc(mem.memory_size());
@@ -166,7 +167,28 @@ public:
                        int start_sample = tile.start_sample;
                        int end_sample = tile.start_sample + tile.num_samples;
 
-#ifdef WITH_OPTIMIZED_KERNEL
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
+                       if(system_cpu_support_avx()) {
+                               for(int sample = start_sample; sample < end_sample; sample++) {
+                                       if (task.get_cancel() || task_pool.canceled()) {
+                                               if(task.need_finish_queue == false)
+                                                       break;
+                                       }
+
+                                       for(int y = tile.y; y < tile.y + tile.h; y++) {
+                                               for(int x = tile.x; x < tile.x + tile.w; x++) {
+                                                       kernel_cpu_avx_path_trace(&kg, render_buffer, rng_state,
+                                                               sample, x, y, tile.offset, tile.stride);
+                                               }
+                                       }
+
+                                       tile.sample = sample + 1;
+
+                                       task.update_progress(tile);
+                               }
+                       }
+                       else
+#endif
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41                      
                        if(system_cpu_support_sse41()) {
                                for(int sample = start_sample; sample < end_sample; sample++) {
@@ -189,6 +211,7 @@ public:
                        }
                        else
 #endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
                        if(system_cpu_support_sse3()) {
                                for(int sample = start_sample; sample < end_sample; sample++) {
                                        if (task.get_cancel() || task_pool.canceled()) {
@@ -208,7 +231,10 @@ public:
                                        task.update_progress(tile);
                                }
                        }
-                       else if(system_cpu_support_sse2()) {
+                       else
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
+                       if(system_cpu_support_sse2()) {
                                for(int sample = start_sample; sample < end_sample; sample++) {
                                        if (task.get_cancel() || task_pool.canceled()) {
                                                if(task.need_finish_queue == false)
@@ -267,7 +293,15 @@ public:
                float sample_scale = 1.0f/(task.sample + 1);
 
                if(task.rgba_half) {
-#ifdef WITH_OPTIMIZED_KERNEL
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
+                       if(system_cpu_support_avx()) {
+                               for(int y = task.y; y < task.y + task.h; y++)
+                                       for(int x = task.x; x < task.x + task.w; x++)
+                                               kernel_cpu_avx_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
+                                                       sample_scale, x, y, task.offset, task.stride);
+                       }
+                       else
+#endif 
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41                      
                        if(system_cpu_support_sse41()) {
                                for(int y = task.y; y < task.y + task.h; y++)
@@ -276,14 +310,18 @@ public:
                                                        sample_scale, x, y, task.offset, task.stride);
                        }
                        else
-#endif                         
+#endif         
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3               
                        if(system_cpu_support_sse3()) {
                                for(int y = task.y; y < task.y + task.h; y++)
                                        for(int x = task.x; x < task.x + task.w; x++)
                                                kernel_cpu_sse3_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
                                                        sample_scale, x, y, task.offset, task.stride);
                        }
-                       else if(system_cpu_support_sse2()) {
+                       else
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
+                       if(system_cpu_support_sse2()) {
                                for(int y = task.y; y < task.y + task.h; y++)
                                        for(int x = task.x; x < task.x + task.w; x++)
                                                kernel_cpu_sse2_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
@@ -299,7 +337,15 @@ public:
                        }
                }
                else {
-#ifdef WITH_OPTIMIZED_KERNEL
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
+                       if(system_cpu_support_avx()) {
+                               for(int y = task.y; y < task.y + task.h; y++)
+                                       for(int x = task.x; x < task.x + task.w; x++)
+                                               kernel_cpu_avx_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
+                                                       sample_scale, x, y, task.offset, task.stride);
+                       }
+                       else
+#endif         
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41                      
                        if(system_cpu_support_sse41()) {
                                for(int y = task.y; y < task.y + task.h; y++)
@@ -309,13 +355,17 @@ public:
                        }
                        else
 #endif                 
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
                        if(system_cpu_support_sse3()) {
                                for(int y = task.y; y < task.y + task.h; y++)
                                        for(int x = task.x; x < task.x + task.w; x++)
                                                kernel_cpu_sse3_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
                                                        sample_scale, x, y, task.offset, task.stride);
                        }
-                       else if(system_cpu_support_sse2()) {
+                       else
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
+                       if(system_cpu_support_sse2()) {
                                for(int y = task.y; y < task.y + task.h; y++)
                                        for(int x = task.x; x < task.x + task.w; x++)
                                                kernel_cpu_sse2_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
@@ -340,31 +390,45 @@ public:
                OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
 #endif
 
-#ifdef WITH_OPTIMIZED_KERNEL
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
+               if(system_cpu_support_avx()) {
+                       for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
+                               kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+
+                               if(task.get_cancel() || task_pool.canceled())
+                                       break;
+                       }
+               }
+               else
+#endif
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41                      
                if(system_cpu_support_sse41()) {
                        for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
                                kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
 
-                               if(task_pool.canceled())
+                               if(task.get_cancel() || task_pool.canceled())
                                        break;
                        }
                }
                else
 #endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
                if(system_cpu_support_sse3()) {
                        for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
                                kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
 
-                               if(task_pool.canceled())
+                               if(task.get_cancel() || task_pool.canceled())
                                        break;
                        }
                }
-               else if(system_cpu_support_sse2()) {
+               else
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
+               if(system_cpu_support_sse2()) {
                        for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
                                kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
 
-                               if(task_pool.canceled())
+                               if(task.get_cancel() || task_pool.canceled())
                                        break;
                        }
                }
@@ -374,7 +438,7 @@ public:
                        for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
                                kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
 
-                               if(task_pool.canceled())
+                               if(task.get_cancel() || task_pool.canceled())
                                        break;
                        }
                }