Merge branch 'master' into blender2.8
authorSergey Sharybin <sergey.vfx@gmail.com>
Tue, 2 May 2017 13:29:00 +0000 (15:29 +0200)
committerSergey Sharybin <sergey.vfx@gmail.com>
Tue, 2 May 2017 13:29:00 +0000 (15:29 +0200)
1  2 
CMakeLists.txt
intern/cycles/device/device_cuda.cpp

diff --combined CMakeLists.txt
index 9c18710fd485e2859b14a1c95c4fc469e253e65a,1dac082459f17bd2ec3e5e44d0dd39e6a3c04d6d..89e4e76a8fd3da19ee6824b9d72d2172043f9552
@@@ -242,10 -242,6 +242,10 @@@ endif(
  option(WITH_PLAYER        "Build Player" OFF)
  option(WITH_OPENCOLORIO   "Enable OpenColorIO color management" ${_init_OPENCOLORIO})
  
 +option(WITH_CLAY_ENGINE    "Enable New Clay engine (Breaks Mac and Mesa compatibility)" ON)
 +
 +option(WITH_LEGACY_OPENGL  "Enable build of legacy OpenGL" ON)
 +
  # Compositor
  option(WITH_COMPOSITOR         "Enable the tile based nodal compositor" ON)
  
@@@ -369,7 -365,7 +369,7 @@@ mark_as_advanced(WITH_LIBMV_SCHUR_SPECI
  
  # Logging/unit test libraries.
  option(WITH_SYSTEM_GFLAGS   "Use system-wide Gflags instead of a bundled one" OFF)
- option(WITH_SYSTEM_GFLOG    "Use system-wide Glog instead of a bundled one" OFF)
+ option(WITH_SYSTEM_GLOG     "Use system-wide Glog instead of a bundled one" OFF)
  mark_as_advanced(WITH_SYSTEM_GFLAGS)
  mark_as_advanced(WITH_SYSTEM_GLOG)
  
@@@ -466,13 -462,15 +466,13 @@@ endif(
  
  # OpenGL
  
 -option(WITH_GLEW_MX             "Support multiple GLEW contexts (experimental)"                                                                     OFF )
  option(WITH_GLEW_ES             "Switches to experimental copy of GLEW that has support for OpenGL ES. (temporary option for development purposes)" OFF)
  option(WITH_GL_EGL              "Use the EGL OpenGL system library instead of the platform specific OpenGL system library (CGL, glX, or WGL)"       OFF)
  option(WITH_GL_PROFILE_COMPAT   "Support using the OpenGL 'compatibility' profile. (deprecated)"                                                    ON )
 -option(WITH_GL_PROFILE_CORE     "Support using the OpenGL 3.2+ 'core' profile."                                                                     OFF)
 +option(WITH_GL_PROFILE_CORE     "Support using the OpenGL 3.3+ 'core' profile."                                                                     OFF)
  option(WITH_GL_PROFILE_ES20     "Support using OpenGL ES 2.0. (thru either EGL or the AGL/WGL/XGL 'es20' profile)"                                  OFF)
  
  mark_as_advanced(
 -      WITH_GLEW_MX
        WITH_GLEW_ES
        WITH_GL_EGL
        WITH_GL_PROFILE_COMPAT
        WITH_GL_PROFILE_ES20
  )
  
 -if(WITH_GL_PROFILE_COMPAT)
 -      set(WITH_GLU ON)
 -else()
 -      set(WITH_GLU OFF)
 -endif()
 -
  if(WIN32)
        option(WITH_GL_ANGLE "Link with the ANGLE library, an OpenGL ES 2.0 implementation based on Direct3D, instead of the system OpenGL library." OFF)
        mark_as_advanced(WITH_GL_ANGLE)
@@@ -500,10 -504,11 +500,10 @@@ endif(
  # We default options to whatever default standard in the current compiler.
  if(CMAKE_COMPILER_IS_GNUCC AND (NOT "${CMAKE_C_COMPILER_VERSION}" VERSION_LESS "6.0") AND (NOT WITH_CXX11))
        set(_c11_init ON)
 -      set(_cxx11_init ON)
  else()
        set(_c11_init OFF)
 -      set(_cxx11_init OFF)
  endif()
 +set(_cxx11_init ON)
  
  option(WITH_C11 "Build with C11 standard enabled, for development use only!" ${_c11_init})
  mark_as_advanced(WITH_C11)
@@@ -579,10 -584,6 +579,10 @@@ if(NOT WITH_GAMEENGINE AND WITH_PLAYER
        message(FATAL_ERROR "WITH_PLAYER requires WITH_GAMEENGINE")
  endif()
  
 +if(WITH_GAMEENGINE AND NOT WITH_LEGACY_OPENGL)
 +      message(FATAL_ERROR "WITH_GAME_ENGINE requires WITH_LEGACY_OPENGL")
 +endif()
 +
  if(NOT WITH_AUDASPACE)
        if(WITH_OPENAL)
                message(FATAL_ERROR "WITH_OPENAL requires WITH_AUDASPACE")
@@@ -960,6 -961,11 +960,6 @@@ endif(
  find_package(OpenGL)
  blender_include_dirs_sys("${OPENGL_INCLUDE_DIR}")
  
 -if(WITH_GLU)
 -      list(APPEND BLENDER_GL_LIBRARIES "${OPENGL_glu_LIBRARY}")
 -      list(APPEND GL_DEFINITIONS -DWITH_GLU)
 -endif()
 -
  if(WITH_SYSTEM_GLES)
        find_package_wrapper(OpenGLES)
  endif()
@@@ -1074,10 -1080,6 +1074,10 @@@ if(WITH_GL_EGL
  
  endif()
  
 +if(WITH_LEGACY_OPENGL)
 +      list(APPEND GL_DEFINITIONS -DWITH_LEGACY_OPENGL)
 +endif()
 +
  if(WITH_GL_PROFILE_COMPAT)
        list(APPEND GL_DEFINITIONS -DWITH_GL_PROFILE_COMPAT)
  endif()
@@@ -1124,6 -1126,10 +1124,6 @@@ endif(
  #-----------------------------------------------------------------------------
  # Configure GLEW
  
 -if(WITH_GLEW_MX)
 -      list(APPEND GL_DEFINITIONS -DWITH_GLEW_MX)
 -endif()
 -
  if(WITH_SYSTEM_GLEW)
        find_package(GLEW)
  
                message(FATAL_ERROR "GLEW is required to build Blender. Install it or disable WITH_SYSTEM_GLEW.")
        endif()
  
 -      if(WITH_GLEW_MX)
 -              set(BLENDER_GLEW_LIBRARIES ${GLEW_MX_LIBRARY})
 -      else()
 -              set(BLENDER_GLEW_LIBRARIES ${GLEW_LIBRARY})
 -      endif()
 +      set(BLENDER_GLEW_LIBRARIES ${GLEW_LIBRARY})
  else()
        if(WITH_GLEW_ES)
                set(GLEW_INCLUDE_PATH "${CMAKE_SOURCE_DIR}/extern/glew-es/include")
  
  endif()
  
 -if(NOT WITH_GLU)
 -      list(APPEND GL_DEFINITIONS -DGLEW_NO_GLU)
 -endif()
 +list(APPEND GL_DEFINITIONS -DGLEW_NO_GLU)
  
  #-----------------------------------------------------------------------------
  # Configure Bullet
@@@ -1718,6 -1730,7 +1718,6 @@@ if(FIRST_RUN
  
        info_cfg_text("OpenGL:")
        info_cfg_option(WITH_GLEW_ES)
 -      info_cfg_option(WITH_GLU)
        info_cfg_option(WITH_GL_EGL)
        info_cfg_option(WITH_GL_PROFILE_COMPAT)
        info_cfg_option(WITH_GL_PROFILE_CORE)
index 3532e640eaafa4077cdd17a1aa869dc4a911428c,a971170318ed274c808a5593f6a6c3cf501a03ae..e497ec6b0e160711a0bcca88f69108402865df80
@@@ -119,6 -119,7 +119,7 @@@ public
        int cuDevId;
        int cuDevArchitecture;
        bool first_error;
+       CUDASplitKernel *split_kernel;
  
        struct PixelMem {
                GLuint cuPBO;
                cuDevice = 0;
                cuContext = 0;
  
+               split_kernel = NULL;
                need_bindless_mapping = false;
  
                /* initialize */
        {
                task_pool.stop();
  
+               delete split_kernel;
                if(info.has_bindless_textures) {
                        tex_free(bindless_mapping);
                }
                }
        }
  
 -      void draw_pixels(device_memory& mem, int y, int w, int h, int dx, int dy, int width, int height, bool transparent,
 +      void draw_pixels(
 +          device_memory& mem, int y,
 +          int w, int h, int width, int height,
 +          int dx, int dy, int dw, int dh, bool transparent,
                const DeviceDrawParams &draw_params)
        {
                if(!background) {
 +                      const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
                        PixelMem pmem = pixel_mem_map[mem.device_pointer];
                        float *vpointer;
  
  
                        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
                        glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
 -                      if(mem.data_type == TYPE_HALF)
 +                      if(mem.data_type == TYPE_HALF) {
                                glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_HALF_FLOAT, (void*)offset);
 -                      else
 +                      }
 +                      else {
                                glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, (void*)offset);
 +                      }
                        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
  
                        glEnable(GL_TEXTURE_2D);
                                glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
                        }
  
 -                      glColor3f(1.0f, 1.0f, 1.0f);
 -
 -                      if(draw_params.bind_display_space_shader_cb) {
 +                      GLint shader_program;
 +                      if(use_fallback_shader) {
 +                              if(!bind_fallback_display_space_shader(dw, dh)) {
 +                                      return;
 +                              }
 +                              shader_program = fallback_shader_program;
 +                      }
 +                      else {
                                draw_params.bind_display_space_shader_cb();
 +                              glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
                        }
  
 -                      if(!vertex_buffer)
 +                      if(!vertex_buffer) {
                                glGenBuffers(1, &vertex_buffer);
 +                      }
  
                        glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
                        /* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered */
                                glUnmapBuffer(GL_ARRAY_BUFFER);
                        }
  
 -                      glTexCoordPointer(2, GL_FLOAT, 4 * sizeof(float), 0);
 -                      glVertexPointer(2, GL_FLOAT, 4 * sizeof(float), (char *)NULL + 2 * sizeof(float));
 +                      GLuint vertex_array_object;
 +                      GLuint position_attribute, texcoord_attribute;
  
 -                      glEnableClientState(GL_VERTEX_ARRAY);
 -                      glEnableClientState(GL_TEXTURE_COORD_ARRAY);
 +                      glGenVertexArrays(1, &vertex_array_object);
 +                      glBindVertexArray(vertex_array_object);
  
 -                      glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
 +                      texcoord_attribute = glGetAttribLocation(shader_program, "texCoord");
 +                      position_attribute = glGetAttribLocation(shader_program, "pos");
  
 -                      glDisableClientState(GL_TEXTURE_COORD_ARRAY);
 -                      glDisableClientState(GL_VERTEX_ARRAY);
 +                      glEnableVertexAttribArray(texcoord_attribute);
 +                      glEnableVertexAttribArray(position_attribute);
  
 -                      glBindBuffer(GL_ARRAY_BUFFER, 0);
 +                      glVertexAttribPointer(texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
 +                      glVertexAttribPointer(position_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)(sizeof(float) * 2));
 +
 +                      glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
  
 -                      if(draw_params.unbind_display_space_shader_cb) {
 +                      if(use_fallback_shader) {
 +                              glUseProgram(0);
 +                      }
 +                      else {
                                draw_params.unbind_display_space_shader_cb();
                        }
  
 -                      if(transparent)
 +                      if(transparent) {
                                glDisable(GL_BLEND);
 +                      }
  
                        glBindTexture(GL_TEXTURE_2D, 0);
                        glDisable(GL_TEXTURE_2D);
                        return;
                }
  
 -              Device::draw_pixels(mem, y, w, h, dx, dy, width, height, transparent, draw_params);
 +              Device::draw_pixels(mem, y, w, h, width, height, dx, dy, dw, dh, transparent, draw_params);
        }
  
        void thread_run(DeviceTask *task)
                                        requested_features.max_closure = 64;
                                }
  
-                               CUDASplitKernel split_kernel(this);
-                               split_kernel.load_kernels(requested_features);
+                               if(split_kernel == NULL) {
+                                       split_kernel = new CUDASplitKernel(this);
+                                       split_kernel->load_kernels(requested_features);
+                               }
  
                                while(task->acquire_tile(this, tile)) {
                                        device_memory void_buffer;
-                                       split_kernel.path_trace(task, tile, void_buffer, void_buffer);
+                                       split_kernel->path_trace(task, tile, void_buffer, void_buffer);
  
                                        task->release_tile(tile);
  
@@@ -1648,7 -1634,8 +1655,8 @@@ int2 CUDASplitKernel::split_kernel_glob
                << string_human_readable_size(free) << ").";
  
        size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2);
-       int2 global_size = make_int2(round_down((int)sqrt(num_elements), 32), (int)sqrt(num_elements));
+       size_t side = round_down((int)sqrt(num_elements), 32);
+       int2 global_size = make_int2(side, round_down(num_elements / side, 16));
        VLOG(1) << "Global size: " << global_size << ".";
        return global_size;
  }