Cycles: code refactor to bake using regular render session and tiles
author: Brecht Van Lommel <brecht@blender.org>
Fri, 10 May 2019 19:39:58 +0000 (21:39 +0200)
committer: Lukas Stockner <lukas.stockner@freenet.de>
Fri, 15 May 2020 18:25:24 +0000 (20:25 +0200)
There should be no user visible change from this, except that tile size
now affects performance. The goal here is to simplify bake denoising in
D3099, letting it reuse more denoising tiles and pass code.

A lot of code is now shared with regular rendering, with the two main
differences being that we read some render result passes from the bake API
when starting to render a tile, and call the bake kernel instead of the
path trace kernel.

With this kind of design where Cycles asks for tiles from the bake API,
it should eventually be easier to reduce memory usage, show tiles as
they are baked, or bake multiple passes at once, though there's still
quite some work needed for that.

Reviewers: #cycles

Subscribers: monio, wmatyjewicz, lukasstockner97, michaelknubben

Differential Revision: https://developer.blender.org/D3108

31 files changed:
intern/cycles/blender/addon/__init__.py
intern/cycles/blender/addon/engine.py
intern/cycles/blender/blender_python.cpp
intern/cycles/blender/blender_session.cpp
intern/cycles/blender/blender_session.h
intern/cycles/blender/blender_sync.cpp
intern/cycles/device/cuda/device_cuda.h
intern/cycles/device/cuda/device_cuda_impl.cpp
intern/cycles/device/device_cpu.cpp
intern/cycles/device/opencl/device_opencl.h
intern/cycles/device/opencl/device_opencl_impl.cpp
intern/cycles/kernel/kernel_bake.h
intern/cycles/kernel/kernel_types.h
intern/cycles/kernel/kernels/cpu/kernel_cpu.h
intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
intern/cycles/kernel/kernels/cuda/kernel.cu
intern/cycles/render/bake.cpp
intern/cycles/render/bake.h
intern/cycles/render/buffers.cpp
intern/cycles/render/buffers.h
intern/cycles/render/film.cpp
intern/cycles/render/session.cpp
intern/cycles/render/session.h
source/blender/editors/object/object_bake_api.c
source/blender/makesrna/intern/rna_render.c
source/blender/render/extern/include/RE_bake.h
source/blender/render/extern/include/RE_engine.h
source/blender/render/intern/include/render_result.h
source/blender/render/intern/source/external_engine.c
source/blender/render/intern/source/pipeline.c
source/blender/render/intern/source/render_result.c

index 3d2a52d0cf647cce456d9607a085fc9aedbd5223..3ab352e52a28a61591f867e511387c679e8114b9 100644 (file)
@@ -82,8 +82,8 @@ class CyclesRender(bpy.types.RenderEngine):
     def render(self, depsgraph):
         engine.render(self, depsgraph)
 
-    def bake(self, depsgraph, obj, pass_type, pass_filter, object_id, pixel_array, num_pixels, depth, result):
-        engine.bake(self, depsgraph, obj, pass_type, pass_filter, object_id, pixel_array, num_pixels, depth, result)
+    def bake(self, depsgraph, obj, pass_type, pass_filter, width, height):
+        engine.bake(self, depsgraph, obj, pass_type, pass_filter, width, height)
 
     # viewport render
     def view_update(self, context, depsgraph):
index a1b063430f5dbb842a9d1a363c6ac1bd8fa9af43..e7ea5e7a1f629aed286506b0d12accdc4e8fc151 100644 (file)
@@ -168,11 +168,11 @@ def render(engine, depsgraph):
         _cycles.render(engine.session, depsgraph.as_pointer())
 
 
-def bake(engine, depsgraph, obj, pass_type, pass_filter, object_id, pixel_array, num_pixels, depth, result):
+def bake(engine, depsgraph, obj, pass_type, pass_filter, width, height):
     import _cycles
     session = getattr(engine, "session", None)
     if session is not None:
-        _cycles.bake(engine.session, depsgraph.as_pointer(), obj.as_pointer(), pass_type, pass_filter, object_id, pixel_array.as_pointer(), num_pixels, depth, result.as_pointer())
+        _cycles.bake(engine.session, depsgraph.as_pointer(), obj.as_pointer(), pass_type, pass_filter, width, height)
 
 
 def reset(engine, data, depsgraph):
index 8c7c0bc1daadf00fe924ad177a0e968252cc323c..79c16856462ca9db15e658dd39cb4018bea1f259 100644 (file)
@@ -298,22 +298,18 @@ static PyObject *render_func(PyObject * /*self*/, PyObject *args)
 static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
 {
   PyObject *pysession, *pydepsgraph, *pyobject;
-  PyObject *pypixel_array, *pyresult;
   const char *pass_type;
-  int num_pixels, depth, object_id, pass_filter;
+  int pass_filter, width, height;
 
   if (!PyArg_ParseTuple(args,
-                        "OOOsiiOiiO",
+                        "OOOsiii",
                         &pysession,
                         &pydepsgraph,
                         &pyobject,
                         &pass_type,
                         &pass_filter,
-                        &object_id,
-                        &pypixel_array,
-                        &num_pixels,
-                        &depth,
-                        &pyresult))
+                        &width,
+                        &height))
     return NULL;
 
   BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession);
@@ -326,23 +322,9 @@ static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
   RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyobject), &objectptr);
   BL::Object b_object(objectptr);
 
-  void *b_result = PyLong_AsVoidPtr(pyresult);
-
-  PointerRNA bakepixelptr;
-  RNA_pointer_create(NULL, &RNA_BakePixel, PyLong_AsVoidPtr(pypixel_array), &bakepixelptr);
-  BL::BakePixel b_bake_pixel(bakepixelptr);
-
   python_thread_state_save(&session->python_thread_state);
 
-  session->bake(b_depsgraph,
-                b_object,
-                pass_type,
-                pass_filter,
-                object_id,
-                b_bake_pixel,
-                (size_t)num_pixels,
-                depth,
-                (float *)b_result);
+  session->bake(b_depsgraph, b_object, pass_type, pass_filter, width, height);
 
   python_thread_state_restore(&session->python_thread_state);
 
index 5ea96d6bdfdeca329427ddd77ec6ff316417baff..31b096956325773c7d43e52f06808422736eae78 100644 (file)
@@ -247,9 +247,7 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
 
 void BlenderSession::free_session()
 {
-  if (sync)
-    delete sync;
-
+  delete sync;
   delete session;
 }
 
@@ -317,6 +315,7 @@ static void end_render_result(BL::RenderEngine &b_engine,
 
 void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
                                                  bool do_update_only,
+                                                 bool do_read_only,
                                                  bool highlight)
 {
   int x = rtile.x - session->tile_manager.params.full_x;
@@ -342,7 +341,23 @@ void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
 
   BL::RenderLayer b_rlay = *b_single_rlay;
 
-  if (do_update_only) {
+  if (do_read_only) {
+    /* copy each pass */
+    BL::RenderLayer::passes_iterator b_iter;
+
+    for (b_rlay.passes.begin(b_iter); b_iter != b_rlay.passes.end(); ++b_iter) {
+      BL::RenderPass b_pass(*b_iter);
+
+      /* find matching pass type */
+      PassType pass_type = BlenderSync::get_pass_type(b_pass);
+      int components = b_pass.channels();
+
+      rtile.buffers->set_pass_rect(pass_type, components, (float *)b_pass.rect());
+    }
+
+    end_render_result(b_engine, b_rr, false, false, false);
+  }
+  else if (do_update_only) {
     /* Sample would be zero at initial tile update, which is only needed
      * to tag tile form blender side as IN PROGRESS for proper highlight
      * no buffers should be sent to blender yet. For denoise we also
@@ -362,9 +377,14 @@ void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
   }
 }
 
+void BlenderSession::read_render_tile(RenderTile &rtile)
+{
+  do_write_update_render_tile(rtile, false, true, false);
+}
+
 void BlenderSession::write_render_tile(RenderTile &rtile)
 {
-  do_write_update_render_tile(rtile, false, false);
+  do_write_update_render_tile(rtile, false, false, false);
 }
 
 void BlenderSession::update_render_tile(RenderTile &rtile, bool highlight)
@@ -374,9 +394,9 @@ void BlenderSession::update_render_tile(RenderTile &rtile, bool highlight)
    * would need to be investigated a bit further, but for now shall be fine
    */
   if (!b_engine.is_preview())
-    do_write_update_render_tile(rtile, true, highlight);
+    do_write_update_render_tile(rtile, true, false, highlight);
   else
-    do_write_update_render_tile(rtile, false, false);
+    do_write_update_render_tile(rtile, false, false, false);
 }
 
 static void add_cryptomatte_layer(BL::RenderResult &b_rr, string name, string manifest)
@@ -593,25 +613,6 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
 #endif
 }
 
-static void populate_bake_data(BakeData *data,
-                               const int object_id,
-                               BL::BakePixel &pixel_array,
-                               const int num_pixels)
-{
-  BL::BakePixel bp = pixel_array;
-
-  int i;
-  for (i = 0; i < num_pixels; i++) {
-    if (bp.object_id() == object_id) {
-      data->set(i, bp.primitive_id(), bp.uv(), bp.du_dx(), bp.du_dy(), bp.dv_dx(), bp.dv_dy());
-    }
-    else {
-      data->set_null(i);
-    }
-    bp = bp.next();
-  }
-}
-
 static int bake_pass_filter_get(const int pass_filter)
 {
   int flag = BAKE_FILTER_NONE;
@@ -642,43 +643,26 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
                           BL::Object &b_object,
                           const string &pass_type,
                           const int pass_filter,
-                          const int object_id,
-                          BL::BakePixel &pixel_array,
-                          const size_t num_pixels,
-                          const int /*depth*/,
-                          float result[])
+                          const int bake_width,
+                          const int bake_height)
 {
   b_depsgraph = b_depsgraph_;
 
   ShaderEvalType shader_type = get_shader_type(pass_type);
-
-  /* Set baking flag in advance, so kernel loading can check if we need
-   * any baking capabilities.
-   */
-  scene->bake_manager->set_baking(true);
-
-  /* ensure kernels are loaded before we do any scene updates */
-  session->load_kernels();
-
-  if (shader_type == SHADER_EVAL_UV) {
-    /* force UV to be available */
-    Pass::add(PASS_UV, scene->film->passes);
-  }
-
   int bake_pass_filter = bake_pass_filter_get(pass_filter);
-  bake_pass_filter = BakeManager::shader_type_to_pass_filter(shader_type, bake_pass_filter);
 
-  /* force use_light_pass to be true if we bake more than just colors */
-  if (bake_pass_filter & ~BAKE_FILTER_COLOR) {
-    Pass::add(PASS_LIGHT, scene->film->passes);
-  }
+  /* Initialize bake manager, before we load the baking kernels. */
+  scene->bake_manager->set(scene, b_object.name(), shader_type, bake_pass_filter);
 
-  /* create device and update scene */
-  scene->film->tag_update(scene);
-  scene->integrator->tag_update(scene);
+  /* Passes are identified by name, so in order to return the combined pass we need to set the
+   * name. */
+  Pass::add(PASS_COMBINED, scene->film->passes, "Combined");
+
+  session->read_bake_tile_cb = function_bind(&BlenderSession::read_render_tile, this, _1);
+  session->write_render_tile_cb = function_bind(&BlenderSession::write_render_tile, this, _1);
 
   if (!session->progress.get_cancel()) {
-    /* update scene */
+    /* Sync scene. */
     BL::Object b_camera_override(b_engine.camera_override());
     sync->sync_camera(b_render, b_camera_override, width, height, "");
     sync->sync_data(
@@ -686,75 +670,43 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
     builtin_images_load();
   }
 
-  BakeData *bake_data = NULL;
+  /* Object might have been disabled for rendering or excluded in some
+   * other way, in that case Blender will report a warning afterwards. */
+  bool object_found = false;
+  foreach (Object *ob, scene->objects) {
+    if (ob->name == b_object.name()) {
+      object_found = true;
+      break;
+    }
+  }
 
-  if (!session->progress.get_cancel()) {
-    /* get buffer parameters */
+  if (object_found && !session->progress.get_cancel()) {
+    /* Get session and buffer parameters. */
     SessionParams session_params = BlenderSync::get_session_params(
         b_engine, b_userpref, b_scene, background);
-    BufferParams buffer_params = BlenderSync::get_buffer_params(
-        b_scene, b_render, b_v3d, b_rv3d, scene->camera, width, height);
+    session_params.progressive_refine = false;
 
-    scene->bake_manager->set_shader_limit((size_t)b_engine.tile_x(), (size_t)b_engine.tile_y());
+    BufferParams buffer_params;
+    buffer_params.width = bake_width;
+    buffer_params.height = bake_height;
+    buffer_params.passes = scene->film->passes;
 
-    /* set number of samples */
+    /* Update session. */
     session->tile_manager.set_samples(session_params.samples);
     session->reset(buffer_params, session_params.samples);
-    session->update_scene();
-
-    /* find object index. todo: is arbitrary - copied from mesh_displace.cpp */
-    size_t object_index = OBJECT_NONE;
-    int tri_offset = 0;
-
-    for (size_t i = 0; i < scene->objects.size(); i++) {
-      const Object *object = scene->objects[i];
-      const Geometry *geom = object->geometry;
-      if (object->name == b_object.name() && geom->type == Geometry::MESH) {
-        const Mesh *mesh = static_cast<const Mesh *>(geom);
-        object_index = i;
-        tri_offset = mesh->prim_offset;
-        break;
-      }
-    }
-
-    /* Object might have been disabled for rendering or excluded in some
-     * other way, in that case Blender will report a warning afterwards. */
-    if (object_index != OBJECT_NONE) {
-      int object = object_index;
-
-      bake_data = scene->bake_manager->init(object, tri_offset, num_pixels);
-      populate_bake_data(bake_data, object_id, pixel_array, num_pixels);
-    }
-
-    /* set number of samples */
-    session->tile_manager.set_samples(session_params.samples);
-    session->reset(buffer_params, session_params.samples);
-    session->update_scene();
 
     session->progress.set_update_callback(
         function_bind(&BlenderSession::update_bake_progress, this));
   }
 
   /* Perform bake. Check cancel to avoid crash with incomplete scene data. */
-  if (!session->progress.get_cancel() && bake_data) {
-    scene->bake_manager->bake(scene->device,
-                              &scene->dscene,
-                              scene,
-                              session->progress,
-                              shader_type,
-                              bake_pass_filter,
-                              bake_data,
-                              result);
+  if (object_found && !session->progress.get_cancel()) {
+    session->start();
+    session->wait();
   }
 
-  /* free all memory used (host and device), so we wouldn't leave render
-   * engine with extra memory allocated
-   */
-
-  session->device_free();
-
-  delete sync;
-  sync = NULL;
+  session->read_bake_tile_cb = function_null;
+  session->write_render_tile_cb = function_null;
 }
 
 void BlenderSession::do_write_update_render_result(BL::RenderLayer &b_rlay,
index 3e6498bb6559eee3aa711c375ffe659eb3b5b2c8..34e952e312ba6f4be9845663a118d24ddf00f989 100644 (file)
@@ -66,14 +66,12 @@ class BlenderSession {
             BL::Object &b_object,
             const string &pass_type,
             const int custom_flag,
-            const int object_id,
-            BL::BakePixel &pixel_array,
-            const size_t num_pixels,
-            const int depth,
-            float pixels[]);
+            const int bake_width,
+            const int bake_height);
 
   void write_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile);
   void write_render_tile(RenderTile &rtile);
+  void read_render_tile(RenderTile &rtile);
 
   /* update functions are used to update display buffer only after sample was rendered
    * only needed for better visual feedback */
@@ -155,7 +153,10 @@ class BlenderSession {
   void do_write_update_render_result(BL::RenderLayer &b_rlay,
                                      RenderTile &rtile,
                                      bool do_update_only);
-  void do_write_update_render_tile(RenderTile &rtile, bool do_update_only, bool highlight);
+  void do_write_update_render_tile(RenderTile &rtile,
+                                   bool do_update_only,
+                                   bool do_read_only,
+                                   bool highlight);
 
   void builtin_images_load();
 
index e8031be7dd1d144fdfa091f2d6ddc2b287f9f620..f16305e737d10a45dc7327259979a4874e2a112a 100644 (file)
@@ -481,6 +481,9 @@ PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass)
   MAP_PASS("AO", PASS_AO);
   MAP_PASS("Shadow", PASS_SHADOW);
 
+  MAP_PASS("BakePrimitive", PASS_BAKE_PRIMITIVE);
+  MAP_PASS("BakeDifferential", PASS_BAKE_DIFFERENTIAL);
+
 #ifdef __KERNEL_DEBUG__
   MAP_PASS("Debug BVH Traversed Nodes", PASS_BVH_TRAVERSED_NODES);
   MAP_PASS("Debug BVH Traversed Instances", PASS_BVH_TRAVERSED_INSTANCES);
index 3e397da895b43cbc5e154867d392a856c4623d61..3f23f0fe4c5d2d5f2238c6f68455e9568e3b6c4b 100644 (file)
@@ -223,7 +223,7 @@ class CUDADevice : public Device {
                               CUdeviceptr d_wtile,
                               CUstream stream = 0);
 
-  void path_trace(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles);
+  void render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles);
 
   void film_convert(DeviceTask &task,
                     device_ptr buffer,
index ba5d479e0e7a6ca06c368b0f00be7df553a170b8..acf53c3eb1bb1bd2107896b9bcb32c9933ddfe6d 100644 (file)
@@ -586,20 +586,23 @@ void CUDADevice::reserve_local_memory(const DeviceRequestedFeatures &requested_f
   cuMemGetInfo(&free_before, &total);
 
   /* Get kernel function. */
-  CUfunction cuPathTrace;
+  CUfunction cuRender;
 
-  if (requested_features.use_integrator_branched) {
-    cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
+  if (requested_features.use_baking) {
+    cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake"));
+  }
+  else if (requested_features.use_integrator_branched) {
+    cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_branched_path_trace"));
   }
   else {
-    cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
+    cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace"));
   }
 
-  cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+  cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1));
 
   int min_blocks, num_threads_per_block;
-  cuda_assert(cuOccupancyMaxPotentialBlockSize(
-      &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
+  cuda_assert(
+      cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0));
 
   /* Launch kernel, using just 1 block appears sufficient to reserve
    * memory for all multiprocessors. It would be good to do this in
@@ -609,7 +612,7 @@ void CUDADevice::reserve_local_memory(const DeviceRequestedFeatures &requested_f
 
   void *args[] = {&d_work_tiles, &total_work_size};
 
-  cuda_assert(cuLaunchKernel(cuPathTrace, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+  cuda_assert(cuLaunchKernel(cuRender, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
 
   cuda_assert(cuCtxSynchronize());
 
@@ -1780,9 +1783,7 @@ void CUDADevice::adaptive_sampling_post(RenderTile &rtile,
                              0));
 }
 
-void CUDADevice::path_trace(DeviceTask &task,
-                            RenderTile &rtile,
-                            device_vector<WorkTile> &work_tiles)
+void CUDADevice::render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles)
 {
   scoped_timer timer(&rtile.buffers->render_time);
 
@@ -1790,21 +1791,24 @@ void CUDADevice::path_trace(DeviceTask &task,
     return;
 
   CUDAContextScope scope(this);
-  CUfunction cuPathTrace;
+  CUfunction cuRender;
 
   /* Get kernel function. */
-  if (task.integrator_branched) {
-    cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
+  if (rtile.task == RenderTile::BAKE) {
+    cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake"));
+  }
+  else if (task.integrator_branched) {
+    cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_branched_path_trace"));
   }
   else {
-    cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
+    cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace"));
   }
 
   if (have_error()) {
     return;
   }
 
-  cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+  cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1));
 
   /* Allocate work tile. */
   work_tiles.alloc(1);
@@ -1822,8 +1826,8 @@ void CUDADevice::path_trace(DeviceTask &task,
    * remain conservative for GPUs connected to a display to avoid driver
    * timeouts and display freezing. */
   int min_blocks, num_threads_per_block;
-  cuda_assert(cuOccupancyMaxPotentialBlockSize(
-      &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
+  cuda_assert(
+      cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0));
   if (!info.display_device) {
     min_blocks *= 8;
   }
@@ -1851,7 +1855,7 @@ void CUDADevice::path_trace(DeviceTask &task,
     void *args[] = {&d_work_tiles, &total_work_size};
 
     cuda_assert(
-        cuLaunchKernel(cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+        cuLaunchKernel(cuRender, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
 
     /* Run the adaptive sampling kernels at selected samples aligned to step samples. */
     uint filter_sample = sample + wtile->num_samples - 1;
@@ -1957,10 +1961,7 @@ void CUDADevice::shader(DeviceTask &task)
   CUdeviceptr d_output = (CUdeviceptr)task.shader_output;
 
   /* get kernel function */
-  if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
-    cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake"));
-  }
-  else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
+  if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
     cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace"));
   }
   else {
@@ -2297,9 +2298,12 @@ void CUDADevice::thread_run(DeviceTask *task)
           split_kernel->path_trace(task, tile, void_buffer, void_buffer);
         }
         else {
-          path_trace(*task, tile, work_tiles);
+          render(*task, tile, work_tiles);
         }
       }
+      else if (tile.task == RenderTile::BAKE) {
+        render(*task, tile, work_tiles);
+      }
       else if (tile.task == RenderTile::DENOISE) {
         tile.sample = tile.start_sample + tile.num_samples;
 
index c701c14318f8a41f7569178c812d8c6c637e5806..fc6febd8cee31933d46869fa65a9139ef826bc6b 100644 (file)
@@ -188,6 +188,7 @@ class CPUDevice : public Device {
       convert_to_byte_kernel;
   KernelFunctions<void (*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)>
       shader_kernel;
+  KernelFunctions<void (*)(KernelGlobals *, float *, int, int, int, int, int)> bake_kernel;
 
   KernelFunctions<void (*)(
       int, TileInfo *, int, int, float *, float *, float *, float *, float *, int *, int, int)>
@@ -270,6 +271,7 @@ class CPUDevice : public Device {
         REGISTER_KERNEL(convert_to_half_float),
         REGISTER_KERNEL(convert_to_byte),
         REGISTER_KERNEL(shader),
+        REGISTER_KERNEL(bake),
         REGISTER_KERNEL(filter_divide_shadow),
         REGISTER_KERNEL(filter_get_feature),
         REGISTER_KERNEL(filter_write_feature),
@@ -895,7 +897,7 @@ class CPUDevice : public Device {
     }
   }
 
-  void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
+  void render(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
   {
     const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;
 
@@ -919,12 +921,21 @@ class CPUDevice : public Device {
           break;
       }
 
-      for (int y = tile.y; y < tile.y + tile.h; y++) {
-        for (int x = tile.x; x < tile.x + tile.w; x++) {
-          if (use_coverage) {
-            coverage.init_pixel(x, y);
+      if (tile.task == RenderTile::PATH_TRACE) {
+        for (int y = tile.y; y < tile.y + tile.h; y++) {
+          for (int x = tile.x; x < tile.x + tile.w; x++) {
+            if (use_coverage) {
+              coverage.init_pixel(x, y);
+            }
+            path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
+          }
+        }
+      }
+      else {
+        for (int y = tile.y; y < tile.y + tile.h; y++) {
+          for (int x = tile.x; x < tile.x + tile.w; x++) {
+            bake_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
           }
-          path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
         }
       }
       tile.sample = sample + 1;
@@ -1019,9 +1030,12 @@ class CPUDevice : public Device {
           split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
         }
         else {
-          path_trace(task, tile, kg);
+          render(task, tile, kg);
         }
       }
+      else if (tile.task == RenderTile::BAKE) {
+        render(task, tile, kg);
+      }
       else if (tile.task == RenderTile::DENOISE) {
         denoise(denoising, tile);
         task.update_progress(&tile, tile.w * tile.h);
index d6f4fb430619accd025c7fc64bf9c6233c5faa55..389268e1c2aa8fcf3b6a0af246cd4ec08ca15aca 100644 (file)
@@ -451,6 +451,7 @@ class OpenCLDevice : public Device {
                     device_ptr rgba_half);
   void shader(DeviceTask &task);
   void update_adaptive(DeviceTask &task, RenderTile &tile, int sample);
+  void bake(DeviceTask &task, RenderTile &tile);
 
   void denoise(RenderTile &tile, DenoisingTask &denoising);
 
index 2766f85d17c05cb2c07ffa9e7c8265e94c6f12ce..beb3174b111448d68015e04e704f97aed79372b9 100644 (file)
@@ -1367,6 +1367,9 @@ void OpenCLDevice::thread_run(DeviceTask *task)
          */
         clFinish(cqCommandQueue);
       }
+      else if (tile.task == RenderTile::BAKE) {
+        bake(*task, tile);
+      }
       else if (tile.task == RenderTile::DENOISE) {
         tile.sample = tile.start_sample + tile.num_samples;
         denoise(tile, denoising);
@@ -1858,10 +1861,7 @@ void OpenCLDevice::shader(DeviceTask &task)
   cl_int d_offset = task.offset;
 
   OpenCLDevice::OpenCLProgram *program = &background_program;
-  if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
-    program = &bake_program;
-  }
-  else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
+  if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
     program = &displace_program;
   }
   program->wait_for_availability();
@@ -1892,6 +1892,51 @@ void OpenCLDevice::shader(DeviceTask &task)
   }
 }
 
+void OpenCLDevice::bake(DeviceTask &task, RenderTile &rtile)
+{
+  scoped_timer timer(&rtile.buffers->render_time);
+
+  /* Cast arguments to cl types. */
+  cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
+  cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
+  cl_int d_x = rtile.x;
+  cl_int d_y = rtile.y;
+  cl_int d_w = rtile.w;
+  cl_int d_h = rtile.h;
+  cl_int d_offset = rtile.offset;
+  cl_int d_stride = rtile.stride;
+
+  bake_program.wait_for_availability();
+  cl_kernel kernel = bake_program();
+
+  cl_uint start_arg_index = kernel_set_args(kernel, 0, d_data, d_buffer);
+
+  set_kernel_arg_buffers(kernel, &start_arg_index);
+
+  start_arg_index += kernel_set_args(
+      kernel, start_arg_index, d_x, d_y, d_w, d_h, d_offset, d_stride);
+
+  int start_sample = rtile.start_sample;
+  int end_sample = rtile.start_sample + rtile.num_samples;
+
+  for (int sample = start_sample; sample < end_sample; sample++) {
+    if (task.get_cancel()) {
+      if (task.need_finish_queue == false)
+        break;
+    }
+
+    kernel_set_args(kernel, start_arg_index, sample);
+
+    enqueue_kernel(kernel, d_w, d_h);
+
+    rtile.sample = sample + 1;
+
+    task.update_progress(&rtile, rtile.w * rtile.h);
+  }
+
+  clFinish(cqCommandQueue);
+}
+
 string OpenCLDevice::kernel_build_options(const string *debug_src)
 {
   string build_options = "-cl-no-signed-zeros -cl-mad-enable ";
index f1fc697553ace547a96c58fc4b8eb13f8700b436..2709a9da7345d55c3b0caf9fc579b91e0fc8521f 100644 (file)
@@ -18,38 +18,40 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __BAKING__
 
-ccl_device_inline void compute_light_pass(
+ccl_device_noinline void compute_light_pass(
     KernelGlobals *kg, ShaderData *sd, PathRadiance *L, uint rng_hash, int pass_filter, int sample)
 {
   kernel_assert(kernel_data.film.use_light_pass);
 
-  PathRadiance L_sample;
-  PathState state;
-  Ray ray;
   float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
 
-  /* emission and indirect shader data memory used by various functions */
-  ShaderData emission_sd, indirect_sd;
-
-  ray.P = sd->P + sd->Ng;
-  ray.D = -sd->Ng;
-  ray.t = FLT_MAX;
-#  ifdef __CAMERA_MOTION__
-  ray.time = 0.5f;
-#  endif
+  /* Emission and indirect shader data memory used by various functions. */
+  ShaderDataTinyStorage emission_sd_storage;
+  ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+  ShaderData indirect_sd;
 
-  /* init radiance */
-  path_radiance_init(kg, &L_sample);
+  /* Init radiance. */
+  path_radiance_init(kg, L);
 
-  /* init path state */
-  path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL);
+  /* Init path state. */
+  PathState state;
+  path_state_init(kg, emission_sd, &state, rng_hash, sample, NULL);
 
-  /* evaluate surface shader */
+  /* Evaluate surface shader. */
   shader_eval_surface(kg, sd, &state, NULL, state.flag);
 
   /* TODO, disable more closures we don't need besides transparent */
   shader_bsdf_disable_transparency(kg, sd);
 
+  /* Init ray. */
+  Ray ray;
+  ray.P = sd->P + sd->Ng;
+  ray.D = -sd->Ng;
+  ray.t = FLT_MAX;
+#  ifdef __CAMERA_MOTION__
+  ray.time = 0.5f;
+#  endif
+
 #  ifdef __BRANCHED_PATH__
   if (!kernel_data.integrator.branched) {
     /* regular path tracer */
@@ -57,14 +59,13 @@ ccl_device_inline void compute_light_pass(
 
     /* sample ambient occlusion */
     if (pass_filter & BAKE_FILTER_AO) {
-      kernel_path_ao(
-          kg, sd, &emission_sd, &L_sample, &state, throughput, shader_bsdf_alpha(kg, sd));
+      kernel_path_ao(kg, sd, emission_sd, L, &state, throughput, shader_bsdf_alpha(kg, sd));
     }
 
     /* sample emission */
     if ((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) {
       float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
-      path_radiance_accum_emission(kg, &L_sample, &state, throughput, emission);
+      path_radiance_accum_emission(kg, L, &state, throughput, emission);
     }
 
     bool is_sss_sample = false;
@@ -77,12 +78,10 @@ ccl_device_inline void compute_light_pass(
       SubsurfaceIndirectRays ss_indirect;
       kernel_path_subsurface_init_indirect(&ss_indirect);
       if (kernel_path_subsurface_scatter(
-              kg, sd, &emission_sd, &L_sample, &state, &ray, &throughput, &ss_indirect)) {
+              kg, sd, emission_sd, L, &state, &ray, &throughput, &ss_indirect)) {
         while (ss_indirect.num_rays) {
-          kernel_path_subsurface_setup_indirect(
-              kg, &ss_indirect, &state, &ray, &L_sample, &throughput);
-          kernel_path_indirect(
-              kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample);
+          kernel_path_subsurface_setup_indirect(kg, &ss_indirect, &state, &ray, L, &throughput);
+          kernel_path_indirect(kg, &indirect_sd, emission_sd, &ray, throughput, &state, L);
         }
         is_sss_sample = true;
       }
@@ -91,18 +90,18 @@ ccl_device_inline void compute_light_pass(
 
     /* sample light and BSDF */
     if (!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) {
-      kernel_path_surface_connect_light(kg, sd, &emission_sd, throughput, &state, &L_sample);
+      kernel_path_surface_connect_light(kg, sd, emission_sd, throughput, &state, L);
 
-      if (kernel_path_surface_bounce(kg, sd, &throughput, &state, &L_sample.state, &ray)) {
+      if (kernel_path_surface_bounce(kg, sd, &throughput, &state, &L->state, &ray)) {
 #  ifdef __LAMP_MIS__
         state.ray_t = 0.0f;
 #  endif
         /* compute indirect light */
-        kernel_path_indirect(kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample);
+        kernel_path_indirect(kg, &indirect_sd, emission_sd, &ray, throughput, &state, L);
 
         /* sum and reset indirect light pass variables for the next samples */
-        path_radiance_sum_indirect(&L_sample);
-        path_radiance_reset_indirect(&L_sample);
+        path_radiance_sum_indirect(L);
+        path_radiance_reset_indirect(L);
       }
     }
 #  ifdef __BRANCHED_PATH__
@@ -112,13 +111,13 @@ ccl_device_inline void compute_light_pass(
 
     /* sample ambient occlusion */
     if (pass_filter & BAKE_FILTER_AO) {
-      kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, throughput);
+      kernel_branched_path_ao(kg, sd, emission_sd, L, &state, throughput);
     }
 
     /* sample emission */
     if ((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) {
       float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf);
-      path_radiance_accum_emission(kg, &L_sample, &state, throughput, emission);
+      path_radiance_accum_emission(kg, L, &state, throughput, emission);
     }
 
 #    ifdef __SUBSURFACE__
@@ -127,7 +126,7 @@ ccl_device_inline void compute_light_pass(
       /* When mixing BSSRDF and BSDF closures we should skip BSDF lighting
        * if scattering was successful. */
       kernel_branched_path_subsurface_scatter(
-          kg, sd, &indirect_sd, &emission_sd, &L_sample, &state, &ray, throughput);
+          kg, sd, &indirect_sd, emission_sd, L, &state, &ray, throughput);
     }
 #    endif
 
@@ -138,19 +137,16 @@ ccl_device_inline void compute_light_pass(
       if (kernel_data.integrator.use_direct_light) {
         int all = kernel_data.integrator.sample_all_lights_direct;
         kernel_branched_path_surface_connect_light(
-            kg, sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all);
+            kg, sd, emission_sd, &state, throughput, 1.0f, L, all);
       }
 #    endif
 
       /* indirect light */
       kernel_branched_path_surface_indirect_light(
-          kg, sd, &indirect_sd, &emission_sd, throughput, 1.0f, &state, &L_sample);
+          kg, sd, &indirect_sd, emission_sd, throughput, 1.0f, &state, L);
     }
   }
 #  endif
-
-  /* accumulate into master L */
-  path_radiance_accum_sample(L, &L_sample);
 }
 
 /* this helps with AA but it's not the real solution as it does not AA the geometry
@@ -225,41 +221,28 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg,
   return out;
 }
 
-ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
-                                     ccl_global uint4 *input,
-                                     ccl_global float4 *output,
-                                     ShaderEvalType type,
-                                     int pass_filter,
-                                     int i,
-                                     int offset,
-                                     int sample)
+ccl_device void kernel_bake_evaluate(
+    KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride)
 {
-  ShaderData sd;
-  PathState state = {0};
-  uint4 in = input[i * 2];
-  uint4 diff = input[i * 2 + 1];
-
-  float3 out = make_float3(0.0f, 0.0f, 0.0f);
+  /* Setup render buffers. */
+  const int index = offset + x + y * stride;
+  const int pass_stride = kernel_data.film.pass_stride;
+  buffer += index * pass_stride;
 
-  int object = in.x;
-  int prim = in.y;
+  ccl_global float *primitive = buffer + kernel_data.film.pass_bake_primitive;
+  ccl_global float *differential = buffer + kernel_data.film.pass_bake_differential;
+  ccl_global float *output = buffer + kernel_data.film.pass_combined;
 
+  int prim = __float_as_uint(primitive[1]);
   if (prim == -1)
     return;
 
-  float u = __uint_as_float(in.z);
-  float v = __uint_as_float(in.w);
-
-  float dudx = __uint_as_float(diff.x);
-  float dudy = __uint_as_float(diff.y);
-  float dvdx = __uint_as_float(diff.z);
-  float dvdy = __uint_as_float(diff.w);
+  prim += kernel_data.bake.tri_offset;
 
+  /* Random number generator. */
+  uint rng_hash = hash_uint2(x, y) ^ kernel_data.integrator.seed;
   int num_samples = kernel_data.integrator.aa_samples;
 
-  /* random number generator */
-  uint rng_hash = cmj_hash(offset + i, kernel_data.integrator.seed);
-
   float filter_x, filter_y;
   if (sample == 0) {
     filter_x = filter_y = 0.5f;
@@ -268,23 +251,29 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
     path_rng_2D(kg, rng_hash, sample, num_samples, PRNG_FILTER_U, &filter_x, &filter_y);
   }
 
-  /* subpixel u/v offset */
+  /* Barycentric UV with subpixel offset. */
+  float u = primitive[2];
+  float v = primitive[3];
+
+  float dudx = differential[0];
+  float dudy = differential[1];
+  float dvdx = differential[2];
+  float dvdy = differential[3];
+
   if (sample > 0) {
     u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f);
     v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f),
                                  1.0f - u);
   }
 
-  /* triangle */
+  /* Shader data setup. */
+  int object = kernel_data.bake.object_index;
   int shader;
   float3 P, Ng;
 
   triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader);
 
-  /* light passes */
-  PathRadiance L;
-  path_radiance_init(kg, &L);
-
+  ShaderData sd;
   shader_setup_from_sample(
       kg,
       &sd,
@@ -302,7 +291,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
       LAMP_NONE);
   sd.I = sd.N;
 
-  /* update differentials */
+  /* Setup differentials. */
   sd.dP.dx = sd.dPdu * dudx + sd.dPdv * dvdx;
   sd.dP.dy = sd.dPdu * dudy + sd.dPdv * dvdy;
   sd.du.dx = dudx;
@@ -310,17 +299,24 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
   sd.dv.dx = dvdx;
   sd.dv.dy = dvdy;
 
-  /* set RNG state for shaders that use sampling */
+  /* Set RNG state for shaders that use sampling. */
+  PathState state = {0};
   state.rng_hash = rng_hash;
   state.rng_offset = 0;
   state.sample = sample;
   state.num_samples = num_samples;
   state.min_ray_pdf = FLT_MAX;
 
-  /* light passes if we need more than color */
-  if (pass_filter & ~BAKE_FILTER_COLOR)
+  /* Light passes if we need more than color. */
+  PathRadiance L;
+  int pass_filter = kernel_data.bake.pass_filter;
+
+  if (kernel_data.bake.pass_filter & ~BAKE_FILTER_COLOR)
     compute_light_pass(kg, &sd, &L, rng_hash, pass_filter, sample);
 
+  float3 out = make_float3(0.0f, 0.0f, 0.0f);
+
+  ShaderEvalType type = (ShaderEvalType)kernel_data.bake.type;
   switch (type) {
     /* data passes */
     case SHADER_EVAL_NORMAL:
@@ -441,10 +437,8 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg,
   }
 
   /* write output */
-  const float output_fac = 1.0f / num_samples;
-  const float4 scaled_result = make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
-
-  output[i] = (sample == 0) ? scaled_result : output[i] + scaled_result;
+  const float4 result = make_float4(out.x, out.y, out.z, 1.0f);
+  kernel_write_pass_float4(output, result);
 }
 
 #endif /* __BAKING__ */
index a1f8c35348dac6a4bc805a0e60cb420d6fc64536..304835a1685160826e9ef262c93c46b512f86cc1 100644 (file)
@@ -395,6 +395,10 @@ typedef enum PassType {
   PASS_VOLUME_INDIRECT,
   /* No Scatter color since it's tricky to define what it would even mean. */
   PASS_CATEGORY_LIGHT_END = 63,
+
+  PASS_BAKE_PRIMITIVE,
+  PASS_BAKE_DIFFERENTIAL,
+  PASS_CATEGORY_BAKE_END = 95
 } PassType;
 
 #define PASS_ANY (~0)
@@ -1248,6 +1252,10 @@ typedef struct KernelFilm {
   float4 xyz_to_b;
   float4 rgb_to_y;
 
+  int pass_bake_primitive;
+  int pass_bake_differential;
+  int pad;
+
 #ifdef __KERNEL_DEBUG__
   int pass_bvh_traversed_nodes;
   int pass_bvh_traversed_instances;
@@ -1427,6 +1435,14 @@ typedef struct KernelTables {
 } KernelTables;
 static_assert_align(KernelTables, 16);
 
+typedef struct KernelBake {
+  int object_index;
+  int tri_offset;
+  int type;
+  int pass_filter;
+} KernelBake;
+static_assert_align(KernelBake, 16);
+
 typedef struct KernelData {
   KernelCamera cam;
   KernelFilm film;
@@ -1435,6 +1451,7 @@ typedef struct KernelData {
   KernelBVH bvh;
   KernelCurves curve;
   KernelTables tables;
+  KernelBake bake;
 } KernelData;
 static_assert_align(KernelData, 16);
 
index 683f4b88d79ef7b45c663d2f3420fb302a10761b..ea3103f12c3918f73b8e16c98d091120f87a6468 100644 (file)
@@ -46,6 +46,9 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
                                        int offset,
                                        int sample);
 
+void KERNEL_FUNCTION_FULL_NAME(bake)(
+    KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride);
+
 /* Split kernels */
 
 void KERNEL_FUNCTION_FULL_NAME(data_init)(KernelGlobals *kg,
index 091e53cfd83d6efa8f72325bd3b00a12ec33f04c..5aa3fb1431886fe9143635a22217575b5a6438d5 100644 (file)
@@ -132,6 +132,18 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg,
 #  endif /* KERNEL_STUB */
 }
 
+/* Bake */
+
+void KERNEL_FUNCTION_FULL_NAME(bake)(
+    KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride)
+{
+#  ifdef KERNEL_STUB
+  STUB_ASSERT(KERNEL_ARCH, bake);
+#  else
+  kernel_bake_evaluate(kg, buffer, sample, x, y, offset, stride);
+#  endif /* KERNEL_STUB */
+}
+
 /* Shader Evaluate */
 
 void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
@@ -146,12 +158,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
 #  ifdef KERNEL_STUB
   STUB_ASSERT(KERNEL_ARCH, shader);
 #  else
-  if (type >= SHADER_EVAL_BAKE) {
-#    ifdef __BAKING__
-    kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, i, offset, sample);
-#    endif
-  }
-  else if (type == SHADER_EVAL_DISPLACE) {
+  if (type == SHADER_EVAL_DISPLACE) {
     kernel_displace_evaluate(kg, input, output, i);
   }
   else {
index c4c810c6a82b186be895d6dadff9b2d453587f16..d4f41132a11ea85e7ab31ab12d8a6e4953024a86 100644 (file)
@@ -214,13 +214,16 @@ kernel_cuda_background(uint4 *input,
 #ifdef __BAKING__
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_bake(uint4 *input, float4 *output, int type, int filter, int sx, int sw, int offset, int sample)
+kernel_cuda_bake(WorkTile *tile, uint total_work_size)
 {
-       int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
+       int work_index = ccl_global_id(0);
+
+       if(work_index < total_work_size) {
+               uint x, y, sample;
+               get_work_pixel(tile, work_index, &x, &y, &sample);
 
-       if(x < sx + sw) {
                KernelGlobals kg;
-               kernel_bake_evaluate(&kg, input, output, (ShaderEvalType)type, filter, x, offset, sample);
+               kernel_bake_evaluate(&kg, tile->buffer, sample, x, y, tile->offset, tile->stride);
        }
 }
 #endif
index 35f942b3e9b676ca87904085a53675b238c7f5a7..6044182a51a55d6389e8ebc7dc5b3d6cafd0976c 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include "render/bake.h"
+#include "render/buffers.h"
 #include "render/integrator.h"
 #include "render/mesh.h"
 #include "render/object.h"
 
 CCL_NAMESPACE_BEGIN
 
-BakeData::BakeData(const int object, const size_t tri_offset, const size_t num_pixels)
-    : m_object(object), m_tri_offset(tri_offset), m_num_pixels(num_pixels)
+static int aa_samples(Scene *scene, Object *object, ShaderEvalType type)
 {
-  m_primitive.resize(num_pixels);
-  m_u.resize(num_pixels);
-  m_v.resize(num_pixels);
-  m_dudx.resize(num_pixels);
-  m_dudy.resize(num_pixels);
-  m_dvdx.resize(num_pixels);
-  m_dvdy.resize(num_pixels);
-}
-
-BakeData::~BakeData()
-{
-  m_primitive.clear();
-  m_u.clear();
-  m_v.clear();
-  m_dudx.clear();
-  m_dudy.clear();
-  m_dvdx.clear();
-  m_dvdy.clear();
-}
-
-void BakeData::set(int i, int prim, float uv[2], float dudx, float dudy, float dvdx, float dvdy)
-{
-  m_primitive[i] = (prim == -1 ? -1 : m_tri_offset + prim);
-  m_u[i] = uv[0];
-  m_v[i] = uv[1];
-  m_dudx[i] = dudx;
-  m_dudy[i] = dudy;
-  m_dvdx[i] = dvdx;
-  m_dvdy[i] = dvdy;
-}
-
-void BakeData::set_null(int i)
-{
-  m_primitive[i] = -1;
-}
-
-int BakeData::object()
-{
-  return m_object;
-}
-
-size_t BakeData::size()
-{
-  return m_num_pixels;
-}
+  if (type == SHADER_EVAL_UV || type == SHADER_EVAL_ROUGHNESS) {
+    return 1;
+  }
+  else if (type == SHADER_EVAL_NORMAL) {
+    /* Only antialias normal if mesh has bump mapping. */
+    if (object->geometry) {
+      foreach (Shader *shader, object->geometry->used_shaders) {
+        if (shader->has_bump) {
+          return scene->integrator->aa_samples;
+        }
+      }
+    }
 
-bool BakeData::is_valid(int i)
-{
-  return m_primitive[i] != -1;
+    return 1;
+  }
+  else {
+    return scene->integrator->aa_samples;
+  }
 }
 
-uint4 BakeData::data(int i)
+/* Keep it synced with kernel_bake.h logic */
+static int shader_type_to_pass_filter(ShaderEvalType type, int pass_filter)
 {
-  return make_uint4(m_object, m_primitive[i], __float_as_int(m_u[i]), __float_as_int(m_v[i]));
-}
+  const int component_flags = pass_filter &
+                              (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_COLOR);
 
-uint4 BakeData::differentials(int i)
-{
-  return make_uint4(__float_as_int(m_dudx[i]),
-                    __float_as_int(m_dudy[i]),
-                    __float_as_int(m_dvdx[i]),
-                    __float_as_int(m_dvdy[i]));
+  switch (type) {
+    case SHADER_EVAL_AO:
+      return BAKE_FILTER_AO;
+    case SHADER_EVAL_SHADOW:
+      return BAKE_FILTER_DIRECT;
+    case SHADER_EVAL_DIFFUSE:
+      return BAKE_FILTER_DIFFUSE | component_flags;
+    case SHADER_EVAL_GLOSSY:
+      return BAKE_FILTER_GLOSSY | component_flags;
+    case SHADER_EVAL_TRANSMISSION:
+      return BAKE_FILTER_TRANSMISSION | component_flags;
+    case SHADER_EVAL_COMBINED:
+      return pass_filter;
+    default:
+      return 0;
+  }
 }
 
 BakeManager::BakeManager()
 {
-  m_bake_data = NULL;
-  m_is_baking = false;
+  type = SHADER_EVAL_BAKE;
+  pass_filter = 0;
+
   need_update = true;
-  m_shader_limit = 512 * 512;
 }
 
 BakeManager::~BakeManager()
 {
-  if (m_bake_data)
-    delete m_bake_data;
 }
 
 bool BakeManager::get_baking()
 {
-  return m_is_baking;
-}
-
-void BakeManager::set_baking(const bool value)
-{
-  m_is_baking = value;
+  return !object_name.empty();
 }
 
-BakeData *BakeManager::init(const int object, const size_t tri_offset, const size_t num_pixels)
+void BakeManager::set(Scene *scene,
+                      const std::string &object_name_,
+                      ShaderEvalType type_,
+                      int pass_filter_)
 {
-  m_bake_data = new BakeData(object, tri_offset, num_pixels);
-  return m_bake_data;
-}
-
-void BakeManager::set_shader_limit(const size_t x, const size_t y)
-{
-  m_shader_limit = x * y;
-  m_shader_limit = (size_t)pow(2, std::ceil(log(m_shader_limit) / log(2)));
-}
+  object_name = object_name_;
+  type = type_;
+  pass_filter = shader_type_to_pass_filter(type_, pass_filter_);
 
-bool BakeManager::bake(Device *device,
-                       DeviceScene *dscene,
-                       Scene *scene,
-                       Progress &progress,
-                       ShaderEvalType shader_type,
-                       const int pass_filter,
-                       BakeData *bake_data,
-                       float result[])
-{
-  size_t num_pixels = bake_data->size();
-
-  int num_samples = aa_samples(scene, bake_data, shader_type);
+  Pass::add(PASS_BAKE_PRIMITIVE, scene->film->passes);
+  Pass::add(PASS_BAKE_DIFFERENTIAL, scene->film->passes);
 
-  /* calculate the total pixel samples for the progress bar */
-  total_pixel_samples = 0;
-  for (size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
-    size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
-    total_pixel_samples += shader_size * num_samples;
+  if (type == SHADER_EVAL_UV) {
+    /* force UV to be available */
+    Pass::add(PASS_UV, scene->film->passes);
   }
-  progress.reset_sample();
-  progress.set_total_pixel_samples(total_pixel_samples);
-
-  /* needs to be up to date for baking specific AA samples */
-  dscene->data.integrator.aa_samples = num_samples;
-  device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));
-
-  for (size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
-    size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
 
-    /* setup input for device task */
-    device_vector<uint4> d_input(device, "bake_input", MEM_READ_ONLY);
-    uint4 *d_input_data = d_input.alloc(shader_size * 2);
-    size_t d_input_size = 0;
-
-    for (size_t i = shader_offset; i < (shader_offset + shader_size); i++) {
-      d_input_data[d_input_size++] = bake_data->data(i);
-      d_input_data[d_input_size++] = bake_data->differentials(i);
-    }
-
-    if (d_input_size == 0) {
-      m_is_baking = false;
-      return false;
-    }
-
-    /* run device task */
-    device_vector<float4> d_output(device, "bake_output", MEM_READ_WRITE);
-    d_output.alloc(shader_size);
-    d_output.zero_to_device();
-    d_input.copy_to_device();
-
-    DeviceTask task(DeviceTask::SHADER);
-    task.shader_input = d_input.device_pointer;
-    task.shader_output = d_output.device_pointer;
-    task.shader_eval_type = shader_type;
-    task.shader_filter = pass_filter;
-    task.shader_x = 0;
-    task.offset = shader_offset;
-    task.shader_w = d_output.size();
-    task.num_samples = num_samples;
-    task.get_cancel = function_bind(&Progress::get_cancel, &progress);
-    task.update_progress_sample = function_bind(&Progress::add_samples_update, &progress, _1, _2);
-
-    device->task_add(task);
-    device->task_wait();
-
-    if (progress.get_cancel()) {
-      d_input.free();
-      d_output.free();
-      m_is_baking = false;
-      return false;
-    }
-
-    d_output.copy_from_device(0, 1, d_output.size());
-    d_input.free();
-
-    /* read result */
-    int k = 0;
-
-    float4 *offset = d_output.data();
-
-    size_t depth = 4;
-    for (size_t i = shader_offset; i < (shader_offset + shader_size); i++) {
-      size_t index = i * depth;
-      float4 out = offset[k++];
-
-      if (bake_data->is_valid(i)) {
-        for (size_t j = 0; j < 4; j++) {
-          result[index + j] = out[j];
-        }
-      }
-    }
-
-    d_output.free();
+  /* force use_light_pass to be true if we bake more than just colors */
+  if (pass_filter & ~BAKE_FILTER_COLOR) {
+    Pass::add(PASS_LIGHT, scene->film->passes);
   }
 
-  m_is_baking = false;
-  return true;
+  /* Tag film and integrator for update so the passes added above take effect. */
+  scene->film->tag_update(scene);
+  scene->integrator->tag_update(scene);
+
+  need_update = true;
 }
 
 void BakeManager::device_update(Device * /*device*/,
-                                DeviceScene * /*dscene*/,
-                                Scene * /*scene*/,
-                                Progress &progress)
+                                DeviceScene *dscene,
+                                Scene *scene,
+                                Progress & /* progress */)
 {
   if (!need_update)
     return;
 
-  if (progress.get_cancel())
-    return;
+  KernelIntegrator *kintegrator = &dscene->data.integrator;
+  KernelBake *kbake = &dscene->data.bake;
 
-  need_update = false;
-}
-
-void BakeManager::device_free(Device * /*device*/, DeviceScene * /*dscene*/)
-{
-}
-
-int BakeManager::aa_samples(Scene *scene, BakeData *bake_data, ShaderEvalType type)
-{
-  if (type == SHADER_EVAL_UV || type == SHADER_EVAL_ROUGHNESS) {
-    return 1;
-  }
-  else if (type == SHADER_EVAL_NORMAL) {
-    /* Only antialias normal if mesh has bump mapping. */
-    Object *object = scene->objects[bake_data->object()];
+  kbake->type = type;
+  kbake->pass_filter = pass_filter;
 
-    if (object->geometry) {
-      foreach (Shader *shader, object->geometry->used_shaders) {
-        if (shader->has_bump) {
-          return scene->integrator->aa_samples;
-        }
-      }
+  int object_index = 0;
+  foreach (Object *object, scene->objects) {
+    const Geometry *geom = object->geometry;
+    if (object->name == object_name && geom->type == Geometry::MESH) {
+      kbake->object_index = object_index;
+      kbake->tri_offset = geom->prim_offset;
+      kintegrator->aa_samples = aa_samples(scene, object, type);
+      break;
     }
 
-    return 1;
-  }
-  else {
-    return scene->integrator->aa_samples;
+    object_index++;
   }
+
+  need_update = false;
 }
 
-/* Keep it synced with kernel_bake.h logic */
-int BakeManager::shader_type_to_pass_filter(ShaderEvalType type, const int pass_filter)
+void BakeManager::device_free(Device * /*device*/, DeviceScene * /*dscene*/)
 {
-  const int component_flags = pass_filter &
-                              (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_COLOR);
-
-  switch (type) {
-    case SHADER_EVAL_AO:
-      return BAKE_FILTER_AO;
-    case SHADER_EVAL_SHADOW:
-      return BAKE_FILTER_DIRECT;
-    case SHADER_EVAL_DIFFUSE:
-      return BAKE_FILTER_DIFFUSE | component_flags;
-    case SHADER_EVAL_GLOSSY:
-      return BAKE_FILTER_GLOSSY | component_flags;
-    case SHADER_EVAL_TRANSMISSION:
-      return BAKE_FILTER_TRANSMISSION | component_flags;
-    case SHADER_EVAL_COMBINED:
-      return pass_filter;
-    default:
-      return 0;
-  }
 }
 
 CCL_NAMESPACE_END
index 88537623efb9b38d15f67278ab5082d86a5a3b3a..93e664c2ab132151b490c85bf89bcbd03ae1a4bd 100644 (file)
 
 CCL_NAMESPACE_BEGIN
 
-class BakeData {
- public:
-  BakeData(const int object, const size_t tri_offset, const size_t num_pixels);
-  ~BakeData();
-
-  void set(int i, int prim, float uv[2], float dudx, float dudy, float dvdx, float dvdy);
-  void set_null(int i);
-  int object();
-  size_t size();
-  uint4 data(int i);
-  uint4 differentials(int i);
-  bool is_valid(int i);
-
- private:
-  int m_object;
-  size_t m_tri_offset;
-  size_t m_num_pixels;
-  vector<int> m_primitive;
-  vector<float> m_u;
-  vector<float> m_v;
-  vector<float> m_dudx;
-  vector<float> m_dudy;
-  vector<float> m_dvdx;
-  vector<float> m_dvdy;
-};
-
 class BakeManager {
  public:
   BakeManager();
   ~BakeManager();
 
+  void set(Scene *scene, const std::string &object_name, ShaderEvalType type, int pass_filter);
   bool get_baking();
-  void set_baking(const bool value);
-
-  BakeData *init(const int object, const size_t tri_offset, const size_t num_pixels);
-
-  void set_shader_limit(const size_t x, const size_t y);
-
-  bool bake(Device *device,
-            DeviceScene *dscene,
-            Scene *scene,
-            Progress &progress,
-            ShaderEvalType shader_type,
-            const int pass_filter,
-            BakeData *bake_data,
-            float result[]);
 
   void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
   void device_free(Device *device, DeviceScene *dscene);
 
-  static int shader_type_to_pass_filter(ShaderEvalType type, const int pass_filter);
-  static int aa_samples(Scene *scene, BakeData *bake_data, ShaderEvalType type);
-
   bool need_update;
 
-  size_t total_pixel_samples;
-
  private:
-  BakeData *m_bake_data;
-  bool m_is_baking;
-  size_t m_shader_limit;
+  ShaderEvalType type;
+  int pass_filter;
+  std::string object_name;
 };
 
 CCL_NAMESPACE_END
index 2d89fb9ffba6c74f2b1e28422cfd9be54687db97..b26366af852055148663ec9e1abb71e17c88b2c3 100644 (file)
@@ -459,6 +459,40 @@ bool RenderBuffers::get_pass_rect(
   return false;
 }
 
+bool RenderBuffers::set_pass_rect(PassType type, int components, float *pixels)
+{
+  if (buffer.data() == NULL) {
+    return false;
+  }
+
+  int pass_offset = 0;
+
+  for (size_t j = 0; j < params.passes.size(); j++) {
+    Pass &pass = params.passes[j];
+
+    if (pass.type != type) {
+      pass_offset += pass.components;
+      continue;
+    }
+
+    float *out = buffer.data() + pass_offset;
+    int pass_stride = params.get_passes_size();
+    int size = params.width * params.height;
+
+    assert(pass.components == components);
+
+    for (int i = 0; i < size; i++, out += pass_stride, pixels += components) {
+      for (int j = 0; j < components; j++) {
+        out[j] = pixels[j];
+      }
+    }
+
+    return true;
+  }
+
+  return false;
+}
+
 /* Display Buffer */
 
 DisplayBuffer::DisplayBuffer(Device *device, bool linear)
index 42efb031843c15faf39a267663b514543df4a0ba..975bae2239c28a4c68617283505e9d2f061b1bc6 100644 (file)
@@ -92,6 +92,7 @@ class RenderBuffers {
       const string &name, float exposure, int sample, int components, float *pixels);
   bool get_denoising_pass_rect(
       int offset, float exposure, int sample, int components, float *pixels);
+  bool set_pass_rect(PassType type, int components, float *pixels);
 };
 
 /* Display Buffer
@@ -130,7 +131,7 @@ class DisplayBuffer {
 
 class RenderTile {
  public:
-  typedef enum { PATH_TRACE = (1 << 0), DENOISE = (1 << 1) } Task;
+  typedef enum { PATH_TRACE = (1 << 0), BAKE = (1 << 1), DENOISE = (1 << 2) } Task;
 
   Task task;
   int x, y, w, h;
index 26eda93fadd0c99438f3c59307575609756f4d34..d7cbf4a3581c887b69ffed506195411570468264 100644 (file)
@@ -196,6 +196,10 @@ void Pass::add(PassType type, vector<Pass> &passes, const char *name)
     case PASS_AOV_VALUE:
       pass.components = 1;
       break;
+    case PASS_BAKE_PRIMITIVE:
+    case PASS_BAKE_DIFFERENTIAL:
+      pass.components = 4;
+      break;
     default:
       assert(false);
       break;
@@ -386,11 +390,13 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
     if (pass.type <= PASS_CATEGORY_MAIN_END) {
       kfilm->pass_flag |= pass_flag;
     }
-    else {
-      assert(pass.type <= PASS_CATEGORY_LIGHT_END);
+    else if (pass.type <= PASS_CATEGORY_LIGHT_END) {
       kfilm->use_light_pass = 1;
       kfilm->light_pass_flag |= pass_flag;
     }
+    else {
+      assert(pass.type <= PASS_CATEGORY_BAKE_END);
+    }
 
     switch (pass.type) {
       case PASS_COMBINED:
@@ -471,6 +477,13 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
         kfilm->pass_volume_direct = kfilm->pass_stride;
         break;
 
+      case PASS_BAKE_PRIMITIVE:
+        kfilm->pass_bake_primitive = kfilm->pass_stride;
+        break;
+      case PASS_BAKE_DIFFERENTIAL:
+        kfilm->pass_bake_differential = kfilm->pass_stride;
+        break;
+
 #ifdef WITH_CYCLES_DEBUG
       case PASS_BVH_TRAVERSED_NODES:
         kfilm->pass_bvh_traversed_nodes = kfilm->pass_stride;
index f7df81a06018ded4dbefe50d560d04f44831bd6a..361a1465aac8658d3653565e87f57fe526b5240a 100644 (file)
@@ -410,7 +410,16 @@ bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_typ
   rtile.num_samples = tile_manager.state.num_samples;
   rtile.resolution = tile_manager.state.resolution_divider;
   rtile.tile_index = tile->index;
-  rtile.task = tile->state == Tile::DENOISE ? RenderTile::DENOISE : RenderTile::PATH_TRACE;
+
+  if (tile->state == Tile::DENOISE) {
+    rtile.task = RenderTile::DENOISE;
+  }
+  else if (read_bake_tile_cb) {
+    rtile.task = RenderTile::BAKE;
+  }
+  else {
+    rtile.task = RenderTile::PATH_TRACE;
+  }
 
   tile_lock.unlock();
 
@@ -451,11 +460,20 @@ bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_typ
   rtile.buffers = tile->buffers;
   rtile.sample = tile_manager.state.sample;
 
-  /* this will tag tile as IN PROGRESS in blender-side render pipeline,
-   * which is needed to highlight currently rendering tile before first
-   * sample was processed for it
-   */
-  update_tile_sample(rtile);
+  if (read_bake_tile_cb) {
+    /* This will read any passes needed as input for baking. */
+    {
+      thread_scoped_lock tile_lock(tile_mutex);
+      read_bake_tile_cb(rtile);
+    }
+    rtile.buffers->buffer.copy_to_device();
+  }
+  else {
+    /* This will tag tile as IN PROGRESS in blender-side render pipeline,
+     * which is needed to highlight currently rendering tile before first
+     * sample was processed for it. */
+    update_tile_sample(rtile);
+  }
 
   return true;
 }
@@ -484,6 +502,7 @@ void Session::release_tile(RenderTile &rtile, const bool need_denoise)
   bool delete_tile;
 
   if (tile_manager.finish_tile(rtile.tile_index, need_denoise, delete_tile)) {
+    /* Write out the pixels of a finished tile. */
     if (write_render_tile_cb && params.progressive_refine == false) {
       write_render_tile_cb(rtile);
     }
@@ -494,6 +513,7 @@ void Session::release_tile(RenderTile &rtile, const bool need_denoise)
     }
   }
   else {
+    /* Update the pixels of a tile that is still in progress. */
     if (update_render_tile_cb && params.progressive_refine == false) {
       update_render_tile_cb(rtile, false);
     }
index f06952e80209290aa8ee01c3850f41c5d2347ef4..2707eed5531b26462a4bb5bc3afdd031ab6a5e4c 100644 (file)
@@ -148,6 +148,7 @@ class Session {
 
   function<void(RenderTile &)> write_render_tile_cb;
   function<void(RenderTile &, bool)> update_render_tile_cb;
+  function<void(RenderTile &)> read_bake_tile_cb;
 
   explicit Session(const SessionParams &params);
   ~Session();
index 46923b593b1d15234c8ff7106ad82420c4ca53d5..c31de7f371c98a48278656eb60b8418b7d98b222 100644 (file)
@@ -1024,7 +1024,7 @@ static int bake(Render *re,
                           highpoly[i].ob,
                           i,
                           pixel_array_high,
-                          num_pixels,
+                          &bake_images,
                           depth,
                           pass_type,
                           pass_filter,
@@ -1046,7 +1046,7 @@ static int bake(Render *re,
                           ob_low_eval,
                           0,
                           pixel_array_low,
-                          num_pixels,
+                          &bake_images,
                           depth,
                           pass_type,
                           pass_filter,
index 8322c7ad5f45870bd366fc2fa07cdab31f34b376..6c21e4ad01b345d30f13e06d951c639834203d33 100644 (file)
@@ -181,11 +181,8 @@ static void engine_bake(RenderEngine *engine,
                         struct Object *object,
                         const int pass_type,
                         const int pass_filter,
-                        const int object_id,
-                        const struct BakePixel *pixel_array,
-                        const int num_pixels,
-                        const int depth,
-                        void *result)
+                        const int width,
+                        const int height)
 {
   extern FunctionRNA rna_RenderEngine_bake_func;
   PointerRNA ptr;
@@ -200,11 +197,8 @@ static void engine_bake(RenderEngine *engine,
   RNA_parameter_set_lookup(&list, "object", &object);
   RNA_parameter_set_lookup(&list, "pass_type", &pass_type);
   RNA_parameter_set_lookup(&list, "pass_filter", &pass_filter);
-  RNA_parameter_set_lookup(&list, "object_id", &object_id);
-  RNA_parameter_set_lookup(&list, "pixel_array", &pixel_array);
-  RNA_parameter_set_lookup(&list, "num_pixels", &num_pixels);
-  RNA_parameter_set_lookup(&list, "depth", &depth);
-  RNA_parameter_set_lookup(&list, "result", &result);
+  RNA_parameter_set_lookup(&list, "width", &width);
+  RNA_parameter_set_lookup(&list, "height", &height);
   engine->type->rna_ext.call(NULL, &ptr, func, &list);
 
   RNA_parameter_list_free(&list);
@@ -461,12 +455,6 @@ void rna_RenderPass_rect_set(PointerRNA *ptr, const float *values)
   memcpy(rpass->rect, values, sizeof(float) * rpass->rectx * rpass->recty * rpass->channels);
 }
 
-static PointerRNA rna_BakePixel_next_get(PointerRNA *ptr)
-{
-  BakePixel *bp = ptr->data;
-  return rna_pointer_inherit_refine(ptr, &RNA_BakePixel, bp + 1);
-}
-
 static RenderPass *rna_RenderPass_find_by_type(RenderLayer *rl, int passtype, const char *view)
 {
   return RE_pass_find_by_type(rl, passtype, view);
@@ -535,33 +523,9 @@ static void rna_def_render_engine(BlenderRNA *brna)
                      0,
                      INT_MAX);
   RNA_def_parameter_flags(parm, 0, PARM_REQUIRED);
-  parm = RNA_def_int(func,
-                     "object_id",
-                     0,
-                     0,
-                     INT_MAX,
-                     "Object Id",
-                     "Id of the current object being baked in relation to the others",
-                     0,
-                     INT_MAX);
-  RNA_def_parameter_flags(parm, 0, PARM_REQUIRED);
-  parm = RNA_def_pointer(func, "pixel_array", "BakePixel", "", "");
-  RNA_def_parameter_flags(parm, 0, PARM_REQUIRED);
-  parm = RNA_def_int(func,
-                     "num_pixels",
-                     0,
-                     0,
-                     INT_MAX,
-                     "Number of Pixels",
-                     "Size of the baking batch",
-                     0,
-                     INT_MAX);
-  RNA_def_parameter_flags(parm, 0, PARM_REQUIRED);
-  parm = RNA_def_int(
-      func, "depth", 0, 0, INT_MAX, "Pixels depth", "Number of channels", 1, INT_MAX);
+  parm = RNA_def_int(func, "width", 0, 0, INT_MAX, "Width", "Image width", 0, INT_MAX);
   RNA_def_parameter_flags(parm, 0, PARM_REQUIRED);
-  /* TODO, see how array size of 0 works, this shouldnt be used */
-  parm = RNA_def_pointer(func, "result", "AnyType", "", "");
+  parm = RNA_def_int(func, "height", 0, 0, INT_MAX, "Height", "Image height", 0, INT_MAX);
   RNA_def_parameter_flags(parm, 0, PARM_REQUIRED);
 
   /* viewport render callbacks */
@@ -1119,53 +1083,6 @@ static void rna_def_render_pass(BlenderRNA *brna)
   RNA_define_verify_sdna(1);
 }
 
-static void rna_def_render_bake_pixel(BlenderRNA *brna)
-{
-  StructRNA *srna;
-  PropertyRNA *prop;
-
-  srna = RNA_def_struct(brna, "BakePixel", NULL);
-  RNA_def_struct_ui_text(srna, "Bake Pixel", "");
-
-  RNA_define_verify_sdna(0);
-
-  prop = RNA_def_property(srna, "primitive_id", PROP_INT, PROP_NONE);
-  RNA_def_property_int_sdna(prop, NULL, "primitive_id");
-  RNA_def_property_clear_flag(prop, PROP_EDITABLE);
-
-  prop = RNA_def_property(srna, "object_id", PROP_INT, PROP_NONE);
-  RNA_def_property_int_sdna(prop, NULL, "object_id");
-  RNA_def_property_clear_flag(prop, PROP_EDITABLE);
-
-  prop = RNA_def_property(srna, "uv", PROP_FLOAT, PROP_NONE);
-  RNA_def_property_array(prop, 2);
-  RNA_def_property_float_sdna(prop, NULL, "uv");
-  RNA_def_property_clear_flag(prop, PROP_EDITABLE);
-
-  prop = RNA_def_property(srna, "du_dx", PROP_FLOAT, PROP_NONE);
-  RNA_def_property_float_sdna(prop, NULL, "du_dx");
-  RNA_def_property_clear_flag(prop, PROP_EDITABLE);
-
-  prop = RNA_def_property(srna, "du_dy", PROP_FLOAT, PROP_NONE);
-  RNA_def_property_float_sdna(prop, NULL, "du_dy");
-  RNA_def_property_clear_flag(prop, PROP_EDITABLE);
-
-  prop = RNA_def_property(srna, "dv_dx", PROP_FLOAT, PROP_NONE);
-  RNA_def_property_float_sdna(prop, NULL, "dv_dx");
-  RNA_def_property_clear_flag(prop, PROP_EDITABLE);
-
-  prop = RNA_def_property(srna, "dv_dy", PROP_FLOAT, PROP_NONE);
-  RNA_def_property_float_sdna(prop, NULL, "dv_dy");
-  RNA_def_property_clear_flag(prop, PROP_EDITABLE);
-
-  prop = RNA_def_property(srna, "next", PROP_POINTER, PROP_NONE);
-  RNA_def_property_struct_type(prop, "BakePixel");
-  RNA_def_property_pointer_funcs(prop, "rna_BakePixel_next_get", NULL, NULL, NULL);
-  RNA_def_property_clear_flag(prop, PROP_EDITABLE);
-
-  RNA_define_verify_sdna(1);
-}
-
 void RNA_def_render(BlenderRNA *brna)
 {
   rna_def_render_engine(brna);
@@ -1173,7 +1090,6 @@ void RNA_def_render(BlenderRNA *brna)
   rna_def_render_view(brna);
   rna_def_render_layer(brna);
   rna_def_render_pass(brna);
-  rna_def_render_bake_pixel(brna);
 }
 
 #endif /* RNA_RUNTIME */
index 372defbe8db3d72404dd6532b6f6380a91ea2cd4..59e344040749baad5cb2b35c59d4140e16e7fd4d 100644 (file)
@@ -67,7 +67,7 @@ bool RE_bake_engine(struct Render *re,
                     struct Object *object,
                     const int object_id,
                     const BakePixel pixel_array[],
-                    const size_t num_pixels,
+                    const BakeImages *bake_images,
                     const int depth,
                     const eScenePassType pass_type,
                     const int pass_filter,
index fd55a2a01df676ef2e1201ceadda51edad745d92..82b45ba9d4a004444948bbf994223c470ab0305e 100644 (file)
@@ -87,11 +87,8 @@ typedef struct RenderEngineType {
                struct Object *object,
                const int pass_type,
                const int pass_filter,
-               const int object_id,
-               const struct BakePixel *pixel_array,
-               const int num_pixels,
-               const int depth,
-               void *result);
+               const int width,
+               const int height);
 
   void (*view_update)(struct RenderEngine *engine,
                       const struct bContext *context,
@@ -140,6 +137,13 @@ typedef struct RenderEngine {
 
   struct ReportList *reports;
 
+  struct { /* Bake state for one image, filled in by RE_bake_engine() and zeroed afterwards. */
+    const struct BakePixel *pixels; /* Input pixels of the image; non-NULL while baking. */
+    float *result;                  /* Output buffer, `depth` floats per pixel. */
+    int width, height, depth;       /* Image size in pixels and channels per pixel. */
+    int object_id;                  /* Only pixels with this object id are baked. */
+  } bake;
+
   /* Depsgraph */
   struct Depsgraph *depsgraph;
 
index fabbd5fb09619faf34366fdd65678c332a663cae..0ed8871b224612c1943401c1d2cd742060b0e0d0 100644 (file)
@@ -84,11 +84,12 @@ void render_result_exr_file_begin(struct Render *re, struct RenderEngine *engine
 void render_result_exr_file_end(struct Render *re, struct RenderEngine *engine);
 
 /* render pass wrapper for gpencil */
-struct RenderPass *gp_add_pass(struct RenderResult *rr,
-                               struct RenderLayer *rl,
-                               int channels,
-                               const char *name,
-                               const char *viewname);
+struct RenderPass *render_layer_add_pass(struct RenderResult *rr,
+                                         struct RenderLayer *rl,
+                                         int channels,
+                                         const char *name,
+                                         const char *viewname,
+                                         const char *chanid);
 
 void render_result_exr_file_merge(struct RenderResult *rr,
                                   struct RenderResult *rrpart,
index 4770e98bd20fdfccf495a0608348106b24993f9a..4d88bb82dd92b10bc1647f59c66072a59ca7f8e0 100644 (file)
@@ -31,6 +31,7 @@
 
 #include "BLI_ghash.h"
 #include "BLI_listbase.h"
+#include "BLI_math_bits.h"
 #include "BLI_rect.h"
 #include "BLI_string.h"
 #include "BLI_utildefines.h"
@@ -167,6 +168,89 @@ void RE_engine_free(RenderEngine *engine)
   MEM_freeN(engine);
 }
 
+/* Bake Render Results */
+
+static RenderResult *render_result_from_bake(RenderEngine *engine, int x, int y, int w, int h)
+{
+  /* Create a w x h render result for the tile at position (x, y) of the bake image. */
+  RenderResult *rr = MEM_callocN(sizeof(RenderResult), __func__);
+
+  rr->rectx = w;
+  rr->recty = h;
+  rr->tilerect.xmin = x;
+  rr->tilerect.ymin = y;
+  rr->tilerect.xmax = x + w;
+  rr->tilerect.ymax = y + h;
+
+  /* Add single baking render layer. */
+  RenderLayer *rl = MEM_callocN(sizeof(RenderLayer), "bake render layer");
+  rl->rectx = w;
+  rl->recty = h;
+  BLI_addtail(&rr->layers, rl);
+
+  /* Add render passes: combined for the baked result, the other two for the bake inputs. */
+  render_layer_add_pass(rr, rl, engine->bake.depth, RE_PASSNAME_COMBINED, "", "RGBA");
+  RenderPass *primitive_pass = render_layer_add_pass(rr, rl, 4, "BakePrimitive", "", "RGBA");
+  RenderPass *differential_pass = render_layer_add_pass(rr, rl, 4, "BakeDifferential", "", "RGBA");
+
+  /* Fill render passes from bake pixel array, to be read by the render engine. */
+  for (int ty = 0; ty < h; ty++) {
+    size_t offset = (size_t)ty * w * 4;
+    float *primitive = primitive_pass->rect + offset;
+    float *differential = differential_pass->rect + offset;
+    /* Row start in the full image, computed in size_t as the int product can overflow. */
+    size_t bake_offset = (size_t)(y + ty) * engine->bake.width + x;
+    const BakePixel *bake_pixel = engine->bake.pixels + bake_offset;
+
+    for (int tx = 0; tx < w; tx++) {
+      if (bake_pixel->object_id != engine->bake.object_id) {
+        primitive[0] = int_as_float(-1); /* Pixel does not belong to the baked object. */
+        primitive[1] = int_as_float(-1);
+      }
+      else {
+        primitive[0] = int_as_float(bake_pixel->object_id);
+        primitive[1] = int_as_float(bake_pixel->primitive_id);
+        primitive[2] = bake_pixel->uv[0];
+        primitive[3] = bake_pixel->uv[1];
+
+        differential[0] = bake_pixel->du_dx;
+        differential[1] = bake_pixel->du_dy;
+        differential[2] = bake_pixel->dv_dx;
+        differential[3] = bake_pixel->dv_dy;
+      }
+
+      primitive += 4;
+      differential += 4;
+      bake_pixel++;
+    }
+  }
+
+  return rr;
+}
+
+static void render_result_to_bake(RenderEngine *engine, RenderResult *rr)
+{
+  RenderPass *rpass = RE_pass_find_by_name(rr->layers.first, RE_PASSNAME_COMBINED, "");
+  /* Without a combined pass there is nothing to copy back. */
+  if (!rpass) {
+    return;
+  }
+
+  /* Copy tile rows to the full image bake result; size_t offsets avoid int overflow. */
+  int x = rr->tilerect.xmin;
+  int y = rr->tilerect.ymin;
+  int w = rr->tilerect.xmax - rr->tilerect.xmin;
+  int h = rr->tilerect.ymax - rr->tilerect.ymin;
+
+  for (int ty = 0; ty < h; ty++) {
+    size_t offset = (size_t)ty * w * engine->bake.depth;
+    size_t bake_offset = ((size_t)(y + ty) * engine->bake.width + x) * engine->bake.depth;
+    size_t size = (size_t)w * engine->bake.depth * sizeof(float);
+
+    memcpy(engine->bake.result + bake_offset, rpass->rect + offset, size);
+  }
+}
+
 /* Render Results */
 
 static RenderPart *get_part_from_result(Render *re, RenderResult *result)
@@ -180,6 +264,12 @@ static RenderPart *get_part_from_result(Render *re, RenderResult *result)
 RenderResult *RE_engine_begin_result(
     RenderEngine *engine, int x, int y, int w, int h, const char *layername, const char *viewname)
 {
+  if (engine->bake.pixels) {
+    RenderResult *result = render_result_from_bake(engine, x, y, w, h);
+    BLI_addtail(&engine->fullresult, result);
+    return result;
+  }
+
   Render *re = engine->re;
   RenderResult *result;
   rcti disprect;
@@ -237,6 +327,11 @@ RenderResult *RE_engine_begin_result(
 
 void RE_engine_update_result(RenderEngine *engine, RenderResult *result)
 {
+  if (engine->bake.pixels) {
+    /* No interactive baking updates for now. */
+    return;
+  }
+
   Render *re = engine->re;
 
   if (result) {
@@ -270,6 +365,13 @@ void RE_engine_end_result(
     return;
   }
 
+  if (engine->bake.pixels) {
+    render_result_to_bake(engine, result);
+    BLI_remlink(&engine->fullresult, result);
+    render_result_free(result);
+    return;
+  }
+
   /* merge. on break, don't merge in result for preview renders, looks nicer */
   if (!highlight) {
     /* for exr tile render, detect tiles that are done */
@@ -574,7 +676,7 @@ bool RE_bake_engine(Render *re,
                     Object *object,
                     const int object_id,
                     const BakePixel pixel_array[],
-                    const size_t num_pixels,
+                    const BakeImages *bake_images,
                     const int depth,
                     const eScenePassType pass_type,
                     const int pass_filter,
@@ -619,16 +721,21 @@ bool RE_bake_engine(Render *re,
       type->update(engine, re->main, engine->depsgraph);
     }
 
-    type->bake(engine,
-               engine->depsgraph,
-               object,
-               pass_type,
-               pass_filter,
-               object_id,
-               pixel_array,
-               num_pixels,
-               depth,
-               result);
+    for (int i = 0; i < bake_images->size; i++) {
+      const BakeImage *image = bake_images->data + i;
+
+      engine->bake.pixels = pixel_array + image->offset;
+      engine->bake.result = result + image->offset * depth;
+      engine->bake.width = image->width;
+      engine->bake.height = image->height;
+      engine->bake.depth = depth;
+      engine->bake.object_id = object_id;
+
+      type->bake(
+          engine, engine->depsgraph, object, pass_type, pass_filter, image->width, image->height);
+
+      memset(&engine->bake, 0, sizeof(engine->bake));
+    }
 
     engine->depsgraph = NULL;
   }
index 4a910d9e12c81aa6fc88b104c1e9b028a5da2fd6..d68f74751ec7cc1d810860291af0029c10d466c5 100644 (file)
@@ -2955,5 +2955,5 @@ RenderPass *RE_create_gp_pass(RenderResult *rr, const char *layername, const cha
     BLI_freelinkN(&rl->passes, rp);
   }
   /* create a totally new pass */
-  return gp_add_pass(rr, rl, 4, RE_PASSNAME_COMBINED, viewname);
+  return render_layer_add_pass(rr, rl, 4, RE_PASSNAME_COMBINED, viewname, "RGBA");
 }
index b38c1b573f3ad10c37fe543e290671cd7ba06949..d829033656a30f2c40f16abd9cbec0267986971f 100644 (file)
@@ -213,12 +213,12 @@ static void set_pass_full_name(
 
 /********************************** New **************************************/
 
-static RenderPass *render_layer_add_pass(RenderResult *rr,
-                                         RenderLayer *rl,
-                                         int channels,
-                                         const char *name,
-                                         const char *viewname,
-                                         const char *chan_id)
+RenderPass *render_layer_add_pass(RenderResult *rr,
+                                  RenderLayer *rl,
+                                  int channels,
+                                  const char *name,
+                                  const char *viewname,
+                                  const char *chan_id)
 {
   const int view_id = BLI_findstringindex(&rr->views, viewname, offsetof(RenderView, name));
   RenderPass *rpass = MEM_callocN(sizeof(RenderPass), name);
@@ -280,12 +280,6 @@ static RenderPass *render_layer_add_pass(RenderResult *rr,
 
   return rpass;
 }
-/* wrapper called from render_opengl */
-RenderPass *gp_add_pass(
-    RenderResult *rr, RenderLayer *rl, int channels, const char *name, const char *viewname)
-{
-  return render_layer_add_pass(rr, rl, channels, name, viewname, "RGBA");
-}
 
 /* called by main render as well for parts */
 /* will read info from Render *re to define layers */