Cycles: optimizations for instances in scene updates before render starts,
author Brecht Van Lommel <brechtvanlommel@pandora.be>
Fri, 2 Sep 2011 16:15:18 +0000 (16:15 +0000)
committer Brecht Van Lommel <brechtvanlommel@pandora.be>
Fri, 2 Sep 2011 16:15:18 +0000 (16:15 +0000)
should load a non-trivial mesh instanced many times quite a bit faster now.

intern/cycles/device/device_opencl.cpp
intern/cycles/render/light.cpp
intern/cycles/render/object.cpp
intern/cycles/util/util_transform.h

index 9d25b6df5291b31cd3f059736e0d8f3dcd4ac5b7..787f0e6feabafadda1e58e3cc1018ff04f7d42fb 100644 (file)
@@ -217,8 +217,13 @@ public:
                if(!opencl_version_check())
                        return false;
 
+               /* nvidia opencl cache does not work correctly with includes, so force recompile */
+               static double recompile_trick = 0.0;
+               if(recompile_trick == 0.0)
+                       recompile_trick = time_dt();
+
                /* compile source */
-               string source = string_printf("#include \"kernel.cl\" // %lf\n", time_dt());
+               string source = string_printf("#include \"kernel.cl\" // %lf\n", recompile_trick);
                size_t source_len = source.size();
                const char *source_str = source.c_str();
 
index 88a797f753d304aa98fa66ef3adb3a7b803781e8..feb9e35e785ab986a7f3edc6658e692f12c000b2 100644 (file)
@@ -61,12 +61,24 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
 
        foreach(Object *object, scene->objects) {
                Mesh *mesh = object->mesh;
+               bool have_emission = false;
 
-               for(size_t i = 0; i < mesh->triangles.size(); i++) {
-                       Shader *shader = scene->shaders[mesh->shader[i]];
+               /* skip if we have no emission shaders */
+               foreach(uint sindex, mesh->used_shaders) {
+                       if(scene->shaders[sindex]->has_surface_emission) {
+                               have_emission = true;
+                               break;
+                       }
+               }
 
-                       if(shader->has_surface_emission)
-                               num_triangles++;
+               /* count triangles */
+               if(have_emission) {
+                       for(size_t i = 0; i < mesh->triangles.size(); i++) {
+                               Shader *shader = scene->shaders[mesh->shader[i]];
+
+                               if(shader->has_surface_emission)
+                                       num_triangles++;
+                       }
                }
        }
 
@@ -82,25 +94,38 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
 
        foreach(Object *object, scene->objects) {
                Mesh *mesh = object->mesh;
-               Transform tfm = object->tfm;
-               int object_id = (mesh->transform_applied)? -j-1: j;
+               bool have_emission = false;
+
+               /* skip if we have no emission shaders */
+               foreach(uint sindex, mesh->used_shaders) {
+                       if(scene->shaders[sindex]->has_surface_emission) {
+                               have_emission = true;
+                               break;
+                       }
+               }
+
+               /* sum area */
+               if(have_emission) {
+                       Transform tfm = object->tfm;
+                       int object_id = (mesh->transform_applied)? -j-1: j;
 
-               for(size_t i = 0; i < mesh->triangles.size(); i++) {
-                       Shader *shader = scene->shaders[mesh->shader[i]];
+                       for(size_t i = 0; i < mesh->triangles.size(); i++) {
+                               Shader *shader = scene->shaders[mesh->shader[i]];
 
-                       if(shader->has_surface_emission) {
-                               distribution[offset].x = totarea;
-                               distribution[offset].y = __int_as_float(i + mesh->tri_offset);
-                               distribution[offset].z = 1.0f;
-                               distribution[offset].w = __int_as_float(object_id);
-                               offset++;
+                               if(shader->has_surface_emission) {
+                                       distribution[offset].x = totarea;
+                                       distribution[offset].y = __int_as_float(i + mesh->tri_offset);
+                                       distribution[offset].z = 1.0f;
+                                       distribution[offset].w = __int_as_float(object_id);
+                                       offset++;
 
-                               Mesh::Triangle t = mesh->triangles[i];
-                               float3 p1 = transform(&tfm, mesh->verts[t.v[0]]);
-                               float3 p2 = transform(&tfm, mesh->verts[t.v[1]]);
-                               float3 p3 = transform(&tfm, mesh->verts[t.v[2]]);
+                                       Mesh::Triangle t = mesh->triangles[i];
+                                       float3 p1 = transform(&tfm, mesh->verts[t.v[0]]);
+                                       float3 p2 = transform(&tfm, mesh->verts[t.v[1]]);
+                                       float3 p3 = transform(&tfm, mesh->verts[t.v[2]]);
 
-                               totarea += triangle_area(p1, p2, p3);
+                                       totarea += triangle_area(p1, p2, p3);
+                               }
                        }
                }
 
index 4ba2de6e61b7877ebed326d332b45ae668d357ae..fab051bde725dd267da253684abf2ece7ed03c04 100644 (file)
@@ -22,6 +22,7 @@
 #include "scene.h"
 
 #include "util_foreach.h"
+#include "util_map.h"
 #include "util_progress.h"
 
 CCL_NAMESPACE_BEGIN
@@ -103,6 +104,7 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
 {
        float4 *objects = dscene->objects.resize(OBJECT_SIZE*scene->objects.size());
        int i = 0;
+       map<Mesh*, float> surface_area_map;
 
        foreach(Object *ob, scene->objects) {
                Mesh *mesh = ob->mesh;
@@ -112,16 +114,39 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
                Transform itfm = transform_inverse(tfm);
                Transform ntfm = transform_transpose(itfm);
 
-               /* compute surface area */
+               /* compute surface area. for uniform scale we can avoid the many
+                  transform calls and share computation for instances */
                /* todo: correct for displacement, and move to a better place */
-               float surfacearea = 0.0f;
-
-               foreach(Mesh::Triangle& t, mesh->triangles) {
-                       float3 p1 = transform(&tfm, mesh->verts[t.v[0]]);
-                       float3 p2 = transform(&tfm, mesh->verts[t.v[1]]);
-                       float3 p3 = transform(&tfm, mesh->verts[t.v[2]]);
-
-                       surfacearea += triangle_area(p1, p2, p3);
+               float uniform_scale;
+               float surface_area = 0.0f;
+               
+               if(transform_uniform_scale(tfm, uniform_scale)) {
+                       map<Mesh*, float>::iterator it = surface_area_map.find(mesh);
+
+                       if(it == surface_area_map.end()) {
+                               foreach(Mesh::Triangle& t, mesh->triangles) {
+                                       float3 p1 = mesh->verts[t.v[0]];
+                                       float3 p2 = mesh->verts[t.v[1]];
+                                       float3 p3 = mesh->verts[t.v[2]];
+
+                                       surface_area += triangle_area(p1, p2, p3);
+                               }
+
+                               surface_area_map[mesh] = surface_area;
+                       }
+                       else
+                               surface_area = it->second;
+
+                       surface_area *= uniform_scale;
+               }
+               else {
+                       foreach(Mesh::Triangle& t, mesh->triangles) {
+                               float3 p1 = transform(&tfm, mesh->verts[t.v[0]]);
+                               float3 p2 = transform(&tfm, mesh->verts[t.v[1]]);
+                               float3 p3 = transform(&tfm, mesh->verts[t.v[2]]);
+
+                               surface_area += triangle_area(p1, p2, p3);
+                       }
                }
 
                /* pack in texture */
@@ -130,7 +155,7 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
                memcpy(&objects[offset], &tfm, sizeof(float4)*4);
                memcpy(&objects[offset+4], &itfm, sizeof(float4)*4);
                memcpy(&objects[offset+8], &ntfm, sizeof(float4)*4);
-               objects[offset+12] = make_float4(surfacearea, 0.0f, 0.0f, 0.0f);
+               objects[offset+12] = make_float4(surface_area, 0.0f, 0.0f, 0.0f);
 
                i++;
 
index e904674a981e2cbb18ef5a83df2bc600f8ae1e60..998d4161ebf972995f01f2642ccafa8632e93f3d 100644 (file)
@@ -205,6 +205,30 @@ __device_inline float3 transform_get_column(const Transform *t, int column)
 
 Transform transform_inverse(const Transform& a);
 
+__device_inline bool transform_uniform_scale(const Transform& tfm, float& scale)
+{
+       /* the epsilon here is quite arbitrary, but this function is only used for
+          surface area and bump, where we expect it to not be so sensitive */
+       Transform ttfm = transform_transpose(tfm);
+       float eps = 1e-7f; 
+       
+       float sx = len(float4_to_float3(tfm.x));
+       float sy = len(float4_to_float3(tfm.y));
+       float sz = len(float4_to_float3(tfm.z));
+       float stx = len(float4_to_float3(ttfm.x));
+       float sty = len(float4_to_float3(ttfm.y));
+       float stz = len(float4_to_float3(ttfm.z));
+       
+       if(fabsf(sx - sy) < eps && fabsf(sx - sz) < eps &&
+          fabsf(sx - stx) < eps && fabsf(sx - sty) < eps &&
+          fabsf(sx - stz) < eps) {
+               scale = sx;
+               return true;
+       }
+   
+   return false;
+}
+
 #endif
 
 CCL_NAMESPACE_END