Cycles: reduce memory usage of instanced objects by about 40%, as long as the
author: Brecht Van Lommel <brechtvanlommel@pandora.be>
Thu, 20 Dec 2012 19:26:57 +0000 (19:26 +0000)
committer: Brecht Van Lommel <brechtvanlommel@pandora.be>
Thu, 20 Dec 2012 19:26:57 +0000 (19:26 +0000)
motion vector pass is not enabled.

intern/cycles/kernel/kernel_object.h
intern/cycles/kernel/kernel_textures.h
intern/cycles/kernel/kernel_triangle.h
intern/cycles/kernel/kernel_types.h
intern/cycles/render/object.cpp
intern/cycles/render/scene.h

index 2b38544e527198c5338ee853b7e460f7c2963802..40aa4753daa85d976ea352a8e2736b0e347b9548 100644 (file)
@@ -20,11 +20,16 @@ CCL_NAMESPACE_BEGIN
 
 enum ObjectTransform {
        OBJECT_TRANSFORM = 0,
-       OBJECT_INVERSE_TRANSFORM = 3,
-       OBJECT_PROPERTIES = 6,
-       OBJECT_TRANSFORM_MOTION_PRE = 8,
-       OBJECT_TRANSFORM_MOTION_POST = 12,
-       OBJECT_DUPLI = 16
+       OBJECT_TRANSFORM_MOTION_PRE = 0,
+       OBJECT_INVERSE_TRANSFORM = 4,
+       OBJECT_TRANSFORM_MOTION_POST = 4,
+       OBJECT_PROPERTIES = 8,
+       OBJECT_DUPLI = 9
+};
+
+enum ObjectVectorTransform {
+       OBJECT_VECTOR_MOTION_PRE = 0,
+       OBJECT_VECTOR_MOTION_POST = 3
 };
 
 __device_inline Transform object_fetch_transform(KernelGlobals *kg, int object, enum ObjectTransform type)
@@ -40,6 +45,19 @@ __device_inline Transform object_fetch_transform(KernelGlobals *kg, int object,
        return tfm;
 }
 
+__device_inline Transform object_fetch_vector_transform(KernelGlobals *kg, int object, enum ObjectVectorTransform type)
+{
+       int offset = object*OBJECT_VECTOR_SIZE + (int)type;
+
+       Transform tfm;
+       tfm.x = kernel_tex_fetch(__objects_vector, offset + 0);
+       tfm.y = kernel_tex_fetch(__objects_vector, offset + 1);
+       tfm.z = kernel_tex_fetch(__objects_vector, offset + 2);
+       tfm.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
+
+       return tfm;
+}
+
 #ifdef __OBJECT_MOTION__
 __device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time)
 {
index 4855a948c6efe505ad1a26e8d87f72c651b0b4d2..29f6b3f072c25f48742b792a80608254274c5349 100644 (file)
@@ -34,6 +34,7 @@ KERNEL_TEX(uint, texture_uint, __object_node)
 
 /* objects */
 KERNEL_TEX(float4, texture_float4, __objects)
+KERNEL_TEX(float4, texture_float4, __objects_vector)
 
 /* triangles */
 KERNEL_TEX(float4, texture_float4, __tri_normal)
index 0db447289c8453ebff5707f8778e231cee27eb89..570ae52d6c266f37f9ff615ccfb65ce86a0b6411 100644 (file)
@@ -209,10 +209,10 @@ __device float4 triangle_motion_vector(KernelGlobals *kg, ShaderData *sd)
         * transformation was set match the world/object space of motion_pre/post */
        Transform tfm;
        
-       tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM_MOTION_PRE);
+       tfm = object_fetch_vector_transform(kg, sd->object, OBJECT_VECTOR_MOTION_PRE);
        motion_pre = transform_point(&tfm, motion_pre);
 
-       tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM_MOTION_POST);
+       tfm = object_fetch_vector_transform(kg, sd->object, OBJECT_VECTOR_MOTION_POST);
        motion_post = transform_point(&tfm, motion_post);
 
        float3 P;
index a0673f55681c1be7ea943befc2abc04928b7d07e..d11b96503d975dc3718539b3ba6747ef48c4e178 100644 (file)
@@ -29,7 +29,8 @@
 CCL_NAMESPACE_BEGIN
 
 /* constants */
-#define OBJECT_SIZE            18
+#define OBJECT_SIZE            11
+#define OBJECT_VECTOR_SIZE     6
 #define LIGHT_SIZE                     4
 #define FILTER_TABLE_SIZE      256
 #define RAMP_TABLE_SIZE                256
index bd9f16d64ef82fba31c624e7a6910775a324deb7..5df8e8c136812b6753eebfc7112970593c0dffcf 100644 (file)
@@ -150,12 +150,17 @@ ObjectManager::~ObjectManager()
 
 void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene, Scene *scene, uint *object_flag, Progress& progress)
 {
-       float4 *objects = dscene->objects.resize(OBJECT_SIZE*scene->objects.size());
+       float4 *objects;
+       float4 *objects_vector = NULL;
        int i = 0;
        map<Mesh*, float> surface_area_map;
        Scene::MotionType need_motion = scene->need_motion(device->info.advanced_shading);
        bool have_motion = false;
 
+       objects = dscene->objects.resize(OBJECT_SIZE*scene->objects.size());
+       if(need_motion == Scene::MOTION_PASS)
+               objects_vector = dscene->objects_vector.resize(OBJECT_VECTOR_SIZE*scene->objects.size());
+
        foreach(Object *ob, scene->objects) {
                Mesh *mesh = ob->mesh;
                uint flag = 0;
@@ -205,8 +210,8 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
                int offset = i*OBJECT_SIZE;
 
                memcpy(&objects[offset], &tfm, sizeof(float4)*3);
-               memcpy(&objects[offset+3], &itfm, sizeof(float4)*3);
-               objects[offset+6] = make_float4(surface_area, pass_id, random_number, __int_as_float(ob->particle_id));
+               memcpy(&objects[offset+4], &itfm, sizeof(float4)*3);
+               objects[offset+8] = make_float4(surface_area, pass_id, random_number, __int_as_float(ob->particle_id));
 
                if(need_motion == Scene::MOTION_PASS) {
                        /* motion transformations, is world/object space depending if mesh
@@ -220,8 +225,8 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
                        if(!mesh->attributes.find(ATTR_STD_MOTION_POST))
                                mtfm_post = mtfm_post * itfm;
 
-                       memcpy(&objects[offset+8], &mtfm_pre, sizeof(float4)*4);
-                       memcpy(&objects[offset+12], &mtfm_post, sizeof(float4)*4);
+                       memcpy(&objects_vector[i*OBJECT_VECTOR_SIZE+0], &mtfm_pre, sizeof(float4)*3);
+                       memcpy(&objects_vector[i*OBJECT_VECTOR_SIZE+3], &mtfm_post, sizeof(float4)*3);
                }
 #ifdef __OBJECT_MOTION__
                else if(need_motion == Scene::MOTION_BLUR) {
@@ -230,20 +235,16 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
                                DecompMotionTransform decomp;
 
                                transform_motion_decompose(&decomp, &ob->motion, &ob->tfm);
-                               memcpy(&objects[offset+8], &decomp, sizeof(float4)*8);
+                               memcpy(&objects[offset], &decomp, sizeof(float4)*8);
                                flag |= SD_OBJECT_MOTION;
                                have_motion = true;
                        }
-                       else {
-                               float4 no_motion = make_float4(FLT_MAX);
-                               memcpy(&objects[offset+8], &no_motion, sizeof(float4)*8);
-                       }
                }
 #endif
 
                /* dupli object coords */
-               objects[offset+16] = make_float4(ob->dupli_generated[0], ob->dupli_generated[1], ob->dupli_generated[2], 0.0f);
-               objects[offset+17] = make_float4(ob->dupli_uv[0], ob->dupli_uv[1], 0.0f, 0.0f);
+               objects[offset+9] = make_float4(ob->dupli_generated[0], ob->dupli_generated[1], ob->dupli_generated[2], 0.0f);
+               objects[offset+10] = make_float4(ob->dupli_uv[0], ob->dupli_uv[1], 0.0f, 0.0f);
 
                /* object flag */
                if(ob->use_holdout)
@@ -256,6 +257,8 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
        }
 
        device->tex_alloc("__objects", dscene->objects);
+       if(need_motion == Scene::MOTION_PASS)
+               device->tex_alloc("__objects_vector", dscene->objects_vector);
 
        dscene->data.bvh.have_motion = have_motion;
 }
@@ -297,6 +300,9 @@ void ObjectManager::device_free(Device *device, DeviceScene *dscene)
        device->tex_free(dscene->objects);
        dscene->objects.clear();
 
+       device->tex_free(dscene->objects_vector);
+       dscene->objects_vector.clear();
+
        device->tex_free(dscene->object_flag);
        dscene->object_flag.clear();
 }
index 92ef692b4b9ed558b593c5a13fe06e62113d8caf..ebe932e40e7870b85e51dfe24559968cf43c5f50 100644 (file)
@@ -74,6 +74,7 @@ public:
 
        /* objects */
        device_vector<float4> objects;
+       device_vector<float4> objects_vector;
 
        /* attributes */
        device_vector<uint4> attributes_map;