Code refactor: make Transform always affine, dropping last row.
authorBrecht Van Lommel <brechtvanlommel@gmail.com>
Thu, 8 Mar 2018 05:48:14 +0000 (06:48 +0100)
committerBrecht Van Lommel <brechtvanlommel@gmail.com>
Sat, 10 Mar 2018 03:54:05 +0000 (04:54 +0100)
This save a little memory and copying in the kernel by storing only a 4x3
matrix instead of a 4x4 matrix. We already did this in a few places, and
those don't need to be special exceptions anymore now.

25 files changed:
intern/cycles/app/cycles_xml.cpp
intern/cycles/blender/blender_camera.cpp
intern/cycles/blender/blender_util.h
intern/cycles/graph/node_type.cpp
intern/cycles/graph/node_xml.cpp
intern/cycles/kernel/bvh/bvh_nodes.h
intern/cycles/kernel/geom/geom_attribute.h
intern/cycles/kernel/geom/geom_curve_intersect.h
intern/cycles/kernel/geom/geom_object.h
intern/cycles/kernel/kernel_textures.h
intern/cycles/kernel/kernel_types.h
intern/cycles/kernel/osl/osl_services.cpp
intern/cycles/kernel/svm/svm_mapping.h
intern/cycles/kernel/svm/svm_tex_coord.h
intern/cycles/kernel/svm/svm_voxel.h
intern/cycles/render/light.cpp
intern/cycles/render/mesh.cpp
intern/cycles/render/nodes.cpp
intern/cycles/render/object.cpp
intern/cycles/render/object.h
intern/cycles/render/osl.cpp
intern/cycles/render/scene.h
intern/cycles/util/util_projection.h
intern/cycles/util/util_transform.cpp
intern/cycles/util/util_transform.h

index 21ae07e..a469553 100644 (file)
@@ -40,6 +40,7 @@
 
 #include "util/util_foreach.h"
 #include "util/util_path.h"
+#include "util/util_projection.h"
 #include "util/util_transform.h"
 #include "util/util_xml.h"
 
@@ -546,8 +547,10 @@ static void xml_read_transform(xml_node node, Transform& tfm)
 {
        if(node.attribute("matrix")) {
                vector<float> matrix;
-               if(xml_read_float_array(matrix, node, "matrix") && matrix.size() == 16)
-                       tfm = tfm * transform_transpose((*(Transform*)&matrix[0]));
+               if(xml_read_float_array(matrix, node, "matrix") && matrix.size() == 16) {
+                       ProjectionTransform projection = *(ProjectionTransform*)&matrix[0];
+                       tfm = tfm * projection_to_transform(projection_transpose(projection));
+               }
        }
 
        if(node.attribute("translate")) {
index 62e950e..e5f885c 100644 (file)
@@ -246,8 +246,7 @@ static Transform blender_camera_matrix(const Transform& tfm,
                        result = tfm *
                                make_transform(1.0f, 0.0f, 0.0f, 0.0f,
                                               0.0f, 0.0f, 1.0f, 0.0f,
-                                              0.0f, 1.0f, 0.0f, 0.0f,
-                                              0.0f, 0.0f, 0.0f, 1.0f);
+                                              0.0f, 1.0f, 0.0f, 0.0f);
                }
                else {
                        /* Make it so environment camera needs to be pointed in the direction
@@ -257,8 +256,7 @@ static Transform blender_camera_matrix(const Transform& tfm,
                        result = tfm *
                                make_transform( 0.0f, -1.0f, 0.0f, 0.0f,
                                                0.0f,  0.0f, 1.0f, 0.0f,
-                                              -1.0f,  0.0f, 0.0f, 0.0f,
-                                               0.0f,  0.0f, 0.0f, 1.0f);
+                                              -1.0f,  0.0f, 0.0f, 0.0f);
                }
        }
        else {
index 363e19f..c4c4503 100644 (file)
@@ -247,14 +247,15 @@ static inline float *image_get_float_pixels_for_frame(BL::Image& image,
 
 static inline Transform get_transform(const BL::Array<float, 16>& array)
 {
-       Transform tfm;
+       ProjectionTransform projection;
 
-       /* we assume both types to be just 16 floats, and transpose because blender
-        * use column major matrix order while we use row major */
-       memcpy(&tfm, &array, sizeof(float)*16);
-       tfm = transform_transpose(tfm);
+       /* We assume both types to be just 16 floats, and transpose because blender
+        * use column major matrix order while we use row major. */
+       memcpy(&projection, &array, sizeof(float)*16);
+       projection = projection_transpose(projection);
 
-       return tfm;
+       /* Drop last row, matrix is assumed to be affine transform. */
+       return projection_to_transform(projection);
 }
 
 static inline float2 get_float2(const BL::Array<float, 2>& array)
index a3a8fa5..37aae21 100644 (file)
@@ -77,7 +77,7 @@ size_t SocketType::max_size()
 
 void *SocketType::zero_default_value()
 {
-       static Transform zero_transform = {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
+       static Transform zero_transform = {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
        return &zero_transform;
 }
 
index d26b3b2..f4599e2 100644 (file)
@@ -196,7 +196,7 @@ void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
                        case SocketType::TRANSFORM:
                        {
                                array<Transform> value;
-                               xml_read_float_array<16>(value, attr);
+                               xml_read_float_array<12>(value, attr);
                                if(value.size() == 1) {
                                        node->set(socket, value[0]);
                                }
@@ -205,7 +205,7 @@ void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
                        case SocketType::TRANSFORM_ARRAY:
                        {
                                array<Transform> value;
-                               xml_read_float_array<16>(value, attr);
+                               xml_read_float_array<12>(value, attr);
                                node->set(socket, value);
                                break;
                        }
@@ -400,12 +400,10 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
                        {
                                Transform tfm = node->get_transform(socket);
                                std::stringstream ss;
-                               for(int i = 0; i < 4; i++) {
-                                       ss << string_printf("%g %g %g %g", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
-                                       if(i != 3) {
-                                               ss << " ";
-                                       }
+                               for(int i = 0; i < 3; i++) {
+                                       ss << string_printf("%g %g %g %g ", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
                                }
+                               ss << string_printf("%g %g %g %g", 0.0, 0.0, 0.0, 1.0);
                                attr = ss.str().c_str();
                                break;
                        }
@@ -416,11 +414,12 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
                                for(size_t j = 0; j < value.size(); j++) {
                                        const Transform& tfm = value[j];
 
-                                       for(int i = 0; i < 4; i++) {
-                                               ss << string_printf("%g %g %g %g", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
-                                               if(j != value.size() - 1 || i != 3) {
-                                                       ss << " ";
-                                               }
+                                       for(int i = 0; i < 3; i++) {
+                                               ss << string_printf("%g %g %g %g ", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
+                                       }
+                                       ss << string_printf("%g %g %g %g", 0.0, 0.0, 0.0, 1.0);
+                                       if(j != value.size() - 1) {
+                                               ss << " ";
                                        }
                                }
                                attr = ss.str().c_str();
index 6c33dad..060b393 100644 (file)
@@ -25,7 +25,6 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *k
        space.x = kernel_tex_fetch(__bvh_nodes, child_addr+1);
        space.y = kernel_tex_fetch(__bvh_nodes, child_addr+2);
        space.z = kernel_tex_fetch(__bvh_nodes, child_addr+3);
-       space.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
        return space;
 }
 
index 6e2ee3b..42c0537 100644 (file)
@@ -103,7 +103,6 @@ ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderD
        tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0);
        tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1);
        tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2);
-       tfm.w = kernel_tex_fetch(__attributes_float3, desc.offset + 3);
 
        return tfm;
 }
index faf3e3c..46c3f40 100644 (file)
@@ -170,8 +170,7 @@ ccl_device_forceinline bool cardinal_curve_intersect(
                htfm = make_transform(
                        dir.z / d, 0, -dir.x /d, 0,
                        -dir.x * dir.y /d, d, -dir.y * dir.z /d, 0,
-                       dir.x, dir.y, dir.z, 0,
-                       0, 0, 0, 1);
+                       dir.x, dir.y, dir.z, 0);
 
                float4 v00 = kernel_tex_fetch(__curves, prim);
 
index cd33030..62bc46d 100644 (file)
@@ -28,12 +28,12 @@ CCL_NAMESPACE_BEGIN
 
 enum ObjectTransform {
        OBJECT_TRANSFORM = 0,
-       OBJECT_INVERSE_TRANSFORM = 4,
+       OBJECT_INVERSE_TRANSFORM = 1,
 };
 
 enum ObjectVectorTransform {
        OBJECT_VECTOR_MOTION_PRE = 0,
-       OBJECT_VECTOR_MOTION_POST = 3
+       OBJECT_VECTOR_MOTION_POST = 1
 };
 
 /* Object to world space transformation */
@@ -51,8 +51,6 @@ ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, int object
                tfm.y = kernel_tex_fetch(__objects, object).tfm.pre.y;
                tfm.z = kernel_tex_fetch(__objects, object).tfm.pre.z;
        }
-       tfm.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
-
        return tfm;
 }
 
@@ -60,20 +58,12 @@ ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, int object
 
 ccl_device_inline Transform lamp_fetch_transform(KernelGlobals *kg, int lamp, bool inverse)
 {
-       Transform tfm;
        if(inverse) {
-               tfm.x = kernel_tex_fetch(__lights, lamp).itfm[0];
-               tfm.y = kernel_tex_fetch(__lights, lamp).itfm[1];
-               tfm.z = kernel_tex_fetch(__lights, lamp).itfm[2];
+               return kernel_tex_fetch(__lights, lamp).itfm;
        }
        else {
-               tfm.x = kernel_tex_fetch(__lights, lamp).tfm[0];
-               tfm.y = kernel_tex_fetch(__lights, lamp).tfm[1];
-               tfm.z = kernel_tex_fetch(__lights, lamp).tfm[2];
+               return kernel_tex_fetch(__lights, lamp).tfm;
        }
-       tfm.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
-
-       return tfm;
 }
 
 /* Object to world space transformation for motion vectors */
@@ -81,14 +71,7 @@ ccl_device_inline Transform lamp_fetch_transform(KernelGlobals *kg, int lamp, bo
 ccl_device_inline Transform object_fetch_vector_transform(KernelGlobals *kg, int object, enum ObjectVectorTransform type)
 {
        int offset = object*OBJECT_VECTOR_SIZE + (int)type;
-
-       Transform tfm;
-       tfm.x = kernel_tex_fetch(__objects_vector, offset + 0);
-       tfm.y = kernel_tex_fetch(__objects_vector, offset + 1);
-       tfm.z = kernel_tex_fetch(__objects_vector, offset + 2);
-       tfm.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
-
-       return tfm;
+       return kernel_tex_fetch(__objects_vector, offset);
 }
 
 /* Motion blurred object transformations */
index 892e0c8..6e07d88 100644 (file)
@@ -32,7 +32,7 @@ KERNEL_TEX(float2, __prim_time)
 
 /* objects */
 KERNEL_TEX(KernelObject, __objects)
-KERNEL_TEX(float4, __objects_vector)
+KERNEL_TEX(Transform, __objects_vector)
 
 /* triangles */
 KERNEL_TEX(uint, __tri_shader)
index 5934ff5..567f2a0 100644 (file)
 CCL_NAMESPACE_BEGIN
 
 /* Constants */
-#define OBJECT_VECTOR_SIZE     6
+#define OBJECT_VECTOR_SIZE     2
 #define FILTER_TABLE_SIZE      1024
 #define RAMP_TABLE_SIZE                256
-#define SHUTTER_TABLE_SIZE             256
+#define SHUTTER_TABLE_SIZE     256
 
 #define BSSRDF_MIN_RADIUS                      1e-8f
 #define BSSRDF_MAX_HITS                                4
@@ -1483,8 +1483,8 @@ typedef struct KernelLight {
        int samples;
        float max_bounces;
        float random;
-       float4 tfm[3];
-       float4 itfm[3];
+       Transform tfm;
+       Transform itfm;
        union {
                KernelSpotLight spot;
                KernelAreaLight area;
index dec56c6..0c5e5e3 100644 (file)
@@ -64,8 +64,7 @@ CCL_NAMESPACE_BEGIN
 
 static void copy_matrix(OSL::Matrix44& m, const Transform& tfm)
 {
-       // TODO: remember when making affine
-       Transform t = transform_transpose(tfm);
+       ProjectionTransform t = projection_transpose(ProjectionTransform(tfm));
        memcpy(&m, &t, sizeof(m));
 }
 
@@ -553,8 +552,7 @@ static bool set_attribute_float3_3(float3 P[3], TypeDesc type, bool derivatives,
 static bool set_attribute_matrix(const Transform& tfm, TypeDesc type, void *val)
 {
        if(type == TypeDesc::TypeMatrix) {
-               Transform transpose = transform_transpose(tfm);
-               memcpy(val, &transpose, sizeof(Transform));
+               copy_matrix(*(OSL::Matrix44*)val, tfm);
                return true;
        }
 
index 0a89054..42a7ae9 100644 (file)
@@ -26,7 +26,6 @@ ccl_device void svm_node_mapping(KernelGlobals *kg, ShaderData *sd, float *stack
        tfm.x = read_node_float(kg, offset);
        tfm.y = read_node_float(kg, offset);
        tfm.z = read_node_float(kg, offset);
-       tfm.w = read_node_float(kg, offset);
 
        float3 r = transform_point(&tfm, v);
        stack_store_float3(stack, out_offset, r);
index c943274..6ff39e5 100644 (file)
@@ -42,7 +42,6 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg,
                                tfm.x = read_node_float(kg, offset);
                                tfm.y = read_node_float(kg, offset);
                                tfm.z = read_node_float(kg, offset);
-                               tfm.w = read_node_float(kg, offset);
                                data = transform_point(&tfm, data);
                        }
                        break;
@@ -123,7 +122,6 @@ ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
                                tfm.x = read_node_float(kg, offset);
                                tfm.y = read_node_float(kg, offset);
                                tfm.z = read_node_float(kg, offset);
-                               tfm.w = read_node_float(kg, offset);
                                data = transform_point(&tfm, data);
                        }
                        break;
@@ -207,7 +205,6 @@ ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
                                tfm.x = read_node_float(kg, offset);
                                tfm.y = read_node_float(kg, offset);
                                tfm.z = read_node_float(kg, offset);
-                               tfm.w = read_node_float(kg, offset);
                                data = transform_point(&tfm, data);
                        }
                        break;
index d967516..43b4336 100644 (file)
@@ -39,7 +39,6 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
                tfm.x = read_node_float(kg, offset);
                tfm.y = read_node_float(kg, offset);
                tfm.z = read_node_float(kg, offset);
-               tfm.w = read_node_float(kg, offset);
                co = transform_point(&tfm, co);
        }
 
index a3f6736..8dec7e4 100644 (file)
@@ -785,11 +785,8 @@ void LightManager::device_update_points(Device *,
                klights[light_index].max_bounces = max_bounces;
                klights[light_index].random = random;
 
-
-               Transform tfm = light->tfm;
-               Transform itfm = transform_inverse(tfm);
-               memcpy(&klights[light_index].tfm, &tfm, sizeof(float4)*3);
-               memcpy(&klights[light_index].itfm, &itfm, sizeof(float4)*3);
+               klights[light_index].tfm = light->tfm;
+               klights[light_index].itfm = transform_inverse(light->tfm);
 
                light_index++;
        }
@@ -825,10 +822,8 @@ void LightManager::device_update_points(Device *,
                klights[light_index].area.dir[0] = dir.x;
                klights[light_index].area.dir[1] = dir.y;
                klights[light_index].area.dir[2] = dir.z;
-               Transform tfm = light->tfm;
-               Transform itfm = transform_inverse(tfm);
-               memcpy(&klights[light_index].tfm, &tfm, sizeof(float4)*3);
-               memcpy(&klights[light_index].itfm, &itfm, sizeof(float4)*3);
+               klights[light_index].tfm = light->tfm;
+               klights[light_index].itfm = transform_inverse(light->tfm);
 
                light_index++;
        }
index 47d2497..71cbf5c 100644 (file)
@@ -1445,11 +1445,11 @@ static void update_attribute_element_offset(Mesh *mesh,
                        Transform *tfm = mattr->data_transform();
                        offset = attr_float3_offset;
 
-                       assert(attr_float3.size() >= offset + size * 4);
-                       for(size_t k = 0; k < size*4; k++) {
+                       assert(attr_float3.size() >= offset + size * 3);
+                       for(size_t k = 0; k < size*3; k++) {
                                attr_float3[offset+k] = (&tfm->x)[k];
                        }
-                       attr_float3_offset += size * 4;
+                       attr_float3_offset += size * 3;
                }
                else {
                        float4 *data = mattr->data_float4();
index 41c9730..f117962 100644 (file)
@@ -117,8 +117,7 @@ Transform TextureMapping::compute_transform()
                case NORMAL:
                        /* no translation for normals, and inverse transpose */
                        mat = rmat*smat;
-                       mat = transform_inverse(mat);
-                       mat = transform_transpose(mat);
+                       mat = transform_transposed_inverse(mat);
                        break;
        }
 
@@ -153,7 +152,6 @@ void TextureMapping::compile(SVMCompiler& compiler, int offset_in, int offset_ou
        compiler.add_node(tfm.x);
        compiler.add_node(tfm.y);
        compiler.add_node(tfm.z);
-       compiler.add_node(tfm.w);
 
        if(use_minmax) {
                compiler.add_node(NODE_MIN_MAX, offset_out, offset_out);
@@ -193,9 +191,7 @@ void TextureMapping::compile_end(SVMCompiler& compiler, ShaderInput *vector_in,
 void TextureMapping::compile(OSLCompiler &compiler)
 {
        if(!skip()) {
-               Transform tfm = transform_transpose(compute_transform());
-
-               compiler.parameter("mapping", tfm);
+               compiler.parameter("mapping", compute_transform());
                compiler.parameter("use_mapping", 1);
        }
 }
@@ -1434,7 +1430,6 @@ void PointDensityTextureNode::compile(SVMCompiler& compiler)
                                compiler.add_node(tfm.x);
                                compiler.add_node(tfm.y);
                                compiler.add_node(tfm.z);
-                               compiler.add_node(tfm.w);
                        }
                }
                else {
@@ -1478,7 +1473,7 @@ void PointDensityTextureNode::compile(OSLCompiler& compiler)
                        compiler.parameter("filename", string_printf("@%d", slot).c_str());
                }
                if(space == NODE_TEX_VOXEL_SPACE_WORLD) {
-                       compiler.parameter("mapping", transform_transpose(tfm));
+                       compiler.parameter("mapping", tfm);
                        compiler.parameter("use_mapping", 1);
                }
                compiler.parameter(this, "interpolation");
@@ -1558,8 +1553,7 @@ void MappingNode::compile(SVMCompiler& compiler)
 
 void MappingNode::compile(OSLCompiler& compiler)
 {
-       Transform tfm = transform_transpose(tex_mapping.compute_transform());
-       compiler.parameter("Matrix", tfm);
+       compiler.parameter("Matrix", tex_mapping.compute_transform());
        compiler.parameter_point("mapping_min", tex_mapping.min);
        compiler.parameter_point("mapping_max", tex_mapping.max);
        compiler.parameter("use_minmax", tex_mapping.use_minmax);
@@ -3220,7 +3214,6 @@ void TextureCoordinateNode::compile(SVMCompiler& compiler)
                        compiler.add_node(ob_itfm.x);
                        compiler.add_node(ob_itfm.y);
                        compiler.add_node(ob_itfm.z);
-                       compiler.add_node(ob_itfm.w);
                }
        }
 
@@ -3259,7 +3252,7 @@ void TextureCoordinateNode::compile(OSLCompiler& compiler)
        if(compiler.output_type() == SHADER_TYPE_VOLUME)
                compiler.parameter("is_volume", true);
        compiler.parameter(this, "use_transform");
-       Transform ob_itfm = transform_transpose(transform_inverse(ob_tfm));
+       Transform ob_itfm = transform_transposed_inverse(ob_tfm);
        compiler.parameter("object_itfm", ob_itfm);
 
        compiler.parameter(this, "from_dupli");
index f632f7c..92c7e9c 100644 (file)
@@ -132,7 +132,7 @@ void Object::apply_transform(bool apply_to_motion)
                /* store matrix to transform later. when accessing these as attributes we
                 * do not want the transform to be applied for consistency between static
                 * and dynamic BVH, so we do it on packing. */
-               mesh->transform_normal = transform_transpose(transform_inverse(tfm));
+               mesh->transform_normal = transform_transposed_inverse(tfm);
 
                /* apply to mesh vertices */
                for(size_t i = 0; i < mesh->verts.size(); i++)
@@ -290,7 +290,7 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
                                                    int object_index)
 {
        KernelObject& kobject = state->objects[object_index];
-       float4 *objects_vector = state->objects_vector;
+       Transform *objects_vector = state->objects_vector;
 
        Mesh *mesh = ob->mesh;
        uint flag = 0;
@@ -357,11 +357,8 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
                }
        }
 
-       /* OBJECT_TRANSFORM */
-       memcpy(&kobject.tfm.pre, &tfm, sizeof(float4)*3);
-       /* OBJECT_INVERSE_TRANSFORM */
-       memcpy(&kobject.tfm.mid, &itfm, sizeof(float4)*3);
-       /* OBJECT_PROPERTIES */
+       memcpy(&kobject.tfm.pre, &tfm, sizeof(tfm));
+       memcpy(&kobject.tfm.mid, &itfm, sizeof(itfm));
        kobject.surface_area = surface_area;
        kobject.pass_id = pass_id;
        kobject.random_number = random_number;
@@ -395,8 +392,8 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
                        mtfm.post = mtfm.post * itfm;
                }
 
-               memcpy(&objects_vector[object_index*OBJECT_VECTOR_SIZE+0], &mtfm.pre, sizeof(float4)*3);
-               memcpy(&objects_vector[object_index*OBJECT_VECTOR_SIZE+3], &mtfm.post, sizeof(float4)*3);
+               objects_vector[object_index*OBJECT_VECTOR_SIZE+0] = mtfm.pre;
+               objects_vector[object_index*OBJECT_VECTOR_SIZE+1] = mtfm.post;
        }
        else if(state->need_motion == Scene::MOTION_BLUR) {
                if(ob->use_motion) {
index 54046d7..59c1d91 100644 (file)
@@ -135,7 +135,7 @@ protected:
                /* Packed object arrays. Those will be filled in. */
                uint *object_flag;
                KernelObject *objects;
-               float4 *objects_vector;
+               Transform *objects_vector;
 
                /* Flags which will be synchronized to Integrator. */
                bool have_motion;
index 9e93128..f1a2235 100644 (file)
@@ -34,6 +34,7 @@
 #include "util/util_md5.h"
 #include "util/util_path.h"
 #include "util/util_progress.h"
+#include "util/util_projection.h"
 
 #endif
 
@@ -832,7 +833,9 @@ void OSLCompiler::parameter(ShaderNode* node, const char *name)
                case SocketType::TRANSFORM:
                {
                        Transform value = node->get_transform(socket);
-                       ss->Parameter(uname, TypeDesc::TypeMatrix, &value);
+                       ProjectionTransform projection(value);
+                       projection = projection_transpose(projection);
+                       ss->Parameter(uname, TypeDesc::TypeMatrix, &projection);
                        break;
                }
                case SocketType::BOOLEAN_ARRAY:
@@ -900,7 +903,11 @@ void OSLCompiler::parameter(ShaderNode* node, const char *name)
                case SocketType::TRANSFORM_ARRAY:
                {
                        const array<Transform>& value = node->get_transform_array(socket);
-                       ss->Parameter(uname, array_typedesc(TypeDesc::TypeMatrix, value.size()), value.data());
+                       array<ProjectionTransform> fvalue(value.size());
+                       for(size_t i = 0; i < value.size(); i++) {
+                               fvalue[i] = projection_transpose(ProjectionTransform(value[i]));
+                       }
+                       ss->Parameter(uname, array_typedesc(TypeDesc::TypeMatrix, fvalue.size()), fvalue.data());
                        break;
                }
                case SocketType::CLOSURE:
@@ -967,7 +974,9 @@ void OSLCompiler::parameter(const char *name, ustring s)
 void OSLCompiler::parameter(const char *name, const Transform& tfm)
 {
        OSL::ShadingSystem *ss = (OSL::ShadingSystem*)shadingsys;
-       ss->Parameter(name, TypeDesc::TypeMatrix, (float*)&tfm);
+       ProjectionTransform projection(tfm);
+       projection = projection_transpose(projection);
+       ss->Parameter(name, TypeDesc::TypeMatrix, (float*)&projection);
 }
 
 void OSLCompiler::parameter_array(const char *name, const float f[], int arraylen)
index 316ffeb..cd5c9c1 100644 (file)
@@ -87,7 +87,7 @@ public:
 
        /* objects */
        device_vector<KernelObject> objects;
-       device_vector<float4> objects_vector;
+       device_vector<Transform> objects_vector;
 
        /* attributes */
        device_vector<uint4> attributes_map;
index 8073568..dbcb987 100644 (file)
@@ -35,7 +35,7 @@ typedef struct ProjectionTransform {
        : x(tfm.x),
          y(tfm.y),
          z(tfm.z),
-         w(tfm.w)
+         w(make_float4(0.0f, 0.0f, 0.0f, 1.0f))
        {
        }
 #endif
@@ -69,6 +69,12 @@ ccl_device_inline float3 transform_perspective_direction(const ProjectionTransfo
 
 #ifndef __KERNEL_GPU__
 
+ccl_device_inline Transform projection_to_transform(const ProjectionTransform& a)
+{
+       Transform tfm = {a.x, a.y, a.z};
+       return tfm;
+}
+
 ccl_device_inline ProjectionTransform projection_transpose(const ProjectionTransform& a)
 {
        ProjectionTransform t;
@@ -81,12 +87,7 @@ ccl_device_inline ProjectionTransform projection_transpose(const ProjectionTrans
        return t;
 }
 
-ccl_device_inline ProjectionTransform projection_inverse(const ProjectionTransform& a)
-{
-       Transform t = {a.x, a.y, a.z, a.w};
-       t = transform_inverse(t);
-       return ProjectionTransform(t);
-}
+ProjectionTransform projection_inverse(const ProjectionTransform& a);
 
 ccl_device_inline ProjectionTransform make_projection(
        float a, float b, float c, float d,
index edb53cc..e4e0d62 100644 (file)
@@ -46,6 +46,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include "util/util_projection.h"
 #include "util/util_transform.h"
 
 #include "util/util_boundbox.h"
@@ -129,9 +130,9 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
        return true;
 }
 
-Transform transform_inverse(const Transform& tfm)
+ProjectionTransform projection_inverse(const ProjectionTransform& tfm)
 {
-       Transform tfmR = transform_identity();
+       ProjectionTransform tfmR = projection_identity();
        float M[4][4], R[4][4];
 
        memcpy(R, &tfmR, sizeof(R));
@@ -145,7 +146,7 @@ Transform transform_inverse(const Transform& tfm)
                M[2][2] += 1e-8f;
 
                if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
-                       return transform_identity();
+                       return projection_identity();
                }
        }
 
@@ -154,6 +155,19 @@ Transform transform_inverse(const Transform& tfm)
        return tfmR;
 }
 
+Transform transform_inverse(const Transform& tfm)
+{
+       ProjectionTransform projection(tfm);
+       return projection_to_transform(projection_inverse(projection));
+}
+
+Transform transform_transposed_inverse(const Transform& tfm)
+{
+       ProjectionTransform projection(tfm);
+       ProjectionTransform iprojection = projection_inverse(projection);
+       return projection_to_transform(projection_transpose(iprojection));
+}
+
 /* Motion Transform */
 
 float4 transform_to_quat(const Transform& tfm)
@@ -209,7 +223,7 @@ static void transform_decompose(DecomposedTransform *decomp, const Transform *tf
 
        /* extract rotation */
        Transform M = *tfm;
-       M.x.w = 0.0f; M.y.w = 0.0f; M.z.w = 0.0f; M.w.w = 1.0f;
+       M.x.w = 0.0f; M.y.w = 0.0f; M.z.w = 0.0f;
 
        Transform R = M;
        float norm;
@@ -217,9 +231,9 @@ static void transform_decompose(DecomposedTransform *decomp, const Transform *tf
 
        do {
                Transform Rnext;
-               Transform Rit = transform_inverse(transform_transpose(R));
+               Transform Rit = transform_transposed_inverse(R);
 
-               for(int i = 0; i < 4; i++)
+               for(int i = 0; i < 3; i++)
                        for(int j = 0; j < 4; j++)
                                Rnext[i][j] = 0.5f * (R[i][j] + Rit[i][j]);
                
index 65166e3..12f8405 100644 (file)
 
 CCL_NAMESPACE_BEGIN
 
-/* Data Types */
+/* Affine transformation, stored as 4x3 matrix. */
 
 typedef struct Transform {
-       float4 x, y, z, w; /* rows */
+       float4 x, y, z;
 
 #ifndef __KERNEL_GPU__
        float4 operator[](int i) const { return *(&x + i); }
@@ -69,7 +69,7 @@ ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
        x = _mm_loadu_ps(&t->x.x);
        y = _mm_loadu_ps(&t->y.x);
        z = _mm_loadu_ps(&t->z.x);
-       w = _mm_loadu_ps(&t->w.x);
+       w = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
 
        _MM_TRANSPOSE4_PS(x, y, z, w);
 
@@ -125,29 +125,15 @@ ccl_device_inline float3 transform_direction_transposed(const Transform *t, cons
        return make_float3(dot(x, a), dot(y, a), dot(z, a));
 }
 
-ccl_device_inline Transform transform_transpose(const Transform a)
-{
-       Transform t;
-
-       t.x.x = a.x.x; t.x.y = a.y.x; t.x.z = a.z.x; t.x.w = a.w.x;
-       t.y.x = a.x.y; t.y.y = a.y.y; t.y.z = a.z.y; t.y.w = a.w.y;
-       t.z.x = a.x.z; t.z.y = a.y.z; t.z.z = a.z.z; t.z.w = a.w.z;
-       t.w.x = a.x.w; t.w.y = a.y.w; t.w.z = a.z.w; t.w.w = a.w.w;
-
-       return t;
-}
-
 ccl_device_inline Transform make_transform(float a, float b, float c, float d,
                                            float e, float f, float g, float h,
-                                           float i, float j, float k, float l,
-                                           float m, float n, float o, float p)
+                                           float i, float j, float k, float l)
 {
        Transform t;
 
        t.x.x = a; t.x.y = b; t.x.z = c; t.x.w = d;
        t.y.x = e; t.y.y = f; t.y.z = g; t.y.w = h;
        t.z.x = i; t.z.y = j; t.z.z = k; t.z.w = l;
-       t.w.x = m; t.w.y = n; t.w.z = o; t.w.w = p;
 
        return t;
 }
@@ -161,21 +147,22 @@ ccl_device_inline Transform make_transform_frame(float3 N)
        const float3 dy = normalize(cross(N, dx));
        return make_transform(dx.x, dx.y, dx.z, 0.0f,
                              dy.x, dy.y, dy.z, 0.0f,
-                             N.x , N.y,  N.z,  0.0f,
-                             0.0f, 0.0f, 0.0f, 1.0f);
+                             N.x , N.y,  N.z,  0.0f);
 }
 
 #ifndef __KERNEL_GPU__
 
 ccl_device_inline Transform operator*(const Transform a, const Transform b)
 {
-       Transform c = transform_transpose(b);
-       Transform t;
+       float4 c_x = make_float4(b.x.x, b.y.x, b.z.x, 0.0f);
+       float4 c_y = make_float4(b.x.y, b.y.y, b.z.y, 0.0f);
+       float4 c_z = make_float4(b.x.z, b.y.z, b.z.z, 0.0f);
+       float4 c_w = make_float4(b.x.w, b.y.w, b.z.w, 1.0f);
 
-       t.x = make_float4(dot(a.x, c.x), dot(a.x, c.y), dot(a.x, c.z), dot(a.x, c.w));
-       t.y = make_float4(dot(a.y, c.x), dot(a.y, c.y), dot(a.y, c.z), dot(a.y, c.w));
-       t.z = make_float4(dot(a.z, c.x), dot(a.z, c.y), dot(a.z, c.z), dot(a.z, c.w));
-       t.w = make_float4(dot(a.w, c.x), dot(a.w, c.y), dot(a.w, c.z), dot(a.w, c.w));
+       Transform t;
+       t.x = make_float4(dot(a.x, c_x), dot(a.x, c_y), dot(a.x, c_z), dot(a.x, c_w));
+       t.y = make_float4(dot(a.y, c_x), dot(a.y, c_y), dot(a.y, c_z), dot(a.y, c_w));
+       t.z = make_float4(dot(a.z, c_x), dot(a.z, c_y), dot(a.z, c_z), dot(a.z, c_w));
 
        return t;
 }
@@ -185,7 +172,6 @@ ccl_device_inline void print_transform(const char *label, const Transform& t)
        print_float4(label, t.x);
        print_float4(label, t.y);
        print_float4(label, t.z);
-       print_float4(label, t.w);
        printf("\n");
 }
 
@@ -194,8 +180,7 @@ ccl_device_inline Transform transform_translate(float3 t)
        return make_transform(
                1, 0, 0, t.x,
                0, 1, 0, t.y,
-               0, 0, 1, t.z,
-               0, 0, 0, 1);
+               0, 0, 1, t.z);
 }
 
 ccl_device_inline Transform transform_translate(float x, float y, float z)
@@ -208,8 +193,7 @@ ccl_device_inline Transform transform_scale(float3 s)
        return make_transform(
                s.x, 0, 0, 0,
                0, s.y, 0, 0,
-               0, 0, s.z, 0,
-               0, 0, 0, 1);
+               0, 0, s.z, 0);
 }
 
 ccl_device_inline Transform transform_scale(float x, float y, float z)
@@ -239,9 +223,7 @@ ccl_device_inline Transform transform_rotate(float angle, float3 axis)
                axis.z*axis.x*t - s*axis.y,
                axis.z*axis.y*t + s*axis.x,
                axis.z*axis.z*t + c,
-               0.0f,
-
-               0.0f, 0.0f, 0.0f, 1.0f);
+               0.0f);
 }
 
 /* Euler is assumed to be in XYZ order. */
@@ -281,20 +263,20 @@ ccl_device_inline void transform_set_column(Transform *t, int column, float3 val
 }
 
 Transform transform_inverse(const Transform& a);
+Transform transform_transposed_inverse(const Transform& a);
 
 ccl_device_inline bool transform_uniform_scale(const Transform& tfm, float& scale)
 {
        /* the epsilon here is quite arbitrary, but this function is only used for
-        * surface area and bump, where we except it to not be so sensitive */
-       Transform ttfm = transform_transpose(tfm);
+        * surface area and bump, where we expect it to not be so sensitive */
        float eps = 1e-6f;
        
        float sx = len_squared(float4_to_float3(tfm.x));
        float sy = len_squared(float4_to_float3(tfm.y));
        float sz = len_squared(float4_to_float3(tfm.z));
-       float stx = len_squared(float4_to_float3(ttfm.x));
-       float sty = len_squared(float4_to_float3(ttfm.y));
-       float stz = len_squared(float4_to_float3(ttfm.z));
+       float stx = len_squared(transform_get_column(&tfm, 0));
+       float sty = len_squared(transform_get_column(&tfm, 1));
+       float stz = len_squared(transform_get_column(&tfm, 2));
 
        if(fabsf(sx - sy) < eps && fabsf(sx - sz) < eps &&
           fabsf(sx - stx) < eps && fabsf(sx - sty) < eps &&
@@ -330,7 +312,6 @@ ccl_device_inline Transform transform_clear_scale(const Transform& tfm)
 ccl_device_inline Transform transform_empty()
 {
        return make_transform(
-               0, 0, 0, 0,
                0, 0, 0, 0,
                0, 0, 0, 0,
                0, 0, 0, 0);
@@ -389,7 +370,6 @@ ccl_device_inline Transform transform_quick_inverse(Transform M)
        R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T));
        R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T));
        R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T));
-       R.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
 
        return R;
 }
@@ -427,7 +407,6 @@ ccl_device_inline void transform_compose(Transform *tfm, const DecomposedTransfo
        tfm->x = make_float4(dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x);
        tfm->y = make_float4(dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y);
        tfm->z = make_float4(dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z);
-       tfm->w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
 }
 
 ccl_device void transform_motion_interpolate(Transform *tfm, const ccl_global DecomposedMotionTransform *motion, float t)