Merge of itasc branch. Project files, scons and cmake should be working. Makefile...
[blender.git] / extern / Eigen2 / Eigen / src / Core / Assign.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra. Eigen itself is part of the KDE project.
3 //
4 // Copyright (C) 2007 Michael Olbrich <michael.olbrich@gmx.net>
5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6 // Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
7 //
8 // Eigen is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 3 of the License, or (at your option) any later version.
12 //
13 // Alternatively, you can redistribute it and/or
14 // modify it under the terms of the GNU General Public License as
15 // published by the Free Software Foundation; either version 2 of
16 // the License, or (at your option) any later version.
17 //
18 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
19 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
20 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
21 // GNU General Public License for more details.
22 //
23 // You should have received a copy of the GNU Lesser General Public
24 // License and a copy of the GNU General Public License along with
25 // Eigen. If not, see <http://www.gnu.org/licenses/>.
26
27 #ifndef EIGEN_ASSIGN_H
28 #define EIGEN_ASSIGN_H
29
30 /***************************************************************************
31 * Part 1 : the logic deciding a strategy for vectorization and unrolling
32 ***************************************************************************/
33
34 template <typename Derived, typename OtherDerived>
35 struct ei_assign_traits
36 {
37 public:
38   enum {
39     DstIsAligned = Derived::Flags & AlignedBit,
40     SrcIsAligned = OtherDerived::Flags & AlignedBit,
41     SrcAlignment = DstIsAligned && SrcIsAligned ? Aligned : Unaligned
42   };
43
44 private:
45   enum {
46     InnerSize = int(Derived::Flags)&RowMajorBit
47               ? Derived::ColsAtCompileTime
48               : Derived::RowsAtCompileTime,
49     InnerMaxSize = int(Derived::Flags)&RowMajorBit
50               ? Derived::MaxColsAtCompileTime
51               : Derived::MaxRowsAtCompileTime,
52     PacketSize = ei_packet_traits<typename Derived::Scalar>::size
53   };
54
55   enum {
56     MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit)
57                   && ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)),
58     MayInnerVectorize  = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
59                        && int(DstIsAligned) && int(SrcIsAligned),
60     MayLinearVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
61     MaySliceVectorize  = MightVectorize && int(InnerMaxSize)>=3*PacketSize /* slice vectorization can be slow, so we only
62       want it if the slices are big, which is indicated by InnerMaxSize rather than InnerSize, think of the case
63       of a dynamic block in a fixed-size matrix */
64   };
65
66 public:
67   enum {
68     Vectorization = int(MayInnerVectorize)  ? int(InnerVectorization)
69                   : int(MayLinearVectorize) ? int(LinearVectorization)
70                   : int(MaySliceVectorize)  ? int(SliceVectorization)
71                                             : int(NoVectorization)
72   };
73
74 private:
75   enum {
76     UnrollingLimit      = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize)),
77     MayUnrollCompletely = int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
78     MayUnrollInner      = int(InnerSize * OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
79   };
80
81 public:
82   enum {
83     Unrolling = (int(Vectorization) == int(InnerVectorization) || int(Vectorization) == int(NoVectorization))
84               ? (
85                    int(MayUnrollCompletely) ? int(CompleteUnrolling)
86                  : int(MayUnrollInner)      ? int(InnerUnrolling)
87                                             : int(NoUnrolling)
88                 )
89               : int(Vectorization) == int(LinearVectorization)
90               ? ( int(MayUnrollCompletely) && int(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
91               : int(NoUnrolling)
92   };
93 };
94
95 /***************************************************************************
96 * Part 2 : meta-unrollers
97 ***************************************************************************/
98
99 /***********************
100 *** No vectorization ***
101 ***********************/
102
103 template<typename Derived1, typename Derived2, int Index, int Stop>
104 struct ei_assign_novec_CompleteUnrolling
105 {
106   enum {
107     row = int(Derived1::Flags)&RowMajorBit
108         ? Index / int(Derived1::ColsAtCompileTime)
109         : Index % Derived1::RowsAtCompileTime,
110     col = int(Derived1::Flags)&RowMajorBit
111         ? Index % int(Derived1::ColsAtCompileTime)
112         : Index / Derived1::RowsAtCompileTime
113   };
114
115   EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
116   {
117     dst.copyCoeff(row, col, src);
118     ei_assign_novec_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
119   }
120 };
121
122 template<typename Derived1, typename Derived2, int Stop>
123 struct ei_assign_novec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
124 {
125   EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
126 };
127
128 template<typename Derived1, typename Derived2, int Index, int Stop>
129 struct ei_assign_novec_InnerUnrolling
130 {
131   EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
132   {
133     const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
134     const int row = rowMajor ? row_or_col : Index;
135     const int col = rowMajor ? Index : row_or_col;
136     dst.copyCoeff(row, col, src);
137     ei_assign_novec_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, row_or_col);
138   }
139 };
140
141 template<typename Derived1, typename Derived2, int Stop>
142 struct ei_assign_novec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
143 {
144   EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
145 };
146
147 /**************************
148 *** Inner vectorization ***
149 **************************/
150
151 template<typename Derived1, typename Derived2, int Index, int Stop>
152 struct ei_assign_innervec_CompleteUnrolling
153 {
154   enum {
155     row = int(Derived1::Flags)&RowMajorBit
156         ? Index / int(Derived1::ColsAtCompileTime)
157         : Index % Derived1::RowsAtCompileTime,
158     col = int(Derived1::Flags)&RowMajorBit
159         ? Index % int(Derived1::ColsAtCompileTime)
160         : Index / Derived1::RowsAtCompileTime,
161     SrcAlignment = ei_assign_traits<Derived1,Derived2>::SrcAlignment
162   };
163
164   EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
165   {
166     dst.template copyPacket<Derived2, Aligned, SrcAlignment>(row, col, src);
167     ei_assign_innervec_CompleteUnrolling<Derived1, Derived2,
168       Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
169   }
170 };
171
172 template<typename Derived1, typename Derived2, int Stop>
173 struct ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
174 {
175   EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &) {}
176 };
177
178 template<typename Derived1, typename Derived2, int Index, int Stop>
179 struct ei_assign_innervec_InnerUnrolling
180 {
181   EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src, int row_or_col)
182   {
183     const int row = int(Derived1::Flags)&RowMajorBit ? row_or_col : Index;
184     const int col = int(Derived1::Flags)&RowMajorBit ? Index : row_or_col;
185     dst.template copyPacket<Derived2, Aligned, Aligned>(row, col, src);
186     ei_assign_innervec_InnerUnrolling<Derived1, Derived2,
187       Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, row_or_col);
188   }
189 };
190
191 template<typename Derived1, typename Derived2, int Stop>
192 struct ei_assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
193 {
194   EIGEN_STRONG_INLINE static void run(Derived1 &, const Derived2 &, int) {}
195 };
196
197 /***************************************************************************
198 * Part 3 : implementation of all cases
199 ***************************************************************************/
200
201 template<typename Derived1, typename Derived2,
202          int Vectorization = ei_assign_traits<Derived1, Derived2>::Vectorization,
203          int Unrolling = ei_assign_traits<Derived1, Derived2>::Unrolling>
204 struct ei_assign_impl;
205
206 /***********************
207 *** No vectorization ***
208 ***********************/
209
210 template<typename Derived1, typename Derived2>
211 struct ei_assign_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
212 {
213   inline static void run(Derived1 &dst, const Derived2 &src)
214   {
215     const int innerSize = dst.innerSize();
216     const int outerSize = dst.outerSize();
217     for(int j = 0; j < outerSize; ++j)
218       for(int i = 0; i < innerSize; ++i)
219       {
220         if(int(Derived1::Flags)&RowMajorBit)
221           dst.copyCoeff(j, i, src);
222         else
223           dst.copyCoeff(i, j, src);
224       }
225   }
226 };
227
228 template<typename Derived1, typename Derived2>
229 struct ei_assign_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling>
230 {
231   EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
232   {
233     ei_assign_novec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
234       ::run(dst, src);
235   }
236 };
237
238 template<typename Derived1, typename Derived2>
239 struct ei_assign_impl<Derived1, Derived2, NoVectorization, InnerUnrolling>
240 {
241   EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
242   {
243     const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
244     const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
245     const int outerSize = dst.outerSize();
246     for(int j = 0; j < outerSize; ++j)
247       ei_assign_novec_InnerUnrolling<Derived1, Derived2, 0, innerSize>
248         ::run(dst, src, j);
249   }
250 };
251
252 /**************************
253 *** Inner vectorization ***
254 **************************/
255
256 template<typename Derived1, typename Derived2>
257 struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling>
258 {
259   inline static void run(Derived1 &dst, const Derived2 &src)
260   {
261     const int innerSize = dst.innerSize();
262     const int outerSize = dst.outerSize();
263     const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
264     for(int j = 0; j < outerSize; ++j)
265       for(int i = 0; i < innerSize; i+=packetSize)
266       {
267         if(int(Derived1::Flags)&RowMajorBit)
268           dst.template copyPacket<Derived2, Aligned, Aligned>(j, i, src);
269         else
270           dst.template copyPacket<Derived2, Aligned, Aligned>(i, j, src);
271       }
272   }
273 };
274
275 template<typename Derived1, typename Derived2>
276 struct ei_assign_impl<Derived1, Derived2, InnerVectorization, CompleteUnrolling>
277 {
278   EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
279   {
280     ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
281       ::run(dst, src);
282   }
283 };
284
285 template<typename Derived1, typename Derived2>
286 struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling>
287 {
288   EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
289   {
290     const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
291     const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
292     const int outerSize = dst.outerSize();
293     for(int j = 0; j < outerSize; ++j)
294       ei_assign_innervec_InnerUnrolling<Derived1, Derived2, 0, innerSize>
295         ::run(dst, src, j);
296   }
297 };
298
299 /***************************
300 *** Linear vectorization ***
301 ***************************/
302
303 template<typename Derived1, typename Derived2>
304 struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
305 {
306   inline static void run(Derived1 &dst, const Derived2 &src)
307   {
308     const int size = dst.size();
309     const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
310     const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
311                            : ei_alignmentOffset(&dst.coeffRef(0), size);
312     const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
313
314     for(int index = 0; index < alignedStart; ++index)
315       dst.copyCoeff(index, src);
316
317     for(int index = alignedStart; index < alignedEnd; index += packetSize)
318     {
319       dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src);
320     }
321
322     for(int index = alignedEnd; index < size; ++index)
323       dst.copyCoeff(index, src);
324   }
325 };
326
327 template<typename Derived1, typename Derived2>
328 struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling>
329 {
330   EIGEN_STRONG_INLINE static void run(Derived1 &dst, const Derived2 &src)
331   {
332     const int size = Derived1::SizeAtCompileTime;
333     const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
334     const int alignedSize = (size/packetSize)*packetSize;
335
336     ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src);
337     ei_assign_novec_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
338   }
339 };
340
341 /**************************
342 *** Slice vectorization ***
343 ***************************/
344
345 template<typename Derived1, typename Derived2>
346 struct ei_assign_impl<Derived1, Derived2, SliceVectorization, NoUnrolling>
347 {
348   inline static void run(Derived1 &dst, const Derived2 &src)
349   {
350     const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
351     const int packetAlignedMask = packetSize - 1;
352     const int innerSize = dst.innerSize();
353     const int outerSize = dst.outerSize();
354     const int alignedStep = (packetSize - dst.stride() % packetSize) & packetAlignedMask;
355     int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
356                      : ei_alignmentOffset(&dst.coeffRef(0,0), innerSize);
357
358     for(int i = 0; i < outerSize; ++i)
359     {
360       const int alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
361
362       // do the non-vectorizable part of the assignment
363       for (int index = 0; index<alignedStart ; ++index)
364       {
365         if(Derived1::Flags&RowMajorBit)
366           dst.copyCoeff(i, index, src);
367         else
368           dst.copyCoeff(index, i, src);
369       }
370
371       // do the vectorizable part of the assignment
372       for (int index = alignedStart; index<alignedEnd; index+=packetSize)
373       {
374         if(Derived1::Flags&RowMajorBit)
375           dst.template copyPacket<Derived2, Aligned, Unaligned>(i, index, src);
376         else
377           dst.template copyPacket<Derived2, Aligned, Unaligned>(index, i, src);
378       }
379
380       // do the non-vectorizable part of the assignment
381       for (int index = alignedEnd; index<innerSize ; ++index)
382       {
383         if(Derived1::Flags&RowMajorBit)
384           dst.copyCoeff(i, index, src);
385         else
386           dst.copyCoeff(index, i, src);
387       }
388
389       alignedStart = std::min<int>((alignedStart+alignedStep)%packetSize, innerSize);
390     }
391   }
392 };
393
394 /***************************************************************************
395 * Part 4 : implementation of MatrixBase methods
396 ***************************************************************************/
397
398 template<typename Derived>
399 template<typename OtherDerived>
400 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>
401   ::lazyAssign(const MatrixBase<OtherDerived>& other)
402 {
403   EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
404   EIGEN_STATIC_ASSERT((ei_is_same_type<typename Derived::Scalar, typename OtherDerived::Scalar>::ret),
405     YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
406   ei_assert(rows() == other.rows() && cols() == other.cols());
407   ei_assign_impl<Derived, OtherDerived>::run(derived(),other.derived());
408   return derived();
409 }
410
411 template<typename Derived, typename OtherDerived,
412          bool EvalBeforeAssigning = (int(OtherDerived::Flags) & EvalBeforeAssigningBit) != 0,
413          bool NeedToTranspose = Derived::IsVectorAtCompileTime
414                 && OtherDerived::IsVectorAtCompileTime
415                 && int(Derived::RowsAtCompileTime) == int(OtherDerived::ColsAtCompileTime)
416                 && int(Derived::ColsAtCompileTime) == int(OtherDerived::RowsAtCompileTime)
417                 && int(Derived::SizeAtCompileTime) != 1>
418 struct ei_assign_selector;
419
420 template<typename Derived, typename OtherDerived>
421 struct ei_assign_selector<Derived,OtherDerived,false,false> {
422   EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
423 };
424 template<typename Derived, typename OtherDerived>
425 struct ei_assign_selector<Derived,OtherDerived,true,false> {
426   EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
427 };
428 template<typename Derived, typename OtherDerived>
429 struct ei_assign_selector<Derived,OtherDerived,false,true> {
430   EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
431 };
432 template<typename Derived, typename OtherDerived>
433 struct ei_assign_selector<Derived,OtherDerived,true,true> {
434   EIGEN_STRONG_INLINE static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
435 };
436
437 template<typename Derived>
438 template<typename OtherDerived>
439 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>
440   ::operator=(const MatrixBase<OtherDerived>& other)
441 {
442   return ei_assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
443 }
444
445 #endif // EIGEN_ASSIGN_H