nlib
SimdQuaternion.h
Go to the documentation of this file.
1 
2 #pragma once
3 #ifndef INCLUDE_NN_NLIB_SIMD_SIMDQUATERNION_H_
4 #define INCLUDE_NN_NLIB_SIMD_SIMDQUATERNION_H_
5 
8 
9 NLIB_NAMESPACE_BEGIN
10 namespace simd {
11 
13  public:
14  static SimdQuaternion __vectorcall Identity() NLIB_NOEXCEPT;
15  static SimdQuaternion __vectorcall Conjugate(SimdQuaternionArg q) NLIB_NOEXCEPT;
16  static f128 __vectorcall Length(SimdQuaternionArg q) NLIB_NOEXCEPT;
17  static f128 __vectorcall LengthSq(SimdQuaternionArg q) NLIB_NOEXCEPT;
18  static f128 __vectorcall RecpLength(SimdQuaternionArg q) NLIB_NOEXCEPT;
19  static SimdQuaternion __vectorcall Normalize(SimdQuaternionArg q) NLIB_NOEXCEPT;
20  static SimdQuaternion __vectorcall NormalizeEst(SimdQuaternionArg q) NLIB_NOEXCEPT;
21  static SimdQuaternion __vectorcall Inverse(SimdQuaternionArg q) NLIB_NOEXCEPT;
22  static SimdQuaternion __vectorcall Ln(SimdQuaternionArg q_normalized) NLIB_NOEXCEPT;
23  static SimdQuaternion __vectorcall Exp(SimdQuaternionArg q) NLIB_NOEXCEPT;
24 
25  static bool __vectorcall IsIdentity(SimdQuaternionArg q) NLIB_NOEXCEPT;
26  static bool __vectorcall IsInfinite(SimdQuaternionArg q) NLIB_NOEXCEPT;
27  static bool __vectorcall IsNaN(SimdQuaternionArg q) NLIB_NOEXCEPT;
28  static bool __vectorcall CmpEq(SimdQuaternionArg q0, SimdQuaternionArg q1) NLIB_NOEXCEPT;
29  static bool __vectorcall CmpNe(SimdQuaternionArg q0, SimdQuaternionArg q1) NLIB_NOEXCEPT;
30 
31  static f128 __vectorcall Dot(SimdQuaternionArg q0, SimdQuaternionArg q1) NLIB_NOEXCEPT;
32  static SimdQuaternion __vectorcall
34 
35  static SimdQuaternion __vectorcall
36  FromRotationAxisAndSinCos(SimdVectorArg axis_normalized, float sin_half_rad,
37  float cos_half_rad) NLIB_NOEXCEPT;
38  static SimdQuaternion __vectorcall FromRotationMatrix(SimdMatrixArg m) NLIB_NOEXCEPT;
39  static SimdQuaternion __vectorcall
40  FromRotationZXY(SimdVectorArg sin_half_xyz, SimdVectorArg cos_half_xyz) NLIB_NOEXCEPT;
41  static SimdVector __vectorcall ToAxisAngle(float* rad, SimdQuaternion q) NLIB_NOEXCEPT;
42 
43  static SimdQuaternion __vectorcall Slerp(SimdQuaternionArg q0_normalized,
44  SimdQuaternionArg q1_normalized,
45  float t) NLIB_NOEXCEPT;
46  static SimdQuaternion __vectorcall
47  Squad(SimdQuaternionArg q0_normalized, SimdQuaternionArg q1_normalized,
48  SimdQuaternionArg q2_normalized, SimdQuaternionArg q3_normalized,
49  float t) NLIB_NOEXCEPT;
50  static void __vectorcall SquadSetup(SimdQuaternion* a, SimdQuaternion* b, SimdQuaternion* c,
53  static SimdQuaternion __vectorcall BaryCentric(SimdQuaternionArg q0, SimdQuaternionArg q1,
54  SimdQuaternionArg q2, float f,
55  float g) NLIB_NOEXCEPT;
56 
57  private:
58  Quaternion(); // forbidden
59 };
60 
61 #ifndef NLIB_DOXYGEN
62 
63 #define NLIB_M(tp) inline tp __vectorcall
64 
65 // XMQuaternionIdentity
66 NLIB_M(SimdQuaternion) Quaternion::Identity() NLIB_NOEXCEPT { return F128::Set0001(); }
67 
68 // XMQuaternionConjugate
69 NLIB_M(SimdQuaternion) Quaternion::Conjugate(SimdQuaternionArg q) NLIB_NOEXCEPT {
70  return F128::NegateEx<true, true, true, false>(q);
71 }
72 
73 // XMQuaternionLength
74 NLIB_M(f128) Quaternion::Length(SimdQuaternionArg q) NLIB_NOEXCEPT { return Vector4::Length(q); }
75 
76 // XMQuaternionLengthSq
77 NLIB_M(f128) Quaternion::LengthSq(SimdQuaternionArg q) NLIB_NOEXCEPT {
78  return Vector4::LengthSq(q);
79 }
80 
81 // XMQuaternionReciprocalLength
82 NLIB_M(f128) Quaternion::RecpLength(SimdQuaternionArg q) NLIB_NOEXCEPT {
83  return Vector4::RecpLength(q);
84 }
85 
86 // XMQuaternionNormalize
87 NLIB_M(SimdQuaternion) Quaternion::Normalize(SimdQuaternionArg q) NLIB_NOEXCEPT {
88  return Vector4::Normalize(q);
89 }
90 
91 // XMQuaternionNormalizeEst
92 NLIB_M(SimdQuaternion) Quaternion::NormalizeEst(SimdQuaternionArg q) NLIB_NOEXCEPT {
93  return Vector4::NormalizeEst(q);
94 }
95 
96 // XMQuaternionInverse
97 NLIB_M(SimdQuaternion) Quaternion::Inverse(SimdQuaternionArg q) NLIB_NOEXCEPT {
98  f128 lenSq = LengthSq(q);
99  SimdQuaternion conj = Conjugate(q);
100 
101  f128 eps = F128::SetEpsilon();
102  f128 nearZero = F128::CmpLe(lenSq, eps);
103 
104  SimdQuaternion inv = F128::Div(conj, lenSq);
105  return F128::AndNot(nearZero, inv);
106 }
107 
108 // XMQuaternionLn
109 NLIB_M(SimdQuaternion) Quaternion::Ln(SimdQuaternionArg q_normalized) NLIB_NOEXCEPT {
110  static const float one_eps = 1.f - 0.00001f;
111  f128 q0 = F128::SetZeroToLane<3>(q_normalized);
112  f128 w = F128::SetValue<3>(q_normalized, each_select32);
113  f128 xyz_len = Vector4::Length(q0);
114  f128 w_not_near_one = F128::InBound(w, F128::SetValue(one_eps, each_float));
115 
116  f128 theta = F128::ArcTan2(xyz_len, w);
117  f128 result = F128::Div(theta, xyz_len);
118  result = F128::Mult(q0, result);
119  result = F128::Select(w_not_near_one, result, q0);
120  return result;
121 }
122 
123 // XMQuaternionExp
124 NLIB_M(SimdQuaternion) Quaternion::Exp(SimdQuaternionArg q) NLIB_NOEXCEPT {
125  f128 xyz_len = Vector4::Length(F128::SetZeroToLane<3>(q));
126  f128x2 sc = F128::SinCos(xyz_len);
127 
128  f128 result = F128::Mult(q, F128::Div(sc.val[0], xyz_len));
129  f128 near_zero = F128::CmpNearEqZero(xyz_len, F128::SetEpsilon());
130  result = F128::Select(near_zero, q, result);
131  result = F128::Splat<false, false, false, true>(result, sc.val[1]);
132  return result;
133 }
134 
135 // XMQuaternionIsIdentity
136 NLIB_M(bool) Quaternion::IsIdentity(SimdQuaternionArg q) NLIB_NOEXCEPT { // NOLINT
137  return Vector4::CmpEq(q, Identity());
138 }
139 
140 // XMQuaternionIsInfinite
141 NLIB_M(bool) Quaternion::IsInfinite(SimdQuaternionArg q) NLIB_NOEXCEPT { // NOLINT
142  return Vector4::IsInfinite(q);
143 }
144 
145 // XMQuaternionIsNaN
146 NLIB_M(bool) Quaternion::IsNaN(SimdQuaternionArg q) NLIB_NOEXCEPT { // NOLINT
147  return Vector4::IsNaN(q);
148 }
149 
150 // XMQuaternionEqual
151 NLIB_M(bool) Quaternion::CmpEq(SimdQuaternionArg q0, // NOLINT
153  return Vector4::CmpEq(q0, q1);
154 }
155 
156 // XMQuaternionNotEqual
157 NLIB_M(bool) Quaternion::CmpNe(SimdQuaternionArg q0, // NOLINT
159  return Vector4::CmpNe(q0, q1);
160 }
161 
162 // XMQuaternionDot
163 NLIB_M(f128) Quaternion::Dot(SimdQuaternionArg q0, SimdQuaternionArg q1) NLIB_NOEXCEPT {
164  return Vector4::Dot(q0, q1);
165 }
166 
167 // XMQuaternionMultiply
168 NLIB_M(SimdQuaternion) Quaternion::Mult(SimdQuaternionArg q0, SimdQuaternionArg q1) NLIB_NOEXCEPT {
169  SimdVector v1 = F128::Swizzle<3, 0, 1, 2>(q1);
170  f128 r1 = F128::Swizzle<3, 2, 1, 0>(q0);
171  f128 r2 = F128::Swizzle<2, 3, 0, 1>(q0);
172  f128 r3 = F128::Swizzle<1, 0, 3, 2>(q0);
173  SimdMatrix m;
174  m.r[0] = q0;
175  m.r[1] = F128::NegateEx<false, true, false, true>(r1);
176  m.r[2] = F128::NegateEx<false, false, true, true>(r2);
177  m.r[3] = F128::NegateEx<true, false, false, true>(r3);
178  return Vector4::Transform(v1, m);
179 }
180 
181 // sin_half_rad = sin(rad/2), cos_half_rad = cos(rad/2)
182 // XMQuaternionRotationNormal
183 NLIB_M(SimdQuaternion) Quaternion::FromRotationAxisAndSinCos(SimdVectorArg axis_normalized,
184  float sin_half_rad,
185  float cos_half_rad) NLIB_NOEXCEPT {
186  SimdVector axis = F128::SetFloatToLane<3>(axis_normalized, 1.f);
187  f128 scale = F128::SetValue(sin_half_rad, sin_half_rad, sin_half_rad, cos_half_rad);
188  return F128::Mult(axis, scale);
189 }
190 
191 // m must be rotation matrix
192 NLIB_M(SimdQuaternion) Quaternion::FromRotationMatrix(SimdMatrixArg m) NLIB_NOEXCEPT {
193  // 1-2y^2-2z^2 2xy+2wz 2xz-2wy
194  // 2xy-2wz 1-2x^2-2z^2 2yz+2wx
195  // 2xz+2wy 2yz-2wx 1^2x^2^2y^2
196  f128 r0 = m.r[0];
197  f128 r1 = m.r[1];
198  f128 r2 = m.r[2];
199 
200  f128 elem;
201  // elem = { m00 - m11 - m22 + 1.f, -m00 + m11 - m22 + 1.f,
202  // -m00 - m11 + m22 + 1.f, m00 + m11 + m22 + 1.f }
203  // -> { 4x^2, 4y^2, 4z^2, 4w^2 }
204  {
205  f128 m00x = F128::SetValue<0>(r0, each_select32);
206  f128 m11x = F128::SetValue<1>(r1, each_select32);
207  f128 m22x = F128::SetValue<2>(r2, each_select32);
208  m00x = F128::NegateEx<false, true, true, false>(m00x);
209  m11x = F128::NegateEx<true, false, true, false>(m11x);
210  m22x = F128::NegateEx<true, true, false, false>(m22x);
211  f128 one = F128::SetOne();
212  elem = F128::Add(m00x, m11x);
213  elem = F128::Add(elem, m22x);
214  elem = F128::Add(elem, one);
215  }
216 
217  f128 xx_ge_yy;
218  f128 zz_ge_ww;
219  f128 xxyy_ge_zzww;
220  f128 elem_max;
221  {
222  f128 t0, t1, t2, t3;
223  t0 = F128::SetValue<0>(elem, each_select32);
224  t1 = F128::SetValue<1>(elem, each_select32);
225  t2 = F128::SetValue<2>(elem, each_select32);
226  t3 = F128::SetValue<3>(elem, each_select32);
227  xx_ge_yy = F128::CmpGe(t0, t1);
228  zz_ge_ww = F128::CmpGe(t2, t3);
229 
230  t0 = F128::PairwiseMax(elem, elem);
231  t2 = F128::SetValue<0>(t0, each_select32);
232  t3 = F128::SetValue<1>(t0, each_select32);
233  elem_max = F128::PairwiseMax(t0, t0);
234  xxyy_ge_zzww = F128::CmpGe(t2, t3);
235  }
236 
237  f128 v0_25 = F128::SetValue(0.25f, each_float);
238  elem_max = F128::Mult(v0_25, elem_max);
239  f128 mult = F128::RecpSqrt(elem_max);
240  f128 v = F128::Mult(mult, elem_max);
241  mult = F128::Mult(v0_25, mult);
242 
243  f128 m01_20_12;
244  m01_20_12 = F128::Permute<1, 4, 2, -1>(r0, r2);
245  m01_20_12 = F128::Permute<0, 1, 6, -1>(m01_20_12, r1);
246 
247  f128 m10_02_21;
248  m10_02_21 = F128::Permute<0, 1, 5, -1>(r1, r2);
249  m10_02_21 = F128::Permute<0, 6, 2, -1>(m10_02_21, r0);
250 
251  f128 ans_x_biggest, ans_y_biggest, ans_z_biggest, ans_w_biggest;
252  {
253  f128 tmp_x, tmp_y, tmp_z, tmp_w;
254  tmp_x = F128::NegateEx<false, false, true, true>(m10_02_21);
255  tmp_x = F128::Mult(mult, F128::Add(m01_20_12, tmp_x));
256 
257  tmp_y = F128::NegateEx<false, true, false, true>(m10_02_21);
258  tmp_y = F128::Mult(mult, F128::Add(m01_20_12, tmp_y));
259 
260  tmp_z = F128::NegateEx<true, false, false, true>(m10_02_21);
261  tmp_z = F128::Mult(mult, F128::Add(m01_20_12, tmp_z));
262 
263  tmp_w = F128::Mult(mult, F128::Sub(m01_20_12, m10_02_21));
264 
265  ans_x_biggest = F128::Permute<4, 0, 1, 2>(tmp_x, v);
266  ans_y_biggest = F128::Permute<0, 4, 2, 1>(tmp_y, v);
267  ans_z_biggest = F128::Permute<1, 2, 4, 0>(tmp_z, v);
268  ans_w_biggest = F128::Permute<2, 1, 0, 4>(tmp_w, v);
269  }
270 
271  f128 ans_xy = F128::Select(xx_ge_yy, ans_x_biggest, ans_y_biggest);
272  f128 ans_zw = F128::Select(zz_ge_ww, ans_z_biggest, ans_w_biggest);
273  return F128::Select(xxyy_ge_zzww, ans_xy, ans_zw);
274 }
275 
276 // sin_half_xyz = { sin(rad_x/2), sin(rad_y/2), sin(rad_z/2), * }
277 // cos_half_xyz = { cos(rad_x/2), cos(rad_y/2), cos(rad_z/2), * }
278 // XMQuaternionRotationRollPitchYawFromVector
279 NLIB_M(SimdQuaternion) Quaternion::FromRotationZXY(SimdVectorArg sin_half_xyz,
280  SimdVectorArg cos_half_xyz) NLIB_NOEXCEPT {
281  // CxSySz + SxCyCz
282  // -SxCySz + CxSyCz
283  // -SxSyCz + CxCySz
284  // SxSySz + CxCyCz
285  f128 x1 = F128::Permute<4, 0, 0, 0>(sin_half_xyz, cos_half_xyz);
286  f128 y1 = F128::Permute<1, 5, 1, 1>(sin_half_xyz, cos_half_xyz);
287  f128 z1 = F128::Permute<2, 2, 6, 2>(sin_half_xyz, cos_half_xyz);
288  x1 = F128::NegateEx<false, true, true, false>(x1);
289  f128 x0 = F128::Permute<0, 4, 4, 4>(sin_half_xyz, cos_half_xyz);
290  f128 y0 = F128::Permute<5, 1, 5, 5>(sin_half_xyz, cos_half_xyz);
291  f128 z0 = F128::Permute<6, 6, 2, 6>(sin_half_xyz, cos_half_xyz);
292 
293  f128 z0x0y0 = F128::Mult(x0, y0);
294  f128 z1x1y1 = F128::Mult(x1, y1);
295  z0x0y0 = F128::Mult(z0x0y0, z0);
296  return F128::MultAdd(z1x1y1, z1, z0x0y0);
297 }
298 
299 // XMQuaternionToAxisAngle
300 // rad can be NULL
301 NLIB_M(SimdVector) Quaternion::ToAxisAngle(float* rad, SimdQuaternion q) NLIB_NOEXCEPT {
302  if (rad) {
303  *rad = 2.f * F128::GetFloatFromLane<3>(F128::ArcCos(q));
304  }
305  return q;
306 }
307 
308 // XMQuaternionSlerp
309 NLIB_M(SimdVector) Quaternion::Slerp(SimdQuaternionArg q0_normalized,
310  SimdQuaternionArg q1_normalized, float t) NLIB_NOEXCEPT {
311  f128 q0q1 = Dot(q0_normalized, q1_normalized);
312  f128 tooNear;
313  f128 sp;
314  {
315  f128 ss = F128::MultSub(q0q1, q0q1, F128::SetValue(1.f, each_float));
316  f128 eps = F128::SetEpsilon();
317  tooNear = F128::CmpLe(ss, eps);
318  sp = F128::RecpSqrt(ss);
319  }
320  f128 ph = F128::ArcCos(q0q1);
321  f128 k = F128::SetValue(1.f - t, t, 0.f, 0.f);
322  f128 t0t1 = F128::Mult(sp, F128::Sin(F128::Mult(ph, k)));
323  f128 t0 = F128::SetValue<0>(t0t1, each_select32);
324  f128 t1 = F128::SetValue<1>(t0t1, each_select32);
325 
326  SimdVector ret = F128::Mult(q0_normalized, t0);
327  ret = F128::MultAdd(q1_normalized, t1, ret);
328  return F128::Select(tooNear, q0_normalized, ret);
329 }
330 
331 // XMQuaternionSquad
332 NLIB_M(SimdVector) Quaternion::Squad(SimdQuaternionArg q0_normalized,
333  SimdQuaternionArg q1_normalized,
334  SimdQuaternionArg q2_normalized,
335  SimdQuaternionArg q3_normalized, float t) NLIB_NOEXCEPT {
336  float t2 = (t - t * t) * 2.f;
337  SimdQuaternion q03 = Slerp(q0_normalized, q3_normalized, t);
338  SimdQuaternion q12 = Slerp(q1_normalized, q2_normalized, t);
339  return Slerp(q03, q12, t2);
340 }
341 
342 // XMQuaternionBaryCentric
343 NLIB_M(SimdVector) Quaternion::BaryCentric(SimdQuaternionArg q0, SimdQuaternionArg q1,
344  SimdQuaternionArg q2, float f, float g) NLIB_NOEXCEPT {
345  float fg = f + g;
346  if (fg <= 0.00001f && fg >= -0.00001f) return q0; // to avoid division by zero
347  SimdQuaternion q01 = Slerp(q0, q1, fg);
348  SimdQuaternion q02 = Slerp(q0, q2, fg);
349  return Slerp(q01, q02, g / fg);
350 }
351 
352 // XMQuaternionSquadSetup
353 inline void __vectorcall Quaternion::SquadSetup(SimdQuaternion* a, SimdQuaternion* b,
354  SimdQuaternion* c,
358  SimdQuaternion Q0, Q2, Q3;
359  {
360  f128 lensq_a01 = Quaternion::LengthSq(F128::Add(q0, q1));
361  f128 lensq_s01 = Quaternion::LengthSq(F128::Sub(q0, q1));
362  f128 cmp01 = F128::CmpLt(lensq_a01, lensq_s01);
363  f128 neg_q0 = F128::Negate(q0);
364  Q0 = F128::Select(cmp01, neg_q0, q0);
365 
366  f128 lensq_a12 = Quaternion::LengthSq(F128::Add(q1, q2));
367  f128 lensq_s12 = Quaternion::LengthSq(F128::Sub(q1, q2));
368  f128 cmp12 = F128::CmpLt(lensq_a12, lensq_s12);
369  f128 neg_q2 = F128::Negate(q2);
370  Q2 = F128::Select(cmp12, neg_q2, q2);
371 
372  f128 lensq_a23 = Quaternion::LengthSq(F128::Add(q2, q3));
373  f128 lensq_s23 = Quaternion::LengthSq(F128::Sub(q2, q3));
374  f128 cmp23 = F128::CmpLt(lensq_a23, lensq_s23);
375  f128 neg_q3 = F128::Negate(q3);
376  Q3 = F128::Select(cmp23, neg_q3, q3);
377  }
378  const SimdQuaternion& Q1 = q1;
379 
380  SimdQuaternion InvQ1 = Quaternion::Inverse(Q1);
381  SimdQuaternion Ln_ExpQ1_Q2 = Quaternion::Ln(Quaternion::Mult(InvQ1, Q2));
382  SimdQuaternion Ln_ExpQ1_Q0 = Quaternion::Ln(Quaternion::Mult(InvQ1, Q0));
383 
384  SimdQuaternion InvQ2 = Quaternion::Inverse(Q2);
385  SimdQuaternion Ln_ExpQ2_Q3 = Quaternion::Ln(Quaternion::Mult(InvQ2, Q3));
386  SimdQuaternion Ln_ExpQ2_Q1 = Quaternion::Ln(Quaternion::Mult(InvQ2, Q1));
387 
388  f128 v0_25 = F128::SetValue(0.25f, each_float);
389  SimdQuaternion A = F128::Mult(v0_25, F128::Add(Ln_ExpQ1_Q2, Ln_ExpQ1_Q0));
390  SimdQuaternion B = F128::Mult(v0_25, F128::Add(Ln_ExpQ2_Q3, Ln_ExpQ2_Q1));
391  A = Quaternion::Exp(A);
392  B = Quaternion::Exp(B);
393 
394  *a = Quaternion::Mult(Q1, A);
395  *b = Quaternion::Mult(Q2, B);
396  *c = Q2;
397 }
398 
399 #undef NLIB_M
400 
401 #endif // NLIB_DOXYGEN
402 
403 } // namespace simd
404 NLIB_NAMESPACE_END
405 
406 #endif // INCLUDE_NN_NLIB_SIMD_SIMDQUATERNION_H_
The class with the collection of functions that handle quaternions.
f128arg SimdVectorArg
f128arg is defined using typedef.
Definition: SimdFloat.h:4137
The type for two SIMD registers for 128-bit, single-precision, floating-point numbers.
Definition: SimdFloat.h:35
#define NLIB_VIS_HIDDEN
Symbols for functions and classes are not made available outside of the library.
Definition: Platform_unix.h:60
constexpr const each_float_tag each_float
The tag for representing a single-precision floating-point number with an each_float_tag-type constan...
Definition: SimdFloat.h:55
f128arg SimdQuaternionArg
f128arg is defined using typedef.
Definition: SimdFloat.h:4139
f128 r[4]
Keeps each row of a 4x4 matrix.
Definition: SimdFloat.h:4164
The structure for keeping a 4x4 matrix.
Definition: SimdFloat.h:4148
#define NLIB_NOEXCEPT
Defines noexcept geared to the environment, or the equivalent.
Definition: Config.h:86
Defines the class and functions for SIMD computations on single-precision floating-point numbers...
constexpr const each_select32_tag each_select32
The tag for representing the selection of a 32-bit lane with an each_select32_tag-type constant objec...
Definition: SimdInt.h:50
nlib_f128_t f128
nlib_f128_t is defined using typedef.
Definition: SimdFloat.h:58
Defines a four-dimensional vector.
f128 SimdQuaternion
f128 is defined using typedef. Used when handling quaternions.
Definition: SimdFloat.h:4138
f128 SimdVector
f128 is defined using typedef. Used when handling three-dimensional or four-dimensional vectors...
Definition: SimdFloat.h:4136