nlib
SimdQuaternion.h
Go to the documentation of this file.
1 
2 #pragma once
3 #ifndef INCLUDE_NN_NLIB_SIMD_SIMDQUATERNION_H_
4 #define INCLUDE_NN_NLIB_SIMD_SIMDQUATERNION_H_
5 
8 
9 NLIB_NAMESPACE_BEGIN
10 namespace simd {
11 
13  public:
14  static SimdQuaternion __vectorcall Identity() NLIB_NOEXCEPT;
15  static SimdQuaternion __vectorcall Conjugate(SimdQuaternionArg q) NLIB_NOEXCEPT;
16  static f128 __vectorcall Length(SimdQuaternionArg q) NLIB_NOEXCEPT;
17  static f128 __vectorcall LengthSq(SimdQuaternionArg q) NLIB_NOEXCEPT;
18  static f128 __vectorcall RecpLength(SimdQuaternionArg q) NLIB_NOEXCEPT;
19  static SimdQuaternion __vectorcall Normalize(SimdQuaternionArg q) NLIB_NOEXCEPT;
20  static SimdQuaternion __vectorcall NormalizeEst(SimdQuaternionArg q) NLIB_NOEXCEPT;
21  static SimdQuaternion __vectorcall Inverse(SimdQuaternionArg q) NLIB_NOEXCEPT;
22  static SimdQuaternion __vectorcall Ln(SimdQuaternionArg q_normalized) NLIB_NOEXCEPT;
23  static SimdQuaternion __vectorcall Exp(SimdQuaternionArg q) NLIB_NOEXCEPT;
24 
25  static bool __vectorcall IsIdentity(SimdQuaternionArg q) NLIB_NOEXCEPT;
26  static bool __vectorcall IsInfinite(SimdQuaternionArg q) NLIB_NOEXCEPT;
27  static bool __vectorcall IsNaN(SimdQuaternionArg q) NLIB_NOEXCEPT;
28  static bool __vectorcall CmpEq(SimdQuaternionArg q0, SimdQuaternionArg q1) NLIB_NOEXCEPT;
29  static bool __vectorcall CmpNe(SimdQuaternionArg q0, SimdQuaternionArg q1) NLIB_NOEXCEPT;
30 
31  static f128 __vectorcall Dot(SimdQuaternionArg q0, SimdQuaternionArg q1) NLIB_NOEXCEPT;
32  static SimdQuaternion __vectorcall
34 
35  static SimdQuaternion __vectorcall
36  FromRotationAxisAndSinCos(SimdVectorArg axis_normalized, float sin_half_rad,
37  float cos_half_rad) NLIB_NOEXCEPT;
38  static SimdQuaternion __vectorcall FromRotationMatrix(SimdMatrixArg m) NLIB_NOEXCEPT;
39  static SimdQuaternion __vectorcall
40  FromRotationZXY(SimdVectorArg sin_half_xyz, SimdVectorArg cos_half_xyz) NLIB_NOEXCEPT;
41  static SimdVector __vectorcall ToAxisAngle(float* rad, SimdQuaternion q) NLIB_NOEXCEPT;
42 
43  static SimdQuaternion __vectorcall Slerp(SimdQuaternionArg q0_normalized,
44  SimdQuaternionArg q1_normalized,
45  float t) NLIB_NOEXCEPT;
46  static SimdQuaternion __vectorcall
47  Squad(SimdQuaternionArg q0_normalized, SimdQuaternionArg q1_normalized,
48  SimdQuaternionArg q2_normalized, SimdQuaternionArg q3_normalized,
49  float t) NLIB_NOEXCEPT;
50  static void __vectorcall SquadSetup(SimdQuaternion* a, SimdQuaternion* b, SimdQuaternion* c,
53  static SimdQuaternion __vectorcall BaryCentric(SimdQuaternionArg q0, SimdQuaternionArg q1,
54  SimdQuaternionArg q2, float f,
55  float g) NLIB_NOEXCEPT;
56 
57  private:
58  Quaternion(); // forbidden
59 };
60 
61 #ifndef NLIB_DOXYGEN
62 
63 #define NLIB_M(tp) inline tp __vectorcall
64 
65 // XMQuaternionIdentity
66 NLIB_M(SimdQuaternion) Quaternion::Identity() NLIB_NOEXCEPT { return F128::LoadA16(F128::v0001_); }
67 
68 // XMQuaternionConjugate
69 NLIB_M(SimdQuaternion) Quaternion::Conjugate(SimdQuaternionArg q) NLIB_NOEXCEPT {
70  return F128::NegateEx<true, true, true, false>(q);
71 }
72 
73 // XMQuaternionLength
74 NLIB_M(f128) Quaternion::Length(SimdQuaternionArg q) NLIB_NOEXCEPT { return Vector4::Length(q); }
75 
76 // XMQuaternionLengthSq
77 NLIB_M(f128) Quaternion::LengthSq(SimdQuaternionArg q) NLIB_NOEXCEPT {
78  return Vector4::LengthSq(q);
79 }
80 
81 // XMQuaternionReciprocalLength
82 NLIB_M(f128) Quaternion::RecpLength(SimdQuaternionArg q) NLIB_NOEXCEPT {
83  return Vector4::RecpLength(q);
84 }
85 
86 // XMQuaternionNormalize
87 NLIB_M(SimdQuaternion) Quaternion::Normalize(SimdQuaternionArg q) NLIB_NOEXCEPT {
88  return Vector4::Normalize(q);
89 }
90 
91 // XMQuaternionNormalizeEst
92 NLIB_M(SimdQuaternion) Quaternion::NormalizeEst(SimdQuaternionArg q) NLIB_NOEXCEPT {
93  return Vector4::NormalizeEst(q);
94 }
95 
96 // XMQuaternionInverse
97 NLIB_M(SimdQuaternion) Quaternion::Inverse(SimdQuaternionArg q) NLIB_NOEXCEPT {
98  f128 lenSq = LengthSq(q);
99  SimdQuaternion conj = Conjugate(q);
100  f128 zero = F128::SetZero();
101 
102  f128 eps = F128::SetEpsilon();
103  f128 nearZero = F128::CmpLe(lenSq, eps);
104 
105  SimdQuaternion inv = F128::Div(conj, lenSq);
106  return F128::Select(nearZero, zero, inv);
107 }
108 
109 // XMQuaternionLn
110 NLIB_M(SimdQuaternion) Quaternion::Ln(SimdQuaternionArg q_normalized) NLIB_NOEXCEPT {
111  static const float one_eps = 1.f - 0.00001f;
112  f128 q0 = F128::SetZeroToLane<3>(q_normalized);
113  f128 w = F128::SetValue<3>(q_normalized, each_select32);
114  f128 xyz_len = Vector4::Length(q0);
115  f128 w_not_near_one = F128::InBound(w, F128::SetValue(one_eps, each_float));
116 
117  f128 theta = F128::ArcTan2(xyz_len, w);
118  f128 result = F128::Div(theta, xyz_len);
119  result = F128::Mult(q0, result);
120  result = F128::Select(w_not_near_one, result, q0);
121  return result;
122 }
123 
124 // XMQuaternionExp
125 NLIB_M(SimdQuaternion) Quaternion::Exp(SimdQuaternionArg q) NLIB_NOEXCEPT {
126  f128 xyz_len = Vector4::Length(F128::SetZeroToLane<3>(q));
127  f128x2 sc = F128::SinCos(xyz_len);
128 
129  f128 result = F128::Mult(q, F128::Div(sc.val[0], xyz_len));
130  f128 near_zero = F128::CmpNearEq(xyz_len, F128::SetZero(), F128::SetEpsilon());
131  result = F128::Select(near_zero, q, result);
132  result = F128::Splat<false, false, false, true>(result, sc.val[1]);
133  return result;
134 }
135 
136 // XMQuaternionIsIdentity
137 NLIB_M(bool) Quaternion::IsIdentity(SimdQuaternionArg q) NLIB_NOEXCEPT { // NOLINT
138  return Vector4::CmpEq(q, Identity());
139 }
140 
141 // XMQuaternionIsInfinite
142 NLIB_M(bool) Quaternion::IsInfinite(SimdQuaternionArg q) NLIB_NOEXCEPT { // NOLINT
143  return Vector4::IsInfinite(q);
144 }
145 
146 // XMQuaternionIsNaN
147 NLIB_M(bool) Quaternion::IsNaN(SimdQuaternionArg q) NLIB_NOEXCEPT { // NOLINT
148  return Vector4::IsNaN(q);
149 }
150 
151 // XMQuaternionEqual
152 NLIB_M(bool) Quaternion::CmpEq(SimdQuaternionArg q0, // NOLINT
153  SimdQuaternionArg q1) NLIB_NOEXCEPT {
154  return Vector4::CmpEq(q0, q1);
155 }
156 
157 // XMQuaternionNotEqual
158 NLIB_M(bool) Quaternion::CmpNe(SimdQuaternionArg q0, // NOLINT
159  SimdQuaternionArg q1) NLIB_NOEXCEPT {
160  return Vector4::CmpNe(q0, q1);
161 }
162 
163 // XMQuaternionDot
164 NLIB_M(f128) Quaternion::Dot(SimdQuaternionArg q0, SimdQuaternionArg q1) NLIB_NOEXCEPT {
165  return Vector4::Dot(q0, q1);
166 }
167 
168 // XMQuaternionMultiply
169 NLIB_M(SimdQuaternion) Quaternion::Mult(SimdQuaternionArg q0, SimdQuaternionArg q1) NLIB_NOEXCEPT {
170  SimdVector v1 = F128::Swizzle<3, 0, 1, 2>(q1);
171  f128 r1 = F128::Swizzle<3, 2, 1, 0>(q0);
172  f128 r2 = F128::Swizzle<2, 3, 0, 1>(q0);
173  f128 r3 = F128::Swizzle<1, 0, 3, 2>(q0);
174  SimdMatrix m;
175  m.r[0] = q0;
176  m.r[1] = F128::NegateEx<false, true, false, true>(r1);
177  m.r[2] = F128::NegateEx<false, false, true, true>(r2);
178  m.r[3] = F128::NegateEx<true, false, false, true>(r3);
179  return Vector4::Transform(v1, m);
180 }
181 
182 // sin_half_rad = sin(rad/2), cos_half_rad = cos(rad/2)
183 // XMQuaternionRotationNormal
184 NLIB_M(SimdQuaternion) Quaternion::FromRotationAxisAndSinCos(SimdVectorArg axis_normalized,
185  float sin_half_rad,
186  float cos_half_rad) NLIB_NOEXCEPT {
187  SimdVector axis = F128::SetFloatToLane<3>(axis_normalized, 1.f);
188  f128 scale = F128::SetValue(sin_half_rad, sin_half_rad, sin_half_rad, cos_half_rad);
189  return F128::Mult(axis, scale);
190 }
191 
192 // m must be rotation matrix
193 NLIB_M(SimdQuaternion) Quaternion::FromRotationMatrix(SimdMatrixArg m) NLIB_NOEXCEPT {
194  // 1-2y^2-2z^2 2xy+2wz 2xz-2wy
195  // 2xy-2wz 1-2x^2-2z^2 2yz+2wx
196  // 2xz+2wy 2yz-2wx 1^2x^2^2y^2
197  f128 r0 = m.r[0];
198  f128 r1 = m.r[1];
199  f128 r2 = m.r[2];
200 
201  f128 elem;
202  // elem = { m00 - m11 - m22 + 1.f, -m00 + m11 - m22 + 1.f,
203  // -m00 - m11 + m22 + 1.f, m00 + m11 + m22 + 1.f }
204  // -> { 4x^2, 4y^2, 4z^2, 4w^2 }
205  {
206  f128 m00x = F128::SetValue<0>(r0, each_select32);
207  f128 m11x = F128::SetValue<1>(r1, each_select32);
208  f128 m22x = F128::SetValue<2>(r2, each_select32);
209  m00x = F128::NegateEx<false, true, true, false>(m00x);
210  m11x = F128::NegateEx<true, false, true, false>(m11x);
211  m22x = F128::NegateEx<true, true, false, false>(m22x);
212  f128 one = F128::SetOne();
213  elem = F128::Add(m00x, m11x);
214  elem = F128::Add(elem, m22x);
215  elem = F128::Add(elem, one);
216  }
217 
218  f128 xx_ge_yy;
219  f128 zz_ge_ww;
220  f128 xxyy_ge_zzww;
221  f128 elem_max;
222  {
223  f128 t0, t1, t2, t3;
224  t0 = F128::SetValue<0>(elem, each_select32);
225  t1 = F128::SetValue<1>(elem, each_select32);
226  t2 = F128::SetValue<2>(elem, each_select32);
227  t3 = F128::SetValue<3>(elem, each_select32);
228  xx_ge_yy = F128::CmpGe(t0, t1);
229  zz_ge_ww = F128::CmpGe(t2, t3);
230 
231  t0 = F128::PairwiseMax(elem, elem);
232  t2 = F128::SetValue<0>(t0, each_select32);
233  t3 = F128::SetValue<1>(t0, each_select32);
234  elem_max = F128::PairwiseMax(t0, t0);
235  xxyy_ge_zzww = F128::CmpGe(t2, t3);
236  }
237 
238  f128 v0_25 = F128::SetValue(0.25f, each_float);
239  elem_max = F128::Mult(v0_25, elem_max);
240  f128 mult = F128::RecpSqrt(elem_max);
241  f128 v = F128::Mult(mult, elem_max);
242  mult = F128::Mult(v0_25, mult);
243 
244  f128 m01_20_12;
245  m01_20_12 = F128::Permute<1, 4, 2, 8>(r0, r2);
246  m01_20_12 = F128::Permute<0, 1, 6, 8>(m01_20_12, r1);
247 
248  f128 m10_02_21;
249  m10_02_21 = F128::Permute<0, 1, 5, 8>(r1, r2);
250  m10_02_21 = F128::Permute<0, 6, 2, 8>(m10_02_21, r0);
251 
252  f128 ans_x_biggest, ans_y_biggest, ans_z_biggest, ans_w_biggest;
253  {
254  f128 tmp_x, tmp_y, tmp_z, tmp_w;
255  tmp_x = F128::NegateEx<false, false, true, true>(m10_02_21);
256  tmp_x = F128::Mult(mult, F128::Add(m01_20_12, tmp_x));
257 
258  tmp_y = F128::NegateEx<false, true, false, true>(m10_02_21);
259  tmp_y = F128::Mult(mult, F128::Add(m01_20_12, tmp_y));
260 
261  tmp_z = F128::NegateEx<true, false, false, true>(m10_02_21);
262  tmp_z = F128::Mult(mult, F128::Add(m01_20_12, tmp_z));
263 
264  tmp_w = F128::Mult(mult, F128::Sub(m01_20_12, m10_02_21));
265 
266  ans_x_biggest = F128::Permute<4, 0, 1, 2>(tmp_x, v);
267  ans_y_biggest = F128::Permute<0, 4, 2, 1>(tmp_y, v);
268  ans_z_biggest = F128::Permute<1, 2, 4, 0>(tmp_z, v);
269  ans_w_biggest = F128::Permute<2, 1, 0, 4>(tmp_w, v);
270  }
271 
272  f128 ans_xy = F128::Select(xx_ge_yy, ans_x_biggest, ans_y_biggest);
273  f128 ans_zw = F128::Select(zz_ge_ww, ans_z_biggest, ans_w_biggest);
274  return F128::Select(xxyy_ge_zzww, ans_xy, ans_zw);
275 }
276 
277 // sin_half_xyz = { sin(rad_x/2), sin(rad_y/2), sin(rad_z/2), * }
278 // cos_half_xyz = { cos(rad_x/2), cos(rad_y/2), cos(rad_z/2), * }
279 // XMQuaternionRotationRollPitchYawFromVector
280 NLIB_M(SimdQuaternion) Quaternion::FromRotationZXY(SimdVectorArg sin_half_xyz,
281  SimdVectorArg cos_half_xyz) NLIB_NOEXCEPT {
282  // CxSySz + SxCyCz
283  // -SxCySz + CxSyCz
284  // -SxSyCz + CxCySz
285  // SxSySz + CxCyCz
286  f128 x1 = F128::Permute<4, 0, 0, 0>(sin_half_xyz, cos_half_xyz);
287  f128 y1 = F128::Permute<1, 5, 1, 1>(sin_half_xyz, cos_half_xyz);
288  f128 z1 = F128::Permute<2, 2, 6, 2>(sin_half_xyz, cos_half_xyz);
289  x1 = F128::NegateEx<false, true, true, false>(x1);
290  f128 x0 = F128::Permute<0, 4, 4, 4>(sin_half_xyz, cos_half_xyz);
291  f128 y0 = F128::Permute<5, 1, 5, 5>(sin_half_xyz, cos_half_xyz);
292  f128 z0 = F128::Permute<6, 6, 2, 6>(sin_half_xyz, cos_half_xyz);
293 
294  f128 z0x0y0 = F128::Mult(x0, y0);
295  f128 z1x1y1 = F128::Mult(x1, y1);
296  z0x0y0 = F128::Mult(z0x0y0, z0);
297  return F128::MultAdd(z1x1y1, z1, z0x0y0);
298 }
299 
300 // XMQuaternionToAxisAngle
301 // rad can be NULL
302 NLIB_M(SimdVector) Quaternion::ToAxisAngle(float* rad, SimdQuaternion q) NLIB_NOEXCEPT {
303  if (rad) {
304  *rad = 2.f * F128::GetFloatFromLane<3>(F128::ArcCos(q));
305  }
306  return q;
307 }
308 
309 // XMQuaternionSlerp
310 NLIB_M(SimdVector) Quaternion::Slerp(SimdQuaternionArg q0_normalized,
311  SimdQuaternionArg q1_normalized, float t) NLIB_NOEXCEPT {
312  f128 q0q1 = Dot(q0_normalized, q1_normalized);
313  f128 tooNear;
314  f128 sp;
315  {
316  f128 ss = F128::MultSub(q0q1, q0q1, F128::SetValue(1.f, each_float));
317  f128 eps = F128::SetEpsilon();
318  tooNear = F128::CmpLe(ss, eps);
319  sp = F128::RecpSqrt(ss);
320  }
321  f128 ph = F128::ArcCos(q0q1);
322  f128 k = F128::SetValue(1.f - t, t, 0.f, 0.f);
323  f128 t0t1 = F128::Mult(sp, F128::Sin(F128::Mult(ph, k)));
324  f128 t0 = F128::SetValue<0>(t0t1, each_select32);
325  f128 t1 = F128::SetValue<1>(t0t1, each_select32);
326 
327  SimdVector ret = F128::Mult(q0_normalized, t0);
328  ret = F128::MultAdd(q1_normalized, t1, ret);
329  return F128::Select(tooNear, q0_normalized, ret);
330 }
331 
332 // XMQuaternionSquad
333 NLIB_M(SimdVector) Quaternion::Squad(SimdQuaternionArg q0_normalized,
334  SimdQuaternionArg q1_normalized,
335  SimdQuaternionArg q2_normalized,
336  SimdQuaternionArg q3_normalized, float t) NLIB_NOEXCEPT {
337  float t2 = (t - t * t) * 2.f;
338  SimdQuaternion q03 = Slerp(q0_normalized, q3_normalized, t);
339  SimdQuaternion q12 = Slerp(q1_normalized, q2_normalized, t);
340  return Slerp(q03, q12, t2);
341 }
342 
343 // XMQuaternionBaryCentric
344 NLIB_M(SimdVector) Quaternion::BaryCentric(SimdQuaternionArg q0, SimdQuaternionArg q1,
345  SimdQuaternionArg q2, float f, float g) NLIB_NOEXCEPT {
346  float fg = f + g;
347  if (fg <= 0.00001f && fg >= -0.00001f) return q0; // to avoid division by zero
348  SimdQuaternion q01 = Slerp(q0, q1, fg);
349  SimdQuaternion q02 = Slerp(q0, q2, fg);
350  return Slerp(q01, q02, g / fg);
351 }
352 
353 // XMQuaternionSquadSetup
354 inline void __vectorcall Quaternion::SquadSetup(SimdQuaternion* a, SimdQuaternion* b,
355  SimdQuaternion* c,
358  SimdQuaternionArg q3) NLIB_NOEXCEPT {
359  SimdQuaternion Q0, Q2, Q3;
360  {
361  f128 lensq_a01 = Quaternion::LengthSq(F128::Add(q0, q1));
362  f128 lensq_s01 = Quaternion::LengthSq(F128::Sub(q0, q1));
363  f128 cmp01 = F128::CmpLt(lensq_a01, lensq_s01);
364  f128 neg_q0 = F128::Negate(q0);
365  Q0 = F128::Select(cmp01, neg_q0, q0);
366 
367  f128 lensq_a12 = Quaternion::LengthSq(F128::Add(q1, q2));
368  f128 lensq_s12 = Quaternion::LengthSq(F128::Sub(q1, q2));
369  f128 cmp12 = F128::CmpLt(lensq_a12, lensq_s12);
370  f128 neg_q2 = F128::Negate(q2);
371  Q2 = F128::Select(cmp12, neg_q2, q2);
372 
373  f128 lensq_a23 = Quaternion::LengthSq(F128::Add(q2, q3));
374  f128 lensq_s23 = Quaternion::LengthSq(F128::Sub(q2, q3));
375  f128 cmp23 = F128::CmpLt(lensq_a23, lensq_s23);
376  f128 neg_q3 = F128::Negate(q3);
377  Q3 = F128::Select(cmp23, neg_q3, q3);
378  }
379  const SimdQuaternion& Q1 = q1;
380 
381  SimdQuaternion InvQ1 = Quaternion::Inverse(Q1);
382  SimdQuaternion Ln_ExpQ1_Q2 = Quaternion::Ln(Quaternion::Mult(InvQ1, Q2));
383  SimdQuaternion Ln_ExpQ1_Q0 = Quaternion::Ln(Quaternion::Mult(InvQ1, Q0));
384 
385  SimdQuaternion InvQ2 = Quaternion::Inverse(Q2);
386  SimdQuaternion Ln_ExpQ2_Q3 = Quaternion::Ln(Quaternion::Mult(InvQ2, Q3));
387  SimdQuaternion Ln_ExpQ2_Q1 = Quaternion::Ln(Quaternion::Mult(InvQ2, Q1));
388 
389  f128 v0_25 = F128::SetValue(0.25f, each_float);
390  SimdQuaternion A = F128::Mult(v0_25, F128::Add(Ln_ExpQ1_Q2, Ln_ExpQ1_Q0));
391  SimdQuaternion B = F128::Mult(v0_25, F128::Add(Ln_ExpQ2_Q3, Ln_ExpQ2_Q1));
392  A = Quaternion::Exp(A);
393  B = Quaternion::Exp(B);
394 
395  *a = Quaternion::Mult(Q1, A);
396  *b = Quaternion::Mult(Q2, B);
397  *c = Q2;
398 }
399 
400 #undef NLIB_M
401 
402 #endif // NLIB_DOXYGEN
403 
404 } // namespace simd
405 NLIB_NAMESPACE_END
406 
407 #endif // INCLUDE_NN_NLIB_SIMD_SIMDQUATERNION_H_
#define NLIB_NOEXCEPT
Defines noexcept geared to the environment, or the equivalent.
Definition: Platform.h:2151
The class with the collection of functions that handle quaternions.
f128arg SimdVectorArg
f128arg is defined using typedef.
Definition: SimdFloat.h:3927
#define NLIB_VIS_HIDDEN
Symbols for functions and classes are not made available outside of the library.
Definition: Platform_unix.h:50
constexpr const each_float_tag each_float
The tag for representing a single-precision floating-point number with an each_float_tag-type constan...
Definition: SimdFloat.h:51
f128arg SimdQuaternionArg
f128arg is defined using typedef.
Definition: SimdFloat.h:3929
The structure for keeping a 4x4 matrix.
Definition: SimdFloat.h:3938
nlib_f128x2_t f128x2
nlib_f128x2_t is defined using typedef.
Definition: SimdFloat.h:56
Defines the class and functions for SIMD computations on single-precision floating-point numbers...
constexpr const each_select32_tag each_select32
The tag for representing the selection of a 32-bit lane with an each_select32_tag-type constant objec...
Definition: SimdInt.h:56
nlib_f128_t f128
nlib_f128_t is is defined using typedef.
Definition: SimdFloat.h:54
Defines a four-dimensional vector.
f128 SimdQuaternion
f128 is defined using typedef. Used when handling quaternions.
Definition: SimdFloat.h:3928
f128 SimdVector
f128 is defined using typedef. Used when handling three-dimensional or four-dimensional vectors...
Definition: SimdFloat.h:3926