3 #ifndef INCLUDE_NN_NLIB_SIMD_SIMDQUATERNION_H_
4 #define INCLUDE_NN_NLIB_SIMD_SIMDQUATERNION_H_
36 FromRotationAxisAndSinCos(
SimdVectorArg axis_normalized,
float sin_half_rad,
63 #define NLIB_M(tp) inline tp __vectorcall
70 return F128::NegateEx<true, true, true, false>(q);
78 return Vector4::LengthSq(q);
83 return Vector4::RecpLength(q);
88 return Vector4::Normalize(q);
93 return Vector4::NormalizeEst(q);
98 f128 lenSq = LengthSq(q);
100 f128 zero = F128::SetZero();
102 f128 eps = F128::SetEpsilon();
103 f128 nearZero = F128::CmpLe(lenSq, eps);
106 return F128::Select(nearZero, zero, inv);
111 static const float one_eps = 1.f - 0.00001f;
112 f128 q0 = F128::SetZeroToLane<3>(q_normalized);
114 f128 xyz_len = Vector4::Length(q0);
115 f128 w_not_near_one = F128::InBound(w, F128::SetValue(one_eps,
each_float));
117 f128 theta = F128::ArcTan2(xyz_len, w);
118 f128 result = F128::Div(theta, xyz_len);
119 result = F128::Mult(q0, result);
120 result = F128::Select(w_not_near_one, result, q0);
126 f128 xyz_len = Vector4::Length(F128::SetZeroToLane<3>(q));
127 f128x2 sc = F128::SinCos(xyz_len);
129 f128 result = F128::Mult(q, F128::Div(sc.val[0], xyz_len));
130 f128 near_zero = F128::CmpNearEq(xyz_len, F128::SetZero(), F128::SetEpsilon());
131 result = F128::Select(near_zero, q, result);
132 result = F128::Splat<false, false, false, true>(result, sc.val[1]);
138 return Vector4::CmpEq(q, Identity());
143 return Vector4::IsInfinite(q);
148 return Vector4::IsNaN(q);
154 return Vector4::CmpEq(q0, q1);
160 return Vector4::CmpNe(q0, q1);
165 return Vector4::Dot(q0, q1);
170 SimdVector v1 = F128::Swizzle<3, 0, 1, 2>(q1);
171 f128 r1 = F128::Swizzle<3, 2, 1, 0>(q0);
172 f128 r2 = F128::Swizzle<2, 3, 0, 1>(q0);
173 f128 r3 = F128::Swizzle<1, 0, 3, 2>(q0);
176 m.r[1] = F128::NegateEx<false, true, false, true>(r1);
177 m.r[2] = F128::NegateEx<false, false, true, true>(r2);
178 m.r[3] = F128::NegateEx<true, false, false, true>(r3);
179 return Vector4::Transform(v1, m);
186 float cos_half_rad) NLIB_NOEXCEPT {
187 SimdVector axis = F128::SetFloatToLane<3>(axis_normalized, 1.f);
188 f128 scale = F128::SetValue(sin_half_rad, sin_half_rad, sin_half_rad, cos_half_rad);
189 return F128::Mult(axis, scale);
193 NLIB_M(
SimdQuaternion) Quaternion::FromRotationMatrix(SimdMatrixArg m) NLIB_NOEXCEPT {
209 m00x = F128::NegateEx<false, true, true, false>(m00x);
210 m11x = F128::NegateEx<true, false, true, false>(m11x);
211 m22x = F128::NegateEx<true, true, false, false>(m22x);
212 f128 one = F128::SetOne();
213 elem = F128::Add(m00x, m11x);
214 elem = F128::Add(elem, m22x);
215 elem = F128::Add(elem, one);
228 xx_ge_yy = F128::CmpGe(t0, t1);
229 zz_ge_ww = F128::CmpGe(t2, t3);
231 t0 = F128::PairwiseMax(elem, elem);
234 elem_max = F128::PairwiseMax(t0, t0);
235 xxyy_ge_zzww = F128::CmpGe(t2, t3);
239 elem_max = F128::Mult(v0_25, elem_max);
240 f128 mult = F128::RecpSqrt(elem_max);
241 f128 v = F128::Mult(mult, elem_max);
242 mult = F128::Mult(v0_25, mult);
245 m01_20_12 = F128::Permute<1, 4, 2, 8>(r0, r2);
246 m01_20_12 = F128::Permute<0, 1, 6, 8>(m01_20_12, r1);
249 m10_02_21 = F128::Permute<0, 1, 5, 8>(r1, r2);
250 m10_02_21 = F128::Permute<0, 6, 2, 8>(m10_02_21, r0);
252 f128 ans_x_biggest, ans_y_biggest, ans_z_biggest, ans_w_biggest;
254 f128 tmp_x, tmp_y, tmp_z, tmp_w;
255 tmp_x = F128::NegateEx<false, false, true, true>(m10_02_21);
256 tmp_x = F128::Mult(mult, F128::Add(m01_20_12, tmp_x));
258 tmp_y = F128::NegateEx<false, true, false, true>(m10_02_21);
259 tmp_y = F128::Mult(mult, F128::Add(m01_20_12, tmp_y));
261 tmp_z = F128::NegateEx<true, false, false, true>(m10_02_21);
262 tmp_z = F128::Mult(mult, F128::Add(m01_20_12, tmp_z));
264 tmp_w = F128::Mult(mult, F128::Sub(m01_20_12, m10_02_21));
266 ans_x_biggest = F128::Permute<4, 0, 1, 2>(tmp_x, v);
267 ans_y_biggest = F128::Permute<0, 4, 2, 1>(tmp_y, v);
268 ans_z_biggest = F128::Permute<1, 2, 4, 0>(tmp_z, v);
269 ans_w_biggest = F128::Permute<2, 1, 0, 4>(tmp_w, v);
272 f128 ans_xy = F128::Select(xx_ge_yy, ans_x_biggest, ans_y_biggest);
273 f128 ans_zw = F128::Select(zz_ge_ww, ans_z_biggest, ans_w_biggest);
274 return F128::Select(xxyy_ge_zzww, ans_xy, ans_zw);
286 f128 x1 = F128::Permute<4, 0, 0, 0>(sin_half_xyz, cos_half_xyz);
287 f128 y1 = F128::Permute<1, 5, 1, 1>(sin_half_xyz, cos_half_xyz);
288 f128 z1 = F128::Permute<2, 2, 6, 2>(sin_half_xyz, cos_half_xyz);
289 x1 = F128::NegateEx<false, true, true, false>(x1);
290 f128 x0 = F128::Permute<0, 4, 4, 4>(sin_half_xyz, cos_half_xyz);
291 f128 y0 = F128::Permute<5, 1, 5, 5>(sin_half_xyz, cos_half_xyz);
292 f128 z0 = F128::Permute<6, 6, 2, 6>(sin_half_xyz, cos_half_xyz);
294 f128 z0x0y0 = F128::Mult(x0, y0);
295 f128 z1x1y1 = F128::Mult(x1, y1);
296 z0x0y0 = F128::Mult(z0x0y0, z0);
297 return F128::MultAdd(z1x1y1, z1, z0x0y0);
304 *rad = 2.f * F128::GetFloatFromLane<3>(F128::ArcCos(q));
312 f128 q0q1 = Dot(q0_normalized, q1_normalized);
316 f128 ss = F128::MultSub(q0q1, q0q1, F128::SetValue(1.f,
each_float));
317 f128 eps = F128::SetEpsilon();
318 tooNear = F128::CmpLe(ss, eps);
319 sp = F128::RecpSqrt(ss);
321 f128 ph = F128::ArcCos(q0q1);
322 f128 k = F128::SetValue(1.f - t, t, 0.f, 0.f);
323 f128 t0t1 = F128::Mult(sp, F128::Sin(F128::Mult(ph, k)));
327 SimdVector ret = F128::Mult(q0_normalized, t0);
328 ret = F128::MultAdd(q1_normalized, t1, ret);
329 return F128::Select(tooNear, q0_normalized, ret);
337 float t2 = (t - t * t) * 2.f;
340 return Slerp(q03, q12, t2);
347 if (fg <= 0.00001f && fg >= -0.00001f)
return q0;
350 return Slerp(q01, q02, g / fg);
361 f128 lensq_a01 = Quaternion::LengthSq(F128::Add(q0, q1));
362 f128 lensq_s01 = Quaternion::LengthSq(F128::Sub(q0, q1));
363 f128 cmp01 = F128::CmpLt(lensq_a01, lensq_s01);
364 f128 neg_q0 = F128::Negate(q0);
365 Q0 = F128::Select(cmp01, neg_q0, q0);
367 f128 lensq_a12 = Quaternion::LengthSq(F128::Add(q1, q2));
368 f128 lensq_s12 = Quaternion::LengthSq(F128::Sub(q1, q2));
369 f128 cmp12 = F128::CmpLt(lensq_a12, lensq_s12);
370 f128 neg_q2 = F128::Negate(q2);
371 Q2 = F128::Select(cmp12, neg_q2, q2);
373 f128 lensq_a23 = Quaternion::LengthSq(F128::Add(q2, q3));
374 f128 lensq_s23 = Quaternion::LengthSq(F128::Sub(q2, q3));
375 f128 cmp23 = F128::CmpLt(lensq_a23, lensq_s23);
376 f128 neg_q3 = F128::Negate(q3);
377 Q3 = F128::Select(cmp23, neg_q3, q3);
382 SimdQuaternion Ln_ExpQ1_Q2 = Quaternion::Ln(Quaternion::Mult(InvQ1, Q2));
383 SimdQuaternion Ln_ExpQ1_Q0 = Quaternion::Ln(Quaternion::Mult(InvQ1, Q0));
386 SimdQuaternion Ln_ExpQ2_Q3 = Quaternion::Ln(Quaternion::Mult(InvQ2, Q3));
387 SimdQuaternion Ln_ExpQ2_Q1 = Quaternion::Ln(Quaternion::Mult(InvQ2, Q1));
390 SimdQuaternion A = F128::Mult(v0_25, F128::Add(Ln_ExpQ1_Q2, Ln_ExpQ1_Q0));
391 SimdQuaternion B = F128::Mult(v0_25, F128::Add(Ln_ExpQ2_Q3, Ln_ExpQ2_Q1));
392 A = Quaternion::Exp(A);
393 B = Quaternion::Exp(B);
395 *a = Quaternion::Mult(Q1, A);
396 *b = Quaternion::Mult(Q2, B);
402 #endif // NLIB_DOXYGEN
407 #endif // INCLUDE_NN_NLIB_SIMD_SIMDQUATERNION_H_
#define NLIB_NOEXCEPT
Defines noexcept, or an equivalent, as appropriate for the environment.
The class with the collection of functions that handle quaternions.
f128arg SimdVectorArg
f128arg is defined using typedef.
constexpr const each_float_tag each_float
The tag for representing a single-precision floating-point number with an each_float_tag-type constant object.
f128arg SimdQuaternionArg
f128arg is defined using typedef.
The structure for keeping a 4x4 matrix.
nlib_f128x2_t f128x2
nlib_f128x2_t is defined using typedef.
Defines the class and functions for SIMD computations on single-precision floating-point numbers...
constexpr const each_select32_tag each_select32
The tag for representing the selection of a 32-bit lane with an each_select32_tag-type constant object.
nlib_f128_t f128
nlib_f128_t is defined using typedef.
Defines a four-dimensional vector.
f128 SimdQuaternion
f128 is defined using typedef. Used when handling quaternions.
f128 SimdVector
f128 is defined using typedef. Used when handling three-dimensional or four-dimensional vectors...