3 #ifndef INCLUDE_NN_NLIB_SIMD_SIMDQUATERNION_H_ 4 #define INCLUDE_NN_NLIB_SIMD_SIMDQUATERNION_H_ 36 FromRotationAxisAndSinCos(
SimdVectorArg axis_normalized,
float sin_half_rad,
63 #define NLIB_M(tp) inline tp __vectorcall 70 return F128::NegateEx<true, true, true, false>(q);
78 return Vector4::LengthSq(q);
83 return Vector4::RecpLength(q);
88 return Vector4::Normalize(q);
93 return Vector4::NormalizeEst(q);
98 f128 lenSq = LengthSq(q);
101 f128 eps = F128::SetEpsilon();
102 f128 nearZero = F128::CmpLe(lenSq, eps);
105 return F128::AndNot(nearZero, inv);
110 static const float one_eps = 1.f - 0.00001f;
111 f128 q0 = F128::SetZeroToLane<3>(q_normalized);
113 f128 xyz_len = Vector4::Length(q0);
114 f128 w_not_near_one = F128::InBound(w, F128::SetValue(one_eps,
each_float));
116 f128 theta = F128::ArcTan2(xyz_len, w);
117 f128 result = F128::Div(theta, xyz_len);
118 result = F128::Mult(q0, result);
119 result = F128::Select(w_not_near_one, result, q0);
125 f128 xyz_len = Vector4::Length(F128::SetZeroToLane<3>(q));
126 f128x2 sc = F128::SinCos(xyz_len);
128 f128 result = F128::Mult(q, F128::Div(sc.val[0], xyz_len));
129 f128 near_zero = F128::CmpNearEqZero(xyz_len, F128::SetEpsilon());
130 result = F128::Select(near_zero, q, result);
131 result = F128::Splat<false, false, false, true>(result, sc.val[1]);
137 return Vector4::CmpEq(q, Identity());
142 return Vector4::IsInfinite(q);
147 return Vector4::IsNaN(q);
153 return Vector4::CmpEq(q0, q1);
159 return Vector4::CmpNe(q0, q1);
164 return Vector4::Dot(q0, q1);
169 SimdVector v1 = F128::Swizzle<3, 0, 1, 2>(q1);
170 f128 r1 = F128::Swizzle<3, 2, 1, 0>(q0);
171 f128 r2 = F128::Swizzle<2, 3, 0, 1>(q0);
172 f128 r3 = F128::Swizzle<1, 0, 3, 2>(q0);
175 m.r[1] = F128::NegateEx<false, true, false, true>(r1);
176 m.r[2] = F128::NegateEx<false, false, true, true>(r2);
177 m.r[3] = F128::NegateEx<true, false, false, true>(r3);
178 return Vector4::Transform(v1, m);
186 SimdVector axis = F128::SetFloatToLane<3>(axis_normalized, 1.f);
187 f128 scale = F128::SetValue(sin_half_rad, sin_half_rad, sin_half_rad, cos_half_rad);
188 return F128::Mult(axis, scale);
208 m00x = F128::NegateEx<false, true, true, false>(m00x);
209 m11x = F128::NegateEx<true, false, true, false>(m11x);
210 m22x = F128::NegateEx<true, true, false, false>(m22x);
211 f128 one = F128::SetOne();
212 elem = F128::Add(m00x, m11x);
213 elem = F128::Add(elem, m22x);
214 elem = F128::Add(elem, one);
227 xx_ge_yy = F128::CmpGe(t0, t1);
228 zz_ge_ww = F128::CmpGe(t2, t3);
230 t0 = F128::PairwiseMax(elem, elem);
233 elem_max = F128::PairwiseMax(t0, t0);
234 xxyy_ge_zzww = F128::CmpGe(t2, t3);
238 elem_max = F128::Mult(v0_25, elem_max);
239 f128 mult = F128::RecpSqrt(elem_max);
240 f128 v = F128::Mult(mult, elem_max);
241 mult = F128::Mult(v0_25, mult);
244 m01_20_12 = F128::Permute<1, 4, 2, -1>(r0, r2);
245 m01_20_12 = F128::Permute<0, 1, 6, -1>(m01_20_12, r1);
248 m10_02_21 = F128::Permute<0, 1, 5, -1>(r1, r2);
249 m10_02_21 = F128::Permute<0, 6, 2, -1>(m10_02_21, r0);
251 f128 ans_x_biggest, ans_y_biggest, ans_z_biggest, ans_w_biggest;
253 f128 tmp_x, tmp_y, tmp_z, tmp_w;
254 tmp_x = F128::NegateEx<false, false, true, true>(m10_02_21);
255 tmp_x = F128::Mult(mult, F128::Add(m01_20_12, tmp_x));
257 tmp_y = F128::NegateEx<false, true, false, true>(m10_02_21);
258 tmp_y = F128::Mult(mult, F128::Add(m01_20_12, tmp_y));
260 tmp_z = F128::NegateEx<true, false, false, true>(m10_02_21);
261 tmp_z = F128::Mult(mult, F128::Add(m01_20_12, tmp_z));
263 tmp_w = F128::Mult(mult, F128::Sub(m01_20_12, m10_02_21));
265 ans_x_biggest = F128::Permute<4, 0, 1, 2>(tmp_x, v);
266 ans_y_biggest = F128::Permute<0, 4, 2, 1>(tmp_y, v);
267 ans_z_biggest = F128::Permute<1, 2, 4, 0>(tmp_z, v);
268 ans_w_biggest = F128::Permute<2, 1, 0, 4>(tmp_w, v);
271 f128 ans_xy = F128::Select(xx_ge_yy, ans_x_biggest, ans_y_biggest);
272 f128 ans_zw = F128::Select(zz_ge_ww, ans_z_biggest, ans_w_biggest);
273 return F128::Select(xxyy_ge_zzww, ans_xy, ans_zw);
285 f128 x1 = F128::Permute<4, 0, 0, 0>(sin_half_xyz, cos_half_xyz);
286 f128 y1 = F128::Permute<1, 5, 1, 1>(sin_half_xyz, cos_half_xyz);
287 f128 z1 = F128::Permute<2, 2, 6, 2>(sin_half_xyz, cos_half_xyz);
288 x1 = F128::NegateEx<false, true, true, false>(x1);
289 f128 x0 = F128::Permute<0, 4, 4, 4>(sin_half_xyz, cos_half_xyz);
290 f128 y0 = F128::Permute<5, 1, 5, 5>(sin_half_xyz, cos_half_xyz);
291 f128 z0 = F128::Permute<6, 6, 2, 6>(sin_half_xyz, cos_half_xyz);
293 f128 z0x0y0 = F128::Mult(x0, y0);
294 f128 z1x1y1 = F128::Mult(x1, y1);
295 z0x0y0 = F128::Mult(z0x0y0, z0);
296 return F128::MultAdd(z1x1y1, z1, z0x0y0);
303 *rad = 2.f * F128::GetFloatFromLane<3>(F128::ArcCos(q));
311 f128 q0q1 = Dot(q0_normalized, q1_normalized);
315 f128 ss = F128::MultSub(q0q1, q0q1, F128::SetValue(1.f,
each_float));
316 f128 eps = F128::SetEpsilon();
317 tooNear = F128::CmpLe(ss, eps);
318 sp = F128::RecpSqrt(ss);
320 f128 ph = F128::ArcCos(q0q1);
321 f128 k = F128::SetValue(1.f - t, t, 0.f, 0.f);
322 f128 t0t1 = F128::Mult(sp, F128::Sin(F128::Mult(ph, k)));
326 SimdVector ret = F128::Mult(q0_normalized, t0);
327 ret = F128::MultAdd(q1_normalized, t1, ret);
328 return F128::Select(tooNear, q0_normalized, ret);
336 float t2 = (t - t * t) * 2.f;
339 return Slerp(q03, q12, t2);
346 if (fg <= 0.00001f && fg >= -0.00001f)
return q0;
349 return Slerp(q01, q02, g / fg);
360 f128 lensq_a01 = Quaternion::LengthSq(F128::Add(q0, q1));
361 f128 lensq_s01 = Quaternion::LengthSq(F128::Sub(q0, q1));
362 f128 cmp01 = F128::CmpLt(lensq_a01, lensq_s01);
363 f128 neg_q0 = F128::Negate(q0);
364 Q0 = F128::Select(cmp01, neg_q0, q0);
366 f128 lensq_a12 = Quaternion::LengthSq(F128::Add(q1, q2));
367 f128 lensq_s12 = Quaternion::LengthSq(F128::Sub(q1, q2));
368 f128 cmp12 = F128::CmpLt(lensq_a12, lensq_s12);
369 f128 neg_q2 = F128::Negate(q2);
370 Q2 = F128::Select(cmp12, neg_q2, q2);
372 f128 lensq_a23 = Quaternion::LengthSq(F128::Add(q2, q3));
373 f128 lensq_s23 = Quaternion::LengthSq(F128::Sub(q2, q3));
374 f128 cmp23 = F128::CmpLt(lensq_a23, lensq_s23);
375 f128 neg_q3 = F128::Negate(q3);
376 Q3 = F128::Select(cmp23, neg_q3, q3);
381 SimdQuaternion Ln_ExpQ1_Q2 = Quaternion::Ln(Quaternion::Mult(InvQ1, Q2));
382 SimdQuaternion Ln_ExpQ1_Q0 = Quaternion::Ln(Quaternion::Mult(InvQ1, Q0));
385 SimdQuaternion Ln_ExpQ2_Q3 = Quaternion::Ln(Quaternion::Mult(InvQ2, Q3));
386 SimdQuaternion Ln_ExpQ2_Q1 = Quaternion::Ln(Quaternion::Mult(InvQ2, Q1));
389 SimdQuaternion A = F128::Mult(v0_25, F128::Add(Ln_ExpQ1_Q2, Ln_ExpQ1_Q0));
390 SimdQuaternion B = F128::Mult(v0_25, F128::Add(Ln_ExpQ2_Q3, Ln_ExpQ2_Q1));
391 A = Quaternion::Exp(A);
392 B = Quaternion::Exp(B);
394 *a = Quaternion::Mult(Q1, A);
395 *b = Quaternion::Mult(Q2, B);
401 #endif // NLIB_DOXYGEN 406 #endif // INCLUDE_NN_NLIB_SIMD_SIMDQUATERNION_H_ The class with the collection of functions that handle quaternions.
f128arg SimdVectorArg
f128arg is defined using typedef.
The type for two SIMD registers for 128-bit, single-precision, floating-point numbers.
constexpr const each_float_tag each_float
The tag for representing a single-precision floating-point number with an each_float_tag-type constant object.
f128arg SimdQuaternionArg
f128arg is defined using typedef.
f128 r[4]
Keeps each row of a 4x4 matrix.
The structure for keeping a 4x4 matrix.
#define NLIB_NOEXCEPT
Defines noexcept geared to the environment, or the equivalent.
Defines the class and functions for SIMD computations on single-precision floating-point numbers...
constexpr const each_select32_tag each_select32
The tag for representing the selection of a 32-bit lane with an each_select32_tag-type constant object.
nlib_f128_t f128
nlib_f128_t is defined using typedef.
Defines a four-dimensional vector.
f128 SimdQuaternion
f128 is defined using typedef. Used when handling quaternions.
f128 SimdVector
f128 is defined using typedef. Used when handling three-dimensional or four-dimensional vectors...