16 #ifndef INCLUDE_NN_NLIB_SIMD_SIMDQUATERNION_H_ 17 #define INCLUDE_NN_NLIB_SIMD_SIMDQUATERNION_H_ 78 #define NLIB_M(tp) inline tp __vectorcall 82 return F128::Set0001();
87 return F128::NegateEx<true, true, true, false>(q);
92 return Vector4::Length(q);
97 return Vector4::LengthSq(q);
102 return Vector4::RecpLength(q);
107 return Vector4::Normalize(q);
112 return Vector4::NormalizeEst(q);
117 f128 len_sq = LengthSq(q);
120 f128 eps = F128::SetEpsilon();
121 f128 near_zero = F128::CmpLe(len_sq, eps);
124 return F128::AndNot(near_zero, inv);
129 static const float one_eps = 1.f - 0.00001f;
130 f128 q0 = F128::SetZeroToLane<3>(q_normalized);
132 f128 xyz_len = Vector4::Length(q0);
133 f128 w_not_near_one = F128::InBound(w, F128::SetValue(one_eps,
each_float));
135 f128 theta = F128::ArcTan2(xyz_len, w);
136 f128 result = F128::Div(theta, xyz_len);
137 result = F128::Mult(q0, result);
138 result = F128::Select(w_not_near_one, result, q0);
144 f128 xyz_len = Vector4::Length(F128::SetZeroToLane<3>(q));
145 f128x2 sc = F128::SinCos(xyz_len);
147 f128 result = F128::Mult(q, F128::Div(sc.val[0], xyz_len));
148 f128 near_zero = F128::CmpNearEqZero(xyz_len, F128::SetEpsilon());
149 result = F128::Select(near_zero, q, result);
150 result = F128::Splat<false, false, false, true>(result, sc.val[1]);
156 return Vector4::CmpEq(q, Identity());
161 return Vector4::IsInfinite(q);
166 return Vector4::IsNaN(q);
172 return Vector4::CmpEq(q0, q1);
178 return Vector4::CmpNe(q0, q1);
183 return Vector4::Dot(q0, q1);
188 SimdVector v1 = F128::Swizzle<3, 0, 1, 2>(q1);
189 f128 r1 = F128::Swizzle<3, 2, 1, 0>(q0);
190 f128 r2 = F128::Swizzle<2, 3, 0, 1>(q0);
191 f128 r3 = F128::Swizzle<1, 0, 3, 2>(q0);
194 m.r[1] = F128::NegateEx<false, true, false, true>(r1);
195 m.r[2] = F128::NegateEx<false, false, true, true>(r2);
196 m.r[3] = F128::NegateEx<true, false, false, true>(r3);
197 return Vector4::Transform(v1, m);
203 Quaternion::FromRotationAxisAndSinCos(
SimdVectorArg axis_normalized,
float sin_half_rad,
205 SimdVector axis = F128::SetFloatToLane<3>(axis_normalized, 1.f);
206 f128 scale = F128::SetValue(sin_half_rad, sin_half_rad, sin_half_rad, cos_half_rad);
207 return F128::Mult(axis, scale);
227 m00x = F128::NegateEx<false, true, true, false>(m00x);
228 m11x = F128::NegateEx<true, false, true, false>(m11x);
229 m22x = F128::NegateEx<true, true, false, false>(m22x);
230 f128 one = F128::SetOne();
231 elem = F128::Add(m00x, m11x);
232 elem = F128::Add(elem, m22x);
233 elem = F128::Add(elem, one);
246 xx_ge_yy = F128::CmpGe(t0, t1);
247 zz_ge_ww = F128::CmpGe(t2, t3);
249 t0 = F128::PairwiseMax(elem, elem);
252 elem_max = F128::PairwiseMax(t0, t0);
253 xxyy_ge_zzww = F128::CmpGe(t2, t3);
257 elem_max = F128::Mult(v0_25, elem_max);
258 f128 mult = F128::RecpSqrt(elem_max);
259 f128 v = F128::Mult(mult, elem_max);
260 mult = F128::Mult(v0_25, mult);
263 m01_20_12 = F128::Permute<1, 4, 2, -1>(r0, r2);
264 m01_20_12 = F128::Permute<0, 1, 6, -1>(m01_20_12, r1);
267 m10_02_21 = F128::Permute<0, 1, 5, -1>(r1, r2);
268 m10_02_21 = F128::Permute<0, 6, 2, -1>(m10_02_21, r0);
270 f128 ans_x_biggest, ans_y_biggest, ans_z_biggest, ans_w_biggest;
272 f128 tmp_x, tmp_y, tmp_z, tmp_w;
273 tmp_x = F128::NegateEx<false, false, true, true>(m10_02_21);
274 tmp_x = F128::Mult(mult, F128::Add(m01_20_12, tmp_x));
276 tmp_y = F128::NegateEx<false, true, false, true>(m10_02_21);
277 tmp_y = F128::Mult(mult, F128::Add(m01_20_12, tmp_y));
279 tmp_z = F128::NegateEx<true, false, false, true>(m10_02_21);
280 tmp_z = F128::Mult(mult, F128::Add(m01_20_12, tmp_z));
282 tmp_w = F128::Mult(mult, F128::Sub(m01_20_12, m10_02_21));
284 ans_x_biggest = F128::Permute<4, 0, 1, 2>(tmp_x, v);
285 ans_y_biggest = F128::Permute<0, 4, 2, 1>(tmp_y, v);
286 ans_z_biggest = F128::Permute<1, 2, 4, 0>(tmp_z, v);
287 ans_w_biggest = F128::Permute<2, 1, 0, 4>(tmp_w, v);
290 f128 ans_xy = F128::Select(xx_ge_yy, ans_x_biggest, ans_y_biggest);
291 f128 ans_zw = F128::Select(zz_ge_ww, ans_z_biggest, ans_w_biggest);
292 return F128::Select(xxyy_ge_zzww, ans_xy, ans_zw);
304 f128 x1 = F128::Permute<4, 0, 0, 0>(sin_half_xyz, cos_half_xyz);
305 f128 y1 = F128::Permute<1, 5, 1, 1>(sin_half_xyz, cos_half_xyz);
306 f128 z1 = F128::Permute<2, 2, 6, 2>(sin_half_xyz, cos_half_xyz);
307 x1 = F128::NegateEx<false, true, true, false>(x1);
308 f128 x0 = F128::Permute<0, 4, 4, 4>(sin_half_xyz, cos_half_xyz);
309 f128 y0 = F128::Permute<5, 1, 5, 5>(sin_half_xyz, cos_half_xyz);
310 f128 z0 = F128::Permute<6, 6, 2, 6>(sin_half_xyz, cos_half_xyz);
312 f128 z0x0y0 = F128::Mult(x0, y0);
313 f128 z1x1y1 = F128::Mult(x1, y1);
314 z0x0y0 = F128::Mult(z0x0y0, z0);
315 return F128::MultAdd(z1x1y1, z1, z0x0y0);
322 *rad = 2.f * F128::GetFloatFromLane<3>(F128::ArcCos(q));
331 f128 q0q1 = Dot(q0_normalized, q1_normalized);
335 f128 ss = F128::MultSub(q0q1, q0q1, F128::SetValue(1.f,
each_float));
336 f128 eps = F128::SetEpsilon();
337 too_near = F128::CmpLe(ss, eps);
338 sp = F128::RecpSqrt(ss);
340 f128 ph = F128::ArcCos(q0q1);
341 f128 k = F128::SetValue(1.f - t, t, 0.f, 0.f);
342 f128 t0t1 = F128::Mult(sp, F128::Sin(F128::Mult(ph, k)));
346 SimdVector ret = F128::Mult(q0_normalized, t0);
347 ret = F128::MultAdd(q1_normalized, t1, ret);
348 return F128::Select(too_near, q0_normalized, ret);
356 float t2 = (t - t * t) * 2.f;
359 return Slerp(q03, q12, t2);
367 if (fg <= 0.00001f && fg >= -0.00001f)
return q0;
370 return Slerp(q01, q02, g / fg);
380 f128 lensq_a01 = Quaternion::LengthSq(F128::Add(q0, q1));
381 f128 lensq_s01 = Quaternion::LengthSq(F128::Sub(q0, q1));
382 f128 cmp01 = F128::CmpLt(lensq_a01, lensq_s01);
383 f128 neg_q0 = F128::Negate(q0);
384 Q0 = F128::Select(cmp01, neg_q0, q0);
386 f128 lensq_a12 = Quaternion::LengthSq(F128::Add(q1, q2));
387 f128 lensq_s12 = Quaternion::LengthSq(F128::Sub(q1, q2));
388 f128 cmp12 = F128::CmpLt(lensq_a12, lensq_s12);
389 f128 neg_q2 = F128::Negate(q2);
390 Q2 = F128::Select(cmp12, neg_q2, q2);
392 f128 lensq_a23 = Quaternion::LengthSq(F128::Add(q2, q3));
393 f128 lensq_s23 = Quaternion::LengthSq(F128::Sub(q2, q3));
394 f128 cmp23 = F128::CmpLt(lensq_a23, lensq_s23);
395 f128 neg_q3 = F128::Negate(q3);
396 Q3 = F128::Select(cmp23, neg_q3, q3);
401 SimdQuaternion Ln_ExpQ1_Q2 = Quaternion::Ln(Quaternion::Mult(InvQ1, Q2));
402 SimdQuaternion Ln_ExpQ1_Q0 = Quaternion::Ln(Quaternion::Mult(InvQ1, Q0));
405 SimdQuaternion Ln_ExpQ2_Q3 = Quaternion::Ln(Quaternion::Mult(InvQ2, Q3));
406 SimdQuaternion Ln_ExpQ2_Q1 = Quaternion::Ln(Quaternion::Mult(InvQ2, Q1));
409 SimdQuaternion A = F128::Mult(v0_25, F128::Add(Ln_ExpQ1_Q2, Ln_ExpQ1_Q0));
410 SimdQuaternion B = F128::Mult(v0_25, F128::Add(Ln_ExpQ2_Q3, Ln_ExpQ2_Q1));
411 A = Quaternion::Exp(A);
412 B = Quaternion::Exp(B);
414 *a = Quaternion::Mult(Q1, A);
415 *b = Quaternion::Mult(Q2, B);
421 #endif // NLIB_DOXYGEN 426 #endif // INCLUDE_NN_NLIB_SIMD_SIMDQUATERNION_H_ The class with the collection of functions that handle quaternions.
f128arg SimdVectorArg
f128arg is defined using typedef.
constexpr const each_float_tag each_float
The tag for representing a single-precision floating-point number with an each_float_tag-type constant object.
f128arg SimdQuaternionArg
f128arg is defined using typedef.
The structure for keeping a 4x4 matrix.
nlib_f128x2_t f128x2
nlib_f128x2_t is defined using typedef.
#define NLIB_NOEXCEPT
Defines noexcept geared to the environment, or the equivalent.
Defines the class and functions for SIMD computations on single-precision floating-point numbers...
constexpr const each_select32_tag each_select32
The tag for representing the selection of a 32-bit lane with an each_select32_tag-type constant object.
nlib_f128_t f128
nlib_f128_t is defined using typedef.
Defines a four-dimensional vector.
f128 SimdQuaternion
f128 is defined using typedef. Used when handling quaternions.
f128 SimdVector
f128 is defined using typedef. Used when handling three-dimensional or four-dimensional vectors...