16 #ifndef INCLUDE_NN_NLIB_SIMD_SIMDVECTOR3_H_ 17 #define INCLUDE_NN_NLIB_SIMD_SIMDVECTOR3_H_ 29 template<
typename MyVector3>
32 template<
typename MyVector3>
40 static bool __vectorcall
46 template <
bool SetLane0,
bool SetLane1,
bool SetLane2,
bool SetLane3>
51 static float __vectorcall Normalize(
SimdVector* normalized,
53 static float __vectorcall NormalizeEst(
SimdVector* normalized,
85 #define NLIB_M(tp) inline tp __vectorcall 86 #define NLIB_MH(tp) inline tp __vectorcall 90 #ifdef NLIB_F128_SIMD_NOUSE 97 #elif defined(NLIB_SSE41) 98 __m128 x = _mm_load_ss(&p->
x);
99 __m128 y = _mm_load_ss(&p->
y);
100 __m128 z = _mm_load_ss(&p->
z);
101 __m128 xy = _mm_unpacklo_ps(x, y);
102 return _mm_shuffle_ps(xy, z, _MM_SHUFFLE(1, 0, 1, 0));
103 #elif defined(NLIB_NEON) 104 float32x2_t xy = vld1_f32(&p->
x);
105 float32x2_t z = vld1_lane_f32(&p->
z, xy, 0);
106 return vcombine_f32(xy, z);
109 ret.vec.ps[0][0] = p->
x;
110 ret.vec.ps[0][1] = p->
y;
111 ret.vec.ps[1][0] = p->
z;
116 template<
typename MyVector3>
123 return Vector3::LoadFloat3(reinterpret_cast<const Float3*>(&p->x));
128 #ifdef NLIB_F128_SIMD_NOUSE 132 #elif defined(NLIB_SSE41) 133 f128 y = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(1, 1, 1, 1));
134 f128 z = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(2, 2, 2, 2));
135 _mm_store_ss(&p->
x, vec);
136 _mm_store_ss(&p->
y, y);
137 _mm_store_ss(&p->
z, z);
138 #elif defined(NLIB_NEON) 139 float32x2_t lo = vget_low_f32(vec);
141 vst1q_lane_f32(&p->
z, vec, 2);
143 p->
x = vec.vec.ps[0][0];
144 p->
y = vec.vec.ps[0][1];
145 p->
z = vec.vec.ps[1][0];
149 template<
typename MyVector3>
156 Vector3::StoreFloat3(reinterpret_cast<Float3*>(&p->x), vec);
161 f128 mask = F128::CmpEq(vec1, vec2);
162 return F128::IsAllMaskTrue(F128::Swizzle<0, 1, 2, 2>(mask));
167 f128 mask = F128::CmpLt(vec1, vec2);
168 return F128::IsAllMaskTrue(F128::Swizzle<0, 1, 2, 2>(mask));
173 f128 mask = F128::CmpLe(vec1, vec2);
174 return F128::IsAllMaskTrue(F128::Swizzle<0, 1, 2, 2>(mask));
179 f128 mask = F128::CmpGt(vec1, vec2);
180 return F128::IsAllMaskTrue(F128::Swizzle<0, 1, 2, 2>(mask));
185 f128 mask = F128::CmpGe(vec1, vec2);
186 return F128::IsAllMaskTrue(F128::Swizzle<0, 1, 2, 2>(mask));
191 f128 mask = F128::CmpNe(vec1, vec2);
192 return !F128::IsAllMaskFalse(F128::Swizzle<0, 1, 2, 2>(mask));
195 inline bool __vectorcall
197 f128 mask = F128::CmpNearEq(vec1, vec2, eps);
198 return F128::IsAllMaskTrue(F128::Swizzle<0, 1, 2, 2>(mask));
203 f128 mask = F128::IsNaN(vec);
204 return F128::IsAllMaskTrue(F128::Swizzle<0, 1, 2, 2>(mask));
209 f128 mask = F128::IsInfinite(vec);
210 return F128::IsAllMaskTrue(F128::Swizzle<0, 1, 2, 2>(mask));
215 f128 mask = F128::InBound(vec, bounds);
216 return F128::IsAllMaskTrue(F128::Swizzle<0, 1, 2, 2>(mask));
221 #ifdef NLIB_F128_SIMD_NOUSE 222 float tmp = vec1.vec.v[0] * vec2.vec.v[0] + vec1.vec.v[1] * vec2.vec.v[1] +
223 vec1.vec.v[2] * vec2.vec.v[2];
225 rval.vec.v[0] = rval.vec.v[1] = rval.vec.v[2] = rval.vec.v[3] = tmp;
227 #elif defined(NLIB_SSE41) 228 return _mm_dp_ps(vec1, vec2, 0x7F);
229 #elif defined(NLIB_NEON) 231 float32x4_t tmp = vmulq_f32(vec1, vec2);
232 tmp = F128::Permute<0, 1, 2, 6>(tmp, vdupq_n_f32(0.f));
233 tmp = vpaddq_f32(tmp, tmp);
234 tmp = vpaddq_f32(tmp, tmp);
237 f128 tmp = vmulq_f32(vec1, vec2);
238 float32x2_t lo = vget_low_f32(tmp);
239 lo = vpadd_f32(lo, lo);
240 float32x2_t hi = vdup_lane_f32(vget_high_f32(tmp), 0);
241 lo = vadd_f32(lo, hi);
242 return vcombine_f32(lo, lo);
245 f128 tmp = F128::Mult(vec1, vec2);
246 f32x2 val = __PS_SUM0(tmp.vec.ps[0], tmp.vec.ps[0], tmp.vec.ps[0]);
247 val = __PS_ADD(val, tmp.vec.ps[1]);
249 ret.vec.ps[0] = ret.vec.ps[1] = __PS_FDUP(val[0]);
254 template <
bool SetLane0,
bool SetLane1,
bool SetLane2,
bool SetLane3>
257 #if !defined(NLIB_F128_SIMD_NOUSE) && defined(NLIB_SSE41) 258 return _mm_dp_ps(vec1, vec2,
259 (0x70 | (SetLane0 ? 1 : 0) | (SetLane1 ? 2 : 0) |
260 (SetLane2 ? 4 : 0) | (SetLane3 ? 8 : 0)));
262 return F128::Splat<SetLane0, SetLane1, SetLane2, SetLane3>(F128::SetZero(), Dot(vec1, vec2));
268 SimdVector rval = F128::Mult(F128::Swizzle<1, 2, 0, -1>(vec1),
269 F128::Swizzle<2, 0, 1, -1>(vec2));
271 rval = F128::MultSub(F128::Swizzle<2, 0, 1, -1>(vec1), F128::Swizzle<1, 2, 0, -1>(vec2), rval);
278 f128 dot = Vector3::Dot(vec, vec);
279 #if defined(NLIB_NEON) && !defined(NLIB_F128_SIMD_NOUSE) 281 x = vrsqrteq_f32(dot);
282 x = vmulq_f32(x, vrsqrtsq_f32(dot, vmulq_f32(x, x)));
283 f128 rsqrt = vmulq_f32(x, vrsqrtsq_f32(dot, vmulq_f32(x, x)));
285 f128 rsqrt = F128::RecpSqrt(dot);
287 f128 inf = F128::SetInfinity();
288 f128 eqzero = F128::CmpEqZero(dot);
289 f128 eqinf = F128::CmpEq(dot, inf);
291 f128 nan = F128::SetNaN();
292 ret = F128::AndNot(eqzero, ret);
293 ret = F128::Select(eqinf, nan, ret);
298 f128 dot = Vector3::Dot(vec, vec);
299 f128 rsqrt = F128::RecpSqrt(dot);
300 *normalized = F128::Mult(vec, rsqrt);
301 return F128::GetFloatFromLane<0>(dot);
309 return F128::Sqrt(Dot(vec, vec));
314 return F128::SqrtEst(Dot(vec, vec));
319 return F128::RecpSqrt(Dot(vec, vec));
324 return F128::RecpSqrtEst(Dot(vec, vec));
328 return F128::Mult(vec, RecpLengthEst(vec));
332 f128 dot = Vector3::Dot(vec, vec);
333 f128 rsqrt = F128::RecpSqrtEst(dot);
334 *normalized = F128::Mult(vec, rsqrt);
335 return F128::GetFloatFromLane<0>(dot);
341 f128 ret = Dot(vec1_normalized, vec2_normalized);
342 ret = F128::Clamp(ret, F128::SetNegativeOne(), F128::SetOne());
343 return F128::ArcCos(ret);
347 f128 s = Dot(vec, normal);
349 return F128::MultSub(s, normal, vec);
365 return F128::Add(m.
r[3], ret);
370 f128 tmp = Vector3::Transform(vec, m);
371 return F128::Div(tmp, F128::SetValue<3>(tmp,
each_select32));
394 return Quaternion::Mult(Quaternion::Mult(conj, v), q_normalized);
401 return Quaternion::Mult(Quaternion::Mult(q_normalized, v), conj);
407 #endif // NLIB_DOXYGEN 412 #endif // INCLUDE_NN_NLIB_SIMD_SIMDVECTOR3_H_ float x
The x-coordinate of the 3D vector.
f128arg SimdVectorArg
f128arg is defined using typedef.
f128arg SimdQuaternionArg
f128arg is defined using typedef.
The class with the collection of functions that perform calculations on three-dimensional vectors...
f128 r[4]
Keeps each row of a 4x4 matrix.
The structure for keeping a 4x4 matrix.
#define NLIB_NOEXCEPT
Expands to noexcept, or an equivalent, as appropriate for the build environment.
Defines the class and functions for SIMD computations on single-precision floating-point numbers...
The type for reading and writing three-dimensional vectors in memory. Keeps float-type x, y, and z values.
constexpr const each_select32_tag each_select32
The tag for representing the selection of a 32-bit lane with an each_select32_tag-type constant object.
float y
The y-coordinate of the 3D vector.
nlib_f128_t f128
nlib_f128_t is defined using typedef.
float z
The z-coordinate of the 3D vector.
#define NLIB_STATIC_ASSERT(exp)
Defines a static assertion. Uses static_assert if it is available for use.
Defines a four-dimensional vector.
f128 SimdQuaternion
f128 is defined using typedef. Used when handling quaternions.
f128 SimdVector
f128 is defined using typedef. Used when handling three-dimensional or four-dimensional vectors...