nlib
SimdVector3.h
[詳解]
1 
2 #pragma once
3 #ifndef INCLUDE_NN_NLIB_SIMD_SIMDVECTOR3_H_
4 #define INCLUDE_NN_NLIB_SIMD_SIMDVECTOR3_H_
5 
9 
10 NLIB_NAMESPACE_BEGIN
11 namespace simd {
12 
14  public:
15  static SimdVector __vectorcall LoadFloat3(const Float3* p) NLIB_NOEXCEPT;
16  template<typename MyVector3>
17  static SimdVector __vectorcall LoadFloat3(const MyVector3* p) NLIB_NOEXCEPT;
18  static void __vectorcall StoreFloat3(Float3* p, SimdVectorArg vec) NLIB_NOEXCEPT;
19  template<typename MyVector3>
20  static void __vectorcall StoreFloat3(MyVector3* p, SimdVectorArg vec) NLIB_NOEXCEPT;
21  static bool __vectorcall CmpEq(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT;
22  static bool __vectorcall CmpLt(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT;
23  static bool __vectorcall CmpLe(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT;
24  static bool __vectorcall CmpGt(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT;
25  static bool __vectorcall CmpGe(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT;
26  static bool __vectorcall CmpNe(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT;
27  static bool __vectorcall
28  CmpNearEq(SimdVectorArg vec1, SimdVectorArg vec2, SimdVectorArg eps) NLIB_NOEXCEPT;
29  static bool __vectorcall IsNaN(SimdVectorArg vec) NLIB_NOEXCEPT;
30  static bool __vectorcall IsInfinite(SimdVectorArg vec) NLIB_NOEXCEPT;
31  static bool __vectorcall InBound(SimdVectorArg vec, SimdVectorArg bounds) NLIB_NOEXCEPT;
32  static f128 __vectorcall Dot(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT;
33  template <bool SetLane0, bool SetLane1, bool SetLane2, bool SetLane3>
34  static f128 __vectorcall DotEx(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT;
35  static SimdVector __vectorcall Cross(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT;
36  static SimdVector __vectorcall Normalize(SimdVectorArg vec) NLIB_NOEXCEPT;
37  static SimdVector __vectorcall NormalizeEst(SimdVectorArg vec) NLIB_NOEXCEPT;
38  static f128 __vectorcall LengthSq(SimdVectorArg vec) NLIB_NOEXCEPT;
39  static f128 __vectorcall Length(SimdVectorArg vec) NLIB_NOEXCEPT;
40  static f128 __vectorcall LengthEst(SimdVectorArg vec) NLIB_NOEXCEPT;
41  static f128 __vectorcall RecpLength(SimdVectorArg vec) NLIB_NOEXCEPT;
42  static f128 __vectorcall RecpLengthEst(SimdVectorArg vec) NLIB_NOEXCEPT;
43  static f128 __vectorcall GetAngle(SimdVectorArg vec1_normalized,
44  SimdVectorArg vec2_normalized) NLIB_NOEXCEPT;
45  static SimdVector __vectorcall Reflect(SimdVectorArg vec, SimdVectorArg normal) NLIB_NOEXCEPT;
46  static SimdVector __vectorcall Orthogonal(SimdVector vec) NLIB_NOEXCEPT; // not implemented yet
47 
48  static SimdVector __vectorcall Transform(SimdVectorArg vec, SimdMatrixArg m) NLIB_NOEXCEPT;
49  static SimdVector __vectorcall
50  TransformCoord(SimdVectorArg vec, SimdMatrixArg m) NLIB_NOEXCEPT;
51  static SimdVector __vectorcall
52  TransformNormal(SimdVectorArg vec, SimdMatrixArg m) NLIB_NOEXCEPT;
53 
54  static SimdVector __vectorcall
55  Rotate(SimdVectorArg vec, SimdQuaternionArg q_normalized) NLIB_NOEXCEPT;
56  static SimdVector __vectorcall
57  InvRotate(SimdVectorArg vec, SimdQuaternionArg q_normalized) NLIB_NOEXCEPT;
58 
59  // Transform****Stream
60  // Project, Unproject
61 
62  private:
63  Vector3(); // forbidden
64 };
65 
66 #ifndef NLIB_DOXYGEN
67 
68 #define NLIB_M(tp) inline tp __vectorcall
69 #define NLIB_MH(tp) inline tp __vectorcall
70 
// Loads a 3D vector from memory into a SIMD register.
// p must be at least 4-byte aligned; exactly the 12 bytes x, y, z are read.
// Lane 3 of the result is unspecified on some platforms (0 on the scalar
// and SSE paths) and is ignored by all Vector3 operations.
NLIB_M(SimdVector) Vector3::LoadFloat3(const Float3* p) NLIB_NOEXCEPT {
#ifdef NLIB_F128_SIMD_NOUSE
    // Scalar fallback: copy element by element and zero-fill lane 3.
    f128 ret;
    ret.vec.v[0] = p->x;
    ret.vec.v[1] = p->y;
    ret.vec.v[2] = p->z;
    ret.vec.v[3] = 0.f;
    return ret;
#elif defined(NLIB_SSE41)
    // Three scalar loads avoid reading past the 12 bytes of Float3.
    __m128 x = _mm_load_ss(&p->x);
    __m128 y = _mm_load_ss(&p->y);
    __m128 z = _mm_load_ss(&p->z);
    __m128 xy = _mm_unpacklo_ps(x, y);                       // { x, y, 0, 0 }
    return _mm_shuffle_ps(xy, z, _MM_SHUFFLE(1, 0, 1, 0));   // { x, y, z, 0 }
#elif defined(NLIB_NEON)
    float32x2_t xy = vld1_f32(&p->x);              // { x, y }
    float32x2_t z = vld1_lane_f32(&p->z, xy, 0);   // { z, y } - lane 1 is a don't-care
    return vcombine_f32(xy, z);                    // { x, y, z, y }
#elif defined(NLIB_CAFE_PPC)
    // Paired-single layout: ps[0] holds lanes 0-1, ps[1] holds lanes 2-3.
    f128 ret;
    ret.vec.ps[0][0] = p->x;
    ret.vec.ps[0][1] = p->y;
    ret.vec.ps[1][0] = p->z;
    return ret;
#endif
}
98 
99 template<typename MyVector3>
100 // MyVector is a structure like 'struct MyVector3 { float x, y, z; };'
101 // DirectX::XMFLOAT3, DirectX::XMFLOAT3A for example
102 NLIB_MH(SimdVector) Vector3::LoadFloat3(const MyVector3* p) NLIB_NOEXCEPT {
103  NLIB_STATIC_ASSERT(sizeof(p->x) == 4);
104  NLIB_STATIC_ASSERT(sizeof(p->y) == 4);
105  NLIB_STATIC_ASSERT(sizeof(p->z) == 4);
106  return Vector3::LoadFloat3(reinterpret_cast<const Float3*>(&p->x));
107 }
108 
// Stores the x, y, z lanes of vec to *p; lane 3 is not stored.
// p must be at least 4-byte aligned; exactly 12 bytes are written.
NLIB_M(void) Vector3::StoreFloat3(Float3* p, SimdVectorArg vec) NLIB_NOEXCEPT { // NOLINT
#ifdef NLIB_F128_SIMD_NOUSE
    p->x = vec.vec.v[0];
    p->y = vec.vec.v[1];
    p->z = vec.vec.v[2];
#elif defined(NLIB_SSE41)
    // Broadcast lanes 1 and 2 to lane 0 so each element can be written
    // with a scalar store (no store past the 12 bytes of Float3).
    f128 y = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(1, 1, 1, 1));
    f128 z = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(2, 2, 2, 2));
    _mm_store_ss(&p->x, vec);
    _mm_store_ss(&p->y, y);
    _mm_store_ss(&p->z, z);
#elif defined(NLIB_NEON)
    float32x2_t lo = vget_low_f32(vec);  // { x, y }
    vst1_f32(&p->x, lo);
    vst1q_lane_f32(&p->z, vec, 2);       // store lane 2 (z) only
#elif defined(NLIB_CAFE_PPC)
    p->x = vec.vec.ps[0][0];
    p->y = vec.vec.ps[0][1];
    p->z = vec.vec.ps[1][0];
#endif
}
131 
132 template<typename MyVector3>
133 NLIB_MH(void) Vector3::StoreFloat3(MyVector3* p, SimdVectorArg vec) NLIB_NOEXCEPT { // NOLINT
134  // MyVector is a structure like 'struct MyVector3 { float x, y, z; };'
135  // DirectX::XMFLOAT3, DirectX::XMFLOAT3A for example
136  NLIB_STATIC_ASSERT(sizeof(p->x) == 4);
137  NLIB_STATIC_ASSERT(sizeof(p->y) == 4);
138  NLIB_STATIC_ASSERT(sizeof(p->z) == 4);
139  Vector3::StoreFloat3(reinterpret_cast<Float3*>(&p->x), vec);
140 }
141 
#if defined(NLIB_NEON)
// Shared body for the NEON Vector3 comparisons below.
// op(vec1, vec2) yields a per-lane mask (all-ones for true lanes). The two
// byte zips compress one byte from each 32-bit lane into a single u32, so
// the low three bytes of that u32 each come from one of the x, y, z lanes;
// comparing against 0xFFFFFF therefore tests "all of x, y, z are true".
#define NLIB_NEON_CMPVector3(op, vec1, vec2) \
    uint8x16_t cmp = vreinterpretq_u8_u32(op(vec1, vec2)); \
    uint8x8x2_t zip = vzip_u8(vget_low_u8(cmp), vget_high_u8(cmp)); \
    uint16x4x2_t zip2 = vzip_u16(vreinterpret_u16_u8(zip.val[0]), \
                                 vreinterpret_u16_u8(zip.val[1])); \
    return (vget_lane_u32(vreinterpret_u32_u16(zip2.val[0]), 0) & 0xFFFFFFU) == 0xFFFFFFU
#endif
150 
// Returns true if vec1 and vec2 compare equal in all of the x, y, z lanes
// (lane 3 is ignored). Lanes holding NaN compare unequal.
inline bool __vectorcall Vector3::CmpEq(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT {
#if defined(NLIB_F128_SIMD_NOUSE) || defined(NLIB_SSE41)
    // MoveMask() yields one bit per lane; bits 0-2 are x, y, z.
    int mask = F128::MoveMask(F128::CmpEq(vec1, vec2));
    return ((mask & 7) == 7);
#elif defined(NLIB_NEON)
    NLIB_NEON_CMPVector3(vceqq_f32, vec1, vec2);
#elif defined(NLIB_CAFE_PPC)
    // Paired-single layout: ps[0][0], ps[0][1], ps[1][0] are x, y, z.
    return vec1.vec.ps[0][0] == vec2.vec.ps[0][0] &&
           vec1.vec.ps[0][1] == vec2.vec.ps[0][1] &&
           vec1.vec.ps[1][0] == vec2.vec.ps[1][0];
#endif
}
164 
// Returns true if vec1 < vec2 holds in all of the x, y, z lanes
// (lane 3 is ignored).
inline bool __vectorcall Vector3::CmpLt(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT {
#if defined(NLIB_F128_SIMD_NOUSE) || defined(NLIB_SSE41)
    // MoveMask() yields one bit per lane; bits 0-2 are x, y, z.
    int mask = F128::MoveMask(F128::CmpLt(vec1, vec2));
    return ((mask & 7) == 7);
#elif defined(NLIB_NEON)
    NLIB_NEON_CMPVector3(vcltq_f32, vec1, vec2);
#elif defined(NLIB_CAFE_PPC)
    return vec1.vec.ps[0][0] < vec2.vec.ps[0][0] &&
           vec1.vec.ps[0][1] < vec2.vec.ps[0][1] &&
           vec1.vec.ps[1][0] < vec2.vec.ps[1][0];
#endif
}
178 
// Returns true if vec1 <= vec2 holds in all of the x, y, z lanes
// (lane 3 is ignored).
inline bool __vectorcall Vector3::CmpLe(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT {
#if defined(NLIB_F128_SIMD_NOUSE) || defined(NLIB_SSE41)
    // MoveMask() yields one bit per lane; bits 0-2 are x, y, z.
    int mask = F128::MoveMask(F128::CmpLe(vec1, vec2));
    return ((mask & 7) == 7);
#elif defined(NLIB_NEON)
    NLIB_NEON_CMPVector3(vcleq_f32, vec1, vec2);
#elif defined(NLIB_CAFE_PPC)
    return vec1.vec.ps[0][0] <= vec2.vec.ps[0][0] &&
           vec1.vec.ps[0][1] <= vec2.vec.ps[0][1] &&
           vec1.vec.ps[1][0] <= vec2.vec.ps[1][0];
#endif
}
192 
// Returns true if vec1 > vec2 holds in all of the x, y, z lanes
// (lane 3 is ignored).
inline bool __vectorcall Vector3::CmpGt(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT {
#if defined(NLIB_F128_SIMD_NOUSE) || defined(NLIB_SSE41)
    // MoveMask() yields one bit per lane; bits 0-2 are x, y, z.
    int mask = F128::MoveMask(F128::CmpGt(vec1, vec2));
    return ((mask & 7) == 7);
#elif defined(NLIB_NEON)
    NLIB_NEON_CMPVector3(vcgtq_f32, vec1, vec2);
#elif defined(NLIB_CAFE_PPC)
    return vec1.vec.ps[0][0] > vec2.vec.ps[0][0] &&
           vec1.vec.ps[0][1] > vec2.vec.ps[0][1] &&
           vec1.vec.ps[1][0] > vec2.vec.ps[1][0];
#endif
}
206 
// Returns true if vec1 >= vec2 holds in all of the x, y, z lanes
// (lane 3 is ignored).
inline bool __vectorcall Vector3::CmpGe(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT {
#if defined(NLIB_F128_SIMD_NOUSE) || defined(NLIB_SSE41)
    // MoveMask() yields one bit per lane; bits 0-2 are x, y, z.
    int mask = F128::MoveMask(F128::CmpGe(vec1, vec2));
    return ((mask & 7) == 7);
#elif defined(NLIB_NEON)
    NLIB_NEON_CMPVector3(vcgeq_f32, vec1, vec2);
#elif defined(NLIB_CAFE_PPC)
    return vec1.vec.ps[0][0] >= vec2.vec.ps[0][0] &&
           vec1.vec.ps[0][1] >= vec2.vec.ps[0][1] &&
           vec1.vec.ps[1][0] >= vec2.vec.ps[1][0];
#endif
}
220 
// Returns true if vec1 and vec2 differ in at least one of the x, y, z
// lanes: vec1.x != vec2.x || vec1.y != vec2.y || vec1.z != vec2.z.
// This is the exact negation of CmpEq().
inline bool __vectorcall Vector3::CmpNe(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT {
#if defined(NLIB_F128_SIMD_NOUSE) || defined(NLIB_SSE41)
    // True unless all three of the x, y, z equality bits are set.
    int mask = F128::MoveMask(F128::CmpEq(vec1, vec2));
    return ((mask & 7) != 7);
#elif defined(NLIB_NEON)
    // Same byte-zip reduction as NLIB_NEON_CMPVector3, but negated:
    // true when not all of the three x, y, z mask bytes are set.
    uint8x16_t cmp = vreinterpretq_u8_u32(vceqq_f32(vec1, vec2));
    uint8x8x2_t zip = vzip_u8(vget_low_u8(cmp), vget_high_u8(cmp));
    uint16x4x2_t zip2 = vzip_u16(vreinterpret_u16_u8(zip.val[0]),
                                 vreinterpret_u16_u8(zip.val[1]));
    return (vget_lane_u32(vreinterpret_u32_u16(zip2.val[0]), 0) & 0xFFFFFFU) != 0xFFFFFFU;
#elif defined(NLIB_CAFE_PPC)
    return vec1.vec.ps[0][0] != vec2.vec.ps[0][0] ||
           vec1.vec.ps[0][1] != vec2.vec.ps[0][1] ||
           vec1.vec.ps[1][0] != vec2.vec.ps[1][0];
#endif
}
238 
// Returns true if vec1 and vec2 are approximately equal in all of the
// x, y, z lanes, using the per-lane tolerance eps as evaluated by
// F128::CmpNearEq() (lane 3 is ignored).
inline bool __vectorcall
Vector3::CmpNearEq(SimdVectorArg vec1, SimdVectorArg vec2, SimdVectorArg eps) NLIB_NOEXCEPT {
#if defined(NLIB_F128_SIMD_NOUSE) || defined(NLIB_SSE41) || defined(NLIB_CAFE_PPC)
    // MoveMask() yields one bit per lane; bits 0-2 are x, y, z.
    int mask = F128::MoveMask(F128::CmpNearEq(vec1, vec2, eps));
    return ((mask & 7) == 7);
#elif defined(NLIB_NEON)
    // Byte-zip reduction (see NLIB_NEON_CMPVector3): packs one byte from
    // each lane of the mask into a u32, then tests the three bytes that
    // belong to the x, y, z lanes.
    f128 cmp_ = F128::CmpNearEq(vec1, vec2, eps);
    uint8x16_t cmp = vreinterpretq_u8_f32(cmp_);
    uint8x8x2_t zip = vzip_u8(vget_low_u8(cmp), vget_high_u8(cmp));
    uint16x4x2_t zip2 = vzip_u16(vreinterpret_u16_u8(zip.val[0]),
                                 vreinterpret_u16_u8(zip.val[1]));
    return (vget_lane_u32(vreinterpret_u32_u16(zip2.val[0]), 0) & 0xFFFFFFU) == 0xFFFFFFU;
#endif
}
253 
254 #if defined(NLIB_NEON)
255 #undef NLIB_NEON_CMPVector3
256 #endif
257 
258 // isnan(vec.x) || isnan(vec.y) || isnan(vec.z)
259 inline bool __vectorcall Vector3::IsNaN(SimdVectorArg vec) NLIB_NOEXCEPT {
260 #if defined(NLIB_F128_SIMD_NOUSE) || defined(NLIB_SSE41) || defined(NLIB_CAFE_PPC)
261  int mask = F128::MoveMask(F128::IsNaN(vec));
262  return ((mask & 7) == 7);
263 #elif defined(NLIB_NEON)
264  f128 cmp_ = F128::IsNaN(vec);
265  uint8x16_t cmp = vreinterpretq_u8_f32(cmp_);
266  uint8x8x2_t zip = vzip_u8(vget_low_u8(cmp), vget_high_u8(cmp));
267  uint16x4x2_t zip2 = vzip_u16(vreinterpret_u16_u8(zip.val[0]),
268  vreinterpret_u16_u8(zip.val[1]));
269  return (vget_lane_u32(vreinterpret_u32_u16(zip2.val[0]), 0) & 0xFFFFFFU) == 0xFFFFFFU;
270 #endif
271 }
272 
273 // isinf(vec.x) || isinf(vec.y) || isinf(vec.z)
274 inline bool __vectorcall Vector3::IsInfinite(SimdVectorArg vec) NLIB_NOEXCEPT {
275 #if defined(NLIB_F128_SIMD_NOUSE) || defined(NLIB_SSE41) || defined(NLIB_CAFE_PPC)
276  int mask = F128::MoveMask(F128::IsInfinite(vec));
277  return ((mask & 7) == 7);
278 #elif defined(NLIB_NEON)
279  f128 cmp_ = F128::IsInfinite(vec);
280  uint8x16_t cmp = vreinterpretq_u8_f32(cmp_);
281  uint8x8x2_t zip = vzip_u8(vget_low_u8(cmp), vget_high_u8(cmp));
282  uint16x4x2_t zip2 = vzip_u16(vreinterpret_u16_u8(zip.val[0]),
283  vreinterpret_u16_u8(zip.val[1]));
284  return (vget_lane_u32(vreinterpret_u32_u16(zip2.val[0]), 0) & 0xFFFFFFU) == 0xFFFFFFU;
285 #endif
286 }
287 
// Returns true if abs(vec[i]) <= bounds[i] for all of i = x, y, z
// (lane 3 is ignored).
inline bool __vectorcall Vector3::InBound(SimdVectorArg vec, SimdVectorArg bounds) NLIB_NOEXCEPT {
#if defined(NLIB_F128_SIMD_NOUSE) || defined(NLIB_SSE41) || defined(NLIB_CAFE_PPC)
    // MoveMask() yields one bit per lane; all of bits 0-2 (x, y, z) must be set.
    int mask = F128::MoveMask(F128::InBound(vec, bounds));
    return ((mask & 7) == 7);
#elif defined(NLIB_NEON)
    // Byte-zip reduction (see NLIB_NEON_CMPVector3): packs one byte from
    // each lane of the mask into a u32 and requires all three x/y/z bytes.
    f128 cmp_ = F128::InBound(vec, bounds);
    uint8x16_t cmp = vreinterpretq_u8_f32(cmp_);
    uint8x8x2_t zip = vzip_u8(vget_low_u8(cmp), vget_high_u8(cmp));
    uint16x4x2_t zip2 = vzip_u16(vreinterpret_u16_u8(zip.val[0]),
                                 vreinterpret_u16_u8(zip.val[1]));
    return (vget_lane_u32(vreinterpret_u32_u16(zip2.val[0]), 0) & 0xFFFFFFU) == 0xFFFFFFU;
#endif
}
302 
// Returns the 3D dot product of vec1 and vec2 broadcast to every lane:
// r = { dot, dot, dot, dot }. Lane 3 of the inputs is ignored.
NLIB_M(f128) Vector3::Dot(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT {
#ifdef NLIB_F128_SIMD_NOUSE
    float tmp = vec1.vec.v[0] * vec2.vec.v[0] + vec1.vec.v[1] * vec2.vec.v[1] +
                vec1.vec.v[2] * vec2.vec.v[2];
    f128 rval;
    rval.vec.v[0] = rval.vec.v[1] = rval.vec.v[2] = rval.vec.v[3] = tmp;
    return rval;
#elif defined(NLIB_SSE41)
    // dpps imm 0x7F: multiply lanes 0-2 only, broadcast the sum to all lanes.
    return _mm_dp_ps(vec1, vec2, 0x7F);
#elif defined(NLIB_NEON)
    f128 tmp = vmulq_f32(vec1, vec2);
    float32x2_t lo = vget_low_f32(tmp);                      // { x1*x2, y1*y2 }
    lo = vpadd_f32(lo, lo);                                  // { x+y, x+y }
    float32x2_t hi = vdup_lane_f32(vget_high_f32(tmp), 0);   // { z1*z2, z1*z2 }
    lo = vadd_f32(lo, hi);                                   // { dot, dot }
    return vcombine_f32(lo, lo);
#elif defined(NLIB_CAFE_PPC)
    f128 tmp = F128::Mult(vec1, vec2);
    // Paired-single sum: element 0 of val receives x1*x2 + y1*y2,
    // then adding ps[1] folds in z1*z2; finally broadcast to all lanes.
    f32x2 val = __PS_SUM0(tmp.vec.ps[0], tmp.vec.ps[0], tmp.vec.ps[0]);
    val = __PS_ADD(val, tmp.vec.ps[1]);
    f128 ret;
    ret.vec.ps[0] = ret.vec.ps[1] = __PS_FDUP(val[0]);
    return ret;
#endif
}
329 
template <bool SetLane0, bool SetLane1, bool SetLane2, bool SetLane3>
// Returns the 3D dot product of vec1 and vec2 written only to the selected
// lanes: r[i] = SetLane[i] ? dot : 0.f.
NLIB_MH(f128) Vector3::DotEx(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT {
#if !defined(NLIB_F128_SIMD_NOUSE) && defined(NLIB_SSE41)
    // dpps: high nibble 0x70 multiplies lanes 0-2 (x, y, z); the low nibble
    // selects which result lanes receive the sum (the rest become 0).
    return _mm_dp_ps(vec1, vec2,
                     (0x70 | (SetLane0 ? 1 : 0) | (SetLane1 ? 2 : 0) |
                      (SetLane2 ? 4 : 0) | (SetLane3 ? 8 : 0)));
#else
    // Splat the broadcast dot product into the selected lanes of a zero vector.
    return F128::Splat<SetLane0, SetLane1, SetLane2, SetLane3>(F128::SetZero(), Dot(vec1, vec2));
#endif
}
341 
342 // 'w' component is arbitrary
343 NLIB_M(SimdVector) Vector3::Cross(SimdVectorArg vec1, SimdVectorArg vec2) NLIB_NOEXCEPT {
344  SimdVector rval = F128::Mult(F128::Swizzle<1, 2, 0, 1>(vec1), F128::Swizzle<2, 0, 1, 1>(vec2));
345  // yz, zx, xy, *
346  rval = F128::MultSub(F128::Swizzle<2, 0, 1, 1>(vec1), F128::Swizzle<1, 2, 0, 1>(vec2), rval);
347  // yz - zy, zx - xz, xy - yx, *
348  return rval;
349  // NOTE:
350  // optimization for NEON may be needed.
351 }
352 
353 // nan if |vec| is too big, 0 if |vec| = 0
354 NLIB_M(SimdVector) Vector3::Normalize(SimdVectorArg vec) NLIB_NOEXCEPT {
355  f128 dot = Vector3::Dot(vec, vec);
356  f128 rsqrt = F128::RecpSqrt(dot);
357  f128 inf = F128::SetInfinity();
358  f128 zero = F128::SetZero();
359  f128 eqzero = F128::CmpEq(dot, zero);
360  f128 eqinf = F128::CmpEq(dot, inf);
361  SimdVector ret = F128::Mult(vec, rsqrt);
362  f128 nan = F128::SetNaN();
363  ret = F128::Select(eqzero, zero, ret);
364  ret = F128::Select(eqinf, nan, ret);
365  return ret;
366 }
367 
368 // r = { lensq, lensq, lensq, lensq }
369 NLIB_M(f128) Vector3::LengthSq(SimdVectorArg vec) NLIB_NOEXCEPT { return Dot(vec, vec); }
370 
371 // r = { len, len, len, len }
372 NLIB_M(f128) Vector3::Length(SimdVectorArg vec) NLIB_NOEXCEPT {
373  return F128::Sqrt(Dot(vec, vec));
374 }
375 
376 // r = { len, len, len, len }
377 NLIB_M(f128) Vector3::LengthEst(SimdVectorArg vec) NLIB_NOEXCEPT {
378  return F128::SqrtEst(Dot(vec, vec));
379 }
380 
381 // r = { recpLen, recpLen, recpLen, recpLen }
382 NLIB_M(f128) Vector3::RecpLength(SimdVectorArg vec) NLIB_NOEXCEPT {
383  return F128::RecpSqrt(Dot(vec, vec));
384 }
385 
386 // r = { recpLen, recpLen, recpLen, recpLen }
387 NLIB_M(f128) Vector3::RecpLengthEst(SimdVectorArg vec) NLIB_NOEXCEPT {
388  return F128::RecpSqrtEst(Dot(vec, vec));
389 }
390 
391 NLIB_M(SimdVector) Vector3::NormalizeEst(SimdVectorArg vec) NLIB_NOEXCEPT {
392  return F128::Mult(vec, RecpLengthEst(vec));
393 }
394 
395 // { radian, radian, radian, radian }
396 NLIB_M(f128) Vector3::GetAngle(SimdVectorArg vec1_normalized,
397  SimdVectorArg vec2_normalized) NLIB_NOEXCEPT {
398  f128 ret = Dot(vec1_normalized, vec2_normalized);
399  ret = F128::Clamp(ret, F128::SetNegativeOne(), F128::SetOne());
400  return F128::ArcCos(ret);
401 }
402 
403 NLIB_M(SimdVector) Vector3::Reflect(SimdVectorArg vec, SimdVectorArg normal) NLIB_NOEXCEPT {
404  f128 s = Dot(vec, normal);
405  s = F128::Add(s, s);
406  return F128::MultSub(s, normal, vec);
407 }
408 
409 /*
410 // Vector3::Transform(vec, m) is better in performance
411 NLIB_M(SimdVector) Vector3::Transform(SimdMatrixArg m, SimdVectorArg vec) NLIB_NOEXCEPT {
412  f128 v = F128::SetFloatToLane<3>(vec, 1.f);
413  return Vector4::Transform(m, v);
414 }
415 */
416 
// Transforms vec as a position (homogeneous w = 1), row-vector convention:
// r = { vec.x, vec.y, vec.z, 1 } * m.
NLIB_M(SimdVector) Vector3::Transform(SimdVectorArg vec, SimdMatrixArg m) NLIB_NOEXCEPT {
    // Accumulate vec.x * row0 + vec.y * row1 + vec.z * row2, then add the
    // translation row (row3) unscaled, which is the w = 1 contribution.
    f128 ret = F128::Mult<0>(vec, m.r[0], each_select32);
    ret = F128::MultAdd<1>(vec, m.r[1], ret, each_select32);
    ret = F128::MultAdd<2>(vec, m.r[2], ret, each_select32);
    return F128::Add(m.r[3], ret);
}
424 
425 // r = { vec.x, vec.y, vec.z, 1.f } * m, r = r / r.w
426 NLIB_M(SimdVector) Vector3::TransformCoord(SimdVectorArg vec, SimdMatrixArg m) NLIB_NOEXCEPT {
427  f128 tmp = Vector3::Transform(vec, m);
428  return F128::Div(tmp, F128::SetValue<3>(tmp, each_select32));
429 }
430 
431 /*
432 // Vector3::TransformNormal(vec, m) is better in performance
433 NLIB_M(SimdVector) Vector3::TransformNormal(SimdMatrixArg m, SimdVectorArg vec) NLIB_NOEXCEPT {
434  f128 v = F128::SetZeroToLane<3>(vec);
435  return Vector4::Transform(m, v);
436 }
437 */
438 
// Transforms vec as a direction (homogeneous w = 0), so the translation
// row of m does not contribute: r = { vec.x, vec.y, vec.z, 0 } * m.
NLIB_M(SimdVector) Vector3::TransformNormal(SimdVectorArg vec, SimdMatrixArg m) NLIB_NOEXCEPT {
    // Accumulate vec.x * row0 + vec.y * row1 + vec.z * row2; row3 is skipped.
    f128 ret = F128::Mult<0>(vec, m.r[0], each_select32);
    ret = F128::MultAdd<1>(vec, m.r[1], ret, each_select32);
    ret = F128::MultAdd<2>(vec, m.r[2], ret, each_select32);
    return ret;
}
446 
447 NLIB_M(SimdVector) Vector3::Rotate(SimdVectorArg vec,
448  SimdQuaternionArg q_normalized) NLIB_NOEXCEPT {
449  SimdVector v = F128::SetZeroToLane<3>(vec);
450  SimdQuaternion conj = Quaternion::Conjugate(q_normalized);
451  return Quaternion::Mult(Quaternion::Mult(conj, v), q_normalized);
452 }
453 
454 NLIB_M(SimdVector) Vector3::InvRotate(SimdVectorArg vec,
455  SimdQuaternionArg q_normalized) NLIB_NOEXCEPT {
456  SimdVector v = F128::SetZeroToLane<3>(vec);
457  SimdQuaternion conj = Quaternion::Conjugate(q_normalized);
458  return Quaternion::Mult(Quaternion::Mult(q_normalized, v), conj);
459 }
460 
461 #undef NLIB_M
462 #undef NLIB_MH
463 
464 #endif // NLIB_DOXYGEN
465 
466 } // namespace simd
467 NLIB_NAMESPACE_END
468 
469 #endif // INCLUDE_NN_NLIB_SIMD_SIMDVECTOR3_H_
#define NLIB_NOEXCEPT
環境に合わせてnoexcept 又は同等の定義がされます。
Definition: Platform.h:2151
クォータニオンが定義されています。
f128arg SimdVectorArg
f128argがtypedefされています。
Definition: SimdFloat.h:3927
#define NLIB_VIS_HIDDEN
関数やクラス等のシンボルをライブラリの外部に公開しません。
Definition: Platform_unix.h:50
f128arg SimdQuaternionArg
f128argがtypedefされています。
Definition: SimdFloat.h:3929
3次元ベクトルの計算を行う関数が集められたクラスです。全ての関数でレーン3に設定された値は無視されます...
Definition: SimdVector3.h:13
4x4行列を保持する構造体です。
Definition: SimdFloat.h:3938
単精度浮動小数点数のSIMD演算を行うためのクラスや関数が定義されています。
3次元ベクトルをメモリから読み出したりメモリに書き出したりするための型です。float型のx, y, zをデータメンバとして保持します。
Definition: SimdFloat.h:4047
constexpr const each_select32_tag each_select32
each_select32_tag型の定数オブジェクトで、32bitのレーンを選択することを示すためのタグです。 ...
Definition: SimdInt.h:56
nlib_f128_t f128
nlib_f128_tがtypedefされています。
Definition: SimdFloat.h:54
#define NLIB_STATIC_ASSERT(exp)
静的アサートが定義されます。利用可能であればstatic_assertを利用します。
Definition: Config.h:117
4次元ベクトルが定義されています。
f128 SimdQuaternion
f128がtypedefされています。クォータニオンを扱う場合に利用されます。
Definition: SimdFloat.h:3928
f128 SimdVector
f128がtypedefされています。3次元ベクトル又は4次元ベクトルを扱う場合に利用されます。 ...
Definition: SimdFloat.h:3926