nlib
SimdGeometry.h
[詳解]
1 
2 #pragma once
3 #ifndef INCLUDE_NN_NLIB_SIMD_SIMDGEOMETRY_H_
4 #define INCLUDE_NN_NLIB_SIMD_SIMDGEOMETRY_H_
5 
11 
12 NLIB_NAMESPACE_BEGIN
13 namespace simd {
14 
15 class AxisAlignedBox;
16 class OrientedBox;
17 class Frustum;
18 
// Plane: static-only helpers for a plane stored as four coefficients in one SIMD register.
// The default constructor is declared private and left undefined here, so the class is
// used purely as a scope for the static functions.
// NOTE(review): the embedded listing numbers skip 21, 27 and 30, i.e. the class header line
// and the final parameter line of FromPointAndNormal / FromPoint are absent from this
// listing. The inline definitions later in this file show those parameters to be
// `SimdVectorArg normal` and `SimdVectorArg point2`.
19 // satisfies plane[0] * x + plane[1] * y + plane[2] * z + plane[3] = 0,
20 // note that most of the APIs require the plane to be normalized.
22  public:
// 4-lane dot product, point evaluation (w forced to 1) and normal-only (3-lane) dot product.
23  static f128 __vectorcall Dot(SimdPlaneArg plane, SimdVectorArg vec) NLIB_NOEXCEPT;
24  static f128 __vectorcall DotCoord(SimdPlaneArg plane, SimdVectorArg vec) NLIB_NOEXCEPT;
25  static f128 __vectorcall DotNormal(SimdPlaneArg plane, SimdVectorArg vec) NLIB_NOEXCEPT;
// Construction from a point plus normal, or from three points.
26  static SimdPlane __vectorcall FromPointAndNormal(SimdVectorArg point,
28  static SimdPlane __vectorcall FromPoint(SimdVectorArg point0,
29  SimdVectorArg point1,
// Scale the plane by the reciprocal length of its xyz part (exact and estimated variants).
31  static SimdPlane __vectorcall Normalize(SimdPlaneArg plane) NLIB_NOEXCEPT;
32  static SimdPlane __vectorcall NormalizeEst(SimdPlaneArg plane) NLIB_NOEXCEPT;
// Applies Vector4::Transform to the plane coefficients.
33  static SimdPlane __vectorcall Transform(SimdPlaneArg plane, SimdMatrixArg m) NLIB_NOEXCEPT;
34 
35  private:
36  Plane(); // forbidden
37 };
38 
// Sphere: static-only helpers for a bounding sphere packed into one SIMD register.
// NOTE(review): the embedded listing numbers skip 40 and 45, i.e. the class header line and
// the final parameter line of Merge are absent from this listing. The inline definition of
// Merge later in this file shows that parameter to be `SimdSphereArg sphere1`.
39 // the lane 012 is the center of the sphere, and the lane 3 is radius.
41  public:
42  // ctors
// FromPoints builds an approximate bounding sphere of `count` points;
// Merge returns a sphere enclosing both arguments.
43  static SimdSphere __vectorcall FromPoints(const Float3* points, size_t count) NLIB_NOEXCEPT;
44  static SimdSphere __vectorcall Merge(SimdSphereArg sphere0,
46 
47  // get/set
48  static float __vectorcall GetRadius(SimdSphereArg sphere) NLIB_NOEXCEPT;
49  static SimdSphere __vectorcall SetRadius(SimdSphereArg sphere, float radius) NLIB_NOEXCEPT;
// GetCenter returns the sphere register unchanged (lane 3 still holds the radius).
50  static SimdVector __vectorcall GetCenter(SimdSphereArg sphere) NLIB_NOEXCEPT;
51  static SimdSphere __vectorcall
52  SetCenter(SimdSphereArg sphere, SimdVectorArg center) NLIB_NOEXCEPT;
53 
54  static SimdSphere __vectorcall Transform(SimdSphereArg sphere, SimdMatrixArg m) NLIB_NOEXCEPT;
55 
56  private:
57  Sphere(); // forbidden
58 };
59 
// AxisAlignedBox: axis-aligned bounding box described by its minimum and maximum corner.
// NOTE(review): the embedded listing numbers skip 61, 71 and 75-76, i.e. the class header
// line, the final parameter line of the two-point FromPoints, and the data member
// declarations are absent from this listing. The inline definitions later in this file
// read and write members named `point_min` and `point_max`, and show the missing
// parameter to be `SimdVectorArg point1`.
60 // point_min[012] must be less than point_max[012]. the lane 3 is ignored.
62  public:
63  AxisAlignedBox() NLIB_NOEXCEPT {} // no initialize
64  AxisAlignedBox(const Float3& pmin, const Float3& pmax);
// Writes the eight corner points to corners[0..7].
65  void GetCorners(Float3* corners) const NLIB_NOEXCEPT;
// Transforms the eight corners of *this by m and stores their bounds in *box.
66  void __vectorcall Transform(AxisAlignedBox* box, SimdMatrixArg m) NLIB_NOEXCEPT;
// Factory-style helpers: each writes its result through the `box` out-parameter.
67  static void Merge(AxisAlignedBox* box, const AxisAlignedBox& box0,
68  const AxisAlignedBox& box1) NLIB_NOEXCEPT;
69  static void __vectorcall FromSphere(AxisAlignedBox* box, SimdSphereArg sphere) NLIB_NOEXCEPT;
70  static void __vectorcall FromPoints(AxisAlignedBox* box, SimdVectorArg point0,
72  static void FromPoints(AxisAlignedBox* box, const Float3* points, size_t count) NLIB_NOEXCEPT;
73 
74  public:
77 };
78 
// OrientedBox: bounding box with arbitrary orientation.
// NOTE(review): the embedded listing numbers skip 80 and 89-91, i.e. the class header line
// and the data member declarations are absent from this listing. The inline definitions
// later in this file use members named `center`, `extent` and `rotation`.
79 // extent[012] must be positive
81  public:
82  OrientedBox() NLIB_NOEXCEPT {} // no initialize
// Writes the box transformed by m into *box.
83  void __vectorcall Transform(OrientedBox* box, SimdMatrixArg m) NLIB_NOEXCEPT;
// Writes the eight corner points to corners[0..7].
84  void GetCorners(Float3* corners) const NLIB_NOEXCEPT;
// Builds an oriented box with identity rotation covering the given axis-aligned box.
85  static void FromAxisAlignedBox(OrientedBox* box, const AxisAlignedBox& aabb) NLIB_NOEXCEPT;
86  // static void FromPoints(OrientedBox* box, const Float3* points, size_t count) NLIB_NOEXCEPT;
87 
88  public:
92 };
93 
94 class NLIB_VIS_HIDDEN Capsule {
95  public:
96  Capsule() NLIB_NOEXCEPT {} // no initialize
97  Capsule(const Float3& pt0, const Float3& pt1, float radius) NLIB_NOEXCEPT;
98  void __vectorcall Transform(Capsule* capsule, SimdMatrixArg m) NLIB_NOEXCEPT;
99 
100  public:
101  SimdSphere sphere;
102  SimdVector point;
103 };
104 
// Frustum: view frustum stored as a center, a rotation and six normalized planes.
// Intersection and Containment are friends so they can read the private planes directly.
// NOTE(review): the embedded listing number 105 (the class header line) is absent from
// this listing.
106  public:
107  Frustum() NLIB_NOEXCEPT {} // no initialize
108  Frustum(const Frustum& rhs) NLIB_NOEXCEPT;
109  Frustum& operator=(const Frustum& rhs) NLIB_NOEXCEPT;
// n is the near distance and must be positive (asserted in the definitions); f is the far
// distance. top/bottom/left/right are divided by n to form the side plane slopes.
110  Frustum(SimdVectorArg center, SimdQuaternionArg rotation, float top, float bottom, float left,
111  float right, float n, float f) NLIB_NOEXCEPT;
112  void __vectorcall Set(SimdVectorArg center, SimdQuaternionArg rotation, float top, float bottom,
113  float left, float right, float n, float f) NLIB_NOEXCEPT;
// Both Transform overloads write the transformed frustum into *frustum.
114  void __vectorcall Transform(Frustum* frustum, float scale, SimdQuaternionArg rotation,
115  SimdVectorArg translation) const NLIB_NOEXCEPT;
116  void __vectorcall Transform(Frustum* frustum, SimdMatrixArg m) const NLIB_NOEXCEPT;
117  void GetCorners(Float3* corners) const NLIB_NOEXCEPT;
118 
119  private:
120  // Right-handed
121  SimdVector m_Center;
122  SimdQuaternion m_Rotation;
// The plane comments below list the coefficients passed to Plane::Normalize by the
// constructor and Set().
123  SimdPlane m_NearPlane; // plane = (0, 0, 1, near)
124  SimdPlane m_FarPlane; // plane = (0, 0, -1, -far)
125  SimdPlane m_TopPlane; // plane = (0, 1, top/near, 0)
126  SimdPlane m_BottomPlane; // plane = (0, -1, -bottom/near, 0)
127  SimdPlane m_LeftPlane; // plane = (-1, 0, -left/near, 0)
128  SimdPlane m_RightPlane; // plane = (1, 0, right/near, 0)
129  friend class Intersection;
130  friend class Containment;
131 };
132 
// DistanceSq: distance queries between primitive pairs; the class name suggests the
// returned f128 holds a squared distance (definitions are not part of this listing view).
// Naming convention: <A><B>(...) takes primitive A's arguments first, then B's.
// Parameters named *_normalized are expected to be unit length / normalized by the caller.
// Overloads taking SimdVector* out-parameters also report the closest point(s).
// NOTE(review): the embedded listing number 134 (the class header line) is absent from
// this listing.
133 // has only static member functions
135  public:
// Point versus line / ray / segment / plane / sphere-plane / box.
136  static f128 __vectorcall PointLine(SimdVectorArg point, SimdVectorArg line_point,
137  SimdVectorArg line_dir_normalized) NLIB_NOEXCEPT;
138  static f128 __vectorcall PointRay(SimdVectorArg point, SimdVectorArg ray_point,
139  SimdVectorArg ray_dir_normalized) NLIB_NOEXCEPT;
140  static f128 __vectorcall PointSegment(SimdVectorArg point, SimdVectorArg segment_point0,
141  SimdVectorArg segment_point1) NLIB_NOEXCEPT;
142  static f128 __vectorcall PointPlane(SimdVector* point_on_plane, SimdVectorArg point,
143  SimdPlaneArg plane_normalized) NLIB_NOEXCEPT;
144  static f128 __vectorcall
145  SpherePlane(SimdSphereArg sphere, SimdPlaneArg plane_normalized) NLIB_NOEXCEPT;
146  static f128 __vectorcall PointAxisAlignedBox(SimdVector* point_on_box, SimdVectorArg point,
147  const AxisAlignedBox& aabb) NLIB_NOEXCEPT;
// Pairs of linear primitives (line, ray, segment).
148  static f128 __vectorcall LineLine(SimdVector* point_on_line0, SimdVector* point_on_line1,
149  SimdVectorArg line0_point, SimdVectorArg line0_dir_normalized,
150  SimdVectorArg line1_point,
151  SimdVectorArg line1_dir_normalized) NLIB_NOEXCEPT;
152  static f128 __vectorcall
153  SegmentSegment(SimdVector* point_on_segment0, SimdVector* point_on_segment1,
154  SimdVectorArg segment0_point0, SimdVectorArg segment0_point1,
155  SimdVectorArg segment1_point0, SimdVectorArg segment1_point1) NLIB_NOEXCEPT;
156  static f128 __vectorcall LineRay(SimdVector* point_on_line, SimdVector* point_on_ray,
157  SimdVectorArg line_point, SimdVectorArg line_dir_normalized,
158  SimdVectorArg ray_point,
159  SimdVectorArg ray_dir_normalized) NLIB_NOEXCEPT;
160  static f128 __vectorcall
161  LineSegment(SimdVector* point_on_line, SimdVector* point_on_segment,
162  SimdVectorArg line_point, SimdVectorArg line_dir_normalized,
163  SimdVectorArg segment_point0, SimdVectorArg segment_point1) NLIB_NOEXCEPT;
164  static f128 __vectorcall RayRay(SimdVector* point_on_ray0, SimdVector* point_on_ray1,
165  SimdVectorArg ray0_point, SimdVectorArg ray0_dir_normalized,
166  SimdVectorArg ray1_point,
167  SimdVectorArg ray1_dir_normalized) NLIB_NOEXCEPT;
168  static f128 __vectorcall RaySegment(SimdVector* point_on_ray, SimdVector* point_on_segment,
169  SimdVectorArg ray_point0, SimdVectorArg ray_dir_normalized,
170  SimdVectorArg segment_point0,
171  SimdVectorArg segment_point1) NLIB_NOEXCEPT;
172 
173  private:
174  DistanceSq(); // forbidden
175 };
176 
// Intersection: intersection queries between primitive pairs.
// Naming convention: <A><B>(...) takes primitive A's arguments first, then B's.
// Functions returning PlaneResult classify a primitive against a plane (front, intersecting,
// back); functions returning bool report whether the two primitives intersect.
// Parameters named *_normalized are expected to be unit length / normalized by the caller.
// NOTE(review): the embedded listing number 178 (the class header line) is absent from
// this listing.
177 // has only static member functions
179  public:
180  // OrientedBox, AxisAlignedBox, Sphere, Triangle, Plane, Line, Ray, Segment
181  enum PlaneResult {
182  PLANE_FRONT = 0,
183  PLANE_INTERSECT = 1,
184  PLANE_BACK = 2
185  };
// Plane versus line / ray / segment: these return a SimdVector rather than a bool.
// NOTE(review): presumably the intersection point -- confirm against the definitions.
186  static SimdVector __vectorcall PlaneLine(SimdPlaneArg plane, SimdVectorArg line_point,
187  SimdVectorArg line_dir_normalized) NLIB_NOEXCEPT;
188  static SimdVector __vectorcall PlaneRay(SimdPlaneArg plane, SimdVectorArg ray_point,
189  SimdVectorArg ray_dir_normalized) NLIB_NOEXCEPT;
190  static SimdVector __vectorcall PlaneSegment(SimdPlaneArg plane, SimdVectorArg segment_point0,
191  SimdVectorArg segment_point1) NLIB_NOEXCEPT;
192 
193  // static bool __vectorcall SphereLine(SimdSphereArg sphere, SimdVectorArg line_point,
194  // SimdVectorArg line_dir_normalized) NLIB_NOEXCEPT;
195  static bool __vectorcall SphereRay(float* distance, SimdSphereArg sphere,
196  SimdVectorArg ray_point,
197  SimdVectorArg ray_dir_normalized) NLIB_NOEXCEPT;
198  // static bool __vectorcall SphereSegment(SimdVector* point0, SimdVector* point1,
199  // SimdSphereArg sphere, SimdVectorArg segment_point0,
200  // SimdVectorArg segment_point1) NLIB_NOEXCEPT;
201 
// Triangle, sphere and plane pairs.
202  static bool __vectorcall TriangleRay(float* distance, SimdVectorArg triangle_point0,
203  SimdVectorArg triangle_point1,
204  SimdVectorArg triangle_point2, SimdVectorArg ray_point,
205  SimdVectorArg ray_dir_normalized) NLIB_NOEXCEPT;
206  static PlaneResult __vectorcall
207  TrianglePlane(SimdVectorArg triangle_point0, SimdVectorArg triangle_point1,
208  SimdVectorArg triangle_point2, SimdPlaneArg plane_normalized) NLIB_NOEXCEPT;
209  static PlaneResult __vectorcall
210  SpherePlane(SimdSphereArg sphere, SimdPlaneArg plane_normalized) NLIB_NOEXCEPT;
211  static bool __vectorcall
212  SphereSphere(SimdSphereArg sphere0, SimdSphereArg sphere1) NLIB_NOEXCEPT;
213  static f128x2 __vectorcall PlanePlane(SimdPlaneArg plane0, SimdPlaneArg plane1) NLIB_NOEXCEPT;
214  static bool __vectorcall SphereTriangle(SimdSphereArg sphere, SimdVectorArg triangle_point0,
215  SimdVectorArg triangle_point1,
216  SimdVectorArg triangle_point2) NLIB_NOEXCEPT;
217 
// Axis-aligned box versus X.
218  static PlaneResult __vectorcall
219  AxisAlignedBoxPlane(const AxisAlignedBox& aabb,
220  SimdPlaneArg plane_normalized) NLIB_NOEXCEPT;
221  static bool __vectorcall AxisAlignedBoxRay(const AxisAlignedBox& aabb, SimdVectorArg ray_point,
222  SimdVectorArg ray_dir_normalized) NLIB_NOEXCEPT;
223  static bool __vectorcall
224  AxisAlignedBoxSphere(const AxisAlignedBox& aabb, SimdSphereArg sphere) NLIB_NOEXCEPT;
225  static bool __vectorcall
226  AxisAlignedBoxAxisAlignedBox(const AxisAlignedBox& aabb0,
227  const AxisAlignedBox& aabb1) NLIB_NOEXCEPT;
228  static bool __vectorcall AxisAlignedBoxTriangle(const AxisAlignedBox& aabb,
229  SimdVectorArg triangle_point0,
230  SimdVectorArg triangle_point1,
231  SimdVectorArg triangle_point2) NLIB_NOEXCEPT;
232 
// Oriented box versus X.
233  static bool __vectorcall
234  OrientedBoxSphere(const OrientedBox& box, SimdSphereArg sphere) NLIB_NOEXCEPT;
235  static bool __vectorcall
236  OrientedBoxAxisAlignedBox(const OrientedBox& box, const AxisAlignedBox& aabb) NLIB_NOEXCEPT;
237  static bool __vectorcall
238  OrientedBoxOrientedBox(const OrientedBox& box0, const OrientedBox& box1) NLIB_NOEXCEPT;
239  static bool __vectorcall OrientedBoxTriangle(const OrientedBox& box,
240  SimdVectorArg triangle_point0,
241  SimdVectorArg triangle_point1,
242  SimdVectorArg triangle_point2) NLIB_NOEXCEPT;
243  static PlaneResult __vectorcall
244  OrientedBoxPlane(const OrientedBox& box, SimdPlaneArg plane_normalized) NLIB_NOEXCEPT;
245  static bool __vectorcall OrientedBoxRay(const OrientedBox& box, SimdVectorArg ray_point,
246  SimdVectorArg ray_dir_normalized) NLIB_NOEXCEPT;
247 
// Frustum versus X.
248  static bool __vectorcall
249  FrustumSphere(const Frustum& frustum, SimdSphereArg sphere) NLIB_NOEXCEPT;
250  static bool __vectorcall
251  FrustumAxisAlignedBox(const Frustum& frustum, const AxisAlignedBox& aabb) NLIB_NOEXCEPT;
252  static bool __vectorcall
253  FrustumOrientedBox(const Frustum& frustum, const OrientedBox& obb) NLIB_NOEXCEPT;
254  static bool __vectorcall FrustumTriangle(const Frustum& frustum, SimdVectorArg triangle_point0,
255  SimdVectorArg triangle_point1,
256  SimdVectorArg triangle_point2) NLIB_NOEXCEPT;
257  static PlaneResult __vectorcall
258  FrustumPlane(const Frustum& frustum, SimdPlaneArg plane) NLIB_NOEXCEPT;
259  static bool __vectorcall FrustumRay(const Frustum& frustum, SimdVectorArg ray_point,
260  SimdVectorArg ray_dir_normalized) NLIB_NOEXCEPT;
261 
262  // TODO(nishida_kenji): TriangleTriangle
263  private:
264  Intersection(); // forbidden
265 };
266 
// Containment: containment queries. Per the section comments below, <A><B>(a, b) asks
// whether container A contains B; the container is always the first argument.
// NOTE(review): the embedded listing number 268 (the class header line) is absent from
// this listing.
267 // has only static member functions
269  public:
270  // OrientedBox, AxisAlignedBox, Sphere
271  // Point, Triangle
272 
273  // Sphere contains X
274  static bool __vectorcall SpherePoint(SimdSphereArg sphere, SimdVectorArg point) NLIB_NOEXCEPT;
275  static bool __vectorcall SphereTriangle(SimdSphereArg sphere, SimdVectorArg triangle_point0,
276  SimdVectorArg triangle_point1,
277  SimdVectorArg triangle_point2) NLIB_NOEXCEPT;
278  static bool __vectorcall
279  SphereOrientedBox(SimdSphereArg sphere, const OrientedBox& obb) NLIB_NOEXCEPT;
280  static bool __vectorcall
281  SphereAxisAlignedBox(SimdSphereArg sphere, const AxisAlignedBox& aabb) NLIB_NOEXCEPT;
282  static bool __vectorcall
283  SphereSphere(SimdSphereArg sphere, SimdSphereArg contained_sphere) NLIB_NOEXCEPT;
284  static bool __vectorcall
285  SphereFrustum(SimdSphereArg sphere, const Frustum& frustum) NLIB_NOEXCEPT;
286 
287  // AABB contains X
288  static bool __vectorcall
289  AxisAlignedBoxPoint(const AxisAlignedBox& aabb, SimdVectorArg point) NLIB_NOEXCEPT;
290  static bool __vectorcall AxisAlignedBoxTriangle(const AxisAlignedBox& aabb,
291  SimdVectorArg triangle_point0,
292  SimdVectorArg triangle_point1,
293  SimdVectorArg triangle_point2) NLIB_NOEXCEPT;
294  static bool __vectorcall
295  AxisAlignedBoxOrientedBox(const AxisAlignedBox& aabb, const OrientedBox& obb) NLIB_NOEXCEPT;
296  static bool __vectorcall
297  AxisAlignedBoxAxisAlignedBox(const AxisAlignedBox& aabb,
298  const AxisAlignedBox& contained_aabb) NLIB_NOEXCEPT;
299  static bool __vectorcall
300  AxisAlignedBoxSphere(const AxisAlignedBox& aabb, SimdSphereArg sphere) NLIB_NOEXCEPT;
301  static bool __vectorcall
302  AxisAlignedBoxFrustum(const AxisAlignedBox& aabb, const Frustum& frustum) NLIB_NOEXCEPT;
303 
304  // OBB contains X
305  static bool __vectorcall
306  OrientedBoxPoint(const OrientedBox& box, SimdVectorArg point) NLIB_NOEXCEPT;
307  static bool __vectorcall OrientedBoxTriangle(const OrientedBox& box,
308  SimdVectorArg triangle_point0,
309  SimdVectorArg triangle_point1,
310  SimdVectorArg triangle_point2) NLIB_NOEXCEPT;
311  static bool __vectorcall OrientedBoxOrientedBox(const OrientedBox& box,
312  const OrientedBox& box_contained) NLIB_NOEXCEPT;
313  static bool __vectorcall
314  OrientedBoxAxisAlignedBox(const OrientedBox& box, const AxisAlignedBox& aabb) NLIB_NOEXCEPT;
315  static bool __vectorcall
316  OrientedBoxSphere(const OrientedBox& box, SimdSphereArg sphere) NLIB_NOEXCEPT;
317  static bool __vectorcall
318  OrientedBoxFrustum(const OrientedBox& box, const Frustum& frustum) NLIB_NOEXCEPT;
319 
320  // Frustum contains X
321  static bool __vectorcall
322  FrustumPoint(const Frustum& frustum, SimdVectorArg point) NLIB_NOEXCEPT;
323  static bool __vectorcall FrustumTriangle(const Frustum& frustum, SimdVectorArg triangle_point0,
324  SimdVectorArg triangle_point1,
325  SimdVectorArg triangle_point2) NLIB_NOEXCEPT;
326  static bool __vectorcall
327  FrustumSphere(const Frustum& frustum, SimdSphereArg sphere) NLIB_NOEXCEPT;
328  static bool __vectorcall
329  FrustumAxisAlignedBox(const Frustum& frustum, const AxisAlignedBox& aabb) NLIB_NOEXCEPT;
330  static bool __vectorcall
331  FrustumOrientedBox(const Frustum& frustum, const OrientedBox& box) NLIB_NOEXCEPT;
332  static bool __vectorcall
333  FrustumFrustum(const Frustum& frustum, const Frustum& contained) NLIB_NOEXCEPT;
334 
335  private:
336  Containment(); // forbidden
337 };
338 
339 #ifndef NLIB_DOXYGEN
340 
341 #define NLIB_M(tp) inline tp __vectorcall
342 #define NLIB_B inline bool __vectorcall
343 
344 //
345 // Plane
346 //
347 #ifdef _MSC_VER
348 # pragma region Plane
349 #endif
350 // XMPlaneDot
351 NLIB_M(f128) Plane::Dot(SimdPlaneArg plane, SimdVectorArg vec) NLIB_NOEXCEPT {
352  return Vector4::Dot(plane, vec);
353 }
354 
355 // XMPlaneDotCoord
356 NLIB_M(f128) Plane::DotCoord(SimdPlaneArg plane, SimdVectorArg vec) NLIB_NOEXCEPT {
357  return Vector4::Dot(plane, F128::SetFloatToLane<3>(vec, 1.f));
358 }
359 
360 // XMPlaneDotNormal
361 NLIB_M(f128) Plane::DotNormal(SimdPlaneArg plane, SimdVectorArg vec) NLIB_NOEXCEPT {
362  return Vector3::Dot(plane, vec);
363 }
364 
365 // XMPlaneFromPointNormal
366 NLIB_M(SimdPlane) Plane::FromPointAndNormal(SimdVectorArg point,
367  SimdVectorArg normal) NLIB_NOEXCEPT {
368  f128 dot = Vector3::Dot(point, normal);
369  dot = F128::Negate(dot);
370  return F128::Splat<false, false, false, true>(normal, dot);
371 }
372 
373 // XMPlaneFromPoints
374 NLIB_M(SimdPlane) Plane::FromPoint(SimdVectorArg point0, SimdVectorArg point1,
375  SimdVectorArg point2) NLIB_NOEXCEPT {
376  SimdVector v10 = F128::Sub(point0, point1);
377  SimdVector v20 = F128::Sub(point0, point2);
378  SimdVector normal = Vector3::Normalize(Vector3::Cross(v10, v20));
379  return FromPointAndNormal(point0, normal);
380 }
381 
382 // XMPlaneNormalize
383 NLIB_M(SimdVector) Plane::Normalize(SimdPlaneArg plane) NLIB_NOEXCEPT {
384  // fail safe needed?
385  return F128::Mult(Vector3::RecpLength(plane), plane);
386 }
387 
388 // XMPlaneNormalizeEst
389 NLIB_M(SimdVector) Plane::NormalizeEst(SimdPlaneArg plane) NLIB_NOEXCEPT {
390  return F128::Mult(Vector3::RecpLengthEst(plane), plane);
391 }
392 
393 // XMPlaneTransform
394 NLIB_M(SimdPlane) Plane::Transform(SimdPlaneArg plane, SimdMatrixArg m) NLIB_NOEXCEPT {
395  return Vector4::Transform(plane, m);
396 }
397 #ifdef _MSC_VER
398 # pragma endregion Plane function implementation
399 #endif
400 
401 //
402 // Sphere
403 //
404 #ifdef _MSC_VER
405 # pragma region Sphere
406 #endif
407 // BoundingSphere::Transform
408 NLIB_M(SimdSphere) Sphere::Transform(SimdSphereArg sphere, SimdMatrixArg m) NLIB_NOEXCEPT {
409  f128 radius = F128::SetValue<3>(sphere, each_select32);
410  SimdVector center = Vector3::Transform(sphere, m);
411  f128 dot_x = Vector3::Dot(m.r[0], m.r[0]);
412  f128 dot_y = Vector3::Dot(m.r[1], m.r[1]);
413  f128 dot_z = Vector3::Dot(m.r[2], m.r[2]);
414  f128 scale = F128::Max(dot_x, dot_y);
415  scale = F128::Max(scale, dot_z);
416  radius = F128::Mult(radius, scale);
417 
418  return F128::Splat<false, false, false, true>(center, radius);
419 }
420 
421 // BoundingSphere::CreateFromPoints
422 NLIB_M(SimdSphere) Sphere::FromPoints(const Float3* points, size_t count) NLIB_NOEXCEPT {
423  // approximation algorithm from Graphics Gems
424  f128 min_x, min_y, min_z;
425  f128 max_x, max_y, max_z;
426  min_x = min_y = min_z = max_x = max_y = max_z = Vector3::LoadFloat3(points);
427 
428  size_t i;
429  for (i = 1; i < count; ++i) {
430  f128 cmp0, cmp1;
431  f128 pt = Vector3::LoadFloat3(points + i);
432 
433  f128 pt_x = F128::SetValue<0>(pt, each_select32);
434  cmp0 = F128::CmpLt(pt_x, F128::SetValue<0>(min_x, each_select32));
435  min_x = F128::Select(cmp0, pt, min_x);
436  cmp1 = F128::CmpGt(pt_x, F128::SetValue<0>(max_x, each_select32));
437  max_x = F128::Select(cmp1, pt, max_x);
438 
439  f128 pt_y = F128::SetValue<1>(pt, each_select32);
440  cmp0 = F128::CmpLt(pt_y, F128::SetValue<0>(min_y, each_select32));
441  min_y = F128::Select(cmp0, pt, min_y);
442  cmp1 = F128::CmpGt(pt_y, F128::SetValue<0>(max_y, each_select32));
443  max_y = F128::Select(cmp1, pt, max_y);
444 
445  f128 pt_z = F128::SetValue<2>(pt, each_select32);
446  cmp0 = F128::CmpLt(pt_z, F128::SetValue<0>(min_z, each_select32));
447  min_z = F128::Select(cmp0, pt, min_z);
448  cmp1 = F128::CmpGt(pt_z, F128::SetValue<0>(max_z, each_select32));
449  max_z = F128::Select(cmp1, pt, max_z);
450  }
451 
452  f128 center;
453  f128 radius;
454  f128 c1_2 = F128::SetValue(0.5f, each_float);
455  {
456  f128 distsq_x = Vector3::LengthSq(F128::Sub(max_x, min_x));
457  f128 distsq_y = Vector3::LengthSq(F128::Sub(max_y, min_y));
458  f128 distsq_z = Vector3::LengthSq(F128::Sub(max_z, min_z));
459  if (Vector4::CmpGt(distsq_x, distsq_y)) {
460  if (Vector4::CmpGt(distsq_x, distsq_z)) {
461  center = F128::Lerp(min_x, max_x, c1_2);
462  radius = F128::Mult(F128::Sqrt(distsq_x), c1_2);
463  } else {
464  center = F128::Lerp(min_z, max_z, c1_2);
465  radius = F128::Mult(F128::Sqrt(distsq_z), c1_2);
466  }
467  } else {
468  if (Vector4::CmpGt(distsq_y, distsq_z)) {
469  center = F128::Lerp(min_y, max_y, c1_2);
470  radius = F128::Mult(F128::Sqrt(distsq_y), c1_2);
471  } else {
472  center = F128::Lerp(min_z, max_z, c1_2);
473  radius = F128::Mult(F128::Sqrt(distsq_z), c1_2);
474  }
475  }
476  }
477 
478  f128 radsq = F128::Mult(radius, radius);
479  for (i = 0; i < count; ++i) {
480  f128 pt = Vector3::LoadFloat3(points + i);
481  f128 diff = F128::Sub(pt, center);
482  f128 distsq = Vector3::LengthSq(diff);
483  if (Vector3::CmpGt(distsq, radsq)) {
484  f128 dist = F128::Sqrt(distsq);
485  f128 t = F128::Div(radius, dist);
486  t = F128::MultSub(c1_2, t, c1_2);
487  radius = F128::Mult(c1_2, F128::Add(radius, dist));
488  radsq = F128::Mult(radius, radius);
489  center = F128::MultAdd(t, diff, center);
490  }
491  }
492 
493  return F128::Splat<false, false, false, true>(center, radius);
494 }
495 
496 // BoundingSphere::CreateMerged()
497 NLIB_M(SimdSphere) Sphere::Merge(SimdSphereArg sphere0, SimdSphereArg sphere1) NLIB_NOEXCEPT {
498  SimdVector diff = F128::Sub(sphere1, sphere0);
499  f128 dist_sq = Vector3::LengthSq(diff);
500  f128 radius_diff_sq = F128::SetValue<3>(F128::Mult(diff, diff), each_select32);
501  f128 is_contained = F128::CmpLe(dist_sq, radius_diff_sq);
502 
503  f128 dist = F128::Sqrt(dist_sq);
504  f128 r0 = F128::SetValue<3>(sphere0, each_select32);
505  f128 r1 = F128::SetValue<3>(sphere1, each_select32);
506  f128 new_radius = F128::Add(r0, r1);
507  new_radius = F128::Add(new_radius, dist);
508  new_radius = F128::Mult(0.5f, new_radius);
509 
510  f128 t = F128::Div(diff, dist);
511  f128 use_new_center = F128::CmpGt(dist, F128::SetEpsilon());
512 
513  SimdSphere ret;
514  ret = F128::MultAdd(t, F128::Sub(new_radius, r0), sphere0);
515  ret = F128::Select(use_new_center, ret, sphere0);
516  ret = F128::Splat<false, false, false, true>(ret, new_radius);
517 
518  f128 s0_contained = F128::And(is_contained, F128::CmpLt(r0, r1));
519  f128 s1_contained = F128::And(is_contained, F128::CmpGe(r0, r1));
520  ret = F128::Select(s0_contained, sphere1, ret);
521  ret = F128::Select(s1_contained, sphere0, ret);
522  return ret;
523 }
524 
525 NLIB_M(float) Sphere::GetRadius(SimdSphereArg sphere) NLIB_NOEXCEPT { // NOLINT
526  return F128::GetFloatFromLane<3>(sphere);
527 }
528 
529 NLIB_M(SimdSphere) Sphere::SetRadius(SimdSphereArg sphere, float radius) NLIB_NOEXCEPT {
530  return F128::SetFloatToLane<3>(sphere, radius);
531 }
532 
533 NLIB_M(SimdSphere) Sphere::GetCenter(SimdSphereArg sphere) NLIB_NOEXCEPT {
534  return sphere;
535 }
536 
537 NLIB_M(SimdSphere) Sphere::SetCenter(SimdSphereArg sphere, SimdVectorArg center) NLIB_NOEXCEPT {
538  return F128::Permute<4, 5, 6, 3>(sphere, center);
539 }
540 #ifdef _MSC_VER
541 # pragma endregion Sphere function implementation
542 #endif
543 
544 //
545 // AxisAlignedBox
546 //
547 #ifdef _MSC_VER
548 # pragma region AxisAlignedBox
549 #endif
550 inline AxisAlignedBox::AxisAlignedBox(const Float3& pmin, const Float3& pmax) {
551  Vector3::LoadFloat3(&pmin);
552  Vector3::LoadFloat3(&pmax);
553 }
554 
555 // BoundingBox::GetCorners
556 inline void AxisAlignedBox::GetCorners(Float3* corners) const NLIB_NOEXCEPT {
557  f128 p0 = point_min;
558  f128 p1 = F128::Splat<true, false, false, false>(point_min, point_max);
559  f128 p2 = F128::Splat<true, true, false, false>(point_min, point_max);
560  f128 p3 = F128::Splat<false, true, false, false>(point_min, point_max);
561  f128 p4 = F128::Splat<false, true, true, true>(point_min, point_max);
562  f128 p5 = F128::Splat<false, false, true, true>(point_min, point_max);
563  f128 p6 = F128::Splat<true, false, true, true>(point_min, point_max);
564  f128 p7 = point_max;
565 
566  Vector3::StoreFloat3(corners + 0, p0);
567  Vector3::StoreFloat3(corners + 1, p1);
568  Vector3::StoreFloat3(corners + 2, p2);
569  Vector3::StoreFloat3(corners + 3, p3);
570  Vector3::StoreFloat3(corners + 4, p4);
571  Vector3::StoreFloat3(corners + 5, p5);
572  Vector3::StoreFloat3(corners + 6, p6);
573  Vector3::StoreFloat3(corners + 7, p7);
574 }
575 
576 // BoundingBox::Transform
577 inline void __vectorcall
578 AxisAlignedBox::Transform(AxisAlignedBox* box, SimdMatrixArg m) NLIB_NOEXCEPT {
579  f128 p0 = point_min;
580  f128 p1 = F128::Splat<true, false, false, false>(point_min, point_max);
581  f128 p2 = F128::Splat<true, true, false, false>(point_min, point_max);
582  f128 p3 = F128::Splat<false, true, false, false>(point_min, point_max);
583  f128 p4 = F128::Splat<false, true, true, true>(point_min, point_max);
584  f128 p5 = F128::Splat<false, false, true, true>(point_min, point_max);
585  f128 p6 = F128::Splat<true, false, true, true>(point_min, point_max);
586  f128 p7 = point_max;
587 
588  p0 = Vector3::Transform(p0, m);
589  p1 = Vector3::Transform(p1, m);
590  p2 = Vector3::Transform(p2, m);
591  p3 = Vector3::Transform(p3, m);
592  p4 = Vector3::Transform(p4, m);
593  p5 = Vector3::Transform(p5, m);
594  p6 = Vector3::Transform(p6, m);
595  p7 = Vector3::Transform(p7, m);
596 
597  f128 pmin, pmax;
598  pmin = F128::Min(p0, p1);
599  pmax = F128::Max(p0, p1);
600  pmin = F128::Min(pmin, p2);
601  pmax = F128::Max(pmax, p2);
602  pmin = F128::Min(pmin, p3);
603  pmax = F128::Max(pmax, p3);
604  pmin = F128::Min(pmin, p4);
605  pmax = F128::Max(pmax, p4);
606  pmin = F128::Min(pmin, p5);
607  pmax = F128::Max(pmax, p5);
608  pmin = F128::Min(pmin, p6);
609  pmax = F128::Max(pmax, p6);
610  pmin = F128::Min(pmin, p7);
611  pmax = F128::Max(pmax, p7);
612  box->point_min = pmin;
613  box->point_max = pmax;
614 }
615 
616 // BoundingBox::CreateMerged
617 inline void AxisAlignedBox::Merge(AxisAlignedBox* box, const AxisAlignedBox& box0,
618  const AxisAlignedBox& box1) NLIB_NOEXCEPT {
619  box->point_min = F128::Min(box0.point_min, box1.point_min);
620  box->point_max = F128::Max(box0.point_max, box1.point_max);
621 }
622 
623 // BoundingBox::CreateFromSphere
624 inline void __vectorcall
625 AxisAlignedBox::FromSphere(AxisAlignedBox* box, SimdSphereArg sphere) NLIB_NOEXCEPT {
626  f128 r = F128::SetValue<3>(sphere, each_select32);
627  box->point_min = F128::Sub(sphere, r);
628  box->point_max = F128::Add(sphere, r);
629 }
630 
631 // BoundingBox::CreateFromPoints
632 inline void __vectorcall AxisAlignedBox::FromPoints(AxisAlignedBox* box, SimdVectorArg point0,
633  SimdVectorArg point1) NLIB_NOEXCEPT {
634  box->point_min = F128::Min(point0, point1);
635  box->point_max = F128::Max(point0, point1);
636 }
637 
638 // BoundingBox::CreateFromPoints
639 inline void AxisAlignedBox::FromPoints(AxisAlignedBox* box, const Float3* points,
640  size_t count) NLIB_NOEXCEPT {
641  f128 pmin, pmax;
642  pmin = pmax = Vector3::LoadFloat3(points);
643  for (size_t i = 1; i < count; ++i) {
644  f128 pt = Vector3::LoadFloat3(points + i);
645  pmin = F128::Min(pmin, pt);
646  pmax = F128::Max(pmax, pt);
647  }
648  box->point_min = pmin;
649  box->point_max = pmax;
650 }
651 #ifdef _MSC_VER
652 # pragma endregion AxisAlignedBox function implementation
653 #endif
654 
655 //
656 // OrientedBox
657 //
658 #ifdef _MSC_VER
659 # pragma region OrientedBox
660 #endif
661 
662 // BoundingOrientedBox::Transform
663 inline void __vectorcall OrientedBox::Transform(OrientedBox* box, SimdMatrixArg m) NLIB_NOEXCEPT {
664  box->center = Vector3::Transform(this->center, m);
665  f128 inv_dx = Vector3::RecpLength(m.r[0]);
666  f128 inv_dy = Vector3::RecpLength(m.r[1]);
667  f128 inv_dz = Vector3::RecpLength(m.r[2]);
668 
669  {
670  SimdMatrix rot_matrix;
671  rot_matrix.r[0] = F128::Mult(m.r[0], inv_dx);
672  rot_matrix.r[1] = F128::Mult(m.r[1], inv_dy);
673  rot_matrix.r[2] = F128::Mult(m.r[2], inv_dz);
674  rot_matrix.r[3] = F128::LoadA16(F128::v0001_);
675  SimdQuaternion rot = Quaternion::FromRotationMatrix(rot_matrix);
676  box->rotation = Quaternion::Mult(this->rotation, rot);
677  }
678 
679  f128 scale;
680  scale = F128::Permute<0, 4, 8, 8>(inv_dx, inv_dy);
681  scale = F128::Permute<0, 1, 4, 8>(scale, inv_dz);
682  scale = F128::Recp(scale);
683  box->extent = F128::Mult(scale, this->extent);
684 }
685 
686 // BoundingOrientedBox::GetCorners
687 inline void OrientedBox::GetCorners(Float3* corners) const NLIB_NOEXCEPT {
688  f128 point_max = F128::Add(center, extent);
689  f128 point_min = F128::Sub(center, extent);
690  point_max = F128::SetZeroToLane<3>(point_max);
691  point_min = F128::SetZeroToLane<3>(point_min);
692  f128 pt;
693 
694  pt = point_min;
695  pt = F128::Add(center, Vector3::Rotate(pt, rotation));
696  Vector3::StoreFloat3(corners + 0, pt);
697 
698  pt = F128::Splat<true, false, false, false>(point_min, point_max);
699  pt = F128::Add(center, Vector3::Rotate(pt, rotation));
700  Vector3::StoreFloat3(corners + 1, pt);
701 
702  pt = F128::Splat<true, true, false, false>(point_min, point_max);
703  pt = F128::Add(center, Vector3::Rotate(pt, rotation));
704  Vector3::StoreFloat3(corners + 2, pt);
705 
706  pt = F128::Splat<false, true, false, false>(point_min, point_max);
707  pt = F128::Add(center, Vector3::Rotate(pt, rotation));
708  Vector3::StoreFloat3(corners + 3, pt);
709 
710  pt = F128::Splat<false, true, true, true>(point_min, point_max);
711  pt = F128::Add(center, Vector3::Rotate(pt, rotation));
712  Vector3::StoreFloat3(corners + 4, pt);
713 
714  pt = F128::Splat<false, false, true, true>(point_min, point_max);
715  pt = F128::Add(center, Vector3::Rotate(pt, rotation));
716  Vector3::StoreFloat3(corners + 5, pt);
717 
718  pt = F128::Splat<true, false, true, true>(point_min, point_max);
719  pt = F128::Add(center, Vector3::Rotate(pt, rotation));
720  Vector3::StoreFloat3(corners + 6, pt);
721 
722  pt = point_max;
723  pt = F128::Add(center, Vector3::Rotate(pt, rotation));
724  Vector3::StoreFloat3(corners + 7, pt);
725 }
726 
727 // BoundingOrientedBox::CreateFromBoundingBox
728 inline void OrientedBox::FromAxisAlignedBox(OrientedBox* box,
729  const AxisAlignedBox& aabb) NLIB_NOEXCEPT {
730  f128 c1_2 = F128::SetValue(0.5f, each_float);
731  f128 half = F128::Mult(c1_2, aabb.point_max);
732  box->rotation = F128::LoadA16(F128::v0001_);
733  SimdVector box_min = aabb.point_min;
734  box->center = F128::MultAdd(c1_2, box_min, half);
735  box->extent = F128::MultSub(c1_2, box_min, half);
736 }
737 
738 #ifdef _MSC_VER
739 # pragma endregion OrientedBox function implementation
740 #endif
741 
742 //
743 // Capsule
744 //
745 #ifdef _MSC_VER
746 # pragma region Capsule
747 #endif
748 inline Capsule::Capsule(const Float3& pt0, const Float3& pt1, float radius) NLIB_NOEXCEPT {
749  f128 sphere_ = Vector3::LoadFloat3(&pt0);
750  point = Vector3::LoadFloat3(&pt1);
751  sphere = F128::SetFloatToLane<3>(sphere_, radius);
752 }
753 
754 inline void __vectorcall Capsule::Transform(Capsule* capsule, SimdMatrixArg m) NLIB_NOEXCEPT {
755  capsule->sphere = Sphere::Transform(sphere, m);
756  capsule->point = Vector3::Transform(point, m);
757 }
758 #ifdef _MSC_VER
759 # pragma endregion Capsule function implementation
760 #endif
761 
762 //
763 // Frustum
764 //
765 #ifdef _MSC_VER
766 # pragma region Frustum
767 #endif
768 
769 inline Frustum::Frustum(const Frustum& rhs) NLIB_NOEXCEPT : m_Center(rhs.m_Center),
770  m_Rotation(rhs.m_Rotation),
771  m_NearPlane(rhs.m_NearPlane),
772  m_FarPlane(rhs.m_FarPlane),
773  m_TopPlane(rhs.m_TopPlane),
774  m_BottomPlane(rhs.m_BottomPlane),
775  m_LeftPlane(rhs.m_LeftPlane),
776  m_RightPlane(rhs.m_RightPlane) {
777 }
778 
779 inline Frustum& Frustum::operator=(const Frustum& rhs) NLIB_NOEXCEPT {
780  m_Center = rhs.m_Center;
781  m_Rotation = rhs.m_Rotation;
782  m_NearPlane = rhs.m_NearPlane;
783  m_FarPlane = rhs.m_FarPlane;
784  m_TopPlane = rhs.m_TopPlane;
785  m_BottomPlane = rhs.m_BottomPlane;
786  m_LeftPlane = rhs.m_LeftPlane;
787  m_RightPlane = rhs.m_RightPlane;
788  return *this;
789 }
790 
791 inline Frustum::Frustum(SimdVectorArg center, SimdQuaternionArg rotation, float top, float bottom,
792  float left, float right, float n, float f) NLIB_NOEXCEPT {
793  // Right-handed
794  NLIB_ASSERT(n > 0.f);
795  m_Center = center;
796  m_Rotation = rotation;
797  m_NearPlane = Plane::Normalize(F128::SetValue(0.f, 0.f, 1.f, n));
798  m_FarPlane = Plane::Normalize(F128::SetValue(0.f, 0.f, -1.f, -f));
799  m_TopPlane = Plane::Normalize(F128::SetValue(0.f, 1.f, top / n, 0.f));
800  m_BottomPlane = Plane::Normalize(F128::SetValue(0.f, -1.f, -bottom / n, 0.f));
801  m_LeftPlane = Plane::Normalize(F128::SetValue(-1.f, 0.f, -left / n, 0.f));
802  m_RightPlane = Plane::Normalize(F128::SetValue(1.f, 0.f, right / n, 0.f));
803 }
804 
805 inline void __vectorcall Frustum::Set(SimdVectorArg center, SimdQuaternionArg rotation, float top,
806  float bottom, float left, float right, float n,
807  float f) NLIB_NOEXCEPT {
808  // Right-handed
809  NLIB_ASSERT(n > 0.f);
810  m_Center = center;
811  m_Rotation = rotation;
812  m_NearPlane = Plane::Normalize(F128::SetValue(0.f, 0.f, 1.f, n));
813  m_FarPlane = Plane::Normalize(F128::SetValue(0.f, 0.f, -1.f, -f));
814  m_TopPlane = Plane::Normalize(F128::SetValue(0.f, 1.f, top / n, 0.f));
815  m_BottomPlane = Plane::Normalize(F128::SetValue(0.f, -1.f, -bottom / n, 0.f));
816  m_LeftPlane = Plane::Normalize(F128::SetValue(-1.f, 0.f, -left / n, 0.f));
817  m_RightPlane = Plane::Normalize(F128::SetValue(1.f, 0.f, right / n, 0.f));
818 }
819 
820 inline void __vectorcall Frustum::Transform(Frustum* frustum, float scale,
821  SimdQuaternionArg rotation,
822  SimdVectorArg translation) const NLIB_NOEXCEPT {
823  f128 s = F128::SetValue(1.f, 1.f, 1.f, scale);
824  frustum->m_Rotation = Quaternion::Mult(m_Rotation, rotation);
825  SimdVector center = Vector3::Rotate(F128::Mult(scale, m_Center), rotation);
826  frustum->m_Center = F128::Add(center, translation);
827  frustum->m_TopPlane = m_TopPlane;
828  frustum->m_BottomPlane = m_BottomPlane;
829  frustum->m_LeftPlane = m_LeftPlane;
830  frustum->m_RightPlane = m_RightPlane;
831  frustum->m_NearPlane = F128::Mult(s, m_NearPlane);
832  frustum->m_FarPlane = F128::Mult(s, m_FarPlane);
833 }
834 
835 inline void __vectorcall Frustum::Transform(Frustum* frustum, SimdMatrixArg m) const NLIB_NOEXCEPT {
836  f128 r0 = m.r[0];
837  f128 r1 = m.r[1];
838  f128 r2 = m.r[2];
839  {
840  SimdQuaternion rotation = m_Rotation;
841  SimdVector center = m_Center;
842  SimdMatrix M;
843  M.r[0] = Vector3::Normalize(r0);
844  M.r[1] = Vector3::Normalize(r1);
845  M.r[2] = Vector3::Normalize(r2);
846  M.r[3] = F128::LoadA16(F128::v0001_);
847  SimdQuaternion rot = Quaternion::FromRotationMatrix(M);
848  frustum->m_Rotation = Quaternion::Mult(rotation, rot);
849  frustum->m_Center = Vector3::Transform(center, m);
850  }
851 
852  float scale;
853  {
854  f128 x_sq = Vector3::LengthSq(r0);
855  f128 y_sq = Vector3::LengthSq(r1);
856  f128 z_sq = Vector3::LengthSq(r2);
857  f128 sq = F128::Max(z_sq, F128::Max(x_sq, y_sq));
858  scale = F128::GetFloatFromLane<0>(F128::Sqrt(sq));
859  }
860 
861  f128 s = F128::SetValue(1.f, 1.f, 1.f, scale);
862  frustum->m_TopPlane = m_TopPlane;
863  frustum->m_BottomPlane = m_BottomPlane;
864  frustum->m_LeftPlane = m_LeftPlane;
865  frustum->m_RightPlane = m_RightPlane;
866  frustum->m_NearPlane = F128::Mult(s, m_NearPlane);
867  frustum->m_FarPlane = F128::Mult(s, m_FarPlane);
868 }
869 
870 inline void Frustum::GetCorners(Float3* corners) const NLIB_NOEXCEPT {
871  float n = F128::GetFloatFromLane<3>(m_NearPlane) / F128::GetFloatFromLane<2>(m_NearPlane);
872  float f = F128::GetFloatFromLane<3>(m_FarPlane) / F128::GetFloatFromLane<2>(m_FarPlane);
873  float top = F128::GetFloatFromLane<2>(m_TopPlane) / F128::GetFloatFromLane<1>(m_TopPlane);
874  top = top * n;
875  float bottom =
876  F128::GetFloatFromLane<2>(m_BottomPlane) / F128::GetFloatFromLane<1>(m_BottomPlane);
877  bottom = bottom * n;
878  float left = F128::GetFloatFromLane<2>(m_LeftPlane) / F128::GetFloatFromLane<0>(m_LeftPlane);
879  left = left * n;
880  float right =
881  F128::GetFloatFromLane<2>(m_RightPlane) / F128::GetFloatFromLane<0>(m_RightPlane);
882  right = right * n;
883 
884  SimdVector pt;
885  SimdQuaternion rot = m_Rotation;
886  SimdVector center = m_Center;
887 
888  // left top
889  pt = F128::SetValue(left, top, -n, 0.f);
890  pt = F128::Add(Vector3::Rotate(pt, rot), center);
891  Vector3::StoreFloat3(corners + 0, pt);
892 
893  // right top
894  pt = F128::SetValue(right, top, -n, 0.f);
895  pt = F128::Add(Vector3::Rotate(pt, rot), center);
896  Vector3::StoreFloat3(corners + 1, pt);
897 
898  // right bottom
899  pt = F128::SetValue(right, bottom, -n, 0.f);
900  pt = F128::Add(Vector3::Rotate(pt, rot), center);
901  Vector3::StoreFloat3(corners + 2, pt);
902 
903  // left bottom
904  pt = F128::SetValue(left, bottom, -n, 0.f);
905  pt = F128::Add(Vector3::Rotate(pt, rot), center);
906  Vector3::StoreFloat3(corners + 3, pt);
907 
908  float f_n = f / n;
909 
910  // left top far
911  pt = F128::SetValue(left * f_n, top * f_n, -f, 0.f);
912  pt = F128::Add(Vector3::Rotate(pt, rot), center);
913  Vector3::StoreFloat3(corners + 4, pt);
914 
915  // right top far
916  pt = F128::SetValue(right * f_n, top * f_n, -f, 0.f);
917  pt = F128::Add(Vector3::Rotate(pt, rot), center);
918  Vector3::StoreFloat3(corners + 5, pt);
919 
920  // right bottom far
921  pt = F128::SetValue(right * f_n, bottom * f_n, -f, 0.f);
922  pt = F128::Add(Vector3::Rotate(pt, rot), center);
923  Vector3::StoreFloat3(corners + 6, pt);
924 
925  // left bottom far
926  pt = F128::SetValue(left * f_n, bottom * f_n, -f, 0.f);
927  pt = F128::Add(Vector3::Rotate(pt, rot), center);
928  Vector3::StoreFloat3(corners + 7, pt);
929 }
930 
931 #ifdef _MSC_VER
932 # pragma endregion Frustum function implementation
933 #endif
934 
935 //
936 // DistanceSq
937 //
938 #ifdef _MSC_VER
939 # pragma region DistanceSq
940 #endif
941 NLIB_M(f128) DistanceSq::PointLine(SimdVectorArg point, SimdVectorArg line_point,
942  SimdVectorArg line_dir_normalized) NLIB_NOEXCEPT {
943  SimdVector l0_pt = F128::Sub(point, line_point);
944  f128 dot = Vector3::Dot(l0_pt, line_dir_normalized);
945  SimdVector dist_vec = F128::Mult(line_dir_normalized, dot);
946  dist_vec = F128::Sub(l0_pt, dist_vec);
947  return Vector3::LengthSq(dist_vec);
948 }
949 
950 NLIB_M(f128) DistanceSq::PointRay(SimdVectorArg point, SimdVectorArg ray_point,
951  SimdVectorArg ray_dir_normalized) NLIB_NOEXCEPT {
952  SimdVector l0_pt = F128::Sub(point, ray_point);
953  f128 dot = Vector3::Dot(l0_pt, ray_dir_normalized);
954  SimdVector dist_vec = F128::Mult(ray_dir_normalized, dot);
955  dist_vec = F128::Sub(l0_pt, dist_vec);
956 
957  f128 distsq0 = Vector3::LengthSq(dist_vec);
958  f128 distsq1 = Vector3::LengthSq(l0_pt);
959  f128 cmp = F128::CmpLt(dot, F128::SetZero());
960  return F128::Select(cmp, distsq1, distsq0);
961 }
962 
963 NLIB_M(f128) DistanceSq::PointSegment(SimdVectorArg point, SimdVectorArg segment_point0,
964  SimdVectorArg segment_point1) NLIB_NOEXCEPT {
965  SimdVector l0_pt = F128::Sub(point, segment_point0);
966  SimdVector l1_pt = F128::Sub(point, segment_point1);
967  SimdVector seg_dir_normalized = F128::Sub(segment_point1, segment_point0);
968  f128 seg_recplen = Vector3::RecpLength(seg_dir_normalized);
969  seg_dir_normalized = F128::Mult(seg_dir_normalized, seg_recplen);
970 
971  f128 dot = Vector3::Dot(l0_pt, seg_dir_normalized);
972  SimdVector dist_vec = F128::Mult(seg_dir_normalized, dot);
973  dist_vec = F128::Sub(l0_pt, dist_vec);
974 
975  f128 distsq0 = Vector3::LengthSq(dist_vec);
976  f128 distsq1 = Vector3::LengthSq(l0_pt);
977  f128 distsq2 = Vector3::LengthSq(l1_pt);
978  f128 cmp = F128::CmpLt(dot, F128::SetZero());
979  f128 ret = F128::Select(cmp, distsq1, distsq0);
980  cmp = F128::CmpLt(F128::Mult(dot, seg_recplen), F128::SetOne());
981  ret = F128::Select(cmp, ret, distsq2);
982  return ret;
983 }
984 
985 // pointer arguments can be NULL
986 NLIB_M(f128) DistanceSq::PointPlane(SimdVector* point_on_plane, SimdVectorArg point,
987  SimdPlaneArg plane_normalized) NLIB_NOEXCEPT {
988  SimdVector pt = F128::SetZeroToLane<3>(point);
989  f128 dot = Vector4::Dot(pt, plane_normalized);
990  f128 ret = F128::Mult(dot, dot);
991  if (point_on_plane) {
992  *point_on_plane = F128::Sub(point, F128::Mult(dot, plane_normalized));
993  }
994  return ret;
995 }
996 
997 NLIB_M(f128) DistanceSq::SpherePlane(SimdSphereArg sphere,
998  SimdPlaneArg plane_normalized) NLIB_NOEXCEPT {
999  SimdVector c = F128::SetZeroToLane<3>(sphere);
1000  f128 r = F128::SetValue<3>(sphere, each_select32);
1001  f128 dist = F128::Abs(Vector4::Dot(c, plane_normalized));
1002  f128 cmp = F128::CmpGt(dist, r);
1003  f128 ret = F128::Sub(dist, r);
1004  ret = F128::Mult(ret, ret);
1005  return F128::Select(cmp, ret, F128::SetZero());
1006 }
1007 
1008 // pointer arguments can be NULL
1009 NLIB_M(f128) DistanceSq::PointAxisAlignedBox(SimdVector* point_on_box, SimdVectorArg point,
1010  const AxisAlignedBox& aabb) NLIB_NOEXCEPT {
1011  f128 vv = F128::Clamp(point, aabb.point_min, aabb.point_max);
1012  f128 distsq = F128::Sub(vv, point);
1013  distsq = Vector3::LengthSq(distsq);
1014  if (point_on_box) *point_on_box = vv;
1015  return distsq;
1016 }
1017 
1018 // pointer arguments can be NULL
1019 NLIB_M(f128) DistanceSq::LineLine(SimdVector* point_on_line0, SimdVector* point_on_line1,
1020  SimdVectorArg line0_point, SimdVectorArg line0_dir_normalized,
1021  SimdVectorArg line1_point,
1022  SimdVectorArg line1_dir_normalized) NLIB_NOEXCEPT {
1023  SimdVector u;
1024  u = F128::Sub(line0_point, line1_point);
1025  f128 b = Vector3::Dot(line0_dir_normalized, line1_dir_normalized);
1026  f128 d = Vector3::Dot(line0_dir_normalized, u);
1027  f128 e = Vector3::Dot(line1_dir_normalized, u);
1028  f128 det = F128::MultSub(b, b, F128::SetOne());
1029 
1030  f128 recp_det = F128::Recp(det);
1031  f128 s = F128::MultAdd(b, e, F128::Negate(d));
1032  f128 t = F128::MultSub(b, d, e);
1033  s = F128::Mult(s, recp_det);
1034  t = F128::Mult(t, recp_det);
1035 
1036  SimdVector tmp = F128::Mult(s, line0_dir_normalized);
1037  tmp = F128::MultSub(t, line1_dir_normalized, tmp);
1038  SimdVector ret = F128::Add(u, tmp);
1039  ret = Vector3::LengthSq(ret);
1040 
1041  f128 parallel = F128::CmpLt(det, F128::SetEpsilon());
1042  tmp = Vector3::LengthSq(F128::MultSub(e, line1_dir_normalized, u));
1043  ret = F128::Select(parallel, tmp, ret);
1044 
1045  if (point_on_line0) {
1046  s = F128::Select(parallel, F128::SetZero(), s);
1047  *point_on_line0 = F128::MultAdd(s, line0_dir_normalized, line0_point);
1048  }
1049  if (point_on_line1) {
1050  t = F128::Select(parallel, e, t);
1051  *point_on_line1 = F128::MultAdd(t, line1_dir_normalized, line1_point);
1052  }
1053 
1054  return ret;
1055 }
1056 
1057 // pointer arguments can be NULL
1058 NLIB_M(f128) DistanceSq::SegmentSegment(SimdVector* point_on_segment0,
1059  SimdVector* point_on_segment1,
1060  SimdVectorArg segment0_point0,
1061  SimdVectorArg segment0_point1,
1062  SimdVectorArg segment1_point0,
1063  SimdVectorArg segment1_point1) NLIB_NOEXCEPT {
1064  SimdVector u = F128::Sub(segment0_point1, segment0_point0);
1065  SimdVector v = F128::Sub(segment1_point1, segment1_point0);
1066  SimdVector w = F128::Sub(segment0_point0, segment1_point0);
1067  f128 a = Vector3::LengthSq(u);
1068  f128 b = Vector3::Dot(u, v);
1069  f128 c = Vector3::LengthSq(v);
1070  f128 d = Vector3::Dot(u, w);
1071  f128 e = Vector3::Dot(v, w);
1072  f128 D = F128::MultSub(b, b, F128::Mult(a, c)); // D >= 0, D == 0 if parallel
1073 
1074  f128 zero = F128::SetZero();
1075  f128 epsilon = F128::SetEpsilon();
1076  f128 sD;
1077  f128 tD;
1078  f128 sN = F128::MultSub(c, d, F128::Mult(b, e)); // b * e - c * d
1079  f128 tN = F128::MultSub(b, d, F128::Mult(a, e)); // a * e - b * d
1080 
1081  {
1082  f128 parallel = F128::CmpLt(D, epsilon);
1083  f128 sN_under = F128::CmpLt(sN, zero); // sc < 0
1084  f128 sN_out = F128::Or(sN_under, F128::CmpGt(sN, D));
1085  f128 cmp = F128::Or(parallel, sN_out);
1086  f128 under_or_parallel = F128::Or(parallel, sN_under);
1087 
1088  // sN:
1089  // 0.f if parallel
1090  // 0.f if sN < 0.f
1091  // D if sN > D
1092  // sN otherwise
1093  sN = F128::Select(cmp, F128::Select(under_or_parallel, zero, D), sN);
1094 
1095  // tN:
1096  // e if parallel
1097  // e if sN < 0.f
1098  // e + b if sN > D
1099  // tN otherwise
1100  tN = F128::Select(cmp, F128::Select(under_or_parallel, e, F128::Add(e, b)), tN);
1101 
1102  // tD:
1103  // c if parallel
1104  // c if sN < 0.f
1105  // c if sN > D
1106  // D otherwise
1107  tD = F128::Select(cmp, c, D);
1108 
1109  // 1.f if parallel
1110  // D otherwise
1111  sD = F128::Select(parallel, F128::SetOne(), D);
1112  }
1113 
1114  {
1115  f128 tN_under = F128::CmpLt(tN, zero);
1116  f128 tN_out = F128::Or(tN_under, F128::CmpGt(tN, tD));
1117 
1118  // tN:
1119  // 0.f if tN < 0.f
1120  // tD if tN > tD
1121  // tN otherwise
1122  tN = F128::Clamp(tN, zero, tD);
1123 
1124  f128 val = F128::Select(tN_under, F128::Negate(d), F128::Sub(b, d));
1125 
1126  // sD:
1127  // a if tN < 0.f && !(0.f <= -d <= a)
1128  // a if tN > tD && !(0.f <= b - d <= a)
1129  // sD otherwise
1130  f128 val_in = F128::And(F128::CmpGe(val, zero), F128::CmpLe(val, a));
1131  sD = F128::Select(F128::And(tN_out, val_in), a, sD);
1132 
1133  // sN:
1134  // 0.f if tN < 0.f && -d < 0.f
1135  // 0.f if tN > tD && b - d < 0.f
1136  // sD if tN < 0.f && -d > a
1137  // sD if tN > tD && b - d > a
1138  // sN otherwise
1139  f128 sntmp = F128::Select(F128::CmpLt(val, zero), zero, val);
1140  sntmp = F128::Select(F128::CmpGt(val, a), sD, sntmp);
1141  sN = F128::Select(tN_out, sntmp, sN);
1142  }
1143 
1144  f128 sc = F128::Select(F128::InBound(sN, epsilon), zero, F128::Div(sN, sD));
1145  f128 tc = F128::Select(F128::InBound(tN, epsilon), zero, F128::Div(tN, tD));
1146 
1147  // w + sc * u - tc * v
1148  f128 pt_s0 = F128::MultAdd(sc, u, segment0_point0);
1149  f128 pt_s1 = F128::MultAdd(tc, v, segment1_point0);
1150  f128 ret = F128::Sub(pt_s0, pt_s1);
1151  ret = Vector3::LengthSq(ret);
1152 
1153  if (point_on_segment0) *point_on_segment0 = pt_s0;
1154  if (point_on_segment1) *point_on_segment1 = pt_s1;
1155  return ret;
1156 }
1157 
1158 // pointer arguments can be NULL
1159 NLIB_M(f128) DistanceSq::LineRay(SimdVector* point_on_line, SimdVector* point_on_ray,
1160  SimdVectorArg line_point, SimdVectorArg line_dir_normalized,
1161  SimdVectorArg ray_point,
1162  SimdVectorArg ray_dir_normalized) NLIB_NOEXCEPT {
1163  SimdVector u = F128::Sub(line_point, ray_point);
1164  f128 b = Vector3::Dot(line_dir_normalized, ray_dir_normalized);
1165  f128 d = Vector3::Dot(line_dir_normalized, u);
1166  f128 e = Vector3::Dot(ray_dir_normalized, u);
1167  f128 one = F128::SetOne();
1168  f128 zero = F128::SetZero();
1169  f128 det = F128::MultSub(b, b, one);
1170  f128 parallel = F128::CmpLt(det, F128::SetEpsilon()); // true if parallel
1171 
1172  // tNum:
1173  // e if parallel
1174  // e - b * d otherwise
1175  f128 tNum = F128::MultSub(b, d, e);
1176  tNum = F128::Select(parallel, e, tNum);
1177  f128 tNum_negative = F128::CmpLt(tNum, zero);
1178 
1179  // sNum:
1180  // 0.f if parallel
1181  // b * e - d otherwise
1182  f128 minus_d = F128::Negate(d);
1183  f128 sNum = F128::MultAdd(b, e, minus_d);
1184  sNum = F128::Select(parallel, zero, sNum);
1185 
1186  // tDenom:
1187  // 1.f if parallel
1188  // det otherwise
1189  f128 tDenom = F128::Select(parallel, one, det);
1190 
1191  // sDenom:
1192  // 1.f if tNum < 0.f or parallel
1193  // det otherwise
1194  f128 sDenom = F128::Select(F128::Or(tNum_negative, parallel), one, det);
1195 
1196  // sNum:
1197  // -d if tNum < 0.f
1198  // sNum otherwise
1199  sNum = F128::Select(tNum_negative, minus_d, sNum);
1200 
1201  // tNum:
1202  // 0.f if tNum < 0.f
1203  // tNum otherwise
1204  tNum = F128::Select(tNum_negative, zero, tNum);
1205 
1206  f128 s = F128::Div(sNum, sDenom);
1207  f128 t = F128::Div(tNum, tDenom);
1208 
1209  f128 pt_l = F128::MultAdd(s, line_dir_normalized, line_point);
1210  f128 pt_r = F128::MultAdd(t, ray_dir_normalized, ray_point);
1211  SimdVector ret = F128::Sub(pt_l, pt_r);
1212  ret = Vector3::LengthSq(ret);
1213 
1214  if (point_on_line) *point_on_line = pt_l;
1215  if (point_on_ray) *point_on_ray = pt_r;
1216  return ret;
1217 }
1218 
1219 // pointer arguments can be NULL
1220 NLIB_M(f128) DistanceSq::LineSegment(SimdVector* point_on_line, SimdVector* point_on_segment,
1221  SimdVectorArg line_point, SimdVectorArg line_dir_normalized,
1222  SimdVectorArg segment_point0,
1223  SimdVectorArg segment_point1) NLIB_NOEXCEPT {
1224  SimdVector seg_dir = F128::Sub(segment_point1, segment_point0);
1225  SimdVector u = F128::Sub(line_point, segment_point0);
1226  f128 b = Vector3::Dot(line_dir_normalized, seg_dir);
1227  f128 c = Vector3::LengthSq(seg_dir);
1228  f128 d = Vector3::Dot(line_dir_normalized, u);
1229  f128 e = Vector3::Dot(seg_dir, u);
1230  f128 det = F128::MultSub(b, b, c); // 0 if parallel
1231  f128 parallel = F128::CmpLt(det, F128::SetEpsilon());
1232  f128 zero = F128::SetZero();
1233  f128 one = F128::SetOne();
1234 
1235  // tNum:
1236  // e if parallel
1237  // e - b * d otherwise
1238  f128 tNum = F128::Select(parallel, e, F128::MultSub(b, d, e));
1239 
1240  // tDenom:
1241  // c if parallel
1242  // det otherwise
1243  f128 tDenom = F128::Select(parallel, c, det);
1244 
1245  // sNum:
1246  // 0.f if parallel
1247  // b * e - c * d otherwise
1248  f128 tmp = F128::Mult(b, e);
1249  tmp = F128::MultSub(c, d, tmp);
1250  f128 sNum = F128::Select(parallel, zero, tmp);
1251 
1252  // sNum:
1253  // -d if tNum < 0.f
1254  // b - d if tNum > tDenom
1255  // sNum otherwise
1256  f128 tNum_under = F128::CmpLt(tNum, zero);
1257  f128 tNum_out = F128::Or(tNum_under, F128::CmpGt(tNum, tDenom));
1258  tmp = F128::Select(tNum_under, F128::Negate(d), F128::Sub(b, d));
1259  sNum = F128::Select(tNum_out, tmp, sNum);
1260 
1261  // sDenom:
1262  // 1.f if tNum < 0.f or tNum > tDenom or parallel
1263  // det otherwise
1264  f128 sDenom = F128::Select(F128::Or(tNum_out, parallel), one, det);
1265 
1266  // tNum:
1267  // 0.f if tNum < 0.f
1268  // tDenom if tNum > tDenom
1269  tNum = F128::Clamp(tNum, zero, tDenom);
1270 
1271  f128 s = F128::Div(sNum, sDenom);
1272  f128 t = F128::Div(tNum, tDenom);
1273 
1274  f128 pt_l = F128::MultAdd(s, line_dir_normalized, line_point);
1275  f128 pt_s = F128::MultAdd(t, seg_dir, segment_point0);
1276  SimdVector ret = F128::Sub(pt_l, pt_s);
1277  ret = Vector3::LengthSq(ret);
1278 
1279  if (point_on_line) *point_on_line = pt_l;
1280  if (point_on_segment) *point_on_segment = pt_s;
1281  return ret;
1282 }
1283 
1284 // pointer arguments can be NULL
1285 NLIB_M(f128) DistanceSq::RayRay(SimdVector* point_on_ray0, SimdVector* point_on_ray1,
1286  SimdVectorArg ray0_point, SimdVectorArg ray0_dir_normalized,
1287  SimdVectorArg ray1_point,
1288  SimdVectorArg ray1_dir_normalized) NLIB_NOEXCEPT {
1289  SimdVector u = F128::Sub(ray0_point, ray1_point);
1290  f128 b = Vector3::Dot(ray0_dir_normalized, ray1_dir_normalized);
1291  f128 d = Vector3::Dot(ray0_dir_normalized, u);
1292  f128 e = Vector3::Dot(ray1_dir_normalized, u);
1293  f128 zero = F128::SetZero();
1294  f128 one = F128::SetOne();
1295  f128 det = F128::MultSub(b, b, one);
1296  f128 parallel = F128::CmpLt(det, F128::SetEpsilon());
1297 
1298  // sNum:
1299  // 0.f if parallel
1300  // b * e - d otherwise
1301  f128 minus_d = F128::Negate(d);
1302  f128 sNum = F128::Select(parallel, zero, F128::MultAdd(b, e, minus_d));
1303  f128 sNum_negative = F128::CmpLt(sNum, zero);
1304  f128 parallel_or_sNum_negative = F128::Or(parallel, sNum_negative);
1305 
1306  // tNum:
1307  // e if parallel or sNum < 0.f
1308  // e - b * d otherwise
1309  f128 tNum = F128::Select(parallel_or_sNum_negative, e, F128::MultSub(b, d, e));
1310 
1311  // tDenom:
1312  // 1.f if parallel or sNum < 0.f
1313  // det otherwise
1314  f128 tDenom = F128::Select(parallel_or_sNum_negative, one, det);
1315 
1316  // sNum:
1317  // 0.f if sNum < 0.f
1318  // sNum otherwise
1319  sNum = F128::Select(sNum_negative, zero, sNum);
1320 
1321  // sNum:
1322  // 0.f if tNum < 0.f and d > 0
1323  // -d if tNum < 0.f and d <= 0
1324  // sNum otherwise
1325  f128 tNum_negative = F128::CmpLt(tNum, zero);
1326  f128 d_negative = F128::CmpLe(d, zero);
1327  f128 tmp = F128::Select(d_negative, minus_d, zero);
1328  sNum = F128::Select(tNum_negative, tmp, sNum);
1329 
1330  // sDenom:
1331  // 1.f if (tNum < 0.f && d <= 0) || parallel
1332  // det otherwise
1333  tmp = F128::Or(parallel, F128::And(tNum_negative, d_negative));
1334  f128 sDenom = F128::Select(tmp, one, det);
1335 
1336  // tNum:
1337  // 0.f if tNum < 0.f
1338  // tNum otherwise
1339  tNum = F128::Select(tNum_negative, zero, tNum);
1340 
1341  f128 s = F128::Div(sNum, sDenom);
1342  f128 t = F128::Div(tNum, tDenom);
1343 
1344  f128 pt_r0 = F128::MultAdd(s, ray0_dir_normalized, ray0_point);
1345  f128 pt_r1 = F128::MultAdd(t, ray1_dir_normalized, ray1_point);
1346  SimdVector ret = F128::Sub(pt_r0, pt_r1);
1347  ret = Vector3::LengthSq(ret);
1348 
1349  if (point_on_ray0) *point_on_ray0 = pt_r0;
1350  if (point_on_ray1) *point_on_ray1 = pt_r1;
1351  return ret;
1352 }
1353 
1354 // pointer arguments can be NULL
1355 NLIB_M(f128) DistanceSq::RaySegment(SimdVector* point_on_ray, SimdVector* point_on_segment,
1356  SimdVectorArg ray_point, SimdVectorArg ray_dir_normalized,
1357  SimdVectorArg segment_point0,
1358  SimdVectorArg segment_point1) NLIB_NOEXCEPT {
1359  SimdVector seg_dir = F128::Sub(segment_point1, segment_point0);
1360  SimdVector u = F128::Sub(ray_point, segment_point0);
1361  f128 b = Vector3::Dot(ray_dir_normalized, seg_dir);
1362  f128 c = Vector3::LengthSq(seg_dir);
1363  f128 d = Vector3::Dot(ray_dir_normalized, u);
1364  f128 e = Vector3::Dot(seg_dir, u);
1365  f128 det = F128::MultSub(b, b, c);
1366  f128 zero = F128::SetZero();
1367  f128 one = F128::SetOne();
1368  f128 parallel = F128::CmpLt(det, F128::SetEpsilon());
1369  f128 tmp0, tmp1;
1370 
1371  // sNum:
1372  // 0.f if parallel
1373  // b * e - c * d otherwise
1374  tmp0 = F128::Mult(b, e);
1375  tmp0 = F128::MultSub(c, d, tmp0);
1376  f128 sNum = F128::Select(parallel, zero, tmp0);
1377 
1378  // tNum:
1379  // e if parallel or sNum < 0.f
1380  // e - b * d otherwise
1381  f128 sNum_negative = F128::CmpLt(sNum, zero);
1382  f128 parallel_or_sNum_negative = F128::Or(parallel, sNum_negative);
1383  tmp1 = F128::MultSub(b, d, e);
1384  f128 tNum = F128::Select(parallel_or_sNum_negative, e, tmp1);
1385 
1386  // tDenom:
1387  // c if parallel or sNum < 0.f
1388  // det otherwise
1389  f128 tDenom = F128::Select(parallel_or_sNum_negative, c, det);
1390 
1391  // sNum:
1392  // 0.f if sNum < 0.f
1393  // sNum otherwise
1394  sNum = F128::Select(sNum_negative, zero, sNum);
1395 
1396  // sNum:
1397  // 0.f if tNum < 0.f and -d < 0.f
1398  // -d if tNum < 0.f and !(-d < 0.f)
1399  // 0.f if tNum > tDenom and b - d < 0.f
1400  // b - d if tNum > tDenom and !(b - d < 0.f)
1401  // sNum otherwise
1402  f128 minus_d = F128::Negate(d);
1403  f128 b_minus_d = F128::Sub(b, d);
1404  f128 tNum_negative = F128::CmpLt(tNum, zero);
1405  f128 tNum_out = F128::Or(tNum_negative, F128::CmpGt(tNum, tDenom));
1406  tmp0 = F128::Select(tNum_negative, minus_d, b_minus_d);
1407  f128 cmp = F128::CmpGe(tmp0, zero);
1408  tmp1 = F128::Select(cmp, tmp0, zero);
1409  sNum = F128::Select(tNum_out, tmp1, sNum);
1410 
1411  // sDenom:
1412  // 1.f parallel
1413  // 1.f tNum < 0.f and !(-d < 0)
1414  // 1.f tNum > tDenom and !(b - d < 0.f)
1415  // det otherwise
1416  tmp0 = F128::Or(parallel, F128::And(tNum_out, cmp));
1417  f128 sDenom = F128::Select(tmp0, one, det);
1418 
1419  // tNum:
1420  // 0.f if tNum < 0.f
1421  // tDenom if tNum > tDenom
1422  // tNum otherwise
1423  tNum = F128::Clamp(tNum, zero, tDenom);
1424 
1425  f128 s = F128::Div(sNum, sDenom);
1426  f128 t = F128::Div(tNum, tDenom);
1427 
1428  f128 pt_r = F128::MultAdd(s, ray_dir_normalized, ray_point);
1429  f128 pt_s = F128::MultAdd(t, seg_dir, segment_point0);
1430  SimdVector ret = F128::Sub(pt_r, pt_s);
1431  ret = Vector3::LengthSq(ret);
1432 
1433  if (point_on_ray) *point_on_ray = pt_r;
1434  if (point_on_segment) *point_on_segment = pt_s;
1435  return ret;
1436 }
1437 
1438 #ifdef _MSC_VER
1439 # pragma endregion DistanceSq function implementation
1440 #endif
1441 
1442 //
1443 // Intersection
1444 //
1445 #ifdef _MSC_VER
1446 # pragma region Intersection
1447 #endif
1448 
1449 // nan if parallel
1450 // XMPlaneIntersectLine
1451 NLIB_M(SimdVector) Intersection::PlaneLine(SimdPlaneArg plane, SimdVectorArg line_point,
1452  SimdVectorArg line_dir_normalized) NLIB_NOEXCEPT {
1453  f128 dot = Plane::DotNormal(plane, line_dir_normalized);
1454  SimdVector t = Plane::DotCoord(plane, line_point);
1455  t = F128::Div(t, dot);
1456  SimdVector ret = F128::MultSub(t, line_dir_normalized, line_point);
1457 
1458  f128 eps = F128::SetEpsilon();
1459  f128 zero = F128::SetZero();
1460  f128 dotzero = F128::CmpNearEq(zero, dot, eps);
1461  f128 nan = F128::SetNaN();
1462 
1463  return F128::Select(dotzero, nan, ret);
1464 }
1465 
1466 // nan if parallel or no intersection
1467 NLIB_M(SimdVector) Intersection::PlaneRay(SimdPlaneArg plane, SimdVectorArg ray_point,
1468  SimdVectorArg ray_dir_normalized) NLIB_NOEXCEPT {
1469  f128 dot = Plane::DotNormal(plane, ray_dir_normalized);
1470  SimdVector t = Plane::DotCoord(plane, ray_point);
1471  t = F128::Div(t, dot);
1472  SimdVector ret = F128::MultSub(t, ray_dir_normalized, ray_point);
1473 
1474  f128 eps = F128::SetEpsilon();
1475  f128 zero = F128::SetZero();
1476  f128 dotzero = F128::CmpNearEq(zero, dot, eps);
1477  f128 t_gt_0 = F128::CmpGt(t, zero);
1478  f128 nan = F128::SetNaN();
1479 
1480  return F128::Select(F128::Or(t_gt_0, dotzero), nan, ret);
1481 }
1482 
1483 // nan if no intersection
1484 NLIB_M(SimdVector) Intersection::PlaneSegment(SimdPlaneArg plane, SimdVectorArg segment_point0,
1485  SimdVectorArg segment_point1) NLIB_NOEXCEPT {
1486  SimdVector dir = F128::Sub(segment_point1, segment_point0);
1487  f128 recp_len = Vector3::RecpLength(dir);
1488  dir = F128::Mult(dir, recp_len); // normalization
1489 
1490  f128 dot = Plane::DotNormal(plane, dir);
1491  SimdVector t = Plane::DotCoord(plane, segment_point1);
1492  t = F128::Div(t, dot);
1493  SimdVector ret = F128::MultSub(t, dir, segment_point1);
1494 
1495  f128 t_gt_len = F128::CmpGt(F128::Mult(t, recp_len), F128::SetOne());
1496  f128 zero = F128::SetZero();
1497  f128 t_lt_0 = F128::CmpLt(t, zero);
1498  f128 nan = F128::SetNaN();
1499 
1500  return F128::Select(F128::Or(t_gt_len, t_lt_0), nan, ret);
1501 }
1502 
1503 // BoundingSphere::Intersects()
1504 NLIB_B Intersection::SphereRay(float* distance, SimdSphereArg sphere, SimdVectorArg ray_point,
1505  SimdVectorArg ray_dir_normalized) NLIB_NOEXCEPT {
1506  SimdVector pt_c = F128::Sub(sphere, ray_point);
1507 
1508  f128 len_on_ray = Vector3::Dot(pt_c, ray_dir_normalized);
1509  f128 lensq_pt_c = Vector3::LengthSq(pt_c);
1510  f128 r_sq = F128::SetValue<3>(sphere, each_select32);
1511  r_sq = F128::Mult(r_sq, r_sq);
1512  f128 distsq = F128::MultSub(len_on_ray, len_on_ray, lensq_pt_c);
1513 
1514  f128 origin_inside = F128::CmpLe(lensq_pt_c, r_sq);
1515  f128 intersection_none;
1516  intersection_none = F128::CmpLt(len_on_ray, F128::SetZero());
1517  intersection_none = F128::AndNot(origin_inside, intersection_none);
1518  intersection_none = F128::Or(intersection_none, F128::CmpGt(distsq, r_sq));
1519 
1520  if (F128::IsAllMaskFalse(intersection_none)) {
1521  if (distance) {
1522  // compute the distance from ray_point to the surface of the sphere
1523  // r_sq = distsq + q^2
1524  f128 q = F128::Sqrt(F128::Sub(r_sq, distsq));
1525  f128 t0 = F128::Sub(len_on_ray, q); // from outside to inside
1526  f128 t1 = F128::Add(len_on_ray, q); // from inside to outside
1527  f128 t = F128::Select(origin_inside, t1, t0);
1528  *distance = F128::GetFloatFromLane<0>(t);
1529  }
1530  return true;
1531  } else {
1532  return false;
1533  }
1534 }
1535 
1536 // TriangleTests::Intersects()
1537 NLIB_B Intersection::TriangleRay(float* distance, SimdVectorArg triangle_point0,
1538  SimdVectorArg triangle_point1, SimdVectorArg triangle_point2,
1539  SimdVectorArg ray_point,
1540  SimdVectorArg ray_dir_normalized) NLIB_NOEXCEPT {
1541  // See http://www.graphics.cornell.edu/pubs/1997/MT97.html
1542 
1543  SimdVector E1 = F128::Sub(triangle_point1, triangle_point0);
1544  SimdVector E2 = F128::Sub(triangle_point2, triangle_point0);
1545  SimdVector P = Vector3::Cross(ray_dir_normalized, E2); // D x E2
1546 
1547  f128 det = Vector3::Dot(E1, P);
1548  f128 inv_det = F128::Recp(det);
1549 
1550  // if det is near zero, ray lies in plane of triangle
1551  f128 intersection_none = F128::InBound(det, F128::SetEpsilon());
1552 
1553  // calculate distance from triangle_point0 to ray_point
1554  SimdVector T = F128::Sub(ray_point, triangle_point0); // O - V0
1555  SimdVector Q = Vector3::Cross(T, E1); // Q = T x E1
1556 
1557  // calculate u parameter
1558  f128 u = Vector3::Dot(T, P);
1559  // calculate v parameter
1560  f128 v = Vector3::Dot(ray_dir_normalized, Q);
1561  f128 uv = F128::Mult(F128::Permute<0, 1, 4, 5>(u, v), inv_det);
1562  // calculate t
1563  f128 t = F128::Mult(Vector3::Dot(E2, Q), inv_det);
1564 
1565  // intersects if 0 <= u,v <= 1 and t >= 0
1566  intersection_none = F128::Or(intersection_none, F128::CmpLt(uv, F128::SetZero()));
1567  intersection_none = F128::Or(intersection_none, F128::CmpGt(uv, F128::SetOne()));
1568  intersection_none = F128::Or(intersection_none, F128::CmpLt(t, F128::SetZero()));
1569 
1570  if (F128::IsAllMaskFalse(intersection_none)) {
1571  if (distance) {
1572  *distance = F128::GetFloatFromLane<0>(t);
1573  }
1574  return true;
1575  } else {
1576  return false;
1577  }
1578 }
1579 
1580 // TriangleTests::Intersects()
1581 NLIB_M(Intersection::PlaneResult)
1582 Intersection::TrianglePlane(SimdVectorArg triangle_point0, SimdVectorArg triangle_point1,
1583  SimdVectorArg triangle_point2,
1584  SimdPlaneArg plane_normalized) NLIB_NOEXCEPT {
1585  SimdVector tp0 = F128::SetFloatToLane<3>(triangle_point0, 1.f);
1586  SimdVector tp1 = F128::SetFloatToLane<3>(triangle_point1, 1.f);
1587  SimdVector tp2 = F128::SetFloatToLane<3>(triangle_point2, 1.f);
1588 
1589 #ifdef NLIB_SSE41
1590  f128 dist0 = Vector4::Dot(plane_normalized, tp0);
1591  f128 dist1 = Vector4::Dot(plane_normalized, tp1);
1592  f128 dist2 = Vector4::Dot(plane_normalized, tp2);
1593 #else
1594  f128 dist012 = Vector4::Dot3(plane_normalized, tp0, tp1, tp2);
1595  f128 dist0 = F128::SetValue<0>(dist012, each_select32);
1596  f128 dist1 = F128::SetValue<1>(dist012, each_select32);
1597  f128 dist2 = F128::SetValue<2>(dist012, each_select32);
1598 #endif
1599  f128 min_dist = F128::Min(dist0, dist1);
1600  f128 max_dist = F128::Max(dist0, dist1);
1601  min_dist = F128::Min(min_dist, dist2);
1602  max_dist = F128::Max(max_dist, dist2);
1603 
1604  f128 zero = F128::SetZero();
1605  f128 outside = F128::CmpGt(min_dist, zero);
1606  f128 inside = F128::CmpLt(max_dist, zero);
1607 
1608  if (F128::IsAllMaskTrue(outside)) return PLANE_FRONT;
1609  if (F128::IsAllMaskTrue(inside)) return PLANE_BACK;
1610  return PLANE_INTERSECT;
1611 }
1612 
1613 // BoundingSphere::Intersects()
1614 NLIB_M(Intersection::PlaneResult)
1615 Intersection::SpherePlane(SimdSphereArg sphere, SimdPlaneArg plane_normalized) NLIB_NOEXCEPT {
1616  f128 c = F128::SetFloatToLane<3>(sphere, 1.f);
1617  f128 r = F128::SetValue<3>(sphere, each_select32);
1618  f128 dist = Vector4::Dot(plane_normalized, c);
1619  f128 outside = F128::CmpGt(dist, r);
1620  f128 inside = F128::CmpLt(dist, F128::Negate(r));
1621 
1622  if (F128::IsAllMaskTrue(outside)) return PLANE_FRONT;
1623  if (F128::IsAllMaskTrue(inside)) return PLANE_BACK;
1624  return PLANE_INTERSECT;
1625 }
1626 
1627 // BoundingSphere::Intersects()
1628 NLIB_B Intersection::SphereSphere(SimdSphereArg sphere0, SimdSphereArg sphere1) NLIB_NOEXCEPT {
1629  f128 distsq = Vector3::LengthSq(F128::Sub(sphere0, sphere1));
1630  f128 radsq = F128::SetValue<3>(F128::Add(sphere0, sphere1), each_select32);
1631  radsq = F128::Mult(radsq, radsq);
1632  return F128::IsAllMaskFalse(F128::CmpGt(distsq, radsq));
1633 }
1634 
1635 // return nan if parallel
1636 // XMPlaneIntersectPlane
1637 NLIB_M(f128x2) Intersection::PlanePlane(SimdPlaneArg plane0, SimdPlaneArg plane1) NLIB_NOEXCEPT {
1638  // Len(A x B) = Len(A) * Len(B) * sin(AB)
1639  SimdVector lineVec = Vector3::Cross(plane1, plane0);
1640  f128 lineVecLenSq = Vector3::LengthSq(lineVec);
1641 
1642  // A x (B x C) = Dot(A, C)B - Dot(A, B)C
1643  // n1 x (n1 x n0) = Dot(n0,n1)n1 - Dot(n1,n1)n0
1644  SimdVector p1_line = Vector3::Cross(plane1, lineVec);
1645  SimdVector point = F128::Mult<3>(plane0, p1_line, each_select32);
1646 
1647  // (A x B) x C = Dot(A, C)B - Dot(B, C)A
1648  // (n1 x n0) x n0 = Dot(n0,n1)n0 - Dot(n0,n0)n1
1649  SimdVector line_p0 = Vector3::Cross(lineVec, plane0);
1650  point = F128::MultAdd<3>(plane1, line_p0, point, each_select32);
1651 
1652  SimdVector point0 = F128::Div(point, lineVecLenSq);
1653  SimdVector point1 = F128::Add(point0, lineVec); // lineVec is along the plane0 and plane1
1654  f128 eps = F128::SetEpsilon();
1655  f128 nan = F128::SetNaN();
1656  f128 parallel = F128::CmpNearEq(F128::SetZero(), lineVecLenSq, eps);
1657  f128x2 ret;
1658  ret.val[0] = F128::Select(parallel, nan, point0);
1659  ret.val[1] = F128::Select(parallel, nan, point1);
1660  return ret;
1661 }
1662 
1663 // BoundingSphere::Intersects()
1664 NLIB_B Intersection::SphereTriangle(SimdSphereArg sphere, SimdVectorArg triangle_point0,
1665  SimdVectorArg triangle_point1,
1666  SimdVectorArg triangle_point2) NLIB_NOEXCEPT {
1667  SimdPlane plane = Plane::FromPoint(triangle_point0, triangle_point1, triangle_point2);
1668 
1669  SimdVector q;
1670  // no intersection if the plane is farther from the center than the radius.
1671  f128 distsq = DistanceSq::PointPlane(&q, sphere, plane);
1672  f128 r_sq = F128::SetValue<3>(sphere, each_select32);
1673  r_sq = F128::Mult(r_sq, r_sq);
1674  f128 cmp = F128::CmpLe(distsq, r_sq);
1675  if (F128::IsAllMaskFalse(cmp)) return false;
1676 
1677  // if q is inside the triangle, it must intersect.
1678  // (e.g. in case of a bigger triangle, each edge is far from the sphere)
1679  SimdVector c0 =
1680  Vector3::Cross(F128::Sub(q, triangle_point0), F128::Sub(triangle_point1, triangle_point0));
1681  SimdVector c1 =
1682  Vector3::Cross(F128::Sub(q, triangle_point1), F128::Sub(triangle_point2, triangle_point1));
1683  SimdVector c2 =
1684  Vector3::Cross(F128::Sub(q, triangle_point2), F128::Sub(triangle_point0, triangle_point2));
1685  f128 zero = F128::SetZero();
1686  plane = F128::SetZeroToLane<3>(plane); // normal vector now
1687  f128 three_dots = Vector4::Dot3(plane, c0, c1, c2);
1688  three_dots = F128::Swizzle<0, 1, 2, 2>(three_dots);
1689  cmp = F128::CmpGe(three_dots, zero);
1690  if (F128::IsAllMaskFalse(cmp) || F128::IsAllMaskTrue(cmp)) return true;
1691 
1692  // if the distance between an edge of the triangle and the center is
1693  // less(or equal) than the radius, it must intersect.
1694  f128 seg_dist01 = DistanceSq::PointSegment(sphere, triangle_point0, triangle_point1);
1695  f128 seg_dist02 = DistanceSq::PointSegment(sphere, triangle_point0, triangle_point2);
1696  f128 seg_dist = F128::Min(seg_dist01, seg_dist02);
1697  f128 seg_dist12 = DistanceSq::PointSegment(sphere, triangle_point1, triangle_point2);
1698  seg_dist = F128::Min(seg_dist, seg_dist12);
1699  cmp = F128::CmpLe(seg_dist, r_sq);
1700  if (F128::IsAllMaskFalse(cmp)) return false;
1701  return true;
1702 }
1703 
1704 // BoundingBox::Intersects()
1705 NLIB_M(Intersection::PlaneResult)
1706 Intersection::AxisAlignedBoxPlane(const AxisAlignedBox& aabb,
1707  SimdPlaneArg plane_normalized) NLIB_NOEXCEPT {
1708  SimdVector aabb_max = aabb.point_max;
1709  SimdVector aabb_min = aabb.point_min;
1710  SimdVector center = F128::Add(aabb_max, aabb_min);
1711  center = F128::Mult(0.5f, center);
1712  SimdVector extents = F128::Sub(aabb_max, center);
1713  center = F128::SetFloatToLane<3>(center, 1.f);
1714 
1715  f128 dist = Vector4::Dot(center, plane_normalized);
1716  f128 radius = Vector3::Dot(extents, F128::Abs(plane_normalized));
1717  if (F128::IsAllMaskFalse(F128::CmpLe(dist, radius))) {
1718  return PLANE_FRONT;
1719  }
1720  if (F128::IsAllMaskFalse(F128::CmpGe(dist, F128::Negate(radius)))) {
1721  return PLANE_BACK;
1722  }
1723  return PLANE_INTERSECT;
1724 }
1725 
1726 // BoundingBox::Intersects()
1727 NLIB_B Intersection::AxisAlignedBoxRay(const AxisAlignedBox& aabb, SimdVectorArg ray_point,
1728  SimdVectorArg ray_dir_normalized) NLIB_NOEXCEPT {
1729  // https://www.siggraph.org/education/materials/HyperGraph/raytrace/rtinter3.htm
1730  // https://truesculpt.googlecode.com/hg-history/Release%25200.8/Doc/ray_box_intersect.pdf
1731  f128 inv_dir = F128::Recp(ray_dir_normalized);
1732  f128 zero = F128::SetZero();
1733  f128 selector = F128::CmpGe(ray_dir_normalized, zero);
1734  SimdVector aabb_min = aabb.point_min;
1735  SimdVector aabb_max = aabb.point_max;
1736  f128 tmin = F128::Select(selector, aabb_min, aabb_max);
1737  f128 tmax = F128::Select(selector, aabb_max, aabb_min);
1738  tmin = F128::Sub(tmin, ray_point);
1739  tmax = F128::Sub(tmax, ray_point);
1740  tmin = F128::Mult(tmin, inv_dir);
1741  tmax = F128::Mult(tmax, inv_dir);
1742 
1743  // false if
1744  // tmin.x > tmax.y || tmin.y > tmax.x ||
1745  // tmin.x > tmax.z || tmin.z > tmax.x
1746  tmin = F128::Swizzle<0, 1, 2, 0>(tmin);
1747  tmax = F128::Swizzle<2, 0, 0, 1>(tmax);
1748  f128 result = F128::CmpGe(tmax, zero);
1749  if (!F128::IsAllMaskFalse(F128::CmpGt(tmin, tmax))) return false;
1750 
1751  // true if min(tmax.xyz) >= 0
1752  return F128::IsAllMaskTrue(result);
1753 }
1754 
1755 // BoundingBox::Intersects()
1756 NLIB_B Intersection::AxisAlignedBoxSphere(const AxisAlignedBox& aabb,
1757  SimdSphereArg sphere) NLIB_NOEXCEPT {
1758  SimdVector aabb_min = aabb.point_min;
1759  SimdVector aabb_max = aabb.point_max;
1760  f128 max_center = F128::Sub(sphere, aabb_max);
1761  f128 min_center = F128::Sub(sphere, aabb_min);
1762  f128 max_mask = F128::CmpGt(sphere, aabb_max);
1763  f128 min_mask = F128::CmpLt(sphere, aabb_min);
1764  f128 dist_sq = F128::SetZero();
1765  dist_sq = F128::Select(max_mask, max_center, dist_sq);
1766  dist_sq = F128::Select(min_mask, min_center, dist_sq);
1767  dist_sq = Vector3::LengthSq(dist_sq);
1768  f128 r_sq = F128::SetValue<3>(sphere, each_select32);
1769  r_sq = F128::Mult(r_sq, r_sq);
1770  return F128::IsAllMaskTrue(F128::CmpLe(dist_sq, r_sq));
1771 }
1772 
1773 // BoundingBox::Intersects()
1774 NLIB_B Intersection::AxisAlignedBoxAxisAlignedBox(const AxisAlignedBox& aabb0,
1775  const AxisAlignedBox& aabb1) NLIB_NOEXCEPT {
1776  f128 cond0 = F128::CmpGt(aabb0.point_min, aabb1.point_max); // true if disjoint
1777  f128 cond1 = F128::CmpGt(aabb1.point_min, aabb0.point_max); // true if disjoint
1778  return F128::IsAllMaskFalse(F128::Or(cond0, cond1));
1779 }
1780 
1781 // BoundingBox::Intersects()
1782 NLIB_B Intersection::AxisAlignedBoxTriangle(const AxisAlignedBox& aabb,
1783  SimdVectorArg triangle_point0,
1784  SimdVectorArg triangle_point1,
1785  SimdVectorArg triangle_point2) NLIB_NOEXCEPT {
1786  // See "Fast 3D Triangle-Box Overlap Testing"
1787 
1788  SimdVector box_min = aabb.point_min;
1789  SimdVector box_max = aabb.point_max;
1790  SimdVector tri0 = triangle_point0;
1791  SimdVector tri1 = triangle_point1;
1792  SimdVector tri2 = triangle_point2;
1793  {
1794  // minimal AABB around the triangle
1795  SimdVector tri_min =
1796  F128::Min(F128::Min(tri0, tri1), tri2);
1797  SimdVector tri_max =
1798  F128::Max(F128::Max(tri0, tri1), tri2);
1799  f128 cond0 = F128::CmpGt(tri_min, box_max);
1800  f128 cond1 = F128::CmpGt(box_min, tri_max);
1801  if (!F128::IsAllMaskFalse(F128::Or(cond0, cond1))) {
1802  // no intersection if the two AABBs are disjoint
1803  return false;
1804  }
1805 
1806  // true if a vertex in AABB
1807  f128 point_in;
1808  point_in = F128::And(F128::CmpGe(tri0, box_min),
1809  F128::CmpLe(tri0, box_max));
1810  if (F128::IsAllMaskTrue(point_in)) return true;
1811  point_in = F128::And(F128::CmpGe(tri1, box_min),
1812  F128::CmpLe(tri1, box_max));
1813  if (F128::IsAllMaskTrue(point_in)) return true;
1814  point_in = F128::And(F128::CmpGe(tri2, box_min),
1815  F128::CmpLe(tri2, box_max));
1816  if (F128::IsAllMaskTrue(point_in)) return true;
1817  }
1818 
1819  SimdVector f0 = F128::Sub(tri1, tri0);
1820  SimdVector f1 = F128::Sub(tri2, tri1);
1821 
1822  // fast plane/AABB overlap test
1823  {
1824  // Nv - D = 0
1825  SimdVector N = Vector3::Cross(f0, f1);
1826  N = F128::SetZeroToLane<3>(N);
1827  f128 D = Vector3::Dot(N, tri0);
1828  f128 normal_gt_0 = F128::CmpGt(N, F128::SetZero());
1829  SimdVector v_near = F128::Select(normal_gt_0, box_min, box_max);
1830  SimdVector v_far = F128::Select(normal_gt_0, box_max, box_min);
1831  f128 min_near = Vector4::Dot(N, v_near);
1832  f128 max_far = Vector4::Dot(N, v_far);
1833  min_near = F128::CmpGt(min_near, D);
1834  max_far = F128::CmpLt(max_far, D);
1835  if (!F128::IsAllMaskFalse(F128::Or(min_near, max_far))) {
1836  return false;
1837  }
1838  }
1839 
1840  SimdVector box_center = F128::Add(box_max, box_min);
1841  box_center = F128::Mult(0.5f, box_center);
1842  SimdVector extents = F128::Sub(box_center, box_min);
1843  SimdVector tp0 = F128::Sub(tri0, box_center);
1844  SimdVector tp1 = F128::Sub(tri1, box_center);
1845  SimdVector tp2 = F128::Sub(tri2, box_center);
1846  tp0 = F128::SetZeroToLane<3>(tp0);
1847  tp1 = F128::SetZeroToLane<3>(tp1);
1848  tp2 = F128::SetZeroToLane<3>(tp2);
1849 
1850  SimdVector f2 = F128::Sub(tri0, tri2);
1851  f0 = F128::SetZeroToLane<3>(f0);
1852  f1 = F128::SetZeroToLane<3>(f1);
1853  f2 = F128::SetZeroToLane<3>(f2);
1854 
1855  f128 pmax, pmin;
1856  f128 radius;
1857  f128 tp0dot_A00A01A02A10;
1858  f128 tp0dot_A11A12A20A21;
1859  f128 tp1dot_A01A02;
1860  f128 tp1dot_A11A12A21A22;
1861  f128 tp2dot_A00A10A20A22;
1862  f128 neg_f0 = F128::Negate(f0);
1863  f128 neg_f1 = F128::Negate(f1);
1864  f128 neg_f2 = F128::Negate(f2);
1865 
1866  // e = (1, 0, 0), cross(e, f0) = (0, -f0.z, f0.y)
1867  SimdVector A00 = F128::Permute<7, 6, 1, 3>(f0, neg_f0);
1868  // e = (1, 0, 0), cross(e, f1) = (0, -f1.z, f1.y)
1869  SimdVector A01 = F128::Permute<7, 6, 1, 3>(f1, neg_f1);
1870  // e = (1, 0, 0), cross(e, f1) = (0, -f2.z, f2.y)
1871  SimdVector A02 = F128::Permute<7, 6, 1, 3>(f2, neg_f2);
1872 
1873  // e = (0, 1, 0), cross(e, f0) = (f0.z, 0, -f0.x)
1874  SimdVector A10 = F128::Permute<2, 3, 4, 3>(f0, neg_f0);
1875  // e = (0, 1, 0), cross(e, f1) = (f1.z, 0, -f1.x)
1876  SimdVector A11 = F128::Permute<2, 3, 4, 3>(f1, neg_f1);
1877  // e = (0, 1, 0), cross(e, f1) = (f2.z, 0, -f2.x)
1878  SimdVector A12 = F128::Permute<2, 3, 4, 3>(f2, neg_f2);
1879 
1880  // e = (0, 0, 1), cross(e, f0) = (-f0.y, f0.x, 0)
1881  SimdVector A20 = F128::Permute<5, 0, 3, 3>(f0, neg_f0);
1882  // e = (0, 0, 1), cross(e, f1) = (f1.z, 0, -f1.x)
1883  SimdVector A21 = F128::Permute<5, 0, 3, 3>(f1, neg_f1);
1884  // e = (0, 0, 1), cross(e, f1) = (f2.z, 0, -f2.x)
1885  SimdVector A22 = F128::Permute<5, 0, 3, 3>(f2, neg_f2);
1886 
1887  tp0dot_A00A01A02A10 = Vector4::Dot4(tp0, A00, A01, A02, A10);
1888  tp0dot_A11A12A20A21 = Vector4::Dot4(tp0, A11, A12, A10, A11);
1889  tp1dot_A01A02 = Vector4::Dot2(tp1, A01, A02);
1890  tp1dot_A11A12A21A22 = Vector4::Dot4(tp1, A11, A12, A21, A22);
1891  tp2dot_A00A10A20A22 = Vector4::Dot4(tp2, A00, A10, A20, A22);
1892 
1893  A00 = F128::Abs(A00);
1894  A01 = F128::Abs(A01);
1895  A02 = F128::Abs(A02);
1896  A10 = F128::Abs(A10);
1897  A11 = F128::Abs(A11);
1898  A12 = F128::Abs(A12);
1899  A20 = F128::Abs(A20);
1900  A21 = F128::Abs(A21);
1901  A22 = F128::Abs(A22);
1902  f128 tp12dot;
1903 
1904  // A00, A01, A02, A10
1905  radius = Vector4::Dot4(extents, A00, A01, A02, A10);
1906  tp12dot = F128::Permute<4, 0, 1, 5>(tp1dot_A01A02, tp2dot_A00A10A20A22);
1907  pmax = F128::Max(tp0dot_A00A01A02A10, tp12dot);
1908  pmin = F128::Min(tp0dot_A00A01A02A10, tp12dot);
1909  f128 intersection_none = F128::CmpGt(pmin, radius);
1910  intersection_none = F128::Or(intersection_none, F128::CmpLt(pmax, F128::Negate(radius)));
1911 
1912  // A11, A12, A20, A21
1913  radius = Vector4::Dot4(extents, A11, A12, A20, A21);
1914  tp12dot = F128::Permute<0, 1, 6, 2>(tp1dot_A11A12A21A22, tp2dot_A00A10A20A22);
1915  pmax = F128::Max(tp0dot_A11A12A20A21, tp12dot);
1916  pmin = F128::Min(tp0dot_A11A12A20A21, tp12dot);
1917  intersection_none = F128::Or(intersection_none, F128::CmpGt(pmin, radius));
1918  intersection_none = F128::Or(intersection_none, F128::CmpLt(pmax, F128::Negate(radius)));
1919 
1920  // A22
1921  radius = Vector4::Dot(extents, A22);
1922  pmax = F128::Max(tp1dot_A11A12A21A22, tp2dot_A00A10A20A22);
1923  pmin = F128::Min(tp1dot_A11A12A21A22, tp2dot_A00A10A20A22);
1924  pmax = F128::SetValue<3>(pmax, each_select32);
1925  pmin = F128::SetValue<3>(pmin, each_select32);
1926  intersection_none = F128::Or(intersection_none, F128::CmpGt(pmin, radius));
1927  intersection_none = F128::Or(intersection_none, F128::CmpLt(pmax, F128::Negate(radius)));
1928 
1929  return F128::IsAllMaskFalse(intersection_none);
1930 }
1931 
1932 // BoundingOrientedBox::Intersects()
1933 NLIB_B
1934 Intersection::OrientedBoxAxisAlignedBox(const OrientedBox& box,
1935  const AxisAlignedBox& aabb) NLIB_NOEXCEPT {
1936  SimdVector aabb_center, aabb_extent;
1937  {
1938  SimdVector aabb_max = aabb.point_max;
1939  SimdVector aabb_min = aabb.point_min;
1940  aabb_center = F128::Mult(0.5f, F128::Add(aabb_max, aabb_min));
1941  aabb_extent = F128::Sub(aabb_max, aabb_center);
1942  aabb_extent = F128::SetZeroToLane<3>(aabb_extent);
1943  }
1944  SimdVector box_center = F128::Sub(box.center, aabb_center);
1945  box_center = F128::SetZeroToLane<3>(box_center);
1946  SimdVector box_extent = F128::SetZeroToLane<3>(box.extent);
1947  SimdVector Mx0, Mx1, Mx2;
1948  SimdVector neg_Mx0, neg_Mx1, neg_Mx2;
1949  SimdVector abs_M0x, abs_M1x, abs_M2x;
1950 
1951  {
1952  SimdMatrix rot = Matrix::FromRotationQuaternion(box.rotation);
1953  abs_M0x = F128::Abs(rot.r[0]);
1954  abs_M1x = F128::Abs(rot.r[1]);
1955  abs_M2x = F128::Abs(rot.r[2]);
1956  NLIB_F128_TRANSPOSE(rot.r[0], rot.r[1], rot.r[2], rot.r[3]);
1957  Mx0 = rot.r[0];
1958  Mx1 = rot.r[1];
1959  Mx2 = rot.r[2];
1960  neg_Mx0 = F128::Negate(Mx0);
1961  neg_Mx1 = F128::Negate(Mx1);
1962  neg_Mx2 = F128::Negate(Mx2);
1963  }
1964 
1965  // use the separating axis theorem.
1966  // (two convex sets are disjoint if there is a plane(axis) separating them)
1967  //
1968  // there are 15 separating axes(planes).
1969  // Axx are the normals of the planes.
1970  //
1971  // aabb(0-2) = (1, 0, 0), (0, 1, 0), (0, 0, 1)
1972  // box(0-2) = B0, B1, B2 = Mx0, Mx1, Mx2
1973  // aabb_extent = extents of aabb
1974  // box_extent = extents of box
1975  //
1976  // A0(0-2) (1, 0, 0), (0, 1, 0), (0, 0, 1)
1977  // A1(0-2) B0, B1, B2
1978  // A2(0-2) (1, 0, 0) x B0, B1, and B2
1979  // A3(0-2) (0, 1, 0) x B0, B1, and B2
1980  // A4(0-2) (0, 0, 1) x B0, B1, and B2
1981  //
1982  // aabb_extent and box_extent are mapped onto Aij,
1983  // d_aabb and d_box are the ranges(radius) of them.
1984  // if the two ranges do not overlap, box and aabb are disjoint(the separating axis theorem).
1985  //
1986  // for (Aij in separating axes)
1987  // d_aabb = dot(aabb_extent, abs{dot((1, 0, 0), Aij), ...})
1988  // d_box = dot(box_extent, abs{dot(B0, Aij), dot(B1, Aij), dot(B2, Aij)})
1989  // d = abs(dot(box_center, Aij))
1990  // if (d > d_aabb + d_box) return false;
1991  f128 d, d_aabb, d_box;
1992  SimdVector t0, t1, t2;
1993  f128 intersection_none = F128::SetZero();
1994  f128 d_aabb_box;
1995  f128 cmp;
1996 
1997  // A00, A01, A02
1998  d = F128::Abs(box_center);
1999  d_aabb = aabb_extent;
2000  d_box = Vector4::Dot3(box_extent, F128::Abs(Mx0), F128::Abs(Mx1), F128::Abs(Mx2));
2001  d_aabb_box = F128::Add(d_aabb, d_box);
2002  intersection_none = F128::CmpGt(d, d_aabb_box);
2003 
2004  // A10, A11, A12
2005  d = F128::Abs(Vector4::Dot3(box_center, Mx0, Mx1, Mx2));
2006  d_aabb = Vector4::Dot3(aabb_extent, F128::Abs(Mx0), F128::Abs(Mx1), F128::Abs(Mx2));
2007  d_box = box_extent;
2008  d_aabb_box = F128::Add(d_aabb, d_box);
2009  cmp = F128::CmpGt(d, d_aabb_box);
2010  intersection_none = F128::Or(intersection_none, cmp);
2011 
2012  // note that dot(P, Q x R) == dot(Q, R x P)
2013 
2014  // A20 = (1, 0, 0) x Mx0 = (0, -M20, M10)
2015  // A21 = (1, 0, 0) x Mx1 = (0, -M21, M11)
2016  // A22 = (1, 0, 0) x Mx2 = (0, -M22, M12)
2017  t0 = F128::Permute<3, 6, 1, 3>(Mx0, neg_Mx0);
2018  t1 = F128::Permute<3, 6, 1, 3>(Mx1, neg_Mx1);
2019  t2 = F128::Permute<3, 6, 1, 3>(Mx2, neg_Mx2);
2020  d = F128::Abs(Vector4::Dot3(box_center, t0, t1, t2));
2021  d_aabb = Vector4::Dot3(aabb_extent, F128::Abs(t0), F128::Abs(t1), F128::Abs(t2));
2022  // abs(dot(Mx0-2, (1, 0, 0) x Mx0)) = abs(dot((1, 0, 0), Mx0 x Mx0-2)) =
2023  t0 = F128::Swizzle<3, 2, 1, 3>(abs_M0x); // abs(0, M02, M01)
2024  t1 = F128::Swizzle<2, 3, 0, 3>(abs_M0x); // abs(M02, 0, M00)
2025  t2 = F128::Swizzle<1, 0, 3, 3>(abs_M0x); // abs(M01, M00, 0)
2026  d_box = Vector4::Dot3(box_extent, t0, t1, t2);
2027  d_aabb_box = F128::Add(d_aabb, d_box);
2028  cmp = F128::CmpGt(d, d_aabb_box);
2029  intersection_none = F128::Or(intersection_none, cmp);
2030 
2031  // A30 = (0, 1, 0) x Mx0 = (M20, 0, -M00)
2032  // A31 = (0, 1, 0) x Mx1 = (M21, 0, -M01)
2033  // A32 = (0, 1, 0) x Mx2 = (M22, 0, -M02)
2034  t0 = F128::Permute<2, 3, 4, 3>(Mx0, neg_Mx0);
2035  t1 = F128::Permute<2, 3, 4, 3>(Mx1, neg_Mx1);
2036  t2 = F128::Permute<2, 3, 4, 3>(Mx2, neg_Mx2);
2037  d = F128::Abs(Vector4::Dot3(box_center, t0, t1, t2));
2038  d_aabb = Vector4::Dot3(aabb_extent, F128::Abs(t0), F128::Abs(t1), F128::Abs(t2));
2039  // abs(dot(Mx0-2, (0, 1, 0) x Mx0)) = abs(dot((0, 1, 0), Mx0 x Mx0-2)) =
2040  t0 = F128::Swizzle<3, 2, 1, 3>(abs_M1x); // abs(0, M12, M11)
2041  t1 = F128::Swizzle<2, 3, 0, 3>(abs_M1x); // abs(M12, 0, M10)
2042  t2 = F128::Swizzle<1, 0, 3, 3>(abs_M1x); // abs(M11, M10, 0)
2043  d_box = Vector4::Dot3(box_extent, t0, t1, t2);
2044  d_aabb_box = F128::Add(d_aabb, d_box);
2045  cmp = F128::CmpGt(d, d_aabb_box);
2046  intersection_none = F128::Or(intersection_none, cmp);
2047 
2048  // A40 = (0, 0, 1) x Mx0 = (-M10, M00, 0)
2049  // A41 = (0, 0, 1) x Mx1 = (-M11, M01, 0)
2050  // A42 = (0, 0, 1) x Mx2 = (-M12, M02, 0)
2051  t0 = F128::Permute<5, 0, 3, 3>(Mx0, neg_Mx0);
2052  t1 = F128::Permute<5, 0, 3, 3>(Mx1, neg_Mx1);
2053  t2 = F128::Permute<5, 0, 3, 3>(Mx2, neg_Mx2);
2054  d = F128::Abs(Vector4::Dot3(box_center, t0, t1, t2));
2055  d_aabb = Vector4::Dot3(aabb_extent, F128::Abs(t0), F128::Abs(t1), F128::Abs(t2));
2056  // abs(dot(Mx0-2, (0, 0, 1) x Mx0)) = abs(dot((0, 0, 1), Mx0 x Mx0-2)) =
2057  t0 = F128::Swizzle<3, 2, 1, 3>(abs_M2x); // abs(0, M22, M21)
2058  t1 = F128::Swizzle<2, 3, 0, 3>(abs_M2x); // abs(M22, 0, M20)
2059  t2 = F128::Swizzle<1, 0, 3, 3>(abs_M2x); // abs(M21, M20, 0)
2060  d_box = Vector4::Dot3(box_extent, t0, t1, t2);
2061  d_aabb_box = F128::Add(d_aabb, d_box);
2062  cmp = F128::CmpGt(d, d_aabb_box);
2063  intersection_none = F128::Or(intersection_none, cmp);
2064 
2065  intersection_none = F128::SetZeroToLane<3>(intersection_none);
2066  return F128::IsAllMaskFalse(intersection_none);
2067 }
2068 
2069 // BoundingOrientedBox::Intersects()
2070 NLIB_B Intersection::OrientedBoxOrientedBox(const OrientedBox& box0,
2071  const OrientedBox& box1) NLIB_NOEXCEPT {
2072  OrientedBox obox;
2073  SimdQuaternion box0_rot = box0.rotation;
2074  SimdQuaternion box1_rot = box1.rotation;
2075  obox.rotation = Quaternion::Mult(box0_rot, Quaternion::Conjugate(box1_rot));
2076  obox.center = Vector3::InvRotate(F128::Sub(box1.center, box0.center), box0_rot);
2077  obox.extent = box1.extent;
2078 
2079  AxisAlignedBox aabb;
2080  aabb.point_max = box0.extent;
2081  aabb.point_min = F128::Negate(aabb.point_max);
2082 
2083  return Intersection::OrientedBoxAxisAlignedBox(obox, aabb);
2084 }
2085 
2086 // BoundingOrientedBox::Intersects()
2087 NLIB_B Intersection::OrientedBoxSphere(const OrientedBox& box, SimdSphereArg sphere) NLIB_NOEXCEPT {
2088  SimdVector box_center = box.center;
2089  SimdVector box_extent = box.extent;
2090  SimdQuaternion box_rotation = box.rotation;
2091 
2092  // transform the sphere
2093  SimdVector sphere_center = F128::Sub(sphere, box_center);
2094  sphere_center = Vector3::InvRotate(sphere_center, box_rotation);
2095 
2096  // intersection between AABB and sphere (local to the AABB)
2097  f128 lt_min = F128::CmpLt(sphere_center, F128::Negate(box_extent));
2098  f128 gt_max = F128::CmpGt(sphere_center, box_extent);
2099  SimdVector diff_min = F128::Add(sphere_center, box_extent);
2100  SimdVector diff_max = F128::Sub(sphere_center, box_extent);
2101 
2102  f128 dist_sq = F128::SetZero();
2103  dist_sq = F128::Select(lt_min, diff_min, dist_sq);
2104  dist_sq = F128::Select(gt_max, diff_max, dist_sq);
2105  dist_sq = Vector3::Dot(dist_sq, dist_sq);
2106 
2107  f128 rad_sq = F128::Mult(sphere, sphere);
2108  rad_sq = F128::SetValue<3>(rad_sq, each_select32);
2109 
2110  return Vector4::CmpLe(dist_sq, rad_sq);
2111 }
2112 
2113 // BoundingOrientedBox::Intersects()
2114 NLIB_B Intersection::OrientedBoxTriangle(const OrientedBox& box, SimdVectorArg triangle_point0,
2115  SimdVectorArg triangle_point1,
2116  SimdVectorArg triangle_point2) NLIB_NOEXCEPT {
2117  // Transform the triangle points
2118  SimdVector box_center = box.center;
2119  SimdQuaternion box_rotation = box.rotation;
2120  SimdVector tp0 = F128::Sub(triangle_point0, box_center);
2121  SimdVector tp1 = F128::Sub(triangle_point1, box_center);
2122  SimdVector tp2 = F128::Sub(triangle_point2, box_center);
2123  tp0 = Vector3::InvRotate(tp0, box_rotation);
2124  tp1 = Vector3::InvRotate(tp1, box_rotation);
2125  tp2 = Vector3::InvRotate(tp2, box_rotation);
2126 
2127  AxisAlignedBox aabb;
2128  aabb.point_max = box.extent;
2129  aabb.point_min = F128::Negate(aabb.point_max);
2130 
2131  return Intersection::AxisAlignedBoxTriangle(aabb, tp0, tp1, tp2);
2132 }
2133 
2134 // BoundingOrientedBox::Intersects()
2135 inline Intersection::PlaneResult __vectorcall
2136 Intersection::OrientedBoxPlane(const OrientedBox& box,
2137  SimdPlaneArg plane_normalized) NLIB_NOEXCEPT {
2138  // Transform the plane
2139  SimdVector new_plane = Vector3::InvRotate(plane_normalized, box.rotation);
2140  f128 new_d = F128::Sub(plane_normalized, Vector3::Dot(plane_normalized, box.center));
2141  new_plane = F128::Permute<0, 1, 2, 7>(new_plane, new_d);
2142 
2143  AxisAlignedBox aabb;
2144  aabb.point_max = box.extent;
2145  aabb.point_min = F128::Negate(aabb.point_max);
2146 
2147  return Intersection::AxisAlignedBoxPlane(aabb, new_plane);
2148 }
2149 
2150 // BoundingOrientedBox::Intersects()
2151 NLIB_B Intersection::OrientedBoxRay(const OrientedBox& box, SimdVectorArg ray_point,
2152  SimdVectorArg ray_dir_normalized) NLIB_NOEXCEPT {
2153  SimdQuaternion box_rotation = box.rotation;
2154  SimdVector P = F128::Sub(ray_point, box.center);
2155  P = Vector3::InvRotate(P, box_rotation);
2156  SimdVector D = Vector3::InvRotate(ray_dir_normalized, box_rotation);
2157 
2158  AxisAlignedBox aabb;
2159  aabb.point_max = box.extent;
2160  aabb.point_min = F128::Negate(aabb.point_max);
2161 
2162  return Intersection::AxisAlignedBoxRay(aabb, P, D);
2163 }
2164 
2165 NLIB_B Intersection::FrustumSphere(const Frustum& frustum, SimdSphereArg sphere) NLIB_NOEXCEPT {
2166  SimdPlane near_plane = frustum.m_NearPlane;
2167  SimdPlane far_plane = frustum.m_FarPlane;
2168  SimdPlane left_plane = frustum.m_LeftPlane;
2169  SimdPlane right_plane = frustum.m_RightPlane;
2170  SimdPlane top_plane = frustum.m_TopPlane;
2171  SimdPlane bottom_plane = frustum.m_BottomPlane;
2172  SimdVector frustum_center = frustum.m_Center;
2173  SimdQuaternion frustum_rot = frustum.m_Rotation;
2174  SimdVector center = sphere;
2175  SimdVector radius = F128::SetValue<3>(center, each_select32);
2176 
2177  // The center of the sphere to the local space of frustum
2178  center = Vector3::InvRotate(F128::Sub(center, frustum_center), frustum_rot);
2179  center = F128::SetFloatToLane<3>(center, 1.f);
2180 
2181  // distance from the planes
2182  f128 dist0, dist1;
2183  dist0 = Vector4::Dot4(center, near_plane, far_plane, left_plane, right_plane);
2184  dist1 = Vector4::Dot2(center, top_plane, bottom_plane);
2185  dist1 = F128::Swizzle<0, 1, 0, 1>(dist1);
2186  f128 zero = F128::SetZero();
2187 
2188  // outside if Px exists such that Px(center) > radius
2189  f128 outside = F128::CmpGt(dist0, radius);
2190  outside = F128::Or(F128::CmpGt(dist1, radius), outside);
2191  if (!F128::IsAllMaskFalse(outside)) return false;
2192 
2193  // inside if the center is inside all the planes
2194  f128 center_inside = F128::CmpLe(dist0, zero);
2195  center_inside = F128::And(F128::CmpLe(dist0, zero), center_inside);
2196  if (F128::IsAllMaskTrue(center_inside)) return true;
2197 
2198  // dist to a plane <= radius already satisfied(outside is all false)
2199  // 1) map center onto the plane, which the center is outside.
2200  // 2) return true if the point(pt_plane) is inside all the adjacent planes
2201  SimdVector pt_plane;
2202  f128 inside;
2203  f128 dist_plane;
2204 
2205  dist_plane = Vector4::Dot4(center, near_plane, far_plane, left_plane, right_plane);
2206 
2207  // onto the near plane
2208  pt_plane = F128::MultSub(near_plane, F128::SetValue<0>(dist0, each_select32), center);
2209  pt_plane = F128::SetFloatToLane<3>(pt_plane, 1.f);
2210  inside = Vector4::Dot4(pt_plane, left_plane, right_plane, top_plane, bottom_plane);
2211  inside = F128::CmpLe(inside, zero);
2212  inside = F128::And(inside, F128::CmpGt(F128::SetValue<0>(dist_plane, each_select32), zero));
2213  if (F128::IsAllMaskTrue(inside)) return true;
2214 
2215  // onto the far plane
2216  pt_plane = F128::MultSub(far_plane, F128::SetValue<1>(dist0, each_select32), center);
2217  pt_plane = F128::SetFloatToLane<3>(pt_plane, 1.f);
2218  inside = Vector4::Dot4(pt_plane, left_plane, right_plane, top_plane, bottom_plane);
2219  inside = F128::CmpLe(inside, zero);
2220  inside = F128::And(inside, F128::CmpGt(F128::SetValue<1>(dist_plane, each_select32), zero));
2221  if (F128::IsAllMaskTrue(inside)) return true;
2222 
2223  // onto the left plane
2224  pt_plane = F128::MultSub(left_plane, F128::SetValue<2>(dist0, each_select32), center);
2225  pt_plane = F128::SetFloatToLane<3>(pt_plane, 1.f);
2226  inside = Vector4::Dot4(pt_plane, near_plane, far_plane, top_plane, bottom_plane);
2227  inside = F128::CmpLe(inside, zero);
2228  inside = F128::And(inside, F128::CmpGt(F128::SetValue<2>(dist_plane, each_select32), zero));
2229  if (F128::IsAllMaskTrue(inside)) return true;
2230 
2231  // onto the right plane
2232  pt_plane = F128::MultSub(right_plane, F128::SetValue<3>(dist0, each_select32), center);
2233  pt_plane = F128::SetFloatToLane<3>(pt_plane, 1.f);
2234  inside = Vector4::Dot4(pt_plane, near_plane, far_plane, top_plane, bottom_plane);
2235  inside = F128::CmpLe(inside, zero);
2236  inside = F128::And(inside, F128::CmpGt(F128::SetValue<3>(dist_plane, each_select32), zero));
2237  if (F128::IsAllMaskTrue(inside)) return true;
2238 
2239  dist_plane = Vector4::Dot2(center, top_plane, bottom_plane);
2240 
2241  // onto the top plane
2242  pt_plane = F128::MultSub(top_plane, F128::SetValue<0>(dist1, each_select32), center);
2243  pt_plane = F128::SetFloatToLane<3>(pt_plane, 1.f);
2244  inside = Vector4::Dot4(pt_plane, near_plane, far_plane, left_plane, right_plane);
2245  inside = F128::CmpLe(inside, zero);
2246  inside = F128::And(inside, F128::CmpGt(F128::SetValue<0>(dist_plane, each_select32), zero));
2247  if (F128::IsAllMaskTrue(inside)) return true;
2248 
2249  // onto the bottom plane
2250  pt_plane = F128::MultSub(bottom_plane, F128::SetValue<1>(dist1, each_select32), center);
2251  pt_plane = F128::SetFloatToLane<3>(pt_plane, 1.f);
2252  inside = Vector4::Dot4(pt_plane, near_plane, far_plane, left_plane, right_plane);
2253  inside = F128::CmpLe(inside, zero);
2254  inside = F128::And(inside, F128::CmpGt(F128::SetValue<1>(dist_plane, each_select32), zero));
2255  if (F128::IsAllMaskTrue(inside)) return true;
2256 
2257  // sphere may be near the edges of the frustum
2258  center = sphere; // back to world space
2259  f128 radius_sq = F128::Mult(radius, radius);
2260  f128 dist_sq;
2261 
2262  // NOTE:
2263  // depends on the implementation of Frustum::GetCorners()
2264  Float3 corners[8];
2265  frustum.GetCorners(&corners[0]);
2266  SimdVector left_top = Vector3::LoadFloat3(&corners[0]);
2267  SimdVector right_top = Vector3::LoadFloat3(&corners[1]);
2268  SimdVector right_bottom = Vector3::LoadFloat3(&corners[2]);
2269  SimdVector left_bottom = Vector3::LoadFloat3(&corners[3]);
2270  SimdVector left_top_far = Vector3::LoadFloat3(&corners[4]);
2271  SimdVector right_top_far = Vector3::LoadFloat3(&corners[5]);
2272  SimdVector right_bottom_far = Vector3::LoadFloat3(&corners[6]);
2273  SimdVector left_bottom_far = Vector3::LoadFloat3(&corners[7]);
2274 
2275  // check the distance from the segments to the corner
2276  dist_sq = DistanceSq::PointSegment(center, left_top, right_top);
2277  dist_sq = F128::Min(dist_sq, DistanceSq::PointSegment(center, right_top, right_bottom));
2278  dist_sq = F128::Min(dist_sq, DistanceSq::PointSegment(center, right_bottom, left_bottom));
2279  dist_sq = F128::Min(dist_sq, DistanceSq::PointSegment(center, left_bottom, left_top));
2280 
2281  dist_sq = F128::Min(dist_sq, DistanceSq::PointSegment(center, left_top_far, right_top_far));
2282  dist_sq = F128::Min(dist_sq, DistanceSq::PointSegment(center, right_top_far, right_bottom_far));
2283  dist_sq =
2284  F128::Min(dist_sq, DistanceSq::PointSegment(center, right_bottom_far, left_bottom_far));
2285  dist_sq = F128::Min(dist_sq, DistanceSq::PointSegment(center, left_bottom_far, left_top_far));
2286 
2287  dist_sq = F128::Min(dist_sq, DistanceSq::PointSegment(center, left_top_far, left_top));
2288  dist_sq = F128::Min(dist_sq, DistanceSq::PointSegment(center, right_top_far, right_top));
2289  dist_sq = F128::Min(dist_sq, DistanceSq::PointSegment(center, right_bottom_far, right_bottom));
2290  dist_sq = F128::Min(dist_sq, DistanceSq::PointSegment(center, left_bottom_far, left_bottom));
2291 
2292  return F128::IsAllMaskTrue(F128::CmpLe(dist_sq, radius_sq));
2293 }
2294 
2295 NLIB_B Containment::FrustumPoint(const Frustum& frustum, SimdVectorArg point) NLIB_NOEXCEPT {
2296  SimdVector pt = F128::Sub(point, frustum.m_Center);
2297  pt = Vector3::InvRotate(pt, frustum.m_Rotation);
2298  pt = F128::SetFloatToLane<3>(pt, 1.f);
2299 
2300  f128 dot1 = Vector4::Dot4(pt, frustum.m_NearPlane, frustum.m_FarPlane, frustum.m_TopPlane,
2301  frustum.m_BottomPlane);
2302  f128 dot2 = Vector4::Dot2(pt, frustum.m_LeftPlane, frustum.m_RightPlane);
2303  dot2 = F128::Swizzle<0, 1, 0, 1>(dot2);
2304  dot1 = F128::Max(dot1, dot2);
2305  f128 outside = F128::CmpGt(dot1, F128::SetZero());
2306  return F128::IsAllMaskFalse(outside);
2307 }
2308 
2309 NLIB_B Intersection::FrustumAxisAlignedBox(const Frustum& frustum,
2310  const AxisAlignedBox& aabb) NLIB_NOEXCEPT {
2311  // Check the center point for short cut.
2312  // In general, aabb is much smaller than frustum,
2313  // and the center point is near the vertices in compared with the frustum.
2314  SimdVector C = F128::Mult(0.5f, F128::Add(aabb.point_max, aabb.point_min));
2315  if (Containment::FrustumPoint(frustum, C)) return true;
2316  // if (Containment::FrustumPoint(frustum, aabb.point_max)) return true;
2317  // if (Containment::FrustumPoint(frustum, aabb.point_min)) return true;
2318 
2319  // Transform AABB's center to the origin
2320  SimdVector extent = F128::Sub(aabb.point_max, C);
2321  Frustum fstm;
2322  frustum.Transform(&fstm, 1.f, F128::LoadA16(F128::v0001_), F128::Negate(C));
2323 
2324  Float3 corners_[8];
2325  fstm.GetCorners(&corners_[0]);
2326 
2327  SimdVector corners[8];
2328  for (size_t i = 0; i < 8; ++i) {
2329  corners[i] = Vector3::LoadFloat3(&corners_[i]);
2330  corners[i] = F128::SetFloatToLane<3>(corners[i], 1.f);
2331  }
2332 
2333  // test 3 axes of the planes of the AABB
2334  SimdVector pt_min, pt_max;
2335  pt_min = pt_max = corners[0];
2336  for (size_t i = 1; i < 8; ++i) {
2337  SimdVector vec = corners[i];
2338  pt_min = F128::Min(vec, pt_min);
2339  pt_max = F128::Max(vec, pt_max);
2340  }
2341  f128 disjoint = F128::Or(F128::CmpGt(pt_min, extent),
2342  F128::CmpLt(pt_max, F128::Negate(extent)));
2343  disjoint = F128::SetZeroToLane<3>(disjoint);
2344  if (!F128::IsAllMaskFalse(disjoint)) return false;
2345 
2346  // test 5 axes of the planes of the frustum
2347  SimdPlane plane;
2348  SimdVector N;
2349  f128 D;
2350  SimdVector frustum_center = fstm.m_Center;
2351  SimdQuaternion frustum_rot = fstm.m_Rotation;
2352  f128 dist0_frustum, dist1_frustum;
2353  f128 dist_aabb_max, dist_aabb_min;
2354  f128 intersect;
2355 
2356  // near plane / far plane (they are parallel)
2357  plane = fstm.m_NearPlane;
2358  N = Vector3::Rotate(plane, frustum_rot);
2359  D = F128::Sub(plane, Vector3::Dot(N, frustum_center));
2360  plane = F128::Permute<0, 1, 2, 7>(N, D);
2361  D = F128::SetValue<3>(D, each_select32);
2362  dist0_frustum = Vector4::Dot4(plane, corners[0], corners[1], corners[2], corners[3]);
2363  dist1_frustum = Vector4::Dot4(plane, corners[4], corners[5], corners[6], corners[7]);
2364  N = Vector3::Dot(F128::Abs(N), extent);
2365  dist_aabb_max = F128::Add(D, N);
2366  dist_aabb_min = F128::Sub(D, N);
2367  intersect = F128::Or(F128::CmpGe(dist0_frustum, dist_aabb_min),
2368  F128::CmpGe(dist1_frustum, dist_aabb_min));
2369  if (F128::IsAllMaskFalse(intersect)) return false;
2370  intersect = F128::Or(F128::CmpLe(dist0_frustum, dist_aabb_max),
2371  F128::CmpLe(dist1_frustum, dist_aabb_max));
2372  if (F128::IsAllMaskFalse(intersect)) return false;
2373 
2374  // left plane
2375  plane = fstm.m_LeftPlane;
2376  N = Vector3::Rotate(plane, frustum_rot);
2377  D = F128::Sub(plane, Vector3::Dot(N, frustum_center));
2378  plane = F128::Permute<0, 1, 2, 7>(N, D);
2379  D = F128::SetValue<3>(D, each_select32);
2380  dist0_frustum = Vector4::Dot4(plane, corners[0], corners[1], corners[2], corners[3]);
2381  dist1_frustum = Vector4::Dot4(plane, corners[4], corners[5], corners[6], corners[7]);
2382  N = Vector3::Dot(F128::Abs(N), extent);
2383  dist_aabb_max = F128::Add(D, N);
2384  dist_aabb_min = F128::Sub(D, N);
2385  intersect = F128::Or(F128::CmpGe(dist0_frustum, dist_aabb_min),
2386  F128::CmpGe(dist1_frustum, dist_aabb_min));
2387  if (F128::IsAllMaskFalse(intersect)) return false;
2388  intersect = F128::Or(F128::CmpLe(dist0_frustum, dist_aabb_max),
2389  F128::CmpLe(dist1_frustum, dist_aabb_max));
2390  if (F128::IsAllMaskFalse(intersect)) return false;
2391 
2392  // right plane
2393  plane = fstm.m_RightPlane;
2394  N = Vector3::Rotate(plane, frustum_rot);
2395  D = F128::Sub(plane, Vector3::Dot(N, frustum_center));
2396  plane = F128::Permute<0, 1, 2, 7>(N, D);
2397  D = F128::SetValue<3>(D, each_select32);
2398  dist0_frustum = Vector4::Dot4(plane, corners[0], corners[1], corners[2], corners[3]);
2399  dist1_frustum = Vector4::Dot4(plane, corners[4], corners[5], corners[6], corners[7]);
2400  N = Vector3::Dot(F128::Abs(N), extent);
2401  dist_aabb_max = F128::Add(D, N);
2402  dist_aabb_min = F128::Sub(D, N);
2403  intersect = F128::Or(F128::CmpGe(dist0_frustum, dist_aabb_min),
2404  F128::CmpGe(dist1_frustum, dist_aabb_min));
2405  if (F128::IsAllMaskFalse(intersect)) return false;
2406  intersect = F128::Or(F128::CmpLe(dist0_frustum, dist_aabb_max),
2407  F128::CmpLe(dist1_frustum, dist_aabb_max));
2408  if (F128::IsAllMaskFalse(intersect)) return false;
2409 
2410  // top plane
2411  plane = fstm.m_TopPlane;
2412  N = Vector3::Rotate(plane, frustum_rot);
2413  D = F128::Sub(plane, Vector3::Dot(N, frustum_center));
2414  plane = F128::Permute<0, 1, 2, 7>(N, D);
2415  D = F128::SetValue<3>(D, each_select32);
2416  dist0_frustum = Vector4::Dot4(plane, corners[0], corners[1], corners[2], corners[3]);
2417  dist1_frustum = Vector4::Dot4(plane, corners[4], corners[5], corners[6], corners[7]);
2418  N = Vector3::Dot(F128::Abs(N), extent);
2419  dist_aabb_max = F128::Add(D, N);
2420  dist_aabb_min = F128::Sub(D, N);
2421  intersect = F128::Or(F128::CmpGe(dist0_frustum, dist_aabb_min),
2422  F128::CmpGe(dist1_frustum, dist_aabb_min));
2423  if (F128::IsAllMaskFalse(intersect)) return false;
2424  intersect = F128::Or(F128::CmpLe(dist0_frustum, dist_aabb_max),
2425  F128::CmpLe(dist1_frustum, dist_aabb_max));
2426  if (F128::IsAllMaskFalse(intersect)) return false;
2427 
2428  // bottom plane
2429  plane = fstm.m_BottomPlane;
2430  N = Vector3::Rotate(plane, frustum_rot);
2431  D = F128::Sub(plane, Vector3::Dot(N, frustum_center));
2432  plane = F128::Permute<0, 1, 2, 7>(N, D);
2433  D = F128::SetValue<3>(D, each_select32);
2434  dist0_frustum = Vector4::Dot4(plane, corners[0], corners[1], corners[2], corners[3]);
2435  dist1_frustum = Vector4::Dot4(plane, corners[4], corners[5], corners[6], corners[7]);
2436  N = Vector3::Dot(F128::Abs(N), extent);
2437  dist_aabb_max = F128::Add(D, N);
2438  dist_aabb_min = F128::Sub(D, N);
2439  intersect = F128::Or(F128::CmpGe(dist0_frustum, dist_aabb_min),
2440  F128::CmpGe(dist1_frustum, dist_aabb_min));
2441  if (F128::IsAllMaskFalse(intersect)) return false;
2442  intersect = F128::Or(F128::CmpLe(dist0_frustum, dist_aabb_max),
2443  F128::CmpLe(dist1_frustum, dist_aabb_max));
2444  if (F128::IsAllMaskFalse(intersect)) return false;
2445 
2446  // Axis = Cross(aabb edge, frustum edge)
2447  SimdVector E0[3];
2448  E0[0] = F128::LoadA16(F128::v1000_);
2449  E0[1] = F128::LoadA16(F128::v0100_);
2450  E0[2] = F128::LoadA16(F128::v0010_);
2451  SimdVector E1[6];
2452  E1[0] = F128::Sub(corners[0], corners[4]);
2453  E1[1] = F128::Sub(corners[1], corners[5]);
2454  E1[2] = F128::Sub(corners[2], corners[6]);
2455  E1[3] = F128::Sub(corners[3], corners[7]);
2456  E1[4] = F128::Sub(corners[1], corners[0]);
2457  E1[5] = F128::Sub(corners[1], corners[2]);
2458 
2459  f128 dot0, dot1;
2460  for (size_t i = 0; i < 3; ++i) {
2461  for (size_t j = 0; j < 6; ++j) {
2462  SimdVector axis = Vector3::Cross(E0[i], E1[j]);
2463  axis = F128::SetZeroToLane<3>(axis);
2464  N = Vector3::Dot(F128::Abs(axis), extent);
2465  D = F128::SetValue<3>(axis, each_select32);
2466  f128 aabb_min = F128::Sub(D, N);
2467  f128 aabb_max = F128::Add(D, N);
2468  dot0 = Vector4::Dot4(axis, corners[0], corners[1], corners[2], corners[3]);
2469  dot1 = Vector4::Dot4(axis, corners[4], corners[5], corners[6], corners[7]);
2470  // false if no overlap
2471  intersect = F128::CmpGe(F128::Max(dot0, dot1), aabb_min);
2472  if (F128::IsAllMaskFalse(intersect)) return false;
2473  intersect = F128::CmpLe(F128::Min(dot0, dot1), aabb_max);
2474  if (F128::IsAllMaskFalse(intersect)) return false;
2475  }
2476  }
2477  return true;
2478 }
2479 
2480 NLIB_B Intersection::FrustumOrientedBox(const Frustum& frustum,
2481  const OrientedBox& obb) NLIB_NOEXCEPT {
2482  Frustum frstm;
2483  frustum.Transform(&frstm, 1.f, F128::LoadA16(F128::v0001_), F128::Negate(obb.center));
2484  frstm.Transform(&frstm, 1.f, Quaternion::Conjugate(obb.rotation), F128::SetZero());
2485 
2486  AxisAlignedBox aabb;
2487  aabb.point_max = obb.extent;
2488  aabb.point_min = F128::Negate(aabb.point_max);
2489  return Intersection::FrustumAxisAlignedBox(frstm, aabb);
2490 }
2491 
2492 NLIB_B Intersection::FrustumTriangle(const Frustum& frustum, SimdVectorArg triangle_point0,
2493  SimdVectorArg triangle_point1,
2494  SimdVectorArg triangle_point2) NLIB_NOEXCEPT {
2495  // the triangle to the local space of the frustum
2496  SimdQuaternion frustum_rot = frustum.m_Rotation;
2497  SimdVector frustum_center = frustum.m_Center;
2498  SimdVector pt0 = Vector3::InvRotate(F128::Sub(triangle_point0, frustum_center), frustum_rot);
2499  SimdVector pt1 = Vector3::InvRotate(F128::Sub(triangle_point1, frustum_center), frustum_rot);
2500  SimdVector pt2 = Vector3::InvRotate(F128::Sub(triangle_point2, frustum_center), frustum_rot);
2501  pt0 = F128::SetFloatToLane<3>(pt0, 1.f);
2502  pt1 = F128::SetFloatToLane<3>(pt1, 1.f);
2503  pt2 = F128::SetFloatToLane<3>(pt2, 1.f);
2504 
2505  SimdPlane near_plane = frustum.m_NearPlane;
2506  SimdPlane far_plane = frustum.m_FarPlane;
2507  SimdPlane left_plane = frustum.m_LeftPlane;
2508  SimdPlane right_plane = frustum.m_RightPlane;
2509  SimdPlane top_plane = frustum.m_TopPlane;
2510  SimdPlane bottom_plane = frustum.m_BottomPlane;
2511 
2512  f128 dist0, dist1, dist_max;
2513  f128 dist0_min, dist1_min;
2514  f128 zero = F128::SetZero();
2515  f128 outside;
2516 
2517  // point0
2518  dist0 = Vector4::Dot4(pt0, near_plane, far_plane, left_plane, right_plane);
2519  dist0_min = dist0;
2520  dist1 = Vector4::Dot2(pt0, top_plane, bottom_plane);
2521  dist1 = F128::Swizzle<0, 1, 0, 1>(dist1);
2522  dist1_min = dist1;
2523  dist_max = F128::Max(dist0, dist1);
2524  outside = F128::CmpGt(dist_max, zero);
2525  if (F128::IsAllMaskFalse(outside)) return true; // point0 is inside the frustum
2526 
2527  // point1
2528  dist0 = Vector4::Dot4(pt1, near_plane, far_plane, left_plane, right_plane);
2529  dist0_min = F128::Min(dist0, dist0_min);
2530  dist1 = Vector4::Dot2(pt1, top_plane, bottom_plane);
2531  dist1 = F128::Swizzle<0, 1, 0, 1>(dist1);
2532  dist1_min = F128::Min(dist1, dist1_min);
2533  dist_max = F128::Max(dist0, dist1);
2534  outside = F128::CmpGt(dist_max, zero);
2535  if (F128::IsAllMaskFalse(outside)) return true; // point1 is inside the frustum
2536 
2537  // point2
2538  dist0 = Vector4::Dot4(pt2, near_plane, far_plane, left_plane, right_plane);
2539  dist0_min = F128::Min(dist0, dist0_min);
2540  dist1 = Vector4::Dot2(pt2, top_plane, bottom_plane);
2541  dist1 = F128::Swizzle<0, 1, 0, 1>(dist1);
2542  dist1_min = F128::Min(dist1, dist1_min);
2543  dist_max = F128::Max(dist0, dist1);
2544  outside = F128::CmpGt(dist_max, zero);
2545  if (F128::IsAllMaskFalse(outside)) return true; // point2 is inside the frustum
2546 
2547  // check if there is a plane such that all points are outside
2548  if (!F128::IsAllMaskFalse(F128::Or(
2549  F128::CmpGt(dist0_min, zero), F128::CmpGt(dist1_min, zero)))) return false;
2550 
2551  // All the points are outside the frustum, but the triangle may intersect
2552 
2553  // return to the world space
2554  pt0 = triangle_point0;
2555  pt1 = triangle_point1;
2556  pt2 = triangle_point2;
2557 
2558  // Axis = the normal of the triangle
2559  SimdPlane triangle = Plane::FromPoint(pt0, pt1, pt2);
2560  Float3 corners_[8];
2561  frustum.GetCorners(&corners_[0]);
2562  SimdVector corners[8];
2563  for (size_t i = 0; i < 8; ++i) {
2564  corners[i] = Vector3::LoadFloat3(&corners_[i]);
2565  corners[i] = F128::SetFloatToLane<3>(corners[i], 1.f);
2566  }
2567  f128 dot0 = Vector4::Dot4(triangle, corners[0], corners[1], corners[2], corners[3]);
2568  f128 dot1 = Vector4::Dot4(triangle, corners[4], corners[5], corners[6], corners[7]);
2569  // all the points of the frustum are inside of the triangle plane
2570  if (F128::IsAllMaskFalse(F128::CmpGe(F128::Max(dot0, dot1), zero))) return false;
2571  // all the points of the frustum are outside of the triangle plane
2572  if (F128::IsAllMaskFalse(F128::CmpLe(F128::Min(dot0, dot1), zero))) return false;
2573 
2574  // Axis = Cross(triangle edge, frustum edge)
2575  SimdVector E0[3];
2576  E0[0] = F128::Sub(pt1, pt0);
2577  E0[1] = F128::Sub(pt2, pt1);
2578  E0[2] = F128::Sub(pt0, pt2);
2579  SimdVector E1[6];
2580  E1[0] = F128::Sub(corners[0], corners[4]);
2581  E1[1] = F128::Sub(corners[1], corners[5]);
2582  E1[2] = F128::Sub(corners[2], corners[6]);
2583  E1[3] = F128::Sub(corners[3], corners[7]);
2584  E1[4] = F128::Sub(corners[1], corners[0]);
2585  E1[5] = F128::Sub(corners[1], corners[2]);
2586 
2587  for (size_t i = 0; i < 3; ++i) {
2588  for (size_t j = 0; j < 6; ++j) {
2589  SimdVector axis = Vector3::Cross(E0[i], E1[j]);
2590  axis = F128::SetZeroToLane<3>(axis);
2591  f128 tri_min, tri_max;
2592  {
2593  f128 tmp0 = Vector3::Dot(axis, pt0);
2594  f128 tmp1 = Vector3::Dot(axis, pt1);
2595  f128 tmp2 = Vector3::Dot(axis, pt2);
2596  tri_min = F128::Min(tmp0, tmp1);
2597  tri_max = F128::Max(tmp0, tmp1);
2598  tri_min = F128::Min(tri_min, tmp2);
2599  tri_max = F128::Max(tri_max, tmp2);
2600  }
2601  dot0 = Vector4::Dot4(axis, corners[0], corners[1], corners[2], corners[3]);
2602  dot1 = Vector4::Dot4(axis, corners[4], corners[5], corners[6], corners[7]);
2603  // all the points of the frustum are inside of the triangle plane
2604  if (F128::IsAllMaskFalse(F128::CmpGe(F128::Max(dot0, dot1), tri_min))) return false;
2605  // all the points of the frustum are outside of the triangle plane
2606  if (F128::IsAllMaskFalse(F128::CmpLe(F128::Min(dot0, dot1), tri_max))) return false;
2607  }
2608  }
2609  return true;
2610 }
2611 
2612 inline Intersection::PlaneResult __vectorcall
2613 Intersection::FrustumPlane(const Frustum& frustum, SimdPlaneArg plane) NLIB_NOEXCEPT {
2614  Float3 corners_[8];
2615  frustum.GetCorners(&corners_[0]);
2616  SimdVector corners[8];
2617  for (size_t i = 0; i < 8; ++i) {
2618  corners[i] = Vector3::LoadFloat3(&corners_[i]);
2619  corners[i] = F128::SetFloatToLane<3>(corners[i], 1.f);
2620  }
2621  f128 dot0 = Vector4::Dot4(plane, corners[0], corners[1], corners[2], corners[3]);
2622  f128 dot1 = Vector4::Dot4(plane, corners[4], corners[5], corners[6], corners[7]);
2623  f128 zero = F128::SetZero();
2624 
2625  // check if all vertices are front
2626  f128 outside = F128::CmpLe(F128::Min(dot0, dot1), zero);
2627  if (F128::IsAllMaskFalse(outside)) return Intersection::PLANE_FRONT;
2628 
2629  // check if all vertices are back
2630  f128 inside = F128::CmpGe(F128::Max(dot0, dot1), zero);
2631  if (F128::IsAllMaskFalse(inside)) return Intersection::PLANE_BACK;
2632 
2633  return Intersection::PLANE_INTERSECT;
2634 }
2635 
2636 NLIB_B Intersection::FrustumRay(const Frustum& frustum, SimdVectorArg ray_point,
2637  SimdVectorArg ray_dir_normalized) NLIB_NOEXCEPT {
2638  // intersects if the ray origin is inside the frustum
2639  if (Containment::FrustumPoint(frustum, ray_point)) return true;
2640 
2641  // transform ray to the local coord of the frusutum
2642  SimdVector rp = F128::Sub(ray_point, frustum.m_Center);
2643  rp = Vector3::InvRotate(rp, frustum.m_Rotation);
2644  rp = F128::SetFloatToLane<3>(rp, 1.f);
2645  SimdVector rd = Vector3::InvRotate(ray_dir_normalized, frustum.m_Rotation);
2646  rd = F128::SetZeroToLane<3>(rd);
2647 
2648  f128 dot_rp0, dot_rp1;
2649  dot_rp0 = Vector4::Dot4(rp, frustum.m_NearPlane, frustum.m_FarPlane,
2650  frustum.m_LeftPlane, frustum.m_RightPlane);
2651  dot_rp1 = Vector4::Dot2(rp, frustum.m_TopPlane, frustum.m_BottomPlane);
2652  dot_rp1 = F128::Swizzle<0, 1, 0, 1>(dot_rp1);
2653 
2654  f128 dot_rd0, dot_rd1;
2655  dot_rd0 = Vector4::Dot4(rd, frustum.m_NearPlane, frustum.m_FarPlane,
2656  frustum.m_LeftPlane, frustum.m_RightPlane);
2657  dot_rd1 = Vector4::Dot2(rd, frustum.m_TopPlane, frustum.m_BottomPlane);
2658  dot_rd1 = F128::Swizzle<0, 1, 0, 1>(dot_rd1);
2659 
2660  // return false if ray is parallel to plane, and origin is outside plane
2661  f128 eps = F128::SetEpsilon();
2662  f128 zero = F128::SetZero();
2663  f128 parallel0 = F128::CmpLe(F128::Abs(dot_rd0), eps);
2664  f128 parallel1 = F128::CmpLe(F128::Abs(dot_rd1), eps);
2665  f128 mask0 = F128::And(parallel0, F128::CmpGt(dot_rp0, zero));
2666  f128 mask1 = F128::And(parallel1, F128::CmpGt(dot_rp1, zero));
2667  if (!F128::IsAllMaskFalse(F128::Or(mask0, mask1))) return false;
2668 
2669  f128 t0 = F128::Negate(F128::Div(dot_rp0, dot_rd0)); // t0 > 0
2670  f128 t1 = F128::Negate(F128::Div(dot_rp1, dot_rd1)); // t1 > 0
2671  f128 inf = F128::SetInfinity();
2672 
2673  // select the nearest back to front intersection point
2674  mask0 = F128::AndNot(parallel0, F128::CmpGt(dot_rd0, zero)); // true if back -> front
2675  mask1 = F128::AndNot(parallel1, F128::CmpGt(dot_rd1, zero)); // true if back -> front
2676  f128 to = F128::Min(F128::Select(mask0, t0, inf), F128::Select(mask1, t1, inf));
2677  to = F128::PairwiseMin(to, to);
2678  to = F128::PairwiseMin(to, to);
2679 
2680  // select the farthest front to back intersection point
2681  inf = F128::Negate(inf);
2682  mask0 = F128::AndNot(parallel0, F128::CmpLt(dot_rd0, zero)); // true if front -> back
2683  mask1 = F128::AndNot(parallel1, F128::CmpLt(dot_rd1, zero)); // true if front -> back
2684  f128 from = F128::Max(F128::Select(mask0, t0, inf), F128::Select(mask1, t1, inf));
2685  from = F128::PairwiseMax(from, from);
2686  from = F128::PairwiseMax(from, from);
2687 
2688  float n = F128::GetFloatFromLane<0>(from);
2689  float f = F128::GetFloatFromLane<0>(to);
2690  return n <= f && (n >= 0.f || f >= 0.f);
2691 }
2692 
2693 #ifdef _MSC_VER
2694 # pragma endregion Intersection function implementation
2695 #endif
2696 
2697 //
2698 // Containment
2699 //
2700 #ifdef _MSC_VER
2701 # pragma region Containment
2702 #endif
2703 
2704 // true if point in sphere
2705 // BoundingSphere::Contains()
2706 NLIB_B Containment::SpherePoint(SimdSphereArg sphere, SimdVectorArg point) NLIB_NOEXCEPT {
2707  f128 dist_sq = Vector3::LengthSq(F128::Sub(sphere, point));
2708  f128 rsq = F128::SetValue<3>(F128::Mult(sphere, sphere), each_select32);
2709  return F128::IsAllMaskFalse(F128::CmpGt(dist_sq, rsq));
2710 }
2711 
2712 // true if triangle in sphere
2713 // BoundingSphere::Contains()
2714 NLIB_B Containment::SphereTriangle(SimdSphereArg sphere, SimdVectorArg triangle_point0,
2715  SimdVectorArg triangle_point1,
2716  SimdVectorArg triangle_point2) NLIB_NOEXCEPT {
2717  f128 r_sq = F128::SetValue<3>(sphere, each_select32);
2718  SimdVector tp0_c = F128::Sub(triangle_point0, sphere);
2719  SimdVector tp1_c = F128::Sub(triangle_point1, sphere);
2720  SimdVector tp2_c = F128::Sub(triangle_point2, sphere);
2721  r_sq = F128::Mult(r_sq, r_sq);
2722  tp0_c = Vector3::LengthSq(tp0_c);
2723  tp1_c = Vector3::LengthSq(tp1_c);
2724  tp2_c = Vector3::LengthSq(tp2_c);
2725  f128 dist_sq = F128::Max(F128::Max(tp0_c, tp1_c), tp2_c);
2726  return F128::IsAllMaskFalse(F128::CmpGt(dist_sq, r_sq));
2727 }
2728 
2729 // true if aabb in sphere
2730 // BoundingSphere::Contains()
2731 NLIB_B Containment::SphereAxisAlignedBox(SimdSphereArg sphere,
2732  const AxisAlignedBox& aabb) NLIB_NOEXCEPT {
2733  f128 r_sq = F128::SetValue<3>(sphere, each_select32);
2734  SimdVector aabb_max = F128::Sub(aabb.point_max, sphere);
2735  SimdVector aabb_min = F128::Sub(aabb.point_min, sphere);
2736  r_sq = F128::Mult(r_sq, r_sq);
2737  aabb_max = F128::Abs(aabb_max);
2738  aabb_min = F128::Abs(aabb_min);
2739  f128 dist_sq = Vector3::LengthSq(F128::Max(aabb_max, aabb_min));
2740  return F128::IsAllMaskFalse(F128::CmpGt(dist_sq, r_sq));
2741 }
2742 
2743 // true if obb in sphere
2744 // BoundingSphere::Contains()
2745 NLIB_B Containment::SphereOrientedBox(SimdSphereArg sphere, const OrientedBox& obb) NLIB_NOEXCEPT {
2746  SimdVector new_sphere = F128::Sub(sphere, obb.center);
2747  new_sphere = Vector3::InvRotate(new_sphere, obb.rotation);
2748  new_sphere = F128::Permute<0, 1, 2, 7>(new_sphere, sphere);
2749  SimdVector box_extent = obb.extent;
2750  AxisAlignedBox aabb;
2751  aabb.point_max = box_extent;
2752  aabb.point_min = F128::Negate(box_extent);
2753  return Containment::SphereAxisAlignedBox(new_sphere, aabb);
2754 }
2755 
2756 // true if contained in sphere
2757 // BoundingSphere::Contains()
2758 NLIB_B Containment::SphereSphere(SimdSphereArg sphere, SimdSphereArg contained) NLIB_NOEXCEPT {
2759  float dist = F128::GetFloatFromLane<0>(Vector3::Length(F128::Sub(sphere, contained)));
2760  float r0 = F128::GetFloatFromLane<3>(sphere);
2761  float r1 = F128::GetFloatFromLane<3>(contained);
2762  return (r0 - r1) >= dist;
2763 }
2764 
2765 // true if contained in sphere
2766 // BoundingSphere::Contains()
2767 NLIB_B Containment::SphereFrustum(SimdSphereArg sphere, const Frustum& frustum) NLIB_NOEXCEPT {
2768  Float3 corners[8];
2769  frustum.GetCorners(&corners[0]);
2770  SimdSphere sp = sphere;
2771  f128 rad_sq = F128::SetValue<3>(F128::Mult(sp, sp), each_select32);
2772  f128 d_sq;
2773  f128 inside;
2774  SimdVector pt;
2775 
2776  pt = Vector3::LoadFloat3(&corners[0]);
2777  d_sq = Vector3::LengthSq(F128::Sub(pt, sp));
2778  inside = F128::CmpLe(d_sq, rad_sq);
2779  for (size_t i = 1; i < 8; ++i) {
2780  pt = Vector3::LoadFloat3(&corners[i]);
2781  d_sq = Vector3::LengthSq(F128::Sub(pt, sp));
2782  inside = F128::And(inside, F128::CmpLe(d_sq, rad_sq));
2783  }
2784  return F128::IsAllMaskTrue(inside);
2785 }
2786 
2787 // true if point in aabb
2788 // BoundingBox::Contains()
2789 NLIB_B Containment::AxisAlignedBoxPoint(const AxisAlignedBox& aabb,
2790  SimdVectorArg point) NLIB_NOEXCEPT {
2791  f128 out = F128::Or(F128::CmpGt(point, aabb.point_max),
2792  F128::CmpLt(point, aabb.point_min));
2793  out = F128::SetZeroToLane<3>(out);
2794  return F128::IsAllMaskFalse(out);
2795 }
2796 
2797 // true if triangle in aabb
2798 // BoundingBox::Contains()
2799 NLIB_B Containment::AxisAlignedBoxTriangle(const AxisAlignedBox& aabb,
2800  SimdVectorArg triangle_point0,
2801  SimdVectorArg triangle_point1,
2802  SimdVectorArg triangle_point2) NLIB_NOEXCEPT {
2803  SimdVector pt_max, pt_min;
2804  pt_max = F128::Max(triangle_point0, triangle_point1);
2805  pt_min = F128::Min(triangle_point0, triangle_point1);
2806  pt_max = F128::Max(pt_max, triangle_point2);
2807  pt_min = F128::Min(pt_min, triangle_point2);
2808  f128 out = F128::Or(F128::CmpGt(pt_max, aabb.point_max),
2809  F128::CmpLt(pt_min, aabb.point_min));
2810  out = F128::SetZeroToLane<3>(out);
2811  return F128::IsAllMaskFalse(out);
2812 }
2813 
2814 // true if box in aabb
2815 // BoundingBox::Contains()
2816 NLIB_B Containment::AxisAlignedBoxOrientedBox(const AxisAlignedBox& aabb,
2817  const OrientedBox& obb) NLIB_NOEXCEPT {
2818  f128 zero = F128::SetZero();
2819  SimdVector box_extent = obb.extent;
2820  SimdVector P = F128::Splat<false, true, true, true>(box_extent, zero);
2821  SimdVector Q = F128::Splat<true, false, true, true>(box_extent, zero);
2822  SimdVector R = F128::Splat<true, true, false, true>(box_extent, zero);
2823  SimdQuaternion box_rotation = obb.rotation;
2824  P = Vector3::Rotate(P, box_rotation);
2825  Q = Vector3::Rotate(Q, box_rotation);
2826  R = Vector3::Rotate(R, box_rotation);
2827 
2828  SimdVector pt_min, pt_max;
2829  SimdVector v, v_neg;
2830 
2831  // (x, y, z) and (-x, -y, -z)
2832  v = F128::Add(F128::Add(P, Q), R);
2833  v_neg = F128::Negate(v);
2834  pt_min = F128::Min(v, v_neg);
2835  pt_max = F128::Max(v, v_neg);
2836 
2837  // (x, y, -z) and (-x, -y, z)
2838  v = F128::Sub(F128::Add(P, Q), R);
2839  v_neg = F128::Negate(v);
2840  pt_min = F128::Min(pt_min, v);
2841  pt_max = F128::Max(pt_max, v);
2842  pt_min = F128::Min(pt_min, v_neg);
2843  pt_max = F128::Max(pt_max, v_neg);
2844 
2845  // (x, y, -z) and (-x, -y, z)
2846  v = F128::Sub(F128::Add(P, R), Q);
2847  v_neg = F128::Negate(v);
2848  pt_min = F128::Min(pt_min, v);
2849  pt_max = F128::Max(pt_max, v);
2850  pt_min = F128::Min(pt_min, v_neg);
2851  pt_max = F128::Max(pt_max, v_neg);
2852 
2853  SimdVector box_center = obb.center;
2854  SimdVector aabb_min = F128::Sub(aabb.point_min, box_center);
2855  SimdVector aabb_max = F128::Sub(aabb.point_max, box_center);
2856  f128 out = F128::Or(F128::CmpGt(pt_max, aabb_max),
2857  F128::CmpLt(pt_min, aabb_min));
2858  out = F128::SetZeroToLane<3>(out);
2859  return F128::IsAllMaskFalse(out);
2860 }
2861 
2862 // true if contained_aabb in aabb
2863 // BoundingBox::Contains()
2864 NLIB_B Containment::AxisAlignedBoxAxisAlignedBox(
2865  const AxisAlignedBox& aabb, const AxisAlignedBox& contained) NLIB_NOEXCEPT {
2866  f128 out = F128::Or(F128::CmpGt(contained.point_max, aabb.point_max),
2867  F128::CmpLt(contained.point_min, aabb.point_min));
2868  out = F128::SetZeroToLane<3>(out);
2869  return F128::IsAllMaskFalse(out);
2870 }
2871 
2872 // true if sphere in aabb
2873 // BoundingBox::Contains()
2874 NLIB_B Containment::AxisAlignedBoxSphere(const AxisAlignedBox& aabb,
2875  SimdSphereArg sphere) NLIB_NOEXCEPT {
2876  f128 radius = F128::SetValue<3>(sphere, each_select32);
2877  SimdVector pt_min = F128::Sub(sphere, radius);
2878  SimdVector pt_max = F128::Add(sphere, radius);
2879  f128 out = F128::Or(F128::CmpGt(pt_max, aabb.point_max),
2880  F128::CmpLt(pt_min, aabb.point_min));
2881  out = F128::SetZeroToLane<3>(out);
2882  return F128::IsAllMaskFalse(out);
2883 }
2884 
2885 // true if frustum in aabb
2886 // BoundingBox::Contains()
2887 NLIB_B Containment::AxisAlignedBoxFrustum(const AxisAlignedBox& aabb,
2888  const Frustum& frustum) NLIB_NOEXCEPT {
2889  Float3 corners[8];
2890  frustum.GetCorners(&corners[0]);
2891  SimdVector frustum_max = Vector3::LoadFloat3(&corners[0]);
2892  SimdVector frustum_min = frustum_max;
2893  for (size_t i = 1; i < 8; ++i) {
2894  SimdVector tmp = Vector3::LoadFloat3(&corners[i]);
2895  frustum_max = F128::Max(tmp, frustum_max);
2896  frustum_min = F128::Min(tmp, frustum_min);
2897  }
2898  AxisAlignedBox contained;
2899  contained.point_max = frustum_max;
2900  contained.point_min = frustum_min;
2901  return Containment::AxisAlignedBoxAxisAlignedBox(aabb, contained);
2902 }
2903 
2904 // true if point in box
2905 // BoundingOrientedBox::Contains()
2906 NLIB_B Containment::OrientedBoxPoint(const OrientedBox& box, SimdVectorArg point) NLIB_NOEXCEPT {
2907  SimdVector pt = F128::Sub(point, box.center);
2908  pt = Vector3::InvRotate(pt, box.rotation);
2909  return Vector3::InBound(pt, box.extent);
2910 }
2911 
2912 // true if triangle in box
2913 // BoundingOrientedBox::Contains()
2914 NLIB_B Containment::OrientedBoxTriangle(const OrientedBox& box, SimdVectorArg triangle_point0,
2915  SimdVectorArg triangle_point1,
2916  SimdVectorArg triangle_point2) NLIB_NOEXCEPT {
2917  SimdVector center = box.center;
2918  SimdVector pt0 = F128::Sub(triangle_point0, center);
2919  SimdVector pt1 = F128::Sub(triangle_point1, center);
2920  SimdVector pt2 = F128::Sub(triangle_point2, center);
2921  SimdQuaternion rot = box.rotation;
2922  pt0 = Vector3::InvRotate(pt0, rot);
2923  pt1 = Vector3::InvRotate(pt1, rot);
2924  pt2 = Vector3::InvRotate(pt2, rot);
2925  pt0 = F128::Abs(pt0);
2926  pt1 = F128::Abs(pt1);
2927  pt2 = F128::Abs(pt2);
2928  SimdVector pt_absmax = F128::Max(pt0, pt1);
2929  pt_absmax = F128::Max(pt_absmax, pt2);
2930  return Vector3::CmpLe(pt_absmax, box.extent);
2931 }
2932 
2933 // true if aabb in box
2934 // BoundingOrientedBox::Contains()
2935 NLIB_B Containment::OrientedBoxAxisAlignedBox(const OrientedBox& box,
2936  const AxisAlignedBox& aabb) NLIB_NOEXCEPT {
2937  OrientedBox new_aabb;
2938  SimdVector aabb_max = aabb.point_max;
2939  SimdVector aabb_min = aabb.point_min;
2940  SimdVector aabb_center = F128::Add(aabb_min, aabb_max);
2941  aabb_center = F128::Mult(0.5f, aabb_center);
2942  new_aabb.extent = F128::Sub(aabb_max, aabb_center);
2943  aabb_center = F128::Sub(aabb_center, box.center);
2944  new_aabb.center = aabb_center;
2945  new_aabb.rotation = Quaternion::Inverse(box.rotation);
2946 
2947  AxisAlignedBox new_box;
2948  new_box.point_max = box.extent;
2949  new_box.point_min = F128::Negate(new_box.point_max);
2950 
2951  return Containment::AxisAlignedBoxOrientedBox(new_box, new_aabb);
2952 }
2953 
2954 // true if box_contained in box
2955 // BoundingOrientedBox::Contains()
2956 NLIB_B Containment::OrientedBoxOrientedBox(const OrientedBox& box,
2957  const OrientedBox& contained) NLIB_NOEXCEPT {
2958  // box -> aabb
2959  OrientedBox contained_obb;
2960  SimdQuaternion inv_rot = Quaternion::Inverse(box.rotation);
2961  contained_obb.extent = contained.extent;
2962  contained_obb.center = Vector3::Rotate(F128::Sub(contained.center, box.center), inv_rot);
2963  contained_obb.rotation = Quaternion::Mult(contained.rotation, inv_rot);
2964 
2965  AxisAlignedBox aabb;
2966  aabb.point_max = box.extent;
2967  aabb.point_min = F128::Negate(aabb.point_max);
2968 
2969  return Containment::AxisAlignedBoxOrientedBox(aabb, contained_obb);
2970 }
2971 
2972 // true if sphere in box
2973 // BoundingOrientedBox::Contains()
2974 NLIB_B Containment::OrientedBoxSphere(const OrientedBox& box, SimdSphereArg sphere) NLIB_NOEXCEPT {
2975  AxisAlignedBox aabb;
2976  SimdVector box_extent = box.extent;
2977  aabb.point_max = box_extent;
2978  aabb.point_min = F128::Negate(box_extent);
2979 
2980  SimdSphere new_sphere = Vector3::InvRotate(F128::Sub(sphere, box.center), box.rotation);
2981  new_sphere = F128::Permute<0, 1, 2, 7>(new_sphere, sphere);
2982 
2983  return Containment::AxisAlignedBoxSphere(aabb, new_sphere);
2984 }
2985 
2986 NLIB_B Containment::OrientedBoxFrustum(const OrientedBox& box,
2987  const Frustum& frustum) NLIB_NOEXCEPT {
2988  Frustum frstm;
2989  frustum.Transform(&frstm, 1.f, F128::LoadA16(F128::v0001_), F128::Negate(box.center));
2990  frstm.Transform(&frstm, 1.f, Quaternion::Conjugate(box.rotation), F128::SetZero());
2991  AxisAlignedBox aabb;
2992  aabb.point_max = box.extent;
2993  aabb.point_min = F128::Negate(box.extent);
2994  return Containment::AxisAlignedBoxFrustum(aabb, frstm);
2995 }
2996 
2997 NLIB_B Containment::FrustumTriangle(const Frustum& frustum, SimdVectorArg triangle_point0,
2998  SimdVectorArg triangle_point1,
2999  SimdVectorArg triangle_point2) NLIB_NOEXCEPT {
3000  SimdVector pt;
3001  f128 dot, dot1, dot2;
3002  SimdPlane near_plane = frustum.m_NearPlane;
3003  SimdPlane far_plane = frustum.m_FarPlane;
3004  SimdPlane top_plane = frustum.m_TopPlane;
3005  SimdPlane bottom_plane = frustum.m_BottomPlane;
3006  SimdPlane left_plane = frustum.m_LeftPlane;
3007  SimdPlane right_plane = frustum.m_RightPlane;
3008  SimdVector center = frustum.m_Center;
3009  SimdQuaternion rot = frustum.m_Rotation;
3010 
3011  pt = F128::Sub(triangle_point0, center);
3012  pt = Vector3::InvRotate(pt, rot);
3013  pt = F128::SetFloatToLane<3>(pt, 1.f);
3014  dot1 = Vector4::Dot4(pt, near_plane, far_plane, top_plane, bottom_plane);
3015  dot2 = Vector4::Dot2(pt, left_plane, right_plane);
3016  dot2 = F128::Swizzle<0, 1, 0, 1>(dot2);
3017  dot = F128::Max(dot1, dot2);
3018 
3019  pt = F128::Sub(triangle_point1, center);
3020  pt = Vector3::InvRotate(pt, rot);
3021  pt = F128::SetFloatToLane<3>(pt, 1.f);
3022  dot1 = Vector4::Dot4(pt, near_plane, far_plane, top_plane, bottom_plane);
3023  dot2 = Vector4::Dot2(pt, left_plane, right_plane);
3024  dot2 = F128::Swizzle<0, 1, 0, 1>(dot2);
3025  dot = F128::Max(dot, F128::Max(dot1, dot2));
3026 
3027  pt = F128::Sub(triangle_point2, center);
3028  pt = Vector3::InvRotate(pt, rot);
3029  pt = F128::SetFloatToLane<3>(pt, 1.f);
3030  dot1 = Vector4::Dot4(pt, near_plane, far_plane, top_plane, bottom_plane);
3031  dot2 = Vector4::Dot2(pt, left_plane, right_plane);
3032  dot2 = F128::Swizzle<0, 1, 0, 1>(dot2);
3033  dot = F128::Max(dot, F128::Max(dot1, dot2));
3034 
3035  f128 outside = F128::CmpGt(dot, F128::SetZero());
3036  return F128::IsAllMaskFalse(outside);
3037 }
3038 
3039 NLIB_B Containment::FrustumSphere(const Frustum& frustum, SimdSphereArg sphere) NLIB_NOEXCEPT {
3040  f128 dot = F128::SetValue<3>(sphere, each_select32);
3041  dot = F128::Negate(dot);
3042 
3043  SimdVector pt = F128::Sub(sphere, frustum.m_Center);
3044  pt = Vector3::InvRotate(pt, frustum.m_Rotation);
3045  pt = F128::SetFloatToLane<3>(pt, 1.f);
3046 
3047  f128 dot1 = Vector4::Dot4(pt, frustum.m_NearPlane, frustum.m_FarPlane, frustum.m_TopPlane,
3048  frustum.m_BottomPlane);
3049  f128 dot2 = Vector4::Dot2(pt, frustum.m_LeftPlane, frustum.m_RightPlane);
3050  dot2 = F128::Swizzle<0, 1, 0, 1>(dot2);
3051  dot1 = F128::Max(dot1, dot2);
3052  f128 outside = F128::CmpGt(dot1, dot);
3053  return F128::IsAllMaskFalse(outside);
3054 }
3055 
3056 NLIB_B Containment::FrustumAxisAlignedBox(const Frustum& frustum,
3057  const AxisAlignedBox& aabb) NLIB_NOEXCEPT {
3058  SimdVector pt_min = aabb.point_min;
3059  SimdVector pt_max = aabb.point_max;
3060  SimdVector center = F128::Mult(0.5f, F128::Add(pt_min, pt_max));
3061  center = F128::SetFloatToLane<3>(center, 1.f);
3062  SimdVector extent_neg = F128::Sub(center, pt_max);
3063  extent_neg = F128::SetZeroToLane<3>(extent_neg);
3064 
3065  // transform planes
3066  SimdQuaternion rot = frustum.m_Rotation;
3067  SimdVector frustum_center = frustum.m_Center;
3068  SimdVector N;
3069  f128 D;
3070  SimdPlane tmp;
3071 
3072  // construct the 6 planes of the frustum in the world space
3073  tmp = frustum.m_NearPlane;
3074  N = Vector3::Rotate(tmp, rot);
3075  D = F128::Sub(tmp, Vector3::Dot(N, frustum_center));
3076  SimdPlane near_plane = F128::Permute<0, 1, 2, 7>(N, D);
3077 
3078  tmp = frustum.m_FarPlane;
3079  N = Vector3::Rotate(tmp, rot);
3080  D = F128::Sub(tmp, Vector3::Dot(N, frustum_center));
3081  SimdPlane far_plane = F128::Permute<0, 1, 2, 7>(N, D);
3082 
3083  tmp = frustum.m_RightPlane;
3084  N = Vector3::Rotate(tmp, rot);
3085  D = F128::Sub(tmp, Vector3::Dot(N, frustum_center));
3086  SimdPlane right_plane = F128::Permute<0, 1, 2, 7>(N, D);
3087 
3088  tmp = frustum.m_LeftPlane;
3089  N = Vector3::Rotate(tmp, rot);
3090  D = F128::Sub(tmp, Vector3::Dot(N, frustum_center));
3091  SimdPlane left_plane = F128::Permute<0, 1, 2, 7>(N, D);
3092 
3093  tmp = frustum.m_TopPlane;
3094  N = Vector3::Rotate(tmp, rot);
3095  D = F128::Sub(tmp, Vector3::Dot(N, frustum_center));
3096  SimdPlane top_plane = F128::Permute<0, 1, 2, 7>(N, D);
3097 
3098  tmp = frustum.m_BottomPlane;
3099  N = Vector3::Rotate(tmp, rot);
3100  D = F128::Sub(tmp, Vector3::Dot(N, frustum_center));
3101  SimdPlane bottom_plane = F128::Permute<0, 1, 2, 7>(N, D);
3102 
3103  f128 dist;
3104  f128 radius_neg;
3105  f128 all_points_inside;
3106 
3107  // compute the distances from AABB's center to the planes
3108  dist = Vector4::Dot4(center, near_plane, far_plane, right_plane, left_plane);
3109  // dot4(((center, 1) + (extent0-7, 0)), Plane) <= 0
3110  radius_neg = Vector4::Dot4(extent_neg, F128::Abs(near_plane), F128::Abs(far_plane),
3111  F128::Abs(right_plane), F128::Abs(left_plane));
3112  // inside if dist + radius <= 0
3113  all_points_inside = F128::CmpLe(dist, radius_neg);
3114 
3115  dist = Vector4::Dot2(center, top_plane, bottom_plane);
3116  radius_neg = Vector4::Dot2(extent_neg, F128::Abs(top_plane), F128::Abs(bottom_plane));
3117  all_points_inside =
3118  F128::And(all_points_inside, F128::Swizzle<0, 1, 0, 1>(F128::CmpLe(dist, radius_neg)));
3119 
3120  return F128::IsAllMaskTrue(all_points_inside);
3121 }
3122 
3123 NLIB_B Containment::FrustumOrientedBox(const Frustum& frustum,
3124  const OrientedBox& box) NLIB_NOEXCEPT {
3125  Frustum frstm;
3126  // move the center of the box to the origin
3127  frustum.Transform(&frstm, 1.f, F128::LoadA16(F128::v0001_), F128::Negate(box.center));
3128  // make the obb to the aabb
3129  frstm.Transform(&frstm, 1.f, Quaternion::Conjugate(box.rotation), F128::SetZero());
3130 
3131  AxisAlignedBox aabb;
3132  aabb.point_max = box.extent;
3133  aabb.point_min = F128::Negate(box.extent);
3134  return Containment::FrustumAxisAlignedBox(frstm, aabb);
3135 }
3136 
3137 NLIB_B Containment::FrustumFrustum(const Frustum& frustum, const Frustum& contained) NLIB_NOEXCEPT {
3138  Float3 corners[8];
3139  contained.GetCorners(&corners[0]);
3140  for (size_t i = 0; i < 8; ++i) {
3141  SimdVector pt = Vector3::LoadFloat3(&corners[i]);
3142  if (!Containment::FrustumPoint(frustum, pt)) return false;
3143  }
3144  return true;
3145 }
3146 
3147 #ifdef _MSC_VER
3148 # pragma endregion Containment function implementation
3149 #endif
3150 
3151 #undef NLIB_B
3152 #undef NLIB_M
3153 
3154 #endif // NLIB_DOXYGEN
3155 
3156 } // namespace simd
3157 NLIB_NAMESPACE_END
3158 
3159 #endif // INCLUDE_NN_NLIB_SIMD_SIMDGEOMETRY_H_
SimdQuaternion rotation
OBBの方向(orientation)を表すクォータニオンです。
Definition: SimdGeometry.h:91
#define NLIB_NOEXCEPT
環境に合わせてnoexcept 又は同等の定義がされます。
Definition: Platform.h:2151
SimdVector point_min
AABBの最小座標です。レーン3は無視されます。
Definition: SimdGeometry.h:75
クォータニオンが定義されています。
4x4行列が定義されています。
視錐台を表すクラスです。
Definition: SimdGeometry.h:105
Frustum() noexcept
デフォルトコンストラクタです。データメンバの初期設定は行いません。
Definition: SimdGeometry.h:107
f128arg SimdVectorArg
f128argがtypedefされています。
Definition: SimdFloat.h:3927
128bitの単精度浮動小数点数用SIMDレジスタを2つ持つ型です。
Definition: SimdFloat.h:31
#define NLIB_VIS_HIDDEN
関数やクラス等のシンボルをライブラリの外部に公開しません。
Definition: Platform_unix.h:50
SimdVector extent
OBBのxyz各座標の大きさです。各要素は全て正の値である必要があります。3次元ベクトルです。 ...
Definition: SimdGeometry.h:90
AxisAlignedBox() noexcept
デフォルトコンストラクタです。データメンバの初期設定は行いません。
Definition: SimdGeometry.h:63
f128arg SimdSphereArg
f128argがtypedefされています。
Definition: SimdFloat.h:3933
OBB(有向境界ボックス)を表すクラスです。中心座標(center)とxyz軸方向の大きさ(extent)及び回転クォータニ...
Definition: SimdGeometry.h:80
SimdVector point_max
AABBの最大座標です。レーン3は無視されます。
Definition: SimdGeometry.h:76
包含関係の判定を行う関数をまとめたクラスです。
Definition: SimdGeometry.h:268
3次元空間上の球を扱う静的メンバ関数が集められたクラスです。このクラスはインスタンス化できません。 ...
Definition: SimdGeometry.h:40
constexpr const each_float_tag each_float
each_float_tag型の定数オブジェクトで、単精度浮動小数点数を示すためのタグです。
Definition: SimdFloat.h:51
f128arg SimdQuaternionArg
f128argがtypedefされています。
Definition: SimdFloat.h:3929
3次元空間上の平面を扱う関数が集められたクラスです。
Definition: SimdGeometry.h:21
f128arg SimdPlaneArg
f128argがtypedefされています。
Definition: SimdFloat.h:3931
距離(の2乗)の計算を行う関数をまとめたクラスです。
Definition: SimdGeometry.h:134
4x4行列を保持する構造体です。
Definition: SimdFloat.h:3938
PlaneResult
平面との交差判定の戻り値の型です。平面の法線方向と同じ側を平面の表とします。
Definition: SimdGeometry.h:181
nlib_f128x2_t f128x2
nlib_f128x2_tがtypedefされています。
Definition: SimdFloat.h:56
f128 SimdSphere
f128がtypedefされています。球を扱う場合に利用されます。
Definition: SimdFloat.h:3932
SimdVector center
OBBの中心座標です。3次元ベクトルです。
Definition: SimdGeometry.h:89
OrientedBox() noexcept
デフォルトコンストラクタです。データメンバの初期設定は行いません。
Definition: SimdGeometry.h:82
単精度浮動小数点数のSIMD演算を行うためのクラスや関数が定義されています。
3次元ベクトルをメモリから読み出したりメモリに書き出したりするための型です。float型のx, y, zをデータメンバとして保持します。
Definition: SimdFloat.h:4047
constexpr const each_select32_tag each_select32
each_select32_tag型の定数オブジェクトで、32bitのレーンを選択することを示すためのタグです。 ...
Definition: SimdInt.h:56
#define NLIB_F128_TRANSPOSE(row0, row1, row2, row3)
インプレイスで行列を転置するためのマクロです。
Definition: SimdFloat.h:4001
3次元ベクトルが定義されています。
nlib_f128_t f128
nlib_f128_tがtypedefされています。
Definition: SimdFloat.h:54
3次元空間におけるAABB(軸平行境界ボックス)を表すクラスです。最小座標(point_min)と最大座標(point_max)を...
Definition: SimdGeometry.h:61
4次元ベクトルが定義されています。
交差の判定を行う関数をまとめたクラスです。
Definition: SimdGeometry.h:178
f128 SimdQuaternion
f128がtypedefされています。クォータニオンを扱う場合に利用されます。
Definition: SimdFloat.h:3928
f128 SimdPlane
f128がtypedefされています。平面を扱う場合に利用されます。
Definition: SimdFloat.h:3930
f128 SimdVector
f128がtypedefされています。3次元ベクトル又は4次元ベクトルを扱う場合に利用されます。 ...
Definition: SimdFloat.h:3926