nlib
SimdMatrix.h
[詳解]
1 
2 #pragma once
3 #ifndef INCLUDE_NN_NLIB_SIMD_SIMDMATRIX_H_
4 #define INCLUDE_NN_NLIB_SIMD_SIMDMATRIX_H_
5 
10 // #include "nn/nlib/simd/SimdGeometry.h"
11 
12 NLIB_NAMESPACE_BEGIN
13 namespace simd {
14 
16  public:
17  static SimdMatrix __vectorcall LoadFloat4x4(const Float4x4* p) NLIB_NOEXCEPT;
18  static SimdMatrix __vectorcall LoadFloat3x4(const Float3x4* p) NLIB_NOEXCEPT;
19  static SimdMatrix __vectorcall LoadFloat4x3(const Float4x3* p) NLIB_NOEXCEPT;
20  static SimdMatrix __vectorcall LoadFloat3x3(const Float3x3* p) NLIB_NOEXCEPT;
21  static void __vectorcall StoreFloat4x4(Float4x4* p, SimdMatrixArg m) NLIB_NOEXCEPT;
22  static void __vectorcall StoreFloat3x4(Float3x4* p, SimdMatrixArg m) NLIB_NOEXCEPT;
23  static void __vectorcall StoreFloat4x3(Float4x3* p, SimdMatrixArg m) NLIB_NOEXCEPT;
24  static void __vectorcall StoreFloat3x3(Float3x3* p, SimdMatrixArg m) NLIB_NOEXCEPT;
25 
26  static SimdVector __vectorcall Determinant(SimdMatrixArg m) NLIB_NOEXCEPT;
27  static SimdMatrix __vectorcall Identity() NLIB_NOEXCEPT;
28  static SimdMatrix __vectorcall Inverse(SimdVector* det, SimdMatrixArg m) NLIB_NOEXCEPT;
29 
30  static bool __vectorcall IsIdentity(SimdMatrixArg m) NLIB_NOEXCEPT;
31  static bool __vectorcall IsInfinite(SimdMatrixArg m) NLIB_NOEXCEPT;
32  static bool __vectorcall IsNaN(SimdMatrixArg m) NLIB_NOEXCEPT;
33  static SimdMatrix __vectorcall Mult(SimdMatrixArg a, SimdMatrixArg b) NLIB_NOEXCEPT;
34  static SimdMatrix __vectorcall Transpose(SimdMatrixArg m) NLIB_NOEXCEPT;
35  static SimdMatrix __vectorcall MultTranspose(SimdMatrixArg a, SimdMatrixArg b) NLIB_NOEXCEPT;
36 
37  static SimdMatrix __vectorcall FromScaling(float scale_x, float scale_y,
38  float scale_z) NLIB_NOEXCEPT;
39  static SimdMatrix __vectorcall FromScaling(SimdVectorArg scale) NLIB_NOEXCEPT;
40  static SimdMatrix __vectorcall FromTranslation(float ofs_x, float ofs_y,
41  float ofs_z) NLIB_NOEXCEPT;
42  static SimdMatrix __vectorcall FromTranslation(SimdVectorArg ofs) NLIB_NOEXCEPT;
43 
44  static SimdMatrix __vectorcall FromRotationX(float sin_value, float cos_value) NLIB_NOEXCEPT;
45  static SimdMatrix __vectorcall FromRotationY(float sin_value, float cos_value) NLIB_NOEXCEPT;
46  static SimdMatrix __vectorcall FromRotationZ(float sin_value, float cos_value) NLIB_NOEXCEPT;
47  static SimdMatrix __vectorcall FromRotationAxisAndSinCos(SimdVectorArg axis_normalized,
48  float sin_value,
49  float cos_value) NLIB_NOEXCEPT;
50  static SimdMatrix __vectorcall FromRotationQuaternion(SimdQuaternionArg quat) NLIB_NOEXCEPT;
51  static SimdMatrix __vectorcall
52  FromRotationZXY(SimdVectorArg sin_xyz, SimdVectorArg cos_xyz) NLIB_NOEXCEPT;
53 
54  static SimdMatrix __vectorcall LookToLh(SimdVectorArg eye_pos, SimdVectorArg eye_dir_normalized,
55  SimdVectorArg up_dir_normalized) NLIB_NOEXCEPT;
56  static SimdMatrix __vectorcall LookAtLh(SimdVectorArg eye_pos, SimdVectorArg at_pos,
57  SimdVectorArg up_dir_normalized) NLIB_NOEXCEPT;
58  static SimdMatrix __vectorcall LookToRh(SimdVectorArg eye_pos, SimdVectorArg eye_dir_normalized,
59  SimdVectorArg up_dir_normalized) NLIB_NOEXCEPT;
60  static SimdMatrix __vectorcall LookAtRh(SimdVectorArg eye_pos, SimdVectorArg at_pos,
61  SimdVectorArg up_dir_normalized) NLIB_NOEXCEPT;
62 
63  static SimdMatrix __vectorcall
64  PerspectiveLh(float width, float height, float near_z, float far_z) NLIB_NOEXCEPT;
65  static SimdMatrix __vectorcall
66  PerspectiveRh(float width, float height, float near_z, float far_z) NLIB_NOEXCEPT;
67  static SimdMatrix __vectorcall PerspectiveFovLh(float half_fovy_sin, float half_fovy_cos,
68  float aspect, float near_z,
69  float far_z) NLIB_NOEXCEPT;
70  static SimdMatrix __vectorcall PerspectiveFovRh(float half_fovy_sin, float half_fovy_cos,
71  float aspect, float near_z,
72  float far_z) NLIB_NOEXCEPT;
73  static SimdMatrix __vectorcall PerspectiveOffCenterLh(float left, float right, float bottom,
74  float top, float near_z,
75  float far_z) NLIB_NOEXCEPT;
76  static SimdMatrix __vectorcall PerspectiveOffCenterRh(float left, float right, float bottom,
77  float top, float near_z,
78  float far_z) NLIB_NOEXCEPT;
79 
80  static SimdMatrix __vectorcall
81  OrthographicLh(float width, float height, float near_z, float far_z) NLIB_NOEXCEPT;
82  static SimdMatrix __vectorcall
83  OrthographicRh(float width, float height, float near_z, float far_z) NLIB_NOEXCEPT;
84  static SimdMatrix __vectorcall OrthographicOffCenterLh(float left, float right, float bottom,
85  float top, float near_z,
86  float far_z) NLIB_NOEXCEPT;
87  static SimdMatrix __vectorcall OrthographicOffCenterRh(float left, float right, float bottom,
88  float top, float near_z,
89  float far_z) NLIB_NOEXCEPT;
90 
91  static SimdMatrix __vectorcall
92  Shadow(SimdPlaneArg shadow_plane, SimdVector light_pos) NLIB_NOEXCEPT;
93  static SimdMatrix __vectorcall Reflect(SimdPlaneArg reflection_plane) NLIB_NOEXCEPT;
94  static void __vectorcall Decompose(SimdVector* scale, SimdMatrix* rot, SimdVector* trans,
96  // AffineTransform
97 
98  private:
99  Matrix(); // forbidden
100 };
101 
102 #ifndef NLIB_DOXYGEN
103 
104 #define NLIB_M(tp) inline tp __vectorcall
105 
106 NLIB_M(SimdMatrix) Matrix::LoadFloat4x4(const Float4x4* p) NLIB_NOEXCEPT {
107  SimdMatrix m;
108  m.r[0] = F128::LoadA16(&p->m[0][0]);
109  m.r[1] = F128::LoadA16(&p->m[1][0]);
110  m.r[2] = F128::LoadA16(&p->m[2][0]);
111  m.r[3] = F128::LoadA16(&p->m[3][0]);
112  return m;
113 }
114 
115 // Load and convert to column major order matrix
116 NLIB_M(SimdMatrix) Matrix::LoadFloat3x4(const Float3x4* p) NLIB_NOEXCEPT {
117  SimdMatrix m;
118  m.r[0] = F128::LoadA16(&p->m[0][0]);
119  m.r[1] = F128::LoadA16(&p->m[1][0]);
120  m.r[2] = F128::LoadA16(&p->m[2][0]);
121  m.r[3] = F128::LoadA16(F128::v0001_);
122  NLIB_F128_TRANSPOSE(m.r[0], m.r[1], m.r[2], m.r[3]);
123  return m;
124 }
125 
126 NLIB_M(SimdMatrix) Matrix::LoadFloat4x3(const Float4x3* p) NLIB_NOEXCEPT {
127  f128 t0 = F128::LoadA16(&p->m[0][0]);
128  f128 t1 = F128::LoadA16(&p->m[1][1]);
129  f128 t2 = F128::LoadA16(&p->m[2][2]);
130  SimdMatrix m;
131  m.r[0] = F128::SetZeroToLane<3>(t0);
132  f128 tmp1 = F128::Permute<3, 4, 5, 8>(t0, t1);
133  m.r[1] = F128::SetZeroToLane<3>(tmp1);
134  f128 tmp2 = F128::Permute<2, 3, 4, 8>(t1, t2);
135  m.r[2] = F128::SetZeroToLane<3>(tmp2);
136  m.r[3] = F128::Permute<1, 2, 3, 7>(t2, F128::SetOne());
137  return m;
138 }
139 
140 NLIB_M(SimdMatrix) Matrix::LoadFloat3x3(const Float3x3* p) NLIB_NOEXCEPT {
141  f128 t0 = F128::LoadA4(&p->m[0][0]);
142  f128 t1 = F128::LoadA4(&p->m[1][0]);
143  f128 t2 = F128::LoadA4(&p->m[1][2]);
144  f128 zero = F128::SetZero();
145  SimdMatrix m;
146  m.r[0] = F128::SetZeroToLane<3>(t0);
147  m.r[1] = F128::SetZeroToLane<3>(t1);
148  m.r[2] = F128::Permute<1, 2, 3, 7>(t2, zero);
149  m.r[3] = F128::LoadA16(F128::v0001_);
150  return m;
151 }
152 
153 inline void __vectorcall Matrix::StoreFloat4x4(Float4x4* p, SimdMatrixArg m) NLIB_NOEXCEPT {
154  F128::StoreA16(&p->m[0][0], m.r[0]);
155  F128::StoreA16(&p->m[1][0], m.r[1]);
156  F128::StoreA16(&p->m[2][0], m.r[2]);
157  F128::StoreA16(&p->m[3][0], m.r[3]);
158 }
159 
160 inline void __vectorcall Matrix::StoreFloat4x3(Float4x3* p, SimdMatrixArg m) NLIB_NOEXCEPT {
161  f128 t0 = F128::Permute<0, 1, 2, 4>(m.r[0], m.r[1]);
162  f128 t1 = F128::Permute<1, 2, 4, 5>(m.r[1], m.r[2]);
163  f128 t2 = F128::Permute<2, 4, 5, 6>(m.r[2], m.r[3]);
164  F128::StoreA16(&p->m[0][0], t0);
165  F128::StoreA16(&p->m[1][1], t1);
166  F128::StoreA16(&p->m[2][2], t2);
167 }
168 
169 inline void __vectorcall Matrix::StoreFloat3x3(Float3x3* p, SimdMatrixArg m) NLIB_NOEXCEPT {
170  f128 t0 = F128::Permute<0, 1, 2, 4>(m.r[0], m.r[1]);
171  f128 t1 = F128::Permute<1, 2, 4, 5>(m.r[1], m.r[2]);
172  F128::StoreA4(&p->m[0][0], t0);
173  F128::StoreA4(&p->m[1][1], t1);
174  p->m[2][2] = F128::GetFloatFromLane<2>(m.r[2]);
175 }
176 
177 // XMMatrixDeterminant
178 // r = determinant(m)
179 NLIB_M(f128) Matrix::Determinant(SimdMatrixArg m) NLIB_NOEXCEPT {
180  // a0 b0 c0 d0
181  // a1 b1 c1 d1
182  // a2 b2 c2 d2
183  // a3 b3 c3 d3
184  //
185  // a0|bcd123| - b0|acd123| + c0|abd123| + d0|abc123| -> SubAdd & Dot
186 
187  // |bcd123| = b1|cd23| - c1|bd23| + d1|bc23| -> lane0
188  // |acd123| = a1|cd23| - c1|ad23| + d1|ac23| -> lane1
189  // |abd123| = a1|bd23| - b1|ad23| + d1|ab23| -> lane2
190  // |abc123| = a1|bc23| - b1|ac23| + c1|ab23| -> lane3
191 
192  f128 c0det, c1det, c2det;
193  {
194  f128 ccbb_2 = F128::Swizzle<2, 2, 1, 1>(m.r[2]);
195  f128 ccbb_3 = F128::Swizzle<2, 2, 1, 1>(m.r[3]);
196 
197  f128 dddc_2 = F128::Swizzle<3, 3, 3, 2>(m.r[2]);
198  f128 dddc_3 = F128::Swizzle<3, 3, 3, 2>(m.r[3]);
199 
200  f128 baaa_2 = F128::Swizzle<1, 0, 0, 0>(m.r[2]);
201  f128 baaa_3 = F128::Swizzle<1, 0, 0, 0>(m.r[3]);
202 
203  f128 tmp0 = F128::Mult(ccbb_2, dddc_3);
204  f128 tmp1 = F128::Mult(baaa_2, dddc_3);
205  f128 tmp2 = F128::Mult(baaa_2, ccbb_3);
206 
207  c0det = F128::MultSub(dddc_2, ccbb_3, tmp0);
208  c1det = F128::MultSub(dddc_2, baaa_3, tmp1);
209  c2det = F128::MultSub(ccbb_2, baaa_3, tmp2);
210  }
211 
212  f128 ccbb_1 = F128::Swizzle<2, 2, 1, 1>(m.r[1]);
213  f128 baaa_1 = F128::Swizzle<1, 0, 0, 0>(m.r[1]);
214  f128 dddc_1 = F128::Swizzle<3, 3, 3, 2>(m.r[1]);
215  f128 r0x = F128::NegateEx<true, false, true, false>(m.r[0]);
216 
217  f128 det3_neg = F128::Mult(c1det, ccbb_1);
218  det3_neg = F128::MultSub(c0det, baaa_1, det3_neg);
219  det3_neg = F128::MultSub(c2det, dddc_1, det3_neg);
220  return Vector4::Dot(r0x, det3_neg);
221 }
222 
223 // XMMatrixIdentity
224 // r[0] = { 1, 0, 0, 0 }
225 // r[1] = { 0, 1, 0, 0 }
226 // r[2] = { 0, 0, 1, 0 }
227 // r[3] = { 0, 0, 0, 1 }
228 NLIB_M(SimdMatrix) Matrix::Identity() NLIB_NOEXCEPT {
229  SimdMatrix m;
230  m.r[0] = F128::LoadA16(F128::v1000_);
231  m.r[1] = F128::LoadA16(F128::v0100_);
232  m.r[2] = F128::LoadA16(F128::v0010_);
233  m.r[3] = F128::LoadA16(F128::v0001_);
234  return m;
235 }
236 
237 // XMMatrixTranspose
238 // use NLIB_F128_TRANSPOSE(m.r[0], m.r[1], m.r[2], m.r[3]) if you can change 'm' itself
239 NLIB_M(SimdMatrix) Matrix::Transpose(SimdMatrixArg m) NLIB_NOEXCEPT {
240  SimdMatrix ret;
241 #if !defined(NLIB_F128_SIMD_NOUSE) && defined(NLIB_NEON)
242  float32x4x2_t trn_f0 = vtrnq_f32(m.r[0], m.r[1]);
243  float32x4x2_t trn_f1 = vtrnq_f32(m.r[2], m.r[3]);
244  ret.r[0] = vcombine_f32(vget_low_f32(trn_f0.val[0]), vget_low_f32(trn_f1.val[0]));
245  ret.r[1] = vcombine_f32(vget_low_f32(trn_f0.val[1]), vget_low_f32(trn_f1.val[1]));
246  ret.r[2] = vcombine_f32(vget_high_f32(trn_f0.val[0]), vget_high_f32(trn_f1.val[0]));
247  ret.r[3] = vcombine_f32(vget_high_f32(trn_f0.val[1]), vget_high_f32(trn_f1.val[1]));
248  return ret;
249 #elif !defined(NLIB_F128_SIMD_NOUSE) && defined(NLIB_CAFE_PPC)
250  f32x2 tmp0, tmp1;
251 
252  tmp0 = __PS_MERGE00(m.r[0].vec.ps[0], m.r[1].vec.ps[0]);
253  tmp1 = __PS_MERGE11(m.r[0].vec.ps[0], m.r[1].vec.ps[0]);
254  ret.r[0].vec.ps[0] = tmp0;
255  ret.r[1].vec.ps[0] = tmp1;
256 
257  tmp0 = __PS_MERGE00(m.r[2].vec.ps[1], m.r[3].vec.ps[1]);
258  tmp1 = __PS_MERGE11(m.r[2].vec.ps[1], m.r[3].vec.ps[1]);
259  ret.r[2].vec.ps[1] = tmp0;
260  ret.r[3].vec.ps[1] = tmp1;
261 
262  tmp0 = __PS_MERGE00(m.r[0].vec.ps[1], m.r[1].vec.ps[1]);
263  tmp1 = __PS_MERGE11(m.r[0].vec.ps[1], m.r[1].vec.ps[1]);
264  ret.r[2].vec.ps[0] = tmp0;
265  ret.r[3].vec.ps[0] = tmp1;
266 
267  tmp0 = __PS_MERGE00(m.r[2].vec.ps[0], m.r[3].vec.ps[0]);
268  tmp1 = __PS_MERGE11(m.r[2].vec.ps[0], m.r[3].vec.ps[0]);
269  ret.r[0].vec.ps[1] = tmp0;
270  ret.r[1].vec.ps[1] = tmp1;
271  return ret;
272 #else
273  f128 tmp0 = F128::Permute<0, 1, 4, 5>(m.r[0], m.r[1]);
274  f128 tmp2 = F128::Permute<2, 3, 6, 7>(m.r[0], m.r[1]);
275  f128 tmp1 = F128::Permute<0, 1, 4, 5>(m.r[2], m.r[3]);
276  f128 tmp3 = F128::Permute<2, 3, 6, 7>(m.r[2], m.r[3]);
277  ret.r[0] = F128::Permute<0, 2, 4, 6>(tmp0, tmp1);
278  ret.r[1] = F128::Permute<1, 3, 5, 7>(tmp0, tmp1);
279  ret.r[2] = F128::Permute<0, 2, 4, 6>(tmp2, tmp3);
280  ret.r[3] = F128::Permute<1, 3, 5, 7>(tmp2, tmp3);
281  return ret;
282 #endif
283 }
284 
285 inline void __vectorcall Matrix::StoreFloat3x4(Float3x4* p, SimdMatrixArg m) NLIB_NOEXCEPT {
286  SimdMatrix M = Matrix::Transpose(m);
287  F128::StoreA16(&p->m[0][0], M.r[0]);
288  F128::StoreA16(&p->m[1][0], M.r[1]);
289  F128::StoreA16(&p->m[2][0], M.r[2]);
290 }
291 
292 // XMMatrixInverse
293 NLIB_M(SimdMatrix) Matrix::Inverse(SimdVector* det, SimdMatrixArg m) NLIB_NOEXCEPT {
294  SimdMatrix M = Transpose(m);
295  // M:
296  // a0 b0 c0 d0
297  // a1 b1 c1 d1
298  // a2 b2 c2 d2
299  // a3 b3 c3 d3
300 
301  // Inv:
302  // ( |bcd123|, -|acd123|, |abd123|, -|abc123|) / detm
303  // (-|bcd023|, |acd023|, -|abd023|, |abc023}) / detm
304  // ( |bcd012|, -|acd012|, |abd012|, -|abc012|) / detm
305  // (-|bcd013|, |acd013|, -|abd013|, |abc013|) / detm
306 
307  // row0:
308  // |bcd123| = b1|cd23| - c1|bd23| + d1|bc23| -> lane0
309  // |acd123| = a1|cd23| - c1|ad23| + d1|ac23| -> lane1
310  // |abd123| = a1|bd23| - b1|ad23| + d1|ab23| -> lane2
311  // |abc123| = a1|bc23| - b1|ac23| + c1|ab23| -> lane3
312 
313  // row1:
314  // |bcd023| = b0|cd23| - c0|bd23| + d0|bc23| -> lane0
315  // |acd023| = a0|cd23| - c0|ad23| + d0|ac23| -> lane1
316  // |abd023| = a0|bd23| - b0|ad23| + d0|ab23| -> lane2
317  // |abc023| = a0|bc23| - b0|ac23| + c0|ab23| -> lane3
318  f128 detValueReciprocal;
319  SimdMatrix ret;
320  {
321  f128 c0det, c1det, c2det;
322  {
323  f128 ccbb_2 = F128::Swizzle<2, 2, 1, 1>(M.r[2]);
324  f128 ccbb_3 = F128::Swizzle<2, 2, 1, 1>(M.r[3]);
325 
326  f128 dddc_2 = F128::Swizzle<3, 3, 3, 2>(M.r[2]);
327  f128 dddc_3 = F128::Swizzle<3, 3, 3, 2>(M.r[3]);
328 
329  f128 baaa_2 = F128::Swizzle<1, 0, 0, 0>(M.r[2]);
330  f128 baaa_3 = F128::Swizzle<1, 0, 0, 0>(M.r[3]);
331 
332  f128 tmp0 = F128::Mult(ccbb_2, dddc_3);
333  f128 tmp1 = F128::Mult(baaa_2, dddc_3);
334  f128 tmp2 = F128::Mult(baaa_2, ccbb_3);
335 
336  c0det = F128::MultSub(dddc_2, ccbb_3, tmp0);
337  c1det = F128::MultSub(dddc_2, baaa_3, tmp1);
338  c2det = F128::MultSub(ccbb_2, baaa_3, tmp2);
339  }
340  {
341  f128 baaa_1 = F128::Swizzle<1, 0, 0, 0>(M.r[1]);
342  f128 ccbb_1 = F128::Swizzle<2, 2, 1, 1>(M.r[1]);
343  f128 dddc_1 = F128::Swizzle<3, 3, 3, 2>(M.r[1]);
344  f128 r0x = F128::NegateEx<true, false, true, false>(M.r[0]);
345 
346  f128 det3 = F128::Mult(c1det, ccbb_1);
347  det3 = F128::MultSub(c0det, baaa_1, det3);
348  det3 = F128::MultSub(c2det, dddc_1, det3);
349 
350  detValueReciprocal = Vector4::Dot(r0x, det3);
351  if (det) {
352  *det = detValueReciprocal;
353  }
354 
355  det3 = F128::NegateEx<true, false, true, false>(det3);
356  detValueReciprocal = F128::Recp(detValueReciprocal);
357 
358  ret.r[0] = F128::Mult(detValueReciprocal, det3);
359  }
360  {
361  f128 baaa_0 = F128::Swizzle<1, 0, 0, 0>(M.r[0]);
362  f128 ccbb_0 = F128::Swizzle<2, 2, 1, 1>(M.r[0]);
363  f128 dddc_0 = F128::Swizzle<3, 3, 3, 2>(M.r[0]);
364 
365  f128 det3 = F128::Mult(c0det, baaa_0);
366  det3 = F128::MultAdd(c2det, dddc_0, det3);
367  det3 = F128::MultSub(c1det, ccbb_0, det3);
368  det3 = F128::NegateEx<true, false, true, false>(det3);
369  ret.r[1] = F128::Mult(detValueReciprocal, det3);
370  }
371  }
372 
373  // row3:
374  // |bcd012| = b2|cd01| - c2|bd01| + d2|bc01| -> lane0
375  // |acd012| = a2|cd01| - c2|ad01| + d2|ac01| -> lane1
376  // |abd012| = a2|bd01| - b2|ad01| + d2|ab01| -> lane2
377  // |abc012| = a2|bc01| - b2|ac01| + c2|ab01| -> lane3
378 
379  // row2:
380  // |bcd013| = b3|cd01| - c3|bd01| + d3|bc01| -> lane0
381  // |acd013| = a3|cd01| - c3|ad01| + d3|ac01| -> lane1
382  // |abd013| = a3|bd01| - b3|ad01| + d3|ab01| -> lane2
383  // |abc013| = a3|bc01| - b3|ac01| + c3|ab01| -> lane3
384  {
385  f128 c0det, c1det, c2det;
386  {
387  f128 ccbb_0 = F128::Swizzle<2, 2, 1, 1>(M.r[0]);
388  f128 ccbb_1 = F128::Swizzle<2, 2, 1, 1>(M.r[1]);
389 
390  f128 dddc_0 = F128::Swizzle<3, 3, 3, 2>(M.r[0]);
391  f128 dddc_1 = F128::Swizzle<3, 3, 3, 2>(M.r[1]);
392 
393  f128 baaa_0 = F128::Swizzle<1, 0, 0, 0>(M.r[0]);
394  f128 baaa_1 = F128::Swizzle<1, 0, 0, 0>(M.r[1]);
395 
396  f128 tmp0 = F128::Mult(ccbb_0, dddc_1);
397  f128 tmp1 = F128::Mult(baaa_0, dddc_1);
398  f128 tmp2 = F128::Mult(baaa_0, ccbb_1);
399 
400  c0det = F128::MultSub(dddc_0, ccbb_1, tmp0);
401  c1det = F128::MultSub(dddc_0, baaa_1, tmp1);
402  c2det = F128::MultSub(ccbb_0, baaa_1, tmp2);
403  }
404  {
405  f128 baaa_3 = F128::Swizzle<1, 0, 0, 0>(M.r[3]);
406  f128 ccbb_3 = F128::Swizzle<2, 2, 1, 1>(M.r[3]);
407  f128 dddc_3 = F128::Swizzle<3, 3, 3, 2>(M.r[3]);
408 
409  f128 det3 = F128::Mult(c1det, ccbb_3);
410  det3 = F128::MultSub(c0det, baaa_3, det3);
411  det3 = F128::MultSub(c2det, dddc_3, det3);
412  det3 = F128::NegateEx<true, false, true, false>(det3);
413 
414  ret.r[2] = F128::Mult(detValueReciprocal, det3);
415  }
416  {
417  f128 baaa_2 = F128::Swizzle<1, 0, 0, 0>(M.r[2]);
418  f128 ccbb_2 = F128::Swizzle<2, 2, 1, 1>(M.r[2]);
419  f128 dddc_2 = F128::Swizzle<3, 3, 3, 2>(M.r[2]);
420 
421  f128 det3 = F128::Mult(c0det, baaa_2);
422  det3 = F128::MultAdd(c2det, dddc_2, det3);
423  det3 = F128::MultSub(c1det, ccbb_2, det3);
424  det3 = F128::NegateEx<true, false, true, false>(det3);
425 
426  ret.r[3] = F128::Mult(detValueReciprocal, det3);
427  }
428  }
429  return ret;
430 }
431 
432 // XMMatrixIsIdentity
433 // true if m is an indentity matrix
434 NLIB_M(bool) Matrix::IsIdentity(SimdMatrixArg m) NLIB_NOEXCEPT { // NOLINT
435  f128 cmp0 = F128::CmpEq(m.r[0], F128::LoadA16(F128::v1000_));
436  f128 cmp1 = F128::CmpEq(m.r[1], F128::LoadA16(F128::v0100_));
437  f128 cmp2 = F128::CmpEq(m.r[2], F128::LoadA16(F128::v0010_));
438  f128 cmp3 = F128::CmpEq(m.r[3], F128::LoadA16(F128::v0001_));
439  cmp0 = F128::And(cmp0, cmp1);
440  cmp2 = F128::And(cmp2, cmp3);
441  cmp0 = F128::And(cmp0, cmp2);
442  return F128::IsAllMaskTrue(cmp0);
443 }
444 
445 // XMMatrixIsInfinite
446 // true if there is (i, j) which satisfies isinf(m[i][j])
447 NLIB_M(bool) Matrix::IsInfinite(SimdMatrixArg m) NLIB_NOEXCEPT { // NOLINT
448 #ifdef NLIB_F128_SIMD_NOUSE
449  f128 cmp0 = F128::IsInfinite(m.r[0]);
450  f128 cmp1 = F128::IsInfinite(m.r[1]);
451  f128 cmp2 = F128::IsInfinite(m.r[2]);
452  f128 cmp3 = F128::IsInfinite(m.r[3]);
453  cmp0 = F128::Or(cmp0, cmp1);
454  cmp2 = F128::Or(cmp2, cmp3);
455  cmp0 = F128::Or(cmp0, cmp2);
456  return !F128::IsAllMaskFalse(cmp0);
457 #else
458  f128 inf_value = F128::SetInfinity();
459  f128 cmp0 = F128::CmpEq(inf_value, F128::Abs(m.r[0]));
460  f128 cmp1 = F128::CmpEq(inf_value, F128::Abs(m.r[1]));
461  f128 cmp2 = F128::CmpEq(inf_value, F128::Abs(m.r[2]));
462  f128 cmp3 = F128::CmpEq(inf_value, F128::Abs(m.r[3]));
463  cmp0 = F128::Or(cmp0, cmp1);
464  cmp2 = F128::Or(cmp2, cmp3);
465  cmp0 = F128::Or(cmp0, cmp2);
466  return !F128::IsAllMaskFalse(cmp0);
467 #endif
468 }
469 
470 // XMMatrixIsNaN
471 // true if there is (i, j) which satisfies isnan(m[i][j])
472 NLIB_M(bool) Matrix::IsNaN(SimdMatrixArg m) NLIB_NOEXCEPT { // NOLINT
473  f128 cmp0 = F128::IsNaN(m.r[0]);
474  f128 cmp1 = F128::IsNaN(m.r[1]);
475  f128 cmp2 = F128::IsNaN(m.r[2]);
476  f128 cmp3 = F128::IsNaN(m.r[3]);
477  cmp0 = F128::Or(cmp0, cmp1);
478  cmp2 = F128::Or(cmp2, cmp3);
479  cmp0 = F128::Or(cmp0, cmp2);
480  return !F128::IsAllMaskFalse(cmp0);
481 }
482 
483 // XMMatrixMultiply
484 // r = a * b
485 NLIB_M(SimdMatrix) Matrix::Mult(SimdMatrixArg a, SimdMatrixArg b) NLIB_NOEXCEPT {
486  SimdMatrix m;
487  m.r[0] = Vector4::Transform(a.r[0], b);
488  m.r[1] = Vector4::Transform(a.r[1], b);
489  m.r[2] = Vector4::Transform(a.r[2], b);
490  m.r[3] = Vector4::Transform(a.r[3], b);
491  return m;
492 }
493 
494 // XMMatrixMultiplyTranspose
495 // r = transpose(a * b)
496 NLIB_M(SimdMatrix) Matrix::MultTranspose(SimdMatrixArg a, SimdMatrixArg b) NLIB_NOEXCEPT {
497  f128 r0 = Vector4::Transform(a.r[0], b);
498  f128 r1 = Vector4::Transform(a.r[1], b);
499  f128 r2 = Vector4::Transform(a.r[2], b);
500  f128 r3 = Vector4::Transform(a.r[3], b);
501  SimdMatrix ret;
502 #if !defined(NLIB_F128_SIMD_NOUSE) && defined(NLIB_NEON)
503  float32x4x2_t trn_f0 = vtrnq_f32(r0, r1);
504  float32x4x2_t trn_f1 = vtrnq_f32(r2, r3);
505  ret.r[0] = vcombine_f32(vget_low_f32(trn_f0.val[0]), vget_low_f32(trn_f1.val[0]));
506  ret.r[1] = vcombine_f32(vget_low_f32(trn_f0.val[1]), vget_low_f32(trn_f1.val[1]));
507  ret.r[2] = vcombine_f32(vget_high_f32(trn_f0.val[0]), vget_high_f32(trn_f1.val[0]));
508  ret.r[3] = vcombine_f32(vget_high_f32(trn_f0.val[1]), vget_high_f32(trn_f1.val[1]));
509 #else
510  f128 tmp0 = F128::Permute<0, 1, 4, 5>(r0, r1);
511  f128 tmp2 = F128::Permute<2, 3, 6, 7>(r0, r1);
512  f128 tmp1 = F128::Permute<0, 1, 4, 5>(r2, r3);
513  f128 tmp3 = F128::Permute<2, 3, 6, 7>(r2, r3);
514  ret.r[0] = F128::Permute<0, 2, 4, 6>(tmp0, tmp1);
515  ret.r[1] = F128::Permute<1, 3, 5, 7>(tmp0, tmp1);
516  ret.r[2] = F128::Permute<0, 2, 4, 6>(tmp2, tmp3);
517  ret.r[3] = F128::Permute<1, 3, 5, 7>(tmp2, tmp3);
518 #endif
519  return ret;
520 }
521 
522 // XMMatrixScaling
523 // r[0] = { x, 0, 0, 0 }
524 // r[1] = { 0, y, 0, 0 }
525 // r[2] = { 0, 0, z, 0 }
526 // r[3] = { 0, 0, 0, 1 }
527 NLIB_M(SimdMatrix) Matrix::FromScaling(float scale_x, float scale_y, float scale_z) NLIB_NOEXCEPT {
528  SimdMatrix m;
529  f128 zero = F128::SetZero();
530  m.r[0] = F128::SetFloatToLane<0>(zero, scale_x);
531  m.r[1] = F128::SetFloatToLane<1>(zero, scale_y);
532  m.r[2] = F128::SetFloatToLane<2>(zero, scale_z);
533  m.r[3] = F128::LoadA16(F128::v0001_);
534  return m;
535 }
536 
537 // XMMatrixScalingFromVector
538 // r[0] = { x, 0, 0, 0 }
539 // r[1] = { 0, y, 0, 0 }
540 // r[2] = { 0, 0, z, 0 }
541 // r[3] = { 0, 0, 0, 1 }
542 NLIB_M(SimdMatrix) Matrix::FromScaling(SimdVectorArg scale) NLIB_NOEXCEPT {
543  SimdMatrix m;
544  f128 zero = F128::SetZero();
545  m.r[0] = F128::Splat<false, true, true, true>(scale, zero);
546  m.r[1] = F128::Splat<true, false, true, true>(scale, zero);
547  m.r[2] = F128::Splat<true, true, false, true>(scale, zero);
548  m.r[3] = F128::LoadA16(F128::v0001_);
549  return m;
550 }
551 
552 // XMMatrixTranslation
553 // r[0] = { 1, 0, 0, 0 }
554 // r[1] = { 0, 1, 0, 0 }
555 // r[2] = { 0, 0, 1, 0 }
556 // r[3] = { x, y, z, 1 }
557 NLIB_M(SimdMatrix) Matrix::FromTranslation(float ofs_x, float ofs_y, float ofs_z) NLIB_NOEXCEPT {
558  SimdMatrix m;
559  m.r[0] = F128::LoadA16(F128::v1000_);
560  m.r[1] = F128::LoadA16(F128::v0100_);
561  m.r[2] = F128::LoadA16(F128::v0010_);
562  m.r[3] = F128::SetValue(ofs_x, ofs_y, ofs_z, 1.f);
563  return m;
564 }
565 
566 // XMMatrixTranslationFromVector
567 // r[0] = { 1, 0, 0, 0 }
568 // r[1] = { 0, 1, 0, 0 }
569 // r[2] = { 0, 0, 1, 0 }
570 // r[3] = { x, y, z, 1 }
571 NLIB_M(SimdMatrix) Matrix::FromTranslation(SimdVectorArg ofs) NLIB_NOEXCEPT {
572  SimdMatrix m;
573  m.r[0] = F128::LoadA16(F128::v1000_);
574  m.r[1] = F128::LoadA16(F128::v0100_);
575  m.r[2] = F128::LoadA16(F128::v0010_);
576  m.r[3] = F128::SetFloatToLane<3>(ofs, 1.f);
577  return m;
578 }
579 
580 // XMMatrixRotationX
581 // r[0] = { 1 0 0 0 }
582 // r[1] = { 0 c s 0 }
583 // r[2] = { 0 -s c 0 }
584 // r[3] = { 0 0 0 1 }
585 NLIB_M(SimdMatrix) Matrix::FromRotationX(float sin_value, float cos_value) NLIB_NOEXCEPT {
586  SimdMatrix m;
587  SimdVector zero = F128::SetZero();
588  f128 r1 = F128::SetFloatToLane<1>(zero, cos_value);
589  r1 = F128::SetFloatToLane<2>(r1, sin_value);
590  f128 r2 = F128::SetFloatToLane<1>(zero, -sin_value);
591  r2 = F128::SetFloatToLane<2>(r2, cos_value);
592 
593  m.r[0] = F128::LoadA16(F128::v1000_);
594  m.r[1] = r1;
595  m.r[2] = r2;
596  m.r[3] = F128::LoadA16(F128::v0001_);
597  return m;
598 }
599 
600 // XMMatrixRotationY
601 // r[0] = { c 0 -s 0 }
602 // r[1] = { 0 1 0 0 }
603 // r[2] = { s 0 c 0 }
604 // r[3] = { 0 0 0 1 }
605 NLIB_M(SimdMatrix) Matrix::FromRotationY(float sin_value, float cos_value) NLIB_NOEXCEPT {
606  SimdMatrix m;
607  SimdVector zero = F128::SetZero();
608  f128 r0 = F128::SetFloatToLane<0>(zero, cos_value);
609  r0 = F128::SetFloatToLane<2>(r0, -sin_value);
610  f128 r2 = F128::SetFloatToLane<0>(zero, sin_value);
611  r2 = F128::SetFloatToLane<2>(r2, cos_value);
612 
613  m.r[0] = r0;
614  m.r[1] = F128::LoadA16(F128::v0100_);
615  m.r[2] = r2;
616  m.r[3] = F128::LoadA16(F128::v0001_);
617  return m;
618 }
619 
620 // XMMatrixRotationZ
621 // r[0] = { c s 0 0 }
622 // r[1] = { -s c 0 0 }
623 // r[2] = { 0 0 1 0 }
624 // r[3] = { 0 0 0 1 }
625 NLIB_M(SimdMatrix) Matrix::FromRotationZ(float sin_value, float cos_value) NLIB_NOEXCEPT {
626  SimdMatrix m;
627  SimdVector zero = F128::SetZero();
628  f128 r0 = F128::SetFloatToLane<0>(zero, cos_value);
629  r0 = F128::SetFloatToLane<1>(r0, sin_value);
630  f128 r1 = F128::SetFloatToLane<0>(zero, -sin_value);
631  r1 = F128::SetFloatToLane<1>(r1, cos_value);
632 
633  m.r[0] = r0;
634  m.r[1] = r1;
635  m.r[2] = F128::LoadA16(F128::v0010_);
636  m.r[3] = F128::LoadA16(F128::v0001_);
637  return m;
638 }
639 
640 // XMMatrixRotationAxis
641 // The result may be different from DirectXMath's XMMatrixRotationAxis.
642 // It is because the calculation order is different.
643 NLIB_M(SimdMatrix) Matrix::FromRotationAxisAndSinCos(SimdVectorArg axis_normalized, float sin_value,
644  float cos_value) NLIB_NOEXCEPT {
645  // m00, m11, m22, *
646  f128 diagonal, c1;
647  {
648  f128 nn = F128::Mult(axis_normalized, axis_normalized);
649  f128 c = F128::SetValue(cos_value, each_float);
650  c1 = F128::SetValue(1.f - cos_value, each_float);
651  diagonal = F128::MultAdd(c1, nn, c);
652  diagonal = F128::SetZeroToLane<3>(diagonal);
653  }
654 
655  f128 zxy = F128::Swizzle<2, 0, 1, 2>(axis_normalized);
656  f128 s = F128::SetValue(sin_value, each_float);
657  f128 xy_yz_xz = F128::Mult(axis_normalized, F128::Swizzle<1, 2, 0, 3>(axis_normalized));
658  xy_yz_xz = F128::Mult(c1, xy_yz_xz);
659  f128 plus = F128::MultAdd(s, zxy, xy_yz_xz); // xy(1-c)+sz, yz(1-c)+sx, xz(1-c)+sy
660  f128 minus = F128::MultSub(s, zxy, xy_yz_xz); // xy(1-c)-sz, yz(1-c)-sx, xz(1-c)-sy
661 
662  f128 t0 = F128::Permute<1, 2, 4, 5>(plus, minus);
663  f128 t1 = F128::Permute<0, 6, 2, 3>(plus, minus);
664 
665  SimdMatrix m;
666  m.r[0] = F128::Permute<4, 0, 1, 7>(t1, diagonal);
667  m.r[1] = F128::Permute<2, 5, 0, 7>(t0, diagonal);
668  m.r[2] = F128::Permute<1, 3, 6, 7>(t0, diagonal);
669  m.r[3] = F128::LoadA16(F128::v0001_);
670  return m;
671 }
672 
673 // XMMatrixRotationQuaternion
674 // the result may differ from directxmath's one because the computation sequence is different.
675 NLIB_M(SimdMatrix) Matrix::FromRotationQuaternion(SimdQuaternionArg quat) NLIB_NOEXCEPT {
676  // m00, m11, m22, *
677  f128 q2 = F128::Add(quat, quat);
678  f128 qq2 = F128::Mult(quat, q2);
679  f128 t0, t1;
680 
681  t0 = F128::Swizzle<1, 0, 0, 1>(qq2); // 2y^2, 2x^2, 2x^2, *
682  t1 = F128::Swizzle<2, 2, 1, 0>(qq2); // 2z^2, 2z^2, 2y^2, *
683  // 1-2y^2-2z^2, 1-2x^2-2z^2, 1-2x^2-2y^2, 0
684  f128 diagonal = F128::Sub(F128::Sub(F128::SetOne(), t0), t1);
685  diagonal = F128::SetFloatToLane<3>(diagonal, 0.f);
686 
687  t0 = F128::Swizzle<1, 0, 0, 1>(quat); // y, x, x, *
688  t1 = F128::Swizzle<2, 2, 1, 0>(q2); // 2z, 2z, 2y, *
689  f128 yz_xz_xy = F128::Mult(t0, t1); // 2yz, 2xz, 2xy, *
690 
691  t0 = F128::SetValue<3>(quat, each_select32); // w, w, w, *
692  f128 wx_wy_wz = F128::Mult(q2, t0); // 2wx, 2wy, 2wz
693 
694  f128 plus = F128::Add(yz_xz_xy, wx_wy_wz); // 2yz+2wx, 2xz+2wy, 2xy+2wz, *
695  f128 minus = F128::Sub(yz_xz_xy, wx_wy_wz); // 2yz-2wx, 2xz-2wy, 2xy-2wz, *
696 
697  t0 = F128::Permute<1, 2, 4, 5>(plus, minus);
698  t1 = F128::Permute<0, 6, 2, 3>(plus, minus);
699 
700  SimdMatrix m;
701  m.r[0] = F128::Permute<4, 1, 3, 7>(t0, diagonal);
702  m.r[1] = F128::Permute<1, 5, 0, 7>(t1, diagonal);
703  m.r[2] = F128::Permute<0, 2, 6, 7>(t0, diagonal);
704  m.r[3] = F128::LoadA16(F128::v0001_);
705  return m;
706 }
707 
708 // XMMatrixRotationRollPitchYaw
709 NLIB_M(SimdMatrix) Matrix::FromRotationZXY(SimdVectorArg sin_xyz,
710  SimdVectorArg cos_xyz) NLIB_NOEXCEPT {
711  // CzCy+SzSxSy SzCx -CzSy+SzSxCy 0
712  // -SzCy+CzSxSy CzCx SzSy+CzSxCy 0
713  // CxSy -Sx CxCy 0
714  f128 m00_12_02_10;
715  {
716  f128 sz_cz_sz_cz = F128::Permute<2, 6, 2, 6>(sin_xyz, cos_xyz);
717  f128 sy_cy_cy_sy = F128::Permute<1, 5, 5, 1>(sin_xyz, cos_xyz);
718  f128 tmp = F128::Mult(sz_cz_sz_cz, sy_cy_cy_sy);
719  m00_12_02_10 = F128::Mult<0>(sin_xyz, tmp, each_select32);
720  tmp = F128::Swizzle<1, 0, 3, 2>(tmp);
721  tmp = F128::NegateEx<false, false, true, true>(tmp);
722  m00_12_02_10 = F128::Add(tmp, m00_12_02_10);
723  }
724  f128 m20_01_22_11;
725  {
726  f128 sy_sz_cy_cz = F128::Permute<1, 2, 5, 6>(sin_xyz, cos_xyz);
727  m20_01_22_11 = F128::Mult<0>(cos_xyz, sy_sz_cy_cz, each_select32);
728  }
729 
730  f128 r2 = F128::SetFloatToLane<3>(m20_01_22_11, 0.f);
731  f128 r1 = F128::Permute<3, 7, 1, 1>(m00_12_02_10, m20_01_22_11);
732 
733  SimdMatrix m;
734  m.r[0] = F128::Permute<0, 5, 2, 7>(m00_12_02_10, r2);
735  m.r[1] = F128::SetZeroToLane<3>(r1);
736  m.r[2] = F128::SetFloatToLane<1>(r2, -F128::GetFloatFromLane<0>(sin_xyz));
737  m.r[3] = F128::LoadA16(F128::v0001_);
738  return m;
739 }
740 
741 // XMMatrixLookToLH
742 NLIB_M(SimdMatrix) Matrix::LookToLh(SimdVectorArg eye_pos, SimdVectorArg eye_dir_normalized,
743  SimdVectorArg up_dir_normalized) NLIB_NOEXCEPT {
744  SimdVector r0 = Vector3::Cross(up_dir_normalized, eye_dir_normalized);
745  SimdVector r1 = Vector3::Cross(eye_dir_normalized, r0);
746  SimdVector neg = F128::Negate(eye_pos);
747  f128 d0 = Vector3::Dot(r0, neg);
748  f128 d1 = Vector3::Dot(r1, neg);
749  f128 d2 = Vector3::Dot(eye_dir_normalized, neg);
750  SimdMatrix m;
751  m.r[0] = F128::Splat<false, false, false, true>(r0, d0);
752  m.r[1] = F128::Splat<false, false, false, true>(r1, d1);
753  m.r[2] = F128::Splat<false, false, false, true>(eye_dir_normalized, d2);
754  m.r[3] = F128::LoadA16(F128::v0001_);
755  return Transpose(m);
756 }
757 
758 // XMMatrixLookAtLH
759 NLIB_M(SimdMatrix) Matrix::LookAtLh(SimdVectorArg eye_pos, SimdVectorArg at_pos,
760  SimdVectorArg up_dir_normalized) NLIB_NOEXCEPT {
761  SimdVector eye_dir = F128::Sub(at_pos, eye_pos);
762  eye_dir = Vector3::Normalize(eye_dir);
763  return LookToLh(eye_pos, eye_dir, up_dir_normalized);
764 }
765 
766 // XMMatrixLookToRH
767 NLIB_M(SimdMatrix) Matrix::LookToRh(SimdVectorArg eye_pos, SimdVectorArg eye_dir_normalized,
768  SimdVectorArg up_dir_normalized) NLIB_NOEXCEPT {
769  return LookToLh(eye_pos, F128::Negate(eye_dir_normalized), up_dir_normalized);
770 }
771 
772 // XMMatrixLookAtRH
773 NLIB_M(SimdMatrix) Matrix::LookAtRh(SimdVectorArg eye_pos, SimdVectorArg at_pos,
774  SimdVectorArg up_dir_normalized) NLIB_NOEXCEPT {
775  SimdVector eye_dir = F128::Sub(eye_pos, at_pos);
776  eye_dir = Vector3::Normalize(eye_dir);
777  return LookToLh(eye_pos, eye_dir, up_dir_normalized);
778 }
779 
780 // XMMatrixPerspectiveLH
781 NLIB_M(SimdMatrix) Matrix::PerspectiveLh(float width, float height, float near_z,
782  float far_z) NLIB_NOEXCEPT {
783  float near2 = near_z + near_z;
784  float range = far_z / (far_z - near_z);
785  f128 zero = F128::SetZero();
786  f128 v = F128::SetValue(near2 / width, near2 / height, range, -range * near_z);
787  SimdMatrix m;
788  m.r[0] = F128::Splat<false, true, true, true>(v, zero);
789  m.r[1] = F128::Splat<true, false, true, true>(v, zero);
790  f128 tmp = F128::Splat<true, true, false, true>(v, zero);
791  m.r[2] = F128::SetFloatToLane<3>(tmp, 1.f);
792  m.r[3] = F128::Permute<0, 1, 7, 0>(zero, v);
793  return m;
794 }
795 
796 // XMMatrixPerspectiveRH
797 NLIB_M(SimdMatrix) Matrix::PerspectiveRh(float width, float height, float near_z,
798  float far_z) NLIB_NOEXCEPT {
799  float near2 = near_z + near_z;
800  float range = far_z / (near_z - far_z);
801  f128 zero = F128::SetZero();
802  f128 v = F128::SetValue(near2 / width, near2 / height, range, range * near_z);
803  SimdMatrix m;
804  m.r[0] = F128::Splat<false, true, true, true>(v, zero);
805  m.r[1] = F128::Splat<true, false, true, true>(v, zero);
806  f128 tmp = F128::Splat<true, true, false, true>(v, zero);
807  m.r[2] = F128::SetFloatToLane<3>(tmp, -1.f);
808  m.r[3] = F128::Permute<0, 1, 7, 0>(zero, v);
809  return m;
810 }
811 
812 // XMMatrixPerspectiveFovLH
813 NLIB_M(SimdMatrix) Matrix::PerspectiveFovLh(float half_fovy_sin, float half_fovy_cos, float aspect,
814  float near_z, float far_z) NLIB_NOEXCEPT {
815  float height = half_fovy_cos / half_fovy_sin;
816  float width = height / aspect;
817  float range = far_z / (far_z - near_z);
818 
819  f128 zero = F128::SetZero();
820  f128 v = F128::SetValue(width, height, range, -range * near_z);
821  SimdMatrix m;
822  m.r[0] = F128::Splat<false, true, true, true>(v, zero);
823  m.r[1] = F128::Splat<true, false, true, true>(v, zero);
824  f128 tmp = F128::Splat<true, true, false, true>(v, zero);
825  m.r[2] = F128::SetFloatToLane<3>(tmp, 1.f);
826  m.r[3] = F128::Permute<0, 1, 7, 0>(zero, v);
827  return m;
828 }
829 
830 // XMMatrixPerspectiveFovRH
831 NLIB_M(SimdMatrix) Matrix::PerspectiveFovRh(float half_fovy_sin, float half_fovy_cos, float aspect,
832  float near_z, float far_z) NLIB_NOEXCEPT {
833  float height = half_fovy_cos / half_fovy_sin;
834  float width = height / aspect;
835  float range = far_z / (near_z - far_z);
836 
837  f128 zero = F128::SetZero();
838  f128 v = F128::SetValue(width, height, range, range * near_z);
839  SimdMatrix m;
840  m.r[0] = F128::Splat<false, true, true, true>(v, zero);
841  m.r[1] = F128::Splat<true, false, true, true>(v, zero);
842  f128 tmp = F128::Splat<true, true, false, true>(v, zero);
843  m.r[2] = F128::SetFloatToLane<3>(tmp, -1.f);
844  m.r[3] = F128::Permute<0, 1, 7, 0>(zero, v);
845  return m;
846 }
847 
848 // XMMatrixPerspectiveOffCenterLH
849 NLIB_M(SimdMatrix) Matrix::PerspectiveOffCenterLh(float left, float right, float bottom, float top,
850  float near_z, float far_z) NLIB_NOEXCEPT {
851  float near2 = near_z + near_z;
852  f128 div;
853  {
854  f128 a = F128::SetValue(1.f, 1.f, far_z, 1.f);
855  f128 b = F128::SetValue(right - left, top - bottom, far_z - near_z, 1.f);
856  div = F128::Div(a, b);
857  // recpWidth, recpHeight, range, 1.f
858  }
859  f128 zero = F128::SetZero();
860  f128 v0 = F128::SetValue(near2, near2, -near_z, 1.f);
861  f128 r2 = F128::SetValue(-(left + right), -(top + bottom), 1.f, 1.f);
862  v0 = F128::Mult(v0, div);
863 
864  SimdMatrix m;
865  m.r[0] = F128::Splat<false, true, true, true>(v0, zero);
866  m.r[1] = F128::Splat<true, false, true, true>(v0, zero);
867  m.r[2] = F128::Mult(r2, div);
868  m.r[3] = F128::Splat<true, true, false, true>(v0, zero);
869  return m;
870 }
871 
872 // XMMatrixPerspectiveOffCenterRH
873 NLIB_M(SimdMatrix) Matrix::PerspectiveOffCenterRh(float left, float right, float bottom, float top,
874  float near_z, float far_z) NLIB_NOEXCEPT {
875  float near2 = near_z + near_z;
876  f128 div;
877  {
878  f128 a = F128::SetValue(1.f, 1.f, far_z, 1.f);
879  f128 b = F128::SetValue(right - left, top - bottom, near_z - far_z, -1.f);
880  div = F128::Div(a, b);
881  // recpWidth, recpHeight, range, 1.f
882  }
883  f128 zero = F128::SetZero();
884  f128 v0 = F128::SetValue(near2, near2, near_z, 1.f);
885  f128 r2 = F128::SetValue((left + right), (top + bottom), 1.f, 1.f);
886  v0 = F128::Mult(v0, div);
887 
888  SimdMatrix m;
889  m.r[0] = F128::Splat<false, true, true, true>(v0, zero);
890  m.r[1] = F128::Splat<true, false, true, true>(v0, zero);
891  m.r[2] = F128::Mult(r2, div);
892  m.r[3] = F128::Splat<true, true, false, true>(v0, zero);
893  return m;
894 }
895 
896 // XMMatrixOrthographicLH
897 NLIB_M(SimdMatrix) Matrix::OrthographicLh(float width, float height, float near_z,
898  float far_z) NLIB_NOEXCEPT {
899  f128 div;
900  {
901  f128 a = F128::SetValue(2.f, 2.f, 1.f, -near_z);
902  f128 b = F128::SetValue(width, height, far_z - near_z, far_z - near_z);
903  div = F128::Div(a, b);
904  }
905  f128 zero = F128::SetZero();
906 
907  SimdMatrix m;
908  m.r[0] = F128::Splat<false, true, true, true>(div, zero);
909  m.r[1] = F128::Splat<true, false, true, true>(div, zero);
910  m.r[2] = F128::Splat<true, true, false, true>(div, zero);
911  f128 tmp = F128::Permute<0, 1, 7, 1>(zero, div);
912  m.r[3] = F128::SetFloatToLane<3>(tmp, 1.f);
913  return m;
914 }
915 
916 // XMMatrixOrthographicRH
917 NLIB_M(SimdMatrix) Matrix::OrthographicRh(float width, float height, float near_z,
918  float far_z) NLIB_NOEXCEPT {
919  f128 div;
920  {
921  f128 a = F128::SetValue(2.f, 2.f, 1.f, near_z);
922  f128 b = F128::SetValue(width, height, near_z - far_z, near_z - far_z);
923  div = F128::Div(a, b);
924  }
925  f128 zero = F128::SetZero();
926 
927  SimdMatrix m;
928  m.r[0] = F128::Splat<false, true, true, true>(div, zero);
929  m.r[1] = F128::Splat<true, false, true, true>(div, zero);
930  m.r[2] = F128::Splat<true, true, false, true>(div, zero);
931  f128 tmp = F128::Permute<0, 1, 7, 1>(zero, div);
932  m.r[3] = F128::SetFloatToLane<3>(tmp, 1.f);
933  return m;
934 }
935 
936 // XMMatrixOrthographicOffCenterLH
937 NLIB_M(SimdMatrix) Matrix::OrthographicOffCenterLh(float left, float right, float bottom, float top,
938  float near_z, float far_z) NLIB_NOEXCEPT {
939  f128 div;
940  {
941  f128 a = F128::SetOne();
942  f128 b = F128::SetValue(right - left, top - bottom, far_z - near_z, 1.f);
943  div = F128::Div(a, b);
944  }
945  f128 zero = F128::SetZero();
946  f128 v0 = F128::SetValue(2.f, 2.f, 1.f, 1.f);
947  f128 r3 = F128::SetValue(-(left + right), -(top + bottom), -near_z, 1.f);
948  v0 = F128::Mult(v0, div);
949 
950  SimdMatrix m;
951  m.r[0] = F128::Splat<false, true, true, true>(v0, zero);
952  m.r[1] = F128::Splat<true, false, true, true>(v0, zero);
953  m.r[2] = F128::Splat<true, true, false, true>(v0, zero);
954  m.r[3] = F128::Mult(r3, div);
955  return m;
956 }
957 
958 // XMMatrixOrthographicOffCenterRH
959 NLIB_M(SimdMatrix) Matrix::OrthographicOffCenterRh(float left, float right, float bottom, float top,
960  float near_z, float far_z) NLIB_NOEXCEPT {
961  f128 div;
962  {
963  f128 a = F128::SetOne();
964  f128 b = F128::SetValue(right - left, top - bottom, near_z - far_z, 1.f);
965  div = F128::Div(a, b);
966  }
967  f128 zero = F128::SetZero();
968  f128 v0 = F128::SetValue(2.f, 2.f, 1.f, 1.f);
969  f128 r3 = F128::SetValue(-(left + right), -(top + bottom), near_z, 1.f);
970  v0 = F128::Mult(v0, div);
971 
972  SimdMatrix m;
973  m.r[0] = F128::Splat<false, true, true, true>(v0, zero);
974  m.r[1] = F128::Splat<true, false, true, true>(v0, zero);
975  m.r[2] = F128::Splat<true, true, false, true>(v0, zero);
976  m.r[3] = F128::Mult(r3, div);
977  return m;
978 }
979 
980 // XMMatrixShadow
981 NLIB_M(SimdMatrix) Matrix::Shadow(SimdPlaneArg shadow_plane, SimdVector light_pos) NLIB_NOEXCEPT {
982  // Plane::Normalize(shadow_plane);
983  SimdPlane plane = F128::Mult(Vector3::RecpLength(shadow_plane), shadow_plane);
984  // distance from plane to the light
985  f128 r0 = Vector4::DotEx<true, false, false, false>(plane, light_pos);
986  plane = F128::Negate(plane);
987  f128 r1 = F128::RotateLeft<1>(r0);
988  f128 r2 = F128::RotateLeft<2>(r0);
989  f128 r3 = F128::RotateLeft<3>(r0);
990 
991  SimdMatrix m;
992  m.r[0] = F128::MultAdd<0>(plane, light_pos, r0, each_select32);
993  m.r[1] = F128::MultAdd<1>(plane, light_pos, r1, each_select32);
994  m.r[2] = F128::MultAdd<2>(plane, light_pos, r2, each_select32);
995  m.r[3] = F128::MultAdd<3>(plane, light_pos, r3, each_select32);
996  return m;
997 }
998 
999 // XMMatrixReflect
1000 NLIB_M(SimdMatrix) Matrix::Reflect(SimdPlaneArg reflection_plane) NLIB_NOEXCEPT {
1001  // SimdPlane plane = Plane::Normalize(reflection_plane);
1002  SimdPlane plane = F128::Mult(Vector3::RecpLength(reflection_plane), reflection_plane);
1003  f128 minus_2n = F128::Mult(-2.f, plane);
1004  minus_2n = F128::SetZeroToLane<3>(minus_2n);
1005 
1006  SimdMatrix m = Matrix::Identity();
1007  m.r[0] = F128::MultAdd<0>(plane, minus_2n, m.r[0], each_select32);
1008  m.r[1] = F128::MultAdd<1>(plane, minus_2n, m.r[1], each_select32);
1009  m.r[2] = F128::MultAdd<2>(plane, minus_2n, m.r[2], each_select32);
1010  m.r[3] = F128::MultAdd<3>(plane, minus_2n, m.r[3], each_select32);
1011  return m;
1012 }
1013 
1014 // different from XMMatrixDecompose
1015 // note that scale.xyz >= 0
1016 NLIB_M(void) Matrix::Decompose(SimdVector* scale, SimdMatrix* rot, SimdVector* trans, // NOLINT
1017  SimdMatrixArg m) NLIB_NOEXCEPT {
1018  // translation
1019  *trans = m.r[3];
1020 
1021  // scaling
1022  f128 recp_scale;
1023  {
1024  f128 dot_x = Vector3::DotEx<true, false, false, true>(m.r[0], m.r[0]);
1025  f128 dot_y = Vector3::DotEx<false, true, false, true>(m.r[1], m.r[1]);
1026  f128 dot_z = Vector3::DotEx<false, false, true, true>(m.r[2], m.r[2]);
1027  f128 dot = F128::Or(dot_x, dot_y);
1028  dot = F128::Or(dot, dot_z);
1029  recp_scale = F128::RecpSqrt(dot);
1030  *scale = F128::Mult(dot, recp_scale);
1031  }
1032 
1033  // rotation
1034  rot->r[0] = F128::Mult<0>(recp_scale, m.r[0], each_select32);
1035  rot->r[1] = F128::Mult<1>(recp_scale, m.r[1], each_select32);
1036  rot->r[2] = F128::Mult<2>(recp_scale, m.r[2], each_select32);
1037  rot->r[3] = F128::LoadA16(F128::v0001_);
1038 }
1039 
1040 #undef NLIB_M
1041 
1042 #endif // NLIB_DOXYGEN
1043 
1044 } // namespace simd
1045 NLIB_NAMESPACE_END
1046 
1047 #endif // INCLUDE_NN_NLIB_SIMD_SIMDMATRIX_H_
#define NLIB_NOEXCEPT
環境に合わせてnoexcept 又は同等の定義がされます。
Definition: Platform.h:2151
4x4行列を扱う関数が集められたクラスです。
Definition: SimdMatrix.h:15
クォータニオンが定義されています。
f128arg SimdVectorArg
f128argがtypedefされています。
Definition: SimdFloat.h:3927
#define NLIB_VIS_HIDDEN
関数やクラス等のシンボルをライブラリの外部に公開しません。
Definition: Platform_unix.h:50
constexpr const each_float_tag each_float
each_float_tag型の定数オブジェクトで、単精度浮動小数点数を示すためのタグです。
Definition: SimdFloat.h:51
f128arg SimdQuaternionArg
f128argがtypedefされています。
Definition: SimdFloat.h:3929
f128arg SimdPlaneArg
f128argがtypedefされています。
Definition: SimdFloat.h:3931
f128 r[4]
4x4行列の各行を保持します。
Definition: SimdFloat.h:3954
4x4行列を保持する構造体です。
Definition: SimdFloat.h:3938
単精度浮動小数点数のSIMD演算を行うためのクラスや関数が定義されています。
constexpr const each_select32_tag each_select32
each_select32_tag型の定数オブジェクトで、32bitのレーンを選択することを示すためのタグです。 ...
Definition: SimdInt.h:56
4x3行列をメモリから読み出したりメモリに書き出したりするための型です。データメンバmは4x3の配列で16バイ...
Definition: SimdFloat.h:4079
3x3行列をメモリから読み出したりメモリに書き出したりするための型です。データメンバmは3x3の配列です。 ...
Definition: SimdFloat.h:4064
#define NLIB_F128_TRANSPOSE(row0, row1, row2, row3)
インプレイスで行列を転置するためのマクロです。
Definition: SimdFloat.h:4001
3次元ベクトルが定義されています。
nlib_f128_t f128
nlib_f128_tがtypedefされています。
Definition: SimdFloat.h:54
4次元ベクトルが定義されています。
4x4行列をメモリから読み出したりメモリに書き出したりするための型です。データメンバmは4x4の配列で16バイ...
Definition: SimdFloat.h:4087
3x4行列をメモリから読み出したりメモリに書き出したりするための型です。データメンバmは3x4の配列で16バイ...
Definition: SimdFloat.h:4071
f128 SimdPlane
f128がtypedefされています。平面を扱う場合に利用されます。
Definition: SimdFloat.h:3930
f128 SimdVector
f128がtypedefされています。3次元ベクトル又は4次元ベクトルを扱う場合に利用されます。 ...
Definition: SimdFloat.h:3926