3 #ifndef INCLUDE_NN_NLIB_SIMD_SIMDALGORITHM_H_
4 #define INCLUDE_NN_NLIB_SIMD_SIMDALGORITHM_H_
// Out-of-line sort routine for 32-bit unsigned integers; only the declaration
// is visible here. The MergeSortHelper primary template calls it as
// sortUint32A16(tmp, data, NumElem), and the caller selects that template when
// (NumElem & (NumElem - 1)) == 0.
// NOTE(review): buf looks like a scratch buffer and src the array being
// sorted, with N the element count; the "A16" suffix and the caller's
// 16-byte alignment assert on data suggest both pointers must be 16-byte
// aligned -- confirm against the definition.
14 NLIB_VIS_PUBLIC void sortUint32A16(uint32_t* buf, uint32_t* src,
size_t N);
// Counterpart of sortUint32A16 with the same parameter list. The
// MergeSortHelper<NumElem, false> specialization calls it as
// sortUint32A16_1(tmp, data, NumElem), i.e. when
// (NumElem & (NumElem - 1)) != 0.
// NOTE(review): presumably the same buffer/alignment expectations as
// sortUint32A16 apply -- confirm against the definition.
15 NLIB_VIS_PUBLIC void sortUint32A16_1(uint32_t* buf, uint32_t* src,
size_t N);
17 template <
size_t NumElem,
bool flag>
18 struct MergeSortHelper {
21 sortUint32A16(tmp, data, NumElem);
25 template <
size_t NumElem>
26 struct MergeSortHelper<NumElem, false> {
29 sortUint32A16_1(tmp, data, NumElem);
35 template <
size_t NumElem>
39 NLIB_ASSERT(!(reinterpret_cast<uintptr_t>(data) & 15));
40 detail::MergeSortHelper<NumElem, ((NumElem & (NumElem - 1)) == 0)>::Sort(data);
48 const unsigned char* p =
reinterpret_cast<const unsigned char*
>(s);
52 if (reinterpret_cast<uintptr_t>(p) & 15) {
53 size_t r =
reinterpret_cast<uintptr_t
>(p) & 15;
54 a1 = I128::LoadA16(p - r);
56 mask = I128::MoveMask8(cmp1);
69 if ((reinterpret_cast<uintptr_t>(p) & 32)) {
70 a1 = I128::LoadA16(p);
72 if (!I128::IsZero(cmp1)) {
73 mask = I128::MoveMask8(cmp1);
80 a1 = I128::LoadA16(p);
81 a2 = I128::LoadA16(p + 16);
82 cmp1 = I128::SetZero();
83 cmp2 = I128::SetZero();
86 if (!I128::IsZero(I128::Or(cmp1, cmp2))) {
87 mask = I128::MoveMask8(cmp1) | (I128::MoveMask8(cmp2) << 16);
94 a1 = I128::LoadA16(p);
96 if (!I128::IsZero(cmp1)) {
97 mask = I128::MoveMask8(cmp1);
105 a1 = I128::LoadA16(p);
107 mask = I128::MoveMask8(cmp1);
108 mask &= (1 << n) - 1;
109 if (mask)
return p +
nlib_ctz(mask);
114 template <
class PRED>
119 const unsigned char* p =
reinterpret_cast<const unsigned char*
>(s);
123 if (reinterpret_cast<uintptr_t>(p) & 15) {
124 size_t r =
reinterpret_cast<uintptr_t
>(p) & 15;
125 a1 = I128::LoadA16(p - r);
126 cmp1 = I128::Not(pred(a1));
127 mask = I128::MoveMask8(cmp1);
131 mask &= (1 << n) - 1;
132 if (mask)
return p +
nlib_ctz(mask);
135 if (mask)
return p +
nlib_ctz(mask);
140 if ((reinterpret_cast<uintptr_t>(p) & 32)) {
141 a1 = I128::LoadA16(p);
143 if (!I128::IsFull(cmp1)) {
144 mask = I128::MoveMask8(I128::Not(cmp1));
151 a1 = I128::LoadA16(p);
152 a2 = I128::LoadA16(p + 16);
155 if (!I128::IsFull(I128::And(cmp1, cmp2))) {
156 mask = I128::MoveMask8(I128::Not(cmp1)) | (I128::MoveMask8(I128::Not(cmp2)) << 16);
163 a1 = I128::LoadA16(p);
165 if (!I128::IsFull(cmp1)) {
166 mask = I128::MoveMask8(I128::Not(cmp1));
174 a1 = I128::LoadA16(p);
175 cmp1 = I128::Not(pred(a1));
176 mask = I128::MoveMask8(cmp1);
177 mask &= (1 << n) - 1;
178 if (mask)
return p +
nlib_ctz(mask);
185 i128 result = I128::CmpLtInt8(c, I128::SetValue(
'{',
each_int8));
186 result = I128::And(result, I128::CmpGtInt8(c, I128::SetValue(
'`',
each_int8)));
188 i128 tmp = I128::CmpLtInt8(c, I128::SetValue(
'[',
each_int8));
189 tmp = I128::And(tmp, I128::CmpGtInt8(c, I128::SetValue(
'@',
each_int8)));
190 result = I128::Or(result, tmp);
197 i128 result = I128::CmpLtInt8(c, I128::SetValue(
':',
each_int8));
198 result = I128::And(result, I128::CmpGtInt8(c, I128::SetValue(
'/',
each_int8)));
208 i128 result = I128::CmpEq8(c, I128::SetValue(
' ',
each_int8));
209 result = I128::Or(result, I128::CmpEq8(c, I128::SetValue(
'\r',
each_int8)));
210 result = I128::Or(result, I128::CmpEq8(c, I128::SetValue(
'\n',
each_int8)));
211 result = I128::Or(result, I128::CmpEq8(c, I128::SetValue(
'\t',
each_int8)));
219 i128 result = I128::CmpLtInt8(c, I128::SetValue(
':',
each_int8));
220 result = I128::And(result, I128::CmpGtInt8(c, I128::SetValue(
'/',
each_int8)));
222 tmp = I128::CmpLtInt8(c, I128::SetValue(
'G',
each_int8));
223 tmp = I128::And(tmp, I128::CmpGtInt8(c, I128::SetValue(
'@',
each_int8)));
224 result = I128::Or(result, tmp);
226 tmp = I128::CmpLtInt8(c, I128::SetValue(
'g',
each_int8));
227 tmp = I128::And(tmp, I128::CmpGtInt8(c, I128::SetValue(
'`',
each_int8)));
228 result = I128::Or(result, tmp);
238 #endif // INCLUDE_NN_NLIB_SIMD_SIMDALGORITHM_H_
#define NLIB_NOEXCEPT
環境に合わせてnoexcept又は同等の定義がされます。
i128 IsXdigit(i128 c) noexcept
c 内の16進数の文字をマスクします。
i128 IsDigit(i128 c) noexcept
c 内の'0'-'9'の文字をマスクします。
整数のSIMD演算を行うためのクラスや関数が実装されています。
i128 IsAlpha(i128 c) noexcept
c 内のアルファベットをマスクします。
i128 IsSpace(i128 c) noexcept
c 内の空白文字(0x20, 0x09, 0x0A, 0x0D)をマスクします。
i128 IsAlnum(i128 c) noexcept
c 内のアルファベットか'0'-'9'の文字をマスクします。
void MergeSortUint32A16(uint32_t *data)
SIMDを利用して32bit符号なし整数の並びをマージソートします。
const void * nlib_memchr_pred_not(const void *s, PRED pred, size_t n)
バイト列の先頭 n バイトの中から、pred を満たさない最初のバイトをSIMD命令を使って検索するための関数テンプレートです。
nlib_i128_t i128
nlib_i128_tがtypedefされています。
const void * nlib_memchr_pred(const void *s, PRED pred, size_t n)
バイト列の先頭 n バイトの中から、pred を満たす最初のバイトをSIMD命令を使って検索するための関数テンプレートです。
#define NLIB_ALIGNAS(x)
alignas(x)又は同等の定義がされます。
constexpr const each_int8_tag each_int8
each_int8_tag型の定数オブジェクトで、8bitの符号付き整数を示すためのタグです。
#define NLIB_STATIC_ASSERT(exp)
静的アサートが定義されます。利用可能であればstatic_assertを利用します。