3 #ifndef INCLUDE_NN_NLIB_SIMD_SIMDALGORITHM_H_
4 #define INCLUDE_NN_NLIB_SIMD_SIMDALGORITHM_H_
// Declaration only; the definition is not visible in this header.
// In this file it is invoked as sortUint32A16(tmp, data, NumElem) from the
// primary MergeSortHelper template, which is the one selected when
// (NumElem & (NumElem - 1)) == 0.
// NOTE(review): presumably `src` holds the N values to sort and `buf` is
// scratch space of the same length; confirm against the implementation.
14 NLIB_VIS_PUBLIC void sortUint32A16(uint32_t* buf, uint32_t* src,
size_t N);
// Declaration only; the definition is not visible in this header.
// In this file it is invoked as sortUint32A16_1(tmp, data, NumElem) from the
// MergeSortHelper<NumElem, false> specialization, i.e. when
// (NumElem & (NumElem - 1)) != 0.
// NOTE(review): presumably `src` holds the N values to sort and `buf` is
// scratch space of the same length; confirm against the implementation.
15 NLIB_VIS_PUBLIC void sortUint32A16_1(uint32_t* buf, uint32_t* src,
size_t N);
17 template <
size_t NumElem,
bool flag>
18 struct MergeSortHelper {
21 sortUint32A16(tmp, data, NumElem);
25 template <
size_t NumElem>
26 struct MergeSortHelper<NumElem, false> {
29 sortUint32A16_1(tmp, data, NumElem);
35 template <
size_t NumElem>
39 NLIB_ASSERT(!(reinterpret_cast<uintptr_t>(data) & 15));
40 detail::MergeSortHelper<NumElem, ((NumElem & (NumElem - 1)) == 0)>::Sort(data);
48 const unsigned char* p =
reinterpret_cast<const unsigned char*
>(s);
52 if (reinterpret_cast<uintptr_t>(p) & 15) {
53 size_t r =
reinterpret_cast<uintptr_t
>(p) & 15;
54 a1 = I128::LoadA16(p - r);
56 mask = I128::MoveMask8(cmp1);
69 if ((reinterpret_cast<uintptr_t>(p) & 32)) {
70 a1 = I128::LoadA16(p);
72 if (!I128::IsZero(cmp1)) {
73 mask = I128::MoveMask8(cmp1);
80 a1 = I128::LoadA16(p);
81 a2 = I128::LoadA16(p + 16);
82 cmp1 = I128::SetZero();
83 cmp2 = I128::SetZero();
86 if (!I128::IsZero(I128::Or(cmp1, cmp2))) {
87 mask = I128::MoveMask8(cmp1) | (I128::MoveMask8(cmp2) << 16);
94 a1 = I128::LoadA16(p);
96 if (!I128::IsZero(cmp1)) {
97 mask = I128::MoveMask8(cmp1);
105 a1 = I128::LoadA16(p);
107 mask = I128::MoveMask8(cmp1);
108 mask &= (1 << n) - 1;
109 if (mask)
return p +
nlib_ctz(mask);
114 template <
class PRED>
119 const unsigned char* p =
reinterpret_cast<const unsigned char*
>(s);
123 if (reinterpret_cast<uintptr_t>(p) & 15) {
124 size_t r =
reinterpret_cast<uintptr_t
>(p) & 15;
125 a1 = I128::LoadA16(p - r);
126 cmp1 = I128::Not(pred(a1));
127 mask = I128::MoveMask8(cmp1);
131 mask &= (1 << n) - 1;
132 if (mask)
return p +
nlib_ctz(mask);
135 if (mask)
return p +
nlib_ctz(mask);
140 if ((reinterpret_cast<uintptr_t>(p) & 32)) {
141 a1 = I128::LoadA16(p);
143 if (!I128::IsFull(cmp1)) {
144 mask = I128::MoveMask8(I128::Not(cmp1));
151 a1 = I128::LoadA16(p);
152 a2 = I128::LoadA16(p + 16);
155 if (!I128::IsFull(I128::And(cmp1, cmp2))) {
156 mask = I128::MoveMask8(I128::Not(cmp1)) | (I128::MoveMask8(I128::Not(cmp2)) << 16);
163 a1 = I128::LoadA16(p);
165 if (!I128::IsFull(cmp1)) {
166 mask = I128::MoveMask8(I128::Not(cmp1));
174 a1 = I128::LoadA16(p);
175 cmp1 = I128::Not(pred(a1));
176 mask = I128::MoveMask8(cmp1);
177 mask &= (1 << n) - 1;
178 if (mask)
return p +
nlib_ctz(mask);
185 i128 result = I128::CmpLtInt8(c, I128::SetValue(
'{',
each_int8));
186 result = I128::And(result, I128::CmpGtInt8(c, I128::SetValue(
'`',
each_int8)));
188 i128 tmp = I128::CmpLtInt8(c, I128::SetValue(
'[',
each_int8));
189 tmp = I128::And(tmp, I128::CmpGtInt8(c, I128::SetValue(
'@',
each_int8)));
190 result = I128::Or(result, tmp);
197 i128 result = I128::CmpLtInt8(c, I128::SetValue(
':',
each_int8));
198 result = I128::And(result, I128::CmpGtInt8(c, I128::SetValue(
'/',
each_int8)));
208 i128 result = I128::CmpEq8(c, I128::SetValue(
' ',
each_int8));
209 result = I128::Or(result, I128::CmpEq8(c, I128::SetValue(
'\r',
each_int8)));
210 result = I128::Or(result, I128::CmpEq8(c, I128::SetValue(
'\n',
each_int8)));
211 result = I128::Or(result, I128::CmpEq8(c, I128::SetValue(
'\t',
each_int8)));
219 i128 result = I128::CmpLtInt8(c, I128::SetValue(
':',
each_int8));
220 result = I128::And(result, I128::CmpGtInt8(c, I128::SetValue(
'/',
each_int8)));
222 tmp = I128::CmpLtInt8(c, I128::SetValue(
'G',
each_int8));
223 tmp = I128::And(tmp, I128::CmpGtInt8(c, I128::SetValue(
'@',
each_int8)));
224 result = I128::Or(result, tmp);
226 tmp = I128::CmpLtInt8(c, I128::SetValue(
'g',
each_int8));
227 tmp = I128::And(tmp, I128::CmpGtInt8(c, I128::SetValue(
'`',
each_int8)));
228 result = I128::Or(result, tmp);
238 #endif // INCLUDE_NN_NLIB_SIMD_SIMDALGORITHM_H_
#define NLIB_NOEXCEPT
Defines noexcept geared to the environment, or the equivalent.
i128 IsXdigit(i128 c) noexcept
Masks hexadecimal characters in c.
i128 IsDigit(i128 c) noexcept
Masks the characters 0 through 9 in c.
Implements the class and functions for SIMD computations on integers.
i128 IsAlpha(i128 c) noexcept
Masks alphabetic letters in c.
i128 IsSpace(i128 c) noexcept
Masks space characters (0x20, 0x09, 0x0A, 0x0D) in c.
i128 IsAlnum(i128 c) noexcept
Masks alphabetic letters or the characters 0 through 9 in c.
void MergeSortUint32A16(uint32_t *data)
Uses SIMD to merge sort a sequence of 32-bit unsigned integers.
const void * nlib_memchr_pred_not(const void *s, PRED pred, size_t n)
A function template for examining the bytes in byte strings using SIMD instructions.
nlib_i128_t i128
nlib_i128_t is defined using typedef.
const void * nlib_memchr_pred(const void *s, PRED pred, size_t n)
A function template for examining the bytes in byte strings using SIMD instructions.
#define NLIB_ALIGNAS(x)
Defines alignas(x) or the equivalent.
constexpr const each_int8_tag each_int8
The tag for representing a signed 8-bit integer with an each_int8_tag-type constant object...
#define NLIB_STATIC_ASSERT(exp)
Defines a static assertion. Uses static_assert if it is available for use.