3 #ifndef INCLUDE_NN_NLIB_GROUPVARINT_H_ 4 #define INCLUDE_NN_NLIB_GROUPVARINT_H_ 17 return (1 + 4 + 4 + 4 + 4) -
18 (nlib_clz(v0 | 1) / 8) -
19 (nlib_clz(v1 | 1) / 8) -
20 (nlib_clz(v2 | 1) / 8) -
21 (nlib_clz(v3 | 1) / 8);
25 static uint8_t* Encode(uint8_t* p, uint32_t v0, uint32_t v1,
28 static const uint8_t* Decode(uint32_t* v0, uint32_t* v1,
29 uint32_t* v2, uint32_t* v3,
33 return ((n + 3) / 4) * (1 + 4 *
sizeof(uint32_t));
// Pointer overload of GetEncodedSize: reads the four values p[0]..p[3] and
// forwards them to the four-argument GetEncodedSize, returning its result.
// p must therefore reference at least four readable uint32_t elements.
46 inline size_t GroupVarInt32::GetEncodedSize(
const uint32_t* p)
NLIB_NOEXCEPT {
47 return GetEncodedSize(p[0], p[1], p[2], p[3]);
// Computes a size from the four 2-bit fields of `v`.
// NOTE(review): the lines that define `v` are missing from this excerpt;
// presumably v is the tag byte read from *p -- confirm against the full file.
50 inline size_t GroupVarInt32::GetNextGroupSize(
const uint8_t* p)
NLIB_NOEXCEPT {
// Each 2-bit field (bits 0-1, 2-3, 4-5, 6-7) contributes 0..3 on top of the
// constant 5, so the result lies in the range 5..17. This matches the
// four-value GetEncodedSize formula (17 minus the leading zero bytes).
55 return 5 + (v & 3) + ((v >> 2) & 3) + ((v >> 4) & 3) + ((v >> 6) & 3);
// Sums the encoded sizes of the values at p, consuming them four at a time.
// NOTE(review): the declaration of rval and everything after this loop are
// missing from this excerpt, so the handling of the final values and the
// return statement cannot be described from here.
59 inline size_t GroupVarInt32::GetTotalSize(
const uint32_t* p,
size_t n)
NLIB_NOEXCEPT {
// The loop continues only while MORE than four values remain (n > 4), so it
// always exits with at least one value still unprocessed when n started > 0;
// presumably code after the loop deals with that remainder -- confirm.
61 for (; n > 4; n -= 4, p += 4) {
62 rval += GetEncodedSize(p);
// SIMD variant: encodes one group taken from src into the buffer at p and
// returns p advanced by count_[tag].
// NOTE(review): the line that loads `val` and the line(s) between the tag
// computation and the mask load are missing from this excerpt; presumably
// val holds the four values from src and the tag is stored at p[0] -- confirm.
71 inline uint8_t* GroupVarInt32::Encode(uint8_t* p,
const uint32_t* src)
NLIB_NOEXCEPT {
// Compare val against an all-zero vector and collapse the comparison result
// into an integer mask (presumably one bit per byte, set where the byte is 0).
73 int imask = simd::I128::MoveMask8(simd::I128::CmpEq8(val, simd::I128::SetZero()));
// Two table lookups, one on the low 8 bits of the mask and one on the bits
// above them; the second result is placed in the upper nibble of the tag.
74 uint8_t tag = tagarray_[imask & 0xFF] | (tagarray_[imask >> 8] << 4);
// encodemasks_ is indexed in 16-element strides: one shuffle mask per tag.
77 simd::i128 mask = simd::I128::LoadA16(&encodemasks_[tag * 16]);
// Rearrange the bytes of val according to the per-tag shuffle mask.
78 simd::i128 r = simd::I128::Shuffle8(val, mask);
// The shuffled bytes are written starting one byte past p.
// NOTE(review): if StoreA1 writes a whole 128-bit vector, the destination
// needs 16 writable bytes at p + 1 regardless of count_[tag] -- confirm.
79 simd::I128::StoreA1(p + 1, r);
81 return p + count_[tag];
// SIMD variant taking the four values individually: encodes v0..v3 into the
// buffer at p and returns p advanced by count_[tag].
// NOTE(review): the rest of the parameter list, the declaration of `val` and
// the line(s) between the tag computation and the mask load are missing from
// this excerpt; presumably the tag is stored at p[0] there -- confirm.
84 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, uint32_t v0, uint32_t v1,
// Build the vector lane by lane: v0 goes into lane 0 of a zeroed vector, then
// v1, v2 and v3 into lanes 1, 2 and 3.
87 val = simd::I128::SetUint32ToLane<0>(simd::I128::SetZero(), v0);
88 val = simd::I128::SetUint32ToLane<1>(val, v1);
89 val = simd::I128::SetUint32ToLane<2>(val, v2);
90 val = simd::I128::SetUint32ToLane<3>(val, v3);
// Compare val against an all-zero vector and collapse the comparison result
// into an integer mask (presumably one bit per byte, set where the byte is 0).
91 int imask = simd::I128::MoveMask8(simd::I128::CmpEq8(val, simd::I128::SetZero()));
// Two table lookups, one on the low 8 bits of the mask and one on the bits
// above them; the second result is placed in the upper nibble of the tag.
92 uint8_t tag = tagarray_[imask & 0xFF] | (tagarray_[imask >> 8] << 4);
// encodemasks_ is indexed in 16-element strides: one shuffle mask per tag.
95 simd::i128 mask = simd::I128::LoadA16(&encodemasks_[tag * 16]);
// Rearrange the bytes of val according to the per-tag shuffle mask.
96 simd::i128 r = simd::I128::Shuffle8(val, mask);
// The shuffled bytes are written starting one byte past p.
97 simd::I128::StoreA1(p + 1, r);
99 return p + count_[tag];
// Scalar (non-SIMD) variant: writes a tag byte followed by the bytes of
// v0, v1, v2 and v3, low-order byte first.
// NOTE(review): the rest of the parameter list, the control flow that selects
// which byte stores run for each value, the pointer advances between values
// and the return statement are missing from this excerpt.
102 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, uint32_t v0, uint32_t v1,
// vNk = 3 - (leading-zero count / 8), i.e. a value in 0..3.
// Assuming nlib_clz counts leading zero bits of a 32-bit value, this is the
// number of bytes needed for vN beyond the first one -- confirm.
// OR-ing with 1 keeps the argument non-zero without changing the byte count.
104 int v0k = 3 - (nlib_clz(v0 | 1) / 8);
105 int v1k = 3 - (nlib_clz(v1 | 1) / 8);
106 int v2k = 3 - (nlib_clz(v2 | 1) / 8);
107 int v3k = 3 - (nlib_clz(v3 | 1) / 8);
// Tag byte: four 2-bit fields, v0k in bits 0-1 up to v3k in bits 6-7.
// It is written first and p is advanced past it.
108 *p++ =
static_cast<uint8_t
>((v3k << 6) | (v2k << 4) | (v1k << 2) | v0k);
// Bytes of v0: the most significant byte goes to p + 3, the least to p.
113 *(p + 3) = static_cast<uint8_t>(v0 >> 24);
116 *(p + 2) = static_cast<uint8_t>((v0 >> 16) & 0xFF);
119 *(p + 1) = static_cast<uint8_t>((v0 >> 8) & 0xFF);
122 *p =
static_cast<uint8_t
>(v0);
// Bytes of v1, same layout.
132 *(p + 3) = static_cast<uint8_t>(v1 >> 24);
135 *(p + 2) = static_cast<uint8_t>((v1 >> 16) & 0xFF);
138 *(p + 1) = static_cast<uint8_t>((v1 >> 8) & 0xFF);
141 *p =
static_cast<uint8_t
>(v1);
// Bytes of v2, same layout.
151 *(p + 3) = static_cast<uint8_t>(v2 >> 24);
154 *(p + 2) = static_cast<uint8_t>((v2 >> 16) & 0xFF);
157 *(p + 1) = static_cast<uint8_t>((v2 >> 8) & 0xFF);
160 *p =
static_cast<uint8_t
>(v2);
// Bytes of v3, same layout.
170 *(p + 3) = static_cast<uint8_t>(v3 >> 24);
173 *(p + 2) = static_cast<uint8_t>((v3 >> 16) & 0xFF);
176 *(p + 1) = static_cast<uint8_t>((v3 >> 8) & 0xFF);
179 *p =
static_cast<uint8_t
>(v3);
// Array overload for the scalar build: reads src[0]..src[3] and forwards them
// to the four-value Encode, returning the pointer that overload returns.
// src must therefore reference at least four readable uint32_t elements.
189 inline uint8_t* GroupVarInt32::Encode(uint8_t* p,
const uint32_t* src)
NLIB_NOEXCEPT {
190 return Encode(p, src[0], src[1], src[2], src[3]);
// SIMD variant: decodes one group from p into dest and returns p advanced by
// count_[tag].
// NOTE(review): the lines that define `tag` and `val` are missing from this
// excerpt; presumably tag is read from p[0] and val is loaded from the bytes
// that follow it -- confirm.
195 inline const uint8_t* GroupVarInt32::Decode(uint32_t* dest,
const uint8_t* p)
NLIB_NOEXCEPT {
// decodemasks_ is indexed in 16-element strides: one shuffle mask per tag.
198 simd::i128 mask = simd::I128::LoadA16(&decodemasks_[tag * 16]);
// Rearrange the bytes of val according to the per-tag shuffle mask.
199 simd::i128 r = simd::I128::Shuffle8(val, mask);
// Write the whole result vector to dest.
// NOTE(review): the name StoreA4 suggests dest must be 4-byte aligned -- confirm.
200 simd::I128::StoreA4(dest, r);
201 return p + count_[tag];
// SIMD variant with separate outputs: decodes one group and stores the four
// results through v0..v3, returning p advanced by count_[tag].
// NOTE(review): the rest of the parameter list and the lines that define
// `tag` and `val` are missing from this excerpt.
204 inline const uint8_t* GroupVarInt32::Decode(uint32_t* v0, uint32_t* v1,
205 uint32_t* v2, uint32_t* v3,
// decodemasks_ is indexed in 16-element strides: one shuffle mask per tag.
209 simd::i128 mask = simd::I128::LoadA16(&decodemasks_[tag * 16]);
// Rearrange the bytes of val according to the per-tag shuffle mask.
210 simd::i128 r = simd::I128::Shuffle8(val, mask);
// Lanes 0..3 of the shuffled vector become *v0..*v3 respectively.
212 *v0 = simd::I128::GetUint32FromLane<0>(r);
213 *v1 = simd::I128::GetUint32FromLane<1>(r);
214 *v2 = simd::I128::GetUint32FromLane<2>(r);
215 *v3 = simd::I128::GetUint32FromLane<3>(r);
216 return p + count_[tag];
// Scalar (non-SIMD) variant: rebuilds each value by OR-ing its bytes into
// `tmp`, with the byte at p + 1 shifted by 8, p + 2 by 16 and p + 3 by 24.
// NOTE(review): the rest of the parameter list, the reading of `tag`, the
// declaration of `tmp`, the control flow that selects which bytes are read,
// the stores through v0..v3 and the return statement are missing from this
// excerpt.
// NOTE(review): `*(p + 3) << 24` promotes the uint8_t to int before shifting,
// so a byte >= 0x80 shifts into the sign bit; consider casting to uint32_t
// first -- confirm against the full file and the targeted language standard.
219 inline const uint8_t* GroupVarInt32::Decode(uint32_t* v0, uint32_t* v1,
220 uint32_t* v2, uint32_t* v3,
// Higher-order bytes of the first value.
231 tmp |= *(p + 3) << 24;
234 tmp |= *(p + 2) << 16;
237 tmp |= *(p + 1) << 8;
// Bits 2-3 of the tag form the 2-bit field for the second value.
248 int v1k = (tag >> 2) & 3;
252 tmp |= *(p + 3) << 24;
255 tmp |= *(p + 2) << 16;
258 tmp |= *(p + 1) << 8;
// Bits 4-5 of the tag form the 2-bit field for the third value.
269 int v2k = (tag >> 4) & 3;
273 tmp |= *(p + 3) << 24;
276 tmp |= *(p + 2) << 16;
279 tmp |= *(p + 1) << 8;
// Bits 6-7 of the tag form the 2-bit field for the fourth value.
290 int v3k = (tag >> 6) & 3;
294 tmp |= *(p + 3) << 24;
297 tmp |= *(p + 2) << 16;
300 tmp |= *(p + 1) << 8;
// Array overload for the scalar build: passes the addresses of dest[0]..dest[3]
// to the four-pointer Decode and returns the pointer that overload returns.
// dest must therefore reference at least four writable uint32_t elements.
314 inline const uint8_t* GroupVarInt32::Decode(uint32_t* dest,
const uint8_t* p)
NLIB_NOEXCEPT {
315 return Decode(&dest[0], &dest[1], &dest[2], &dest[3], p);
321 #endif // INCLUDE_NN_NLIB_GROUPVARINT_H_ Implements the class and functions for SIMD computations on integers.
Class for encoding and decoding four 32-bit integer values as a unit.
nlib_i128_t i128
nlib_i128_t is defined using typedef.
static size_t GetEncodedSize(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) noexcept
Returns the total encoded size of the four integer values.
#define NLIB_NOEXCEPT
Defines noexcept, or an equivalent appropriate to the build environment.
A file that contains the configuration information for each development environment.
#define NLIB_ALIGNAS(x)
Defines alignas(x) or the equivalent.
static size_t GetMaxSize(size_t n) noexcept
Returns the maximum encoded size of n integer values.