16 #ifndef INCLUDE_NN_NLIB_GROUPVARINT_H_ 17 #define INCLUDE_NN_NLIB_GROUPVARINT_H_ 30 return (1 + 4 + 4 + 4 + 4) -
38 static uint8_t* Encode(uint8_t* p, uint32_t v0, uint32_t v1,
41 static const uint8_t* Decode(uint32_t* v0, uint32_t* v1,
42 uint32_t* v2, uint32_t* v3,
46 return ((n + 3) / 4) * (1 + 4 *
sizeof(uint32_t));
61 inline size_t GroupVarInt32::GetEncodedSize(
const uint32_t* p)
NLIB_NOEXCEPT {
62 return GetEncodedSize(p[0], p[1], p[2], p[3]);
65 inline size_t GroupVarInt32::GetNextGroupSize(
const uint8_t* p)
NLIB_NOEXCEPT {
70 return 5 + (v & 3) + ((v >> 2) & 3) + ((v >> 4) & 3) + ((v >> 6) & 3);
74 inline size_t GroupVarInt32::GetTotalSize(
const uint32_t* p,
size_t n)
NLIB_NOEXCEPT {
76 for (; n > 4; n -= 4, p += 4) {
77 rval += GetEncodedSize(p);
86 inline uint8_t* GroupVarInt32::Encode(uint8_t* p,
const uint32_t* src)
NLIB_NOEXCEPT {
88 int imask = simd::I128::MoveMask8(simd::I128::CmpEq8(val, simd::I128::SetZero()));
89 uint8_t tag = tagarray_[imask & 0xFF] | (tagarray_[imask >> 8] << 4);
92 simd::i128 mask = simd::I128::LoadA16(&encodemasks_[tag * 16]);
93 simd::i128 r = simd::I128::Shuffle8(val, mask);
94 simd::I128::StoreA1(p + 1, r);
96 return p + count_[tag];
99 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, uint32_t v0, uint32_t v1,
102 val = simd::I128::SetUint32ToLane<0>(simd::I128::SetZero(), v0);
103 val = simd::I128::SetUint32ToLane<1>(val, v1);
104 val = simd::I128::SetUint32ToLane<2>(val, v2);
105 val = simd::I128::SetUint32ToLane<3>(val, v3);
106 int imask = simd::I128::MoveMask8(simd::I128::CmpEq8(val, simd::I128::SetZero()));
107 uint8_t tag = tagarray_[imask & 0xFF] | (tagarray_[imask >> 8] << 4);
110 simd::i128 mask = simd::I128::LoadA16(&encodemasks_[tag * 16]);
111 simd::i128 r = simd::I128::Shuffle8(val, mask);
112 simd::I128::StoreA1(p + 1, r);
114 return p + count_[tag];
117 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, uint32_t v0, uint32_t v1,
123 *p++ =
static_cast<uint8_t
>((v3k << 6) | (v2k << 4) | (v1k << 2) | v0k);
128 *(p + 3) = static_cast<uint8_t>(v0 >> 24);
131 *(p + 2) = static_cast<uint8_t>((v0 >> 16) & 0xFF);
134 *(p + 1) = static_cast<uint8_t>((v0 >> 8) & 0xFF);
137 *p =
static_cast<uint8_t
>(v0);
147 *(p + 3) = static_cast<uint8_t>(v1 >> 24);
150 *(p + 2) = static_cast<uint8_t>((v1 >> 16) & 0xFF);
153 *(p + 1) = static_cast<uint8_t>((v1 >> 8) & 0xFF);
156 *p =
static_cast<uint8_t
>(v1);
166 *(p + 3) = static_cast<uint8_t>(v2 >> 24);
169 *(p + 2) = static_cast<uint8_t>((v2 >> 16) & 0xFF);
172 *(p + 1) = static_cast<uint8_t>((v2 >> 8) & 0xFF);
175 *p =
static_cast<uint8_t
>(v2);
185 *(p + 3) = static_cast<uint8_t>(v3 >> 24);
188 *(p + 2) = static_cast<uint8_t>((v3 >> 16) & 0xFF);
191 *(p + 1) = static_cast<uint8_t>((v3 >> 8) & 0xFF);
194 *p =
static_cast<uint8_t
>(v3);
204 inline uint8_t* GroupVarInt32::Encode(uint8_t* p,
const uint32_t* src)
NLIB_NOEXCEPT {
205 return Encode(p, src[0], src[1], src[2], src[3]);
210 inline const uint8_t* GroupVarInt32::Decode(uint32_t* dest,
const uint8_t* p)
NLIB_NOEXCEPT {
213 simd::i128 mask = simd::I128::LoadA16(&decodemasks_[tag * 16]);
214 simd::i128 r = simd::I128::Shuffle8(val, mask);
215 simd::I128::StoreA4(dest, r);
216 return p + count_[tag];
219 inline const uint8_t* GroupVarInt32::Decode(uint32_t* v0, uint32_t* v1,
220 uint32_t* v2, uint32_t* v3,
224 simd::i128 mask = simd::I128::LoadA16(&decodemasks_[tag * 16]);
225 simd::i128 r = simd::I128::Shuffle8(val, mask);
227 *v0 = simd::I128::GetUint32FromLane<0>(r);
228 *v1 = simd::I128::GetUint32FromLane<1>(r);
229 *v2 = simd::I128::GetUint32FromLane<2>(r);
230 *v3 = simd::I128::GetUint32FromLane<3>(r);
231 return p + count_[tag];
234 inline const uint8_t* GroupVarInt32::Decode(uint32_t* v0, uint32_t* v1,
235 uint32_t* v2, uint32_t* v3,
246 tmp |= *(p + 3) << 24;
249 tmp |= *(p + 2) << 16;
252 tmp |= *(p + 1) << 8;
263 int v1k = (tag >> 2) & 3;
267 tmp |= *(p + 3) << 24;
270 tmp |= *(p + 2) << 16;
273 tmp |= *(p + 1) << 8;
284 int v2k = (tag >> 4) & 3;
288 tmp |= *(p + 3) << 24;
291 tmp |= *(p + 2) << 16;
294 tmp |= *(p + 1) << 8;
305 int v3k = (tag >> 6) & 3;
309 tmp |= *(p + 3) << 24;
312 tmp |= *(p + 2) << 16;
315 tmp |= *(p + 1) << 8;
329 inline const uint8_t* GroupVarInt32::Decode(uint32_t* dest,
const uint8_t* p)
NLIB_NOEXCEPT {
330 return Decode(&dest[0], &dest[1], &dest[2], &dest[3], p);
336 #endif // INCLUDE_NN_NLIB_GROUPVARINT_H_
#define NLIB_DISALLOW_COPY_AND_ASSIGN(TypeName)
Prohibits use of the copy constructor and assignment operator for the class specified by TypeName...
Implements the class and functions for SIMD computations on integers.
Class for encoding and decoding four 32-bit integer values as a unit.
nlib_i128_t i128
i128 is a typedef for nlib_i128_t.
static size_t GetEncodedSize(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) noexcept
Returns the encoded size of the four integer values.
#define NLIB_NOEXCEPT
Defines noexcept geared to the environment, or the equivalent.
A file that contains the configuration information for each development environment.
#define NLIB_ALIGNAS(x)
Defines alignas(x) or the equivalent.
static size_t GetMaxSize(size_t n) noexcept
Returns the maximum encoded size of n integer values.