16 #ifndef INCLUDE_NN_NLIB_GROUPVARINT_H_ 17 #define INCLUDE_NN_NLIB_GROUPVARINT_H_ 34 static uint8_t* Encode(uint8_t* p, uint32_t v0, uint32_t v1, uint32_t v2,
37 static const uint8_t* Decode(uint32_t* v0, uint32_t* v1, uint32_t* v2, uint32_t* v3,
41 return ((n + 3) / 4) * (1 + 4 *
sizeof(uint32_t));
56 inline size_t GroupVarInt32::GetEncodedSize(
const uint32_t* p)
NLIB_NOEXCEPT {
57 return GetEncodedSize(p[0], p[1], p[2], p[3]);
60 inline size_t GroupVarInt32::GetNextGroupSize(
const uint8_t* p)
NLIB_NOEXCEPT {
65 return 5 + (v & 3) + ((v >> 2) & 3) + ((v >> 4) & 3) + ((v >> 6) & 3);
69 inline size_t GroupVarInt32::GetTotalSize(
const uint32_t* p,
size_t n)
NLIB_NOEXCEPT {
71 for (; n > 4; n -= 4, p += 4) {
72 rval += GetEncodedSize(p);
81 inline uint8_t* GroupVarInt32::Encode(uint8_t* p,
const uint32_t* src)
NLIB_NOEXCEPT {
83 int imask = simd::I128::MoveMask8(simd::I128::CmpEq8(val, simd::I128::SetZero()));
84 uint8_t tag = tagarray_[imask & 0xFF] | (tagarray_[imask >> 8] << 4);
87 simd::i128 mask = simd::I128::LoadA16(&encodemasks_[tag * 16]);
88 simd::i128 r = simd::I128::Shuffle8(val, mask);
89 simd::I128::StoreA1(p + 1, r);
91 return p + count_[tag];
94 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, uint32_t v0, uint32_t v1, uint32_t v2,
97 val = simd::I128::SetUint32ToLane<0>(simd::I128::SetZero(), v0);
98 val = simd::I128::SetUint32ToLane<1>(val, v1);
99 val = simd::I128::SetUint32ToLane<2>(val, v2);
100 val = simd::I128::SetUint32ToLane<3>(val, v3);
101 int imask = simd::I128::MoveMask8(simd::I128::CmpEq8(val, simd::I128::SetZero()));
102 uint8_t tag = tagarray_[imask & 0xFF] | (tagarray_[imask >> 8] << 4);
105 simd::i128 mask = simd::I128::LoadA16(&encodemasks_[tag * 16]);
106 simd::i128 r = simd::I128::Shuffle8(val, mask);
107 simd::I128::StoreA1(p + 1, r);
109 return p + count_[tag];
112 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, uint32_t v0, uint32_t v1, uint32_t v2,
118 *p++ =
static_cast<uint8_t
>((v3k << 6) | (v2k << 4) | (v1k << 2) | v0k);
123 *(p + 3) = static_cast<uint8_t>(v0 >> 24);
126 *(p + 2) = static_cast<uint8_t>((v0 >> 16) & 0xFF);
129 *(p + 1) = static_cast<uint8_t>((v0 >> 8) & 0xFF);
132 *p =
static_cast<uint8_t
>(v0);
142 *(p + 3) = static_cast<uint8_t>(v1 >> 24);
145 *(p + 2) = static_cast<uint8_t>((v1 >> 16) & 0xFF);
148 *(p + 1) = static_cast<uint8_t>((v1 >> 8) & 0xFF);
151 *p =
static_cast<uint8_t
>(v1);
161 *(p + 3) = static_cast<uint8_t>(v2 >> 24);
164 *(p + 2) = static_cast<uint8_t>((v2 >> 16) & 0xFF);
167 *(p + 1) = static_cast<uint8_t>((v2 >> 8) & 0xFF);
170 *p =
static_cast<uint8_t
>(v2);
180 *(p + 3) = static_cast<uint8_t>(v3 >> 24);
183 *(p + 2) = static_cast<uint8_t>((v3 >> 16) & 0xFF);
186 *(p + 1) = static_cast<uint8_t>((v3 >> 8) & 0xFF);
189 *p =
static_cast<uint8_t
>(v3);
199 inline uint8_t* GroupVarInt32::Encode(uint8_t* p,
const uint32_t* src)
NLIB_NOEXCEPT {
200 return Encode(p, src[0], src[1], src[2], src[3]);
205 inline const uint8_t* GroupVarInt32::Decode(uint32_t* dest,
const uint8_t* p)
NLIB_NOEXCEPT {
208 simd::i128 mask = simd::I128::LoadA16(&decodemasks_[tag * 16]);
209 simd::i128 r = simd::I128::Shuffle8(val, mask);
210 simd::I128::StoreA4(dest, r);
211 return p + count_[tag];
214 inline const uint8_t* GroupVarInt32::Decode(uint32_t* v0, uint32_t* v1, uint32_t* v2, uint32_t* v3,
218 simd::i128 mask = simd::I128::LoadA16(&decodemasks_[tag * 16]);
219 simd::i128 r = simd::I128::Shuffle8(val, mask);
221 *v0 = simd::I128::GetUint32FromLane<0>(r);
222 *v1 = simd::I128::GetUint32FromLane<1>(r);
223 *v2 = simd::I128::GetUint32FromLane<2>(r);
224 *v3 = simd::I128::GetUint32FromLane<3>(r);
225 return p + count_[tag];
228 inline const uint8_t* GroupVarInt32::Decode(uint32_t* v0, uint32_t* v1, uint32_t* v2, uint32_t* v3,
239 tmp |= *(p + 3) << 24;
242 tmp |= *(p + 2) << 16;
245 tmp |= *(p + 1) << 8;
256 int v1k = (tag >> 2) & 3;
260 tmp |= *(p + 3) << 24;
263 tmp |= *(p + 2) << 16;
266 tmp |= *(p + 1) << 8;
277 int v2k = (tag >> 4) & 3;
281 tmp |= *(p + 3) << 24;
284 tmp |= *(p + 2) << 16;
287 tmp |= *(p + 1) << 8;
298 int v3k = (tag >> 6) & 3;
302 tmp |= *(p + 3) << 24;
305 tmp |= *(p + 2) << 16;
308 tmp |= *(p + 1) << 8;
322 inline const uint8_t* GroupVarInt32::Decode(uint32_t* dest,
const uint8_t* p)
NLIB_NOEXCEPT {
323 return Decode(&dest[0], &dest[1], &dest[2], &dest[3], p);
329 #endif // INCLUDE_NN_NLIB_GROUPVARINT_H_ #define NLIB_DISALLOW_COPY_AND_ASSIGN(TypeName)
Prohibits use of the copy constructor and assignment operator for the class specified by TypeName...
Implements the class and functions for SIMD computations on integers.
Class for encoding and decoding four 32-bit integer values as a unit.
nlib_i128_t i128
nlib_i128_t is defined using typedef.
static size_t GetEncodedSize(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) noexcept
Returns the encoded size of the four integer values.
#define NLIB_NOEXCEPT
Defines noexcept, or an equivalent, as appropriate for the build environment.
A file that contains the configuration information for each development environment.
#define NLIB_ALIGNAS(x)
Defines alignas(x) or the equivalent.
static size_t GetMaxSize(size_t n) noexcept
Returns the maximum encoded size of n integer values.