16 #ifndef INCLUDE_NN_NLIB_GROUPVARINT_H_ 17 #define INCLUDE_NN_NLIB_GROUPVARINT_H_ 30 return (1 + 4 + 4 + 4 + 4) -
38 static uint8_t* Encode(uint8_t* p, uint32_t v0, uint32_t v1,
41 static const uint8_t* Decode(uint32_t* v0, uint32_t* v1,
42 uint32_t* v2, uint32_t* v3,
46 return ((n + 3) / 4) * (1 + 4 *
sizeof(uint32_t));
59 inline size_t GroupVarInt32::GetEncodedSize(
const uint32_t* p)
NLIB_NOEXCEPT {
60 return GetEncodedSize(p[0], p[1], p[2], p[3]);
63 inline size_t GroupVarInt32::GetNextGroupSize(
const uint8_t* p)
NLIB_NOEXCEPT {
68 return 5 + (v & 3) + ((v >> 2) & 3) + ((v >> 4) & 3) + ((v >> 6) & 3);
72 inline size_t GroupVarInt32::GetTotalSize(
const uint32_t* p,
size_t n)
NLIB_NOEXCEPT {
74 for (; n > 4; n -= 4, p += 4) {
75 rval += GetEncodedSize(p);
84 inline uint8_t* GroupVarInt32::Encode(uint8_t* p,
const uint32_t* src)
NLIB_NOEXCEPT {
86 int imask = simd::I128::MoveMask8(simd::I128::CmpEq8(val, simd::I128::SetZero()));
87 uint8_t tag = tagarray_[imask & 0xFF] | (tagarray_[imask >> 8] << 4);
90 simd::i128 mask = simd::I128::LoadA16(&encodemasks_[tag * 16]);
91 simd::i128 r = simd::I128::Shuffle8(val, mask);
92 simd::I128::StoreA1(p + 1, r);
94 return p + count_[tag];
97 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, uint32_t v0, uint32_t v1,
100 val = simd::I128::SetUint32ToLane<0>(simd::I128::SetZero(), v0);
101 val = simd::I128::SetUint32ToLane<1>(val, v1);
102 val = simd::I128::SetUint32ToLane<2>(val, v2);
103 val = simd::I128::SetUint32ToLane<3>(val, v3);
104 int imask = simd::I128::MoveMask8(simd::I128::CmpEq8(val, simd::I128::SetZero()));
105 uint8_t tag = tagarray_[imask & 0xFF] | (tagarray_[imask >> 8] << 4);
108 simd::i128 mask = simd::I128::LoadA16(&encodemasks_[tag * 16]);
109 simd::i128 r = simd::I128::Shuffle8(val, mask);
110 simd::I128::StoreA1(p + 1, r);
112 return p + count_[tag];
115 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, uint32_t v0, uint32_t v1,
121 *p++ =
static_cast<uint8_t
>((v3k << 6) | (v2k << 4) | (v1k << 2) | v0k);
126 *(p + 3) = static_cast<uint8_t>(v0 >> 24);
129 *(p + 2) = static_cast<uint8_t>((v0 >> 16) & 0xFF);
132 *(p + 1) = static_cast<uint8_t>((v0 >> 8) & 0xFF);
135 *p =
static_cast<uint8_t
>(v0);
145 *(p + 3) = static_cast<uint8_t>(v1 >> 24);
148 *(p + 2) = static_cast<uint8_t>((v1 >> 16) & 0xFF);
151 *(p + 1) = static_cast<uint8_t>((v1 >> 8) & 0xFF);
154 *p =
static_cast<uint8_t
>(v1);
164 *(p + 3) = static_cast<uint8_t>(v2 >> 24);
167 *(p + 2) = static_cast<uint8_t>((v2 >> 16) & 0xFF);
170 *(p + 1) = static_cast<uint8_t>((v2 >> 8) & 0xFF);
173 *p =
static_cast<uint8_t
>(v2);
183 *(p + 3) = static_cast<uint8_t>(v3 >> 24);
186 *(p + 2) = static_cast<uint8_t>((v3 >> 16) & 0xFF);
189 *(p + 1) = static_cast<uint8_t>((v3 >> 8) & 0xFF);
192 *p =
static_cast<uint8_t
>(v3);
202 inline uint8_t* GroupVarInt32::Encode(uint8_t* p,
const uint32_t* src)
NLIB_NOEXCEPT {
203 return Encode(p, src[0], src[1], src[2], src[3]);
208 inline const uint8_t* GroupVarInt32::Decode(uint32_t* dest,
const uint8_t* p)
NLIB_NOEXCEPT {
211 simd::i128 mask = simd::I128::LoadA16(&decodemasks_[tag * 16]);
212 simd::i128 r = simd::I128::Shuffle8(val, mask);
213 simd::I128::StoreA4(dest, r);
214 return p + count_[tag];
217 inline const uint8_t* GroupVarInt32::Decode(uint32_t* v0, uint32_t* v1,
218 uint32_t* v2, uint32_t* v3,
222 simd::i128 mask = simd::I128::LoadA16(&decodemasks_[tag * 16]);
223 simd::i128 r = simd::I128::Shuffle8(val, mask);
225 *v0 = simd::I128::GetUint32FromLane<0>(r);
226 *v1 = simd::I128::GetUint32FromLane<1>(r);
227 *v2 = simd::I128::GetUint32FromLane<2>(r);
228 *v3 = simd::I128::GetUint32FromLane<3>(r);
229 return p + count_[tag];
232 inline const uint8_t* GroupVarInt32::Decode(uint32_t* v0, uint32_t* v1,
233 uint32_t* v2, uint32_t* v3,
244 tmp |= *(p + 3) << 24;
247 tmp |= *(p + 2) << 16;
250 tmp |= *(p + 1) << 8;
261 int v1k = (tag >> 2) & 3;
265 tmp |= *(p + 3) << 24;
268 tmp |= *(p + 2) << 16;
271 tmp |= *(p + 1) << 8;
282 int v2k = (tag >> 4) & 3;
286 tmp |= *(p + 3) << 24;
289 tmp |= *(p + 2) << 16;
292 tmp |= *(p + 1) << 8;
303 int v3k = (tag >> 6) & 3;
307 tmp |= *(p + 3) << 24;
310 tmp |= *(p + 2) << 16;
313 tmp |= *(p + 1) << 8;
327 inline const uint8_t* GroupVarInt32::Decode(uint32_t* dest,
const uint8_t* p)
NLIB_NOEXCEPT {
328 return Decode(&dest[0], &dest[1], &dest[2], &dest[3], p);
334 #endif // INCLUDE_NN_NLIB_GROUPVARINT_H_
整数のSIMD演算を行うためのクラスや関数が実装されています。
32bit整数値を4個単位でエンコード及びデコードするためのクラスです。
nlib_i128_t i128
nlib_i128_tをtypedefした型です。
static size_t GetEncodedSize(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) noexcept
4つの整数値がエンコードされた場合のサイズを返します。
#define NLIB_NOEXCEPT
環境に合わせてnoexcept又は同等の定義がされます。
#define NLIB_ALIGNAS(x)
alignas(x)又は同等の定義がされます。
static size_t GetMaxSize(size_t n) noexcept
n個の整数値をエンコードした場合の最大サイズを返します。