nlib
GroupVarInt.h
1 
2 #pragma once
3 #ifndef INCLUDE_NN_NLIB_GROUPVARINT_H_
4 #define INCLUDE_NN_NLIB_GROUPVARINT_H_
5 
6 #include "nn/nlib/Config.h"
7 #ifdef NLIB_SIMD
8 #include "nn/nlib/simd/SimdInt.h"
9 #endif
10 
11 NLIB_NAMESPACE_BEGIN
12 
14  public:
15  static size_t GetEncodedSize(uint32_t v0, uint32_t v1,
16  uint32_t v2, uint32_t v3) NLIB_NOEXCEPT {
17  return (1 + 4 + 4 + 4 + 4) -
18  (nlib_clz(v0 | 1) / 8) -
19  (nlib_clz(v1 | 1) / 8) -
20  (nlib_clz(v2 | 1) / 8) -
21  (nlib_clz(v3 | 1) / 8);
22  }
23  static size_t GetEncodedSize(const uint32_t* p) NLIB_NOEXCEPT NLIB_NONNULL;
24  static size_t GetNextGroupSize(const uint8_t* p) NLIB_NOEXCEPT NLIB_NONNULL;
25  static uint8_t* Encode(uint8_t* p, uint32_t v0, uint32_t v1,
26  uint32_t v2, uint32_t v3) NLIB_NOEXCEPT NLIB_NONNULL;
27  static uint8_t* Encode(uint8_t* p, const uint32_t* src) NLIB_NOEXCEPT NLIB_NONNULL;
28  static const uint8_t* Decode(uint32_t* v0, uint32_t* v1,
29  uint32_t* v2, uint32_t* v3,
30  const uint8_t* p) NLIB_NOEXCEPT NLIB_NONNULL;
31  static const uint8_t* Decode(uint32_t* dest, const uint8_t* p) NLIB_NOEXCEPT NLIB_NONNULL;
32  static size_t GetMaxSize(size_t n) NLIB_NOEXCEPT {
33  return ((n + 3) / 4) * (1 + 4 * sizeof(uint32_t));
34  }
35  static size_t GetTotalSize(const uint32_t* p, size_t n) NLIB_NOEXCEPT NLIB_NONNULL;
36 
37  private:
38 #ifdef NLIB_SIMD
39  NLIB_ALIGNAS(16) static NLIB_VIS_PUBLIC const uint8_t decodemasks_[256 * 16];
40  NLIB_ALIGNAS(16) static NLIB_VIS_PUBLIC const uint8_t encodemasks_[256 * 16];
41  static NLIB_VIS_PUBLIC const uint8_t count_[256];
42  static NLIB_VIS_PUBLIC const uint8_t tagarray_[256];
43 #endif
44 };
45 
46 inline size_t GroupVarInt32::GetEncodedSize(const uint32_t* p) NLIB_NOEXCEPT {
47  return GetEncodedSize(p[0], p[1], p[2], p[3]);
48 }
49 
50 inline size_t GroupVarInt32::GetNextGroupSize(const uint8_t* p) NLIB_NOEXCEPT {
51 #ifdef NLIB_SIMD
52  return count_[*p];
53 #else
54  uint8_t v = *p;
55  return 5 + (v & 3) + ((v >> 2) & 3) + ((v >> 4) & 3) + ((v >> 6) & 3);
56 #endif
57 }
58 
59 inline size_t GroupVarInt32::GetTotalSize(const uint32_t* p, size_t n) NLIB_NOEXCEPT {
60  size_t rval = 0;
61  for (; n > 4; n -= 4, p += 4) {
62  rval += GetEncodedSize(p);
63  }
64  // The decoder must be able to read 1 + 16 bytes anywhere.
65  // This is for _mm_loadu_si128 safety.
66  rval += 1 + 16;
67  return rval;
68 }
69 
70 #ifdef NLIB_SIMD
71 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, const uint32_t* src) NLIB_NOEXCEPT {
72  simd::i128 val = simd::I128::LoadA4(src);
73  int imask = simd::I128::MoveMask8(simd::I128::CmpEq8(val, simd::I128::SetZero()));
74  uint8_t tag = tagarray_[imask & 0xFF] | (tagarray_[imask >> 8] << 4);
75 
76  *p = tag;
77  simd::i128 mask = simd::I128::LoadA16(&encodemasks_[tag * 16]);
78  simd::i128 r = simd::I128::Shuffle8(val, mask);
79  simd::I128::StoreA1(p + 1, r);
80 
81  return p + count_[tag];
82 }
83 
84 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, uint32_t v0, uint32_t v1,
85  uint32_t v2, uint32_t v3) NLIB_NOEXCEPT {
86  simd::i128 val;
87  val = simd::I128::SetUint32ToLane<0>(simd::I128::SetZero(), v0);
88  val = simd::I128::SetUint32ToLane<1>(val, v1);
89  val = simd::I128::SetUint32ToLane<2>(val, v2);
90  val = simd::I128::SetUint32ToLane<3>(val, v3);
91  int imask = simd::I128::MoveMask8(simd::I128::CmpEq8(val, simd::I128::SetZero()));
92  uint8_t tag = tagarray_[imask & 0xFF] | (tagarray_[imask >> 8] << 4);
93 
94  *p = tag;
95  simd::i128 mask = simd::I128::LoadA16(&encodemasks_[tag * 16]);
96  simd::i128 r = simd::I128::Shuffle8(val, mask);
97  simd::I128::StoreA1(p + 1, r);
98 
99  return p + count_[tag];
100 }
101 #else
102 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, uint32_t v0, uint32_t v1,
103  uint32_t v2, uint32_t v3) NLIB_NOEXCEPT {
104  int v0k = 3 - (nlib_clz(v0 | 1) / 8);
105  int v1k = 3 - (nlib_clz(v1 | 1) / 8);
106  int v2k = 3 - (nlib_clz(v2 | 1) / 8);
107  int v3k = 3 - (nlib_clz(v3 | 1) / 8);
108  *p++ = static_cast<uint8_t>((v3k << 6) | (v2k << 4) | (v1k << 2) | v0k);
109 
110  // v0
111  switch (v0k) {
112  case 3:
113  *(p + 3) = static_cast<uint8_t>(v0 >> 24);
114  // fall through
115  case 2:
116  *(p + 2) = static_cast<uint8_t>((v0 >> 16) & 0xFF);
117  // fall through
118  case 1:
119  *(p + 1) = static_cast<uint8_t>((v0 >> 8) & 0xFF);
120  // fall through
121  case 0:
122  *p = static_cast<uint8_t>(v0);
123  break;
124  default:
125  NLIB_ASSUME(0);
126  }
127  p += v0k + 1;
128 
129  // v1
130  switch (v1k) {
131  case 3:
132  *(p + 3) = static_cast<uint8_t>(v1 >> 24);
133  // fall through
134  case 2:
135  *(p + 2) = static_cast<uint8_t>((v1 >> 16) & 0xFF);
136  // fall through
137  case 1:
138  *(p + 1) = static_cast<uint8_t>((v1 >> 8) & 0xFF);
139  // fall through
140  case 0:
141  *p = static_cast<uint8_t>(v1);
142  break;
143  default:
144  NLIB_ASSUME(0);
145  }
146  p += v1k + 1;
147 
148  // v2
149  switch (v2k) {
150  case 3:
151  *(p + 3) = static_cast<uint8_t>(v2 >> 24);
152  // fall through
153  case 2:
154  *(p + 2) = static_cast<uint8_t>((v2 >> 16) & 0xFF);
155  // fall through
156  case 1:
157  *(p + 1) = static_cast<uint8_t>((v2 >> 8) & 0xFF);
158  // fall through
159  case 0:
160  *p = static_cast<uint8_t>(v2);
161  break;
162  default:
163  NLIB_ASSUME(0);
164  }
165  p += v2k + 1;
166 
167  // v3
168  switch (v3k) {
169  case 3:
170  *(p + 3) = static_cast<uint8_t>(v3 >> 24);
171  // fall through
172  case 2:
173  *(p + 2) = static_cast<uint8_t>((v3 >> 16) & 0xFF);
174  // fall through
175  case 1:
176  *(p + 1) = static_cast<uint8_t>((v3 >> 8) & 0xFF);
177  // fall through
178  case 0:
179  *p = static_cast<uint8_t>(v3);
180  break;
181  default:
182  NLIB_ASSUME(0);
183  }
184  p += v3k + 1;
185 
186  return p;
187 }
188 
189 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, const uint32_t* src) NLIB_NOEXCEPT {
190  return Encode(p, src[0], src[1], src[2], src[3]);
191 }
192 #endif
193 
194 #ifdef NLIB_SIMD
195 inline const uint8_t* GroupVarInt32::Decode(uint32_t* dest, const uint8_t* p) NLIB_NOEXCEPT {
196  uint8_t tag = *p;
197  simd::i128 val = simd::I128::LoadA1(p + 1);
198  simd::i128 mask = simd::I128::LoadA16(&decodemasks_[tag * 16]);
199  simd::i128 r = simd::I128::Shuffle8(val, mask);
200  simd::I128::StoreA4(dest, r);
201  return p + count_[tag];
202 }
203 
204 inline const uint8_t* GroupVarInt32::Decode(uint32_t* v0, uint32_t* v1,
205  uint32_t* v2, uint32_t* v3,
206  const uint8_t* p) NLIB_NOEXCEPT {
207  uint8_t tag = *p;
208  simd::i128 val = simd::I128::LoadA1(p + 1);
209  simd::i128 mask = simd::I128::LoadA16(&decodemasks_[tag * 16]);
210  simd::i128 r = simd::I128::Shuffle8(val, mask);
211 
212  *v0 = simd::I128::GetUint32FromLane<0>(r);
213  *v1 = simd::I128::GetUint32FromLane<1>(r);
214  *v2 = simd::I128::GetUint32FromLane<2>(r);
215  *v3 = simd::I128::GetUint32FromLane<3>(r);
216  return p + count_[tag];
217 }
218 #else
219 inline const uint8_t* GroupVarInt32::Decode(uint32_t* v0, uint32_t* v1,
220  uint32_t* v2, uint32_t* v3,
221  const uint8_t* p) NLIB_NOEXCEPT {
222  uint8_t tag = *p;
223  ++p;
224  uint32_t tmp;
225 
226  // v0
227  int v0k = tag & 3;
228  tmp = 0;
229  switch (v0k) {
230  case 3:
231  tmp |= *(p + 3) << 24;
232  // fall through
233  case 2:
234  tmp |= *(p + 2) << 16;
235  // fall through
236  case 1:
237  tmp |= *(p + 1) << 8;
238  // fall through
239  case 0:
240  tmp |= *p;
241  break;
242  default:
243  NLIB_ASSUME(0);
244  }
245  *v0 = tmp;
246  p += v0k + 1;
247 
248  int v1k = (tag >> 2) & 3;
249  tmp = 0;
250  switch (v1k) {
251  case 3:
252  tmp |= *(p + 3) << 24;
253  // fall through
254  case 2:
255  tmp |= *(p + 2) << 16;
256  // fall through
257  case 1:
258  tmp |= *(p + 1) << 8;
259  // fall through
260  case 0:
261  tmp |= *p;
262  break;
263  default:
264  NLIB_ASSUME(0);
265  }
266  *v1 = tmp;
267  p += v1k + 1;
268 
269  int v2k = (tag >> 4) & 3;
270  tmp = 0;
271  switch (v2k) {
272  case 3:
273  tmp |= *(p + 3) << 24;
274  // fall through
275  case 2:
276  tmp |= *(p + 2) << 16;
277  // fall through
278  case 1:
279  tmp |= *(p + 1) << 8;
280  // fall through
281  case 0:
282  tmp |= *p;
283  break;
284  default:
285  NLIB_ASSUME(0);
286  }
287  *v2 = tmp;
288  p += v2k + 1;
289 
290  int v3k = (tag >> 6) & 3;
291  tmp = 0;
292  switch (v3k) {
293  case 3:
294  tmp |= *(p + 3) << 24;
295  // fall through
296  case 2:
297  tmp |= *(p + 2) << 16;
298  // fall through
299  case 1:
300  tmp |= *(p + 1) << 8;
301  // fall through
302  case 0:
303  tmp |= *p;
304  break;
305  default:
306  NLIB_ASSUME(0);
307  }
308  *v3 = tmp;
309  p += v3k + 1;
310 
311  return p;
312 }
313 
314 inline const uint8_t* GroupVarInt32::Decode(uint32_t* dest, const uint8_t* p) NLIB_NOEXCEPT {
315  return Decode(&dest[0], &dest[1], &dest[2], &dest[3], p);
316 }
317 #endif
318 
319 NLIB_NAMESPACE_END
320 
321 #endif // INCLUDE_NN_NLIB_GROUPVARINT_H_
Implements the class and functions for SIMD computations on integers.
#define NLIB_VIS_PUBLIC
Symbols for functions and classes are made available outside of the library.
Definition: Platform_unix.h:61
#define NLIB_ASSUME(cond)
Indicates that cond is true, giving the compiler a hint it can use for optimization.
Definition: Platform.h:581
Class for encoding and decoding four 32-bit integer values as a unit.
Definition: GroupVarInt.h:13
nlib_i128_t i128
nlib_i128_t is defined using typedef.
Definition: SimdInt.h:63
static size_t GetEncodedSize(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) noexcept
Returns the size of the encoded four integer values.
Definition: GroupVarInt.h:15
#define NLIB_NOEXCEPT
Defines noexcept geared to the environment, or the equivalent.
Definition: Config.h:86
A file that contains the configuration information for each development environment.
#define NLIB_ALIGNAS(x)
Defines alignas(x) or the equivalent.
Definition: Config.h:221
static size_t GetMaxSize(size_t n) noexcept
Returns the maximum size of encoded n integer values.
Definition: GroupVarInt.h:32
#define NLIB_NONNULL
Indicates that NULL must not be specified for any of the arguments.
Definition: Platform_unix.h:76