nlib
GroupVarInt.h
1 
2 /*--------------------------------------------------------------------------------*
3  Project: CrossRoad
4  Copyright (C)Nintendo All rights reserved.
5 
6  These coded instructions, statements, and computer programs contain proprietary
7  information of Nintendo and/or its licensed developers and are protected by
8  national and international copyright laws. They may not be disclosed to third
9  parties or copied or duplicated in any form, in whole or in part, without the
10  prior written consent of Nintendo.
11 
12  The content herein is highly confidential and should be handled accordingly.
13  *--------------------------------------------------------------------------------*/
14 
15 #pragma once
16 #ifndef INCLUDE_NN_NLIB_GROUPVARINT_H_
17 #define INCLUDE_NN_NLIB_GROUPVARINT_H_
18 
19 #include "nn/nlib/Config.h"
20 #ifdef NLIB_SIMD
21 #include "nn/nlib/simd/SimdInt.h"
22 #endif
23 
24 NLIB_NAMESPACE_BEGIN
25 
27  public:
28  static size_t GetEncodedSize(uint32_t v0, uint32_t v1,
29  uint32_t v2, uint32_t v3) NLIB_NOEXCEPT {
30  return (1 + 4 + 4 + 4 + 4) -
31  (nlib_clz32(v0 | 1) / 8) -
32  (nlib_clz32(v1 | 1) / 8) -
33  (nlib_clz32(v2 | 1) / 8) -
34  (nlib_clz32(v3 | 1) / 8);
35  }
36  static size_t GetEncodedSize(const uint32_t* p) NLIB_NOEXCEPT NLIB_NONNULL;  // Encoded size of the four values p[0]..p[3].
37  static size_t GetNextGroupSize(const uint8_t* p) NLIB_NOEXCEPT NLIB_NONNULL;  // Size in bytes of the encoded group at p, derived from its first (tag) byte only.
38  static uint8_t* Encode(uint8_t* p, uint32_t v0, uint32_t v1,  // Writes one group (tag byte + payload) for v0..v3 at p;
39  uint32_t v2, uint32_t v3) NLIB_NOEXCEPT NLIB_NONNULL;  // returns the address just past the encoded group.
40  static uint8_t* Encode(uint8_t* p, const uint32_t* src) NLIB_NOEXCEPT NLIB_NONNULL;  // Same, taking the four values from src[0]..src[3].
41  static const uint8_t* Decode(uint32_t* v0, uint32_t* v1,  // Reads one group starting at p and stores its
42  uint32_t* v2, uint32_t* v3,  // four values through v0..v3;
43  const uint8_t* p) NLIB_NOEXCEPT NLIB_NONNULL;  // returns the address just past the group.
44  static const uint8_t* Decode(uint32_t* dest, const uint8_t* p) NLIB_NOEXCEPT NLIB_NONNULL;  // Same, storing the four values to dest[0]..dest[3].
45  static size_t GetMaxSize(size_t n) NLIB_NOEXCEPT {
46  return ((n + 3) / 4) * (1 + 4 * sizeof(uint32_t));
47  }
48  static size_t GetTotalSize(const uint32_t* p, size_t n) NLIB_NOEXCEPT NLIB_NONNULL;  // Sums GetEncodedSize() over leading groups of four while more than four values remain, then adds 1 + 16 bytes for the rest.
49 
50  private:
51 #ifdef NLIB_SIMD
52  NLIB_ALIGNAS(16) static NLIB_VIS_PUBLIC const uint8_t decodemasks_[256 * 16];  // One 16-byte Shuffle8 mask per tag value; SIMD Decode() loads the mask at tag * 16.
53  NLIB_ALIGNAS(16) static NLIB_VIS_PUBLIC const uint8_t encodemasks_[256 * 16];  // One 16-byte Shuffle8 mask per tag value; SIMD Encode() loads the mask at tag * 16.
54  static NLIB_VIS_PUBLIC const uint8_t count_[256];  // Indexed by tag; the byte count the SIMD paths add to p to step past a group.
55  static NLIB_VIS_PUBLIC const uint8_t tagarray_[256];  // Indexed by 8 bits of the zero-byte compare mask; yields one half of the tag (the upper half is shifted left by 4).
56 #endif
57 };
58 
59 inline size_t GroupVarInt32::GetEncodedSize(const uint32_t* p) NLIB_NOEXCEPT {
60  return GetEncodedSize(p[0], p[1], p[2], p[3]);
61 }
62 
63 inline size_t GroupVarInt32::GetNextGroupSize(const uint8_t* p) NLIB_NOEXCEPT {
64 #ifdef NLIB_SIMD
65  return count_[*p];
66 #else
67  uint8_t v = *p;
68  return 5 + (v & 3) + ((v >> 2) & 3) + ((v >> 4) & 3) + ((v >> 6) & 3);
69 #endif
70 }
71 
72 inline size_t GroupVarInt32::GetTotalSize(const uint32_t* p, size_t n) NLIB_NOEXCEPT {
73  size_t rval = 0;
74  for (; n > 4; n -= 4, p += 4) {
75  rval += GetEncodedSize(p);
76  }
77  // The decoder must be able to read 1 + 16 bytes anywhere.
78  // This is for _mm_loadu_si128 safety.
79  rval += 1 + 16;
80  return rval;
81 }
82 
83 #ifdef NLIB_SIMD
84 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, const uint32_t* src) NLIB_NOEXCEPT {
85  simd::i128 val = simd::I128::LoadA4(src);
86  int imask = simd::I128::MoveMask8(simd::I128::CmpEq8(val, simd::I128::SetZero()));
87  uint8_t tag = tagarray_[imask & 0xFF] | (tagarray_[imask >> 8] << 4);
88 
89  *p = tag;
90  simd::i128 mask = simd::I128::LoadA16(&encodemasks_[tag * 16]);
91  simd::i128 r = simd::I128::Shuffle8(val, mask);
92  simd::I128::StoreA1(p + 1, r);
93 
94  return p + count_[tag];
95 }
96 
97 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, uint32_t v0, uint32_t v1,
98  uint32_t v2, uint32_t v3) NLIB_NOEXCEPT {
99  simd::i128 val;
100  val = simd::I128::SetUint32ToLane<0>(simd::I128::SetZero(), v0);
101  val = simd::I128::SetUint32ToLane<1>(val, v1);
102  val = simd::I128::SetUint32ToLane<2>(val, v2);
103  val = simd::I128::SetUint32ToLane<3>(val, v3);
104  int imask = simd::I128::MoveMask8(simd::I128::CmpEq8(val, simd::I128::SetZero()));
105  uint8_t tag = tagarray_[imask & 0xFF] | (tagarray_[imask >> 8] << 4);
106 
107  *p = tag;
108  simd::i128 mask = simd::I128::LoadA16(&encodemasks_[tag * 16]);
109  simd::i128 r = simd::I128::Shuffle8(val, mask);
110  simd::I128::StoreA1(p + 1, r);
111 
112  return p + count_[tag];
113 }
114 #else
115 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, uint32_t v0, uint32_t v1,
116  uint32_t v2, uint32_t v3) NLIB_NOEXCEPT {
117  int v0k = 3 - (nlib_clz32(v0 | 1) / 8);
118  int v1k = 3 - (nlib_clz32(v1 | 1) / 8);
119  int v2k = 3 - (nlib_clz32(v2 | 1) / 8);
120  int v3k = 3 - (nlib_clz32(v3 | 1) / 8);
121  *p++ = static_cast<uint8_t>((v3k << 6) | (v2k << 4) | (v1k << 2) | v0k);
122 
123  // v0
124  switch (v0k) {
125  case 3:
126  *(p + 3) = static_cast<uint8_t>(v0 >> 24);
127  // fall through
128  case 2:
129  *(p + 2) = static_cast<uint8_t>((v0 >> 16) & 0xFF);
130  // fall through
131  case 1:
132  *(p + 1) = static_cast<uint8_t>((v0 >> 8) & 0xFF);
133  // fall through
134  case 0:
135  *p = static_cast<uint8_t>(v0);
136  break;
137  default:
138  NLIB_ASSUME(0);
139  }
140  p += v0k + 1;
141 
142  // v1
143  switch (v1k) {
144  case 3:
145  *(p + 3) = static_cast<uint8_t>(v1 >> 24);
146  // fall through
147  case 2:
148  *(p + 2) = static_cast<uint8_t>((v1 >> 16) & 0xFF);
149  // fall through
150  case 1:
151  *(p + 1) = static_cast<uint8_t>((v1 >> 8) & 0xFF);
152  // fall through
153  case 0:
154  *p = static_cast<uint8_t>(v1);
155  break;
156  default:
157  NLIB_ASSUME(0);
158  }
159  p += v1k + 1;
160 
161  // v2
162  switch (v2k) {
163  case 3:
164  *(p + 3) = static_cast<uint8_t>(v2 >> 24);
165  // fall through
166  case 2:
167  *(p + 2) = static_cast<uint8_t>((v2 >> 16) & 0xFF);
168  // fall through
169  case 1:
170  *(p + 1) = static_cast<uint8_t>((v2 >> 8) & 0xFF);
171  // fall through
172  case 0:
173  *p = static_cast<uint8_t>(v2);
174  break;
175  default:
176  NLIB_ASSUME(0);
177  }
178  p += v2k + 1;
179 
180  // v3
181  switch (v3k) {
182  case 3:
183  *(p + 3) = static_cast<uint8_t>(v3 >> 24);
184  // fall through
185  case 2:
186  *(p + 2) = static_cast<uint8_t>((v3 >> 16) & 0xFF);
187  // fall through
188  case 1:
189  *(p + 1) = static_cast<uint8_t>((v3 >> 8) & 0xFF);
190  // fall through
191  case 0:
192  *p = static_cast<uint8_t>(v3);
193  break;
194  default:
195  NLIB_ASSUME(0);
196  }
197  p += v3k + 1;
198 
199  return p;
200 }
201 
202 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, const uint32_t* src) NLIB_NOEXCEPT {
203  return Encode(p, src[0], src[1], src[2], src[3]);
204 }
205 #endif
206 
207 #ifdef NLIB_SIMD
208 inline const uint8_t* GroupVarInt32::Decode(uint32_t* dest, const uint8_t* p) NLIB_NOEXCEPT {
209  uint8_t tag = *p;
210  simd::i128 val = simd::I128::LoadA1(p + 1);
211  simd::i128 mask = simd::I128::LoadA16(&decodemasks_[tag * 16]);
212  simd::i128 r = simd::I128::Shuffle8(val, mask);
213  simd::I128::StoreA4(dest, r);
214  return p + count_[tag];
215 }
216 
217 inline const uint8_t* GroupVarInt32::Decode(uint32_t* v0, uint32_t* v1,
218  uint32_t* v2, uint32_t* v3,
219  const uint8_t* p) NLIB_NOEXCEPT {
220  uint8_t tag = *p;
221  simd::i128 val = simd::I128::LoadA1(p + 1);
222  simd::i128 mask = simd::I128::LoadA16(&decodemasks_[tag * 16]);
223  simd::i128 r = simd::I128::Shuffle8(val, mask);
224 
225  *v0 = simd::I128::GetUint32FromLane<0>(r);
226  *v1 = simd::I128::GetUint32FromLane<1>(r);
227  *v2 = simd::I128::GetUint32FromLane<2>(r);
228  *v3 = simd::I128::GetUint32FromLane<3>(r);
229  return p + count_[tag];
230 }
231 #else
232 inline const uint8_t* GroupVarInt32::Decode(uint32_t* v0, uint32_t* v1,
233  uint32_t* v2, uint32_t* v3,
234  const uint8_t* p) NLIB_NOEXCEPT {
235  uint8_t tag = *p;
236  ++p;
237  uint32_t tmp;
238 
239  // v0
240  int v0k = tag & 3;
241  tmp = 0;
242  switch (v0k) {
243  case 3:
244  tmp |= *(p + 3) << 24;
245  // fall through
246  case 2:
247  tmp |= *(p + 2) << 16;
248  // fall through
249  case 1:
250  tmp |= *(p + 1) << 8;
251  // fall through
252  case 0:
253  tmp |= *p;
254  break;
255  default:
256  NLIB_ASSUME(0);
257  }
258  *v0 = tmp;
259  p += v0k + 1;
260 
261  int v1k = (tag >> 2) & 3;
262  tmp = 0;
263  switch (v1k) {
264  case 3:
265  tmp |= *(p + 3) << 24;
266  // fall through
267  case 2:
268  tmp |= *(p + 2) << 16;
269  // fall through
270  case 1:
271  tmp |= *(p + 1) << 8;
272  // fall through
273  case 0:
274  tmp |= *p;
275  break;
276  default:
277  NLIB_ASSUME(0);
278  }
279  *v1 = tmp;
280  p += v1k + 1;
281 
282  int v2k = (tag >> 4) & 3;
283  tmp = 0;
284  switch (v2k) {
285  case 3:
286  tmp |= *(p + 3) << 24;
287  // fall through
288  case 2:
289  tmp |= *(p + 2) << 16;
290  // fall through
291  case 1:
292  tmp |= *(p + 1) << 8;
293  // fall through
294  case 0:
295  tmp |= *p;
296  break;
297  default:
298  NLIB_ASSUME(0);
299  }
300  *v2 = tmp;
301  p += v2k + 1;
302 
303  int v3k = (tag >> 6) & 3;
304  tmp = 0;
305  switch (v3k) {
306  case 3:
307  tmp |= *(p + 3) << 24;
308  // fall through
309  case 2:
310  tmp |= *(p + 2) << 16;
311  // fall through
312  case 1:
313  tmp |= *(p + 1) << 8;
314  // fall through
315  case 0:
316  tmp |= *p;
317  break;
318  default:
319  NLIB_ASSUME(0);
320  }
321  *v3 = tmp;
322  p += v3k + 1;
323 
324  return p;
325 }
326 
327 inline const uint8_t* GroupVarInt32::Decode(uint32_t* dest, const uint8_t* p) NLIB_NOEXCEPT {
328  return Decode(&dest[0], &dest[1], &dest[2], &dest[3], p);
329 }
330 #endif
331 
332 NLIB_NAMESPACE_END
333 
334 #endif // INCLUDE_NN_NLIB_GROUPVARINT_H_
Implements the class and functions for SIMD computations on integers.
static int nlib_clz32(uint32_t x)
Returns the number of consecutive zero bits, with respect to the most significant bit (MSB)...
Definition: Platform.h:2535
#define NLIB_VIS_PUBLIC
Symbols for functions and classes are made available outside of the library.
Definition: Platform_unix.h:89
#define NLIB_ASSUME(cond)
Tells the compiler that cond is always true, giving it a hint for optimization.
Definition: Platform.h:256
Class for encoding and decoding four 32-bit integer values as a unit.
Definition: GroupVarInt.h:26
nlib_i128_t i128
nlib_i128_t is defined using typedef.
Definition: SimdInt.h:74
static size_t GetEncodedSize(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) noexcept
Returns the size of the encoded four integer values.
Definition: GroupVarInt.h:28
#define NLIB_NOEXCEPT
Defines noexcept geared to the environment, or the equivalent.
Definition: Config.h:99
A file that contains the configuration information for each development environment.
#define NLIB_ALIGNAS(x)
Defines alignas(x) or the equivalent.
Definition: Config.h:239
static size_t GetMaxSize(size_t n) noexcept
Returns the maximum size of encoded n integer values.
Definition: GroupVarInt.h:45
#define NLIB_NONNULL
Indicates that NULL must not be specified for any of the arguments.