nlib
GroupVarInt.h
1 
2 /*--------------------------------------------------------------------------------*
3  Project: CrossRoad
4  Copyright (C)Nintendo All rights reserved.
5 
6  These coded instructions, statements, and computer programs contain proprietary
7  information of Nintendo and/or its licensed developers and are protected by
8  national and international copyright laws. They may not be disclosed to third
9  parties or copied or duplicated in any form, in whole or in part, without the
10  prior written consent of Nintendo.
11 
12  The content herein is highly confidential and should be handled accordingly.
13  *--------------------------------------------------------------------------------*/
14 
15 #pragma once
16 #ifndef INCLUDE_NN_NLIB_GROUPVARINT_H_
17 #define INCLUDE_NN_NLIB_GROUPVARINT_H_
18 
19 #include "nn/nlib/Config.h"
20 #ifdef NLIB_SIMD
21 #include "nn/nlib/simd/SimdInt.h"
22 #endif
23 
24 NLIB_NAMESPACE_BEGIN
25 
27  public:
28  static size_t GetEncodedSize(uint32_t v0, uint32_t v1,
29  uint32_t v2, uint32_t v3) NLIB_NOEXCEPT {
30  return (1 + 4 + 4 + 4 + 4) -
31  (nlib_clz32(v0 | 1) / 8) -
32  (nlib_clz32(v1 | 1) / 8) -
33  (nlib_clz32(v2 | 1) / 8) -
34  (nlib_clz32(v3 | 1) / 8);
35  }
36  static size_t GetEncodedSize(const uint32_t* p) NLIB_NOEXCEPT NLIB_NONNULL;
37  static size_t GetNextGroupSize(const uint8_t* p) NLIB_NOEXCEPT NLIB_NONNULL;
38  static uint8_t* Encode(uint8_t* p, uint32_t v0, uint32_t v1,
39  uint32_t v2, uint32_t v3) NLIB_NOEXCEPT NLIB_NONNULL;
40  static uint8_t* Encode(uint8_t* p, const uint32_t* src) NLIB_NOEXCEPT NLIB_NONNULL;
41  static const uint8_t* Decode(uint32_t* v0, uint32_t* v1,
42  uint32_t* v2, uint32_t* v3,
43  const uint8_t* p) NLIB_NOEXCEPT NLIB_NONNULL;
44  static const uint8_t* Decode(uint32_t* dest, const uint8_t* p) NLIB_NOEXCEPT NLIB_NONNULL;
45  static size_t GetMaxSize(size_t n) NLIB_NOEXCEPT {
46  return ((n + 3) / 4) * (1 + 4 * sizeof(uint32_t));
47  }
48  static size_t GetTotalSize(const uint32_t* p, size_t n) NLIB_NOEXCEPT NLIB_NONNULL;
49 
50  private:
51  GroupVarInt32();
53 #ifdef NLIB_SIMD
54  NLIB_ALIGNAS(16) static NLIB_VIS_PUBLIC const uint8_t decodemasks_[256 * 16];
55  NLIB_ALIGNAS(16) static NLIB_VIS_PUBLIC const uint8_t encodemasks_[256 * 16];
56  static NLIB_VIS_PUBLIC const uint8_t count_[256];
57  static NLIB_VIS_PUBLIC const uint8_t tagarray_[256];
58 #endif
59 };
60 
61 inline size_t GroupVarInt32::GetEncodedSize(const uint32_t* p) NLIB_NOEXCEPT {
62  return GetEncodedSize(p[0], p[1], p[2], p[3]);
63 }
64 
65 inline size_t GroupVarInt32::GetNextGroupSize(const uint8_t* p) NLIB_NOEXCEPT {
66 #ifdef NLIB_SIMD
67  return count_[*p];
68 #else
69  uint8_t v = *p;
70  return 5 + (v & 3) + ((v >> 2) & 3) + ((v >> 4) & 3) + ((v >> 6) & 3);
71 #endif
72 }
73 
74 inline size_t GroupVarInt32::GetTotalSize(const uint32_t* p, size_t n) NLIB_NOEXCEPT {
75  size_t rval = 0;
76  for (; n > 4; n -= 4, p += 4) {
77  rval += GetEncodedSize(p);
78  }
79  // The decoder must be able to read 1 + 16 bytes anywhere.
80  // This is for _mm_loadu_si128 safety.
81  rval += 1 + 16;
82  return rval;
83 }
84 
85 #ifdef NLIB_SIMD
86 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, const uint32_t* src) NLIB_NOEXCEPT {
87  simd::i128 val = simd::I128::LoadA4(src);
88  int imask = simd::I128::MoveMask8(simd::I128::CmpEq8(val, simd::I128::SetZero()));
89  uint8_t tag = tagarray_[imask & 0xFF] | (tagarray_[imask >> 8] << 4);
90 
91  *p = tag;
92  simd::i128 mask = simd::I128::LoadA16(&encodemasks_[tag * 16]);
93  simd::i128 r = simd::I128::Shuffle8(val, mask);
94  simd::I128::StoreA1(p + 1, r);
95 
96  return p + count_[tag];
97 }
98 
99 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, uint32_t v0, uint32_t v1,
100  uint32_t v2, uint32_t v3) NLIB_NOEXCEPT {
101  simd::i128 val;
102  val = simd::I128::SetUint32ToLane<0>(simd::I128::SetZero(), v0);
103  val = simd::I128::SetUint32ToLane<1>(val, v1);
104  val = simd::I128::SetUint32ToLane<2>(val, v2);
105  val = simd::I128::SetUint32ToLane<3>(val, v3);
106  int imask = simd::I128::MoveMask8(simd::I128::CmpEq8(val, simd::I128::SetZero()));
107  uint8_t tag = tagarray_[imask & 0xFF] | (tagarray_[imask >> 8] << 4);
108 
109  *p = tag;
110  simd::i128 mask = simd::I128::LoadA16(&encodemasks_[tag * 16]);
111  simd::i128 r = simd::I128::Shuffle8(val, mask);
112  simd::I128::StoreA1(p + 1, r);
113 
114  return p + count_[tag];
115 }
116 #else
117 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, uint32_t v0, uint32_t v1,
118  uint32_t v2, uint32_t v3) NLIB_NOEXCEPT {
119  int v0k = 3 - (nlib_clz32(v0 | 1) / 8);
120  int v1k = 3 - (nlib_clz32(v1 | 1) / 8);
121  int v2k = 3 - (nlib_clz32(v2 | 1) / 8);
122  int v3k = 3 - (nlib_clz32(v3 | 1) / 8);
123  *p++ = static_cast<uint8_t>((v3k << 6) | (v2k << 4) | (v1k << 2) | v0k);
124 
125  // v0
126  switch (v0k) {
127  case 3:
128  *(p + 3) = static_cast<uint8_t>(v0 >> 24);
129  // fall through
130  case 2:
131  *(p + 2) = static_cast<uint8_t>((v0 >> 16) & 0xFF);
132  // fall through
133  case 1:
134  *(p + 1) = static_cast<uint8_t>((v0 >> 8) & 0xFF);
135  // fall through
136  case 0:
137  *p = static_cast<uint8_t>(v0);
138  break;
139  default:
140  NLIB_ASSUME(0);
141  }
142  p += v0k + 1;
143 
144  // v1
145  switch (v1k) {
146  case 3:
147  *(p + 3) = static_cast<uint8_t>(v1 >> 24);
148  // fall through
149  case 2:
150  *(p + 2) = static_cast<uint8_t>((v1 >> 16) & 0xFF);
151  // fall through
152  case 1:
153  *(p + 1) = static_cast<uint8_t>((v1 >> 8) & 0xFF);
154  // fall through
155  case 0:
156  *p = static_cast<uint8_t>(v1);
157  break;
158  default:
159  NLIB_ASSUME(0);
160  }
161  p += v1k + 1;
162 
163  // v2
164  switch (v2k) {
165  case 3:
166  *(p + 3) = static_cast<uint8_t>(v2 >> 24);
167  // fall through
168  case 2:
169  *(p + 2) = static_cast<uint8_t>((v2 >> 16) & 0xFF);
170  // fall through
171  case 1:
172  *(p + 1) = static_cast<uint8_t>((v2 >> 8) & 0xFF);
173  // fall through
174  case 0:
175  *p = static_cast<uint8_t>(v2);
176  break;
177  default:
178  NLIB_ASSUME(0);
179  }
180  p += v2k + 1;
181 
182  // v3
183  switch (v3k) {
184  case 3:
185  *(p + 3) = static_cast<uint8_t>(v3 >> 24);
186  // fall through
187  case 2:
188  *(p + 2) = static_cast<uint8_t>((v3 >> 16) & 0xFF);
189  // fall through
190  case 1:
191  *(p + 1) = static_cast<uint8_t>((v3 >> 8) & 0xFF);
192  // fall through
193  case 0:
194  *p = static_cast<uint8_t>(v3);
195  break;
196  default:
197  NLIB_ASSUME(0);
198  }
199  p += v3k + 1;
200 
201  return p;
202 }
203 
204 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, const uint32_t* src) NLIB_NOEXCEPT {
205  return Encode(p, src[0], src[1], src[2], src[3]);
206 }
207 #endif
208 
209 #ifdef NLIB_SIMD
210 inline const uint8_t* GroupVarInt32::Decode(uint32_t* dest, const uint8_t* p) NLIB_NOEXCEPT {
211  uint8_t tag = *p;
212  simd::i128 val = simd::I128::LoadA1(p + 1);
213  simd::i128 mask = simd::I128::LoadA16(&decodemasks_[tag * 16]);
214  simd::i128 r = simd::I128::Shuffle8(val, mask);
215  simd::I128::StoreA4(dest, r);
216  return p + count_[tag];
217 }
218 
219 inline const uint8_t* GroupVarInt32::Decode(uint32_t* v0, uint32_t* v1,
220  uint32_t* v2, uint32_t* v3,
221  const uint8_t* p) NLIB_NOEXCEPT {
222  uint8_t tag = *p;
223  simd::i128 val = simd::I128::LoadA1(p + 1);
224  simd::i128 mask = simd::I128::LoadA16(&decodemasks_[tag * 16]);
225  simd::i128 r = simd::I128::Shuffle8(val, mask);
226 
227  *v0 = simd::I128::GetUint32FromLane<0>(r);
228  *v1 = simd::I128::GetUint32FromLane<1>(r);
229  *v2 = simd::I128::GetUint32FromLane<2>(r);
230  *v3 = simd::I128::GetUint32FromLane<3>(r);
231  return p + count_[tag];
232 }
233 #else
234 inline const uint8_t* GroupVarInt32::Decode(uint32_t* v0, uint32_t* v1,
235  uint32_t* v2, uint32_t* v3,
236  const uint8_t* p) NLIB_NOEXCEPT {
237  uint8_t tag = *p;
238  ++p;
239  uint32_t tmp;
240 
241  // v0
242  int v0k = tag & 3;
243  tmp = 0;
244  switch (v0k) {
245  case 3:
246  tmp |= *(p + 3) << 24;
247  // fall through
248  case 2:
249  tmp |= *(p + 2) << 16;
250  // fall through
251  case 1:
252  tmp |= *(p + 1) << 8;
253  // fall through
254  case 0:
255  tmp |= *p;
256  break;
257  default:
258  NLIB_ASSUME(0);
259  }
260  *v0 = tmp;
261  p += v0k + 1;
262 
263  int v1k = (tag >> 2) & 3;
264  tmp = 0;
265  switch (v1k) {
266  case 3:
267  tmp |= *(p + 3) << 24;
268  // fall through
269  case 2:
270  tmp |= *(p + 2) << 16;
271  // fall through
272  case 1:
273  tmp |= *(p + 1) << 8;
274  // fall through
275  case 0:
276  tmp |= *p;
277  break;
278  default:
279  NLIB_ASSUME(0);
280  }
281  *v1 = tmp;
282  p += v1k + 1;
283 
284  int v2k = (tag >> 4) & 3;
285  tmp = 0;
286  switch (v2k) {
287  case 3:
288  tmp |= *(p + 3) << 24;
289  // fall through
290  case 2:
291  tmp |= *(p + 2) << 16;
292  // fall through
293  case 1:
294  tmp |= *(p + 1) << 8;
295  // fall through
296  case 0:
297  tmp |= *p;
298  break;
299  default:
300  NLIB_ASSUME(0);
301  }
302  *v2 = tmp;
303  p += v2k + 1;
304 
305  int v3k = (tag >> 6) & 3;
306  tmp = 0;
307  switch (v3k) {
308  case 3:
309  tmp |= *(p + 3) << 24;
310  // fall through
311  case 2:
312  tmp |= *(p + 2) << 16;
313  // fall through
314  case 1:
315  tmp |= *(p + 1) << 8;
316  // fall through
317  case 0:
318  tmp |= *p;
319  break;
320  default:
321  NLIB_ASSUME(0);
322  }
323  *v3 = tmp;
324  p += v3k + 1;
325 
326  return p;
327 }
328 
329 inline const uint8_t* GroupVarInt32::Decode(uint32_t* dest, const uint8_t* p) NLIB_NOEXCEPT {
330  return Decode(&dest[0], &dest[1], &dest[2], &dest[3], p);
331 }
332 #endif
333 
334 NLIB_NAMESPACE_END
335 
336 #endif // INCLUDE_NN_NLIB_GROUPVARINT_H_
#define NLIB_DISALLOW_COPY_AND_ASSIGN(TypeName)
Prohibits use of the copy constructor and assignment operator for the class specified by TypeName...
Definition: Config.h:179
Implements the class and functions for SIMD computations on integers.
static int nlib_clz32(uint32_t x)
Returns the number of consecutive zero bits, with respect to the most significant bit (MSB)...
Definition: Platform.h:2615
#define NLIB_VIS_PUBLIC
Symbols for functions and classes are made available outside of the library.
Definition: Platform_unix.h:89
#define NLIB_ASSUME(cond)
Tells the compiler that cond is always true, giving it a hint it can use for optimization.
Definition: Platform.h:264
Class for encoding and decoding four 32-bit integer values as a unit.
Definition: GroupVarInt.h:26
nlib_i128_t i128
nlib_i128_t is defined using typedef.
Definition: SimdInt.h:74
static size_t GetEncodedSize(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) noexcept
Returns the size of the encoded four integer values.
Definition: GroupVarInt.h:28
#define NLIB_NOEXCEPT
Defines noexcept geared to the environment, or the equivalent.
Definition: Config.h:105
A file that contains the configuration information for each development environment.
#define NLIB_ALIGNAS(x)
Defines alignas(x) or the equivalent.
Definition: Config.h:255
static size_t GetMaxSize(size_t n) noexcept
Returns the maximum size of encoded n integer values.
Definition: GroupVarInt.h:45
#define NLIB_NONNULL
Indicates that NULL must not be passed for any of the arguments.