nlib
GroupVarInt.h
1 
2 /*--------------------------------------------------------------------------------*
3  Project: CrossRoad
4  Copyright (C)Nintendo All rights reserved.
5 
6  These coded instructions, statements, and computer programs contain proprietary
7  information of Nintendo and/or its licensed developers and are protected by
8  national and international copyright laws. They may not be disclosed to third
9  parties or copied or duplicated in any form, in whole or in part, without the
10  prior written consent of Nintendo.
11 
12  The content herein is highly confidential and should be handled accordingly.
13  *--------------------------------------------------------------------------------*/
14 
15 #pragma once
16 #ifndef INCLUDE_NN_NLIB_GROUPVARINT_H_
17 #define INCLUDE_NN_NLIB_GROUPVARINT_H_
18 
19 #include "nn/nlib/Config.h"
20 #ifdef NLIB_SIMD
21 #include "nn/nlib/simd/SimdInt.h"
22 #endif
23 
24 NLIB_NAMESPACE_BEGIN
25 
27  public:
28  static size_t GetEncodedSize(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) NLIB_NOEXCEPT {
29  return (1 + 4 + 4 + 4 + 4) - (nlib_clz32(v0 | 1) / 8) - (nlib_clz32(v1 | 1) / 8) -
30  (nlib_clz32(v2 | 1) / 8) - (nlib_clz32(v3 | 1) / 8);
31  }
32  static size_t GetEncodedSize(const uint32_t* p) NLIB_NOEXCEPT NLIB_NONNULL;
33  static size_t GetNextGroupSize(const uint8_t* p) NLIB_NOEXCEPT NLIB_NONNULL;
34  static uint8_t* Encode(uint8_t* p, uint32_t v0, uint32_t v1, uint32_t v2,
35  uint32_t v3) NLIB_NOEXCEPT NLIB_NONNULL;
36  static uint8_t* Encode(uint8_t* p, const uint32_t* src) NLIB_NOEXCEPT NLIB_NONNULL;
37  static const uint8_t* Decode(uint32_t* v0, uint32_t* v1, uint32_t* v2, uint32_t* v3,
38  const uint8_t* p) NLIB_NOEXCEPT NLIB_NONNULL;
39  static const uint8_t* Decode(uint32_t* dest, const uint8_t* p) NLIB_NOEXCEPT NLIB_NONNULL;
40  static size_t GetMaxSize(size_t n) NLIB_NOEXCEPT {
41  return ((n + 3) / 4) * (1 + 4 * sizeof(uint32_t));
42  }
43  static size_t GetTotalSize(const uint32_t* p, size_t n) NLIB_NOEXCEPT NLIB_NONNULL;
44 
45  private:
46  GroupVarInt32();
48 #ifdef NLIB_SIMD
49  NLIB_ALIGNAS(16) static NLIB_VIS_PUBLIC const uint8_t decodemasks_[256 * 16];
50  NLIB_ALIGNAS(16) static NLIB_VIS_PUBLIC const uint8_t encodemasks_[256 * 16];
51  static NLIB_VIS_PUBLIC const uint8_t count_[256];
52  static NLIB_VIS_PUBLIC const uint8_t tagarray_[256];
53 #endif
54 };
55 
56 inline size_t GroupVarInt32::GetEncodedSize(const uint32_t* p) NLIB_NOEXCEPT {
57  return GetEncodedSize(p[0], p[1], p[2], p[3]);
58 }
59 
60 inline size_t GroupVarInt32::GetNextGroupSize(const uint8_t* p) NLIB_NOEXCEPT {
61 #ifdef NLIB_SIMD
62  return count_[*p];
63 #else
64  uint8_t v = *p;
65  return 5 + (v & 3) + ((v >> 2) & 3) + ((v >> 4) & 3) + ((v >> 6) & 3);
66 #endif
67 }
68 
69 inline size_t GroupVarInt32::GetTotalSize(const uint32_t* p, size_t n) NLIB_NOEXCEPT {
70  size_t rval = 0;
71  for (; n > 4; n -= 4, p += 4) {
72  rval += GetEncodedSize(p);
73  }
74  // The decoder must be able to read 1 + 16 bytes anywhere.
75  // This is for _mm_loadu_si128 safety.
76  rval += 1 + 16;
77  return rval;
78 }
79 
80 #ifdef NLIB_SIMD
81 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, const uint32_t* src) NLIB_NOEXCEPT {
82  simd::i128 val = simd::I128::LoadA4(src);
83  int imask = simd::I128::MoveMask8(simd::I128::CmpEq8(val, simd::I128::SetZero()));
84  uint8_t tag = tagarray_[imask & 0xFF] | (tagarray_[imask >> 8] << 4);
85 
86  *p = tag;
87  simd::i128 mask = simd::I128::LoadA16(&encodemasks_[tag * 16]);
88  simd::i128 r = simd::I128::Shuffle8(val, mask);
89  simd::I128::StoreA1(p + 1, r);
90 
91  return p + count_[tag];
92 }
93 
94 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, uint32_t v0, uint32_t v1, uint32_t v2,
95  uint32_t v3) NLIB_NOEXCEPT {
96  simd::i128 val;
97  val = simd::I128::SetUint32ToLane<0>(simd::I128::SetZero(), v0);
98  val = simd::I128::SetUint32ToLane<1>(val, v1);
99  val = simd::I128::SetUint32ToLane<2>(val, v2);
100  val = simd::I128::SetUint32ToLane<3>(val, v3);
101  int imask = simd::I128::MoveMask8(simd::I128::CmpEq8(val, simd::I128::SetZero()));
102  uint8_t tag = tagarray_[imask & 0xFF] | (tagarray_[imask >> 8] << 4);
103 
104  *p = tag;
105  simd::i128 mask = simd::I128::LoadA16(&encodemasks_[tag * 16]);
106  simd::i128 r = simd::I128::Shuffle8(val, mask);
107  simd::I128::StoreA1(p + 1, r);
108 
109  return p + count_[tag];
110 }
111 #else
112 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, uint32_t v0, uint32_t v1, uint32_t v2,
113  uint32_t v3) NLIB_NOEXCEPT {
114  int v0k = 3 - (nlib_clz32(v0 | 1) / 8);
115  int v1k = 3 - (nlib_clz32(v1 | 1) / 8);
116  int v2k = 3 - (nlib_clz32(v2 | 1) / 8);
117  int v3k = 3 - (nlib_clz32(v3 | 1) / 8);
118  *p++ = static_cast<uint8_t>((v3k << 6) | (v2k << 4) | (v1k << 2) | v0k);
119 
120  // v0
121  switch (v0k) {
122  case 3:
123  *(p + 3) = static_cast<uint8_t>(v0 >> 24);
124  // fall through
125  case 2:
126  *(p + 2) = static_cast<uint8_t>((v0 >> 16) & 0xFF);
127  // fall through
128  case 1:
129  *(p + 1) = static_cast<uint8_t>((v0 >> 8) & 0xFF);
130  // fall through
131  case 0:
132  *p = static_cast<uint8_t>(v0);
133  break;
134  default:
135  NLIB_ASSUME(0);
136  }
137  p += v0k + 1;
138 
139  // v1
140  switch (v1k) {
141  case 3:
142  *(p + 3) = static_cast<uint8_t>(v1 >> 24);
143  // fall through
144  case 2:
145  *(p + 2) = static_cast<uint8_t>((v1 >> 16) & 0xFF);
146  // fall through
147  case 1:
148  *(p + 1) = static_cast<uint8_t>((v1 >> 8) & 0xFF);
149  // fall through
150  case 0:
151  *p = static_cast<uint8_t>(v1);
152  break;
153  default:
154  NLIB_ASSUME(0);
155  }
156  p += v1k + 1;
157 
158  // v2
159  switch (v2k) {
160  case 3:
161  *(p + 3) = static_cast<uint8_t>(v2 >> 24);
162  // fall through
163  case 2:
164  *(p + 2) = static_cast<uint8_t>((v2 >> 16) & 0xFF);
165  // fall through
166  case 1:
167  *(p + 1) = static_cast<uint8_t>((v2 >> 8) & 0xFF);
168  // fall through
169  case 0:
170  *p = static_cast<uint8_t>(v2);
171  break;
172  default:
173  NLIB_ASSUME(0);
174  }
175  p += v2k + 1;
176 
177  // v3
178  switch (v3k) {
179  case 3:
180  *(p + 3) = static_cast<uint8_t>(v3 >> 24);
181  // fall through
182  case 2:
183  *(p + 2) = static_cast<uint8_t>((v3 >> 16) & 0xFF);
184  // fall through
185  case 1:
186  *(p + 1) = static_cast<uint8_t>((v3 >> 8) & 0xFF);
187  // fall through
188  case 0:
189  *p = static_cast<uint8_t>(v3);
190  break;
191  default:
192  NLIB_ASSUME(0);
193  }
194  p += v3k + 1;
195 
196  return p;
197 }
198 
199 inline uint8_t* GroupVarInt32::Encode(uint8_t* p, const uint32_t* src) NLIB_NOEXCEPT {
200  return Encode(p, src[0], src[1], src[2], src[3]);
201 }
202 #endif
203 
204 #ifdef NLIB_SIMD
205 inline const uint8_t* GroupVarInt32::Decode(uint32_t* dest, const uint8_t* p) NLIB_NOEXCEPT {
206  uint8_t tag = *p;
207  simd::i128 val = simd::I128::LoadA1(p + 1);
208  simd::i128 mask = simd::I128::LoadA16(&decodemasks_[tag * 16]);
209  simd::i128 r = simd::I128::Shuffle8(val, mask);
210  simd::I128::StoreA4(dest, r);
211  return p + count_[tag];
212 }
213 
214 inline const uint8_t* GroupVarInt32::Decode(uint32_t* v0, uint32_t* v1, uint32_t* v2, uint32_t* v3,
215  const uint8_t* p) NLIB_NOEXCEPT {
216  uint8_t tag = *p;
217  simd::i128 val = simd::I128::LoadA1(p + 1);
218  simd::i128 mask = simd::I128::LoadA16(&decodemasks_[tag * 16]);
219  simd::i128 r = simd::I128::Shuffle8(val, mask);
220 
221  *v0 = simd::I128::GetUint32FromLane<0>(r);
222  *v1 = simd::I128::GetUint32FromLane<1>(r);
223  *v2 = simd::I128::GetUint32FromLane<2>(r);
224  *v3 = simd::I128::GetUint32FromLane<3>(r);
225  return p + count_[tag];
226 }
227 #else
228 inline const uint8_t* GroupVarInt32::Decode(uint32_t* v0, uint32_t* v1, uint32_t* v2, uint32_t* v3,
229  const uint8_t* p) NLIB_NOEXCEPT {
230  uint8_t tag = *p;
231  ++p;
232  uint32_t tmp;
233 
234  // v0
235  int v0k = tag & 3;
236  tmp = 0;
237  switch (v0k) {
238  case 3:
239  tmp |= *(p + 3) << 24;
240  // fall through
241  case 2:
242  tmp |= *(p + 2) << 16;
243  // fall through
244  case 1:
245  tmp |= *(p + 1) << 8;
246  // fall through
247  case 0:
248  tmp |= *p;
249  break;
250  default:
251  NLIB_ASSUME(0);
252  }
253  *v0 = tmp;
254  p += v0k + 1;
255 
256  int v1k = (tag >> 2) & 3;
257  tmp = 0;
258  switch (v1k) {
259  case 3:
260  tmp |= *(p + 3) << 24;
261  // fall through
262  case 2:
263  tmp |= *(p + 2) << 16;
264  // fall through
265  case 1:
266  tmp |= *(p + 1) << 8;
267  // fall through
268  case 0:
269  tmp |= *p;
270  break;
271  default:
272  NLIB_ASSUME(0);
273  }
274  *v1 = tmp;
275  p += v1k + 1;
276 
277  int v2k = (tag >> 4) & 3;
278  tmp = 0;
279  switch (v2k) {
280  case 3:
281  tmp |= *(p + 3) << 24;
282  // fall through
283  case 2:
284  tmp |= *(p + 2) << 16;
285  // fall through
286  case 1:
287  tmp |= *(p + 1) << 8;
288  // fall through
289  case 0:
290  tmp |= *p;
291  break;
292  default:
293  NLIB_ASSUME(0);
294  }
295  *v2 = tmp;
296  p += v2k + 1;
297 
298  int v3k = (tag >> 6) & 3;
299  tmp = 0;
300  switch (v3k) {
301  case 3:
302  tmp |= *(p + 3) << 24;
303  // fall through
304  case 2:
305  tmp |= *(p + 2) << 16;
306  // fall through
307  case 1:
308  tmp |= *(p + 1) << 8;
309  // fall through
310  case 0:
311  tmp |= *p;
312  break;
313  default:
314  NLIB_ASSUME(0);
315  }
316  *v3 = tmp;
317  p += v3k + 1;
318 
319  return p;
320 }
321 
322 inline const uint8_t* GroupVarInt32::Decode(uint32_t* dest, const uint8_t* p) NLIB_NOEXCEPT {
323  return Decode(&dest[0], &dest[1], &dest[2], &dest[3], p);
324 }
325 #endif
326 
327 NLIB_NAMESPACE_END
328 
329 #endif // INCLUDE_NN_NLIB_GROUPVARINT_H_
#define NLIB_DISALLOW_COPY_AND_ASSIGN(TypeName)
Prohibits use of the copy constructor and assignment operator for the class specified by TypeName...
Definition: Config.h:183
Implements the class and functions for SIMD computations on integers.
static int nlib_clz32(uint32_t x)
Returns the number of consecutive zero bits, with respect to the most significant bit (MSB)...
Definition: Platform.h:2690
#define NLIB_VIS_PUBLIC
Symbols for functions and classes are made available outside of the library.
Definition: Platform_unix.h:87
#define NLIB_ASSUME(cond)
Indicates that cond is true, giving the compiler a hint that it can use for optimization.
Definition: Platform.h:259
Class for encoding and decoding four 32-bit integer values as a unit.
Definition: GroupVarInt.h:26
nlib_i128_t i128
nlib_i128_t is defined using typedef.
Definition: SimdInt.h:74
static size_t GetEncodedSize(uint32_t v0, uint32_t v1, uint32_t v2, uint32_t v3) noexcept
Returns the size of the encoded four integer values.
Definition: GroupVarInt.h:28
#define NLIB_NOEXCEPT
Defines noexcept geared to the environment, or the equivalent.
Definition: Config.h:109
A file that contains the configuration information for each development environment.
#define NLIB_ALIGNAS(x)
Defines alignas(x) or the equivalent.
Definition: Config.h:260
static size_t GetMaxSize(size_t n) noexcept
Returns the maximum size of encoded n integer values.
Definition: GroupVarInt.h:40
#define NLIB_NONNULL
Indicates that NULL must not be specified for any of the arguments.