nlib
TextReader.h
Go to the documentation of this file.
1 
2 /*--------------------------------------------------------------------------------*
3  Project: CrossRoad
4  Copyright (C)Nintendo All rights reserved.
5 
6  These coded instructions, statements, and computer programs contain proprietary
7  information of Nintendo and/or its licensed developers and are protected by
8  national and international copyright laws. They may not be disclosed to third
9  parties or copied or duplicated in any form, in whole or in part, without the
10  prior written consent of Nintendo.
11 
12  The content herein is highly confidential and should be handled accordingly.
13  *--------------------------------------------------------------------------------*/
14 
15 #pragma once
16 #ifndef INCLUDE_NN_NLIB_TEXTREADER_H_
17 #define INCLUDE_NN_NLIB_TEXTREADER_H_
18 
19 #include "nn/nlib/Config.h"
20 
21 NLIB_NAMESPACE_BEGIN
22 
23 class InputStream;
24 
25 // code snippets:
26 // TextReader reader;
27 // if (reader.Init() != 0 || reader.Open(&stream) != 0) { error }
28 // while ((c = reader.Read()) >= 0)
29 // c is a codepoint
30 // if (!reader) { stream error if !stream, otherwise UTF-8 may be malformed }
31 // reader.Close();
32 // (stream.Close();)
34  public:
// Destructor; defined out of line.
36  virtual ~TextReader() NLIB_NOEXCEPT;
// Called before Open() in the usage notes above, which treat a nonzero return as failure.
37  errno_t Init() NLIB_NOEXCEPT;
// Opens the reader on `stream`; the usage notes above treat a nonzero return as failure.
// NOTE(review): presumably `stream` is what GetStream() returns afterwards - confirm.
38  errno_t Open(InputStream* stream) NLIB_NOEXCEPT;
39  int Read() NLIB_NOEXCEPT {
40  if (cur_ == bufend_) {
41  this->FillBuffer();
42  if (cur_ == bufend_) return -1;
43  }
44  int rval = *reinterpret_cast<unsigned char*>(cur_);
45  if (!(rval & 0x80)) {
46  ++cur_;
47  if (rval == 0x0A) {
48  pos_of_line_ = pos_of_buf_ + (cur_ - &buf_[0]);
49  ++line_;
50  }
51  return rval;
52  }
53  if (!utf32_cache_) ConstructUtf32Cache();
54  cur_ += utf8_bytecount_;
55  rval = static_cast<int>(utf32_cache_);
56  utf32_cache_ = 0;
57  return rval;
58  }
59  int Peek() NLIB_NOEXCEPT {
60  if (cur_ == bufend_) {
61  this->FillBuffer();
62  if (cur_ == bufend_) return -1;
63  }
64  int rval = *reinterpret_cast<unsigned char*>(cur_);
65  if (!(rval & 0x80)) return rval;
66  if (!utf32_cache_) ConstructUtf32Cache();
67  return static_cast<int>(utf32_cache_);
68  }
69  int SkipWs() NLIB_NOEXCEPT {
70  // skip ' ' \t \n (\r)
71  if (cur_ + 1 < bufend_) {
72  unsigned char c = *reinterpret_cast<unsigned char*>(cur_);
73  if (c > ' ') return 0;
74  if (c == ' ') {
75  ++cur_;
76  c = *reinterpret_cast<unsigned char*>(cur_);
77  if (c > ' ') {
78  utf32_cache_ = 0;
79  return 1;
80  }
81  return this->SkipWs_(1);
82  }
83  }
84  return this->SkipWs_(0);
85  }
// Reads into buf (capacity n) with a delimiter character; defined out of line.
// NOTE(review): presumably stops at `delim` and stores the byte count in *len,
// mirroring the predicate overload defined later in this header - confirm.
86  bool ReadUntil(size_t* len, nlib_utf8_t* buf, size_t n, char delim) NLIB_NOEXCEPT NLIB_NONNULL;
// Array overload: forwards to ReadUntil(len, &buf[0], N, delim) with N deduced from the array.
87  template <size_t N>
88  bool ReadUntil(size_t* len, nlib_utf8_t (&buf)[N], char delim) NLIB_NOEXCEPT { // NOLINT
89  return this->ReadUntil(len, &buf[0], N, delim);
90  }
// Predicate overload; defined after the class in this header.
// pred is called as pred(const char*) on the current position.
91  template <class T>
92  bool ReadUntil(size_t* len, nlib_utf8_t* buf, size_t n, T pred) NLIB_NOEXCEPT;
// Array overload: forwards to ReadUntil(len, buf, N, pred) with N deduced from the array.
93  template <class T, size_t N>
94  bool ReadUntil(size_t* len, nlib_utf8_t (&buf)[N], T pred) NLIB_NOEXCEPT { // NOLINT
95  return this->ReadUntil(len, buf, N, pred);
96  }
// Reads a decimal string into buf (capacity n); defined out of line.
// NOTE(review): the meaning of the returned size_t is not visible here - confirm.
97  size_t ReadDecimalString(char* buf, size_t n) NLIB_NOEXCEPT NLIB_NONNULL;
// Array overload: forwards to ReadDecimalString(buf, N) with N deduced from the array.
98  template <size_t N>
99  size_t ReadDecimalString(char (&buf)[N]) NLIB_NOEXCEPT {
100  return this->ReadDecimalString(buf, N);
101  }
// String variant of Proceed(char); defined out of line.
// NOTE(review): presumably consumes the n bytes of str when they match the input - confirm.
102  bool Proceed(const nlib_utf8_t* str, size_t n) NLIB_NOEXCEPT NLIB_NONNULL;
103  bool Proceed(char c) NLIB_NOEXCEPT {
104  NLIB_ASSERT(!(c & 0x80) && c != 0x0A);
105  if (cur_ == bufend_) {
106  this->FillBuffer();
107  if (cur_ == bufend_) return false;
108  }
109  if (*cur_ != c) return false;
110  ++cur_;
111  return true;
112  }
// Variant of Proceed() taking a NUL-terminated string; defined out of line.
// NOTE(review): how it differs from Proceed(str, n) is not visible here - confirm.
113  bool ProceedEx(const nlib_utf8_t* str) NLIB_NOEXCEPT;
// Closes the reader; defined out of line. The usage notes above call it before
// closing the underlying stream.
114  bool Close() NLIB_NOEXCEPT;
// Records error value e unless an error is already recorded: only the first error is kept.
// Usable on const objects because errno_ is declared mutable.
115  void SetError(errno_t e) const NLIB_NOEXCEPT {
116  if (errno_ == 0) errno_ = e;
117  }
// Returns the recorded error value; 0 means no error has been recorded.
118  errno_t GetErrorValue() const NLIB_NOEXCEPT { return errno_; }
// Returns the stream pointer held by this reader.
119  InputStream* GetStream() NLIB_NOEXCEPT { return stream_; }
// Returns the line counter, which Read() increments for every 0x0A it consumes.
120  int GetLine() const NLIB_NOEXCEPT { return line_; }
// Returns the 1-based column of the current position, counted in bytes (not
// characters) from pos_of_line_, the position just past the last line feed seen by Read().
// NOTE(review): pos_of_buf_ is presumably the stream offset of buf_[0] - confirm.
121  int GetColumn() const NLIB_NOEXCEPT {
122  return static_cast<int>((cur_ - &buf_[0]) + 1 + pos_of_buf_ - pos_of_line_);
123  }
// Boolean conversion: the reader evaluates to true while no error is recorded.
124  NLIB_SAFE_BOOL(TextReader, GetErrorValue() == 0)
125 
126  protected:
// Accessors for the buffer window, available to subclasses (for example when
// overriding FillBuffer_()).
// Returns the current read position inside the buffer.
127  char* GetCur() NLIB_NOEXCEPT { return cur_; }
// Returns the end of the valid data in the buffer.
128  char* GetBufEnd() NLIB_NOEXCEPT { return bufend_; }
// Sets the end of the valid data in the buffer.
129  void SetBufEnd(char* p) NLIB_NOEXCEPT { bufend_ = p; }
130 
131  // Refills the buffer: checks UTF-8 validity, converts CRLF to LF, and converts CR to LF.
// Callers treat cur_ == bufend_ after this call as "no more data".
132  virtual void FillBuffer_() NLIB_NOEXCEPT;
133 
134  private:
// Non-virtual entry point used by the inline methods; forwards to the virtual FillBuffer_().
135  void FillBuffer() NLIB_NOEXCEPT { this->FillBuffer_(); }
// Hidden-visibility helper defined out of line.
// NOTE(review): presumably updates the line bookkeeping for the newlines in [mcur, mend) - confirm.
136  NLIB_VIS_HIDDEN void CountNewLine(const char* mcur, const char* mend) NLIB_NOEXCEPT;
// Called by Read()/Peek() when the byte at cur_ is non-ASCII and utf32_cache_ is 0.
// NOTE(review): presumably decodes the UTF-8 sequence at cur_ into utf32_cache_ and stores
// its length in utf8_bytecount_ - confirm.
137  void ConstructUtf32Cache() NLIB_NOEXCEPT;
// General white-space skipper behind SkipWs(); called with 0 or 1.
// NOTE(review): `base` looks like the number of characters already skipped - confirm.
138  int SkipWs_(int base) NLIB_NOEXCEPT;
139 
140  private:
// Read buffer. NOTE(review): the extra 3 + 1 bytes presumably leave room for a partial
// UTF-8 sequence and a terminator - confirm.
141  char buf_[512 + 3 + 1];
// Decoded code point of the non-ASCII character at cur_; 0 means "not decoded yet".
142  nlib_utf32_t utf32_cache_;
// Next unread byte in buf_.
143  char* cur_;
// End of the valid data in buf_; cur_ == bufend_ triggers a refill.
144  char* bufend_;
// Number of bytes Read() advances by when it consumes the cached non-ASCII character.
145  size_t utf8_bytecount_;
// Stream pointer returned by GetStream().
146  InputStream* stream_;
// First error recorded through SetError(); mutable so that the const SetError() can write it.
147  mutable ErrnoT errno_;
// Line counter, incremented by Read() for each 0x0A consumed.
148  int line_;
// Position just past the most recent line feed consumed by Read(); used by GetColumn().
149  size_t pos_of_line_;
// NOTE(review): presumably the stream offset corresponding to buf_[0] - confirm.
150  size_t pos_of_buf_;
151 
153 };
154 
155 template <class T>
156 bool TextReader::ReadUntil(size_t* len, nlib_utf8_t* buf, size_t n, T pred) NLIB_NOEXCEPT {
157  // NOTICE: not NULL terminated
158  if (!buf) {
159  *len = 0;
160  return false;
161  }
162  utf32_cache_ = 0;
163  char* p = buf;
164  char* pend = buf + n;
165  for (;;) {
166  if (cur_ == bufend_) {
167  this->FillBuffer();
168  if (cur_ == bufend_) {
169  *len = p - buf;
170  return false;
171  }
172  }
173  // T::operator()(const char* ptr);
174  while (p != pend && cur_ != bufend_) {
175  if ((*reinterpret_cast<unsigned char*>(cur_) & 0xC0) != 0x80 &&
176  pred((const char*)cur_)) { // NOLINT
177  *len = p - buf;
178  return true;
179  }
180  *p = *cur_;
181  ++p;
182  ++cur_;
183  }
184  if (p == pend) {
185  if (cur_ != bufend_ && (*reinterpret_cast<unsigned char*>(cur_) & 0xC0) == 0x80) {
186  do {
187  --p;
188  --cur_;
189  } while ((*reinterpret_cast<unsigned char*>(cur_) & 0xC0) == 0x80);
190  }
191  *len = p - buf;
192  return false;
193  }
194  }
195 }
196 
197 NLIB_NAMESPACE_END
198 
199 #endif // INCLUDE_NN_NLIB_TEXTREADER_H_
bool ReadUntil(size_t *len, nlib_utf8_t(&buf)[N], T pred) noexcept
Calls ReadUntil(len, buf, N, pred).
Definition: TextReader.h:94
#define NLIB_DISALLOW_COPY_AND_ASSIGN(TypeName)
Prohibits use of the copy constructor and assignment operator for the class specified by TypeName...
Definition: Config.h:163
#define NLIB_SAFE_BOOL(class_name, exp)
Defines a safe operator bool function in the class. Uses the C++11 explicit bool if it is available f...
Definition: Config.h:178
int Read() noexcept
Reads one character from the stream and returns UTF-32 data.
Definition: TextReader.h:39
InputStream * GetStream() noexcept
Gets the stream for the text reader to read.
Definition: TextReader.h:119
#define NLIB_VIS_HIDDEN
Symbols for functions and classes are not made available outside of the library.
Definition: Platform_unix.h:88
#define NLIB_VIS_PUBLIC
Symbols for functions and classes are made available outside of the library.
Definition: Platform_unix.h:89
int GetColumn() const noexcept
Gets the current column.
Definition: TextReader.h:121
errno_t GetErrorValue() const noexcept
This function can get the cause of the error when reading has failed.
Definition: TextReader.h:118
uint32_t nlib_utf32_t
Uses typedef to define as char32_t if that can be used. If not, it uses typedef to define as uint32_t...
Definition: Platform.h:286
size_t ReadDecimalString(char(&buf)[N]) noexcept
Calls ReadDecimalString(buf, N).
Definition: TextReader.h:99
bool Proceed(char c) noexcept
Consumes the next character and returns true if it equals c; otherwise consumes nothing and returns false.
Definition: TextReader.h:103
bool ReadUntil(size_t *len, nlib_utf8_t(&buf)[N], char delim) noexcept
Calls ReadUntil(len, buf, N, delim).
Definition: TextReader.h:88
The class for reading text from streams.
Definition: TextReader.h:33
The base class for input streams. This class cannot be instantiated.
Definition: InputStream.h:29
Class that wraps errno_t. This class improves visual representations in the Visual Studio debugger...
Definition: Config.h:492
int GetLine() const noexcept
Gets the current line number.
Definition: TextReader.h:120
#define NLIB_NOEXCEPT
Defines noexcept geared to the environment, or the equivalent.
Definition: Config.h:99
A file that contains the configuration information for each development environment.
int SkipWs() noexcept
Skips white-space characters (space, newline, tab, and return) in the stream and returns the number t...
Definition: TextReader.h:69
void SetError(errno_t e) const noexcept
Sets an error value.
Definition: TextReader.h:115
int Peek() noexcept
Returns the next character in the stream as UTF-32 without consuming it, or -1 if no more data is available.
Definition: TextReader.h:59
#define NLIB_NONNULL
Indicates that you cannot specify NULL for all arguments.
char nlib_utf8_t
Defines char with a typedef. Indicates that it is a UTF-8 string.
Definition: Platform.h:300
int errno_t
Indicates with an int-type typedef that a POSIX error value is returned as the return value...
Definition: NMalloc.h:37