nlib
TextReader.h
Go to the documentation of this file.
1 
2 #pragma once
3 #ifndef INCLUDE_NN_NLIB_TEXTREADER_H_
4 #define INCLUDE_NN_NLIB_TEXTREADER_H_
5 
6 #include "nn/nlib/Config.h"
7 
8 NLIB_NAMESPACE_BEGIN
9 
10 class InputStream;
11 
12 // code snippets:
13 // TextReader reader;
14 // if (!reader.Init(&stream)) { error }
15 // while ((c = reader.Read()) >= 0)
16 // c is a codepoint
17 // if (!reader) { stream error if !stream, otherwise UTF-8 may be malformed }
18 // reader.Close();
19 // (stream.Close();)
21  public:
23  virtual ~TextReader() NLIB_NOEXCEPT;
24  bool Init(InputStream* stream) NLIB_NOEXCEPT NLIB_NONNULL;
26  if (m_Cur == m_BufEnd) {
27  this->FillBuffer();
28  if (m_Cur == m_BufEnd) return -1;
29  }
30  int rval = *reinterpret_cast<unsigned char*>(m_Cur);
31  if (!(rval & 0x80)) {
32  ++m_Cur;
33  if (rval == 0x0A) {
34  m_BegLinePos = m_BasePos + (m_Cur - &m_Buf[0]);
35  ++m_Line;
36  }
37  return rval;
38  }
39  if (!m_Utf32Cache) ConstructUtf32Cache();
40  m_Cur += m_NextStep;
41  rval = static_cast<int>(m_Utf32Cache);
42  m_Utf32Cache = 0;
43  return rval;
44  }
46  if (m_Cur == m_BufEnd) {
47  this->FillBuffer();
48  if (m_Cur == m_BufEnd) return -1;
49  }
50  int rval = *reinterpret_cast<unsigned char*>(m_Cur);
51  if (!(rval & 0x80)) return rval;
52  if (!m_Utf32Cache) ConstructUtf32Cache();
53  return static_cast<int>(m_Utf32Cache);
54  }
56  // skip ' ' \t \n (\r)
57  if (m_Cur + 1 < m_BufEnd) {
58  unsigned char c = *reinterpret_cast<unsigned char*>(m_Cur);
59  if (c > ' ') return 0;
60  if (c == ' ') {
61  ++m_Cur;
62  c = *reinterpret_cast<unsigned char*>(m_Cur);
63  if (c > ' ') {
64  m_Utf32Cache = 0;
65  return 1;
66  }
67  return this->SkipWs_(1);
68  }
69  }
70  return this->SkipWs_(0);
71  }
72  bool ReadUntil(size_t* len, char* buf, size_t n, char delim) NLIB_NOEXCEPT NLIB_NONNULL;
73  template <size_t N>
74  bool ReadUntil(size_t* len, char (&buf)[N], char delim) NLIB_NOEXCEPT { // NOLINT
75  return this->ReadUntil(len, &buf[0], N, delim);
76  }
77  template <class T>
78  bool ReadUntil(size_t* len, char* buf, size_t n, T pred) NLIB_NOEXCEPT;
79  template <class T, size_t N>
80  bool ReadUntil(size_t* len, char (&buf)[N], T pred) NLIB_NOEXCEPT { // NOLINT
81  return this->ReadUntil(len, buf, N, pred);
82  }
83  size_t ReadDecimalString(char* buf, size_t n) NLIB_NOEXCEPT NLIB_NONNULL;
84  template <size_t N>
85  size_t ReadDecimalString(char (&buf)[N]) NLIB_NOEXCEPT {
86  return this->ReadDecimalString(buf, N);
87  }
88  bool Proceed(const char* str, size_t n) NLIB_NOEXCEPT NLIB_NONNULL;
89  bool Proceed(char c) NLIB_NOEXCEPT {
90  NLIB_ASSERT(!(c & 0x80) && c != 0x0A);
91  if (m_Cur == m_BufEnd) {
92  this->FillBuffer();
93  if (m_Cur == m_BufEnd) return false;
94  }
95  if (*m_Cur != c) return false;
96  ++m_Cur;
97  return true;
98  }
99  bool ProceedEx(const char* str) NLIB_NOEXCEPT;
100 
101  int ReadAsUtf8(char* b0, char* b1, char* b2, char* b3) NLIB_NOEXCEPT {
102  if (m_Cur == m_BufEnd) {
103  this->FillBuffer();
104  if (m_Cur == m_BufEnd) return 0;
105  }
106  *b0 = m_Cur[0];
107  int c = m_Cur[0];
108  if (!(c & 0x80)) {
109  m_Utf32Cache = static_cast<nlib_utf32_t>(c);
110  m_Cur += 1;
111  if (c == 0x0A) {
112  m_BegLinePos = m_BasePos + (m_Cur - &m_Buf[0]);
113  ++m_Line;
114  }
115  return 1;
116  }
117  m_Utf32Cache = 0;
118  switch (c & 0xF0) {
119  case 0xC0:
120  case 0xD0:
121  *b1 = m_Cur[1];
122  m_Cur += 2;
123  return 2;
124  case 0xE0:
125  *b1 = m_Cur[1];
126  *b2 = m_Cur[2];
127  m_Cur += 3;
128  return 3;
129  default:
130  *b1 = m_Cur[1];
131  *b2 = m_Cur[2];
132  *b3 = m_Cur[3];
133  m_Cur += 4;
134  return 4;
135  }
136  }
137  int ReadAsUtf16(nlib_utf16_t* upper, nlib_utf16_t* lower) NLIB_NOEXCEPT {
138  int utf32 = this->Read();
139  return utf32 != -1 ? nlib_utf32char_to_utf16(upper, lower, utf32) : 0;
140  }
141  int PeekAsUtf16(nlib_utf16_t* upper, nlib_utf16_t* lower) NLIB_NOEXCEPT {
142  int utf32 = this->Peek();
143  return utf32 != -1 ? nlib_utf32char_to_utf16(upper, lower, utf32) : 0;
144  }
145  bool Close() NLIB_NOEXCEPT;
146  void SetError(errno_t e) const NLIB_NOEXCEPT {
147  if (m_ErrorValue == 0) m_ErrorValue = e;
148  }
149  errno_t GetErrorValue() const NLIB_NOEXCEPT { return m_ErrorValue; }
150  InputStream* GetStream() NLIB_NOEXCEPT { return m_Stream; }
151  int GetLine() const NLIB_NOEXCEPT { return m_Line; }
152  int GetColumn() const NLIB_NOEXCEPT {
153  return static_cast<int>((m_Cur - &m_Buf[0]) + 1 + m_BasePos - m_BegLinePos);
154  }
155  NLIB_SAFE_BOOL(TextReader, GetErrorValue() == 0)
156 
157  protected:
158  char* GetCur() NLIB_NOEXCEPT { return m_Cur; }
159  char* GetBufEnd() NLIB_NOEXCEPT { return m_BufEnd; }
160  void SetBufEnd(char* p) NLIB_NOEXCEPT { m_BufEnd = p; }
161 
162  // checks UTF-8 validity, converts CRLF to LF, and convert CR to LF
163  virtual void FillBuffer_() NLIB_NOEXCEPT;
164 
165  private:
166  void FillBuffer() NLIB_NOEXCEPT { this->FillBuffer_(); }
167  NLIB_VIS_HIDDEN void CountNewLine(const char* mcur, const char* mend) NLIB_NOEXCEPT;
168  void ConstructUtf32Cache() NLIB_NOEXCEPT;
169  int SkipWs_(int base) NLIB_NOEXCEPT;
170 
171  private:
172  char m_Buf[512 + 3 + 1];
173  nlib_utf32_t m_Utf32Cache;
174  char* m_Cur;
175  char* m_BufEnd;
176  size_t m_NextStep;
177  InputStream* m_Stream;
178  mutable errno_t m_ErrorValue;
179  int m_Line;
180  size_t m_BegLinePos;
181  size_t m_BasePos;
182 
183  NLIB_DISALLOW_COPY_AND_ASSIGN(TextReader);
184 };
185 
186 template <class T>
187 bool TextReader::ReadUntil(size_t* len, char* buf, size_t n, T pred) NLIB_NOEXCEPT {
188  // NOTICE: not NULL terminated
189  if (!buf) {
190  *len = 0;
191  return false;
192  }
193  m_Utf32Cache = 0;
194  char* p = buf;
195  char* pend = buf + n;
196  for (;;) {
197  if (m_Cur == m_BufEnd) {
198  this->FillBuffer();
199  if (m_Cur == m_BufEnd) {
200  *len = p - buf;
201  return false;
202  }
203  }
204  // T::operator()(const char* ptr);
205  while (p != pend && m_Cur != m_BufEnd) {
206  if ((*reinterpret_cast<unsigned char*>(m_Cur) & 0xC0) != 0x80 &&
207  pred((const char*)m_Cur)) { // NOLINT
208  *len = p - buf;
209  return true;
210  }
211  *p = *m_Cur;
212  ++p;
213  ++m_Cur;
214  }
215  if (p == pend) {
216  if (m_Cur != m_BufEnd && (*reinterpret_cast<unsigned char*>(m_Cur) & 0xC0) == 0x80) {
217  do {
218  --p;
219  --m_Cur;
220  } while ((*reinterpret_cast<unsigned char*>(m_Cur) & 0xC0) == 0x80);
221  }
222  *len = p - buf;
223  return false;
224  }
225  }
226 }
227 
228 NLIB_NAMESPACE_END
229 
230 #endif // INCLUDE_NN_NLIB_TEXTREADER_H_
#define NLIB_NOEXCEPT
Defines noexcept geared to the environment, or the equivalent.
Definition: Platform.h:2151
bool ReadUntil(size_t *len, char(&buf)[N], char delim) noexcept
Calls ReadUntil(len, buf, N, delim).
Definition: TextReader.h:74
int PeekAsUtf16(nlib_utf16_t *upper, nlib_utf16_t *lower) noexcept
Stores the one code point from the start of the stream as UTF-16 in upper and lower.
Definition: TextReader.h:141
errno_t GetErrorValue() const noexcept
This function can get the cause of the error when reading has failed.
Definition: TextReader.h:149
#define NLIB_NONNULL
Indicates that you cannot specify NULL for all arguments.
Definition: Platform_unix.h:66
#define NLIB_DISALLOW_COPY_AND_ASSIGN(TypeName)
Prohibits use of the copy constructor and assignment operator for the class specified by TypeName...
Definition: Config.h:126
#define NLIB_SAFE_BOOL(class_name, exp)
Defines a safe operator bool function in the class. Uses the C++11 explicit bool if it is available f...
Definition: Config.h:141
int Read() noexcept
Reads one character from the stream and returns UTF-32 data.
Definition: TextReader.h:25
InputStream * GetStream() noexcept
Gets the stream for the text reader to read.
Definition: TextReader.h:150
#define NLIB_VIS_HIDDEN
Symbols for functions and classes are not made available outside of the library.
Definition: Platform_unix.h:50
uint32_t nlib_utf32_t
Uses typedef to define as char32_t if that can be used. If not, it uses typedef to define as uint32_t...
Definition: Platform.h:2161
size_t ReadDecimalString(char(&buf)[N]) noexcept
Calls ReadDecimalString(buf, N).
Definition: TextReader.h:85
bool Proceed(char c) noexcept
Advances the stream by the amount of the character specified by c.
Definition: TextReader.h:89
uint16_t nlib_utf16_t
Uses typedef to define as char16_t if that can be used. If not, it uses typedef to define as uint16_t...
Definition: Platform.h:2160
The class for reading text from streams.
Definition: TextReader.h:20
The base class for input streams. This class cannot be instantiated.
Definition: InputStream.h:15
A file that contains the configuration information for each development environment.
int ReadAsUtf16(nlib_utf16_t *upper, nlib_utf16_t *lower) noexcept
Reads one code point from the stream and stores it as UTF-16 in upper and lower.
Definition: TextReader.h:137
int GetLine() const noexcept
Gets the current line number.
Definition: TextReader.h:151
int ReadAsUtf8(char *b0, char *b1, char *b2, char *b3) noexcept
Reads one code point from the stream and stores it as UTF-8 in b0, b1, b2, and b3.
Definition: TextReader.h:101
bool ReadUntil(size_t *len, char(&buf)[N], T pred) noexcept
Calls ReadUntil(len, buf, N, pred).
Definition: TextReader.h:80
int GetColumn() const noexcept
Gets the current column.
Definition: TextReader.h:152
#define NLIB_VIS_PUBLIC
Symbols for functions and classes are made available outside of the library.
Definition: Platform_unix.h:51
int SkipWs() noexcept
Skips white-space characters (space, newline, tab, and return) in the stream and returns the number t...
Definition: TextReader.h:55
bool Read(BinaryReader *r, T *x)
You can read to user-defined class objects by specializing this function template.
Definition: BinaryReader.h:158
int Peek() noexcept
Returns one character from the start of the stream in UTF-32.
Definition: TextReader.h:45
NLIB_CHECK_RESULT int nlib_utf32char_to_utf16(nlib_utf16_t *upper, nlib_utf16_t *lower, nlib_utf32_t utf32) noexcept
Converts a single UTF-32 character into UTF-16.
int errno_t
Indicates with an int-type typedef that a POSIX error value is returned as the return value...
Definition: NMalloc.h:24