nlib
TextReader.h
Go to the documentation of this file.
1 
2 #pragma once
3 #ifndef INCLUDE_NN_NLIB_TEXTREADER_H_
4 #define INCLUDE_NN_NLIB_TEXTREADER_H_
5 
6 #include "nn/nlib/Config.h"
7 
8 NLIB_NAMESPACE_BEGIN
9 
10 class InputStream;
11 
12 // code snippets:
13 // TextReader reader;
14 // if (reader.Init() != 0 || reader.Open(&stream) != 0) { error }
15 // while ((c = reader.Read()) >= 0)
16 // c is a codepoint
17 // if (!reader) { stream error if !stream, otherwise UTF-8 may be malformed }
18 // reader.Close();
19 // (stream.Close();)
// Public interface of TextReader.
21  public:
// Virtual destructor; only declared in this header.
23  virtual ~TextReader() NLIB_NOEXCEPT;
// Init() prepares the reader and Open() attaches it to an input stream.
// Both return an errno_t; the usage snippet at the top of this listing
// treats any nonzero result as a failure.
24  errno_t Init() NLIB_NOEXCEPT;
25  errno_t Open(InputStream* stream) NLIB_NOEXCEPT;
// Reads and consumes one character.
// Returns the character value (the byte itself for ASCII, otherwise the
// cached UTF-32 value), or -1 when the buffer is still empty after a
// refill attempt.
26  int Read() NLIB_NOEXCEPT {
27  if (cur_ == bufend_) {
// Buffer exhausted: ask for more data; if none arrived, report -1.
28  this->FillBuffer();
29  if (cur_ == bufend_) return -1;
30  }
31  int rval = *reinterpret_cast<unsigned char*>(cur_);
// High bit clear: a single-byte (ASCII) character, handled inline.
32  if (!(rval & 0x80)) {
33  ++cur_;
34  if (rval == 0x0A) {
// Line feed: record the position just past it as the start of the new
// line (GetColumn() then reports column 1) and bump the line counter.
35  pos_of_line_ = pos_of_buf_ + (cur_ - &buf_[0]);
36  ++line_;
37  }
38  return rval;
39  }
// Multi-byte character: decode it unless a value is already cached (Peek()
// leaves one behind). ConstructUtf32Cache() is defined elsewhere; it
// presumably fills utf32_cache_ and utf8_bytecount_ -- TODO confirm.
40  if (!utf32_cache_) ConstructUtf32Cache();
// Skip the whole encoded sequence, then invalidate the cache because it
// no longer describes the character at cur_.
41  cur_ += utf8_bytecount_;
42  rval = static_cast<int>(utf32_cache_);
43  utf32_cache_ = 0;
44  return rval;
45  }
// Returns the next character without consuming it, or -1 when the buffer
// is still empty after a refill attempt. cur_ is not advanced here.
46  int Peek() NLIB_NOEXCEPT {
47  if (cur_ == bufend_) {
48  this->FillBuffer();
49  if (cur_ == bufend_) return -1;
50  }
51  int rval = *reinterpret_cast<unsigned char*>(cur_);
// ASCII byte: its value is the character value.
52  if (!(rval & 0x80)) return rval;
// Multi-byte character: decode once and keep the result in utf32_cache_
// so that a following Read()/Peek() can reuse it instead of decoding again.
53  if (!utf32_cache_) ConstructUtf32Cache();
54  return static_cast<int>(utf32_cache_);
55  }
// Skips white-space characters at the current position.
// The inline part handles the two most common cases cheaply and delegates
// everything else to SkipWs_() (defined elsewhere). The fast paths return
// 0 (nothing skipped) or 1 (one space skipped); the argument passed to
// SkipWs_() is presumably the count already skipped -- TODO confirm.
56  int SkipWs() NLIB_NOEXCEPT {
57  // skip ' ' \t \n (\r)
// Fast path only when at least two bytes are buffered, so that the byte
// after a leading space can be inspected without refilling.
58  if (cur_ + 1 < bufend_) {
59  unsigned char c = *reinterpret_cast<unsigned char*>(cur_);
// Any byte above ' ' is not white space: nothing to skip.
60  if (c > ' ') return 0;
61  if (c == ' ') {
62  ++cur_;
63  c = *reinterpret_cast<unsigned char*>(cur_);
64  if (c > ' ') {
// Exactly one space followed by a non-white-space byte.
65  utf32_cache_ = 0;
66  return 1;
67  }
// More white space may follow; one space has already been consumed.
68  return this->SkipWs_(1);
69  }
70  }
// Slow path: buffer nearly empty, or the current byte is below ' '
// (for example a tab or newline).
71  return this->SkipWs_(0);
72  }
// ReadUntil overloads: copy bytes from the stream into buf, storing the
// number of bytes copied in *len.
// Delimiter form, defined elsewhere. Judging by the predicate form defined
// later in this listing, it presumably stops at delim and returns true when
// delim was found -- TODO confirm.
73  bool ReadUntil(size_t* len, char* buf, size_t n, char delim) NLIB_NOEXCEPT NLIB_NONNULL;
// Array convenience wrapper: the capacity N is deduced from the array type.
74  template <size_t N>
75  bool ReadUntil(size_t* len, char (&buf)[N], char delim) NLIB_NOEXCEPT { // NOLINT
76  return this->ReadUntil(len, &buf[0], N, delim);
77  }
// Predicate form: pred is called as pred(const char*) on the current read
// position; the definition appears after the class in this listing.
78  template <class T>
79  bool ReadUntil(size_t* len, char* buf, size_t n, T pred) NLIB_NOEXCEPT;
// Array convenience wrapper for the predicate form.
80  template <class T, size_t N>
81  bool ReadUntil(size_t* len, char (&buf)[N], T pred) NLIB_NOEXCEPT { // NOLINT
82  return this->ReadUntil(len, buf, N, pred);
83  }
// Reads into buf (capacity n) and returns a size_t; defined elsewhere.
// NOTE(review): by its name this presumably reads a decimal number string
// and returns its length -- confirm against the implementation.
84  size_t ReadDecimalString(char* buf, size_t n) NLIB_NOEXCEPT NLIB_NONNULL;
// Array convenience wrapper: the capacity N is deduced from the array type.
85  template <size_t N>
86  size_t ReadDecimalString(char (&buf)[N]) NLIB_NOEXCEPT {
87  return this->ReadDecimalString(buf, N);
88  }
// String form, defined elsewhere.
// NOTE(review): presumably consumes the n bytes of str if they come next
// in the stream -- confirm against the implementation.
89  bool Proceed(const char* str, size_t n) NLIB_NOEXCEPT NLIB_NONNULL;
// Consumes the next byte if it equals c and returns true; otherwise
// returns false and leaves cur_ where it was.
90  bool Proceed(char c) NLIB_NOEXCEPT {
// c must be ASCII and must not be a line feed. This function never
// updates line_ or pos_of_line_, which is presumably why 0x0A is excluded.
91  NLIB_ASSERT(!(c & 0x80) && c != 0x0A);
92  if (cur_ == bufend_) {
// Buffer exhausted: refill; an empty buffer afterwards means no match.
93  this->FillBuffer();
94  if (cur_ == bufend_) return false;
95  }
96  if (*cur_ != c) return false;
97  ++cur_;
98  return true;
99  }
// Declarations only; both are defined elsewhere.
// NOTE(review): ProceedEx presumably matches a NUL-terminated string with
// extended rules compared to Proceed() -- confirm against the implementation.
100  bool ProceedEx(const char* str) NLIB_NOEXCEPT;
// Close() ends use of the reader. The usage snippet at the top of this
// listing closes the stream separately after calling it.
101  bool Close() NLIB_NOEXCEPT;
// Records an error value. Only the first error is kept: once errno_ is
// nonzero, later calls do not overwrite it. Callable on a const object
// because errno_ is declared mutable.
102  void SetError(errno_t e) const NLIB_NOEXCEPT {
103  if (errno_ == 0) errno_ = e;
104  }
// Returns the recorded error value (0 when no error has been recorded).
105  errno_t GetErrorValue() const NLIB_NOEXCEPT { return errno_; }
// Returns the stream pointer held in stream_.
106  InputStream* GetStream() NLIB_NOEXCEPT { return stream_; }
// Returns the line counter, which Read() increments on every line feed.
107  int GetLine() const NLIB_NOEXCEPT { return line_; }
// Returns the 1-based column of the current position: the distance from
// the recorded start of the current line (pos_of_line_) to the current
// position (pos_of_buf_ plus the offset of cur_ inside buf_), plus one.
// The distance is computed from byte positions, not from code points.
108  int GetColumn() const NLIB_NOEXCEPT {
109  return static_cast<int>((cur_ - &buf_[0]) + 1 + pos_of_buf_ - pos_of_line_);
110  }
// Boolean conversion: the reader evaluates to true while no error has
// been recorded (GetErrorValue() == 0).
111  NLIB_SAFE_BOOL(TextReader, GetErrorValue() == 0)
112 
// Protected buffer accessors, available to FillBuffer_() overrides.
113  protected:
// Current read position inside the buffer.
114  char* GetCur() NLIB_NOEXCEPT { return cur_; }
// One past the last valid byte in the buffer.
115  char* GetBufEnd() NLIB_NOEXCEPT { return bufend_; }
// Sets the end of valid data, e.g. after new bytes were placed in the buffer.
116  void SetBufEnd(char* p) NLIB_NOEXCEPT { bufend_ = p; }
117 
// Virtual refill hook, invoked through the private FillBuffer() whenever
// the readers find cur_ == bufend_. Callers detect "no more data" by
// cur_ still being equal to bufend_ after the call.
118  // checks UTF-8 validity, converts CRLF to LF, and converts CR to LF
119  virtual void FillBuffer_() NLIB_NOEXCEPT;
120 
// Private helpers.
121  private:
// Non-virtual entry point that forwards to the virtual FillBuffer_().
122  void FillBuffer() NLIB_NOEXCEPT { this->FillBuffer_(); }
// Declared here, defined elsewhere.
// NOTE(review): presumably updates the line bookkeeping for the bytes in
// [mcur, mend) -- confirm against the implementation.
123  NLIB_VIS_HIDDEN void CountNewLine(const char* mcur, const char* mend) NLIB_NOEXCEPT;
// Called by Read()/Peek() when utf32_cache_ is 0 and the current byte has
// its high bit set; presumably decodes the character at cur_ into
// utf32_cache_ and sets utf8_bytecount_ -- TODO confirm.
124  void ConstructUtf32Cache() NLIB_NOEXCEPT;
// Out-of-line part of SkipWs(); called with 0 or 1 from the inline code.
125  int SkipWs_(int base) NLIB_NOEXCEPT;
126 
// Data members.
127  private:
// Read buffer. NOTE(review): the "+ 3 + 1" is presumably slack for an
// incomplete UTF-8 sequence plus one extra byte -- confirm.
128  char buf_[512 + 3 + 1];
// Decoded value of the multi-byte character at cur_; 0 means "not cached".
129  nlib_utf32_t utf32_cache_;
// Next unread byte in buf_.
130  char* cur_;
// One past the last valid byte in buf_.
131  char* bufend_;
// Number of bytes Read() skips for the cached multi-byte character.
132  size_t utf8_bytecount_;
// Stream returned by GetStream().
133  InputStream* stream_;
// First recorded error; mutable so the const SetError() can set it.
134  mutable ErrnoT errno_;
// Line counter, incremented by Read() on each line feed.
135  int line_;
// Position of the first byte of the current line, in the same coordinate
// as pos_of_buf_ + (offset within buf_).
136  size_t pos_of_line_;
// Position corresponding to buf_[0]; presumably the offset of the buffer
// contents within the stream -- TODO confirm.
137  size_t pos_of_buf_;
138 
140 };
141 
// Copies bytes from the stream into buf until pred reports a stop position.
//   len  - receives the number of bytes copied (dereferenced unconditionally).
//   buf  - destination; a null pointer yields *len = 0 and false.
//   n    - capacity of buf in bytes.
//   pred - callable taking const char* (the current read position); it is
//          only consulted at bytes that are not UTF-8 continuation bytes.
// Returns true when pred returned true; the byte at which pred matched is
// neither copied nor consumed. Returns false when the input ran out or buf
// filled up first. The output is not NUL-terminated.
142 template <class T>
143 bool TextReader::ReadUntil(size_t* len, char* buf, size_t n, T pred) NLIB_NOEXCEPT {
144  // NOTICE: not NULL terminated
145  if (!buf) {
146  *len = 0;
147  return false;
148  }
// Drop any cached decoded character; cur_ is moved byte by byte below.
149  utf32_cache_ = 0;
150  char* p = buf;
151  char* pend = buf + n;
152  for (;;) {
153  if (cur_ == bufend_) {
// Buffer exhausted: refill; if still empty, report what was copied so far.
154  this->FillBuffer();
155  if (cur_ == bufend_) {
156  *len = p - buf;
157  return false;
158  }
159  }
160  // T::operator()(const char* ptr);
// Copy while there is room in the output and data in the buffer.
161  while (p != pend && cur_ != bufend_) {
// (byte & 0xC0) != 0x80 means "not a continuation byte", so pred only
// sees positions where a character starts.
162  if ((*reinterpret_cast<unsigned char*>(cur_) & 0xC0) != 0x80 &&
163  pred((const char*)cur_)) { // NOLINT
164  *len = p - buf;
165  return true;
166  }
167  *p = *cur_;
168  ++p;
169  ++cur_;
170  }
171  if (p == pend) {
// Output full. If the next unread byte is a continuation byte, the last
// character was only partially copied: back both pointers up to the byte
// that starts it, so the output never ends inside a multi-byte sequence.
172  if (cur_ != bufend_ && (*reinterpret_cast<unsigned char*>(cur_) & 0xC0) == 0x80) {
173  do {
174  --p;
175  --cur_;
176  } while ((*reinterpret_cast<unsigned char*>(cur_) & 0xC0) == 0x80);
177  }
178  *len = p - buf;
179  return false;
180  }
181  }
182 }
183 
184 NLIB_NAMESPACE_END
185 
186 #endif // INCLUDE_NN_NLIB_TEXTREADER_H_
bool ReadUntil(size_t *len, char(&buf)[N], char delim) noexcept
Calls ReadUntil(len, buf, N, delim).
Definition: TextReader.h:75
#define NLIB_DISALLOW_COPY_AND_ASSIGN(TypeName)
Prohibits use of the copy constructor and assignment operator for the class specified by TypeName...
Definition: Config.h:145
#define NLIB_SAFE_BOOL(class_name, exp)
Defines a safe operator bool function in the class. Uses the C++11 explicit bool if it is available f...
Definition: Config.h:160
int Read() noexcept
Reads and consumes one character from the stream and returns it as a UTF-32 value, or -1 if no more data is available.
Definition: TextReader.h:26
InputStream * GetStream() noexcept
Gets the stream for the text reader to read.
Definition: TextReader.h:106
#define NLIB_VIS_HIDDEN
Symbols for functions and classes are not made available outside of the library.
Definition: Platform_unix.h:60
#define NLIB_VIS_PUBLIC
Symbols for functions and classes are made available outside of the library.
Definition: Platform_unix.h:61
int GetColumn() const noexcept
Gets the current column.
Definition: TextReader.h:108
errno_t GetErrorValue() const noexcept
This function can get the cause of the error when reading has failed.
Definition: TextReader.h:105
uint32_t nlib_utf32_t
Uses typedef to define as char32_t if that can be used. If not, it uses typedef to define as uint32_t...
Definition: Config.h:539
size_t ReadDecimalString(char(&buf)[N]) noexcept
Calls ReadDecimalString(buf, N).
Definition: TextReader.h:86
bool Proceed(char c) noexcept
Advances the stream by the amount of the character specified by c.
Definition: TextReader.h:90
The class for reading text from streams.
Definition: TextReader.h:20
The base class for input streams. This class cannot be instantiated.
Definition: InputStream.h:16
Class that wraps errno_t. This class improves visual representations in the Visual Studio debugger...
Definition: Config.h:474
int GetLine() const noexcept
Gets the current line number.
Definition: TextReader.h:107
#define NLIB_NOEXCEPT
Defines noexcept geared to the environment, or the equivalent.
Definition: Config.h:86
A file that contains the configuration information for each development environment.
bool ReadUntil(size_t *len, char(&buf)[N], T pred) noexcept
Calls ReadUntil(len, buf, N, pred).
Definition: TextReader.h:81
int SkipWs() noexcept
Skips white-space characters (space, newline, tab, and return) in the stream and returns the number t...
Definition: TextReader.h:56
void SetError(errno_t e) const noexcept
Sets an error value.
Definition: TextReader.h:102
int Peek() noexcept
Returns one character from the start of the stream in UTF-32.
Definition: TextReader.h:46
#define NLIB_NONNULL
Indicates that you cannot specify NULL for all arguments.
Definition: Platform_unix.h:76
int errno_t
Indicates with an int-type typedef that a POSIX error value is returned as the return value...
Definition: NMalloc.h:24