nlib
TaggedTextParser.h
Go to the documentation of this file.
1 
2 #pragma once
3 #ifndef INCLUDE_NN_NLIB_TAGGEDTEXTPARSER_H_
4 #define INCLUDE_NN_NLIB_TAGGEDTEXTPARSER_H_
5 
6 #include "nn/nlib/Config.h"
7 
8 NLIB_NAMESPACE_BEGIN
9 
11  public:
// Kinds of result reported by Next().
enum Event {
    NONE = -1,          // placeholder value; Next() itself never returns it
    START_ELEMENT = 1,  // a start tag "<name ...>" or an empty-element tag "<name/>"
    END_ELEMENT = 2,    // an end tag "</name>", or the implicit end of "<name/>"
    CHARACTERS = 3,     // a run of character data between tags
    END_DOCUMENT = 4,   // the terminating '\0' of the input was reached
    SYNTAX_ERROR = 5    // a tag was not closed with '>' (or "/" was not followed by '>')
};
20  explicit TaggedTextParser(char* tagged_text) {
21  // change tagged_text in parsing
22  text_ = tagged_text;
23  cur_ = reinterpret_cast<unsigned char*>(tagged_text);
24  chr_ = *tagged_text;
25  *cur_++ = '\0';
26  is_elemtag_empty_ = false;
27  }
28  const char* GetText() const {
29  // tag name if START_ELEMENT, END_ELEMENT
30  // trimmed string if CHARACTERS
31  return text_;
32  }
33  Event Next() {
34  NLIB_TAGGEDTEXTPARSER_NEXT:
35  if (is_elemtag_empty_) {
36  is_elemtag_empty_ = false;
37  return END_ELEMENT;
38  }
39  // SkipSpace
40  if (IsSpc(chr_)) {
41  for (;;) {
42  if (!IsSpc(*cur_)) {
43  chr_ = *cur_++;
44  break;
45  }
46  ++cur_;
47  }
48  }
49  if (chr_ == '\0') return END_DOCUMENT;
50  if (chr_ == '<') {
51  unsigned char* p = cur_;
52  int c = *p;
53  if (c == '?' || c == '!') {
54  // skip until '>' if '<!' or '<?'.
55  ++p;
56  unsigned char* p2 = GotoGT(p);
57  if (*p2 == '\0') goto NLIB_TAGGEDTEXTPARSER_ERROR;
58  ++p2; // skip '>'
59  chr_ = *p2;
60  cur_ = *p2 != '\0' ? p2 + 1 : p2;
61  goto NLIB_TAGGEDTEXTPARSER_NEXT;
62  } else if (c == '/') {
63  // ETag
64  ++p; // move after '/'
65  // skip until space, '/' or '>'.
66  unsigned char* p2 = SpotName(p);
67  // skip until '>', even if there are attributes.
68  unsigned char* p3 = GotoGT(p2);
69  if (*p3 == '\0') goto NLIB_TAGGEDTEXTPARSER_ERROR;
70 
71  *p2 = '\0';
72  text_ = reinterpret_cast<const char*>(p);
73  ++p3; // skip after '>'
74  chr_ = *p3;
75  cur_ = *p3 != '\0' ? p3 + 1 : p3;
76  return END_ELEMENT;
77  } else {
78  // skip until space, '/' or '>'.
79  unsigned char* p2 = SpotName(p);
80  // skip until '/' or '>'.
81  unsigned char* p3 = GotoSlashGT(p2);
82  if (*p3 == '\0') goto NLIB_TAGGEDTEXTPARSER_ERROR;
83  if (*p3 == '/') { // determine '>' or '/>'
84  // EmptyElemTag
85  ++p3; // move after '/'
86  if (*p3 != '>') goto NLIB_TAGGEDTEXTPARSER_ERROR;
87  is_elemtag_empty_ = true;
88  }
89  // STag or EmptyElemTag
90  *p2 = '\0';
91  text_ = reinterpret_cast<const char*>(p);
92  ++p3; // move after '>'
93  chr_ = *p3;
94  cur_ = *p3 != '\0' ? p3 + 1 : p3;
95  return START_ELEMENT;
96  }
97  } else {
98  // it must be CHARACTERS
99  unsigned char* p = cur_ - 1;
100  for (;;) {
101  int c = *p;
102  if (c == '<' || c == '\0') {
103  text_ = reinterpret_cast<const char*>(cur_ - 1);
104  chr_ = *p;
105  cur_ = (chr_ != '\0') ? p + 1 : p;
106  unsigned char* pp = p - 1;
107  while (IsSpc(*pp)) --pp;
108  *(pp + 1) = '\0'; // trim the last space chars
109  return CHARACTERS;
110  }
111  ++p;
112  }
113  }
114  NLIB_TAGGEDTEXTPARSER_ERROR:
115  chr_ = '\0';
116  cur_ = NULL;
117  text_ = "";
118  return SYNTAX_ERROR;
119  }
120 
121  private:
// Returns true if `c` is XML white space:
// S = ( #x20 | #x9 | #xD | #xA )
// Any other value, including negative ones, yields false.
static bool IsSpc(int c) {
    static const char table[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,  // 0x00 - 0x0F
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x10 - 0x1F
                                 1};
    // The lower bound matters: a negative c (e.g. a sign-extended char)
    // would otherwise index before the start of the table.
    return (c >= 0 && c <= 0x20 && table[c] != 0);
}
// Finds the end of a tag name that starts at `p`.
// Returns a pointer to the first byte that is white space
// (#x20, #x9, #xD, #xA), '/', '>' or the terminating '\0'.
static unsigned char* SpotName(unsigned char* p) {
    for (unsigned char* scan = p;; ++scan) {
        switch (*scan) {
        case '\0':
        case '\t':
        case '\n':
        case '\r':
        case ' ':
        case '/':
        case '>':
            return scan;
        default:
            break;
        }
    }
}
// Scans forward from `p` and returns a pointer to the first '>' found,
// or to the terminating '\0' if the string ends first.
static unsigned char* GotoGT(unsigned char* p) {
    while (*p != '>' && *p != '\0') {
        ++p;
    }
    return p;
}
// Scans forward from `p` and returns a pointer to the first '/' or '>' found,
// or to the terminating '\0' if the string ends first.
static unsigned char* GotoSlashGT(unsigned char* p) {
    while (*p != '\0') {
        if (*p == '/' || *p == '>') {
            break;
        }
        ++p;
    }
    return p;
}
161 
162  private:
163  const char* text_;  // value returned by GetText(); set by Next() to the tag name or character data
164  unsigned char* cur_;  // read position: just past the byte cached in chr_ (NULL after a syntax error)
165  int chr_;  // current byte, cached because its slot in the buffer may be overwritten with '\0'
166  bool is_elemtag_empty_;  // true after "<name/>" returned START_ELEMENT; the next Next() returns END_ELEMENT
167 };
168 
169 NLIB_NAMESPACE_END
170 
171 #endif // INCLUDE_NN_NLIB_TAGGEDTEXTPARSER_H_
TaggedTextParser(char *tagged_text)
Instantiates the object.
const char * GetText() const
Returns the pointer to the tag name or character data obtained by the most recent call to the Next function.
Event Next()
Gets the next element or text.
A simple parser for parsing XML-like tagged text.
A file that contains the configuration information for each development environment.
#define NLIB_FINAL
Expands to final if the keyword is available for use. If not, expands to nothing.
Definition: Config.h:211