nlib
TaggedTextParser.h
[詳解]
1 
2 /*--------------------------------------------------------------------------------*
3  Project: CrossRoad
4  Copyright (C)Nintendo All rights reserved.
5 
6  These coded instructions, statements, and computer programs contain proprietary
7  information of Nintendo and/or its licensed developers and are protected by
8  national and international copyright laws. They may not be disclosed to third
9  parties or copied or duplicated in any form, in whole or in part, without the
10  prior written consent of Nintendo.
11 
12  The content herein is highly confidential and should be handled accordingly.
13  *--------------------------------------------------------------------------------*/
14 
15 #pragma once
16 #ifndef INCLUDE_NN_NLIB_TAGGEDTEXTPARSER_H_
17 #define INCLUDE_NN_NLIB_TAGGEDTEXTPARSER_H_
18 
19 #include "nn/nlib/Config.h"
20 
21 NLIB_NAMESPACE_BEGIN
22 
24  public:
// Event codes reported by Next().
// Every k-prefixed enumerator has an upper-case alias with the same value.
enum Event {
    kNone = -1,         // placeholder value; Next() in this file never returns it
    kStartElement = 1,  // a start tag or an empty-element tag was read
    kEndElement = 2,    // an end tag was read, or an empty-element tag was closed
    kCharacters = 3,    // a run of text between tags was read
    kEndDocument = 4,   // the terminating NUL was reached
    kSyntaxError = 5,   // a tag was not terminated properly

    // Upper-case aliases of the enumerators above.
    NONE = kNone,
    START_ELEMENT = kStartElement,
    END_ELEMENT = kEndElement,
    CHARACTERS = kCharacters,
    END_DOCUMENT = kEndDocument,
    SYNTAX_ERROR = kSyntaxError
};
39  explicit TaggedTextParser(char* tagged_text) {
40  // change tagged_text in parsing
41  text_ = tagged_text;
42  cur_ = reinterpret_cast<unsigned char*>(tagged_text);
43  chr_ = *tagged_text;
44  *cur_++ = '\0';
45  is_elemtag_empty_ = false;
46  }
47  const char* GetText() const {
48  // tag name if kStartElement, kEndElement
49  // trimmed string if kCharacters
50  return text_;
51  }
52  Event Next() {
53  NLIB_TAGGEDTEXTPARSER_NEXT:
54  if (is_elemtag_empty_) {
55  is_elemtag_empty_ = false;
56  return kEndElement;
57  }
58  // SkipSpace
59  if (IsSpc(chr_)) {
60  for (;;) {
61  if (!IsSpc(*cur_)) {
62  chr_ = *cur_++;
63  break;
64  }
65  ++cur_;
66  }
67  }
68  if (chr_ == '\0') return kEndDocument;
69  if (chr_ == '<') {
70  unsigned char* p = cur_;
71  int c = *p;
72  if (c == '?' || c == '!') {
73  // skip until '>' if '<!' or '<?'.
74  ++p;
75  unsigned char* p2 = GotoGT(p);
76  if (*p2 == '\0') goto NLIB_TAGGEDTEXTPARSER_ERROR;
77  ++p2; // skip '>'
78  chr_ = *p2;
79  cur_ = *p2 != '\0' ? p2 + 1 : p2;
80  goto NLIB_TAGGEDTEXTPARSER_NEXT;
81  } else if (c == '/') {
82  // ETag
83  ++p; // move after '/'
84  // skip until space, '/' or '>'.
85  unsigned char* p2 = SpotName(p);
86  // skip until '>', even if there are attributes.
87  unsigned char* p3 = GotoGT(p2);
88  if (*p3 == '\0') goto NLIB_TAGGEDTEXTPARSER_ERROR;
89 
90  *p2 = '\0';
91  text_ = reinterpret_cast<const char*>(p);
92  ++p3; // skip after '>'
93  chr_ = *p3;
94  cur_ = *p3 != '\0' ? p3 + 1 : p3;
95  return kEndElement;
96  } else {
97  // skip until space, '/' or '>'.
98  unsigned char* p2 = SpotName(p);
99  // skip until '/' or '>'.
100  unsigned char* p3 = GotoSlashGT(p2);
101  if (*p3 == '\0') goto NLIB_TAGGEDTEXTPARSER_ERROR;
102  if (*p3 == '/') { // determine '>' or '/>'
103  // EmptyElemTag
104  ++p3; // move after '/'
105  if (*p3 != '>') goto NLIB_TAGGEDTEXTPARSER_ERROR;
106  is_elemtag_empty_ = true;
107  }
108  // STag or EmptyElemTag
109  *p2 = '\0';
110  text_ = reinterpret_cast<const char*>(p);
111  ++p3; // move after '>'
112  chr_ = *p3;
113  cur_ = *p3 != '\0' ? p3 + 1 : p3;
114  return kStartElement;
115  }
116  } else {
117  // it must be kCharacters
118  unsigned char* p = cur_ - 1;
119  for (;;) {
120  int c = *p;
121  if (c == '<' || c == '\0') {
122  text_ = reinterpret_cast<const char*>(cur_ - 1);
123  chr_ = *p;
124  cur_ = (chr_ != '\0') ? p + 1 : p;
125  unsigned char* pp = p - 1;
126  while (IsSpc(*pp)) --pp;
127  *(pp + 1) = '\0'; // trim the last space chars
128  return kCharacters;
129  }
130  ++p;
131  }
132  }
133  NLIB_TAGGEDTEXTPARSER_ERROR:
134  chr_ = '\0';
135  cur_ = nullptr;
136  text_ = "";
137  return kSyntaxError;
138  }
139 
140  private:
// Returns true if `c` is one of the four whitespace characters
// S = ( #x20 | #x9 | #xD | #xA ).
//
// `c` may be any int. Values outside 0x00-0x20, including negative values
// produced by sign-extending a plain char, are never whitespace and must not
// be used as a table index.
static bool IsSpc(int c) {
    static const char table[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,  // 0x00 - 0x0F
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x10 - 0x1F
                                 1};                                              // 0x20
    return (c >= 0 && c <= 0x20 && table[c] != 0);
}
// Finds the end of a tag name: returns a pointer to the first byte at or
// after `p` that is whitespace (space, tab, CR, LF), '/', '>' or the
// terminating NUL. The buffer must be NUL-terminated.
static unsigned char* SpotName(unsigned char* p) {
    for (;; ++p) {
        switch (*p) {
        case '\0':
        case '\t':
        case '\n':
        case '\r':
        case ' ':
        case '/':
        case '>':
            return p;
        default:
            break;
        }
    }
}
// Returns a pointer to the first '>' at or after `p`, or to the terminating
// NUL if there is none. The buffer must be NUL-terminated.
static unsigned char* GotoGT(unsigned char* p) {
    while (*p != '>' && *p != '\0') {
        ++p;
    }
    return p;
}
// Returns a pointer to the first '/' or '>' at or after `p`, or to the
// terminating NUL if neither occurs. The buffer must be NUL-terminated.
static unsigned char* GotoSlashGT(unsigned char* p) {
    while (*p != '/' && *p != '>' && *p != '\0') {
        ++p;
    }
    return p;
}
180 
181  private:
182  const char* text_;  // string handed out by GetText(): a tag name, a text run, or ""
183  unsigned char* cur_;  // read position, normally one past the byte cached in chr_; nullptr after a syntax error
184  int chr_;  // cached look-ahead byte; its slot in the buffer may be overwritten with a NUL terminator
185  bool is_elemtag_empty_;  // true after an empty-element tag, so that the next Next() returns kEndElement
186 };
187 
188 NLIB_NAMESPACE_END
189 
190 #endif // INCLUDE_NN_NLIB_TAGGEDTEXTPARSER_H_
TaggedTextParser(char *tagged_text)
コンストラクタです。
const char * GetText() const
Next()で取得した文字データへのポインタを返します。
Event Next()
次の要素やテキストを取得します。
XML風タグ付きテキストをパースする簡単なパーサーです。
開発環境別の設定が書かれるファイルです。
#define NLIB_FINAL
利用可能であればfinalが定義されます。そうでない場合は空文字列です。
Definition: Config.h:245