3 #ifndef INCLUDE_NN_NLIB_SUCCINCT_AHOCORASICKBUILDER_H_
4 #define INCLUDE_NN_NLIB_SUCCINCT_AHOCORASICKBUILDER_H_
13 #if defined(_MSC_VER) && defined(nx_succinct_EXPORTS)
14 #undef NLIB_VIS_PUBLIC
15 #define NLIB_VIS_PUBLIC NLIB_WINEXPORT
21 class AhoCorasickBuilder;
// Size constant for a buffer: 1024 * 1024 * 4 (4,194,304).
// NOTE(review): the unit (presumably bytes) and the owning class are not visible in this excerpt - confirm.
31 static const
size_t kBufSize = 1024 * 1024 * 4U;
// Two ReallocVec containers holding char* pointers.
// NOTE(review): how m_Data and m_Buf differ, and who owns the pointed-to memory,
// is not visible in this excerpt - confirm against the implementation.
33 ReallocVec<
char*> m_Data;
34 ReallocVec<
char*> m_Buf;
44 PrefixString(
const unsigned char* first,
const unsigned char* last) NLIB_NOEXCEPT {
49 NLIB_VIS_HIDDEN bool ReverseLexOrderLess(
const PrefixString& rhs)
const NLIB_NOEXCEPT {
50 NLIB_ASSERT(m_Rbegin && rhs.m_Rbegin);
54 const unsigned char* rbeg = m_Rbegin;
55 const unsigned char* rbeg_rhs = rhs.m_Rbegin;
57 unsigned char c = *rbeg;
58 unsigned char c_rhs = *rbeg_rhs;
59 if (c < c_rhs)
return true;
60 if (c > c_rhs)
return false;
61 if (c == 0)
return false;
67 NLIB_ASSERT(m_Rbegin);
68 return m_Rbegin - length() + 1;
71 NLIB_ASSERT(m_Rbegin);
75 NLIB_ASSERT(m_Rbegin);
76 return *m_Rbegin == 0;
79 NLIB_ASSERT(m_Rbegin);
80 const unsigned char* p;
81 const unsigned char* rbeg = m_Rbegin;
82 for (p = rbeg; *p; --p) {
87 NLIB_ASSERT(m_Rbegin);
88 return !*(m_Rbegin + 1);
// Starting point for backward traversal of the string: the visible code reads *m_Rbegin
// and walks toward lower addresses (--p) until it reaches a 0 byte.
92 const unsigned char* m_Rbegin;
// Default constructor: node id 0, no failure link and an empty edge list (both pointers NULL).
104 NLIB_VIS_HIDDEN AcNode() NLIB_NOEXCEPT : m_NodeId(0), m_Failure(NULL), m_Edges(NULL) {}
107 return m_Prefix.ReverseLexOrderLess(rhs.m_Prefix);
109 NLIB_VIS_HIDDEN unsigned char GetLevelValue(
size_t level)
const NLIB_NOEXCEPT {
110 return level < m_NodeId ? *(m_Prefix.end() - 1 - level) : 0;
// Returns the value currently stored in m_NodeId.
// NOTE(review): m_NodeId is also assigned m_Prefix.length() elsewhere in this class, so the
// value may be a prefix length rather than a final id depending on when this is called - confirm.
112 NLIB_VIS_HIDDEN uint32_t GetNodeId() const NLIB_NOEXCEPT {
return m_NodeId; }
114 return m_NodeId != 0 && m_Prefix.IsTerminal();
// Declaration only; the definition is not part of this excerpt.
// NOTE(review): presumably returns the node reached from this node on input byte c -
// confirm what is returned when no such transition exists.
121 NLIB_VIS_HIDDEN const AcNode* Goto(const
unsigned char c) const NLIB_NOEXCEPT;
123 const
unsigned char* last) NLIB_NOEXCEPT;
126 for (failure = m_Failure;
127 failure != NULL && !failure->IsTerminal();
128 failure = failure->m_Failure) {
// Returns the failure link (m_Failure). The constructor initializes it to NULL,
// so NULL is returned until SetFailureArc() stores a node.
132 NLIB_VIS_HIDDEN AcNode* GetFailureArc() const NLIB_NOEXCEPT {
return m_Failure; }
// Returns the head of this node's edge list (m_Edges) as a pointer to const.
// The list is traversed via ->next; NULL means the node has no edges.
133 NLIB_VIS_HIDDEN const EdgeList* GetEdgeList() const NLIB_NOEXCEPT {
return m_Edges; }
135 for (EdgeList* e = m_Edges; e != NULL; e = e->next) {
136 if (e->label == c)
return e->dest;
// Returns a copy of this node's PrefixString (returned by value, not by reference).
140 NLIB_VIS_HIDDEN PrefixString GetPrefix() const NLIB_NOEXCEPT {
return m_Prefix; }
// Overwrites m_NodeId with nodeid.
143 NLIB_VIS_HIDDEN void SetNodeId(uint32_t nodeid) NLIB_NOEXCEPT { m_NodeId = nodeid; }
// Stores failure as this node's failure link; the pointer is stored as-is (no copy of the node).
144 NLIB_VIS_HIDDEN void SetFailureArc(AcNode* failure) NLIB_NOEXCEPT { m_Failure = failure; }
147 m_NodeId =
static_cast<uint32_t
>(m_Prefix.length());
// Prefix associated with this node; the node's ordering (ReverseLexOrderLess) and its
// terminal check (IsTerminal) are delegated to it.
152 PrefixString m_Prefix;
// Gives AhoCorasickBuilder access to this class's non-public members.
156 friend class ::nlib_ns::succinct::AhoCorasickBuilder;
// Declaration only (exported through NLIB_VIS_PUBLIC); the definition is not part of this excerpt.
// NOTE(review): presumably registers the n bytes starting at p as one pattern and
// returns false on failure - confirm against the implementation.
170 NLIB_VIS_PUBLIC bool AddPattern(const
void* p,
size_t n) NLIB_NOEXCEPT;
// Declaration only (exported through NLIB_VIS_PUBLIC); the definition is not part of this excerpt.
// The single-argument overload forwards here with len = strlen(str).
// NOTE(review): how str is split into individual words, and the meaning of the
// bool result, are not visible here - confirm.
171 NLIB_VIS_PUBLIC bool AddWords(const
char* str,
size_t len) NLIB_NOEXCEPT;
172 bool AddWords(const
char* str) NLIB_NOEXCEPT {
173 return AddWords(str, strlen(str));
176 typedef
bool (*MatchCallback)(const
char* first, const
char* last, uint32_t nodeid,
// Declaration only (exported through NLIB_VIS_PUBLIC); the definition is not part of this excerpt.
// user_obj is optional and defaults to NULL.
// NOTE(review): presumably scans doc and invokes callback for each match, passing
// user_obj through - confirm, including whether doc must be NUL-terminated.
178 NLIB_VIS_PUBLIC void MatchByBuilder(const
char* doc, MatchCallback callback,
179 void* user_obj = NULL) NLIB_NOEXCEPT;
// Returns the m_NumWords counter.
// NOTE(review): presumably the number of words registered so far - confirm where it is incremented.
181 size_t GetNumWords() const NLIB_NOEXCEPT {
return m_NumWords; }
// Gets the number of created automaton nodes, i.e. the current size of m_AcNodeHolder.
183 size_t GetNumNodes() const NLIB_NOEXCEPT {
return m_AcNodeHolder.size(); }
// Function-call operators of several helper types whose declarations are not visible
// in this excerpt. All take no arguments; the first five return bool, the last returns void.
// NOTE(review): the meaning of the bool result is not visible here - confirm.
188 bool operator()() NLIB_NOEXCEPT;
192 bool operator()() NLIB_NOEXCEPT;
197 bool operator()() NLIB_NOEXCEPT;
202 bool operator()() NLIB_NOEXCEPT;
207 bool operator()() NLIB_NOEXCEPT;
212 void operator()() NLIB_NOEXCEPT;
// Builder state. NOTE(review): except where stated, the roles below are inferred from
// type and member names only - confirm against the implementation.
222 AcNodeHolder m_AcNodeHolder;  // its size() is what GetNumNodes() reports
223 Nlist<detail::AcNode> m_AcNodeMem;  // presumably backing storage for AcNode objects
224 Nlist<detail::AcNode::EdgeList> m_EdgeMem;  // presumably backing storage for edge-list entries
225 detail::StringHolder m_StringHolder;  // presumably holds the registered strings
226 nlib_ns::threading::ThreadPool m_ThreadPool;  // thread pool; what runs on it is not visible here
234 #if defined(_MSC_VER) && defined(nx_succinct_EXPORTS)
235 #undef NLIB_VIS_PUBLIC
236 #define NLIB_VIS_PUBLIC NLIB_WINIMPORT
239 #endif // INCLUDE_NN_NLIB_SUCCINCT_AHOCORASICKBUILDER_H_
bool operator<(const TimeValue &lhs, const TimeValue &rhs) noexcept
A relational operator.
#define NLIB_NOEXCEPT
Defines noexcept, or an equivalent, as appropriate for the environment.
#define NLIB_FINAL
Defines final if it is available for use. If not, expands to nothing.
#define NLIB_DISALLOW_COPY_AND_ASSIGN(TypeName)
Prohibits use of the copy constructor and assignment operator for the class specified by TypeName...
size_t GetNumBytes() const noexcept
Gets the total size, in bytes, of the registered strings or patterns.
Uses the Aho-Corasick algorithm to detect words and patterns.
AhoCorasickBuilder() noexcept
Instantiates the object with default parameters (default constructor).
Defines the class for searching text strings using the Aho-Corasick string-matching algorithm...
Implements common features and features that are highly platform-dependent. Also refer to nlib Platfo...
A container-like class similar to std::vector that can store objects that do not have copy constructo...
size_t GetNumNodes() const noexcept
Gets the number of created automaton nodes.
Creates the index (automaton) used in the Aho-Corasick algorithm.