nlib
misc/stringutils/stringutils.cpp

nn::nlib::StringViewを用いて、テキストファイル(unicode.orgが配布しているUnicode正規化のテストデータ)をパースするサンプルです。パース後は、nlib_nfkc()によるNFKC正規化の結果がテストデータの期待値と一致することを各行についてチェックしています。

StringViewクラスを利用すると、std::stringが備えるメソッド(の大半)を文字列データの複製を伴わずに利用することができます。

/*--------------------------------------------------------------------------------*
Project: CrossRoad
Copyright (C)Nintendo All rights reserved.
These coded instructions, statements, and computer programs contain proprietary
information of Nintendo and/or its licensed developers and are protected by
national and international copyright laws. They may not be disclosed to third
parties or copied or duplicated in any form, in whole or in part, without the
prior written consent of Nintendo.
The content herein is highly confidential and should be handled accordingly.
*--------------------------------------------------------------------------------*/
#include <vector>
NLIB_PATHMAPPER_FORSAMPLE
char* ReadFile(size_t* size, const char* uri_path) {
char filename[1024];
InitPathMapperForSample();
e = g_pathmapper.ResolvePath(NULL, filename, uri_path);
if (e != 0) return NULL;
nlib_fd fd;
nlib_offset filesize_;
e = nlib_fd_open(&fd, filename, NLIB_FD_O_RDONLY);
if (e != 0) return NULL;
e = nlib_fd_getsize(&filesize_, fd);
if (e != 0) {
(void)nlib_fd_close(fd);
return NULL;
}
size_t filesize = static_cast<size_t>(filesize_);
nlib_ns::UniquePtr<char[]> text(new (std::nothrow) char[filesize + 1]);
if (!text) {
(void)nlib_fd_close(fd);
return NULL;
}
while (filesize > 0) {
size_t readsize;
e = nlib_fd_read(&readsize, fd, text.get(), filesize);
if (e != 0) {
(void)nlib_fd_close(fd);
return NULL;
}
filesize -= readsize;
}
*size = static_cast<size_t>(filesize_);
return text.release();
}
// Converts a list of hexadecimal code points separated by spaces
// (for example "0044 0307") into the corresponding UTF-8 byte sequence.
//
// first, last: the half-open character range [first, last) to parse.
//
// Returns the concatenated UTF-8 encoding of every code point found. An empty
// range, or one that contains only spaces, yields an empty vector. Both
// uppercase and lowercase hexadecimal digits are accepted.
std::vector<nlib_utf8_t>
GetUtf8(const char* first, const char* last) {
    std::vector<nlib_utf8_t> vec;
    nlib_ns::StringView view(first, last - first);
    for (;;) {
        // Consume the separators in front of the next token. Without this the
        // view would never advance past a space and the loop would not end.
        while (!view.empty() && nlib_ns::StartsWith(view, ' ')) {
            view.remove_prefix(1);
        }
        if (view.empty()) break;

        // Accumulate one hexadecimal token into a code point.
        uint32_t cp = 0;
        while (!view.empty() && !nlib_ns::StartsWith(view, ' ')) {
            char c = *view.begin();
            if (nlib_isdigit(c)) {
                cp = cp * 16 + (c - '0');
            } else if (c >= 'a' && c <= 'f') {
                cp = cp * 16 + (c - 'a' + 10);
            } else {
                // Anything else is taken to be an uppercase hex digit 'A'-'F'.
                cp = cp * 16 + (c - 'A' + 10);
            }
            view.remove_prefix(1);
        }

        nlib_utf8_t buf[4];
        int len = nlib_utf32char_to_utf8(buf, cp);
        NLIB_ASSERT(len > 0 && len <= 4);
        vec.insert(vec.end(), &buf[0], &buf[0] + len);
    }
    return vec;
}
// Normalizes `vec` with nlib_nfkc() and reports whether the output equals `ans`.
//
// vec: UTF-8 bytes to normalize. Its first element is addressed
//      unconditionally, so the vector must not be empty.
// ans: expected bytes. Its begin() iterator is dereferenced for the
//      comparison, so it must not be empty either.
//
// Returns true only when nlib_nfkc() returns 0, result.written equals
// ans.size() and the produced bytes compare equal to `ans`; false otherwise.
bool Nfkc(const std::vector<nlib_utf8_t>& vec, const std::vector<nlib_utf8_t>& ans) {
// Fixed-size output buffer handed to nlib_nfkc() together with its size.
nlib_utf8_t buf[128];
const nlib_utf8_t* first = &vec[0];
const nlib_utf8_t* last = first + vec.size();
// NOTE(review): `result` is not declared anywhere in the visible source; its
// declaration appears to be missing from this excerpt -- confirm against the
// original file. `written` is presumably the number of bytes stored in buf.
errno_t e = nlib_nfkc(&result, &buf[0], sizeof(buf), first, last, kNfkcDefault);
if (e != 0) return false;
// Compare lengths first so memcmp() only runs over ans.size() bytes.
if (ans.size() != result.written) return false;
if (memcmp(&buf[0], &*ans.begin(), ans.size()) != 0) return false;
return true;
}
// Loads NormalizationTest.txt through ReadFile() and checks NFKC normalization
// for each data line.
//
// Every processed line is split into five ';'-terminated columns, each column
// is converted to UTF-8 with GetUtf8(), the columns are printed, and Nfkc() is
// required to map each of the five columns to the fourth one.
//
// Returns false when the file cannot be read or as soon as one Nfkc() check
// fails; returns true when the loop ends on an empty line.
bool NfkcNormalizeTest() {
size_t filesize;
// The buffer returned by ReadFile() is owned by this UniquePtr for the whole function.
nlib_ns::UniquePtr<char[]> normalization_test(ReadFile(&filesize, "nlibpath:///readonly/NormalizationTest.txt"));
if (!normalization_test) return false;
// Non-owning view over the file contents.
nlib_ns::StringView view(normalization_test.get(), filesize);
for (;;) {
// NOTE(review): `line` is not declared in the visible source. The statement
// that extracts the next line from `view` appears to be missing from this
// excerpt -- confirm against the original file.
// An empty line ends the loop.
if (line.length() == 0) break;
// Lines starting with '#' or '@' are skipped.
if (line[0] == '#' || line[0] == '@') continue;
size_t idx;
// Cut off one column at a time: the column is the text before the next ';',
// then the column and its ';' are dropped from `line`.
// NOTE(review): the result of find() is used without checking whether ';'
// was actually found.
nlib_ns::StringView c1_(line.begin(), (idx = line.find(';')));
line.remove_prefix(idx + 1);
nlib_ns::StringView c2_(line.begin(), (idx = line.find(';')));
line.remove_prefix(idx + 1);
nlib_ns::StringView c3_(line.begin(), (idx = line.find(';')));
line.remove_prefix(idx + 1);
nlib_ns::StringView c4_(line.begin(), (idx = line.find(';')));
line.remove_prefix(idx + 1);
nlib_ns::StringView c5_(line.begin(), (idx = line.find(';')));
line.remove_prefix(idx + 1);
// Turn each column of hexadecimal code points into UTF-8 bytes.
std::vector<nlib_utf8_t> c1 = GetUtf8(c1_.begin(), c1_.end());
std::vector<nlib_utf8_t> c2 = GetUtf8(c2_.begin(), c2_.end());
std::vector<nlib_utf8_t> c3 = GetUtf8(c3_.begin(), c3_.end());
std::vector<nlib_utf8_t> c4 = GetUtf8(c4_.begin(), c4_.end());
std::vector<nlib_utf8_t> c5 = GetUtf8(c5_.begin(), c5_.end());
// NFKC
// c4 == toNFKC(c1) == toNFKC(c2) == toNFKC(c3) == toNFKC(c4) == toNFKC(c5)
// Zero-filled scratch copies of the columns, used only for the printf below.
nlib_utf8_t c1_str[64], c2_str[64], c3_str[64], c4_str[64], c5_str[64];
nlib_memset(c1_str, 0, sizeof(c1_str));
nlib_memset(c2_str, 0, sizeof(c2_str));
nlib_memset(c3_str, 0, sizeof(c3_str));
nlib_memset(c4_str, 0, sizeof(c4_str));
nlib_memset(c5_str, 0, sizeof(c5_str));
// NOTE(review): the second argument looks like the destination capacity; one
// byte is held back, presumably so the zero-filled buffers stay NUL-terminated
// for "%s" -- confirm. The return values are ignored, and the first element of
// each vector is addressed even if the vector is empty.
nlib_memcpy(&c1_str, sizeof(c1_str) - 1, &c1[0], c1.size());
nlib_memcpy(&c2_str, sizeof(c2_str) - 1, &c2[0], c2.size());
nlib_memcpy(&c3_str, sizeof(c3_str) - 1, &c3[0], c3.size());
nlib_memcpy(&c4_str, sizeof(c4_str) - 1, &c4[0], c4.size());
nlib_memcpy(&c5_str, sizeof(c5_str) - 1, &c5[0], c5.size());
nlib_printf("'%s' == toNFKC(%s) == toNFKC(%s) == toNFKC(%s) == toNFKC(%s) == toNFKC(%s)\n",
c4_str, c1_str, c2_str, c3_str, c4_str, c5_str);
// Each of the five columns must normalize to the fourth column; stop at the
// first mismatch.
if (!Nfkc(c1, c4)) return false;
if (!Nfkc(c2, c4)) return false;
if (!Nfkc(c3, c4)) return false;
if (!Nfkc(c4, c4)) return false;
if (!Nfkc(c5, c4)) return false;
}
return true;
}
// Sample entry point. The command-line arguments are ignored and the result of
// NfkcNormalizeTest() is returned unchanged.
bool SampleMain(int, char**) {
    const bool succeeded = NfkcNormalizeTest();
    return succeeded;
}
// NOTE(review): presumably expands to the platform entry point that invokes
// SampleMain() -- confirm against the nlib sample headers.
NLIB_MAINFUNC