nlib
misc/stringutils/stringutils.cpp

A sample that uses nn::nlib::StringView to parse a text file (the Unicode normalization test data published by unicode.org). After parsing each test line, the sample normalizes every column with nlib_nfkc() and checks that the result matches the expected NFKC form given in the file.

By using the StringView class, you can use most of the std::string member functions without copying the string data.

/*--------------------------------------------------------------------------------*
Project: CrossRoad
Copyright (C)Nintendo All rights reserved.
These coded instructions, statements, and computer programs contain proprietary
information of Nintendo and/or its licensed developers and are protected by
national and international copyright laws. They may not be disclosed to third
parties or copied or duplicated in any form, in whole or in part, without the
prior written consent of Nintendo.
The content herein is highly confidential and should be handled accordingly.
*--------------------------------------------------------------------------------*/
#include <vector>
// Sample-support macro. NOTE(review): presumably this is what provides
// g_pathmapper and InitPathMapperForSample(), both of which ReadFile() below
// uses without any other visible declaration -- confirm against the sample
// support header.
NLIB_PATHMAPPER_FORSAMPLE
char* ReadFile(size_t* size, const char* uri_path) {
char filename[1024];
InitPathMapperForSample();
e = g_pathmapper.ResolvePath(NULL, filename, uri_path);
if (e != 0) return NULL;
nlib_fd fd;
nlib_offset filesize_;
e = nlib_fd_open(&fd, filename, NLIB_FD_O_RDONLY);
if (e != 0) return NULL;
e = nlib_fd_getsize(&filesize_, fd);
if (e != 0) {
(void)nlib_fd_close(fd);
return NULL;
}
size_t filesize = static_cast<size_t>(filesize_);
nlib_ns::UniquePtr<char[]> text(new (std::nothrow) char[filesize + 1]);
if (!text) {
(void)nlib_fd_close(fd);
return NULL;
}
while (filesize > 0) {
size_t readsize;
e = nlib_fd_read(&readsize, fd, text.get(), filesize);
if (e != 0) {
(void)nlib_fd_close(fd);
return NULL;
}
filesize -= readsize;
}
*size = static_cast<size_t>(filesize_);
return text.release();
}
// Converts a field of space-separated hexadecimal code points (for example
// "0044 0307") in the range [first, last) into the concatenated UTF-8
// encoding of those code points.
//
// Spaces between (and around) code points are skipped. Both upper- and
// lower-case hex digits are accepted; any other character is ignored. A token
// that contains no hex digit at all produces no output.
//
// Returns the UTF-8 bytes; the vector is empty if the field holds no code
// point.
std::vector<nlib_utf8_t>
GetUtf8(const char* first, const char* last) {
    std::vector<nlib_utf8_t> vec;
    nlib_ns::StringView view(first, last - first);
    while (!view.empty()) {
        // Consume separators. Without this the view would never advance past
        // the first space and the loop would not terminate.
        if (nlib_ns::StartsWith(view, ' ')) {
            view.remove_prefix(1);
            continue;
        }

        // Accumulate one hexadecimal token.
        uint32_t cp = 0;
        bool has_digit = false;
        while (!view.empty() && !nlib_ns::StartsWith(view, ' ')) {
            const char c = *view.begin();
            if (nlib_isdigit(c)) {
                cp = cp * 16 + static_cast<uint32_t>(c - '0');
                has_digit = true;
            } else if (c >= 'A' && c <= 'F') {
                cp = cp * 16 + static_cast<uint32_t>(c - 'A' + 10);
                has_digit = true;
            } else if (c >= 'a' && c <= 'f') {
                cp = cp * 16 + static_cast<uint32_t>(c - 'a' + 10);
                has_digit = true;
            }
            // Anything else is not a hex digit and is skipped.
            view.remove_prefix(1);
        }
        if (!has_digit) continue;

        nlib_utf8_t buf[4];
        int len = nlib_utf32char_to_utf8(buf, cp);
        NLIB_ASSERT(len > 0 && len <= 4);
        // Guard the insert as well: the assertion may be compiled out, and a
        // non-positive length would form an invalid range.
        if (len > 0 && len <= 4) {
            vec.insert(vec.end(), &buf[0], &buf[0] + len);
        }
    }
    return vec;
}
bool Nfkc(const std::vector<nlib_utf8_t>& vec, const std::vector<nlib_utf8_t>& ans) {
nlib_utf8_t buf[128];
const nlib_utf8_t* first = &vec[0];
const nlib_utf8_t* last = first + vec.size();
errno_t e = nlib_nfkc(&result, &buf[0], sizeof(buf), first, last, kNfkcDefault);
if (e != 0) return false;
if (ans.size() != result.written) return false;
if (memcmp(&buf[0], &*ans.begin(), ans.size()) != 0) return false;
return true;
}
// Loads "nlibpath:///readonly/NormalizationTest.txt", splits each data line
// into five ';'-terminated columns (c1..c5), converts every column to UTF-8
// with GetUtf8(), prints them, and checks with Nfkc() that each of c1..c5
// compares equal to c4 after normalization.
//
// Returns false if the file cannot be read or as soon as one Nfkc() check
// fails; returns true once the loop ends without a failed check.
bool NfkcNormalizeTest() {
size_t filesize;
// The UniquePtr takes ownership of the buffer returned by ReadFile().
nlib_ns::UniquePtr<char[]> normalization_test(ReadFile(&filesize, "nlibpath:///readonly/NormalizationTest.txt"));
if (!normalization_test) return false;
// Non-owning view over the whole file contents.
nlib_ns::StringView view(normalization_test.get(), filesize);
for (;;) {
// NOTE(review): `line` is neither declared nor assigned in the visible code.
// Presumably each iteration is meant to take the next line out of `view`
// before the checks below -- confirm. As written, the `continue` below would
// re-test the same `line` forever.
// An empty line ends the loop.
if (line.length() == 0) break;
// Lines starting with '#' or '@' are skipped.
if (line[0] == '#' || line[0] == '@') continue;
size_t idx;
// Peel off five columns: each one runs up to the next ';', and the column
// plus its ';' is then removed from the front of `line`.
// NOTE(review): the result of find(';') is used unchecked; a line with
// fewer than five ';' is not handled here.
nlib_ns::StringView c1_(line.begin(), (idx = line.find(';')));
line.remove_prefix(idx + 1);
nlib_ns::StringView c2_(line.begin(), (idx = line.find(';')));
line.remove_prefix(idx + 1);
nlib_ns::StringView c3_(line.begin(), (idx = line.find(';')));
line.remove_prefix(idx + 1);
nlib_ns::StringView c4_(line.begin(), (idx = line.find(';')));
line.remove_prefix(idx + 1);
nlib_ns::StringView c5_(line.begin(), (idx = line.find(';')));
line.remove_prefix(idx + 1);
// Convert each column from hexadecimal code points to UTF-8 bytes.
std::vector<nlib_utf8_t> c1 = GetUtf8(c1_.begin(), c1_.end());
std::vector<nlib_utf8_t> c2 = GetUtf8(c2_.begin(), c2_.end());
std::vector<nlib_utf8_t> c3 = GetUtf8(c3_.begin(), c3_.end());
std::vector<nlib_utf8_t> c4 = GetUtf8(c4_.begin(), c4_.end());
std::vector<nlib_utf8_t> c5 = GetUtf8(c5_.begin(), c5_.end());
// NFKC
// c4 == toNFKC(c1) == toNFKC(c2) == toNFKC(c3) == toNFKC(c4) == toNFKC(c5)
// Zero-filled scratch buffers used only for printing the columns with %s;
// the copies below pass sizeof - 1, so the last byte stays '\0'.
nlib_utf8_t c1_str[64], c2_str[64], c3_str[64], c4_str[64], c5_str[64];
nlib_memset(c1_str, 0, sizeof(c1_str));
nlib_memset(c2_str, 0, sizeof(c2_str));
nlib_memset(c3_str, 0, sizeof(c3_str));
nlib_memset(c4_str, 0, sizeof(c4_str));
nlib_memset(c5_str, 0, sizeof(c5_str));
// NOTE(review): the second argument looks like the destination capacity of a
// bounded copy -- confirm what nlib_memcpy does when the source is larger.
// Its return value is ignored. &cN[0] also assumes cN is not empty.
nlib_memcpy(&c1_str, sizeof(c1_str) - 1, &c1[0], c1.size());
nlib_memcpy(&c2_str, sizeof(c2_str) - 1, &c2[0], c2.size());
nlib_memcpy(&c3_str, sizeof(c3_str) - 1, &c3[0], c3.size());
nlib_memcpy(&c4_str, sizeof(c4_str) - 1, &c4[0], c4.size());
nlib_memcpy(&c5_str, sizeof(c5_str) - 1, &c5[0], c5.size());
nlib_printf("'%s' == toNFKC(%s) == toNFKC(%s) == toNFKC(%s) == toNFKC(%s) == toNFKC(%s)\n",
c4_str, c1_str, c2_str, c3_str, c4_str, c5_str);
// Every column is checked against c4; stop at the first failed check.
if (!Nfkc(c1, c4)) return false;
if (!Nfkc(c2, c4)) return false;
if (!Nfkc(c3, c4)) return false;
if (!Nfkc(c4, c4)) return false;
if (!Nfkc(c5, c4)) return false;
}
return true;
}
// Entry point of the sample: runs the NFKC normalization check and returns
// its result. The command-line arguments are not used.
bool SampleMain(int, char**) {
    const bool passed = NfkcNormalizeTest();
    return passed;
}
// Sample-support macro. NOTE(review): presumably expands to the platform's
// real entry point, which in turn calls SampleMain() -- confirm against the
// sample support header.
NLIB_MAINFUNC