nlib
succinct/ngc/ngc.cpp
This sample builds a word filter from a list of NG (prohibited) words and a list of excluded (allowed) words, and then uses it to mask the matches found in a text.
The filter is designed so that its index does not grow overly large and its search speed does not degrade even when there is a huge number of NG and excluded words (100,000 to 1,000,000 or more).
/*--------------------------------------------------------------------------------*
Project: CrossRoad
Copyright (C)Nintendo All rights reserved.
These coded instructions, statements, and computer programs contain proprietary
information of Nintendo and/or its licensed developers and are protected by
national and international copyright laws. They may not be disclosed to third
parties or copied or duplicated in any form, in whole or in part, without the
prior written consent of Nintendo.
The content herein is highly confidential and should be handled accordingly.
*--------------------------------------------------------------------------------*/
#include <string>
#include <vector>
// NG (prohibited) word list: one lower-case word per line, each terminated by '\n'.
// ProfanityFilter::Initialize() converts this table to UTF-8 and registers it
// with WordFilterBuilder::AddWords().
const wchar_t ngwords[] =
L"fuck\n"
L"nig\n"
L"nigger\n"
L"shit\n"
L"ass\n"
L"asshole\n"
L"damn\n"
L"piss\n"
L"bitch\n"
L"dick\n"
L"cunt\n"
L"poo\n"
L"pish\n"
L"bastard\n"
L"suck\n"
L"wanker\n"
L"crap\n"
L"pussy\n"
L"fag\n"
L"slut\n"
L"douche\n"
L"cock\n"
L"darn\n"
L"bugger\n"
L"bollocks\n"
L"arsehole\n";
// Excluded (allowed) word list: one lower-case word per line, each terminated by '\n'.
// Every entry contains one of the NG words as a substring (for example "pool"
// contains "poo"). ProfanityFilter::Initialize() converts this table to UTF-8
// and registers it with WordFilterBuilder::AddExcludeWords().
const wchar_t okwords[] =
L"nigeria\n"
L"night\n"
L"pool\n"
L"assessment\n"
L"embarrassment\n"
L"cocktail\n";
// Sample input text that SampleMain() converts to UTF-8 and runs through the filter.
// Text must be normalized before it is checked; the passages below are already
// written in lower case, matching the word lists.
// NOTE(review): unlike ngwords/okwords this array is not declared const, although
// nothing visible in this file modifies it.
wchar_t sentence[] =
L"well, fuck that!\n"
L"niggers are fucking ignorant. and no, not all black people are nigs. what's a nigger, the fuckers in ferguson that are rioting.\n"
L"i went to nigeria.\n"
L"currently all the haters are trolling through my pics trying to find shit on me\n"
L"what an asshole.\n"
L"there was an ass-load of people there last night.\n"
L"that damn teacher gave us a ton of homework.\n"
L"i'm pissed off!\n"
L"you're the boss's little bitch aren't you?\n"
L"that dick took up two parking spots.\n"
L"oh, we were just shooting the shit.\n"
L"damn, those real life guys really know their shit.\n"
L"shut up, you cunt.\n"
L"poo in the pool.\n"
L"property assessment information is provided\n"
L"choose us. choose life. choose mortgage payments; choose washing machines; "
L"choose cars; choose sitting oan a couch watching mind-numbing and "
L"spirit-crushing game shows, stuffing fuckin junk food intae yir mooth. "
L"choose rotting away, pishing and shiteing yersel in a home, "
L"a total fuckin embarrassment tae the selfish, fucked-up brats ye've produced. choose life.\n"
L"they fuck you up, your mum and dad. "
L"they may not mean to, but they do. "
L"they fill you with the faults they had and add some extra, just for you.\n"
L"ach it was hopeless. that was what ye felt. these bastards. "
L"what can ye do but. except start again so he started again. "
L"that was what he did he started again … ye just plough on, ye plough on, "
L"ye just fucking plough on … ye just fucking push ahead, ye get fucking on with it.\n";
using nlib_ns::succinct::WordFilter;
using nlib_ns::succinct::WordFilterBuilder;
using nlib_ns::unicode::ToUtf8Obj;
// Small wrapper around an nlib WordFilter built from the ngwords / okwords
// tables. After a successful Initialize(), Check() overwrites every match the
// filter reports in a UTF-8 string with '*' characters.
class ProfanityFilter {
public:
    // Builds the word filter from ngwords (words to match) and okwords
    // (exclusions).
    // Returns true when a filter object was created; false when any builder
    // step reported an error or Build() returned a null pointer.
    bool Initialize() {
        WordFilterBuilder builder;
        if (nlib_is_error(builder.Init())) return false;
        // appends NG words (profanity words).
        if (nlib_is_error(builder.AddWords(ToUtf8Obj(ngwords).c_str()))) return false;
        // appends OK words (exceptions).
        if (nlib_is_error(builder.AddExcludeWords(ToUtf8Obj(okwords).c_str()))) return false;
        // builds the WordFilter object; a null result means the build failed.
        filter_.reset(builder.Build());
        return filter_.get() != NULL;
    }

    // Scans the writable, null-terminated string `str` and masks each match
    // in place with '*'.
    // Does nothing when `str` is null or when no filter is available (that is,
    // Initialize() was never called or did not succeed), instead of
    // dereferencing a null filter pointer.
    void Check(char* str) {
        if (str == NULL || filter_.get() == NULL) return;
        filter_->Match(str, Callback, NULL);
    }

private:
    // Callback handed to WordFilter::Match(): overwrites the bytes in
    // [first, last) with '*', one asterisk per byte. node_id and user_obj are
    // not used.
    // Always returns true.
    // NOTE(review): true presumably tells Match() to keep scanning; confirm
    // against the WordFilter documentation.
    static bool Callback(const char* first, const char* last, uint32_t node_id, void* user_obj) {
        NLIB_UNUSED(node_id);
        NLIB_UNUSED(user_obj);
        // The callback signature is const-qualified, but the range is expected
        // to lie inside the writable buffer given to Check(), so casting away
        // const here is what allows the in-place masking.
        for (char* cursor = const_cast<char*>(first); cursor != last; ++cursor) {
            *cursor = '*';
        }
        return true;
    }

    // Owns the built filter; holds a null pointer until Initialize() succeeds.
    UniquePtr<WordFilter> filter_;
};
// Sample body: builds the profanity filter, prints the sample text, masks the
// matches in place and prints the text again.
// Returns false when the filter cannot be initialized or the sample text
// cannot be copied into the working buffer; true otherwise.
bool SampleMain(int, char**) {
    ProfanityFilter filter;
    if (!filter.Initialize()) return false;

    // Check() rewrites its argument in place, so work on a writable UTF-8 copy
    // of the wide-character sample text.
    char sentence_[8192];
    // The copy result used to be ignored; stop here if it fails so that an
    // unfilled buffer is never printed or filtered.
    // NOTE(review): assumes nlib_strcpy returns an errno_t-style status where
    // 0 means success - confirm against the nlib platform API.
    if (nlib_strcpy(sentence_, ToUtf8Obj(sentence).c_str()) != 0) return false;

    nlib_printf("Before checking\n %s\n", sentence_);
    filter.Check(sentence_);
    nlib_printf("After checking\n %s\n", sentence_);
    return true;
}
// NOTE(review): presumably expands to the program entry point that invokes
// SampleMain() - confirm against the nlib sample framework headers.
NLIB_MAINFUNC