nlib
heap/nmalloc_simple/nmalloc_simple.cpp

This extremely simple program compares the performance of the default malloc and free functions against nmalloc and nfree.

#include <stdlib.h>
using ::nlib_ns::threading::Thread;
// Workload scale factor: every test in this file multiplies its amount of work by it.
// It is 1 when NN_PLATFORM_CTR is defined, 10 when CAFE is defined, and 100 otherwise.
#if defined(NN_PLATFORM_CTR)
static const int kRepeatConst = 1;
#elif defined(CAFE)
static const int kRepeatConst = 10;
#else
static const int kRepeatConst = 100; // others
#endif
//
// Benchmark functor: allocates and releases large batches of 8-byte regions
// through the allocator/deallocator pair given to the constructor.
//
class malloc_8bytes {
public:
    malloc_8bytes(void* (*malloc_func)(size_t), void (*free_func)(void*))
        : malloc_(malloc_func), free_(free_func) {}

    // Runs 50 rounds. Each round obtains a zero-filled pointer table of
    // kRepeatConst * 1000 entries, fills it with 8-byte allocations, then frees
    // every entry and the table itself.
    // Returns false when the table or any 8-byte region cannot be allocated.
    bool operator()() {
        const size_t count = kRepeatConst * 1000;
        const size_t table_bytes = count * sizeof(void*);
        for (int round = 0; round < 50; ++round) {
            void** slots = static_cast<void**>(malloc_(table_bytes));
            if (slots == NULL) return false;
            memset(slots, 0, table_bytes);
            const bool filled = fill(slots, count);
            // Cleanup always runs, even after a failed fill.
            release(slots, count);
            if (!filled) return false;
        }
        return true;
    }

private:
    // Stores an 8-byte allocation in each slot in order; stops at the first failure.
    bool fill(void** slots, size_t count) {
        for (size_t k = 0; k != count; ++k) {
            slots[k] = malloc_(8);
            if (slots[k] == NULL) return false;
        }
        return true;
    }

    // Passes every slot to free_ (slots never filled are still zero from the
    // memset), then frees the table.
    void release(void** slots, size_t count) {
        for (size_t k = 0; k != count; ++k) {
            free_(slots[k]);
        }
        free_(slots);
    }

    void* (*malloc_)(size_t);
    void (*free_)(void*);
};
//
// Benchmark functor with an allocation pattern that causes severe fragmentation
// if a naive malloc/free is used: 8-byte regions are allocated, every other one
// is freed, and the freed slots are refilled with 16-byte regions.
//
class malloc_fragmentation {
public:
    malloc_fragmentation(void* (*malloc_func)(size_t), void (*free_func)(void*))
        : malloc_(malloc_func), free_(free_func) {}

    // Runs kRepeatConst * 30 rounds over a zero-filled table of 10000 pointers.
    // Returns false when the table or any region cannot be allocated; the round
    // in which a failure happens is still carried through to its cleanup.
    bool operator()() {
        const size_t count = 10000;
        const int rounds = kRepeatConst * 30;
        for (int round = 0; round < rounds; ++round) {
            void** slots = static_cast<void**>(malloc_(count * sizeof(void*)));
            if (slots == NULL) return false;
            memset(slots, 0, count * sizeof(void*));

            // Phase 1: an 8-byte region in every slot.
            bool ok = allocate_stride(slots, count, 1, 8);
            // Phase 2: punch holes by releasing the even-indexed slots.
            for (size_t k = 0; k < count; k += 2) {
                free_(slots[k]);
                slots[k] = NULL;
            }
            // Phase 3: refill the holes with 16-byte regions. This runs even if
            // phase 1 failed.
            if (!allocate_stride(slots, count, 2, 16)) ok = false;

            // Cleanup: every slot (possibly null) and then the table.
            for (size_t k = 0; k < count; ++k) {
                free_(slots[k]);
            }
            free_(slots);
            if (!ok) return false;
        }
        return true;
    }

private:
    // Allocates `bytes` into slots 0, step, 2*step, ... below `count`;
    // stops at the first failed allocation and reports it.
    bool allocate_stride(void** slots, size_t count, size_t step, size_t bytes) {
        for (size_t k = 0; k < count; k += step) {
            slots[k] = malloc_(bytes);
            if (slots[k] == NULL) return false;
        }
        return true;
    }

    void* (*malloc_)(size_t);
    void (*free_)(void*);
};
//
// Benchmark functor: grows one buffer with realloc, one byte at a time,
// from 1 byte up to 99999 bytes.
//
class malloc_realloc {
public:
    malloc_realloc(void* (*realloc_func)(void*, size_t), void (*free_func)(void*))
        : realloc_(realloc_func), free_(free_func) {}

    // Repeats the grow-and-release cycle kRepeatConst times.
    // Returns false as soon as a reallocation fails.
    bool operator()() {
        int round = 0;
        while (round < kRepeatConst) {
            if (!grow_and_release()) return false;
            ++round;
        }
        return true;
    }

private:
    // Starts from a null pointer and requests sizes 1, 2, ..., 99999 in turn,
    // then frees the final buffer. On failure the last valid buffer is freed
    // and false is returned.
    bool grow_and_release() {
        const size_t limit = 100000;
        void* block = NULL;
        size_t bytes = 1;
        while (bytes < limit) {
            void* grown = realloc_(block, bytes);
            if (grown == NULL) {
                free_(block);
                return false;
            }
            block = grown;
            ++bytes;
        }
        free_(block);
        return true;
    }

    void* (*realloc_)(void*, size_t);
    void (*free_)(void*);
};
// Number of worker threads started by each multi-thread test.
const int kNumThread = 10;
// Thread objects started and joined by malloc_mt::operator(); shared by all of its runs.
Thread g_Th[kNumThread];
// Set to true at the start of malloc_mt::operator() and cleared by the worker
// functors (mt_smallmem / mt_largemem) when an allocation fails.
volatile bool g_Success;
// Per-thread workload for the multi-thread test with small regions:
// repeatedly allocates 1000 regions of 8 bytes and then frees them all.
class mt_smallmem {
public:
    mt_smallmem(void* malloc_func(size_t), void free_func(void*))
        : malloc_(malloc_func), free_(free_func) {}

    // Runs kRepeatConst * 10 rounds. A failed allocation only clears g_Success;
    // the round continues and the null pointer is later passed to free_.
    void operator()() {
        enum { kBatch = 1000 };
        void* held[kBatch];
        const int rounds = kRepeatConst * 10;
        for (int round = 0; round < rounds; ++round) {
            for (void** slot = held; slot != held + kBatch; ++slot) {
                *slot = malloc_(8);
                if (*slot == NULL) g_Success = false;
            }
            for (void** slot = held; slot != held + kBatch; ++slot) {
                free_(*slot);
            }
        }
    }

private:
    void* (*malloc_)(size_t);
    void (*free_)(void*);
};
// Per-thread workload for the multi-thread test with large regions:
// repeatedly allocates 40 regions of 16384 bytes and then frees them all.
class mt_largemem {
public:
    mt_largemem(void* malloc_func(size_t), void free_func(void*))
        : malloc_(malloc_func), free_(free_func) {}

    // Runs kRepeatConst * 10 rounds. A failed allocation only clears g_Success;
    // the round continues and the null pointer is later passed to free_.
    void operator()() {
        enum { kBatch = 40 };
        void* held[kBatch];
        const int rounds = kRepeatConst * 10;
        for (int round = 0; round < rounds; ++round) {
            for (void** slot = held; slot != held + kBatch; ++slot) {
                *slot = malloc_(16384);
                if (*slot == NULL) g_Success = false;
            }
            for (void** slot = held; slot != held + kBatch; ++slot) {
                free_(*slot);
            }
        }
    }

private:
    void* (*malloc_)(size_t);
    void (*free_)(void*);
};
//
// Multi-thread performance: runs one workload functor on kNumThread threads.
//
template<class FUNC>
class malloc_mt {
public:
    explicit malloc_mt(const FUNC& obj) : obj_(obj) {}

    // Resets g_Success, starts every thread in g_Th with the stored functor,
    // joins them in the same order, and returns the flag the workers left behind.
    // NOTE(review): whatever Start() and Join() return is not inspected here.
    bool operator()() {
        g_Success = true;
        int started = 0;
        while (started < kNumThread) {
            g_Th[started].Start(obj_);
            ++started;
        }
        int joined = 0;
        while (joined < kNumThread) {
            g_Th[joined].Join();
            ++joined;
        }
        return g_Success;
    }

private:
    FUNC obj_;
};
//
// Baseline functor: locks and unlocks a mutex in a tight loop.
//
class mutex_loop {
public:
    // Performs kRepeatConst * 50000 lock/unlock pairs on g_Mutex (defined
    // outside this excerpt). Always returns true.
    bool operator()() {
        for (int left = kRepeatConst * 50000; left > 0; --left) {
            nlib_mutex_lock(&g_Mutex);
            nlib_mutex_unlock(&g_Mutex);
        }
        return true;
    }
};
// Receives the pointer of every allocation made by nmalloc_loop.
// NOTE(review): presumably a file-scope variable is used so the allocation
// is not optimized away - confirm.
static void* g_Ptr = NULL;

// Functor that allocates 8 bytes with nmalloc and immediately releases them
// with nfree, in a tight loop.
class nmalloc_loop {
public:
    // Performs kRepeatConst * 50000 nmalloc/nfree pairs. Always returns true;
    // the allocation result is not checked.
    bool operator()() {
        for (int left = kRepeatConst * 50000; left > 0; --left) {
            g_Ptr = nmalloc(8);
            nfree(g_Ptr);
        }
        return true;
    }
};
// Timing wrapper: runs a test functor once and prints how long it took.
//   title - label printed in front of the measurement (the pointer is stored, not copied)
//   func  - test functor; it is copied and must return bool from operator()
template<class FUNC>
class do_test {
public:
    do_test(const char* title, const FUNC& func) : title_(title), func_(func) {}

    // Samples GetTickTime() before and after the functor runs, prints the
    // difference, and returns the functor's result unchanged.
    // NOTE(review): the output is labelled "msec"; the real unit is whatever
    // ::nlib_ns::GetTickTime() returns - confirm.
    bool operator()() {
        const uint64_t started = ::nlib_ns::GetTickTime();
        const bool outcome = func_();
        const uint64_t finished = ::nlib_ns::GetTickTime();
        const uint64_t elapsed = finished - started;
        nlib_printf("%s: %" PRIu64 " msec\n", title_, elapsed);
        return outcome;
    }

private:
    const char* title_;
    FUNC func_;
};
// Flushes nmalloc's thread cache and checks that the heap is clean afterwards.
// Returns true when the heap is reported clean; otherwise requests a full
// heap dump and returns false.
static bool clear_cache() {
    // free thread-cache nmalloc has, returning them to CentralHeap.
    nmalloc_query(NMALLOC_QUERY_FINALIZE_CACHE);
    // Start from "not clean": if the query does not store a value, the result
    // is a reported failure rather than a read of an uninitialized int.
    int isclean = 0;
    nmalloc_query(NMALLOC_QUERY_IS_CLEAN, &isclean);
    if (isclean) return true;
    nmalloc_query(NMALLOC_QUERY_DUMP, NMALLOC_DUMP_ALL, 1);
    return false;
}
// Runs every benchmark, first with the standard allocator and then with nmalloc,
// printing one timing line per run (see do_test).
// After each nmalloc run of an allocator benchmark, clear_cache() verifies that
// the nmalloc heap is clean again.
// Returns false as soon as any benchmark or clear_cache() reports failure,
// true when everything passed.
static bool compare_speed() {
    //
    // 8 bytes alloc/free
    //
    if (!do_test<malloc_8bytes>("malloc: malloc_8bytes() loop",
                                malloc_8bytes(malloc, free))()) return false;
    if (!do_test<malloc_8bytes>("nmalloc: malloc_8bytes() loop",
                                malloc_8bytes(nmalloc, nfree))()) return false;
    if (!clear_cache()) return false;
    //
    // fragmentation
    //
    if (!do_test<malloc_fragmentation>("malloc: malloc_fragmentation() loop",
                                       malloc_fragmentation(malloc, free))()) return false;
    if (!do_test<malloc_fragmentation>("nmalloc: malloc_fragmentation() loop",
                                       malloc_fragmentation(nmalloc, nfree))()) return false;
    if (!clear_cache()) return false;
    //
    // realloc
    //
    if (!do_test<malloc_realloc>("malloc: realloc() loop",
                                 malloc_realloc(realloc, free))()) return false;
    if (!do_test<malloc_realloc>("nmalloc: realloc() loop",
                                 malloc_realloc(nrealloc, nfree))()) return false;
    if (!clear_cache()) return false;
    //
    // multi-thread malloc/free small mem
    //
    if (!do_test<malloc_mt<mt_smallmem> >(
            "malloc: mt-small loop",
            malloc_mt<mt_smallmem>(mt_smallmem(malloc, free)))()) return false;
    if (!do_test<malloc_mt<mt_smallmem> >(
            "nmalloc: mt-small loop",
            malloc_mt<mt_smallmem>(mt_smallmem(nmalloc, nfree)))()) return false;
    if (!clear_cache()) return false;
    //
    // multi-thread malloc/free big mem
    //
    if (!do_test<malloc_mt<mt_largemem> >(
            "malloc: mt-large loop",
            malloc_mt<mt_largemem>(mt_largemem(malloc, free)))()) return false;
    if (!do_test<malloc_mt<mt_largemem> >(
            "nmalloc: mt-large loop",
            malloc_mt<mt_largemem>(mt_largemem(nmalloc, nfree)))()) return false;
    if (!clear_cache()) return false;
    //
    // compare mutex_lock/mutex_unlock loop and nmalloc/nfree loop
    //
    // These two results used to be stored and then ignored; they are now
    // checked like every other test.
    if (!do_test<mutex_loop>("nlib_mutex: lock/unlock loop", mutex_loop())()) return false;
    if (!do_test<nmalloc_loop>("nmalloc: nmalloc/nfree loop", nmalloc_loop())()) return false;
    return true;
}
#ifndef NLIB_HAS_VIRTUALMEMORY
// Without virtual memory support the heap lives in this static,
// 4096-byte-aligned 10 MiB buffer.
const size_t heapmem_size = 1024 * 1024 * 10;
NLIB_ALIGNAS(4096) static char heapmem[heapmem_size];
#endif

// Fills in the heap settings used by nmalloc: a 10 MiB heap with heap_option 0.
// With NLIB_HAS_VIRTUALMEMORY no address is supplied (addr is NULL);
// otherwise the static buffer above is handed over.
// NOTE(review): presumably nmalloc calls this hook itself when it initializes - confirm.
extern "C" void nmalloc_get_settings(NMallocSettings* settings) {
#ifdef NLIB_HAS_VIRTUALMEMORY
    settings->addr = NULL;
    settings->size = 1024 * 1024 * 10;
#else
    settings->addr = heapmem;
    settings->size = heapmem_size;
#endif
    settings->heap_option = 0;
}
// Entry point of the sample: runs the whole comparison and returns its result.
// The command-line arguments are not used.
static bool SampleMain(int, char**) {
    const bool passed = compare_speed();
    return passed;
}
// NOTE(review): NLIB_MAINFUNC is an nlib macro; presumably it supplies the
// platform entry point that calls SampleMain - confirm against the nlib headers.
NLIB_MAINFUNC