nlib
heap/nmalloc_simple/nmalloc_simple.cpp

This extremely simple program compares the performance of the default malloc and free functions against nmalloc and nfree.

/*---------------------------------------------------------------------------*
Project: CrossRoad
Copyright (C)2012-2016 Nintendo. All rights reserved.
These coded instructions, statements, and computer programs contain
proprietary information of Nintendo of America Inc. and/or Nintendo
Company Ltd., and are protected by Federal copyright law. They may
not be disclosed to third parties or copied or duplicated in any form,
in whole or in part, without the prior written consent of Nintendo.
*---------------------------------------------------------------------------*/
#include <stdlib.h>
using ::nlib_ns::threading::Thread;
// Scale factor multiplied into the iteration count of every benchmark in this
// file. Its value is selected at compile time from the platform macros below.
#ifdef NN_PLATFORM_CTR
static const int kRepeatConst = 1;    // NN_PLATFORM_CTR builds
#elif defined(CAFE)
static const int kRepeatConst = 10;   // CAFE builds
#else
static const int kRepeatConst = 100;  // every other platform
#endif
//
// alloc/free 8bytes regions
//
// Benchmark functor: repeatedly fills a pointer table with 8-byte allocations
// and then releases everything, using the allocator pair given at construction.
class malloc_8bytes {
public:
    // malloc_func / free_func: the allocation and release functions to exercise.
    malloc_8bytes(void* (*malloc_func)(size_t), void (*free_func)(void*))
        : malloc_(malloc_func), free_(free_func) {}

    // Runs 50 rounds. Returns true when every allocation of every round
    // succeeded; returns false right after the first round that saw a failure.
    bool operator()() {
        const int kRounds = 50;
        for (int round = 0; round < kRounds; ++round) {
            if (!run_round()) return false;
        }
        return true;
    }

private:
    // One round: allocate the pointer table (kRepeatConst * 1000 slots), fill
    // it with 8-byte blocks, then free every slot and the table itself.
    bool run_round() {
        const size_t count = kRepeatConst * 1000;
        const size_t table_bytes = count * sizeof(void*);  // NOLINT
        void** table = static_cast<void**>(malloc_(table_bytes));
        if (table == NULL) return false;
        // Zero the table first: after an allocation failure the untouched
        // slots are still handed to free_ below.
        memset(table, 0, table_bytes);

        bool all_ok = true;
        size_t idx = 0;
        while (idx < count) {
            void* block = malloc_(8);
            table[idx] = block;
            if (block == NULL) {
                all_ok = false;
                break;
            }
            ++idx;
        }

        // Every slot is released, whether or not it was filled.
        for (size_t k = 0; k < count; ++k) {
            free_(table[k]);
        }
        free_(table);
        return all_ok;
    }

    void* (*malloc_)(size_t);
    void (*free_)(void*);
};
//
// it causes severe fragmentation if naive malloc/free is used
//
// Benchmark functor: allocates many 8-byte blocks, frees every other one and
// then requests 16-byte blocks in their place, using the allocator pair given
// at construction.
class malloc_fragmentation {
public:
    // malloc_func / free_func: the allocation and release functions to exercise.
    malloc_fragmentation(void* (*malloc_func)(size_t), void (*free_func)(void*))
        : malloc_(malloc_func), free_(free_func) {}

    // Runs kRepeatConst * 30 rounds. Returns true when every allocation of
    // every round succeeded; returns false after the first failing round.
    bool operator()() {
        const int rounds = kRepeatConst * 30;
        for (int round = 0; round < rounds; ++round) {
            if (!run_round()) return false;
        }
        return true;
    }

private:
    // One round over a table of 10000 slots. All memory obtained during the
    // round is released before returning, also when an allocation failed.
    bool run_round() {
        const size_t count = 10000;
        const size_t table_bytes = count * sizeof(void*);  // NOLINT
        void** slots = static_cast<void**>(malloc_(table_bytes));
        if (slots == NULL) return false;
        memset(slots, 0, table_bytes);

        bool ok = true;

        // Phase 1: put an 8-byte block into every slot.
        for (size_t k = 0; k < count; ++k) {
            void* block = malloc_(8);
            slots[k] = block;
            if (block == NULL) {
                ok = false;
                break;
            }
        }

        // Phase 2: release the even-indexed slots and mark them empty.
        for (size_t k = 0; k < count; k += 2) {
            free_(slots[k]);
            slots[k] = NULL;
        }

        // Phase 3: refill the even-indexed slots with 16-byte blocks.
        for (size_t k = 0; k < count; k += 2) {
            void* block = malloc_(16);
            slots[k] = block;
            if (block == NULL) {
                ok = false;
                break;
            }
        }

        // Phase 4: release every slot (empty ones included) and the table.
        for (size_t k = 0; k < count; ++k) {
            free_(slots[k]);
        }
        free_(slots);
        return ok;
    }

    void* (*malloc_)(size_t);
    void (*free_)(void*);
};
//
// realloc repeatedly, growing one byte at a time from 1 byte up to 99999 bytes
//
// Benchmark functor: grows a single block one byte at a time through the
// realloc function given at construction, then releases it.
class malloc_realloc {
public:
    // realloc_func / free_func: the resize and release functions to exercise.
    malloc_realloc(void* (*realloc_func)(void*, size_t), void (*free_func)(void*))
        : realloc_(realloc_func), free_(free_func) {}

    // Repeats the growth sequence kRepeatConst times. Returns false as soon as
    // a resize request fails, true otherwise.
    bool operator()() {
        for (int pass = 0; pass < kRepeatConst; ++pass) {
            if (!grow_once()) return false;
        }
        return true;
    }

private:
    // Starts from a null pointer and requests sizes 1, 2, ..., 99999 in turn.
    bool grow_once() {
        const size_t limit = 100000;
        void* block = NULL;
        size_t bytes = 1;
        while (bytes < limit) {
            void* grown = realloc_(block, bytes);
            if (grown == NULL) {
                // The previous block is still held here, so release it
                // before reporting the failure.
                free_(block);
                return false;
            }
            block = grown;
            ++bytes;
        }
        free_(block);
        return true;
    }

    void* (*realloc_)(void*, size_t);
    void (*free_)(void*);
};
// Number of worker threads that malloc_mt starts for each multi-thread test.
const int kNumThread = 10;
// Thread objects shared by all multi-thread tests; malloc_mt starts and joins
// each element on every run.
Thread g_Th[kNumThread];
// Shared pass/fail flag: malloc_mt sets it to true before starting the
// threads, the mt_smallmem / mt_largemem workers set it to false when an
// allocation fails, and malloc_mt returns its value after joining.
// NOTE(review): volatile by itself gives no atomicity or ordering guarantee;
// every writer stores the same value and the read happens after Join() --
// confirm that Join() provides the required synchronization.
volatile bool g_Success;
// Worker functor for the multi-thread test with small blocks: each call
// allocates and frees batches of 8-byte blocks with the given allocator pair.
class mt_smallmem {
public:
    // malloc_func / free_func: the allocation and release functions to exercise.
    mt_smallmem(void* malloc_func(size_t), void free_func(void*))
        : malloc_(malloc_func), free_(free_func) {}

    // Performs kRepeatConst * 10 rounds of 1000 allocations followed by 1000
    // frees. An allocation failure does not stop the run; it is only recorded
    // in g_Success.
    void operator()() {
        const int kBlocks = 1000;
        const int rounds = kRepeatConst * 10;
        void* blocks[kBlocks];
        for (int round = 0; round < rounds; ++round) {
            for (int k = 0; k < kBlocks; ++k) {
                void* mem = malloc_(8);
                blocks[k] = mem;
                if (mem == NULL) g_Success = false;
            }
            for (int k = 0; k < kBlocks; ++k) {
                free_(blocks[k]);
            }
        }
    }

private:
    void* (*malloc_)(size_t);
    void (*free_)(void*);
};
// Worker functor for the multi-thread test with large blocks: each call
// allocates and frees batches of 16384-byte blocks with the given allocator pair.
class mt_largemem {
public:
    // malloc_func / free_func: the allocation and release functions to exercise.
    mt_largemem(void* malloc_func(size_t), void free_func(void*))
        : malloc_(malloc_func), free_(free_func) {}

    // Performs kRepeatConst * 10 rounds of 40 allocations followed by 40
    // frees. An allocation failure does not stop the run; it is only recorded
    // in g_Success.
    void operator()() {
        const int kBlocks = 40;
        const int rounds = kRepeatConst * 10;
        void* blocks[kBlocks];
        for (int round = 0; round < rounds; ++round) {
            for (int k = 0; k < kBlocks; ++k) {
                void* mem = malloc_(16384);
                blocks[k] = mem;
                if (mem == NULL) g_Success = false;
            }
            for (int k = 0; k < kBlocks; ++k) {
                free_(blocks[k]);
            }
        }
    }

private:
    void* (*malloc_)(size_t);
    void (*free_)(void*);
};
//
// multithread performance
//
// Adapter that runs one worker functor on kNumThread threads at once and
// reports whether any of them flagged a failure through g_Success.
template<class FUNC>
class malloc_mt {
public:
    // obj: the worker functor; a copy is stored and handed to every thread.
    explicit malloc_mt(const FUNC& obj) : obj_(obj) {}

    // Resets g_Success, starts all threads, waits for all of them, and then
    // returns the value of g_Success.
    bool operator()() {
        g_Success = true;
        // All threads are started before the first one is joined.
        for (int started = 0; started < kNumThread; ++started) {
            g_Th[started].Start(obj_);
        }
        for (int joined = 0; joined < kNumThread; ++joined) {
            g_Th[joined].Join();
        }
        const bool ok = g_Success;
        return ok;
    }

private:
    FUNC obj_;
};
//
// lock / unlock mutex
//
// Benchmark functor: locks and immediately unlocks g_Mutex in a tight loop.
class mutex_loop {
public:
    // Performs kRepeatConst * 50000 lock/unlock pairs; always returns true.
    bool operator()() {
        const int iterations = kRepeatConst * 50000;
        int done = 0;
        while (done < iterations) {
            nlib_mutex_lock(&g_Mutex);
            nlib_mutex_unlock(&g_Mutex);
            ++done;
        }
        return true;
    }
};
// File-scope variable that receives the pointer of every allocation made by
// nmalloc_loop (presumably so the allocate/free pair is not optimized away).
static void* g_Ptr = NULL;

// Benchmark functor: allocates 8 bytes with nmalloc and frees them right away
// with nfree, in a tight loop.
class nmalloc_loop {
public:
    // Performs kRepeatConst * 50000 allocate/free pairs; always returns true.
    bool operator()() {
        const int iterations = kRepeatConst * 50000;
        int done = 0;
        while (done < iterations) {
            g_Ptr = nmalloc(8);
            nfree(g_Ptr);
            ++done;
        }
        return true;
    }
};
// Timing wrapper: runs a benchmark functor once and prints how long it took.
template<class FUNC>
class do_test {
public:
    // title: label printed in front of the measurement (the pointer is stored,
    //        not copied).
    // func:  the benchmark functor; a copy is stored.
    do_test(const char* title, const FUNC& func) : title_(title), func_(func) {}

    // Invokes the stored functor between two GetTickTime() readings, prints
    // the difference and returns the functor's result.
    // NOTE(review): the output is labelled "msec"; this assumes GetTickTime()
    // reports milliseconds -- confirm.
    bool operator()() {
        const uint64_t started = ::nlib_ns::GetTickTime();
        const bool ok = func_();
        const uint64_t elapsed = ::nlib_ns::GetTickTime() - started;
        nlib_printf("%s: %" PRIu64 " msec\n", title_, elapsed);
        return ok;
    }

private:
    const char* title_;
    FUNC func_;
};
// Releases nmalloc's thread cache and verifies that the heap is clean again.
// Returns true when the heap is reported clean; otherwise dumps the heap
// state through nmalloc_query and returns false.
static bool clear_cache() {
    // free thread-cache nmalloc has, returning them to CentralHeap.
    nmalloc_query(NMALLOC_QUERY_FINALIZE_CACHE);
    // Start from "not clean": if the query below does not write the flag, the
    // function reports a failure instead of reading an indeterminate value.
    int isclean = 0;
    nmalloc_query(NMALLOC_QUERY_IS_CLEAN, &isclean);
    if (isclean) return true;
    // Something is still allocated: dump the heap for diagnosis.
    nmalloc_query(NMALLOC_QUERY_DUMP, NMALLOC_DUMP_ALL, 1);
    return false;
}
// Runs every benchmark, first with the default allocator functions and then
// with the nmalloc family, printing one timing line per run.
// After each malloc/nmalloc pair, clear_cache() checks that nmalloc's heap is
// clean again. Returns false as soon as any benchmark or clear_cache() call
// fails, true when everything succeeded.
static bool compare_speed() {
    bool result;
    //
    // 8 bytes alloc/free
    //
    result = do_test<malloc_8bytes>("malloc: malloc_8bytes() loop",
                                    malloc_8bytes(malloc, free))();
    if (!result) return false;
    result = do_test<malloc_8bytes>("nmalloc: malloc_8bytes() loop",
                                    malloc_8bytes(nmalloc, nfree))();
    if (!result) return false;
    if (!clear_cache()) return false;
    //
    // fragmentation
    //
    result = do_test<malloc_fragmentation>("malloc: malloc_fragmentation() loop",
                                           malloc_fragmentation(malloc, free))();
    if (!result) return false;
    result = do_test<malloc_fragmentation>("nmalloc: malloc_fragmentation() loop",
                                           malloc_fragmentation(nmalloc, nfree))();
    if (!result) return false;
    if (!clear_cache()) return false;
    //
    // realloc
    //
    result = do_test<malloc_realloc>("malloc: realloc() loop",
                                     malloc_realloc(realloc, free))();
    if (!result) return false;
    result = do_test<malloc_realloc>("nmalloc: realloc() loop",
                                     malloc_realloc(nrealloc, nfree))();
    if (!result) return false;
    if (!clear_cache()) return false;
    //
    // multi-thread malloc/free small mem
    //
    result = do_test<malloc_mt<mt_smallmem> >(
        "malloc: mt-small loop",
        malloc_mt<mt_smallmem>(mt_smallmem(malloc, free)))();
    if (!result) return false;
    result = do_test<malloc_mt<mt_smallmem> >(
        "nmalloc: mt-small loop",
        malloc_mt<mt_smallmem>(mt_smallmem(nmalloc, nfree)))();
    if (!result) return false;
    if (!clear_cache()) return false;
    //
    // multi-thread malloc/free big mem
    //
    result = do_test<malloc_mt<mt_largemem> >(
        "malloc: mt-large loop",
        malloc_mt<mt_largemem>(mt_largemem(malloc, free)))();
    if (!result) return false;
    result = do_test<malloc_mt<mt_largemem> >(
        "nmalloc: mt-large loop",
        malloc_mt<mt_largemem>(mt_largemem(nmalloc, nfree)))();
    if (!result) return false;
    if (!clear_cache()) return false;
    //
    // compare mutex_lock/mutex_unlock loop and nmalloc/nfree loop
    //
    // The results of these two runs are checked like all the others instead
    // of being stored and then ignored.
    result = do_test<mutex_loop>("nlib_mutex: lock/unlock loop", mutex_loop())();
    if (!result) return false;
    result = do_test<nmalloc_loop>("nmalloc: nmalloc/nfree loop", nmalloc_loop())();
    return result;
}
#ifndef NLIB_HAS_VIRTUALMEMORY
// Without NLIB_HAS_VIRTUALMEMORY the heap lives in this static buffer, which
// is declared with NLIB_ALIGNAS(4096).
const size_t heapmem_size = 1024 * 1024 * 10;
NLIB_ALIGNAS(4096) static char heapmem[heapmem_size];
#endif

// Fills in the nmalloc heap settings used by this sample: a 10 MiB heap with
// heap_option 0. With NLIB_HAS_VIRTUALMEMORY the address is left NULL;
// otherwise the static buffer heapmem is supplied.
// NOTE(review): presumably invoked by nmalloc during its initialization --
// confirm against the nmalloc documentation.
extern "C" void nmalloc_get_settings(NMallocSettings* settings) {
#ifdef NLIB_HAS_VIRTUALMEMORY
    settings->addr = NULL;
    settings->size = 1024 * 1024 * 10;
#else
    settings->addr = heapmem;
    settings->size = heapmem_size;
#endif
    settings->heap_option = 0;
}
// Sample entry point: runs the whole allocator comparison and returns its
// outcome. Both parameters are unused.
// NOTE(review): NLIB_MAINFUNC is presumably the nlib macro that supplies the
// real main() and calls SampleMain -- confirm.
static bool SampleMain(int /*argc*/, char** /*argv*/) {
    const bool all_passed = compare_speed();
    return all_passed;
}
NLIB_MAINFUNC