Skip to content

Commit 10c59db

Browse files
committed
FlatHashMap: simple benchmark for memory usage
1 parent f4b3a09 commit 10c59db

File tree

5 files changed

+310
-0
lines changed

5 files changed

+310
-0
lines changed

‎CMakeLists.txt

+9
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,11 @@ set(MEMPROF_SOURCE
706706
memprof/memprof.h
707707
)
708708

709+
# Source files of the memprof_stat library (consumed by add_library(memprof_stat ...) in this file).
set(MEMPROF_STAT_SOURCE
710+
memprof/memprof_stat.cpp
711+
memprof/memprof_stat.h
712+
)
713+
709714
#RULES
710715

711716
file(MAKE_DIRECTORY auto)
@@ -739,6 +744,10 @@ if (MEMPROF)
739744
endif()
740745
endif()
741746

747+
# Static library built from MEMPROF_STAT_SOURCE; it exposes the source directory as a
# public include path and links tdutils privately.
add_library(memprof_stat STATIC ${MEMPROF_STAT_SOURCE})
748+
target_include_directories(memprof_stat PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)
749+
target_link_libraries(memprof_stat PRIVATE tdutils)
750+
742751

743752
add_library(tdapi ${TL_TD_API_SOURCE})
744753
target_include_directories(tdapi PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> INTERFACE $<BUILD_INTERFACE:${TL_TD_AUTO_INCLUDE_DIR}>)

‎memprof/memprof_stat.cpp

+163
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
//
2+
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022
3+
//
4+
// Distributed under the Boost Software License, Version 1.0. (See accompanying
5+
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6+
//
7+
#include "memprof_stat.h"
8+
9+
#include "td/utils/port/platform.h"
10+
11+
#if (TD_DARWIN || TD_LINUX)
12+
#include <algorithm>
13+
#include <atomic>
14+
#include <cstddef>
15+
#include <cstdint>
16+
#include <cstdlib>
17+
#include <cstring>
18+
#include <functional>
19+
#include <new>
20+
#include <utility>
21+
#include <vector>
22+
23+
#include <dlfcn.h>
24+
#include <execinfo.h>
25+
26+
// Reports that the allocation hooks are compiled in (this is the TD_DARWIN || TD_LINUX branch).
bool is_memprof_on() {
  constexpr bool hooks_enabled = true;
  return hooks_enabled;
}
29+
30+
// Minimal assertion: aborts the process via std::abort() when `f` evaluates to false.
// The body is wrapped in do { ... } while (false) so that `my_assert(x);` expands to exactly
// one statement and can be used safely as the body of an if/else without braces.
#define my_assert(f) \
  do {               \
    if (!(f)) {      \
      std::abort();  \
    }                \
  } while (false)
34+
35+
// Bookkeeping header stored in the reserved bytes in front of every tracked allocation.
struct malloc_info {
  std::int32_t magic;  // set to MALLOC_INFO_MAGIC by do_malloc and verified by get_info
  std::int32_t size;   // size requested by the caller, as recorded by do_malloc
};
39+
40+
std::atomic<uint64_t> total_memory_used;
41+
void register_xalloc(malloc_info *info, std::int32_t diff) {
42+
my_assert(info->size >= 0);
43+
// TODO: this is very slow in case of several threads.
44+
// Currently this statistics is intended only for memory benchmarks.
45+
total_memory_used.fetch_add(diff * info->size, std::memory_order_relaxed);
46+
}
47+
48+
std::size_t get_used_memory_size() {
49+
return total_memory_used.load();
50+
}
51+
52+
extern "C" {
53+
54+
// Number of bytes reserved in front of every pointer handed out by do_malloc;
// the malloc_info header is stored at the start of this area.
static constexpr std::size_t RESERVED_SIZE = 16;
// Value written into malloc_info::magic by do_malloc and checked by get_info.
static constexpr std::int32_t MALLOC_INFO_MAGIC = 0x27138373;
56+
57+
static void *do_malloc(std::size_t size) {
58+
static_assert(RESERVED_SIZE % alignof(std::max_align_t) == 0, "fail");
59+
static_assert(RESERVED_SIZE >= sizeof(malloc_info), "fail");
60+
#if TD_DARWIN
61+
static void *malloc_void = dlsym(RTLD_NEXT, "malloc");
62+
static auto malloc_old = *reinterpret_cast<decltype(malloc) **>(&malloc_void);
63+
#else
64+
extern decltype(malloc) __libc_malloc;
65+
static auto malloc_old = __libc_malloc;
66+
#endif
67+
auto *info = static_cast<malloc_info *>(malloc_old(size + RESERVED_SIZE));
68+
auto *buf = reinterpret_cast<char *>(info);
69+
70+
info->magic = MALLOC_INFO_MAGIC;
71+
info->size = static_cast<std::int32_t>(size);
72+
73+
register_xalloc(info, +1);
74+
75+
void *data = buf + RESERVED_SIZE;
76+
77+
return data;
78+
}
79+
80+
// Maps a user pointer back to its malloc_info header, which lives RESERVED_SIZE bytes before it.
// Aborts if the header does not carry MALLOC_INFO_MAGIC.
static malloc_info *get_info(void *data_void) {
  char *header_begin = static_cast<char *>(data_void) - RESERVED_SIZE;
  malloc_info *info = reinterpret_cast<malloc_info *>(header_begin);
  my_assert(info->magic == MALLOC_INFO_MAGIC);
  return info;
}
88+
89+
void *malloc(std::size_t size) {
90+
return do_malloc(size);
91+
}
92+
93+
void free(void *data_void) {
94+
if (data_void == nullptr) {
95+
return;
96+
}
97+
auto *info = get_info(data_void);
98+
register_xalloc(info, -1);
99+
100+
#if TD_DARWIN
101+
static void *free_void = dlsym(RTLD_NEXT, "free");
102+
static auto free_old = *reinterpret_cast<decltype(free) **>(&free_void);
103+
#else
104+
extern decltype(free) __libc_free;
105+
static auto free_old = __libc_free;
106+
#endif
107+
return free_old(info);
108+
}
109+
void *calloc(std::size_t size_a, std::size_t size_b) {
110+
auto size = size_a * size_b;
111+
void *res = do_malloc(size);
112+
std::memset(res, 0, size);
113+
return res;
114+
}
115+
void *realloc(void *ptr, std::size_t size) {
116+
if (ptr == nullptr) {
117+
return do_malloc(size);
118+
}
119+
auto *info = get_info(ptr);
120+
auto *new_ptr = do_malloc(size);
121+
auto to_copy = std::min(static_cast<std::int32_t>(size), info->size);
122+
std::memcpy(new_ptr, ptr, to_copy);
123+
free(ptr);
124+
return new_ptr;
125+
}
126+
// Replacement for memalign: allocates with malloc and aborts if the returned address is not
// a multiple of `alignment`; no attempt is made to actually produce a stricter alignment.
void *memalign(std::size_t alignment, std::size_t size) {
  void *block = malloc(size);
  const auto address = reinterpret_cast<uint64_t>(block);
  my_assert(address % alignment == 0);
  return block;
}
131+
132+
// Replacement for posix_memalign: allocates with malloc, aborts if the returned address is not
// a multiple of `alignment`, stores the pointer in *memptr and always returns 0.
int posix_memalign(void **memptr, size_t alignment, size_t size) {
  void *block = malloc(size);
  const auto address = reinterpret_cast<uint64_t>(block);
  my_assert(address % alignment == 0);
  *memptr = block;
  return 0;
}
138+
}
139+
140+
// c++14 guarantees that it is enough to override these two operators.
141+
void *operator new(std::size_t count) {
142+
return do_malloc(count);
143+
}
144+
// Replacement global operator delete: releases the storage through the global free().
void operator delete(void *ptr) noexcept(true) {
  ::free(ptr);
}
147+
// because of gcc warning: the program should also define 'void operator delete(void*, std::size_t)'
148+
// Sized variant of operator delete; the size argument is ignored and the storage is released
// through the global free().
void operator delete(void *ptr, std::size_t) noexcept(true) {
  ::free(ptr);
}
151+
152+
// c++17
153+
// void *operator new(std::size_t count, std::align_val_t al);
154+
// void operator delete(void *ptr, std::align_val_t al);
155+
156+
#else
157+
// Fallback for platforms other than TD_DARWIN / TD_LINUX: the allocation hooks are not compiled in.
bool is_memprof_on() {
  constexpr bool hooks_enabled = false;
  return hooks_enabled;
}
160+
// Fallback for platforms other than TD_DARWIN / TD_LINUX: nothing is tracked, so the
// reported usage is always zero bytes.
std::size_t get_used_memory_size() {
  // Return a size, not a boolean: the function yields a byte count.
  return 0;
}
163+
#endif

‎memprof/memprof_stat.h

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#pragma once
2+
3+
#include <cstddef>
4+
5+
// Returns true when memprof_stat.cpp was built with its allocation hooks
// (TD_DARWIN or TD_LINUX), false otherwise.
bool is_memprof_on();
6+
// Returns the current value of the allocation counter maintained by memprof_stat.cpp;
// the fallback implementation for other platforms always yields zero.
std::size_t get_used_memory_size();

‎tdutils/CMakeLists.txt

+4
Original file line numberDiff line numberDiff line change
@@ -395,4 +395,8 @@ if (ABSL_FOUND AND benchmark_FOUND AND gflags_FOUND AND folly_FOUND)
395395
add_executable(benchmark-hashset ${CMAKE_CURRENT_SOURCE_DIR}/test/hashset_benchmark.cpp)
396396
target_link_libraries(benchmark-hashset PRIVATE tdutils)
397397
target_link_libraries(benchmark-hashset PRIVATE SYSTEM benchmark::benchmark Folly::folly absl::flat_hash_map absl::hash)
398+
399+
# Memory-usage benchmark for hash maps, built from test/hashset_memory.cpp; it links
# memprof_stat in addition to tdutils and the third-party hash map libraries.
add_executable(memory-hashset ${CMAKE_CURRENT_SOURCE_DIR}/test/hashset_memory.cpp)
400+
target_link_libraries(memory-hashset PRIVATE tdutils memprof_stat)
401+
target_link_libraries(memory-hashset PRIVATE SYSTEM Folly::folly absl::flat_hash_map absl::hash)
398402
endif()

‎tdutils/test/hashset_memory.cpp

+128
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
//
2+
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022
3+
//
4+
// Distributed under the Boost Software License, Version 1.0. (See accompanying
5+
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6+
//
7+
8+
#include "memprof/memprof.h"
#include "memprof/memprof_stat.h"
#include "td/utils/check.h"
#include "td/utils/Slice.h"
#include "td/utils/FlatHashMap.h"
#include "td/utils/format.h"
#include "td/utils/UInt.h"

#include <folly/container/F14Map.h>
#include <absl/container/flat_hash_map.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
19+
20+
template <class T>
21+
class Generator {
22+
public:
23+
T next() {
24+
UNREACHABLE();
25+
}
26+
};
27+
28+
// Produces the sequence 1, 2, 3, ... of type T: each call to next() increments an internal
// value-initialised counter and returns the new value.
template <class T>
class IntGenerator {
 public:
  T next() {
    ++value;
    return value;
  }

 private:
  T value{};
};
37+
38+
template <>
39+
class Generator<uint32_t> : public IntGenerator<uint32_t> {
40+
public:
41+
};
42+
template <>
43+
class Generator<uint64_t> : public IntGenerator<uint64_t> {
44+
public:
45+
};
46+
47+
template <class T, class KeyT, class ValueT>
48+
void measure(td::StringBuilder &sb, td::Slice name, td::Slice key_name, td::Slice value_name) {
49+
sb << name << "<" << key_name << "," << value_name << ">:\n";
50+
size_t ideal_size = sizeof(KeyT) + sizeof(ValueT);
51+
52+
sb << "\tempty:" << sizeof(T);
53+
struct Stat {
54+
int pi;
55+
double min_ratio;
56+
double max_ratio;
57+
};
58+
std::vector<Stat> stat;
59+
stat.reserve(1024);
60+
for (size_t size : {10000000u}) {
61+
Generator<KeyT> key_generator;
62+
auto start_mem = get_used_memory_size();
63+
T ht;
64+
auto ratio = [&]() {
65+
auto end_mem = get_used_memory_size();
66+
auto used_mem = end_mem - start_mem;
67+
return double(used_mem) / double(ideal_size * ht.size());
68+
};
69+
double min_ratio;
70+
double max_ratio;
71+
auto reset = [&]() {
72+
min_ratio = 1e100;
73+
max_ratio = 0;
74+
};
75+
auto update = [&]() {
76+
auto x = ratio();
77+
min_ratio = std::min(min_ratio, x);
78+
max_ratio = std::max(max_ratio, x);
79+
if (x > 14) {
80+
LOG(ERROR) << "WTF";
81+
}
82+
};
83+
reset();
84+
85+
int p = 10;
86+
int pi = 1;
87+
for (size_t i = 0; i < size; i++) {
88+
ht.emplace(key_generator.next(), ValueT{});
89+
update();
90+
if ((i + 1) % p == 0) {
91+
stat.emplace_back(Stat{pi, min_ratio, max_ratio});
92+
reset();
93+
pi++;
94+
p *= 10;
95+
}
96+
}
97+
}
98+
for (auto &s : stat) {
99+
sb << " " << 10 << "^" << s.pi << ":" << s.min_ratio << "->" << s.max_ratio;
100+
}
101+
sb << "\n";
102+
}
103+
104+
105+
template <template<typename... Args> class T>
106+
void print_memory_stats(td::Slice name) {
107+
std::string big_buff(1<<16, '\0');
108+
td::StringBuilder sb(big_buff, false);
109+
#define MEASURE(KeyT, ValueT) \
110+
measure<T<KeyT, ValueT>, KeyT, ValueT>(sb, name, #KeyT, #ValueT);
111+
MEASURE(uint32_t, uint32_t)
112+
// MEASURE(uint64_t, td::UInt256)
113+
LOG(ERROR) << "\n" << sb.as_cslice();
114+
}
115+
116+
// Applies the macro APPLY to every table template taking part in the comparison.
#define FOR_EACH_TABLE(APPLY)  \
  APPLY(td::FlatHashMapImpl)   \
  APPLY(folly::F14FastMap)     \
  APPLY(absl::flat_hash_map)   \
  APPLY(std::unordered_map)    \
  APPLY(std::map)
// Prints memory statistics for one table template, using its spelled-out name as the label.
#define BENCH_MEMORY(TABLE) print_memory_stats<TABLE>(#TABLE);
123+
124+
// Entry point: checks that the memory counter is non-zero, then prints the memory statistics
// of every table listed in FOR_EACH_TABLE.
int main() {
  // NOTE(review): presumably this guards against running without the memprof_stat hooks
  // active, in which case the counter would stay at zero - confirm.
  CHECK(get_used_memory_size());
  FOR_EACH_TABLE(BENCH_MEMORY)
  return 0;
}

0 commit comments

Comments
 (0)