// tdlight/benchmark/hashset_memory.cpp

//
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2024
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#if USE_MEMPROF
#include "memprof/memprof_stat.h"
#endif
2022-02-10 21:01:28 +01:00
#include "td/utils/common.h"
#include "td/utils/FlatHashMap.h"
#include "td/utils/FlatHashMapChunks.h"
#include "td/utils/FlatHashTable.h"
#include "td/utils/HashTableUtils.h"
2022-02-10 21:01:28 +01:00
#include "td/utils/logging.h"
2022-03-11 22:50:04 +01:00
#include "td/utils/MapNode.h"
#include "td/utils/misc.h"
#include "td/utils/port/Stat.h"
#include "td/utils/Slice.h"
2022-02-10 21:01:28 +01:00
#include "td/utils/StringBuilder.h"
2022-02-18 21:25:23 +01:00
#ifdef SCOPE_EXIT
#undef SCOPE_EXIT
#endif
#include <absl/container/flat_hash_map.h>
2022-02-10 21:01:28 +01:00
#include <array>
#include <folly/container/F14Map.h>
#include <functional>
#include <map>
#include <unordered_map>
2022-02-10 21:01:28 +01:00
// 1-based index of the single benchmark to run (set from the command line in main()).
// A negative value means that no particular benchmark was selected.
static int mem_stat_i = -1;

// Sequence number of the benchmark most recently entered by measure(); incremented on every call.
static int mem_stat_cur = 0;

// Tells whether memory usage is reported as "memprof" rather than "os".
//
// The result is true only while no benchmark index is selected (mem_stat_i < 0).
// In USE_MEMPROF builds is_memprof_on() must additionally return true.
static bool use_memprof() {
#if USE_MEMPROF
  return mem_stat_i < 0 && is_memprof_on();
#else
  return mem_stat_i < 0;
#endif
}
2022-02-10 21:01:28 +01:00
2022-02-11 17:40:16 +01:00
// Returns the current memory usage from the active measurement source.
//
// In USE_MEMPROF builds, when use_memprof() is true, the value comes from get_used_memory_size().
// Otherwise it is the resident_size_ field reported by td::mem_stat().
//
// The CHECK guarantees that the td::mem_stat() path is reached only when use_memprof() is false;
// without USE_MEMPROF this means a benchmark index must have been selected (mem_stat_i >= 0).
// NOTE(review): presumably this forces OS-based runs to measure one benchmark per process - confirm.
static td::uint64 get_memory() {
#if USE_MEMPROF
  if (use_memprof()) {
    return get_used_memory_size();
  }
#endif
  CHECK(!use_memprof());
  return td::mem_stat().ok().resident_size_;
}
// Primary Generator template: the source of keys/values inserted by the benchmark.
//
// The generic version is a placeholder - both members hit UNREACHABLE().
// Usable behaviour is supplied by the specializations defined for concrete types.
template <class T>
class Generator {
 public:
  // Produces the next value of type T.
  T next() {
    UNREACHABLE();
  }

  // Extra size accounted per generated value in addition to sizeof(T).
  static std::size_t dyn_size() {
    UNREACHABLE();
  }
};
// Generates consecutive values of an arithmetic type T.
//
// The internal counter starts value-initialized (zero for integers) and is pre-incremented
// on every call, so the produced sequence is 1, 2, 3, ...
template <class T>
class IntGenerator {
 public:
  // Returns the next value in the sequence.
  T next() {
    return ++value;
  }

  // Generated values own no additional memory, so the extra per-value size is zero.
  static std::size_t dyn_size() {
    return 0;
  }

 private:
  T value{};
};
// Generator for td::int32: inherits the sequential counter behaviour of IntGenerator.
template <>
class Generator<td::int32> final : public IntGenerator<td::int32> {};
// Generator for td::int64: inherits the sequential counter behaviour of IntGenerator.
template <>
class Generator<td::int64> final : public IntGenerator<td::int64> {};
2022-02-10 16:30:23 +01:00
// Generator for td::unique_ptr<T> values: every call creates a fresh T via td::make_unique<T>().
template <class T>
class Generator<td::unique_ptr<T>> {
 public:
  // Returns a newly created, default-constructed T owned by a td::unique_ptr.
  td::unique_ptr<T> next() {
    return td::make_unique<T>();
  }

  // Size of the pointee, which measure() adds to the per-element ideal size
  // on top of sizeof(td::unique_ptr<T>).
  static std::size_t dyn_size() {
    return sizeof(T);
  }
};
template <class T, class KeyT, class ValueT>
2022-02-10 21:01:28 +01:00
static void measure(td::StringBuilder &sb, td::Slice name, td::Slice key_name, td::Slice value_name) {
mem_stat_cur++;
if (mem_stat_i >= 0 && mem_stat_cur != mem_stat_i) {
return;
}
sb << name << "<" << key_name << "," << value_name << "> " << (use_memprof() ? "memprof" : "os") << "\n";
2022-02-10 21:01:28 +01:00
std::size_t ideal_size = sizeof(KeyT) + sizeof(ValueT) + Generator<ValueT>::dyn_size();
sb << "\tempty:" << sizeof(T);
struct Stat {
int pi;
double min_ratio;
double max_ratio;
};
2022-02-10 21:01:28 +01:00
td::vector<Stat> stat;
stat.reserve(1024);
2022-02-10 21:01:28 +01:00
for (std::size_t size : {1000000u}) {
Generator<KeyT> key_generator;
2022-02-10 16:30:23 +01:00
Generator<ValueT> value_generator;
auto start_mem = get_memory();
T ht;
2022-02-10 21:01:28 +01:00
auto ratio = [&] {
auto end_mem = get_memory();
auto used_mem = end_mem - start_mem;
2022-02-10 21:01:28 +01:00
return static_cast<double>(used_mem) / (static_cast<double>(ideal_size) * static_cast<double>(ht.size()));
};
double min_ratio;
double max_ratio;
2022-02-10 21:01:28 +01:00
auto reset = [&] {
min_ratio = 1e100;
max_ratio = 0;
};
2022-02-10 21:01:28 +01:00
auto update = [&] {
auto x = ratio();
2022-02-10 21:01:28 +01:00
min_ratio = td::min(min_ratio, x);
max_ratio = td::max(max_ratio, x);
};
reset();
int p = 10;
int pi = 1;
2022-02-10 21:01:28 +01:00
for (std::size_t i = 0; i < size; i++) {
2022-02-10 16:30:23 +01:00
ht.emplace(key_generator.next(), value_generator.next());
update();
if ((i + 1) % p == 0) {
stat.push_back(Stat{pi, min_ratio, max_ratio});
reset();
pi++;
p *= 10;
}
}
}
for (auto &s : stat) {
2022-02-10 21:01:28 +01:00
sb << " 10^" << s.pi << ":" << s.min_ratio << "->" << s.max_ratio;
}
2022-02-10 21:01:28 +01:00
sb << '\n';
}
2022-02-10 21:01:28 +01:00
// Fixed-size array of `size` chars; used as the pointee type of td::unique_ptr values (Bytes<360>).
template <std::size_t size>
using Bytes = std::array<char, size>;
2022-02-10 16:30:23 +01:00
// Runs every key/value benchmark combination for the map template T and logs the collected report.
//
// `name` is the label printed for the container. The report is accumulated in a 64 KiB buffer and
// is logged only if it is non-empty, i.e. if at least one measure() call was not skipped.
template <template <typename... Args> class T>
void print_memory_stats(td::Slice name) {
  td::string big_buff(1 << 16, '\0');
  td::StringBuilder sb(big_buff, false);

// Helper that passes the key/value types together with their spelled-out names to measure().
#define MEASURE(KeyT, ValueT) measure<T<KeyT, ValueT>, KeyT, ValueT>(sb, name, #KeyT, #ValueT)
  MEASURE(td::int32, td::int32);
  MEASURE(td::int64, td::unique_ptr<Bytes<360>>);
#undef MEASURE  // keep the helper macro from leaking into the rest of the file

  if (!sb.as_cslice().empty()) {
    LOG(PLAIN) << '\n' << sb.as_cslice() << '\n';
  }
}
// Map-shaped alias over td::FlatHashTable with td::MapNode<KeyT, ValueT> nodes, so that it can be
// passed as a template template argument taking (key, value[, hash, equality]) like the other maps.
template <class KeyT, class ValueT, class HashT = td::Hash<KeyT>, class EqT = std::equal_to<KeyT>>
using FlatHashMapImpl = td::FlatHashTable<td::MapNode<KeyT, ValueT>, HashT, EqT>;
// Applies the macro F once to each map template under test.
// Invocations are emitted back to back with no separator, so F must expand to a complete statement.
#define FOR_EACH_TABLE(F) \
F(FlatHashMapImpl) \
F(folly::F14FastMap) \
F(absl::flat_hash_map) \
F(std::unordered_map) \
F(std::map)
2022-02-10 21:01:28 +01:00
// Runs print_memory_stats for map template T, labelled with the spelled-out template name.
// The trailing ';' is required: FOR_EACH_TABLE concatenates the expansions without separators.
#define BENCHMARK_MEMORY(T) print_memory_stats<T>(#T);
int main(int argc, const char *argv[]) {
// Usage:
// % benchmark/memory-hashset-os 0
2022-02-10 21:01:28 +01:00
// Number of benchmarks = 10
// % for i in {1..10}; do ./benchmark/memory-hashset-os $i; done
if (argc > 1) {
mem_stat_i = td::to_integer<td::int32>(td::Slice(argv[1]));
}
2022-02-10 21:01:28 +01:00
FOR_EACH_TABLE(BENCHMARK_MEMORY);
if (mem_stat_i <= 0) {
2022-02-10 21:01:28 +01:00
LOG(PLAIN) << "Number of benchmarks = " << mem_stat_cur << "\n";
}
2022-02-10 21:01:28 +01:00
}