2022-02-10 14:36:34 +01:00
|
|
|
//
|
|
|
|
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022
|
|
|
|
//
|
|
|
|
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
|
|
|
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
//
|
2022-02-10 18:26:11 +01:00
|
|
|
#if USE_MEMPROF
|
|
|
|
#include "memprof/memprof_stat.h"
|
|
|
|
#endif
|
2022-02-10 18:30:03 +03:00
|
|
|
|
2022-02-10 23:01:28 +03:00
|
|
|
#include "td/utils/common.h"
|
2022-02-10 14:36:34 +01:00
|
|
|
#include "td/utils/FlatHashMap.h"
|
2022-03-09 16:29:47 +03:00
|
|
|
#include "td/utils/FlatHashMapChunks.h"
|
2022-03-09 18:03:56 +03:00
|
|
|
#include "td/utils/FlatHashTable.h"
|
2022-11-23 19:37:32 +03:00
|
|
|
#include "td/utils/HashTableUtils.h"
|
2022-02-10 23:01:28 +03:00
|
|
|
#include "td/utils/logging.h"
|
2022-03-12 00:50:04 +03:00
|
|
|
#include "td/utils/MapNode.h"
|
2022-02-10 18:26:11 +01:00
|
|
|
#include "td/utils/misc.h"
|
|
|
|
#include "td/utils/port/Stat.h"
|
2022-02-10 18:30:03 +03:00
|
|
|
#include "td/utils/Slice.h"
|
2022-02-10 23:01:28 +03:00
|
|
|
#include "td/utils/StringBuilder.h"
|
2022-02-10 14:36:34 +01:00
|
|
|
|
2022-02-18 23:25:23 +03:00
|
|
|
#ifdef SCOPE_EXIT
|
|
|
|
#undef SCOPE_EXIT
|
|
|
|
#endif
|
|
|
|
|
2022-02-10 14:36:34 +01:00
|
|
|
#include <absl/container/flat_hash_map.h>
|
2022-02-10 23:01:28 +03:00
|
|
|
#include <array>
|
2022-02-10 18:30:03 +03:00
|
|
|
#include <folly/container/F14Map.h>
|
2022-03-09 18:03:56 +03:00
|
|
|
#include <functional>
|
2022-02-10 14:36:34 +01:00
|
|
|
#include <map>
|
|
|
|
#include <unordered_map>
|
|
|
|
|
2022-02-10 23:01:28 +03:00
|
|
|
// Index of the single benchmark to run; stays negative unless main() receives
// a command-line argument.
static int mem_stat_i = -1;

// Running count of measure() invocations; compared against mem_stat_i to pick
// the benchmark to execute.
static int mem_stat_cur = 0;
|
|
|
|
|
|
|
|
static bool use_memprof() {
|
2022-02-10 18:26:11 +01:00
|
|
|
#if USE_MEMPROF
|
2022-02-10 23:01:28 +03:00
|
|
|
return mem_stat_i < 0 && is_memprof_on();
|
|
|
|
#else
|
|
|
|
return mem_stat_i < 0;
|
2022-02-10 18:26:11 +01:00
|
|
|
#endif
|
|
|
|
}
|
2022-02-10 23:01:28 +03:00
|
|
|
|
2022-02-11 17:40:16 +01:00
|
|
|
// Returns the current memory usage of the process.
// In USE_MEMPROF builds the value comes from get_used_memory_size() whenever
// use_memprof() holds; in every other case it is the resident size reported by
// td::mem_stat().
static td::uint64 get_memory() {
#if USE_MEMPROF
  if (use_memprof()) {
    return get_used_memory_size();
  }
#endif
  // The OS-based path is only valid when memprof mode is not selected.
  CHECK(!use_memprof());
  const auto os_stat = td::mem_stat();
  return os_stat.ok().resident_size_;
}
|
2022-02-10 18:26:11 +01:00
|
|
|
|
2022-02-10 14:36:34 +01:00
|
|
|
template <class T>
|
|
|
|
class Generator {
|
|
|
|
public:
|
2022-02-10 18:30:03 +03:00
|
|
|
T next() {
|
|
|
|
UNREACHABLE();
|
|
|
|
}
|
2022-02-10 16:30:23 +01:00
|
|
|
static size_t dyn_size() {
|
|
|
|
UNREACHABLE();
|
|
|
|
}
|
2022-02-10 14:36:34 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
// Generator of consecutive values: the first call to next() returns the
// value-initialized T incremented once, and every following call returns the
// previous result incremented once more.
template <class T>
class IntGenerator {
 public:
  // Generated values own no additional memory.
  static size_t dyn_size() {
    return 0;
  }

  // Advances the internal counter and returns its new value.
  T next() {
    ++last_issued_;
    return last_issued_;
  }

 private:
  T last_issued_{};
};
|
|
|
|
|
|
|
|
template <>
|
2022-11-23 19:37:32 +03:00
|
|
|
class Generator<td::int32> final : public IntGenerator<td::int32> {};
|
2022-02-10 14:36:34 +01:00
|
|
|
template <>
|
2022-11-23 19:37:32 +03:00
|
|
|
class Generator<td::int64> final : public IntGenerator<td::int64> {};
|
2022-02-10 14:36:34 +01:00
|
|
|
|
2022-02-10 16:30:23 +01:00
|
|
|
template <class T>
|
|
|
|
class Generator<td::unique_ptr<T>> {
|
|
|
|
public:
|
|
|
|
td::unique_ptr<T> next() {
|
|
|
|
return td::make_unique<T>();
|
|
|
|
}
|
2022-02-10 23:01:28 +03:00
|
|
|
static std::size_t dyn_size() {
|
2022-02-10 16:30:23 +01:00
|
|
|
return sizeof(T);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2022-02-10 14:36:34 +01:00
|
|
|
template <class T, class KeyT, class ValueT>
|
2022-02-10 23:01:28 +03:00
|
|
|
static void measure(td::StringBuilder &sb, td::Slice name, td::Slice key_name, td::Slice value_name) {
|
2022-02-10 18:26:11 +01:00
|
|
|
mem_stat_cur++;
|
|
|
|
if (mem_stat_i >= 0 && mem_stat_cur != mem_stat_i) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
sb << name << "<" << key_name << "," << value_name << "> " << (use_memprof() ? "memprof" : "os") << "\n";
|
2022-02-10 23:01:28 +03:00
|
|
|
std::size_t ideal_size = sizeof(KeyT) + sizeof(ValueT) + Generator<ValueT>::dyn_size();
|
2022-02-10 14:36:34 +01:00
|
|
|
|
|
|
|
sb << "\tempty:" << sizeof(T);
|
|
|
|
struct Stat {
|
|
|
|
int pi;
|
|
|
|
double min_ratio;
|
|
|
|
double max_ratio;
|
|
|
|
};
|
2022-02-10 23:01:28 +03:00
|
|
|
td::vector<Stat> stat;
|
2022-02-10 14:36:34 +01:00
|
|
|
stat.reserve(1024);
|
2022-02-10 23:01:28 +03:00
|
|
|
for (std::size_t size : {1000000u}) {
|
2022-02-10 14:36:34 +01:00
|
|
|
Generator<KeyT> key_generator;
|
2022-02-10 16:30:23 +01:00
|
|
|
Generator<ValueT> value_generator;
|
2022-02-10 18:26:11 +01:00
|
|
|
auto start_mem = get_memory();
|
2022-02-10 14:36:34 +01:00
|
|
|
T ht;
|
2022-02-10 23:01:28 +03:00
|
|
|
auto ratio = [&] {
|
2022-02-10 18:26:11 +01:00
|
|
|
auto end_mem = get_memory();
|
2022-02-10 14:36:34 +01:00
|
|
|
auto used_mem = end_mem - start_mem;
|
2022-02-10 23:01:28 +03:00
|
|
|
return static_cast<double>(used_mem) / (static_cast<double>(ideal_size) * static_cast<double>(ht.size()));
|
2022-02-10 14:36:34 +01:00
|
|
|
};
|
|
|
|
double min_ratio;
|
|
|
|
double max_ratio;
|
2022-02-10 23:01:28 +03:00
|
|
|
auto reset = [&] {
|
2022-02-10 14:36:34 +01:00
|
|
|
min_ratio = 1e100;
|
|
|
|
max_ratio = 0;
|
|
|
|
};
|
2022-02-10 23:01:28 +03:00
|
|
|
auto update = [&] {
|
2022-02-10 14:36:34 +01:00
|
|
|
auto x = ratio();
|
2022-02-10 23:01:28 +03:00
|
|
|
min_ratio = td::min(min_ratio, x);
|
|
|
|
max_ratio = td::max(max_ratio, x);
|
2022-02-10 14:36:34 +01:00
|
|
|
};
|
|
|
|
reset();
|
|
|
|
|
|
|
|
int p = 10;
|
|
|
|
int pi = 1;
|
2022-02-10 23:01:28 +03:00
|
|
|
for (std::size_t i = 0; i < size; i++) {
|
2022-02-10 16:30:23 +01:00
|
|
|
ht.emplace(key_generator.next(), value_generator.next());
|
2022-02-10 14:36:34 +01:00
|
|
|
update();
|
|
|
|
if ((i + 1) % p == 0) {
|
2022-10-27 12:53:53 +03:00
|
|
|
stat.push_back(Stat{pi, min_ratio, max_ratio});
|
2022-02-10 14:36:34 +01:00
|
|
|
reset();
|
|
|
|
pi++;
|
|
|
|
p *= 10;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (auto &s : stat) {
|
2022-02-10 23:01:28 +03:00
|
|
|
sb << " 10^" << s.pi << ":" << s.min_ratio << "->" << s.max_ratio;
|
2022-02-10 14:36:34 +01:00
|
|
|
}
|
2022-02-10 23:01:28 +03:00
|
|
|
sb << '\n';
|
2022-02-10 14:36:34 +01:00
|
|
|
}
|
|
|
|
|
2022-02-10 23:01:28 +03:00
|
|
|
// Fixed-size block of N chars, used as a value payload of a known size.
template <std::size_t N>
using Bytes = std::array<char, N>;
|
2022-02-10 16:30:23 +01:00
|
|
|
|
2022-02-10 18:26:11 +01:00
|
|
|
template <template <typename... Args> class T>
|
2022-02-10 14:36:34 +01:00
|
|
|
void print_memory_stats(td::Slice name) {
|
2022-02-10 23:01:28 +03:00
|
|
|
td::string big_buff(1 << 16, '\0');
|
2022-02-10 14:36:34 +01:00
|
|
|
td::StringBuilder sb(big_buff, false);
|
2022-02-10 18:26:11 +01:00
|
|
|
#define MEASURE(KeyT, ValueT) measure<T<KeyT, ValueT>, KeyT, ValueT>(sb, name, #KeyT, #ValueT);
|
2022-11-23 19:37:32 +03:00
|
|
|
MEASURE(td::int32, td::int32);
|
|
|
|
MEASURE(td::int64, td::unique_ptr<Bytes<360>>);
|
2022-02-10 18:26:11 +01:00
|
|
|
if (!sb.as_cslice().empty()) {
|
2022-02-10 23:01:28 +03:00
|
|
|
LOG(PLAIN) << '\n' << sb.as_cslice() << '\n';
|
2022-02-10 18:26:11 +01:00
|
|
|
}
|
2022-02-10 14:36:34 +01:00
|
|
|
}
|
|
|
|
|
2022-11-23 19:37:32 +03:00
|
|
|
// Map-shaped alias over td::FlatHashTable with td::MapNode<KeyT, ValueT> nodes,
// so that the table can be benchmarked through the same two-argument
// <Key, Value> interface as the other maps.
template <class KeyT,
          class ValueT,
          class HashT = td::Hash<KeyT>,
          class EqT = std::equal_to<KeyT>>
using FlatHashMapImpl = td::FlatHashTable<td::MapNode<KeyT, ValueT>, HashT, EqT>;
|
|
|
|
|
2022-02-10 14:36:34 +01:00
|
|
|
// Applies the macro F to every map implementation taking part in the benchmark.
// The invocations are not separated by anything, so F has to expand to a
// complete statement.
#define FOR_EACH_TABLE(F) \
  F(FlatHashMapImpl)      \
  F(folly::F14FastMap)    \
  F(absl::flat_hash_map)  \
  F(std::unordered_map)   \
  F(std::map)

// Runs the memory benchmark for the table template T, using its spelling as
// the printed name. The trailing semicolon is what FOR_EACH_TABLE relies on.
#define BENCHMARK_MEMORY(T) print_memory_stats<T>(#T);
|
2022-02-10 14:36:34 +01:00
|
|
|
|
2022-02-10 18:26:11 +01:00
|
|
|
int main(int argc, const char *argv[]) {
|
|
|
|
// Usage:
|
|
|
|
// % benchmark/memory-hashset-os 0
|
2022-02-10 23:01:28 +03:00
|
|
|
// Number of benchmarks = 10
|
2022-02-10 18:26:11 +01:00
|
|
|
// % for i in {1..10}; do ./benchmark/memory-hashset-os $i; done
|
|
|
|
if (argc > 1) {
|
|
|
|
mem_stat_i = td::to_integer<td::int32>(td::Slice(argv[1]));
|
|
|
|
}
|
2022-02-10 23:01:28 +03:00
|
|
|
FOR_EACH_TABLE(BENCHMARK_MEMORY);
|
2022-02-10 18:26:11 +01:00
|
|
|
if (mem_stat_i <= 0) {
|
2022-02-10 23:01:28 +03:00
|
|
|
LOG(PLAIN) << "Number of benchmarks = " << mem_stat_cur << "\n";
|
2022-02-10 18:26:11 +01:00
|
|
|
}
|
2022-02-10 23:01:28 +03:00
|
|
|
}
|