2018-12-31 20:04:05 +01:00
|
|
|
//
|
2021-01-01 13:57:46 +01:00
|
|
|
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2021
|
2018-12-31 20:04:05 +01:00
|
|
|
//
|
|
|
|
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
|
|
|
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
//
|
|
|
|
#include "td/db/binlog/Binlog.h"
|
|
|
|
|
|
|
|
#include "td/db/binlog/detail/BinlogEventsBuffer.h"
|
|
|
|
#include "td/db/binlog/detail/BinlogEventsProcessor.h"
|
|
|
|
|
|
|
|
#include "td/utils/buffer.h"
|
|
|
|
#include "td/utils/format.h"
|
|
|
|
#include "td/utils/misc.h"
|
|
|
|
#include "td/utils/port/Clocks.h"
|
2019-01-02 16:21:31 +01:00
|
|
|
#include "td/utils/port/FileFd.h"
|
2018-12-31 20:04:05 +01:00
|
|
|
#include "td/utils/port/path.h"
|
2018-09-10 03:08:15 +02:00
|
|
|
#include "td/utils/port/PollFlags.h"
|
2018-12-31 20:04:05 +01:00
|
|
|
#include "td/utils/port/Stat.h"
|
|
|
|
#include "td/utils/Random.h"
|
|
|
|
#include "td/utils/ScopeGuard.h"
|
2021-05-17 14:21:11 +02:00
|
|
|
#include "td/utils/SliceBuilder.h"
|
2018-12-31 20:04:05 +01:00
|
|
|
#include "td/utils/Status.h"
|
|
|
|
#include "td/utils/Time.h"
|
|
|
|
#include "td/utils/tl_helpers.h"
|
|
|
|
#include "td/utils/tl_parsers.h"
|
|
|
|
|
|
|
|
namespace td {
|
|
|
|
namespace detail {
|
|
|
|
struct AesCtrEncryptionEvent {
|
|
|
|
static constexpr size_t min_salt_size() {
|
|
|
|
return 16; // 256 bits
|
|
|
|
}
|
|
|
|
static constexpr size_t default_salt_size() {
|
|
|
|
return 32; // 256 bits
|
|
|
|
}
|
|
|
|
static constexpr size_t key_size() {
|
|
|
|
return 32; // 256 bits
|
|
|
|
}
|
|
|
|
static constexpr size_t iv_size() {
|
|
|
|
return 16; // 128 bits
|
|
|
|
}
|
|
|
|
static constexpr size_t hash_size() {
|
|
|
|
return 32; // 256 bits
|
|
|
|
}
|
|
|
|
static constexpr size_t kdf_iteration_count() {
|
|
|
|
return 60002;
|
|
|
|
}
|
|
|
|
static constexpr size_t kdf_fast_iteration_count() {
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
BufferSlice key_salt_;
|
|
|
|
BufferSlice iv_;
|
|
|
|
BufferSlice key_hash_;
|
|
|
|
|
|
|
|
BufferSlice generate_key(const DbKey &db_key) {
|
|
|
|
CHECK(!db_key.is_empty());
|
|
|
|
BufferSlice key(key_size());
|
|
|
|
size_t iteration_count = kdf_iteration_count();
|
|
|
|
if (db_key.is_raw_key()) {
|
|
|
|
iteration_count = kdf_fast_iteration_count();
|
|
|
|
}
|
|
|
|
pbkdf2_sha256(db_key.data(), key_salt_.as_slice(), narrow_cast<int>(iteration_count), key.as_slice());
|
|
|
|
return key;
|
|
|
|
}
|
|
|
|
BufferSlice generate_hash(Slice key) {
|
|
|
|
BufferSlice hash(hash_size());
|
|
|
|
hmac_sha256(key, "cucumbers everywhere", hash.as_slice());
|
|
|
|
return hash;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <class StorerT>
|
|
|
|
void store(StorerT &storer) const {
|
|
|
|
using td::store;
|
2018-04-16 17:02:42 +02:00
|
|
|
BEGIN_STORE_FLAGS();
|
|
|
|
END_STORE_FLAGS();
|
2018-12-31 20:04:05 +01:00
|
|
|
store(key_salt_, storer);
|
|
|
|
store(iv_, storer);
|
|
|
|
store(key_hash_, storer);
|
|
|
|
}
|
|
|
|
template <class ParserT>
|
|
|
|
void parse(ParserT &&parser) {
|
|
|
|
using td::parse;
|
2018-04-16 17:02:42 +02:00
|
|
|
BEGIN_PARSE_FLAGS();
|
2019-08-01 02:40:28 +02:00
|
|
|
END_PARSE_FLAGS();
|
2018-12-31 20:04:05 +01:00
|
|
|
parse(key_salt_, parser);
|
|
|
|
parse(iv_, parser);
|
|
|
|
parse(key_hash_, parser);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
class BinlogReader {
|
|
|
|
public:
|
|
|
|
explicit BinlogReader(ChainBufferReader *input) : input_(input) {
|
|
|
|
}
|
2018-07-09 02:19:37 +02:00
|
|
|
void set_input(ChainBufferReader *input, bool is_encrypted, int64 expected_size) {
|
2018-12-31 20:04:05 +01:00
|
|
|
input_ = input;
|
2018-07-09 02:19:37 +02:00
|
|
|
is_encrypted_ = is_encrypted;
|
|
|
|
expected_size_ = expected_size;
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
2018-07-13 13:42:03 +02:00
|
|
|
ChainBufferReader *input() {
|
|
|
|
return input_;
|
|
|
|
}
|
|
|
|
|
2018-07-09 02:19:37 +02:00
|
|
|
int64 offset() const {
|
2018-12-31 20:04:05 +01:00
|
|
|
return offset_;
|
|
|
|
}
|
|
|
|
Result<size_t> read_next(BinlogEvent *event) {
|
2020-01-19 01:02:56 +01:00
|
|
|
if (state_ == State::ReadLength) {
|
2018-12-31 20:04:05 +01:00
|
|
|
if (input_->size() < 4) {
|
|
|
|
return 4;
|
|
|
|
}
|
|
|
|
auto it = input_->clone();
|
|
|
|
|
|
|
|
char buf[4];
|
|
|
|
it.advance(4, MutableSlice(buf, 4));
|
|
|
|
size_ = static_cast<size_t>(TlParser(Slice(buf, 4)).fetch_int());
|
|
|
|
|
2020-06-11 17:21:18 +02:00
|
|
|
if (size_ > BinlogEvent::MAX_SIZE) {
|
2018-12-31 20:04:05 +01:00
|
|
|
return Status::Error(PSLICE() << "Too big event " << tag("size", size_));
|
|
|
|
}
|
2020-06-11 17:21:18 +02:00
|
|
|
if (size_ < BinlogEvent::MIN_SIZE) {
|
2018-12-31 20:04:05 +01:00
|
|
|
return Status::Error(PSLICE() << "Too small event " << tag("size", size_));
|
|
|
|
}
|
2018-07-03 10:46:22 +02:00
|
|
|
if (size_ % 4 != 0) {
|
2018-07-09 12:48:22 +02:00
|
|
|
return Status::Error(-2, PSLICE() << "Event of size " << size_ << " at offset " << offset() << " out of "
|
|
|
|
<< expected_size_ << ' ' << tag("is_encrypted", is_encrypted_)
|
|
|
|
<< format::as_hex_dump<4>(Slice(input_->prepare_read().truncate(28))));
|
2018-07-03 10:46:22 +02:00
|
|
|
}
|
2020-01-19 01:02:56 +01:00
|
|
|
state_ = State::ReadEvent;
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (input_->size() < size_) {
|
|
|
|
return size_;
|
|
|
|
}
|
|
|
|
|
2018-06-28 19:00:11 +02:00
|
|
|
event->debug_info_ = BinlogDebugInfo{__FILE__, __LINE__};
|
2018-12-31 20:04:05 +01:00
|
|
|
TRY_STATUS(event->init(input_->cut_head(size_).move_as_buffer_slice()));
|
|
|
|
offset_ += size_;
|
|
|
|
event->offset_ = offset_;
|
2020-01-19 01:02:56 +01:00
|
|
|
state_ = State::ReadLength;
|
2018-12-31 20:04:05 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
ChainBufferReader *input_;
|
2020-01-19 01:02:56 +01:00
|
|
|
enum class State { ReadLength, ReadEvent };
|
|
|
|
State state_ = State::ReadLength;
|
2018-12-31 20:04:05 +01:00
|
|
|
size_t size_{0};
|
|
|
|
int64 offset_{0};
|
2018-07-09 02:19:37 +02:00
|
|
|
int64 expected_size_{0};
|
|
|
|
bool is_encrypted_{false};
|
2018-12-31 20:04:05 +01:00
|
|
|
};
|
2018-07-16 23:49:25 +02:00
|
|
|
|
|
|
|
static int64 file_size(CSlice path) {
|
|
|
|
auto r_stat = stat(path);
|
|
|
|
if (r_stat.is_error()) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return r_stat.ok().size_;
|
|
|
|
}
|
2018-12-31 20:04:05 +01:00
|
|
|
} // namespace detail
|
|
|
|
|
2021-06-16 04:23:22 +02:00
|
|
|
int32 VERBOSITY_NAME(binlog) = VERBOSITY_NAME(DEBUG) + 8;
|
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
Binlog::Binlog() = default;
|
|
|
|
|
|
|
|
Binlog::~Binlog() {
|
|
|
|
close().ignore();
|
|
|
|
}
|
|
|
|
|
2018-10-08 20:18:06 +02:00
|
|
|
Result<FileFd> Binlog::open_binlog(const string &path, int32 flags) {
|
2018-12-31 20:04:05 +01:00
|
|
|
TRY_RESULT(fd, FileFd::open(path, flags));
|
2018-10-08 20:18:06 +02:00
|
|
|
TRY_STATUS(fd.lock(FileFd::LockFlags::Write, path, 100));
|
2018-12-31 20:04:05 +01:00
|
|
|
return std::move(fd);
|
|
|
|
}
|
|
|
|
|
|
|
|
Status Binlog::init(string path, const Callback &callback, DbKey db_key, DbKey old_db_key, int32 dummy,
|
|
|
|
const Callback &debug_callback) {
|
|
|
|
close().ignore();
|
|
|
|
|
|
|
|
db_key_ = std::move(db_key);
|
|
|
|
old_db_key_ = std::move(old_db_key);
|
|
|
|
|
2018-09-27 03:19:03 +02:00
|
|
|
processor_ = make_unique<detail::BinlogEventsProcessor>();
|
2018-12-31 20:04:05 +01:00
|
|
|
// Turn off BinlogEventsBuffer
|
2018-09-27 03:19:03 +02:00
|
|
|
// events_buffer_ = make_unique<detail::BinlogEventsBuffer>();
|
2018-12-31 20:04:05 +01:00
|
|
|
|
|
|
|
// try to restore binlog from regenerated version
|
|
|
|
if (stat(path).is_error()) {
|
|
|
|
rename(PSLICE() << path << ".new", path).ignore();
|
|
|
|
}
|
|
|
|
|
|
|
|
info_ = BinlogInfo();
|
|
|
|
info_.was_created = stat(path).is_error();
|
|
|
|
|
|
|
|
TRY_RESULT(fd, open_binlog(path, FileFd::Flags::Read | FileFd::Flags::Write | FileFd::Flags::Create));
|
|
|
|
fd_ = BufferedFdBase<FileFd>(std::move(fd));
|
|
|
|
fd_size_ = 0;
|
|
|
|
path_ = std::move(path);
|
|
|
|
|
|
|
|
auto status = load_binlog(callback, debug_callback);
|
|
|
|
if (status.is_error()) {
|
|
|
|
close().ignore();
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
info_.last_id = processor_->last_id();
|
|
|
|
last_id_ = processor_->last_id();
|
|
|
|
if (info_.wrong_password) {
|
|
|
|
close().ignore();
|
|
|
|
return Status::Error(Error::WrongPassword, "Wrong password");
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((!db_key_.is_empty() && !db_key_used_) || (db_key_.is_empty() && encryption_type_ != EncryptionType::None)) {
|
|
|
|
aes_ctr_key_salt_ = BufferSlice();
|
|
|
|
do_reindex();
|
|
|
|
}
|
|
|
|
|
|
|
|
info_.is_opened = true;
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
|
|
|
|
void Binlog::add_event(BinlogEvent &&event) {
|
2018-07-03 10:46:22 +02:00
|
|
|
if (event.size_ % 4 != 0) {
|
|
|
|
LOG(FATAL) << "Trying to add event with bad size " << event.public_to_string();
|
|
|
|
}
|
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
if (!events_buffer_) {
|
|
|
|
do_add_event(std::move(event));
|
|
|
|
} else {
|
|
|
|
events_buffer_->add_event(std::move(event));
|
|
|
|
}
|
|
|
|
lazy_flush();
|
|
|
|
|
|
|
|
if (state_ == State::Run) {
|
|
|
|
auto fd_size = fd_size_;
|
|
|
|
if (events_buffer_) {
|
|
|
|
fd_size += events_buffer_->size();
|
|
|
|
}
|
|
|
|
auto need_reindex = [&](int64 min_size, int rate) {
|
|
|
|
return fd_size > min_size && fd_size / rate > processor_->total_raw_events_size();
|
|
|
|
};
|
2020-07-13 15:26:36 +02:00
|
|
|
if (need_reindex(50000, 5) || need_reindex(100000, 4) || need_reindex(300000, 3) || need_reindex(500000, 2)) {
|
2018-12-31 20:04:05 +01:00
|
|
|
LOG(INFO) << tag("fd_size", format::as_size(fd_size))
|
|
|
|
<< tag("total events size", format::as_size(processor_->total_raw_events_size()));
|
|
|
|
do_reindex();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t Binlog::flush_events_buffer(bool force) {
|
|
|
|
if (!events_buffer_) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!force && !events_buffer_->need_flush()) {
|
|
|
|
return events_buffer_->size();
|
|
|
|
}
|
|
|
|
CHECK(!in_flush_events_buffer_);
|
|
|
|
in_flush_events_buffer_ = true;
|
|
|
|
events_buffer_->flush([&](BinlogEvent &&event) { this->do_add_event(std::move(event)); });
|
|
|
|
in_flush_events_buffer_ = false;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void Binlog::do_add_event(BinlogEvent &&event) {
|
|
|
|
if (event.flags_ & BinlogEvent::Flags::Partial) {
|
|
|
|
event.flags_ &= ~BinlogEvent::Flags::Partial;
|
|
|
|
pending_events_.emplace_back(std::move(event));
|
|
|
|
} else {
|
|
|
|
for (auto &pending_event : pending_events_) {
|
|
|
|
do_event(std::move(pending_event));
|
|
|
|
}
|
|
|
|
pending_events_.clear();
|
|
|
|
do_event(std::move(event));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Status Binlog::close(bool need_sync) {
|
|
|
|
if (fd_.empty()) {
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
if (need_sync) {
|
2018-02-16 10:13:04 +01:00
|
|
|
sync();
|
|
|
|
} else {
|
|
|
|
flush();
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
2018-10-08 20:18:06 +02:00
|
|
|
|
|
|
|
fd_.lock(FileFd::LockFlags::Unlock, path_, 1).ensure();
|
|
|
|
fd_.close();
|
|
|
|
path_.clear();
|
|
|
|
info_.is_opened = false;
|
|
|
|
need_sync_ = false;
|
2018-12-31 20:04:05 +01:00
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
|
2020-07-14 19:54:38 +02:00
|
|
|
void Binlog::close(Promise<> promise) {
|
2020-07-14 18:10:26 +02:00
|
|
|
TRY_STATUS_PROMISE(promise, close());
|
|
|
|
promise.set_value({});
|
|
|
|
}
|
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
void Binlog::change_key(DbKey new_db_key) {
|
|
|
|
db_key_ = std::move(new_db_key);
|
|
|
|
aes_ctr_key_salt_ = BufferSlice();
|
|
|
|
do_reindex();
|
|
|
|
}
|
|
|
|
|
|
|
|
Status Binlog::close_and_destroy() {
|
|
|
|
auto path = path_;
|
|
|
|
auto close_status = close(false);
|
|
|
|
destroy(path).ignore();
|
|
|
|
return close_status;
|
|
|
|
}
|
2019-03-11 15:56:03 +01:00
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
Status Binlog::destroy(Slice path) {
|
|
|
|
unlink(PSLICE() << path).ignore();
|
|
|
|
unlink(PSLICE() << path << ".new").ignore();
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
|
|
|
|
void Binlog::do_event(BinlogEvent &&event) {
|
2018-08-17 11:19:21 +02:00
|
|
|
auto event_size = event.raw_event_.size();
|
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
if (state_ == State::Run || state_ == State::Reindex) {
|
2018-07-09 12:48:22 +02:00
|
|
|
auto validate_status = event.validate();
|
|
|
|
if (validate_status.is_error()) {
|
|
|
|
LOG(FATAL) << "Failed to validate binlog event " << validate_status << " "
|
|
|
|
<< format::as_hex_dump<4>(Slice(event.raw_event_.as_slice().truncate(28)));
|
|
|
|
}
|
2021-06-16 04:23:22 +02:00
|
|
|
VLOG(binlog) << "Write binlog event: " << format::cond(state_ == State::Reindex, "[reindex] ")
|
|
|
|
<< event.public_to_string();
|
2018-12-31 20:04:05 +01:00
|
|
|
switch (encryption_type_) {
|
|
|
|
case EncryptionType::None: {
|
|
|
|
buffer_writer_.append(event.raw_event_.clone());
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case EncryptionType::AesCtr: {
|
|
|
|
buffer_writer_.append(event.raw_event_.as_slice());
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (event.type_ < 0) {
|
|
|
|
if (event.type_ == BinlogEvent::ServiceTypes::AesCtrEncryption) {
|
|
|
|
detail::AesCtrEncryptionEvent encryption_event;
|
|
|
|
encryption_event.parse(TlParser(event.data_));
|
|
|
|
|
|
|
|
BufferSlice key;
|
|
|
|
if (aes_ctr_key_salt_.as_slice() == encryption_event.key_salt_.as_slice()) {
|
2018-11-06 17:00:03 +01:00
|
|
|
key = BufferSlice(as_slice(aes_ctr_key_));
|
2018-12-31 20:04:05 +01:00
|
|
|
} else if (!db_key_.is_empty()) {
|
|
|
|
key = encryption_event.generate_key(db_key_);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (encryption_event.generate_hash(key.as_slice()).as_slice() != encryption_event.key_hash_.as_slice()) {
|
|
|
|
CHECK(state_ == State::Load);
|
|
|
|
if (!old_db_key_.is_empty()) {
|
|
|
|
key = encryption_event.generate_key(old_db_key_);
|
|
|
|
if (encryption_event.generate_hash(key.as_slice()).as_slice() != encryption_event.key_hash_.as_slice()) {
|
|
|
|
info_.wrong_password = true;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
info_.wrong_password = true;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
db_key_used_ = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
encryption_type_ = EncryptionType::AesCtr;
|
|
|
|
|
|
|
|
aes_ctr_key_salt_ = encryption_event.key_salt_.copy();
|
|
|
|
update_encryption(key.as_slice(), encryption_event.iv_.as_slice());
|
|
|
|
|
|
|
|
if (state_ == State::Load) {
|
|
|
|
update_read_encryption();
|
|
|
|
LOG(INFO) << "Load: init encryption";
|
|
|
|
} else {
|
|
|
|
CHECK(state_ == State::Reindex);
|
|
|
|
flush();
|
|
|
|
update_write_encryption();
|
|
|
|
//LOG(INFO) << format::cond(state_ == State::Run, "Run", "Reindex") << ": init encryption";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (state_ != State::Reindex) {
|
2018-07-16 12:07:00 +02:00
|
|
|
auto status = processor_->add_event(std::move(event));
|
|
|
|
if (status.is_error()) {
|
2018-07-16 23:49:25 +02:00
|
|
|
auto old_size = detail::file_size(path_);
|
2019-01-02 16:21:31 +01:00
|
|
|
auto data = debug_get_binlog_data(fd_size_, old_size);
|
2018-07-16 12:07:00 +02:00
|
|
|
if (state_ == State::Load) {
|
|
|
|
fd_.seek(fd_size_).ensure();
|
|
|
|
fd_.truncate_to_current_position(fd_size_).ensure();
|
2019-03-11 15:56:03 +01:00
|
|
|
|
|
|
|
if (data.empty()) {
|
|
|
|
return;
|
|
|
|
}
|
2018-07-16 12:07:00 +02:00
|
|
|
}
|
2019-03-11 15:56:03 +01:00
|
|
|
|
2018-07-23 20:46:05 +02:00
|
|
|
LOG(FATAL) << "Truncate binlog \"" << path_ << "\" from size " << old_size << " to size " << fd_size_
|
2019-01-02 16:21:31 +01:00
|
|
|
<< " in state " << static_cast<int32>(state_) << " due to error: " << status << " after reading "
|
|
|
|
<< data;
|
2018-07-16 12:07:00 +02:00
|
|
|
}
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
2018-07-16 12:07:00 +02:00
|
|
|
|
|
|
|
fd_events_++;
|
2018-08-17 11:19:21 +02:00
|
|
|
fd_size_ += event_size;
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void Binlog::sync() {
|
|
|
|
flush();
|
2018-02-16 10:13:04 +01:00
|
|
|
if (need_sync_) {
|
|
|
|
auto status = fd_.sync();
|
|
|
|
LOG_IF(FATAL, status.is_error()) << "Failed to sync binlog: " << status;
|
|
|
|
need_sync_ = false;
|
|
|
|
}
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void Binlog::flush() {
|
|
|
|
if (state_ == State::Load) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
flush_events_buffer(true);
|
|
|
|
// NB: encryption happens during flush
|
|
|
|
if (byte_flow_flag_) {
|
|
|
|
byte_flow_source_.wakeup();
|
|
|
|
}
|
2018-02-16 10:13:04 +01:00
|
|
|
auto r_written = fd_.flush_write();
|
|
|
|
r_written.ensure();
|
|
|
|
auto written = r_written.ok();
|
|
|
|
if (written > 0) {
|
|
|
|
need_sync_ = true;
|
|
|
|
}
|
2018-12-31 20:04:05 +01:00
|
|
|
need_flush_since_ = 0;
|
2018-02-16 10:13:04 +01:00
|
|
|
LOG_IF(FATAL, fd_.need_flush_write()) << "Failed to flush binlog";
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void Binlog::lazy_flush() {
|
|
|
|
size_t events_buffer_size = flush_events_buffer(false /*force*/);
|
|
|
|
buffer_reader_.sync_with_writer();
|
|
|
|
auto size = buffer_reader_.size() + events_buffer_size;
|
|
|
|
if (size > (1 << 14)) {
|
|
|
|
flush();
|
|
|
|
} else if (size > 0 && need_flush_since_ == 0) {
|
|
|
|
need_flush_since_ = Time::now_cached();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void Binlog::update_read_encryption() {
|
|
|
|
CHECK(binlog_reader_ptr_);
|
|
|
|
switch (encryption_type_) {
|
|
|
|
case EncryptionType::None: {
|
2019-06-19 16:31:25 +02:00
|
|
|
auto r_file_size = fd_.get_size();
|
|
|
|
r_file_size.ensure();
|
|
|
|
binlog_reader_ptr_->set_input(&buffer_reader_, false, r_file_size.ok());
|
2018-12-31 20:04:05 +01:00
|
|
|
byte_flow_flag_ = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case EncryptionType::AesCtr: {
|
|
|
|
byte_flow_source_ = ByteFlowSource(&buffer_reader_);
|
|
|
|
aes_xcode_byte_flow_ = AesCtrByteFlow();
|
|
|
|
aes_xcode_byte_flow_.init(std::move(aes_ctr_state_));
|
|
|
|
byte_flow_sink_ = ByteFlowSink();
|
|
|
|
byte_flow_source_ >> aes_xcode_byte_flow_ >> byte_flow_sink_;
|
|
|
|
byte_flow_flag_ = true;
|
2019-06-19 16:31:25 +02:00
|
|
|
auto r_file_size = fd_.get_size();
|
|
|
|
r_file_size.ensure();
|
|
|
|
binlog_reader_ptr_->set_input(byte_flow_sink_.get_output(), true, r_file_size.ok());
|
2018-12-31 20:04:05 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void Binlog::update_write_encryption() {
|
|
|
|
switch (encryption_type_) {
|
|
|
|
case EncryptionType::None: {
|
|
|
|
fd_.set_output_reader(&buffer_reader_);
|
|
|
|
byte_flow_flag_ = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case EncryptionType::AesCtr: {
|
|
|
|
byte_flow_source_ = ByteFlowSource(&buffer_reader_);
|
|
|
|
aes_xcode_byte_flow_ = AesCtrByteFlow();
|
|
|
|
aes_xcode_byte_flow_.init(std::move(aes_ctr_state_));
|
|
|
|
byte_flow_sink_ = ByteFlowSink();
|
|
|
|
byte_flow_source_ >> aes_xcode_byte_flow_ >> byte_flow_sink_;
|
|
|
|
byte_flow_flag_ = true;
|
|
|
|
fd_.set_output_reader(byte_flow_sink_.get_output());
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Status Binlog::load_binlog(const Callback &callback, const Callback &debug_callback) {
|
|
|
|
state_ = State::Load;
|
|
|
|
|
|
|
|
buffer_writer_ = ChainBufferWriter();
|
|
|
|
buffer_reader_ = buffer_writer_.extract_reader();
|
|
|
|
fd_.set_input_writer(&buffer_writer_);
|
2018-10-26 16:11:20 +02:00
|
|
|
detail::BinlogReader reader{nullptr};
|
2018-12-31 20:04:05 +01:00
|
|
|
binlog_reader_ptr_ = &reader;
|
|
|
|
|
|
|
|
update_read_encryption();
|
|
|
|
|
2018-08-14 09:42:40 +02:00
|
|
|
fd_.get_poll_info().add_flags(PollFlags::Read());
|
2018-12-31 20:04:05 +01:00
|
|
|
info_.wrong_password = false;
|
|
|
|
while (true) {
|
|
|
|
BinlogEvent event;
|
|
|
|
auto r_need_size = reader.read_next(&event);
|
|
|
|
if (r_need_size.is_error()) {
|
2018-07-09 12:23:34 +02:00
|
|
|
if (r_need_size.error().code() == -2) {
|
2018-07-16 23:49:25 +02:00
|
|
|
auto old_size = detail::file_size(path_);
|
2019-01-02 16:21:31 +01:00
|
|
|
auto offset = reader.offset();
|
|
|
|
auto data = debug_get_binlog_data(offset, old_size);
|
|
|
|
fd_.seek(offset).ensure();
|
|
|
|
fd_.truncate_to_current_position(offset).ensure();
|
2019-03-11 15:56:03 +01:00
|
|
|
if (data.empty()) {
|
|
|
|
break;
|
|
|
|
}
|
2019-01-02 16:21:31 +01:00
|
|
|
LOG(FATAL) << "Truncate binlog \"" << path_ << "\" from size " << old_size << " to size " << offset
|
|
|
|
<< " due to error: " << r_need_size.error() << " after reading " << data;
|
2018-07-09 12:23:34 +02:00
|
|
|
}
|
2018-12-31 20:04:05 +01:00
|
|
|
LOG(ERROR) << r_need_size.error();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
auto need_size = r_need_size.move_as_ok();
|
2019-02-21 16:58:20 +01:00
|
|
|
// LOG(ERROR) << "Need size = " << need_size;
|
2018-12-31 20:04:05 +01:00
|
|
|
if (need_size == 0) {
|
2020-01-19 01:05:36 +01:00
|
|
|
if (debug_callback) {
|
|
|
|
debug_callback(event);
|
|
|
|
}
|
|
|
|
do_add_event(std::move(event));
|
|
|
|
if (info_.wrong_password) {
|
|
|
|
return Status::OK();
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
} else {
|
2018-02-12 11:37:54 +01:00
|
|
|
TRY_STATUS(fd_.flush_read(max(need_size, static_cast<size_t>(4096))));
|
2018-12-31 20:04:05 +01:00
|
|
|
buffer_reader_.sync_with_writer();
|
|
|
|
if (byte_flow_flag_) {
|
|
|
|
byte_flow_source_.wakeup();
|
|
|
|
}
|
2018-07-13 13:42:03 +02:00
|
|
|
if (reader.input()->size() < need_size) {
|
|
|
|
break;
|
|
|
|
}
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
auto offset = processor_->offset();
|
|
|
|
processor_->for_each([&](BinlogEvent &event) {
|
2018-06-25 12:37:14 +02:00
|
|
|
VLOG(binlog) << "Replay binlog event: " << event.public_to_string();
|
2018-12-31 20:04:05 +01:00
|
|
|
if (callback) {
|
|
|
|
callback(event);
|
|
|
|
}
|
|
|
|
});
|
|
|
|
|
2019-06-19 16:31:25 +02:00
|
|
|
TRY_RESULT(fd_size, fd_.get_size());
|
2018-12-31 20:04:05 +01:00
|
|
|
if (offset != fd_size) {
|
|
|
|
LOG(ERROR) << "Truncate " << tag("path", path_) << tag("old_size", fd_size) << tag("new_size", offset);
|
|
|
|
fd_.seek(offset).ensure();
|
|
|
|
fd_.truncate_to_current_position(offset).ensure();
|
|
|
|
db_key_used_ = false; // force reindex
|
|
|
|
}
|
2020-01-19 01:05:36 +01:00
|
|
|
LOG_CHECK(fd_size_ == offset) << fd_size << " " << fd_size_ << " " << offset;
|
2018-12-31 20:04:05 +01:00
|
|
|
binlog_reader_ptr_ = nullptr;
|
|
|
|
state_ = State::Run;
|
|
|
|
|
|
|
|
buffer_writer_ = ChainBufferWriter();
|
|
|
|
buffer_reader_ = buffer_writer_.extract_reader();
|
|
|
|
|
|
|
|
// reuse aes_ctr_state_
|
|
|
|
if (encryption_type_ == EncryptionType::AesCtr) {
|
|
|
|
aes_ctr_state_ = aes_xcode_byte_flow_.move_aes_ctr_state();
|
|
|
|
}
|
|
|
|
update_write_encryption();
|
|
|
|
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
|
|
|
|
void Binlog::update_encryption(Slice key, Slice iv) {
|
2018-11-06 17:00:03 +01:00
|
|
|
as_slice(aes_ctr_key_).copy_from(key);
|
2018-12-31 20:04:05 +01:00
|
|
|
UInt128 aes_ctr_iv;
|
2018-11-06 17:00:03 +01:00
|
|
|
as_slice(aes_ctr_iv).copy_from(iv);
|
2019-08-12 15:53:17 +02:00
|
|
|
aes_ctr_state_.init(as_slice(aes_ctr_key_), as_slice(aes_ctr_iv));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void Binlog::reset_encryption() {
|
|
|
|
if (db_key_.is_empty()) {
|
|
|
|
encryption_type_ = EncryptionType::None;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
using EncryptionEvent = detail::AesCtrEncryptionEvent;
|
|
|
|
EncryptionEvent event;
|
|
|
|
|
|
|
|
if (aes_ctr_key_salt_.empty()) {
|
|
|
|
event.key_salt_ = BufferSlice(EncryptionEvent::default_salt_size());
|
|
|
|
Random::secure_bytes(event.key_salt_.as_slice());
|
|
|
|
} else {
|
|
|
|
event.key_salt_ = aes_ctr_key_salt_.clone();
|
|
|
|
}
|
|
|
|
event.iv_ = BufferSlice(EncryptionEvent::iv_size());
|
|
|
|
Random::secure_bytes(event.iv_.as_slice());
|
|
|
|
|
|
|
|
BufferSlice key;
|
|
|
|
if (aes_ctr_key_salt_.as_slice() == event.key_salt_.as_slice()) {
|
2018-11-06 17:00:03 +01:00
|
|
|
key = BufferSlice(as_slice(aes_ctr_key_));
|
2018-12-31 20:04:05 +01:00
|
|
|
} else {
|
|
|
|
key = event.generate_key(db_key_);
|
|
|
|
}
|
|
|
|
|
|
|
|
event.key_hash_ = event.generate_hash(key.as_slice());
|
|
|
|
|
|
|
|
do_event(BinlogEvent(
|
2018-06-28 19:00:11 +02:00
|
|
|
BinlogEvent::create_raw(0, BinlogEvent::ServiceTypes::AesCtrEncryption, 0, create_default_storer(event)),
|
|
|
|
BinlogDebugInfo{__FILE__, __LINE__}));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void Binlog::do_reindex() {
|
|
|
|
flush_events_buffer(true);
|
|
|
|
// start reindex
|
|
|
|
CHECK(state_ == State::Run);
|
|
|
|
state_ = State::Reindex;
|
|
|
|
SCOPE_EXIT {
|
|
|
|
state_ = State::Run;
|
|
|
|
};
|
|
|
|
|
|
|
|
auto start_time = Clocks::monotonic();
|
2018-07-16 23:49:25 +02:00
|
|
|
auto start_size = detail::file_size(path_);
|
2018-12-31 20:04:05 +01:00
|
|
|
auto start_events = fd_events_;
|
|
|
|
|
|
|
|
string new_path = path_ + ".new";
|
|
|
|
|
|
|
|
auto r_opened_file = open_binlog(new_path, FileFd::Flags::Write | FileFd::Flags::Create | FileFd::Truncate);
|
|
|
|
if (r_opened_file.is_error()) {
|
|
|
|
LOG(ERROR) << "Can't open new binlog for regenerate: " << r_opened_file.error();
|
|
|
|
return;
|
|
|
|
}
|
2018-10-08 20:18:06 +02:00
|
|
|
auto old_fd = std::move(fd_); // can't close fd_ now, because it will release file lock
|
2018-12-31 20:04:05 +01:00
|
|
|
fd_ = BufferedFdBase<FileFd>(r_opened_file.move_as_ok());
|
|
|
|
|
|
|
|
buffer_writer_ = ChainBufferWriter();
|
|
|
|
buffer_reader_ = buffer_writer_.extract_reader();
|
|
|
|
encryption_type_ = EncryptionType::None;
|
|
|
|
update_write_encryption();
|
|
|
|
|
|
|
|
// reindex
|
|
|
|
fd_size_ = 0;
|
|
|
|
fd_events_ = 0;
|
|
|
|
reset_encryption();
|
|
|
|
processor_->for_each([&](BinlogEvent &event) {
|
2020-08-07 19:30:05 +02:00
|
|
|
event.realloc();
|
2018-12-31 20:04:05 +01:00
|
|
|
do_event(std::move(event)); // NB: no move is actually happens
|
|
|
|
});
|
2018-02-16 10:13:04 +01:00
|
|
|
need_sync_ = true; // must sync creation of the file
|
|
|
|
sync();
|
2018-12-31 20:04:05 +01:00
|
|
|
|
|
|
|
// finish_reindex
|
2018-02-16 10:13:04 +01:00
|
|
|
auto status = unlink(path_);
|
2018-12-31 20:04:05 +01:00
|
|
|
LOG_IF(FATAL, status.is_error()) << "Failed to unlink old binlog: " << status;
|
2018-10-08 20:18:06 +02:00
|
|
|
old_fd.close(); // now we can close old file and release the system lock
|
2018-12-31 20:04:05 +01:00
|
|
|
status = rename(new_path, path_);
|
2018-10-08 20:18:06 +02:00
|
|
|
FileFd::remove_local_lock(new_path); // now we can release local lock for temporary file
|
2018-12-31 20:04:05 +01:00
|
|
|
LOG_IF(FATAL, status.is_error()) << "Failed to rename binlog: " << status;
|
|
|
|
|
|
|
|
auto finish_time = Clocks::monotonic();
|
|
|
|
auto finish_size = fd_size_;
|
|
|
|
auto finish_events = fd_events_;
|
2021-07-13 22:46:09 +02:00
|
|
|
{
|
|
|
|
auto r_stat = stat(path_);
|
|
|
|
if (r_stat.is_error()) {
|
2021-07-14 04:24:45 +02:00
|
|
|
LOG(FATAL) << "Failed to rename binlog of size " << fd_size_ << " to " << path_ << ": " << r_stat.error()
|
|
|
|
<< ". Old file size is " << detail::file_size(new_path);
|
2021-07-13 22:46:09 +02:00
|
|
|
}
|
2021-07-14 04:24:45 +02:00
|
|
|
LOG_CHECK(fd_size_ == r_stat.ok().size_) << fd_size_ << ' ' << r_stat.ok().size_ << ' '
|
|
|
|
<< detail::file_size(new_path) << ' ' << fd_events_ << ' ' << path_;
|
2021-07-13 22:46:09 +02:00
|
|
|
}
|
2018-12-31 20:04:05 +01:00
|
|
|
|
|
|
|
double ratio = static_cast<double>(start_size) / static_cast<double>(finish_size + 1);
|
2020-08-07 19:30:05 +02:00
|
|
|
|
|
|
|
[&](Slice msg) {
|
|
|
|
if (start_size > (10 << 20) || finish_time - start_time > 1) {
|
|
|
|
LOG(WARNING) << "Slow " << msg;
|
|
|
|
} else {
|
|
|
|
LOG(INFO) << msg;
|
|
|
|
}
|
|
|
|
}(PSLICE() << "Regenerate index " << tag("name", path_) << tag("time", format::as_time(finish_time - start_time))
|
|
|
|
<< tag("before_size", format::as_size(start_size)) << tag("after_size", format::as_size(finish_size))
|
|
|
|
<< tag("ratio", ratio) << tag("before_events", start_events) << tag("after_events", finish_events));
|
2018-12-31 20:04:05 +01:00
|
|
|
|
|
|
|
buffer_writer_ = ChainBufferWriter();
|
|
|
|
buffer_reader_ = buffer_writer_.extract_reader();
|
|
|
|
|
|
|
|
// reuse aes_ctr_state_
|
|
|
|
if (encryption_type_ == EncryptionType::AesCtr) {
|
|
|
|
aes_ctr_state_ = aes_xcode_byte_flow_.move_aes_ctr_state();
|
|
|
|
}
|
|
|
|
update_write_encryption();
|
|
|
|
}
|
|
|
|
|
2019-01-02 16:21:31 +01:00
|
|
|
string Binlog::debug_get_binlog_data(int64 begin_offset, int64 end_offset) {
|
|
|
|
if (begin_offset > end_offset) {
|
|
|
|
return "Begin offset is bigger than end_offset";
|
|
|
|
}
|
|
|
|
if (begin_offset == end_offset) {
|
|
|
|
return string();
|
|
|
|
}
|
|
|
|
|
|
|
|
static int64 MAX_DATA_LENGTH = 512;
|
|
|
|
if (end_offset - begin_offset > MAX_DATA_LENGTH) {
|
|
|
|
end_offset = begin_offset + MAX_DATA_LENGTH;
|
|
|
|
}
|
|
|
|
|
|
|
|
auto r_fd = FileFd::open(path_, FileFd::Flags::Read);
|
|
|
|
if (r_fd.is_error()) {
|
|
|
|
return PSTRING() << "Failed to open binlog: " << r_fd.error();
|
|
|
|
}
|
|
|
|
auto fd = r_fd.move_as_ok();
|
|
|
|
|
|
|
|
fd_.lock(FileFd::LockFlags::Unlock, path_, 1).ignore();
|
|
|
|
SCOPE_EXIT {
|
|
|
|
fd_.lock(FileFd::LockFlags::Write, path_, 1).ensure();
|
|
|
|
};
|
|
|
|
size_t expected_data_length = narrow_cast<size_t>(end_offset - begin_offset);
|
|
|
|
string data(expected_data_length, '\0');
|
|
|
|
auto r_data_size = fd.pread(data, begin_offset);
|
|
|
|
if (r_data_size.is_error()) {
|
|
|
|
return PSTRING() << "Failed to read binlog: " << r_data_size.error();
|
|
|
|
}
|
2019-03-11 15:56:03 +01:00
|
|
|
|
2019-01-02 16:21:31 +01:00
|
|
|
if (r_data_size.ok() < expected_data_length) {
|
|
|
|
data.resize(r_data_size.ok());
|
|
|
|
data = PSTRING() << format::as_hex_dump<4>(Slice(data)) << " | with " << expected_data_length - r_data_size.ok()
|
|
|
|
<< " missed bytes";
|
|
|
|
} else {
|
2019-03-11 15:56:03 +01:00
|
|
|
if (encryption_type_ == EncryptionType::AesCtr) {
|
|
|
|
bool is_zero = true;
|
|
|
|
for (auto &c : data) {
|
|
|
|
if (c != '\0') {
|
|
|
|
is_zero = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// very often we have '\0' bytes written to disk instead of a real log event
|
|
|
|
// this is clearly impossible content for a real encrypted log event, so just ignore it
|
|
|
|
if (is_zero) {
|
|
|
|
return string();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-01-02 16:21:31 +01:00
|
|
|
data = PSTRING() << format::as_hex_dump<4>(Slice(data));
|
|
|
|
}
|
|
|
|
return data;
|
|
|
|
}
|
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
} // namespace td
|