2018-12-31 22:04:05 +03:00
|
|
|
//
|
2023-01-01 00:28:08 +03:00
|
|
|
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2023
|
2018-12-31 22:04:05 +03:00
|
|
|
//
|
|
|
|
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
|
|
|
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
//
|
2022-01-31 15:56:44 +03:00
|
|
|
#include "td/utils/algorithm.h"
|
2018-12-31 22:04:05 +03:00
|
|
|
#include "td/utils/buffer.h"
|
|
|
|
#include "td/utils/ByteFlow.h"
|
2020-03-16 22:00:44 +03:00
|
|
|
#include "td/utils/common.h"
|
2018-12-31 22:04:05 +03:00
|
|
|
#include "td/utils/Gzip.h"
|
|
|
|
#include "td/utils/GzipByteFlow.h"
|
|
|
|
#include "td/utils/logging.h"
|
2020-07-26 14:24:30 +03:00
|
|
|
#include "td/utils/port/thread_local.h"
|
|
|
|
#include "td/utils/Slice.h"
|
2018-12-31 22:04:05 +03:00
|
|
|
#include "td/utils/Status.h"
|
|
|
|
#include "td/utils/tests.h"
|
2020-03-15 02:32:53 +03:00
|
|
|
#include "td/utils/Time.h"
|
2018-12-31 22:04:05 +03:00
|
|
|
|
2021-10-21 12:51:16 +03:00
|
|
|
// Round-trips `s` through td::gzencode / td::gzdecode and verifies that the
// decoded data is equal to the original input.
static void encode_decode(const td::string &s) {
  auto compressed = td::gzencode(s, 2);
  ASSERT_TRUE(!compressed.empty());

  ASSERT_EQ(s, td::gzdecode(compressed.as_slice()));
}
|
|
|
|
|
|
|
|
// Round-trip check on three kinds of input: a short string over the full byte
// range, a long random lowercase string and a long run of a single character.
TEST(Gzip, gzencode_gzdecode) {
  {
    const auto binary_input = td::rand_string(0, 255, 1000);
    encode_decode(binary_input);
  }
  {
    const auto lowercase_input = td::rand_string('a', 'z', 1000000);
    encode_decode(lowercase_input);
  }
  {
    const td::string repeated_input(1000000, 'a');
    encode_decode(repeated_input);
  }
}
|
|
|
|
|
2021-10-21 12:51:16 +03:00
|
|
|
// Encodes `s` with td::gzencode, checks that the result is non-empty and logs
// the elapsed time together with the achieved output/input size ratio.
//
// `s` must be non-empty: the bound passed to td::gzencode is computed by an
// integer division by s.size().
static void test_gzencode(const td::string &s) {
  const auto started_at = td::Time::now();

  // The bound is 100 / size for short inputs (100 for a single byte) and never less than 2.
  // NOTE(review): presumably this is the maximum allowed output/input size ratio - confirm against Gzip.h.
  const int ratio_bound = td::max(2, static_cast<int>(100 / s.size()));
  auto encoded = td::gzencode(s, ratio_bound);
  ASSERT_TRUE(!encoded.empty());

  LOG(INFO) << "Encoded string of size " << s.size() << " in " << (td::Time::now() - started_at)
            << " seconds with compression ratio "
            << static_cast<double>(encoded.size()) / static_cast<double>(s.size());
}
|
|
|
|
|
|
|
|
// Runs test_gzencode on inputs whose length grows by powers of ten from 1 up to
// 10^7, using three alphabets per length: a single character, lowercase
// letters and the full byte range.
TEST(Gzip, gzencode) {
  const size_t max_len = 10000000;
  size_t len = 1;
  while (len <= max_len) {
    test_gzencode(td::rand_string('a', 'a', len));
    test_gzencode(td::rand_string('a', 'z', len));
    test_gzencode(td::rand_string(0, 255, len));
    len *= 10;
  }
}
|
|
|
|
|
|
|
|
// Streams a random lowercase string through an encoding GzipByteFlow, feeding
// it piece by piece (pieces come from td::rand_split), and checks that the
// streamed output is identical to the one-shot td::gzencode result.
TEST(Gzip, flow) {
  auto text = td::rand_string('a', 'z', 1000000);
  auto pieces = td::rand_split(text);

  td::ChainBufferWriter writer;
  auto reader = writer.extract_reader();
  td::ByteFlowSource source(&reader);
  td::GzipByteFlow encoder(td::Gzip::Mode::Encode);
  // Replace the flow with a freshly constructed one before wiring it into the chain.
  encoder = td::GzipByteFlow(td::Gzip::Mode::Encode);
  td::ByteFlowSink sink;

  source >> encoder >> sink;

  // Nothing has been written yet, so the sink must not be finished.
  ASSERT_TRUE(!sink.is_ready());
  for (auto &piece : pieces) {
    writer.append(piece);
    source.wakeup();
  }

  // The sink becomes ready only after the input is closed.
  ASSERT_TRUE(!sink.is_ready());
  source.close_input(td::Status::OK());
  ASSERT_TRUE(sink.is_ready());
  ASSERT_TRUE(sink.status().is_ok());

  auto streamed = sink.result()->move_as_buffer_slice().as_slice().str();
  ASSERT_TRUE(!streamed.empty());
  ASSERT_EQ(td::gzencode(text, 2).as_slice().str(), streamed);
}
|
|
|
|
// Feeds a gzip stream with its last byte removed into a decoding GzipByteFlow
// and checks that the sink finishes with an error status once the input is
// closed.
TEST(Gzip, flow_error) {
  auto text = td::rand_string('a', 'z', 1000000);
  auto archive = td::gzencode(text, 0.9).as_slice().str();
  ASSERT_TRUE(!archive.empty());

  // Drop the final byte so that the stream is incomplete.
  archive.resize(archive.size() - 1);
  auto pieces = td::rand_split(archive);

  auto writer = td::ChainBufferWriter();
  auto reader = writer.extract_reader();
  td::ByteFlowSource source(&reader);
  td::GzipByteFlow decoder(td::Gzip::Mode::Decode);
  td::ByteFlowSink sink;

  source >> decoder >> sink;

  ASSERT_TRUE(!sink.is_ready());
  for (auto &piece : pieces) {
    writer.append(piece);
    source.wakeup();
  }

  // The failure is expected to surface only after the input has been closed.
  ASSERT_TRUE(!sink.is_ready());
  source.close_input(td::Status::OK());
  ASSERT_TRUE(sink.is_ready());
  ASSERT_TRUE(!sink.status().is_ok());
}
|
|
|
|
|
|
|
|
// Pushes a random lowercase string through an encode -> decode -> encode ->
// decode chain of GzipByteFlow stages and checks that the sink receives
// exactly the original data.
TEST(Gzip, encode_decode_flow) {
  auto text = td::rand_string('a', 'z', 1000000);
  auto pieces = td::rand_split(text);

  td::ChainBufferWriter writer;
  auto reader = writer.extract_reader();
  td::ByteFlowSource source(&reader);
  td::GzipByteFlow first_encoder(td::Gzip::Mode::Encode);
  td::GzipByteFlow first_decoder(td::Gzip::Mode::Decode);
  td::GzipByteFlow second_encoder(td::Gzip::Mode::Encode);
  td::GzipByteFlow second_decoder(td::Gzip::Mode::Decode);
  td::ByteFlowSink sink;
  source >> first_encoder >> first_decoder >> second_encoder >> second_decoder >> sink;

  ASSERT_TRUE(!sink.is_ready());
  for (auto &piece : pieces) {
    writer.append(piece);
    source.wakeup();
  }

  // The chain must stay open until the input is explicitly closed.
  ASSERT_TRUE(!sink.is_ready());
  source.close_input(td::Status::OK());
  ASSERT_TRUE(sink.is_ready());

  // Log the failure reason before asserting to make a broken run easier to diagnose.
  LOG_IF(ERROR, sink.status().is_error()) << sink.status();
  ASSERT_TRUE(sink.status().is_ok());
  ASSERT_EQ(text, sink.result()->move_as_buffer_slice().as_slice().str());
}
|
2020-07-23 16:39:13 +03:00
|
|
|
|
|
|
|
// Streams 200 blocks of 200000 'a' characters through an encode -> decode ->
// encode -> decode chain, draining the sink after every block. Checks that
// the extra buffer memory stays bounded while streaming, that all output
// bytes are 'a', that the total output size matches the input size, and that
// buffer memory returns to its starting value afterwards.
TEST(Gzip, encode_decode_flow_big) {
  td::clear_thread_locals();
  auto baseline_mem = td::BufferAllocator::get_buffer_mem();
  {
    auto block = td::string(200000, 'a');
    td::ChainBufferWriter writer;
    auto reader = writer.extract_reader();
    td::ByteFlowSource source(&reader);
    td::GzipByteFlow first_encoder(td::Gzip::Mode::Encode);
    td::GzipByteFlow first_decoder(td::Gzip::Mode::Decode);
    td::GzipByteFlow second_encoder(td::Gzip::Mode::Encode);
    td::GzipByteFlow second_decoder(td::Gzip::Mode::Decode);
    td::ByteFlowSink sink;
    source >> first_encoder >> first_decoder >> second_encoder >> second_decoder >> sink;

    ASSERT_TRUE(!sink.is_ready());

    size_t block_count = 200;
    // Number of output bytes that are still expected to arrive.
    size_t remaining = block_count * block.size();
    auto consume = [&](td::Slice chunk) {
      CHECK(chunk.size() <= remaining);
      remaining -= chunk.size();
      ASSERT_TRUE(td::all_of(chunk, [](auto c) { return c == 'a'; }));
    };

    for (size_t i = 0; i < block_count; i++) {
      writer.append(block);
      source.wakeup();

      auto extra_mem = td::BufferAllocator::get_buffer_mem() - baseline_mem;
      // The exact bound is arbitrary; it only has to be far below the 40 MB of data pushed through in total.
      CHECK(extra_mem < (10 << 20));

      // Drain everything the sink has produced so far.
      auto available = sink.get_output()->size();
      consume(sink.get_output()->cut_head(available).move_as_buffer_slice().as_slice());
    }

    ASSERT_TRUE(!sink.is_ready());
    source.close_input(td::Status::OK());
    ASSERT_TRUE(sink.is_ready());
    LOG_IF(ERROR, sink.status().is_error()) << sink.status();
    ASSERT_TRUE(sink.status().is_ok());

    // Account for whatever is left in the sink after the input is closed.
    consume(sink.result()->move_as_buffer_slice().as_slice());
    ASSERT_EQ(0u, remaining);
  }
  // All flow objects are destroyed at this point; buffer memory must be back at the baseline.
  td::clear_thread_locals();
  ASSERT_EQ(baseline_mem, td::BufferAllocator::get_buffer_mem());
}
|
|
|
|
|
|
|
|
// Builds a gzip archive of 200 * 64 KiB 'a' characters and then pushes that
// archive through a decode -> encode -> decode -> encode -> decode chain with
// explicit watermarks. Checks that the extra buffer memory stays bounded while
// the data is expanded, that the output consists only of 'a' and has the
// expected total size, and that buffer memory returns to its starting value.
TEST(Gzip, decode_encode_flow_bomb) {
  td::string bomb;
  const size_t block_size = 1 << 16;
  const size_t block_count = 200;
  {
    // Stage 1: produce the compressed archive.
    td::ChainBufferWriter writer;
    auto reader = writer.extract_reader();
    td::GzipByteFlow encoder(td::Gzip::Mode::Encode);
    td::ByteFlowSource source(&reader);
    td::ByteFlowSink sink;
    source >> encoder >> sink;

    td::string block(block_size, 'a');
    for (size_t i = 0; i < block_count; i++) {
      writer.append(block);
      source.wakeup();
    }
    source.close_input(td::Status::OK());
    ASSERT_TRUE(sink.is_ready());
    LOG_IF(ERROR, sink.status().is_error()) << sink.status();
    ASSERT_TRUE(sink.status().is_ok());
    bomb = sink.result()->move_as_buffer_slice().as_slice().str();
  }

  td::clear_thread_locals();
  auto baseline_mem = td::BufferAllocator::get_buffer_mem();
  {
    // Stage 2: expand and recompress the archive through the chain.
    td::ChainBufferWriter writer;
    auto reader = writer.extract_reader();
    td::ByteFlowSource source(&reader);

    td::GzipByteFlow::Options decoder_options;
    decoder_options.write_watermark.low = 2 << 20;
    decoder_options.write_watermark.high = 4 << 20;
    td::GzipByteFlow::Options encoder_options;
    encoder_options.read_watermark.low = 2 << 20;
    encoder_options.read_watermark.high = 4 << 20;

    td::GzipByteFlow first_decoder(td::Gzip::Mode::Decode);
    first_decoder.set_options(decoder_options);
    td::GzipByteFlow first_encoder(td::Gzip::Mode::Encode);
    first_encoder.set_options(encoder_options);
    td::GzipByteFlow second_decoder(td::Gzip::Mode::Decode);
    second_decoder.set_options(decoder_options);
    td::GzipByteFlow second_encoder(td::Gzip::Mode::Encode);
    second_encoder.set_options(encoder_options);
    td::GzipByteFlow third_decoder(td::Gzip::Mode::Decode);
    third_decoder.set_options(decoder_options);
    td::ByteFlowSink sink;
    source >> first_decoder >> first_encoder >> second_decoder >> second_encoder >> third_decoder >> sink;

    ASSERT_TRUE(!sink.is_ready());

    // Number of output bytes that are still expected to arrive.
    size_t remaining = block_count * block_size;
    auto consume = [&](td::Slice chunk) {
      CHECK(chunk.size() <= remaining);
      remaining -= chunk.size();
      ASSERT_TRUE(td::all_of(chunk, [](auto c) { return c == 'a'; }));
    };

    // The whole archive is supplied at once and the input is closed immediately.
    writer.append(bomb);
    source.close_input(td::Status::OK());

    for (;;) {
      // Wake the decoders from the end of the chain towards its start, then the source.
      third_decoder.wakeup();
      second_decoder.wakeup();
      first_decoder.wakeup();
      source.wakeup();

      auto extra_mem = td::BufferAllocator::get_buffer_mem() - baseline_mem;
      // The exact bound is arbitrary; it only has to be well below the 12.5 MiB the archive expands to.
      CHECK(extra_mem < (5 << 20));

      // Drain everything the sink has produced so far.
      auto available = sink.get_output()->size();
      consume(sink.get_output()->cut_head(available).move_as_buffer_slice().as_slice());

      if (sink.is_ready()) {
        break;
      }
    }
    ASSERT_EQ(0u, remaining);
  }
  // All flow objects are destroyed at this point; buffer memory must be back at the baseline.
  td::clear_thread_locals();
  ASSERT_EQ(baseline_mem, td::BufferAllocator::get_buffer_mem());
}
|