2018-11-11 12:38:04 +01:00
|
|
|
//
|
2022-01-01 01:35:39 +01:00
|
|
|
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2022
|
2018-11-11 12:38:04 +01:00
|
|
|
//
|
|
|
|
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
|
|
|
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
//
|
|
|
|
#include "td/telegram/files/FileBitmask.h"
|
2018-12-26 17:11:15 +01:00
|
|
|
|
|
|
|
#include "td/utils/common.h"
|
2018-11-11 12:38:04 +01:00
|
|
|
#include "td/utils/misc.h"
|
2018-12-27 20:24:44 +01:00
|
|
|
#include "td/utils/ScopeGuard.h"
|
2018-12-26 17:11:15 +01:00
|
|
|
|
2018-11-11 12:38:04 +01:00
|
|
|
namespace td {
|
2018-12-26 17:11:15 +01:00
|
|
|
|
2018-11-11 12:38:04 +01:00
|
|
|
// Constructs a bitmask from serialized input: the byte storage is whatever
// zero_one_decode() yields for `data`.
Bitmask::Bitmask(Decode, Slice data)
    : data_(zero_one_decode(data)) {
}
|
2018-12-26 17:11:15 +01:00
|
|
|
|
|
|
|
// Constructs a bitmask whose bits [0, count) are all set.
// The storage is first sized to (count + 7) / 8 zero bytes, then every bit is
// switched on individually through set().
Bitmask::Bitmask(Ones, int64 count) : data_(narrow_cast<size_t>((count + 7) / 8), '\0') {
  int64 bit = 0;
  while (bit < count) {
    set(bit);
    ++bit;
  }
}
|
2018-12-26 17:11:15 +01:00
|
|
|
|
2019-08-23 18:25:07 +02:00
|
|
|
// Returns a bitmask in which bit i is set if and only if all k consecutive bits
// [i * k, i * k + k) of this bitmask are set. Bits past the end of the storage
// read as unset, so an incomplete trailing group never produces a set bit.
//
// A non-positive k yields an empty bitmask.
Bitmask Bitmask::compress(int k) const {
  Bitmask res;
  if (k <= 0) {
    // Without this guard the loop condition "i * k < size()" could never become
    // false for a non-empty mask, and res would grow without bound.
    return res;
  }

  const int64 total_bits = size();
  for (int64 i = 0; i * k < total_bits; i++) {
    bool all_set = true;
    for (int64 j = 0; j < k && all_set; j++) {
      all_set = get(i * k + j);
    }
    if (all_set) {
      res.set(i);
    }
  }
  return res;
}
|
|
|
|
|
2018-12-27 19:06:30 +01:00
|
|
|
// Serializes the bitmask with zero_one_encode().
//
// prefix_count == -1 encodes the whole mask. Otherwise only the bytes covering
// the first prefix_count bits are encoded, and the bits of the last covered
// byte that lie beyond the prefix are cleared for the duration of the call.
// Trailing zero bytes are never encoded.
//
// The method is non-const because the last prefix byte is masked in place and
// restored on scope exit.
std::string Bitmask::encode(int32 prefix_count) {
  // remove zeroes at the end to make encoding deterministic
  Slice data(data_);

  int save_i = -1;
  char save_c = '\0';
  if (prefix_count != -1) {
    auto truncated_size = (prefix_count + 7) / 8;
    data.truncate(truncated_size);
    // Mask the partially covered byte only if it really exists in data_.
    // If the prefix reaches past the stored bytes there is nothing to clear,
    // and for a negative prefix_count the index would be negative; touching
    // data_[truncated_size - 1] in those cases would be an out-of-bounds access.
    if (prefix_count % 8 != 0 && truncated_size > 0 && static_cast<size_t>(truncated_size) <= data_.size()) {
      save_i = truncated_size - 1;
      save_c = data_[save_i];
      auto mask = 0xff >> (8 - prefix_count % 8);
      data_[save_i] = static_cast<char>(data_[save_i] & mask);
    }
  }
  SCOPE_EXIT {
    // undo the temporary masking once the encoded string has been produced
    if (save_i != -1) {
      data_[save_i] = save_c;
    }
  };
  while (!data.empty() && data.back() == '\0') {
    data.remove_suffix(1);
  }
  return zero_one_encode(data);
}
|
2018-12-26 17:11:15 +01:00
|
|
|
|
|
|
|
// Returns how many bytes starting at `offset` are covered by the run of
// consecutive set parts that begins at the part containing `offset`.
//
// Returns 0 for a negative offset, for part_size == 0, or when the part
// containing `offset` is not set. When file_size is non-zero, both the end of
// the ready range and `offset` are clamped to file_size.
int64 Bitmask::get_ready_prefix_size(int64 offset, int64 part_size, int64 file_size) const {
  if (offset < 0 || part_size == 0) {
    return 0;
  }
  CHECK(part_size > 0);

  const auto first_part = offset / part_size;
  const auto ready_count = get_ready_parts(first_part);
  if (ready_count == 0) {
    return 0;
  }

  auto ready_end = (first_part + ready_count) * part_size;
  const bool is_past_file_end = file_size != 0 && ready_end > file_size;
  if (is_past_file_end) {
    ready_end = file_size;
    if (offset > file_size) {
      offset = file_size;
    }
  }

  const auto prefix_size = ready_end - offset;
  CHECK(prefix_size >= 0);
  return prefix_size;
}
|
2018-12-26 17:11:15 +01:00
|
|
|
|
2018-12-27 09:34:36 +01:00
|
|
|
// Sums the byte lengths of all set parts, where part i spans
// [i * part_size, (i + 1) * part_size). When file_size is non-zero, the end of
// each part is clamped to file_size, and parts lying entirely beyond it add nothing.
int64 Bitmask::get_total_size(int64 part_size, int64 file_size) const {
  int64 total = 0;
  const int64 bit_count = size();
  for (int64 part = 0; part < bit_count; part++) {
    if (!get(part)) {
      continue;
    }
    const auto begin = part * part_size;
    auto end = begin + part_size;
    if (file_size != 0 && file_size < end) {
      end = file_size;
    }
    if (begin < end) {
      total += end - begin;
    }
  }
  return total;
}
|
2018-12-26 17:11:15 +01:00
|
|
|
|
|
|
|
// Tells whether bit `offset_part` is set. Negative indexes and indexes past
// the stored bytes are reported as unset. Bit b of byte n corresponds to
// index n * 8 + b.
bool Bitmask::get(int64 offset_part) const {
  if (offset_part < 0) {
    return false;
  }

  const auto byte_index = narrow_cast<size_t>(offset_part / 8);
  if (byte_index >= data_.size()) {
    return false;
  }

  const auto byte_value = static_cast<uint8>(data_[byte_index]);
  const auto bit_index = static_cast<int>(offset_part % 8);
  return (byte_value & (1 << bit_index)) != 0;
}
|
|
|
|
|
2018-12-26 17:11:15 +01:00
|
|
|
// Counts the consecutive set bits starting at index `offset_part`;
// the result is 0 when that bit itself is not set.
int64 Bitmask::get_ready_parts(int64 offset_part) const {
  int64 count = 0;
  for (auto part = offset_part; get(part); part++) {
    count++;
  }
  return count;
}
|
2018-11-11 12:38:04 +01:00
|
|
|
|
|
|
|
std::vector<int32> Bitmask::as_vector() const {
|
|
|
|
std::vector<int32> res;
|
2018-12-26 17:11:15 +01:00
|
|
|
auto size = narrow_cast<int32>(data_.size() * 8);
|
|
|
|
for (int32 i = 0; i < size; i++) {
|
2018-11-11 12:38:04 +01:00
|
|
|
if (get(i)) {
|
|
|
|
res.push_back(i);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
2018-12-26 17:11:15 +01:00
|
|
|
|
|
|
|
// Sets bit `offset_part`, which must be non-negative. The storage is extended
// with zero bytes when the bit lies beyond the bytes currently held.
void Bitmask::set(int64 offset_part) {
  CHECK(offset_part >= 0);

  const auto required_bytes = narrow_cast<size_t>(offset_part / 8 + 1);
  if (data_.size() < required_bytes) {
    data_.resize(required_bytes, '\0');
  }

  auto &target = data_[required_bytes - 1];
  target = static_cast<char>(target | (1 << (offset_part % 8)));
}
|
|
|
|
|
|
|
|
// Returns the number of bits backed by storage: 8 per stored byte.
int64 Bitmask::size() const {
  const auto byte_count = static_cast<int64>(data_.size());
  return byte_count * 8;
}
|
|
|
|
|
2018-12-28 00:33:07 +01:00
|
|
|
// Writes a compact run-length view of the mask: a run shorter than 5 equal
// bits is written bit by bit, a longer run as "<bit>(x<length>)".
// A run is flushed only when the bit value changes, so the final run of zeros
// is never written; a final run of ones is flushed by the read at index size(),
// which is always unset.
StringBuilder &operator<<(StringBuilder &sb, const Bitmask &mask) {
  bool run_value = false;
  int32 run_length = 0;
  const int64 bit_count = mask.size();
  for (int64 pos = 0; pos <= bit_count; pos++) {
    const bool bit = mask.get(pos);
    if (bit != run_value) {  // zeros at the end are intentionally skipped
      const char symbol = run_value ? '1' : '0';
      if (run_length >= 5) {
        sb << symbol << "(x" << run_length << ')';
      } else {
        for (int32 printed = 0; printed < run_length; printed++) {
          sb << symbol;
        }
      }
      run_length = 0;
      run_value = bit;
    }
    run_length++;
  }
  return sb;
}
|
|
|
|
|
2018-11-11 12:38:04 +01:00
|
|
|
} // namespace td
|