2018-12-31 22:04:05 +03:00
|
|
|
//
|
2023-01-01 00:28:08 +03:00
|
|
|
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2023
|
2018-12-31 22:04:05 +03:00
|
|
|
//
|
|
|
|
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
|
|
|
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
//
|
|
|
|
#include "td/telegram/files/FileGcWorker.h"
|
|
|
|
|
2019-01-20 00:54:29 +03:00
|
|
|
#include "td/telegram/files/FileLocation.h"
|
2019-01-20 03:21:26 +03:00
|
|
|
#include "td/telegram/files/FileManager.h"
|
2019-01-20 00:54:29 +03:00
|
|
|
#include "td/telegram/files/FileType.h"
|
2018-12-31 22:04:05 +03:00
|
|
|
#include "td/telegram/Global.h"
|
|
|
|
|
2021-01-01 15:59:53 +03:00
|
|
|
#include "td/utils/algorithm.h"
|
2018-12-31 22:04:05 +03:00
|
|
|
#include "td/utils/format.h"
|
|
|
|
#include "td/utils/logging.h"
|
|
|
|
#include "td/utils/misc.h"
|
|
|
|
#include "td/utils/port/Clocks.h"
|
|
|
|
#include "td/utils/port/path.h"
|
|
|
|
#include "td/utils/Time.h"
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <array>
|
|
|
|
|
|
|
|
namespace td {
|
2018-11-16 00:25:08 +03:00
|
|
|
|
|
|
|
// Verbosity level of the `file_gc` log tag used by the VLOG(file_gc) statements in this file;
// it is initialized from the INFO verbosity level.
int VERBOSITY_NAME(file_gc) = VERBOSITY_NAME(INFO);
|
|
|
|
|
2018-12-31 22:04:05 +03:00
|
|
|
void FileGcWorker::run_gc(const FileGcParameters ¶meters, std::vector<FullFileInfo> files,
|
2020-03-02 11:59:47 +03:00
|
|
|
Promise<FileGcResult> promise) {
|
2018-12-31 22:04:05 +03:00
|
|
|
auto begin_time = Time::now();
|
2022-08-30 12:05:38 +03:00
|
|
|
VLOG(file_gc) << "Start files GC with " << parameters;
|
2018-12-31 22:04:05 +03:00
|
|
|
// quite stupid implementations
|
|
|
|
// needs a lot of memory
|
|
|
|
// may write something more clever, but i will need at least 2 passes over the files
|
|
|
|
// TODO update atime for all files in android (?)
|
|
|
|
|
2020-06-22 02:27:58 +03:00
|
|
|
std::array<bool, MAX_FILE_TYPE> immune_types{{false}};
|
2018-12-31 22:04:05 +03:00
|
|
|
|
2023-03-13 19:47:38 +03:00
|
|
|
if (G()->use_file_database()) {
|
2018-12-31 22:04:05 +03:00
|
|
|
// immune by default
|
|
|
|
immune_types[narrow_cast<size_t>(FileType::Sticker)] = true;
|
|
|
|
immune_types[narrow_cast<size_t>(FileType::ProfilePhoto)] = true;
|
|
|
|
immune_types[narrow_cast<size_t>(FileType::Thumbnail)] = true;
|
|
|
|
immune_types[narrow_cast<size_t>(FileType::Wallpaper)] = true;
|
2019-05-07 05:51:56 +03:00
|
|
|
immune_types[narrow_cast<size_t>(FileType::Background)] = true;
|
2022-04-12 22:50:20 +03:00
|
|
|
immune_types[narrow_cast<size_t>(FileType::Ringtone)] = true;
|
2018-12-31 22:04:05 +03:00
|
|
|
}
|
|
|
|
|
2021-12-11 23:03:11 +03:00
|
|
|
if (!parameters.file_types_.empty()) {
|
2018-12-31 22:04:05 +03:00
|
|
|
std::fill(immune_types.begin(), immune_types.end(), true);
|
2021-12-11 23:03:11 +03:00
|
|
|
for (auto file_type : parameters.file_types_) {
|
2018-12-31 22:04:05 +03:00
|
|
|
immune_types[narrow_cast<size_t>(file_type)] = false;
|
|
|
|
}
|
2020-06-22 08:43:00 +03:00
|
|
|
for (int32 i = 0; i < MAX_FILE_TYPE; i++) {
|
|
|
|
auto main_file_type = narrow_cast<size_t>(get_main_file_type(static_cast<FileType>(i)));
|
2021-10-19 18:11:16 +03:00
|
|
|
if (!immune_types[main_file_type]) {
|
2020-06-22 08:43:00 +03:00
|
|
|
immune_types[i] = false;
|
|
|
|
}
|
|
|
|
}
|
2018-12-31 22:04:05 +03:00
|
|
|
}
|
|
|
|
|
2023-03-13 19:47:38 +03:00
|
|
|
if (G()->use_file_database()) {
|
2018-12-31 22:04:05 +03:00
|
|
|
immune_types[narrow_cast<size_t>(FileType::EncryptedThumbnail)] = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
auto file_cnt = files.size();
|
|
|
|
int32 type_immunity_ignored_cnt = 0;
|
|
|
|
int32 time_immunity_ignored_cnt = 0;
|
|
|
|
int32 exclude_owner_dialog_id_ignored_cnt = 0;
|
|
|
|
int32 owner_dialog_id_ignored_cnt = 0;
|
|
|
|
int32 remove_by_atime_cnt = 0;
|
|
|
|
int32 remove_by_count_cnt = 0;
|
|
|
|
int32 remove_by_size_cnt = 0;
|
|
|
|
int64 total_removed_size = 0;
|
|
|
|
int64 total_size = 0;
|
|
|
|
for (auto &info : files) {
|
|
|
|
if (info.atime_nsec < info.mtime_nsec) {
|
|
|
|
info.atime_nsec = info.mtime_nsec;
|
|
|
|
}
|
|
|
|
total_size += info.size;
|
|
|
|
}
|
|
|
|
|
2021-12-11 23:03:11 +03:00
|
|
|
FileStats new_stats(false, parameters.dialog_limit_ != 0);
|
|
|
|
FileStats removed_stats(false, parameters.dialog_limit_ != 0);
|
2018-12-31 22:04:05 +03:00
|
|
|
|
2020-03-02 11:59:47 +03:00
|
|
|
auto do_remove_file = [&removed_stats](const FullFileInfo &info) {
|
|
|
|
removed_stats.add_copy(info);
|
2020-03-02 02:41:47 +03:00
|
|
|
auto status = unlink(info.path);
|
2022-08-30 12:05:38 +03:00
|
|
|
LOG_IF(WARNING, status.is_error()) << "Failed to unlink file \"" << info.path << "\" during files GC: " << status;
|
2020-03-02 02:41:47 +03:00
|
|
|
send_closure(G()->file_manager(), &FileManager::on_file_unlink,
|
|
|
|
FullLocalFileLocation(info.file_type, info.path, info.mtime_nsec));
|
|
|
|
};
|
|
|
|
|
2018-12-31 22:04:05 +03:00
|
|
|
double now = Clocks::system();
|
2020-03-02 02:41:47 +03:00
|
|
|
|
2022-12-05 13:03:38 +03:00
|
|
|
// Remove all suitable files with (atime > now - max_time_from_last_access)
|
2019-10-21 16:25:56 +03:00
|
|
|
td::remove_if(files, [&](const FullFileInfo &info) {
|
|
|
|
if (token_) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
if (immune_types[narrow_cast<size_t>(info.file_type)]) {
|
|
|
|
type_immunity_ignored_cnt++;
|
2020-03-02 02:55:08 +03:00
|
|
|
new_stats.add_copy(info);
|
2019-10-21 16:25:56 +03:00
|
|
|
return true;
|
|
|
|
}
|
2021-12-11 23:03:11 +03:00
|
|
|
if (td::contains(parameters.exclude_owner_dialog_ids_, info.owner_dialog_id)) {
|
2019-10-21 16:25:56 +03:00
|
|
|
exclude_owner_dialog_id_ignored_cnt++;
|
2020-03-02 02:55:08 +03:00
|
|
|
new_stats.add_copy(info);
|
2019-10-21 16:25:56 +03:00
|
|
|
return true;
|
|
|
|
}
|
2021-12-11 23:03:11 +03:00
|
|
|
if (!parameters.owner_dialog_ids_.empty() && !td::contains(parameters.owner_dialog_ids_, info.owner_dialog_id)) {
|
2019-10-21 16:25:56 +03:00
|
|
|
owner_dialog_id_ignored_cnt++;
|
2020-03-02 02:55:08 +03:00
|
|
|
new_stats.add_copy(info);
|
2019-10-21 16:25:56 +03:00
|
|
|
return true;
|
|
|
|
}
|
2021-12-11 23:03:11 +03:00
|
|
|
if (static_cast<double>(info.mtime_nsec) * 1e-9 > now - parameters.immunity_delay_) {
|
2022-08-30 12:05:38 +03:00
|
|
|
// new files are immune to GC
|
2019-10-21 16:25:56 +03:00
|
|
|
time_immunity_ignored_cnt++;
|
2020-03-02 02:55:08 +03:00
|
|
|
new_stats.add_copy(info);
|
2019-10-21 16:25:56 +03:00
|
|
|
return true;
|
|
|
|
}
|
2018-12-31 22:04:05 +03:00
|
|
|
|
2021-12-11 23:03:11 +03:00
|
|
|
if (static_cast<double>(info.atime_nsec) * 1e-9 < now - parameters.max_time_from_last_access_) {
|
2019-10-21 16:25:56 +03:00
|
|
|
do_remove_file(info);
|
|
|
|
total_removed_size += info.size;
|
|
|
|
remove_by_atime_cnt++;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
});
|
2019-05-16 15:05:22 +03:00
|
|
|
if (token_) {
|
2021-10-07 16:36:21 +03:00
|
|
|
return promise.set_error(Global::request_aborted_error());
|
2019-05-16 15:05:22 +03:00
|
|
|
}
|
2018-12-31 22:04:05 +03:00
|
|
|
|
|
|
|
// sort by max(atime, mtime)
|
2018-03-20 15:18:16 +03:00
|
|
|
std::sort(files.begin(), files.end(), [](const auto &a, const auto &b) { return a.atime_nsec < b.atime_nsec; });
|
2018-12-31 22:04:05 +03:00
|
|
|
|
2021-12-11 23:03:11 +03:00
|
|
|
// 1. Total size must be less than parameters.max_files_size_
|
|
|
|
// 2. Total file count must be less than parameters.max_file_count_
|
2018-12-31 22:04:05 +03:00
|
|
|
size_t remove_count = 0;
|
2021-12-11 23:03:11 +03:00
|
|
|
if (files.size() > parameters.max_file_count_) {
|
|
|
|
remove_count = files.size() - parameters.max_file_count_;
|
2018-12-31 22:04:05 +03:00
|
|
|
}
|
2021-12-11 23:03:11 +03:00
|
|
|
int64 remove_size = -parameters.max_files_size_;
|
2018-12-31 22:04:05 +03:00
|
|
|
for (auto &file : files) {
|
|
|
|
remove_size += file.size;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t pos = 0;
|
|
|
|
while (pos < files.size() && (remove_count > 0 || remove_size > 0)) {
|
2019-05-01 16:15:54 +02:00
|
|
|
if (token_) {
|
2021-10-07 16:36:21 +03:00
|
|
|
return promise.set_error(Global::request_aborted_error());
|
2019-05-01 16:15:54 +02:00
|
|
|
}
|
2018-12-31 22:04:05 +03:00
|
|
|
if (remove_count > 0) {
|
|
|
|
remove_by_count_cnt++;
|
|
|
|
} else {
|
|
|
|
remove_by_size_cnt++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (remove_count > 0) {
|
|
|
|
remove_count--;
|
|
|
|
}
|
|
|
|
remove_size -= files[pos].size;
|
|
|
|
|
|
|
|
total_removed_size += files[pos].size;
|
|
|
|
do_remove_file(files[pos]);
|
|
|
|
pos++;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (pos < files.size()) {
|
2020-03-02 02:55:08 +03:00
|
|
|
new_stats.add_copy(files[pos]);
|
2018-12-31 22:04:05 +03:00
|
|
|
pos++;
|
|
|
|
}
|
|
|
|
|
|
|
|
auto end_time = Time::now();
|
|
|
|
|
2022-08-30 12:05:38 +03:00
|
|
|
VLOG(file_gc) << "Finish files GC: " << tag("time", end_time - begin_time) << tag("total", file_cnt)
|
2018-11-16 00:25:08 +03:00
|
|
|
<< tag("removed", remove_by_atime_cnt + remove_by_count_cnt + remove_by_size_cnt)
|
|
|
|
<< tag("total_size", format::as_size(total_size))
|
|
|
|
<< tag("total_removed_size", format::as_size(total_removed_size))
|
|
|
|
<< tag("by_atime", remove_by_atime_cnt) << tag("by_count", remove_by_count_cnt)
|
|
|
|
<< tag("by_size", remove_by_size_cnt) << tag("type_immunity", type_immunity_ignored_cnt)
|
|
|
|
<< tag("time_immunity", time_immunity_ignored_cnt)
|
|
|
|
<< tag("owner_dialog_id_immunity", owner_dialog_id_ignored_cnt)
|
|
|
|
<< tag("exclude_owner_dialog_id_immunity", exclude_owner_dialog_id_ignored_cnt);
|
2022-08-30 12:05:38 +03:00
|
|
|
if (end_time - begin_time > 1.0) {
|
|
|
|
LOG(WARNING) << "Finish file GC: " << tag("time", end_time - begin_time) << tag("total", file_cnt)
|
|
|
|
<< tag("removed", remove_by_atime_cnt + remove_by_count_cnt + remove_by_size_cnt)
|
|
|
|
<< tag("total_size", format::as_size(total_size))
|
|
|
|
<< tag("total_removed_size", format::as_size(total_removed_size));
|
|
|
|
}
|
2018-12-31 22:04:05 +03:00
|
|
|
|
2020-03-02 11:59:47 +03:00
|
|
|
promise.set_value({std::move(new_stats), std::move(removed_stats)});
|
2018-12-31 22:04:05 +03:00
|
|
|
}
|
2020-03-02 11:59:47 +03:00
|
|
|
|
2018-12-31 22:04:05 +03:00
|
|
|
} // namespace td
|