2018-12-31 22:04:05 +03:00
|
|
|
//
|
2020-01-01 04:23:48 +03:00
|
|
|
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2020
|
2018-12-31 22:04:05 +03:00
|
|
|
//
|
|
|
|
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
|
|
|
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
//
|
|
|
|
#include "td/telegram/files/FileStatsWorker.h"
|
|
|
|
|
|
|
|
#include "td/telegram/DialogId.h"
|
2019-01-20 00:26:23 +03:00
|
|
|
#include "td/telegram/files/FileData.h"
|
2018-12-31 22:04:05 +03:00
|
|
|
#include "td/telegram/files/FileDb.h"
|
|
|
|
#include "td/telegram/files/FileLoaderUtils.h"
|
2019-01-20 00:54:29 +03:00
|
|
|
#include "td/telegram/files/FileLocation.h"
|
|
|
|
#include "td/telegram/files/FileType.h"
|
2018-12-31 22:04:05 +03:00
|
|
|
#include "td/telegram/Global.h"
|
2019-06-12 14:42:06 +03:00
|
|
|
#include "td/telegram/logevent/LogEvent.h"
|
2019-01-06 22:59:17 +03:00
|
|
|
#include "td/telegram/TdDb.h"
|
2018-12-31 22:04:05 +03:00
|
|
|
|
2018-07-19 17:45:30 +03:00
|
|
|
#include "td/db/SqliteKeyValue.h"
|
|
|
|
|
2018-12-31 22:04:05 +03:00
|
|
|
#include "td/utils/format.h"
|
|
|
|
#include "td/utils/logging.h"
|
2019-03-24 21:36:21 +03:00
|
|
|
#include "td/utils/misc.h"
|
2018-12-31 22:04:05 +03:00
|
|
|
#include "td/utils/PathView.h"
|
|
|
|
#include "td/utils/port/path.h"
|
|
|
|
#include "td/utils/port/Stat.h"
|
|
|
|
#include "td/utils/Slice.h"
|
2019-05-22 21:17:24 +03:00
|
|
|
#include "td/utils/Status.h"
|
2018-12-31 22:04:05 +03:00
|
|
|
#include "td/utils/Time.h"
|
2019-03-29 14:32:50 +03:00
|
|
|
#include "td/utils/tl_parsers.h"
|
2018-12-31 22:04:05 +03:00
|
|
|
|
|
|
|
#include <functional>
|
|
|
|
#include <unordered_map>
|
|
|
|
|
|
|
|
namespace td {
|
|
|
|
namespace {
|
|
|
|
// Performance ideas:
|
|
|
|
// - use slice instead of string
|
|
|
|
// - use arena memory allocator
|
|
|
|
// - store FileType or dir, not both
|
|
|
|
// - store dir relative to G()->files_dir()
|
|
|
|
|
|
|
|
struct DbFileInfo {
|
|
|
|
FileType file_type;
|
|
|
|
string path;
|
|
|
|
DialogId owner_dialog_id;
|
|
|
|
int64 size;
|
|
|
|
};
|
|
|
|
|
|
|
|
template <class CallbackT>
|
2019-05-01 16:15:54 +02:00
|
|
|
void scan_db(CancellationToken &token, CallbackT &&callback) {
|
2018-12-31 22:04:05 +03:00
|
|
|
G()->td_db()->get_file_db_shared()->pmc().get_by_range("file0", "file:", [&](Slice key, Slice value) {
|
2019-05-01 16:15:54 +02:00
|
|
|
if (token) {
|
|
|
|
return false;
|
|
|
|
}
|
2018-12-31 22:04:05 +03:00
|
|
|
// skip reference to other data
|
|
|
|
if (value.substr(0, 2) == "@@") {
|
2019-05-01 16:15:54 +02:00
|
|
|
return true;
|
2018-12-31 22:04:05 +03:00
|
|
|
}
|
2019-06-11 13:10:14 +03:00
|
|
|
logevent::WithVersion<TlParser> parser(value);
|
2018-12-31 22:04:05 +03:00
|
|
|
FileData data;
|
2019-03-18 00:40:10 +03:00
|
|
|
data.parse(parser, false);
|
|
|
|
if (parser.get_status().is_error()) {
|
2018-01-26 04:06:53 +03:00
|
|
|
LOG(ERROR) << "Invalid FileData in the database " << tag("value", format::escaped(value));
|
2019-05-01 16:15:54 +02:00
|
|
|
return true;
|
2018-12-31 22:04:05 +03:00
|
|
|
}
|
|
|
|
DbFileInfo info;
|
2018-01-20 17:57:52 +03:00
|
|
|
if (data.local_.type() == LocalFileLocation::Type::Full) {
|
|
|
|
info.file_type = data.local_.full().file_type_;
|
2018-12-31 22:04:05 +03:00
|
|
|
info.path = data.local_.full().path_;
|
2018-01-20 17:57:52 +03:00
|
|
|
} else if (data.local_.type() == LocalFileLocation::Type::Partial) {
|
|
|
|
info.file_type = data.local_.partial().file_type_;
|
2018-12-31 22:04:05 +03:00
|
|
|
info.path = data.local_.partial().path_;
|
|
|
|
} else {
|
2019-05-01 16:15:54 +02:00
|
|
|
return true;
|
2018-12-31 22:04:05 +03:00
|
|
|
}
|
|
|
|
PathView path_view(info.path);
|
|
|
|
if (path_view.is_relative()) {
|
2019-01-20 01:59:37 +03:00
|
|
|
info.path = PSTRING() << get_files_base_dir(info.file_type) << info.path;
|
2018-12-31 22:04:05 +03:00
|
|
|
}
|
2018-01-26 04:06:53 +03:00
|
|
|
// LOG(INFO) << "Found file in the database: " << data << " " << info.path;
|
2018-12-31 22:04:05 +03:00
|
|
|
info.owner_dialog_id = data.owner_dialog_id_;
|
|
|
|
info.size = data.size_;
|
2018-01-20 17:57:52 +03:00
|
|
|
if (info.size == 0 && data.local_.type() == LocalFileLocation::Type::Full) {
|
2018-01-26 04:06:53 +03:00
|
|
|
LOG(ERROR) << "Unknown size in the database";
|
2019-05-01 16:15:54 +02:00
|
|
|
return true;
|
2018-12-31 22:04:05 +03:00
|
|
|
}
|
|
|
|
callback(info);
|
2019-05-01 16:15:54 +02:00
|
|
|
return true;
|
2018-12-31 22:04:05 +03:00
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
struct FsFileInfo {
|
|
|
|
FileType file_type;
|
|
|
|
string path;
|
|
|
|
int64 size;
|
|
|
|
uint64 atime_nsec;
|
|
|
|
uint64 mtime_nsec;
|
|
|
|
};
|
|
|
|
|
|
|
|
template <class CallbackT>
|
2019-05-01 16:15:54 +02:00
|
|
|
void scan_fs(CancellationToken &token, CallbackT &&callback) {
|
2019-01-20 01:10:15 +03:00
|
|
|
for (int32 i = 0; i < file_type_size; i++) {
|
2018-12-31 22:04:05 +03:00
|
|
|
auto file_type = static_cast<FileType>(i);
|
2019-05-07 05:51:56 +03:00
|
|
|
if (file_type == FileType::SecureRaw || file_type == FileType::Wallpaper) {
|
2018-04-27 11:00:56 +03:00
|
|
|
continue;
|
|
|
|
}
|
2018-12-31 22:04:05 +03:00
|
|
|
auto files_dir = get_files_dir(file_type);
|
2019-09-28 05:14:21 +03:00
|
|
|
walk_path(files_dir, [&](CSlice path, WalkPath::Type type) {
|
|
|
|
if (token) {
|
|
|
|
return WalkPath::Action::Abort;
|
|
|
|
}
|
|
|
|
if (type != WalkPath::Type::NotDir) {
|
|
|
|
return WalkPath::Action::Continue;
|
|
|
|
}
|
|
|
|
auto r_stat = stat(path);
|
|
|
|
if (r_stat.is_error()) {
|
|
|
|
LOG(WARNING) << "Stat in files gc failed: " << r_stat.error();
|
|
|
|
return WalkPath::Action::Continue;
|
|
|
|
}
|
|
|
|
auto stat = r_stat.move_as_ok();
|
|
|
|
if (ends_with(path, "/.nomedia") && stat.size_ == 0) {
|
|
|
|
// skip .nomedia file
|
|
|
|
return WalkPath::Action::Continue;
|
|
|
|
}
|
2019-05-01 16:15:54 +02:00
|
|
|
|
2019-09-28 05:14:21 +03:00
|
|
|
FsFileInfo info;
|
|
|
|
info.path = path.str();
|
2020-01-03 04:08:22 +03:00
|
|
|
info.size = stat.real_size_;
|
2019-09-28 05:14:21 +03:00
|
|
|
info.file_type = file_type;
|
|
|
|
info.atime_nsec = stat.atime_nsec_;
|
|
|
|
info.mtime_nsec = stat.mtime_nsec_;
|
|
|
|
callback(info);
|
|
|
|
return WalkPath::Action::Continue;
|
|
|
|
}).ignore();
|
2018-12-31 22:04:05 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
void FileStatsWorker::get_stats(bool need_all_files, bool split_by_owner_dialog_id, Promise<FileStats> promise) {
|
|
|
|
if (!G()->parameters().use_chat_info_db) {
|
|
|
|
split_by_owner_dialog_id = false;
|
|
|
|
}
|
|
|
|
if (!split_by_owner_dialog_id) {
|
|
|
|
FileStats file_stats;
|
|
|
|
file_stats.need_all_files = need_all_files;
|
|
|
|
auto start = Time::now();
|
2019-05-01 16:15:54 +02:00
|
|
|
scan_fs(token_, [&](FsFileInfo &fs_info) {
|
2018-12-31 22:04:05 +03:00
|
|
|
FullFileInfo info;
|
|
|
|
info.file_type = fs_info.file_type;
|
|
|
|
info.path = std::move(fs_info.path);
|
|
|
|
info.size = fs_info.size;
|
|
|
|
info.atime_nsec = fs_info.atime_nsec;
|
|
|
|
info.mtime_nsec = fs_info.mtime_nsec;
|
|
|
|
file_stats.add(std::move(info));
|
|
|
|
});
|
|
|
|
auto passed = Time::now() - start;
|
|
|
|
LOG_IF(INFO, passed > 0.5) << "Get file stats took: " << format::as_time(passed);
|
2019-05-16 15:05:22 +03:00
|
|
|
if (token_) {
|
|
|
|
return promise.set_error(Status::Error(500, "Request aborted"));
|
|
|
|
}
|
2018-12-31 22:04:05 +03:00
|
|
|
promise.set_value(std::move(file_stats));
|
|
|
|
} else {
|
|
|
|
auto start = Time::now();
|
|
|
|
|
|
|
|
std::vector<FullFileInfo> full_infos;
|
2019-05-01 16:15:54 +02:00
|
|
|
scan_fs(token_, [&](FsFileInfo &fs_info) {
|
2018-12-31 22:04:05 +03:00
|
|
|
FullFileInfo info;
|
|
|
|
info.file_type = fs_info.file_type;
|
|
|
|
info.path = std::move(fs_info.path);
|
|
|
|
info.size = fs_info.size;
|
|
|
|
info.atime_nsec = fs_info.atime_nsec;
|
|
|
|
info.mtime_nsec = fs_info.mtime_nsec;
|
2018-01-26 04:06:53 +03:00
|
|
|
|
|
|
|
// LOG(INFO) << "Found file of size " << info.size << " at " << info.path;
|
|
|
|
|
|
|
|
full_infos.push_back(std::move(info));
|
2018-12-31 22:04:05 +03:00
|
|
|
});
|
|
|
|
|
2019-05-16 15:05:22 +03:00
|
|
|
if (token_) {
|
|
|
|
return promise.set_error(Status::Error(500, "Request aborted"));
|
|
|
|
}
|
|
|
|
|
2018-12-31 22:04:05 +03:00
|
|
|
std::unordered_map<size_t, size_t> hash_to_pos;
|
|
|
|
size_t pos = 0;
|
|
|
|
for (auto &full_info : full_infos) {
|
|
|
|
hash_to_pos[std::hash<std::string>()(full_info.path)] = pos;
|
|
|
|
pos++;
|
2019-05-16 15:05:22 +03:00
|
|
|
if (token_) {
|
|
|
|
return promise.set_error(Status::Error(500, "Request aborted"));
|
|
|
|
}
|
2018-12-31 22:04:05 +03:00
|
|
|
}
|
2019-05-01 16:15:54 +02:00
|
|
|
scan_db(token_, [&](DbFileInfo &db_info) {
|
2018-12-31 22:04:05 +03:00
|
|
|
auto it = hash_to_pos.find(std::hash<std::string>()(db_info.path));
|
|
|
|
if (it == hash_to_pos.end()) {
|
|
|
|
return;
|
|
|
|
}
|
2018-01-26 04:06:53 +03:00
|
|
|
// LOG(INFO) << "Match! " << db_info.path << " from " << db_info.owner_dialog_id;
|
2018-12-31 22:04:05 +03:00
|
|
|
full_infos[it->second].owner_dialog_id = db_info.owner_dialog_id;
|
|
|
|
});
|
2019-05-16 15:05:22 +03:00
|
|
|
if (token_) {
|
|
|
|
return promise.set_error(Status::Error(500, "Request aborted"));
|
|
|
|
}
|
2018-12-31 22:04:05 +03:00
|
|
|
|
|
|
|
FileStats file_stats;
|
|
|
|
file_stats.need_all_files = need_all_files;
|
|
|
|
file_stats.split_by_owner_dialog_id = split_by_owner_dialog_id;
|
|
|
|
for (auto &full_info : full_infos) {
|
|
|
|
file_stats.add(std::move(full_info));
|
2019-05-16 15:05:22 +03:00
|
|
|
if (token_) {
|
|
|
|
return promise.set_error(Status::Error(500, "Request aborted"));
|
|
|
|
}
|
2018-12-31 22:04:05 +03:00
|
|
|
}
|
|
|
|
auto passed = Time::now() - start;
|
|
|
|
LOG_IF(INFO, passed > 0.5) << "Get file stats took: " << format::as_time(passed);
|
|
|
|
promise.set_value(std::move(file_stats));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace td
|