Check hash of local files with same name before download

GitOrigin-RevId: ad97f6df43eb022d5c122a950081c5d7700820f5
This commit is contained in:
Arseny Smirnov 2018-02-26 13:05:14 +03:00
parent dd54e3cee0
commit 9310e29ff4
10 changed files with 117 additions and 30 deletions

View File

@ -26,7 +26,7 @@
namespace td { namespace td {
FileDownloader::FileDownloader(const FullRemoteFileLocation &remote, const LocalFileLocation &local, int64 size, FileDownloader::FileDownloader(const FullRemoteFileLocation &remote, const LocalFileLocation &local, int64 size,
string name, const FileEncryptionKey &encryption_key, bool is_small, string name, const FileEncryptionKey &encryption_key, bool is_small, bool search_file,
std::unique_ptr<Callback> callback) std::unique_ptr<Callback> callback)
: remote_(remote) : remote_(remote)
, local_(local) , local_(local)
@ -34,7 +34,8 @@ FileDownloader::FileDownloader(const FullRemoteFileLocation &remote, const Local
, name_(std::move(name)) , name_(std::move(name))
, encryption_key_(encryption_key) , encryption_key_(encryption_key)
, callback_(std::move(callback)) , callback_(std::move(callback))
, is_small_(is_small) { , is_small_(is_small)
, search_file_(search_file) {
if (!encryption_key.empty()) { if (!encryption_key.empty()) {
set_ordered_flag(true); set_ordered_flag(true);
} }
@ -47,14 +48,14 @@ Result<FileLoader::FileInfo> FileDownloader::init() {
if (local_.type() == LocalFileLocation::Type::Full) { if (local_.type() == LocalFileLocation::Type::Full) {
return Status::Error("File is already downloaded"); return Status::Error("File is already downloaded");
} }
int offset = 0; int ready_part_count = 0;
int32 part_size = 0; int32 part_size = 0;
if (local_.type() == LocalFileLocation::Type::Partial) { if (local_.type() == LocalFileLocation::Type::Partial) {
const auto &partial = local_.partial(); const auto &partial = local_.partial();
path_ = partial.path_; path_ = partial.path_;
auto result_fd = FileFd::open(path_, FileFd::Write | FileFd::Read); auto result_fd = FileFd::open(path_, FileFd::Write | FileFd::Read);
// TODO: check timestamps.. // TODO: check timestamps..
if (!result_fd.is_error()) { if (result_fd.is_ok()) {
if (!encryption_key_.empty()) { if (!encryption_key_.empty()) {
CHECK(partial.iv_.size() == 32) << partial.iv_.size(); CHECK(partial.iv_.size() == 32) << partial.iv_.size();
encryption_key_.mutable_iv() = as<UInt256>(partial.iv_.data()); encryption_key_.mutable_iv() = as<UInt256>(partial.iv_.data());
@ -62,11 +63,27 @@ Result<FileLoader::FileInfo> FileDownloader::init() {
} }
fd_ = result_fd.move_as_ok(); fd_ = result_fd.move_as_ok();
part_size = partial.part_size_; part_size = partial.part_size_;
offset = partial.ready_part_count_; ready_part_count = partial.ready_part_count_;
} }
} }
std::vector<int> parts(offset); if (search_file_ && fd_.empty() && size_ > 0 && size_ < 1000 * (1 << 20) && encryption_key_.empty() &&
for (int i = 0; i < offset; i++) { !remote_.is_web()) {
[&] {
TRY_RESULT(path, search_file(get_files_dir(remote_.file_type_), name_, size_));
TRY_RESULT(fd, FileFd::open(path, FileFd::Read));
LOG(INFO) << "Check hash of local file " << path;
path_ = std::move(path);
fd_ = std::move(fd);
need_check_ = true;
only_check_ = true;
part_size = 32 * (1 << 10);
ready_part_count = narrow_cast<int>((size_ + part_size - 1) / part_size);
return Status::OK();
}();
}
std::vector<int> parts(ready_part_count);
for (int i = 0; i < ready_part_count; i++) {
parts[i] = i; parts[i] = i;
} }
@ -76,14 +93,21 @@ Result<FileLoader::FileInfo> FileDownloader::init() {
res.part_size = part_size; res.part_size = part_size;
res.ready_parts = std::move(parts); res.ready_parts = std::move(parts);
res.use_part_count_limit = false; res.use_part_count_limit = false;
res.only_check_ = only_check_;
return res; return res;
} }
Status FileDownloader::on_ok(int64 size) { Status FileDownloader::on_ok(int64 size) {
auto dir = get_files_dir(remote_.file_type_); auto dir = get_files_dir(remote_.file_type_);
TRY_RESULT(perm_path, create_from_temp(path_, dir, name_)); std::string path;
if (only_check_) {
path = path_;
} else {
TRY_RESULT(perm_path, create_from_temp(path_, dir, name_));
path = std::move(perm_path);
}
fd_.close(); fd_.close();
callback_->on_ok(FullLocalFileLocation(remote_.file_type_, std::move(perm_path), 0), size); callback_->on_ok(FullLocalFileLocation(remote_.file_type_, std::move(path), 0), size);
return Status::OK(); return Status::OK();
} }
void FileDownloader::on_error(Status status) { void FileDownloader::on_error(Status status) {
@ -368,16 +392,20 @@ Result<FileLoader::CheckInfo> FileDownloader::check_loop(int64 checked_prefix_si
sha256(slice.as_slice(), hash); sha256(slice.as_slice(), hash);
if (hash != it->hash) { if (hash != it->hash) {
if (only_check_) {
return Status::Error("FILE_DOWNLOAD_RESTART");
}
return Status::Error("Hash mismatch"); return Status::Error("Hash mismatch");
} }
checked_prefix_size = end_offset; checked_prefix_size = end_offset;
info.changed = true;
continue; continue;
} }
if (!has_hash_query_ && use_cdn_) { if (!has_hash_query_) {
has_hash_query_ = true; has_hash_query_ = true;
auto query = auto query =
telegram_api::upload_getCdnFileHashes(BufferSlice(cdn_file_token_), narrow_cast<int32>(checked_prefix_size)); telegram_api::upload_getFileHashes(remote_.as_input_file_location(), narrow_cast<int32>(checked_prefix_size));
auto net_query = G()->net_query_creator().create( auto net_query = G()->net_query_creator().create(
create_storer(query), remote_.get_dc_id(), create_storer(query), remote_.get_dc_id(),
is_small_ ? NetQuery::Type::DownloadSmall : NetQuery::Type::Download, NetQuery::AuthFlag::On); is_small_ ? NetQuery::Type::DownloadSmall : NetQuery::Type::Download, NetQuery::AuthFlag::On);
@ -419,7 +447,7 @@ Status FileDownloader::acquire_fd() {
TRY_RESULT(file_path, open_temp_file(remote_.file_type_)); TRY_RESULT(file_path, open_temp_file(remote_.file_type_));
std::tie(fd_, path_) = std::move(file_path); std::tie(fd_, path_) = std::move(file_path);
} else { } else {
TRY_RESULT(fd, FileFd::open(path_, FileFd::Write | FileFd::Read)); TRY_RESULT(fd, FileFd::open(path_, (only_check_ ? 0 : FileFd::Write) | FileFd::Read));
fd_ = std::move(fd); fd_ = std::move(fd);
} }
} }

View File

@ -35,7 +35,8 @@ class FileDownloader : public FileLoader {
}; };
FileDownloader(const FullRemoteFileLocation &remote, const LocalFileLocation &local, int64 size, string name, FileDownloader(const FullRemoteFileLocation &remote, const LocalFileLocation &local, int64 size, string name,
const FileEncryptionKey &encryption_key, bool is_small, std::unique_ptr<Callback> callback); const FileEncryptionKey &encryption_key, bool is_small, bool search_file,
std::unique_ptr<Callback> callback);
// Should just implement all parent pure virtual methods. // Should just implement all parent pure virtual methods.
// Must not call any of them... // Must not call any of them...
@ -48,6 +49,7 @@ class FileDownloader : public FileLoader {
string name_; string name_;
FileEncryptionKey encryption_key_; FileEncryptionKey encryption_key_;
std::unique_ptr<Callback> callback_; std::unique_ptr<Callback> callback_;
bool only_check_{false};
string path_; string path_;
FileFd fd_; FileFd fd_;
@ -55,6 +57,7 @@ class FileDownloader : public FileLoader {
int32 next_part_ = 0; int32 next_part_ = 0;
bool next_part_stop_ = false; bool next_part_stop_ = false;
bool is_small_; bool is_small_;
bool search_file_{false};
bool use_cdn_ = false; bool use_cdn_ = false;
DcId cdn_dc_id_; DcId cdn_dc_id_;

View File

@ -30,7 +30,7 @@ void FileLoadManager::start_up() {
void FileLoadManager::download(QueryId id, const FullRemoteFileLocation &remote_location, void FileLoadManager::download(QueryId id, const FullRemoteFileLocation &remote_location,
const LocalFileLocation &local, int64 size, string name, const LocalFileLocation &local, int64 size, string name,
const FileEncryptionKey &encryption_key, int8 priority) { const FileEncryptionKey &encryption_key, bool search_file, int8 priority) {
if (stop_flag_) { if (stop_flag_) {
return; return;
} }
@ -42,7 +42,7 @@ void FileLoadManager::download(QueryId id, const FullRemoteFileLocation &remote_
auto callback = make_unique<FileDownloaderCallback>(actor_shared(this, node_id)); auto callback = make_unique<FileDownloaderCallback>(actor_shared(this, node_id));
bool is_small = size < 20 * 1024; bool is_small = size < 20 * 1024;
node->loader_ = create_actor<FileDownloader>("Downloader", remote_location, local, size, std::move(name), node->loader_ = create_actor<FileDownloader>("Downloader", remote_location, local, size, std::move(name),
encryption_key, is_small, std::move(callback)); encryption_key, is_small, search_file, std::move(callback));
auto &resource_manager = is_small ? download_small_resource_manager_ : download_resource_manager_; auto &resource_manager = is_small ? download_small_resource_manager_ : download_resource_manager_;
send_closure(resource_manager, &ResourceManager::register_worker, send_closure(resource_manager, &ResourceManager::register_worker,
ActorShared<FileLoaderActor>(node->loader_.get(), static_cast<uint64>(-1)), priority); ActorShared<FileLoaderActor>(node->loader_.get(), static_cast<uint64>(-1)), priority);

View File

@ -44,7 +44,7 @@ class FileLoadManager final : public Actor {
explicit FileLoadManager(ActorShared<Callback> callback, ActorShared<> parent); explicit FileLoadManager(ActorShared<Callback> callback, ActorShared<> parent);
void download(QueryId id, const FullRemoteFileLocation &remote_location, const LocalFileLocation &local, int64 size, void download(QueryId id, const FullRemoteFileLocation &remote_location, const LocalFileLocation &local, int64 size,
string name, const FileEncryptionKey &encryption_key, int8 priority); string name, const FileEncryptionKey &encryption_key, bool search_file, int8 priority);
void upload(QueryId id, const LocalFileLocation &local_location, const RemoteFileLocation &remote_location, void upload(QueryId id, const LocalFileLocation &local_location, const RemoteFileLocation &remote_location,
int64 size, const FileEncryptionKey &encryption_key, int8 priority, vector<int> bad_parts); int64 size, const FileEncryptionKey &encryption_key, int8 priority, vector<int> bad_parts);
void upload_by_hash(QueryId id, const FullLocalFileLocation &local_location, int64 size, int8 priority); void upload_by_hash(QueryId id, const FullLocalFileLocation &local_location, int64 size, int8 priority);

View File

@ -78,6 +78,9 @@ void FileLoader::start_up() {
auto &ready_parts = file_info.ready_parts; auto &ready_parts = file_info.ready_parts;
auto use_part_count_limit = file_info.use_part_count_limit; auto use_part_count_limit = file_info.use_part_count_limit;
auto status = parts_manager_.init(size, expected_size, is_size_final, part_size, ready_parts, use_part_count_limit); auto status = parts_manager_.init(size, expected_size, is_size_final, part_size, ready_parts, use_part_count_limit);
if (file_info.only_check_) {
parts_manager_.set_checked_prefix_size(0);
}
if (status.is_error()) { if (status.is_error()) {
on_error(std::move(status)); on_error(std::move(status));
stop_flag_ = true; stop_flag_ = true;
@ -110,6 +113,9 @@ Status FileLoader::do_loop() {
TRY_RESULT(check_info, TRY_RESULT(check_info,
check_loop(parts_manager_.get_checked_prefix_size(), parts_manager_.get_unchecked_ready_prefix_size(), check_loop(parts_manager_.get_checked_prefix_size(), parts_manager_.get_unchecked_ready_prefix_size(),
parts_manager_.unchecked_ready())); parts_manager_.unchecked_ready()));
if (check_info.changed) {
on_progress_impl(narrow_cast<size_t>(parts_manager_.get_ready_size()));
}
for (auto &query : check_info.queries) { for (auto &query : check_info.queries) {
G()->net_query_dispatcher().dispatch_with_callback( G()->net_query_dispatcher().dispatch_with_callback(
std::move(query), actor_shared(this, UniqueId::next(UniqueId::Type::Default, CommonQueryKey))); std::move(query), actor_shared(this, UniqueId::next(UniqueId::Type::Default, CommonQueryKey)));

View File

@ -53,6 +53,7 @@ class FileLoader : public FileLoaderActor {
int32 part_size; int32 part_size;
std::vector<int> ready_parts; std::vector<int> ready_parts;
bool use_part_count_limit = true; bool use_part_count_limit = true;
bool only_check_ = false;
}; };
virtual Result<FileInfo> init() TD_WARN_UNUSED_RESULT = 0; virtual Result<FileInfo> init() TD_WARN_UNUSED_RESULT = 0;
virtual Status on_ok(int64 size) TD_WARN_UNUSED_RESULT = 0; virtual Status on_ok(int64 size) TD_WARN_UNUSED_RESULT = 0;
@ -79,6 +80,7 @@ class FileLoader : public FileLoaderActor {
} }
struct CheckInfo { struct CheckInfo {
bool need_check{false}; bool need_check{false};
bool changed{false};
int64 checked_prefix_size{0}; int64 checked_prefix_size{0};
std::vector<NetQueryPtr> queries; std::vector<NetQueryPtr> queries;
}; };

View File

@ -28,6 +28,11 @@ Result<std::pair<FileFd, string>> try_create_new_file(Result<CSlice> result_name
TRY_RESULT(fd, FileFd::open(name, FileFd::Read | FileFd::Write | FileFd::CreateNew, 0640)); TRY_RESULT(fd, FileFd::open(name, FileFd::Read | FileFd::Write | FileFd::CreateNew, 0640));
return std::make_pair(std::move(fd), name.str()); return std::make_pair(std::move(fd), name.str());
} }
// Opens the file named by result_name for reading only.
// Propagates the error if result_name holds an error or if the file can't be opened;
// otherwise returns the opened descriptor together with the path as an owned string.
Result<std::pair<FileFd, string>> try_open_file(Result<CSlice> result_name) {
  TRY_RESULT(file_name, std::move(result_name));
  TRY_RESULT(opened_fd, FileFd::open(file_name, FileFd::Read, 0640));
  // Copy the path out of the slice so the returned pair owns its own string.
  string opened_path = file_name.str();
  return std::make_pair(std::move(opened_fd), std::move(opened_path));
}
struct RandSuff { struct RandSuff {
int len; int len;
@ -63,32 +68,47 @@ Result<std::pair<FileFd, string>> open_temp_file(const FileType &file_type) {
return res; return res;
} }
Result<string> create_from_temp(CSlice temp_path, CSlice dir, CSlice name) { template <class F>
LOG(INFO) << "Create file in directory " << dir << " with suggested name " << name << " from temporary file " bool for_suggested_file_name(CSlice name, bool use_pmc, bool use_random, F &&callback) {
<< temp_path; auto try_callback = [&](Result<CSlice> r_path) {
if (r_path.is_error()) {
return true;
}
return callback(r_path.move_as_ok());
};
auto cleaned_name = clean_filename(name); auto cleaned_name = clean_filename(name);
PathView path_view(cleaned_name); PathView path_view(cleaned_name);
auto stem = path_view.file_stem(); auto stem = path_view.file_stem();
auto ext = path_view.extension(); auto ext = path_view.extension();
Result<std::pair<FileFd, string>> res; bool active = true;
if (!stem.empty() && !G()->parameters().ignore_file_names) { if (!stem.empty() && !G()->parameters().ignore_file_names) {
res = try_create_new_file(PSLICE_SAFE() << dir << stem << Ext{ext}); active = try_callback(PSLICE_SAFE() << stem << Ext{ext});
for (int i = 0; res.is_error() && i < 10; i++) { for (int i = 0; active && i < 10; i++) {
res = try_create_new_file(PSLICE_SAFE() << dir << stem << "_(" << i << ")" << Ext{ext}); active = try_callback(PSLICE_SAFE() << stem << "_(" << i << ")" << Ext{ext});
} }
for (int i = 2; res.is_error() && i < 12; i++) { for (int i = 2; active && i < 12 && use_random; i++) {
res = try_create_new_file(PSLICE_SAFE() << dir << stem << "_(" << RandSuff{i} << ")" << Ext{ext}); active = try_callback(PSLICE_SAFE() << stem << "_(" << RandSuff{i} << ")" << Ext{ext});
} }
} else { } else if (use_pmc) {
auto pmc = G()->td_db()->get_binlog_pmc(); auto pmc = G()->td_db()->get_binlog_pmc();
int32 file_id = to_integer<int32>(pmc->get("perm_file_id")); int32 file_id = to_integer<int32>(pmc->get("perm_file_id"));
pmc->set("perm_file_id", to_string(file_id + 1)); pmc->set("perm_file_id", to_string(file_id + 1));
res = try_create_new_file(PSLICE_SAFE() << dir << "file_" << file_id << Ext{ext}); active = try_callback(PSLICE_SAFE() << "file_" << file_id << Ext{ext});
if (res.is_error()) { if (active) {
res = try_create_new_file(PSLICE_SAFE() << dir << "file_" << file_id << "_" << RandSuff{6} << Ext{ext}); active = try_callback(PSLICE_SAFE() << "file_" << file_id << "_" << RandSuff{6} << Ext{ext});
} }
} }
return active;
}
Result<string> create_from_temp(CSlice temp_path, CSlice dir, CSlice name) {
LOG(INFO) << "Create file in directory " << dir << " with suggested name " << name << " from temporary file "
<< temp_path;
Result<std::pair<FileFd, string>> res = Status::Error();
for_suggested_file_name(name, true, true, [&](CSlice suggested_name) {
res = try_create_new_file(PSLICE_SAFE() << dir << suggested_name);
return res.is_error();
});
TRY_RESULT(tmp, std::move(res)); TRY_RESULT(tmp, std::move(res));
tmp.first.close(); tmp.first.close();
auto perm_path = std::move(tmp.second); auto perm_path = std::move(tmp.second);
@ -96,6 +116,26 @@ Result<string> create_from_temp(CSlice temp_path, CSlice dir, CSlice name) {
return perm_path; return perm_path;
} }
// Searches directory dir for an already existing file whose name is one of the names
// suggested for name and whose size is exactly expected_size.
// Returns the path of the first matching file, or an error if no such file was found.
Result<string> search_file(CSlice dir, CSlice name, int64 expected_size) {
  // Start from a failure that carries a message, so that an unsuccessful search
  // does not produce an error with empty text.
  Result<std::string> res = Status::Error("Can't find suitable file name");
  // Both boolean options of for_suggested_file_name are disabled here.
  // The callback returns true to request the next candidate name and false to stop.
  for_suggested_file_name(name, false, false, [&](CSlice suggested_name) {
    auto r_pair = try_open_file(PSLICE_SAFE() << dir << suggested_name);
    if (r_pair.is_error()) {
      // The candidate can't be opened for reading: stop the search.
      return false;
    }
    FileFd fd;
    std::string path;
    std::tie(fd, path) = r_pair.move_as_ok();
    if (fd.stat().size_ != expected_size) {
      // A file with this name exists, but its size differs: try the next candidate.
      return true;
    }
    // Found a candidate of the expected size; only its path is returned.
    fd.close();
    res = std::move(path);
    return false;
  });
  return res;
}
const char *file_type_name[file_type_size] = {"thumbnails", "profile_photos", "photos", "voice", const char *file_type_name[file_type_size] = {"thumbnails", "profile_photos", "photos", "voice",
"videos", "documents", "secret", "temp", "videos", "documents", "secret", "temp",
"stickers", "music", "animations", "secret_thumbnails", "stickers", "music", "animations", "secret_thumbnails",

View File

@ -18,6 +18,7 @@ enum class FileType : int8;
Result<std::pair<FileFd, string>> open_temp_file(const FileType &file_type) TD_WARN_UNUSED_RESULT; Result<std::pair<FileFd, string>> open_temp_file(const FileType &file_type) TD_WARN_UNUSED_RESULT;
Result<string> create_from_temp(CSlice temp_path, CSlice dir, CSlice name) TD_WARN_UNUSED_RESULT; Result<string> create_from_temp(CSlice temp_path, CSlice dir, CSlice name) TD_WARN_UNUSED_RESULT;
Result<string> search_file(CSlice dir, CSlice name, int64 expected_size) TD_WARN_UNUSED_RESULT;
string get_files_base_dir(const FileType &file_type); string get_files_base_dir(const FileType &file_type);
string get_files_temp_dir(const FileType &file_type); string get_files_temp_dir(const FileType &file_type);
string get_files_dir(const FileType &file_type); string get_files_dir(const FileType &file_type);

View File

@ -1037,6 +1037,7 @@ Result<FileId> FileManager::merge(FileId x_file_id, FileId y_file_id, bool no_sy
nodes[node_i]->set_encryption_key(nodes[encryption_key_i]->encryption_key_); nodes[node_i]->set_encryption_key(nodes[encryption_key_i]->encryption_key_);
} }
node->need_load_from_pmc_ |= other_node->need_load_from_pmc_; node->need_load_from_pmc_ |= other_node->need_load_from_pmc_;
node->can_search_locally_ &= other_node->can_search_locally_;
if (main_file_id_i == other_node_i) { if (main_file_id_i == other_node_i) {
node->main_file_id_ = other_node->main_file_id_; node->main_file_id_ = other_node->main_file_id_;
@ -1450,7 +1451,7 @@ void FileManager::run_download(FileNodePtr node) {
node->download_id_ = id; node->download_id_ = id;
node->is_download_started_ = false; node->is_download_started_ = false;
send_closure(file_load_manager_, &FileLoadManager::download, id, node->remote_.full(), node->local_, node->size_, send_closure(file_load_manager_, &FileLoadManager::download, id, node->remote_.full(), node->local_, node->size_,
node->suggested_name(), node->encryption_key_, priority); node->suggested_name(), node->encryption_key_, node->can_search_locally_, priority);
} }
void FileManager::resume_upload(FileId file_id, std::vector<int> bad_parts, std::shared_ptr<UploadCallback> callback, void FileManager::resume_upload(FileId file_id, std::vector<int> bad_parts, std::shared_ptr<UploadCallback> callback,
@ -2236,6 +2237,11 @@ void FileManager::on_error_impl(FileNodePtr node, FileManager::Query::Type type,
run_upload(node, {}); run_upload(node, {});
return; return;
} }
if (status.message() == "FILE_DOWNLOAD_RESTART") {
node->can_search_locally_ = false;
run_download(node);
return;
}
if (!was_active) { if (!was_active) {
return; return;

View File

@ -114,6 +114,7 @@ class FileNode {
FileLocationSource remote_source_ = FileLocationSource::FromUser; FileLocationSource remote_source_ = FileLocationSource::FromUser;
bool get_by_hash_ = false; bool get_by_hash_ = false;
bool can_search_locally_{true};
bool is_download_started_ = false; bool is_download_started_ = false;
bool generate_was_update_ = false; bool generate_was_update_ = false;