2018-12-31 20:04:05 +01:00
|
|
|
//
|
2022-12-31 22:28:08 +01:00
|
|
|
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2023
|
2018-12-31 20:04:05 +01:00
|
|
|
//
|
|
|
|
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
|
|
|
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
//
|
|
|
|
#include "td/telegram/WebPagesManager.h"
|
|
|
|
|
|
|
|
#include "td/telegram/AnimationsManager.h"
|
|
|
|
#include "td/telegram/AudiosManager.h"
|
2020-01-27 00:55:18 +01:00
|
|
|
#include "td/telegram/AuthManager.h"
|
2023-09-25 17:16:26 +02:00
|
|
|
#include "td/telegram/ContactsManager.h"
|
2023-06-13 14:45:10 +02:00
|
|
|
#include "td/telegram/Dependencies.h"
|
2022-06-02 16:52:12 +02:00
|
|
|
#include "td/telegram/Dimensions.h"
|
2019-04-09 17:38:57 +02:00
|
|
|
#include "td/telegram/Document.h"
|
2019-04-09 17:52:53 +02:00
|
|
|
#include "td/telegram/Document.hpp"
|
2018-12-31 20:04:05 +01:00
|
|
|
#include "td/telegram/DocumentsManager.h"
|
2019-01-21 23:32:13 +01:00
|
|
|
#include "td/telegram/FileReferenceManager.h"
|
2019-01-21 22:59:55 +01:00
|
|
|
#include "td/telegram/files/FileId.h"
|
2018-12-31 20:04:05 +01:00
|
|
|
#include "td/telegram/files/FileManager.h"
|
2019-01-21 23:32:13 +01:00
|
|
|
#include "td/telegram/files/FileSourceId.h"
|
2018-12-31 20:04:05 +01:00
|
|
|
#include "td/telegram/Global.h"
|
|
|
|
#include "td/telegram/logevent/LogEvent.h"
|
|
|
|
#include "td/telegram/MessageEntity.h"
|
|
|
|
#include "td/telegram/MessagesManager.h"
|
|
|
|
#include "td/telegram/Photo.h"
|
2022-04-09 22:21:07 +02:00
|
|
|
#include "td/telegram/PhotoFormat.h"
|
2021-10-27 16:32:09 +02:00
|
|
|
#include "td/telegram/secret_api.h"
|
2018-12-31 20:04:05 +01:00
|
|
|
#include "td/telegram/StickersManager.h"
|
2023-06-13 14:45:10 +02:00
|
|
|
#include "td/telegram/StoryFullId.h"
|
2023-07-01 15:22:01 +02:00
|
|
|
#include "td/telegram/StoryId.h"
|
2023-06-13 14:45:10 +02:00
|
|
|
#include "td/telegram/StoryManager.h"
|
2018-12-31 20:04:05 +01:00
|
|
|
#include "td/telegram/Td.h"
|
2019-01-06 20:59:17 +01:00
|
|
|
#include "td/telegram/TdDb.h"
|
2023-07-01 13:53:04 +02:00
|
|
|
#include "td/telegram/telegram_api.h"
|
2018-12-31 20:04:05 +01:00
|
|
|
#include "td/telegram/VideoNotesManager.h"
|
|
|
|
#include "td/telegram/VideosManager.h"
|
|
|
|
#include "td/telegram/VoiceNotesManager.h"
|
2019-04-27 02:57:59 +02:00
|
|
|
#include "td/telegram/WebPageBlock.h"
|
2018-12-31 20:04:05 +01:00
|
|
|
|
2019-01-06 20:11:02 +01:00
|
|
|
#include "td/db/binlog/BinlogEvent.h"
|
2018-07-18 03:11:48 +02:00
|
|
|
#include "td/db/binlog/BinlogHelper.h"
|
2019-02-22 21:15:43 +01:00
|
|
|
#include "td/db/SqliteKeyValue.h"
|
2018-07-18 03:11:48 +02:00
|
|
|
#include "td/db/SqliteKeyValueAsync.h"
|
|
|
|
|
2021-01-01 13:59:53 +01:00
|
|
|
#include "td/utils/algorithm.h"
|
2018-12-31 20:04:05 +01:00
|
|
|
#include "td/utils/buffer.h"
|
2019-02-12 21:48:16 +01:00
|
|
|
#include "td/utils/common.h"
|
2018-10-28 18:30:47 +01:00
|
|
|
#include "td/utils/format.h"
|
2020-03-02 14:05:18 +01:00
|
|
|
#include "td/utils/HttpUrl.h"
|
2018-12-31 20:04:05 +01:00
|
|
|
#include "td/utils/logging.h"
|
|
|
|
#include "td/utils/misc.h"
|
2018-10-28 18:30:47 +01:00
|
|
|
#include "td/utils/Slice.h"
|
2021-05-17 14:21:11 +02:00
|
|
|
#include "td/utils/SliceBuilder.h"
|
2018-12-31 20:04:05 +01:00
|
|
|
#include "td/utils/StringBuilder.h"
|
|
|
|
#include "td/utils/tl_helpers.h"
|
2020-03-02 14:05:18 +01:00
|
|
|
#include "td/utils/utf8.h"
|
2018-12-31 20:04:05 +01:00
|
|
|
|
2021-09-01 19:31:39 +02:00
|
|
|
#include <limits>
|
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
namespace td {
|
|
|
|
|
2021-07-04 04:58:54 +02:00
|
|
|
class GetWebPagePreviewQuery final : public Td::ResultHandler {
|
2023-02-15 13:05:37 +01:00
|
|
|
Promise<td_api::object_ptr<td_api::webPage>> promise_;
|
2023-10-16 20:18:41 +02:00
|
|
|
string first_url_;
|
2018-12-31 20:04:05 +01:00
|
|
|
|
|
|
|
public:
|
2023-02-15 13:05:37 +01:00
|
|
|
explicit GetWebPagePreviewQuery(Promise<td_api::object_ptr<td_api::webPage>> &&promise)
|
|
|
|
: promise_(std::move(promise)) {
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
2023-10-16 20:18:41 +02:00
|
|
|
void send(const string &text, vector<tl_object_ptr<telegram_api::MessageEntity>> &&entities, string first_url) {
|
|
|
|
first_url_ = std::move(first_url);
|
2018-02-20 22:20:45 +01:00
|
|
|
|
|
|
|
int32 flags = 0;
|
|
|
|
if (!entities.empty()) {
|
|
|
|
flags |= telegram_api::messages_getWebPagePreview::ENTITIES_MASK;
|
|
|
|
}
|
|
|
|
|
2020-03-15 22:17:11 +01:00
|
|
|
send_query(
|
|
|
|
G()->net_query_creator().create(telegram_api::messages_getWebPagePreview(flags, text, std::move(entities))));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
2021-11-08 12:19:57 +01:00
|
|
|
void on_result(BufferSlice packet) final {
|
2018-12-31 20:04:05 +01:00
|
|
|
auto result_ptr = fetch_result<telegram_api::messages_getWebPagePreview>(packet);
|
|
|
|
if (result_ptr.is_error()) {
|
2021-11-08 12:19:57 +01:00
|
|
|
return on_error(result_ptr.move_as_error());
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
auto ptr = result_ptr.move_as_ok();
|
2020-09-24 15:08:04 +02:00
|
|
|
LOG(INFO) << "Receive result for GetWebPagePreviewQuery: " << to_string(ptr);
|
2023-10-16 20:18:41 +02:00
|
|
|
td_->web_pages_manager_->on_get_web_page_preview(first_url_, std::move(ptr), std::move(promise_));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
2021-11-08 12:19:57 +01:00
|
|
|
void on_error(Status status) final {
|
2023-02-15 13:05:37 +01:00
|
|
|
promise_.set_error(std::move(status));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2021-07-04 04:58:54 +02:00
|
|
|
class GetWebPageQuery final : public Td::ResultHandler {
|
2021-10-07 11:49:46 +02:00
|
|
|
Promise<WebPageId> promise_;
|
2020-03-13 23:56:47 +01:00
|
|
|
WebPageId web_page_id_;
|
2018-12-31 20:04:05 +01:00
|
|
|
string url_;
|
|
|
|
|
|
|
|
public:
|
2021-10-07 11:49:46 +02:00
|
|
|
explicit GetWebPageQuery(Promise<WebPageId> &&promise) : promise_(std::move(promise)) {
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
2020-03-13 23:56:47 +01:00
|
|
|
void send(WebPageId web_page_id, const string &url, int32 hash) {
|
2022-02-09 22:59:52 +01:00
|
|
|
if (url.empty()) {
|
|
|
|
return promise_.set_value(WebPageId());
|
|
|
|
}
|
|
|
|
|
2020-03-13 23:56:47 +01:00
|
|
|
web_page_id_ = web_page_id;
|
2018-12-31 20:04:05 +01:00
|
|
|
url_ = url;
|
2020-03-15 22:17:11 +01:00
|
|
|
send_query(G()->net_query_creator().create(telegram_api::messages_getWebPage(url, hash)));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
2021-11-08 12:19:57 +01:00
|
|
|
void on_result(BufferSlice packet) final {
|
2018-12-31 20:04:05 +01:00
|
|
|
auto result_ptr = fetch_result<telegram_api::messages_getWebPage>(packet);
|
|
|
|
if (result_ptr.is_error()) {
|
2021-11-08 12:19:57 +01:00
|
|
|
return on_error(result_ptr.move_as_error());
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
auto ptr = result_ptr.move_as_ok();
|
2020-09-24 15:08:04 +02:00
|
|
|
LOG(INFO) << "Receive result for GetWebPageQuery: " << to_string(ptr);
|
2023-09-25 17:16:26 +02:00
|
|
|
td_->contacts_manager_->on_get_users(std::move(ptr->users_), "GetWebPageQuery");
|
|
|
|
td_->contacts_manager_->on_get_chats(std::move(ptr->chats_), "GetWebPageQuery");
|
|
|
|
auto page = std::move(ptr->webpage_);
|
|
|
|
if (page->get_id() == telegram_api::webPageNotModified::ID) {
|
2020-03-13 23:56:47 +01:00
|
|
|
if (web_page_id_.is_valid()) {
|
2023-09-25 17:16:26 +02:00
|
|
|
auto web_page = move_tl_object_as<telegram_api::webPageNotModified>(page);
|
2022-12-18 20:15:57 +01:00
|
|
|
int32 view_count = web_page->cached_page_views_;
|
2021-11-08 13:20:38 +01:00
|
|
|
td_->web_pages_manager_->on_get_web_page_instant_view_view_count(web_page_id_, view_count);
|
2021-10-07 11:49:46 +02:00
|
|
|
return promise_.set_value(std::move(web_page_id_));
|
2020-03-13 23:56:47 +01:00
|
|
|
} else {
|
|
|
|
LOG(ERROR) << "Receive webPageNotModified for " << url_;
|
2021-11-08 12:19:57 +01:00
|
|
|
return on_error(Status::Error(500, "Receive webPageNotModified"));
|
2020-03-13 23:56:47 +01:00
|
|
|
}
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
2023-09-25 17:16:26 +02:00
|
|
|
auto web_page_id = td_->web_pages_manager_->on_get_web_page(std::move(page), DialogId());
|
2021-11-08 13:20:38 +01:00
|
|
|
td_->web_pages_manager_->on_get_web_page_by_url(url_, web_page_id, false);
|
2021-10-07 11:49:46 +02:00
|
|
|
promise_.set_value(std::move(web_page_id));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
2021-11-08 12:19:57 +01:00
|
|
|
void on_error(Status status) final {
|
2018-12-31 20:04:05 +01:00
|
|
|
promise_.set_error(std::move(status));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// Instant view of a web page together with its serialization.
// The order of flags and fields in store() and parse() must stay in sync.
class WebPagesManager::WebPageInstantView {
 public:
  vector<unique_ptr<WebPageBlock>> page_blocks_;
  string url_;
  int32 view_count_ = 0;
  int32 hash_ = 0;
  bool is_v2_ = false;
  bool is_rtl_ = false;
  bool is_empty_ = true;
  bool is_full_ = false;
  bool is_loaded_ = false;
  bool was_loaded_from_database_ = false;  // not serialized

  // Serializes the instant view; url_ and view_count_ are written only when non-empty/positive.
  template <class StorerT>
  void store(StorerT &storer) const {
    using ::td::store;

    bool has_url = !url_.empty();
    bool has_view_count = view_count_ > 0;

    BEGIN_STORE_FLAGS();
    STORE_FLAG(is_full_);
    STORE_FLAG(is_loaded_);
    STORE_FLAG(is_rtl_);
    STORE_FLAG(is_v2_);
    STORE_FLAG(has_url);
    STORE_FLAG(has_view_count);
    END_STORE_FLAGS();

    store(page_blocks_, storer);
    store(hash_, storer);
    if (has_url) {
      store(url_, storer);
    }
    if (has_view_count) {
      store(view_count_, storer);
    }

    // an empty instant view is never expected to be stored
    CHECK(!is_empty_);
  }

  // Restores an instant view written by store(); the parsed object is marked as non-empty.
  template <class ParserT>
  void parse(ParserT &parser) {
    using ::td::parse;

    bool has_url = false;
    bool has_view_count = false;

    BEGIN_PARSE_FLAGS();
    PARSE_FLAG(is_full_);
    PARSE_FLAG(is_loaded_);
    PARSE_FLAG(is_rtl_);
    PARSE_FLAG(is_v2_);
    PARSE_FLAG(has_url);
    PARSE_FLAG(has_view_count);
    END_PARSE_FLAGS();

    parse(page_blocks_, parser);
    parse(hash_, parser);
    if (has_url) {
      parse(url_, parser);
    }
    if (has_view_count) {
      parse(view_count_, parser);
    }

    is_empty_ = false;
  }

  // Prints a one-line summary of the instant view for logging.
  friend StringBuilder &operator<<(StringBuilder &string_builder,
                                   const WebPagesManager::WebPageInstantView &instant_view) {
    string_builder << "InstantView(URL = " << instant_view.url_;
    string_builder << ", size = " << instant_view.page_blocks_.size();
    string_builder << ", view_count = " << instant_view.view_count_;
    string_builder << ", hash = " << instant_view.hash_;
    string_builder << ", is_empty = " << instant_view.is_empty_;
    string_builder << ", is_v2 = " << instant_view.is_v2_;
    string_builder << ", is_rtl = " << instant_view.is_rtl_;
    string_builder << ", is_full = " << instant_view.is_full_;
    string_builder << ", is_loaded = " << instant_view.is_loaded_;
    string_builder << ", was_loaded_from_database = " << instant_view.was_loaded_from_database_;
    return string_builder << ")";
  }
};
|
|
|
|
|
|
|
|
// Description of a web page (link preview) together with its serialization.
//
// The order of flags and fields in store() and parse() defines the storage format
// and must never be changed; new flags can only be appended.
class WebPagesManager::WebPage {
 public:
  string url_;
  string display_url_;
  string type_;
  string site_name_;
  string title_;
  string description_;
  Photo photo_;
  string embed_url_;
  string embed_type_;
  Dimensions embed_dimensions_;
  int32 duration_ = 0;
  string author_;
  bool has_large_media_ = false;
  Document document_;
  vector<Document> documents_;
  vector<StoryFullId> story_full_ids_;
  WebPageInstantView instant_view_;  // only is_empty_ and is_v2_ are serialized with the page

  FileSourceId file_source_id_;  // not serialized

  mutable uint64 log_event_id_ = 0;  // not serialized; mutable to be changeable through const WebPage *

  // Serializes the web page; every optional field is written only if its flag is set.
  template <class StorerT>
  void store(StorerT &storer) const {
    using ::td::store;

    bool has_type = !type_.empty();
    bool has_site_name = !site_name_.empty();
    bool has_title = !title_.empty();
    bool has_description = !description_.empty();
    bool has_photo = !photo_.is_empty();
    bool has_embed = !embed_url_.empty();
    bool has_embed_dimensions = has_embed && embed_dimensions_ != Dimensions();
    bool has_duration = duration_ > 0;
    bool has_author = !author_.empty();
    bool has_document = !document_.empty();
    bool has_instant_view = !instant_view_.is_empty_;
    bool is_instant_view_v2 = instant_view_.is_v2_;
    bool has_no_hash = true;  // the legacy hash field is never written anymore
    bool has_documents = !documents_.empty();
    bool has_story_full_ids = !story_full_ids_.empty();

    BEGIN_STORE_FLAGS();
    STORE_FLAG(has_type);
    STORE_FLAG(has_site_name);
    STORE_FLAG(has_title);
    STORE_FLAG(has_description);
    STORE_FLAG(has_photo);
    STORE_FLAG(has_embed);
    STORE_FLAG(has_embed_dimensions);
    STORE_FLAG(has_duration);
    STORE_FLAG(has_author);
    STORE_FLAG(has_document);
    STORE_FLAG(has_instant_view);
    STORE_FLAG(has_no_hash);
    STORE_FLAG(is_instant_view_v2);
    STORE_FLAG(has_documents);
    STORE_FLAG(has_story_full_ids);
    STORE_FLAG(has_large_media_);
    END_STORE_FLAGS();

    store(url_, storer);
    store(display_url_, storer);
    if (has_type) {
      store(type_, storer);
    }
    if (has_site_name) {
      store(site_name_, storer);
    }
    if (has_title) {
      store(title_, storer);
    }
    if (has_description) {
      store(description_, storer);
    }
    if (has_photo) {
      store(photo_, storer);
    }
    if (has_embed) {
      store(embed_url_, storer);
      store(embed_type_, storer);
    }
    if (has_embed_dimensions) {
      store(embed_dimensions_, storer);
    }
    if (has_duration) {
      store(duration_, storer);
    }
    if (has_author) {
      store(author_, storer);
    }
    if (has_document) {
      store(document_, storer);
    }
    if (has_documents) {
      store(documents_, storer);
    }
    if (has_story_full_ids) {
      store(story_full_ids_, storer);
    }
  }

  // Restores a web page written by store(), including data written in the old format
  // that still contained a hash right after display_url_.
  template <class ParserT>
  void parse(ParserT &parser) {
    using ::td::parse;

    bool has_type;
    bool has_site_name;
    bool has_title;
    bool has_description;
    bool has_photo;
    bool has_embed;
    bool has_embed_dimensions;
    bool has_duration;
    bool has_author;
    bool has_document;
    bool has_instant_view;
    bool is_instant_view_v2;
    bool has_no_hash;
    bool has_documents;
    bool has_story_full_ids;

    BEGIN_PARSE_FLAGS();
    PARSE_FLAG(has_type);
    PARSE_FLAG(has_site_name);
    PARSE_FLAG(has_title);
    PARSE_FLAG(has_description);
    PARSE_FLAG(has_photo);
    PARSE_FLAG(has_embed);
    PARSE_FLAG(has_embed_dimensions);
    PARSE_FLAG(has_duration);
    PARSE_FLAG(has_author);
    PARSE_FLAG(has_document);
    PARSE_FLAG(has_instant_view);
    PARSE_FLAG(has_no_hash);
    PARSE_FLAG(is_instant_view_v2);
    PARSE_FLAG(has_documents);
    PARSE_FLAG(has_story_full_ids);
    PARSE_FLAG(has_large_media_);
    END_PARSE_FLAGS();

    parse(url_, parser);
    parse(display_url_, parser);
    if (!has_no_hash) {
      // old format: read the hash to keep the parser position correct and drop it
      int32 hash;
      parse(hash, parser);
    }
    if (has_type) {
      parse(type_, parser);
    }
    if (has_site_name) {
      parse(site_name_, parser);
    }
    if (has_title) {
      parse(title_, parser);
    }
    if (has_description) {
      parse(description_, parser);
    }
    if (has_photo) {
      parse(photo_, parser);
    }
    if (has_embed) {
      parse(embed_url_, parser);
      parse(embed_type_, parser);
    }
    if (has_embed_dimensions) {
      parse(embed_dimensions_, parser);
    }
    if (has_duration) {
      parse(duration_, parser);
    }
    if (has_author) {
      parse(author_, parser);
    }
    if (has_document) {
      parse(document_, parser);
    }
    if (has_documents) {
      parse(documents_, parser);
    }
    if (has_story_full_ids) {
      parse(story_full_ids_, parser);
      // keep only identifiers of server stories
      td::remove_if(story_full_ids_, [](StoryFullId story_full_id) { return !story_full_id.is_server(); });
    }

    if (has_instant_view) {
      instant_view_.is_empty_ = false;
    }
    if (is_instant_view_v2) {
      instant_view_.is_v2_ = true;
    }
  }

  // Compares the serialized part of two web pages. Of the instant view only is_empty_ and
  // is_v2_ are compared; file_source_id_ and log_event_id_ are not compared at all.
  friend bool operator==(const WebPage &lhs, const WebPage &rhs) {
    return lhs.url_ == rhs.url_ && lhs.display_url_ == rhs.display_url_ && lhs.type_ == rhs.type_ &&
           lhs.site_name_ == rhs.site_name_ && lhs.title_ == rhs.title_ && lhs.description_ == rhs.description_ &&
           lhs.photo_ == rhs.photo_ && lhs.embed_url_ == rhs.embed_url_ && lhs.embed_type_ == rhs.embed_type_ &&
           lhs.embed_dimensions_ == rhs.embed_dimensions_ && lhs.duration_ == rhs.duration_ &&
           lhs.author_ == rhs.author_ && lhs.has_large_media_ == rhs.has_large_media_ &&
           lhs.document_ == rhs.document_ && lhs.documents_ == rhs.documents_ &&
           lhs.story_full_ids_ == rhs.story_full_ids_ &&
           lhs.instant_view_.is_empty_ == rhs.instant_view_.is_empty_ &&
           lhs.instant_view_.is_v2_ == rhs.instant_view_.is_v2_;
  }
};
|
|
|
|
|
|
|
|
// Creates the manager and registers it as the callback data of the pending web pages timeout.
WebPagesManager::WebPagesManager(Td *td, ActorShared<> parent)
    : td_(td)
    , parent_(std::move(parent)) {
  // the callback is called with this manager as its data
  pending_web_pages_timeout_.set_callback(on_pending_web_page_timeout_callback);
  pending_web_pages_timeout_.set_callback_data(static_cast<void *>(this));
}
|
|
|
|
|
|
|
|
void WebPagesManager::tear_down() {
|
|
|
|
parent_.reset();
|
2022-04-25 19:52:44 +02:00
|
|
|
|
2022-11-18 11:16:24 +01:00
|
|
|
LOG(DEBUG) << "Have " << web_pages_.calc_size() << " web pages to free";
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
2022-07-20 12:40:14 +02:00
|
|
|
// Hands the manager's containers over to the GC scheduler for destruction.
WebPagesManager::~WebPagesManager() {
  auto gc_scheduler_id = G()->get_gc_scheduler_id();
  Scheduler::instance()->destroy_on_scheduler(gc_scheduler_id, web_pages_, web_page_messages_, url_to_web_page_id_,
                                              url_to_file_source_id_);
}
|
2018-12-31 20:04:05 +01:00
|
|
|
|
2023-10-16 18:37:40 +02:00
|
|
|
// Returns the URL stored in the given server web page object.
// webPageNotModified carries no URL: an error is logged and an empty string is returned.
string WebPagesManager::get_web_page_url(const tl_object_ptr<telegram_api::WebPage> &web_page_ptr) {
  CHECK(web_page_ptr != nullptr);

  const auto *web_page = web_page_ptr.get();
  switch (web_page->get_id()) {
    case telegram_api::webPageEmpty::ID: {
      return static_cast<const telegram_api::webPageEmpty *>(web_page)->url_;
    }
    case telegram_api::webPagePending::ID: {
      return static_cast<const telegram_api::webPagePending *>(web_page)->url_;
    }
    case telegram_api::webPage::ID: {
      return static_cast<const telegram_api::webPage *>(web_page)->url_;
    }
    case telegram_api::webPageNotModified::ID: {
      LOG(ERROR) << "Receive webPageNotModified";
      return string();
    }
    default: {
      UNREACHABLE();
      return string();
    }
  }
}
|
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
// Processes a web page object received from the server and returns its identifier.
//
// - webPageEmpty: the locally known page with this identifier (if any) is removed from the
//   binlog, the file manager, memory and the database; an empty WebPageId is returned.
// - webPagePending: a timeout is scheduled for the page and its identifier is returned.
// - webPage: the page is converted to WebPage, passed to update_web_page and its identifier
//   is returned.
// - webPageNotModified: unexpected here; an error is logged and an empty WebPageId is returned.
//
// Bots never process web pages and always get an empty WebPageId.
WebPageId WebPagesManager::on_get_web_page(tl_object_ptr<telegram_api::WebPage> &&web_page_ptr,
                                           DialogId owner_dialog_id) {
  CHECK(web_page_ptr != nullptr);
  if (td_->auth_manager_->is_bot()) {
    return WebPageId();
  }
  LOG(DEBUG) << "Receive " << to_string(web_page_ptr);

  switch (web_page_ptr->get_id()) {
    case telegram_api::webPageEmpty::ID: {
      auto empty_page = move_tl_object_as<telegram_api::webPageEmpty>(web_page_ptr);
      WebPageId web_page_id(empty_page->id_);
      if (!web_page_id.is_valid()) {
        // the default identifier is received regularly and isn't worth an error
        LOG_IF(ERROR, web_page_id != WebPageId()) << "Receive invalid " << web_page_id;
        return WebPageId();
      }

      LOG(INFO) << "Receive empty " << web_page_id;
      const WebPage *old_web_page = get_web_page(web_page_id);
      if (old_web_page != nullptr) {
        if (old_web_page->log_event_id_ != 0) {
          LOG(INFO) << "Erase " << web_page_id << " from binlog";
          binlog_erase(G()->td_db()->get_binlog(), old_web_page->log_event_id_);
          old_web_page->log_event_id_ = 0;
        }
        if (old_web_page->file_source_id_.is_valid()) {
          // the page has no files anymore
          td_->file_manager_->change_files_source(old_web_page->file_source_id_,
                                                  get_web_page_file_ids(old_web_page), vector<FileId>());
        }
        web_pages_.erase(web_page_id);
      }

      on_web_page_changed(web_page_id, false);
      if (G()->use_message_database()) {
        LOG(INFO) << "Delete " << web_page_id << " from database";
        auto sqlite_pmc = G()->td_db()->get_sqlite_pmc();
        sqlite_pmc->erase(get_web_page_database_key(web_page_id), Auto());
        sqlite_pmc->erase(get_web_page_instant_view_database_key(web_page_id), Auto());
      }

      return WebPageId();
    }
    case telegram_api::webPagePending::ID: {
      auto pending_page = move_tl_object_as<telegram_api::webPagePending>(web_page_ptr);
      WebPageId web_page_id(pending_page->id_);
      if (!web_page_id.is_valid()) {
        LOG(ERROR) << "Receive invalid " << web_page_id;
        return WebPageId();
      }

      auto web_page_date = pending_page->date_;
      LOG(INFO) << "Receive pending " << web_page_id << ", force_get_date = " << web_page_date
                << ", now = " << G()->server_time();

      // wait until the received date, but at least one second
      pending_web_pages_timeout_.add_timeout_in(web_page_id.get(), max(web_page_date - G()->server_time(), 1.0));
      return web_page_id;
    }
    case telegram_api::webPage::ID: {
      auto server_page = move_tl_object_as<telegram_api::webPage>(web_page_ptr);
      WebPageId web_page_id(server_page->id_);
      if (!web_page_id.is_valid()) {
        LOG(ERROR) << "Receive invalid " << web_page_id;
        return WebPageId();
      }

      LOG(INFO) << "Receive " << web_page_id;
      auto page = make_unique<WebPage>();

      page->url_ = std::move(server_page->url_);
      page->display_url_ = std::move(server_page->display_url_);
      page->type_ = std::move(server_page->type_);
      page->site_name_ = std::move(server_page->site_name_);
      page->title_ = std::move(server_page->title_);
      page->description_ = std::move(server_page->description_);
      page->photo_ = get_photo(td_, std::move(server_page->photo_), owner_dialog_id);
      page->embed_url_ = std::move(server_page->embed_url_);
      page->embed_type_ = std::move(server_page->embed_type_);
      page->embed_dimensions_ = get_dimensions(server_page->embed_width_, server_page->embed_height_, "webPage");
      page->duration_ = server_page->duration_;
      if (page->duration_ < 0) {
        LOG(ERROR) << "Receive wrong web page duration " << page->duration_;
        page->duration_ = 0;
      }
      page->author_ = std::move(server_page->author_);
      page->has_large_media_ = server_page->has_large_media_;

      // the main document is kept only if it is a telegram_api::document
      if (server_page->document_ != nullptr && server_page->document_->get_id() == telegram_api::document::ID) {
        page->document_ = td_->documents_manager_->on_get_document(
            move_tl_object_as<telegram_api::document>(server_page->document_), owner_dialog_id);
      }

      for (auto &attribute_ptr : server_page->attributes_) {
        CHECK(attribute_ptr != nullptr);
        switch (attribute_ptr->get_id()) {
          case telegram_api::webPageAttributeTheme::ID: {
            auto theme = telegram_api::move_object_as<telegram_api::webPageAttributeTheme>(attribute_ptr);
            for (auto &theme_document : theme->documents_) {
              if (theme_document->get_id() != telegram_api::document::ID) {
                continue;
              }
              auto parsed_document = td_->documents_manager_->on_get_document(
                  move_tl_object_as<telegram_api::document>(theme_document), owner_dialog_id);
              if (!parsed_document.empty()) {
                page->documents_.push_back(std::move(parsed_document));
              }
            }
            // TODO attribute->settings_
            break;
          }
          case telegram_api::webPageAttributeStory::ID: {
            auto story = telegram_api::move_object_as<telegram_api::webPageAttributeStory>(attribute_ptr);
            auto dialog_id = DialogId(story->peer_);
            auto story_id = StoryId(story->id_);
            auto story_full_id = StoryFullId(dialog_id, story_id);
            if (!story_full_id.is_server()) {
              LOG(ERROR) << "Receive " << to_string(story);
              break;
            }
            if (story->story_ != nullptr) {
              auto actual_story_id = td_->story_manager_->on_get_story(dialog_id, std::move(story->story_));
              if (story_id != actual_story_id) {
                LOG(ERROR) << "Receive " << actual_story_id << " instead of " << story_id;
              }
            }
            td_->messages_manager_->force_create_dialog(dialog_id, "webPageAttributeStory");
            page->story_full_ids_.push_back(story_full_id);
            break;
          }
        }
      }

      if (server_page->cached_page_ != nullptr) {
        on_get_web_page_instant_view(page.get(), std::move(server_page->cached_page_), server_page->hash_,
                                     owner_dialog_id);
      }

      update_web_page(std::move(page), web_page_id, false, false);
      return web_page_id;
    }
    case telegram_api::webPageNotModified::ID: {
      LOG(ERROR) << "Receive webPageNotModified";
      return WebPageId();
    }
    default: {
      UNREACHABLE();
      return WebPageId();
    }
  }
}
|
|
|
|
|
|
|
|
// Stores web_page as the current state of web_page_id, merging it with the previously stored page if there is one.
// from_binlog and from_database are used for logging, for story dependency resolution, and are forwarded to
// on_get_web_page_by_url and save_web_page; the page is neither reported as changed nor saved if from_database is set.
void WebPagesManager::update_web_page(unique_ptr<WebPage> web_page, WebPageId web_page_id, bool from_binlog,
                                      bool from_database) {
  LOG(INFO) << "Update " << web_page_id << (from_database ? " from database" : (from_binlog ? " from binlog" : ""));
  CHECK(web_page != nullptr);

  // story identifiers received from the binlog or the database are dropped if they can't be resolved
  const bool is_restored = from_binlog || from_database;
  if (is_restored && !web_page->story_full_ids_.empty()) {
    Dependencies story_dependencies;
    for (auto story_full_id : web_page->story_full_ids_) {
      story_dependencies.add(story_full_id);
    }
    if (!story_dependencies.resolve_force(td_, "update_web_page")) {
      web_page->story_full_ids_ = {};
    }
  }

  auto &page = web_pages_[web_page_id];
  auto old_file_ids = get_web_page_file_ids(page.get());
  WebPageInstantView old_instant_view;
  bool is_changed = true;
  if (page == nullptr) {
    // the page is new; reuse the file source registered for its URL, if any
    // NOTE(review): story_web_pages_ isn't updated on this path - confirm that this is intended
    auto source_it = url_to_file_source_id_.find(web_page->url_);
    if (source_it != url_to_file_source_id_.end()) {
      VLOG(file_references) << "Move " << source_it->second << " inside of " << web_page_id;
      web_page->file_source_id_ = source_it->second;
      url_to_file_source_id_.erase(source_it);
    }
  } else {
    is_changed = !(*page == *web_page);

    if (page->story_full_ids_ != web_page->story_full_ids_) {
      // unregister the page from the old stories
      for (auto story_full_id : page->story_full_ids_) {
        auto pages_it = story_web_pages_.find(story_full_id);
        if (pages_it == story_web_pages_.end()) {
          continue;
        }
        pages_it->second.erase(web_page_id);
        if (pages_it->second.empty()) {
          story_web_pages_.erase(pages_it);
        }
      }
      // and register it for the new ones
      for (auto story_full_id : web_page->story_full_ids_) {
        story_web_pages_[story_full_id].insert(web_page_id);
      }
    }

    // the old instant view and the log event identifier are inherited from the stored page
    old_instant_view = std::move(page->instant_view_);
    web_page->log_event_id_ = page->log_event_id_;
  }
  page = std::move(web_page);

  // must be called before any other action for correct behavior of get_url_file_source_id
  if (!page->url_.empty()) {
    on_get_web_page_by_url(page->url_, web_page_id, from_database);
  }

  update_web_page_instant_view(web_page_id, page->instant_view_, std::move(old_instant_view));

  // the set of files can be changed both by the new page and by the merged instant view
  auto new_file_ids = get_web_page_file_ids(page.get());
  if (old_file_ids != new_file_ids) {
    td_->file_manager_->change_files_source(get_web_page_file_source_id(page.get()), old_file_ids, new_file_ids);
  }

  if (!is_changed || from_database) {
    return;
  }
  on_web_page_changed(web_page_id, true);

  save_web_page(page.get(), web_page_id, from_binlog);
}
|
|
|
|
|
|
|
|
// Merges old_instant_view into new_instant_view and keeps the database copy of the instant view in sync:
// erases it if the new view is empty and wasn't loaded from the database, and saves the merged view otherwise,
// unless a load from the database has just been started
void WebPagesManager::update_web_page_instant_view(WebPageId web_page_id, WebPageInstantView &new_instant_view,
                                                   WebPageInstantView &&old_instant_view) {
  LOG(INFO) << "Merge new " << new_instant_view << " and old " << old_instant_view;

  // the flags must be read before the views are merged
  const bool new_from_database = new_instant_view.was_loaded_from_database_;
  const bool old_from_database = old_instant_view.was_loaded_from_database_;

  if (new_instant_view.is_empty_ && !new_from_database) {
    // new_instant_view is from server and is empty, need to delete the instant view
    const bool need_erase = G()->use_message_database() && (!old_instant_view.is_empty_ || !old_from_database);
    if (need_erase) {
      // we have no instant view and probably want it to be deleted from database
      LOG(INFO) << "Erase instant view of " << web_page_id << " from database";
      new_instant_view.was_loaded_from_database_ = true;
      G()->td_db()->get_sqlite_pmc()->erase(get_web_page_instant_view_database_key(web_page_id), Auto());
    }
    return;
  }

  if (need_use_old_instant_view(new_instant_view, old_instant_view)) {
    new_instant_view = std::move(old_instant_view);
  }

  // nothing to save without the database or without a loaded non-empty instant view
  if (!G()->use_message_database() || new_instant_view.is_empty_ || !new_instant_view.is_loaded_) {
    return;
  }

  // we have instant view and probably want it to be saved
  if (!new_from_database && !old_from_database) {
    // if it wasn't loaded from the database, load it first
    auto &pending_queries = load_web_page_instant_view_queries_[web_page_id];
    if (pending_queries.partial.empty() && pending_queries.full.empty()) {
      // try to load it only if there are no pending load queries
      load_web_page_instant_view(web_page_id, false, Auto());
      return;
    }
  }

  if (new_instant_view.was_loaded_from_database_) {
    return;
  }

  LOG(INFO) << "Save instant view of " << web_page_id << " to database";
  /*
  if (web_page_id.get() == 0) {
    auto blocks = std::move(new_instant_view.page_blocks_);
    new_instant_view.page_blocks_.clear();
    for (size_t i = 0; i < blocks.size(); i++) {
      LOG(ERROR) << to_string(blocks[i]->get_page_block_object());
      new_instant_view.page_blocks_.push_back(std::move(blocks[i]));
      log_event_store(new_instant_view);
    }
    UNREACHABLE();
  }
  */
  // the flag is set before the view is serialized
  new_instant_view.was_loaded_from_database_ = true;
  G()->td_db()->get_sqlite_pmc()->set(get_web_page_instant_view_database_key(web_page_id),
                                      log_event_store(new_instant_view).as_slice().str(), Auto());
}
|
|
|
|
|
|
|
|
bool WebPagesManager::need_use_old_instant_view(const WebPageInstantView &new_instant_view,
|
|
|
|
const WebPageInstantView &old_instant_view) {
|
2023-08-18 14:33:33 +02:00
|
|
|
if (old_instant_view.is_empty_ || !old_instant_view.is_loaded_) {
|
2018-12-31 20:04:05 +01:00
|
|
|
return false;
|
|
|
|
}
|
2023-08-18 14:33:33 +02:00
|
|
|
if (new_instant_view.is_empty_ || !new_instant_view.is_loaded_) {
|
2018-12-31 20:04:05 +01:00
|
|
|
return true;
|
|
|
|
}
|
2023-08-18 14:33:33 +02:00
|
|
|
if (new_instant_view.is_full_ != old_instant_view.is_full_) {
|
|
|
|
return old_instant_view.is_full_;
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
2023-08-18 14:33:33 +02:00
|
|
|
if (new_instant_view.hash_ == old_instant_view.hash_) {
|
2018-12-31 20:04:05 +01:00
|
|
|
// the same instant view
|
2023-08-18 14:33:33 +02:00
|
|
|
return !new_instant_view.is_full_ || old_instant_view.is_full_;
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// data in database is always outdated
|
2023-08-18 14:33:33 +02:00
|
|
|
return new_instant_view.was_loaded_from_database_;
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
2020-03-13 23:56:47 +01:00
|
|
|
// Increases the stored view count of the instant view of web_page_id to view_count and re-saves the instant view
// to the database if the message database is used; smaller or equal values are ignored
void WebPagesManager::on_get_web_page_instant_view_view_count(WebPageId web_page_id, int32 view_count) {
  const bool has_instant_view = get_web_page_instant_view(web_page_id) != nullptr;
  if (!has_instant_view) {
    return;
  }

  auto *instant_view = &web_pages_[web_page_id]->instant_view_;
  CHECK(!instant_view->is_empty_);
  if (view_count <= instant_view->view_count_) {
    // the view count never decreases
    return;
  }
  instant_view->view_count_ = view_count;

  if (!G()->use_message_database()) {
    return;
  }
  LOG(INFO) << "Save instant view of " << web_page_id << " to database after updating view count to " << view_count;
  G()->td_db()->get_sqlite_pmc()->set(get_web_page_instant_view_database_key(web_page_id),
                                      log_event_store(*instant_view).as_slice().str(), Auto());
}
|
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
void WebPagesManager::on_get_web_page_by_url(const string &url, WebPageId web_page_id, bool from_database) {
|
2023-10-16 20:41:43 +02:00
|
|
|
auto emplace_result = url_to_web_page_id_.emplace(url, std::make_pair(web_page_id, from_database));
|
|
|
|
auto &it = emplace_result.first;
|
|
|
|
bool is_inserted = emplace_result.second;
|
|
|
|
if (from_database && !it->second.second) {
|
|
|
|
// database data can't replace non-database data
|
|
|
|
CHECK(!is_inserted);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
auto &cached_web_page_id = it->second.first;
|
|
|
|
if (!from_database && G()->use_message_database() && (cached_web_page_id != web_page_id || is_inserted)) {
|
2018-12-31 20:04:05 +01:00
|
|
|
if (web_page_id.is_valid()) {
|
2023-10-16 20:41:43 +02:00
|
|
|
G()->td_db()->get_sqlite_pmc()->set(get_web_page_url_database_key(url), to_string(web_page_id.get()), Auto());
|
2018-12-31 20:04:05 +01:00
|
|
|
} else {
|
|
|
|
G()->td_db()->get_sqlite_pmc()->erase(get_web_page_url_database_key(url), Auto());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-16 20:41:43 +02:00
|
|
|
if (!is_inserted) {
|
|
|
|
if (cached_web_page_id.is_valid() && !it->second.second && web_page_id.is_valid() &&
|
|
|
|
web_page_id != cached_web_page_id) {
|
|
|
|
LOG(ERROR) << "URL \"" << url << "\" preview is changed from " << cached_web_page_id << " to " << web_page_id;
|
|
|
|
}
|
|
|
|
cached_web_page_id = web_page_id;
|
|
|
|
it->second.second = from_database;
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-09-21 18:11:17 +02:00
|
|
|
// Registers message_full_id as a message using web_page_id; registering the same pair twice is a fatal error.
// For non-bots, sets a timeout of 1.0 for the web page in pending_web_pages_timeout_ if
// have_web_page_force fails for it.
void WebPagesManager::register_web_page(WebPageId web_page_id, MessageFullId message_full_id, const char *source) {
  if (!web_page_id.is_valid()) {
    return;
  }

  LOG(INFO) << "Register " << web_page_id << " from " << message_full_id << " from " << source;
  auto &registered_messages = web_page_messages_[web_page_id];
  bool is_inserted = registered_messages.insert(message_full_id).second;
  LOG_CHECK(is_inserted) << source << " " << web_page_id << " " << message_full_id;

  if (td_->auth_manager_->is_bot() || have_web_page_force(web_page_id)) {
    return;
  }
  LOG(INFO) << "Waiting for " << web_page_id << " needed in " << message_full_id;
  pending_web_pages_timeout_.add_timeout_in(web_page_id.get(), 1.0);
}
|
|
|
|
|
2023-09-21 18:11:17 +02:00
|
|
|
// Removes message_full_id from the messages using web_page_id; removing an unregistered pair is a fatal error.
// After the last message is removed, the pending timeout of the web page is canceled, unless the web page
// is present in pending_get_web_pages_.
void WebPagesManager::unregister_web_page(WebPageId web_page_id, MessageFullId message_full_id, const char *source) {
  if (!web_page_id.is_valid()) {
    return;
  }

  LOG(INFO) << "Unregister " << web_page_id << " from " << message_full_id << " from " << source;
  auto &message_ids = web_page_messages_[web_page_id];
  const bool is_deleted = message_ids.erase(message_full_id) != 0;
  LOG_CHECK(is_deleted) << source << " " << web_page_id << " " << message_full_id;

  if (!message_ids.empty()) {
    // the web page is still used by other messages
    return;
  }
  web_page_messages_.erase(web_page_id);

  const bool has_pending_requests = pending_get_web_pages_.count(web_page_id) != 0;
  if (!has_pending_requests) {
    pending_web_pages_timeout_.cancel_timeout(web_page_id.get());
  }
}
|
|
|
|
|
2023-10-16 20:18:41 +02:00
|
|
|
void WebPagesManager::on_get_web_page_preview(const string &first_url,
|
2023-02-15 13:05:37 +01:00
|
|
|
tl_object_ptr<telegram_api::MessageMedia> &&message_media_ptr,
|
|
|
|
Promise<td_api::object_ptr<td_api::webPage>> &&promise) {
|
2018-12-31 20:04:05 +01:00
|
|
|
CHECK(message_media_ptr != nullptr);
|
|
|
|
int32 constructor_id = message_media_ptr->get_id();
|
|
|
|
if (constructor_id != telegram_api::messageMediaWebPage::ID) {
|
|
|
|
if (constructor_id == telegram_api::messageMediaEmpty::ID) {
|
2023-10-16 20:18:41 +02:00
|
|
|
on_get_web_page_preview_success(first_url, WebPageId(), std::move(promise));
|
2018-12-31 20:04:05 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
LOG(ERROR) << "Receive " << to_string(message_media_ptr) << " instead of web page";
|
2023-02-15 13:05:37 +01:00
|
|
|
return promise.set_error(Status::Error(500, "Receive not web page in GetWebPagePreview"));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
auto message_media_web_page = move_tl_object_as<telegram_api::messageMediaWebPage>(message_media_ptr);
|
|
|
|
CHECK(message_media_web_page->webpage_ != nullptr);
|
|
|
|
|
|
|
|
auto web_page_id = on_get_web_page(std::move(message_media_web_page->webpage_), DialogId());
|
|
|
|
if (web_page_id.is_valid() && !have_web_page(web_page_id)) {
|
2023-10-16 20:18:41 +02:00
|
|
|
pending_get_web_pages_[web_page_id].emplace_back(first_url, std::move(promise));
|
2018-12-31 20:04:05 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2023-10-16 20:18:41 +02:00
|
|
|
on_get_web_page_preview_success(first_url, web_page_id, std::move(promise));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
2023-10-16 20:18:41 +02:00
|
|
|
void WebPagesManager::on_get_web_page_preview_success(const string &first_url, WebPageId web_page_id,
|
2023-02-15 13:05:37 +01:00
|
|
|
Promise<td_api::object_ptr<td_api::webPage>> &&promise) {
|
2018-12-31 20:04:05 +01:00
|
|
|
CHECK(web_page_id == WebPageId() || have_web_page(web_page_id));
|
|
|
|
|
2023-10-16 20:18:41 +02:00
|
|
|
if (web_page_id.is_valid() && !first_url.empty()) {
|
|
|
|
on_get_web_page_by_url(first_url, web_page_id, true);
|
2018-02-20 22:20:45 +01:00
|
|
|
}
|
2018-12-31 20:04:05 +01:00
|
|
|
|
2023-10-19 12:03:27 +02:00
|
|
|
promise.set_value(get_web_page_object(web_page_id, false, false, false));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
2023-02-15 13:05:37 +01:00
|
|
|
// Returns through the promise a preview of the first URL in the given text: nullptr if the text has no URL,
// the known web page if one is found for the URL, and the result of a GetWebPagePreviewQuery otherwise
void WebPagesManager::get_web_page_preview(td_api::object_ptr<td_api::formattedText> &&text,
                                           Promise<td_api::object_ptr<td_api::webPage>> &&promise) {
  TRY_RESULT_PROMISE(
      promise, formatted_text,
      get_formatted_text(td_, DialogId(), std::move(text), td_->auth_manager_->is_bot(), true, true, true));

  auto first_url = get_first_url(formatted_text);
  if (first_url.empty()) {
    // nothing to preview
    return promise.set_value(nullptr);
  }

  LOG(INFO) << "Trying to get web page preview for message \"" << formatted_text.text << '"';

  auto known_web_page_id = get_web_page_by_url(first_url);
  if (known_web_page_id.is_valid()) {
    // the web page for the URL is already known, no need to send a query
    return promise.set_value(get_web_page_object(known_web_page_id, false, false, false));
  }

  td_->create_handler<GetWebPagePreviewQuery>(std::move(promise))
      ->send(formatted_text.text,
             get_input_message_entities(td_->contacts_manager_.get(), formatted_text.entities, "get_web_page_preview"),
             std::move(first_url));
}
|
|
|
|
|
2021-10-07 12:00:41 +02:00
|
|
|
void WebPagesManager::get_web_page_instant_view(const string &url, bool force_full, Promise<WebPageId> &&promise) {
|
2023-01-15 10:00:26 +01:00
|
|
|
LOG(INFO) << "Trying to get web page instant view for the URL \"" << url << '"';
|
2022-02-09 22:59:52 +01:00
|
|
|
if (url.empty()) {
|
|
|
|
return promise.set_value(WebPageId());
|
|
|
|
}
|
2018-12-31 20:04:05 +01:00
|
|
|
auto it = url_to_web_page_id_.find(url);
|
|
|
|
if (it != url_to_web_page_id_.end()) {
|
2023-10-16 20:41:43 +02:00
|
|
|
auto web_page_id = it->second.first;
|
|
|
|
if (web_page_id == WebPageId()) {
|
2018-04-22 19:26:00 +02:00
|
|
|
// ignore negative caching
|
2021-10-07 11:49:46 +02:00
|
|
|
return reload_web_page_by_url(url, std::move(promise));
|
2018-04-22 19:26:00 +02:00
|
|
|
}
|
2023-10-16 20:41:43 +02:00
|
|
|
return get_web_page_instant_view_impl(web_page_id, force_full, std::move(promise));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
2022-02-16 17:14:25 +01:00
|
|
|
auto new_promise = PromiseCreator::lambda(
|
|
|
|
[actor_id = actor_id(this), force_full, promise = std::move(promise)](Result<WebPageId> r_web_page_id) mutable {
|
|
|
|
if (r_web_page_id.is_error()) {
|
|
|
|
promise.set_error(r_web_page_id.move_as_error());
|
|
|
|
} else {
|
|
|
|
send_closure(actor_id, &WebPagesManager::get_web_page_instant_view_impl, r_web_page_id.ok(), force_full,
|
|
|
|
std::move(promise));
|
|
|
|
}
|
|
|
|
});
|
|
|
|
load_web_page_by_url(url, std::move(new_promise));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
2022-02-16 17:14:25 +01:00
|
|
|
// Returns through the promise web_page_id if its instant view is available in the requested form,
// an empty identifier if there is no instant view, or starts loading of the instant view otherwise
void WebPagesManager::get_web_page_instant_view_impl(WebPageId web_page_id, bool force_full,
                                                     Promise<WebPageId> &&promise) {
  TRY_STATUS_PROMISE(promise, G()->close_status());

  LOG(INFO) << "Trying to get web page instant view for " << web_page_id;

  const WebPageInstantView *instant_view = get_web_page_instant_view(web_page_id);
  if (instant_view == nullptr) {
    return promise.set_value(WebPageId());
  }

  const bool need_load = !instant_view->is_loaded_ || (force_full && !instant_view->is_full_);
  if (need_load) {
    return load_web_page_instant_view(web_page_id, force_full, std::move(promise));
  }

  if (force_full) {
    // the available instant view is returned immediately, but a reload is requested as well
    reload_web_page_instant_view(web_page_id);
  }

  promise.set_value(std::move(web_page_id));
}
|
|
|
|
|
|
|
|
// Returns the database key of the instant view of the web page: "wpiv" followed by the web page identifier
string WebPagesManager::get_web_page_instant_view_database_key(WebPageId web_page_id) {
  auto raw_web_page_id = web_page_id.get();
  return PSTRING() << "wpiv" << raw_web_page_id;
}
|
|
|
|
|
2021-10-07 11:49:46 +02:00
|
|
|
// Adds the promise to the pending load queries of the instant view and, if there were no pending queries,
// starts loading the instant view: from the database if it is used and the view wasn't loaded from it yet,
// and through reload_web_page_instant_view otherwise
void WebPagesManager::load_web_page_instant_view(WebPageId web_page_id, bool force_full, Promise<WebPageId> &&promise) {
  auto &pending_queries = load_web_page_instant_view_queries_[web_page_id];
  auto previous_queries = pending_queries.partial.size() + pending_queries.full.size();
  auto &target_queries = force_full ? pending_queries.full : pending_queries.partial;
  target_queries.push_back(std::move(promise));

  LOG(INFO) << "Load " << web_page_id << " instant view, have " << previous_queries << " previous queries";
  if (previous_queries != 0) {
    // the promise will be set when the already running load finishes
    return;
  }

  const WebPageInstantView *web_page_instant_view = get_web_page_instant_view(web_page_id);
  CHECK(web_page_instant_view != nullptr);

  const bool can_load_from_database =
      G()->use_message_database() && !web_page_instant_view->was_loaded_from_database_;
  if (!can_load_from_database) {
    reload_web_page_instant_view(web_page_id);
    return;
  }

  LOG(INFO) << "Trying to load " << web_page_id << " instant view from database";
  auto database_promise = PromiseCreator::lambda([actor_id = actor_id(this), web_page_id](string value) {
    send_closure(actor_id, &WebPagesManager::on_load_web_page_instant_view_from_database, web_page_id,
                 std::move(value));
  });
  G()->td_db()->get_sqlite_pmc()->get(get_web_page_instant_view_database_key(web_page_id),
                                      std::move(database_promise));
}
|
|
|
|
|
|
|
|
// Sends a GetWebPageQuery for the web page; the result is passed to update_web_page_instant_view_load_requests
// with force_update == true. The web page must exist and have a non-empty instant view.
void WebPagesManager::reload_web_page_instant_view(WebPageId web_page_id) {
  if (G()->close_flag()) {
    // fail the pending load queries instead of sending a query
    return update_web_page_instant_view_load_requests(web_page_id, true, Global::request_aborted_error());
  }

  LOG(INFO) << "Reload " << web_page_id << " instant view";
  const WebPage *web_page = get_web_page(web_page_id);
  CHECK(web_page != nullptr && !web_page->instant_view_.is_empty_);

  auto query_promise = PromiseCreator::lambda([actor_id = actor_id(this), web_page_id](Result<WebPageId> result) {
    send_closure(actor_id, &WebPagesManager::update_web_page_instant_view_load_requests, web_page_id, true,
                 std::move(result));
  });
  // the hash of the known instant view is sent only if the view is full
  td_->create_handler<GetWebPageQuery>(std::move(query_promise))
      ->send(web_page_id, web_page->url_, web_page->instant_view_.is_full_ ? web_page->instant_view_.hash_ : 0);
}
|
|
|
|
|
|
|
|
// Handles the serialized instant view read from the database for web_page_id: parses it, merges it with
// the instant view of the web page, updates file sources, and processes the pending load queries
void WebPagesManager::on_load_web_page_instant_view_from_database(WebPageId web_page_id, string value) {
  if (G()->close_flag()) {
    return;
  }
  CHECK(G()->use_message_database());
  LOG(INFO) << "Successfully loaded " << web_page_id << " instant view of size " << value.size() << " from database";
  // G()->td_db()->get_sqlite_pmc()->erase(get_web_page_instant_view_database_key(web_page_id), Auto());
  // value.clear();

  WebPage *web_page = web_pages_.get_pointer(web_page_id);
  const bool has_instant_view = web_page != nullptr && !web_page->instant_view_.is_empty_;
  if (!has_instant_view) {
    // possible if web page loses preview/instant view
    LOG(WARNING) << "There is no instant view in " << web_page_id;
    if (!value.empty()) {
      G()->td_db()->get_sqlite_pmc()->erase(get_web_page_instant_view_database_key(web_page_id), Auto());
    }
    update_web_page_instant_view_load_requests(web_page_id, true, web_page_id);
    return;
  }

  auto &current_instant_view = web_page->instant_view_;
  if (current_instant_view.was_loaded_from_database_) {
    return;
  }

  WebPageInstantView database_instant_view;
  if (!value.empty()) {
    auto status = log_event_parse(database_instant_view, value);
    if (status.is_error()) {
      // the stored data is broken; discard whatever was parsed and erase the data from the database
      database_instant_view = WebPageInstantView();

      LOG(ERROR) << "Erase instant view in " << web_page_id << " from database because of " << status.message();
      G()->td_db()->get_sqlite_pmc()->erase(get_web_page_instant_view_database_key(web_page_id), Auto());
    }
  }
  database_instant_view.was_loaded_from_database_ = true;

  auto old_file_ids = get_web_page_file_ids(web_page);

  // the view from the database is passed as the old view
  update_web_page_instant_view(web_page_id, current_instant_view, std::move(database_instant_view));

  auto new_file_ids = get_web_page_file_ids(web_page);
  if (old_file_ids != new_file_ids) {
    td_->file_manager_->change_files_source(get_web_page_file_source_id(web_page), old_file_ids, new_file_ids);
  }

  update_web_page_instant_view_load_requests(web_page_id, false, web_page_id);
}
|
|
|
|
|
|
|
|
void WebPagesManager::update_web_page_instant_view_load_requests(WebPageId web_page_id, bool force_update,
|
2021-10-07 11:49:46 +02:00
|
|
|
Result<WebPageId> r_web_page_id) {
|
2023-02-16 11:35:27 +01:00
|
|
|
G()->ignore_result_if_closing(r_web_page_id);
|
2018-12-31 20:04:05 +01:00
|
|
|
LOG(INFO) << "Update load requests for " << web_page_id;
|
|
|
|
auto it = load_web_page_instant_view_queries_.find(web_page_id);
|
|
|
|
if (it == load_web_page_instant_view_queries_.end()) {
|
|
|
|
return;
|
|
|
|
}
|
2021-10-07 11:49:46 +02:00
|
|
|
vector<Promise<WebPageId>> promises[2];
|
2018-12-31 20:04:05 +01:00
|
|
|
promises[0] = std::move(it->second.partial);
|
|
|
|
promises[1] = std::move(it->second.full);
|
|
|
|
reset_to_empty(it->second.partial);
|
|
|
|
reset_to_empty(it->second.full);
|
|
|
|
load_web_page_instant_view_queries_.erase(it);
|
|
|
|
|
2021-10-07 11:49:46 +02:00
|
|
|
if (r_web_page_id.is_error()) {
|
|
|
|
LOG(INFO) << "Receive error " << r_web_page_id.error() << " for load " << web_page_id;
|
2019-01-19 02:09:58 +01:00
|
|
|
combine(promises[0], std::move(promises[1]));
|
2022-04-13 16:40:12 +02:00
|
|
|
fail_promises(promises[0], r_web_page_id.move_as_error());
|
2018-12-31 20:04:05 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2021-10-07 11:49:46 +02:00
|
|
|
auto new_web_page_id = r_web_page_id.move_as_ok();
|
|
|
|
LOG(INFO) << "Successfully loaded web page " << web_page_id << " as " << new_web_page_id;
|
|
|
|
const WebPageInstantView *web_page_instant_view = get_web_page_instant_view(new_web_page_id);
|
2018-12-31 20:04:05 +01:00
|
|
|
if (web_page_instant_view == nullptr) {
|
2019-01-19 02:09:58 +01:00
|
|
|
combine(promises[0], std::move(promises[1]));
|
2018-12-31 20:04:05 +01:00
|
|
|
for (auto &promise : promises[0]) {
|
2021-10-07 11:49:46 +02:00
|
|
|
promise.set_value(WebPageId());
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
2022-02-09 22:59:52 +01:00
|
|
|
CHECK(new_web_page_id.is_valid());
|
2023-08-18 14:33:33 +02:00
|
|
|
if (web_page_instant_view->is_loaded_) {
|
|
|
|
if (web_page_instant_view->is_full_) {
|
2019-01-19 02:09:58 +01:00
|
|
|
combine(promises[0], std::move(promises[1]));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
for (auto &promise : promises[0]) {
|
2021-10-07 11:49:46 +02:00
|
|
|
promise.set_value(WebPageId(new_web_page_id));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
2019-01-19 02:09:58 +01:00
|
|
|
reset_to_empty(promises[0]);
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
if (!promises[0].empty() || !promises[1].empty()) {
|
|
|
|
if (force_update) {
|
|
|
|
// protection from cycles
|
2021-10-07 11:49:46 +02:00
|
|
|
LOG(ERROR) << "Expected to receive " << web_page_id << '/' << new_web_page_id
|
|
|
|
<< " from the server, but didn't receive it";
|
2019-01-19 02:09:58 +01:00
|
|
|
combine(promises[0], std::move(promises[1]));
|
2018-12-31 20:04:05 +01:00
|
|
|
for (auto &promise : promises[0]) {
|
2021-10-07 11:49:46 +02:00
|
|
|
promise.set_value(WebPageId());
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
2021-10-07 11:49:46 +02:00
|
|
|
auto &load_queries = load_web_page_instant_view_queries_[new_web_page_id];
|
2018-12-31 20:04:05 +01:00
|
|
|
auto old_size = load_queries.partial.size() + load_queries.full.size();
|
2019-01-19 02:09:58 +01:00
|
|
|
combine(load_queries.partial, std::move(promises[0]));
|
|
|
|
combine(load_queries.full, std::move(promises[1]));
|
2018-12-31 20:04:05 +01:00
|
|
|
if (old_size == 0) {
|
2021-10-07 11:49:46 +02:00
|
|
|
reload_web_page_instant_view(new_web_page_id);
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-16 19:41:39 +02:00
|
|
|
// Returns the URL of the web page, or an empty string if the web page isn't found by get_web_page
string WebPagesManager::get_web_page_url(WebPageId web_page_id) const {
  const WebPage *web_page = get_web_page(web_page_id);
  if (web_page == nullptr) {
    return string();
  }
  return web_page->url_;
}
|
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
// Returns the web page identifier remembered for the URL in url_to_web_page_id_,
// or an empty identifier if the URL is empty or unknown
WebPageId WebPagesManager::get_web_page_by_url(const string &url) const {
  if (url.empty()) {
    return WebPageId();
  }

  auto cached_it = url_to_web_page_id_.find(url);
  if (cached_it == url_to_web_page_id_.end()) {
    LOG(INFO) << "Can't find web page identifier for the URL \"" << url << '"';
    return WebPageId();
  }

  // the whole cached entry is logged, but only the identifier is returned
  LOG(INFO) << "Return " << cached_it->second << " for the URL \"" << url << '"';
  return cached_it->second.first;
}
|
|
|
|
|
2021-10-07 11:49:46 +02:00
|
|
|
void WebPagesManager::get_web_page_by_url(const string &url, Promise<WebPageId> &&promise) {
|
2023-01-15 10:00:26 +01:00
|
|
|
LOG(INFO) << "Trying to get web page identifier for the URL \"" << url << '"';
|
2022-02-09 22:59:52 +01:00
|
|
|
if (url.empty()) {
|
|
|
|
return promise.set_value(WebPageId());
|
|
|
|
}
|
2018-12-31 20:04:05 +01:00
|
|
|
|
|
|
|
auto it = url_to_web_page_id_.find(url);
|
|
|
|
if (it != url_to_web_page_id_.end()) {
|
2023-10-16 20:41:43 +02:00
|
|
|
return promise.set_value(WebPageId(it->second.first));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
load_web_page_by_url(url, std::move(promise));
|
|
|
|
}
|
|
|
|
|
2021-10-19 17:11:16 +02:00
|
|
|
// Loads the web page identifier for the URL: from the database if the message database is used,
// and through reload_web_page_by_url otherwise; an empty URL produces an empty identifier
void WebPagesManager::load_web_page_by_url(string url, Promise<WebPageId> &&promise) {
  if (url.empty()) {
    return promise.set_value(WebPageId());
  }
  if (!G()->use_message_database()) {
    return reload_web_page_by_url(url, std::move(promise));
  }

  LOG(INFO) << "Load \"" << url << '"';
  // the key must be created before the URL is moved into the callback
  auto key = get_web_page_url_database_key(url);
  auto database_promise = PromiseCreator::lambda(
      [actor_id = actor_id(this), url = std::move(url), promise = std::move(promise)](string value) mutable {
        send_closure(actor_id, &WebPagesManager::on_load_web_page_id_by_url_from_database, std::move(url),
                     std::move(value), std::move(promise));
      });
  G()->td_db()->get_sqlite_pmc()->get(key, std::move(database_promise));
}
|
|
|
|
|
2021-10-07 11:49:46 +02:00
|
|
|
// Handles the value read from the database for the URL: the value is parsed as a web page identifier,
// and the web page is then taken from memory or loaded from the database;
// the web page is reloaded by the URL if the value is empty or contains an invalid identifier
void WebPagesManager::on_load_web_page_id_by_url_from_database(string url, string value, Promise<WebPageId> &&promise) {
  TRY_STATUS_PROMISE(promise, G()->close_status());

  LOG(INFO) << "Successfully loaded URL \"" << url << "\" of size " << value.size() << " from database";
  // G()->td_db()->get_sqlite_pmc()->erase(get_web_page_url_database_key(web_page_id), Auto());
  // value.clear();

  auto cached_it = url_to_web_page_id_.find(url);
  if (cached_it != url_to_web_page_id_.end()) {
    // URL web page has already been loaded
    return promise.set_value(WebPageId(cached_it->second.first));
  }

  if (value.empty()) {
    // there is nothing about the URL in the database
    return reload_web_page_by_url(url, std::move(promise));
  }

  auto web_page_id = WebPageId(to_integer<int64>(value));
  if (!web_page_id.is_valid()) {
    LOG(ERROR) << "Receive invalid " << web_page_id;
    return reload_web_page_by_url(url, std::move(promise));
  }

  if (have_web_page(web_page_id)) {
    // URL web page has already been loaded
    on_get_web_page_by_url(url, web_page_id, true);
    promise.set_value(WebPageId(web_page_id));
    return;
  }

  auto load_promise = PromiseCreator::lambda([actor_id = actor_id(this), web_page_id, url = std::move(url),
                                              promise = std::move(promise)](Result<Unit> result) mutable {
    send_closure(actor_id, &WebPagesManager::on_load_web_page_by_url_from_database, web_page_id, std::move(url),
                 std::move(promise), std::move(result));
  });
  load_web_page_from_database(web_page_id, std::move(load_promise));
}
|
|
|
|
|
2021-10-07 11:49:46 +02:00
|
|
|
// Final step of loading a web page by URL through the database: called after an attempt to load
// the web page with the given identifier. If the web page still isn't available, the URL is
// requested through reload_web_page_by_url.
void WebPagesManager::on_load_web_page_by_url_from_database(WebPageId web_page_id, string url,
                                                            Promise<WebPageId> &&promise, Result<Unit> &&result) {
  if (result.is_error()) {
    // an error is expected here only while closing
    CHECK(G()->close_flag());
    promise.set_error(Global::request_aborted_error());
    return;
  }

  const WebPage *loaded_web_page = get_web_page(web_page_id);
  if (loaded_web_page == nullptr) {
    reload_web_page_by_url(url, std::move(promise));
    return;
  }

  if (loaded_web_page->url_ != url) {
    // the requested URL differs from the URL of the web page, so it is registered separately
    on_get_web_page_by_url(url, web_page_id, true);
  }

  promise.set_value(WebPageId(web_page_id));
}
|
|
|
|
|
2021-10-07 11:49:46 +02:00
|
|
|
void WebPagesManager::reload_web_page_by_url(const string &url, Promise<WebPageId> &&promise) {
|
2021-10-07 15:36:21 +02:00
|
|
|
TRY_STATUS_PROMISE(promise, G()->close_status());
|
2020-03-13 23:56:47 +01:00
|
|
|
td_->create_handler<GetWebPageQuery>(std::move(promise))->send(WebPageId(), url, 0);
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Returns true if the identifier is valid and the corresponding web page is found by get_web_page.
bool WebPagesManager::have_web_page(WebPageId web_page_id) const {
  return web_page_id.is_valid() && get_web_page(web_page_id) != nullptr;
}
|
|
|
|
|
2023-10-16 16:36:45 +02:00
|
|
|
// Builds the td_api::webPage object for the web page with the given identifier.
// Returns nullptr if the identifier is invalid or the web page isn't found by get_web_page.
// Entities found in the description are post-processed depending on the host of the display URL:
// mentions, hashtags and cashtags are turned into text URLs for some known sites, and mentions
// are dropped for hosts that aren't recognized.
tl_object_ptr<td_api::webPage> WebPagesManager::get_web_page_object(WebPageId web_page_id, bool force_small_media,
                                                                    bool force_large_media,
                                                                    bool skip_confirmation) const {
  if (!web_page_id.is_valid()) {
    return nullptr;
  }
  const WebPage *web_page = get_web_page(web_page_id);
  if (web_page == nullptr) {
    return nullptr;
  }

  // 0 - there is no instant view, 2 - the instant view has is_v2_ set, 1 - otherwise
  int32 instant_view_version = 1;
  if (web_page->instant_view_.is_empty_) {
    instant_view_version = 0;
  } else if (web_page->instant_view_.is_v2_) {
    instant_view_version = 2;
  }

  FormattedText description;
  description.text = web_page->description_;
  description.entities = find_entities(web_page->description_, true, false);

  auto r_display_url = parse_url(web_page->display_url_);
  if (r_display_url.is_ok()) {
    Slice host = r_display_url.ok().host_;
    if (!host.empty() && host.back() == '.') {
      // ignore a trailing dot in the host name
      host.truncate(host.size() - 1);
    }

    // Walks through the entities in order, extracts the text covered by each of them and asks
    // get_entity_url for a replacement URL; a non-empty result turns the entity into a TextUrl entity.
    auto rewrite_entities = [](Slice remaining_text, vector<MessageEntity> &entities, auto get_entity_url) {
      int32 processed_offset = 0;
      for (auto &entity : entities) {
        CHECK(entity.offset >= processed_offset);
        // skip the text between the previous entity and the current one
        remaining_text = utf8_utf16_substr(remaining_text, static_cast<size_t>(entity.offset - processed_offset));
        auto entity_text = utf8_utf16_substr(remaining_text, 0, static_cast<size_t>(entity.length));
        remaining_text = remaining_text.substr(entity_text.size());
        processed_offset = entity.offset + entity.length;

        auto new_url = get_entity_url(entity, entity_text);
        if (!new_url.empty()) {
          entity = MessageEntity(MessageEntity::Type::TextUrl, entity.offset, entity.length, std::move(new_url));
        }
      }
    };

    if (host == "instagram.com" || ends_with(host, ".instagram.com")) {
      rewrite_entities(description.text, description.entities, [](const MessageEntity &entity, Slice text) {
        switch (entity.type) {
          case MessageEntity::Type::Mention:
            return PSTRING() << "https://www.instagram.com/" << text.substr(1) << '/';
          case MessageEntity::Type::Hashtag:
            return PSTRING() << "https://www.instagram.com/explore/tags/" << url_encode(text.substr(1)) << '/';
          default:
            return string();
        }
      });
    } else if (host == "twitter.com" || ends_with(host, ".twitter.com")) {
      rewrite_entities(description.text, description.entities, [](const MessageEntity &entity, Slice text) {
        switch (entity.type) {
          case MessageEntity::Type::Mention:
            return PSTRING() << "https://twitter.com/" << text.substr(1);
          case MessageEntity::Type::Hashtag:
            return PSTRING() << "https://twitter.com/hashtag/" << url_encode(text.substr(1));
          case MessageEntity::Type::Cashtag:
            return PSTRING() << "https://twitter.com/search?q=" << url_encode(text) << "&src=cashtag_click";
          default:
            return string();
        }
      });
    } else if (host == "t.me" || host == "telegram.me" || host == "telegram.dog" || host == "telesco.pe") {
      // leave everything as is
    } else {
      // mentions are removed for all other hosts
      td::remove_if(description.entities,
                    [](const MessageEntity &entity) { return entity.type == MessageEntity::Type::Mention; });

      if (host == "youtube.com" || host == "www.youtube.com") {
        rewrite_entities(description.text, description.entities, [](const MessageEntity &entity, Slice text) {
          if (entity.type != MessageEntity::Type::Hashtag) {
            return string();
          }
          return PSTRING() << "https://www.youtube.com/results?search_query=" << url_encode(text);
        });
      } else if (host == "music.youtube.com") {
        rewrite_entities(description.text, description.entities, [](const MessageEntity &entity, Slice text) {
          if (entity.type != MessageEntity::Type::Hashtag) {
            return string();
          }
          return PSTRING() << "https://music.youtube.com/search?q=" << url_encode(text);
        });
      }
    }
  }

  auto duration = get_web_page_media_duration(web_page);
  // a zero duration removes the limit passed to get_formatted_text_object
  auto max_media_timestamp = duration == 0 ? std::numeric_limits<int32>::max() : duration;

  // the story is exposed only if the web page references exactly one story
  DialogId story_sender_dialog_id;
  StoryId story_id;
  if (web_page->story_full_ids_.size() == 1) {
    const auto &story_full_id = web_page->story_full_ids_[0];
    story_sender_dialog_id = story_full_id.get_dialog_id();
    story_id = story_full_id.get_story_id();
  }

  // at most one of the typed document fields below is non-null, chosen by the type of web_page->document_
  return make_tl_object<td_api::webPage>(
      web_page->url_, web_page->display_url_, web_page->type_, web_page->site_name_, web_page->title_,
      get_formatted_text_object(description, true, max_media_timestamp),
      get_photo_object(td_->file_manager_.get(), web_page->photo_), web_page->embed_url_, web_page->embed_type_,
      web_page->embed_dimensions_.width, web_page->embed_dimensions_.height, web_page->duration_, web_page->author_,
      web_page->has_large_media_, force_small_media, web_page->has_large_media_ && force_large_media, skip_confirmation,
      web_page->document_.type == Document::Type::Animation
          ? td_->animations_manager_->get_animation_object(web_page->document_.file_id)
          : nullptr,
      web_page->document_.type == Document::Type::Audio
          ? td_->audios_manager_->get_audio_object(web_page->document_.file_id)
          : nullptr,
      web_page->document_.type == Document::Type::General
          ? td_->documents_manager_->get_document_object(web_page->document_.file_id, PhotoFormat::Jpeg)
          : nullptr,
      web_page->document_.type == Document::Type::Sticker
          ? td_->stickers_manager_->get_sticker_object(web_page->document_.file_id)
          : nullptr,
      web_page->document_.type == Document::Type::Video
          ? td_->videos_manager_->get_video_object(web_page->document_.file_id)
          : nullptr,
      web_page->document_.type == Document::Type::VideoNote
          ? td_->video_notes_manager_->get_video_note_object(web_page->document_.file_id)
          : nullptr,
      web_page->document_.type == Document::Type::VoiceNote
          ? td_->voice_notes_manager_->get_voice_note_object(web_page->document_.file_id)
          : nullptr,
      td_->messages_manager_->get_chat_id_object(story_sender_dialog_id, "webPage"), story_id.get(),
      instant_view_version);
}
|
|
|
|
|
|
|
|
// Returns the instant view object of the web page with the given identifier,
// or nullptr if the web page isn't known or its instant view is marked as empty.
tl_object_ptr<td_api::webPageInstantView> WebPagesManager::get_web_page_instant_view_object(
    WebPageId web_page_id) const {
  const WebPage *web_page = get_web_page(web_page_id);
  if (web_page == nullptr) {
    return nullptr;
  }
  if (web_page->instant_view_.is_empty_) {
    return nullptr;
  }
  const WebPageInstantView *instant_view = &web_page->instant_view_;
  return get_web_page_instant_view_object(web_page_id, instant_view, web_page->url_);
}
|
|
|
|
|
|
|
|
// Builds the td_api::webPageInstantView object for the given instant view.
// Returns nullptr if there is no instant view or it isn't marked as loaded (the latter is logged as an error).
// The feedback link is a bot start link for "previews" with the parameter "webpage<web_page_id>".
tl_object_ptr<td_api::webPageInstantView> WebPagesManager::get_web_page_instant_view_object(
    WebPageId web_page_id, const WebPageInstantView *web_page_instant_view, Slice web_page_url) const {
  if (web_page_instant_view == nullptr) {
    return nullptr;
  }
  if (!web_page_instant_view->is_loaded_) {
    LOG(ERROR) << "Trying to get not loaded web page instant view";
    return nullptr;
  }

  auto feedback_link = td_api::make_object<td_api::internalLinkTypeBotStart>(
      "previews", PSTRING() << "webpage" << web_page_id.get(), true);
  const int32 version = web_page_instant_view->is_v2_ ? 2 : 1;
  return td_api::make_object<td_api::webPageInstantView>(
      get_page_blocks_object(web_page_instant_view->page_blocks_, td_, web_page_instant_view->url_, web_page_url),
      web_page_instant_view->view_count_, version, web_page_instant_view->is_rtl_, web_page_instant_view->is_full_,
      std::move(feedback_link));
}
|
|
|
|
|
2020-01-27 00:55:18 +01:00
|
|
|
// Notifies everything that depends on the web page about its change.
// Messages registered in web_page_messages_ are updated (or their pending web page is deleted if
// have_web_page is false), requests from pending_get_web_pages_ are completed, and the pending
// timeout of the web page is canceled.
void WebPagesManager::on_web_page_changed(WebPageId web_page_id, bool have_web_page) {
  LOG(INFO) << "Updated " << web_page_id;

  auto messages_it = web_page_messages_.find(web_page_id);
  if (messages_it != web_page_messages_.end()) {
    // the messages are processed through a copy of their identifiers, not through the stored container
    vector<MessageFullId> message_full_ids;
    for (const auto &message_full_id : messages_it->second) {
      message_full_ids.push_back(message_full_id);
    }
    CHECK(!message_full_ids.empty());

    for (const auto &message_full_id : message_full_ids) {
      if (have_web_page) {
        td_->messages_manager_->on_external_update_message_content(message_full_id);
      } else {
        td_->messages_manager_->delete_pending_message_web_page(message_full_id);
      }
    }

    // after the update the list of messages must be unchanged if the web page exists,
    // and there must be no messages left otherwise
    bool is_ok;
    if (have_web_page) {
      is_ok = web_page_messages_[web_page_id].size() == message_full_ids.size();
    } else {
      is_ok = web_page_messages_.count(web_page_id) == 0;
    }
    if (!is_ok) {
      // collect the current state to include it in the failure message
      vector<MessageFullId> new_message_full_ids;
      for (const auto &message_full_id : web_page_messages_[web_page_id]) {
        new_message_full_ids.push_back(message_full_id);
      }
      LOG_CHECK(is_ok) << have_web_page << ' ' << message_full_ids << ' ' << new_message_full_ids;
    }
  }

  auto requests_it = pending_get_web_pages_.find(web_page_id);
  if (requests_it != pending_get_web_pages_.end()) {
    // the requests are detached from the map before being answered
    auto pending_requests = std::move(requests_it->second);
    pending_get_web_pages_.erase(requests_it);

    const WebPageId result_web_page_id = have_web_page ? web_page_id : WebPageId();
    for (auto &pending_request : pending_requests) {
      on_get_web_page_preview_success(pending_request.first, result_web_page_id, std::move(pending_request.second));
    }
  }

  pending_web_pages_timeout_.cancel_timeout(web_page_id.get());
}
|
|
|
|
|
2023-06-13 15:17:01 +02:00
|
|
|
// Calls on_web_page_changed for every web page registered in story_web_pages_ for the given story.
void WebPagesManager::on_story_changed(StoryFullId story_full_id) {
  auto web_pages_it = story_web_pages_.find(story_full_id);
  if (web_pages_it == story_web_pages_.end()) {
    // no web page references the story
    return;
  }

  // the identifiers are copied first, so the notifications below don't iterate over the stored container
  vector<WebPageId> affected_web_page_ids;
  for (auto affected_web_page_id : web_pages_it->second) {
    affected_web_page_ids.push_back(affected_web_page_id);
  }

  for (auto affected_web_page_id : affected_web_page_ids) {
    on_web_page_changed(affected_web_page_id, true);
  }
}
|
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
// Looks up the web page in web_pages_ and returns whatever get_pointer returns for the identifier.
const WebPagesManager::WebPage *WebPagesManager::get_web_page(WebPageId web_page_id) const {
  const WebPage *web_page = web_pages_.get_pointer(web_page_id);
  return web_page;
}
|
|
|
|
|
|
|
|
// Returns a pointer to the instant view of the web page,
// or nullptr if the web page isn't known or its instant view is marked as empty.
const WebPagesManager::WebPageInstantView *WebPagesManager::get_web_page_instant_view(WebPageId web_page_id) const {
  const WebPage *web_page = get_web_page(web_page_id);
  if (web_page == nullptr) {
    return nullptr;
  }
  if (web_page->instant_view_.is_empty_) {
    return nullptr;
  }
  return &web_page->instant_view_;
}
|
|
|
|
|
2021-11-13 20:37:41 +01:00
|
|
|
void WebPagesManager::on_pending_web_page_timeout_callback(void *web_pages_manager_ptr, int64 web_page_id_int) {
|
|
|
|
if (G()->close_flag()) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
auto web_pages_manager = static_cast<WebPagesManager *>(web_pages_manager_ptr);
|
|
|
|
send_closure_later(web_pages_manager->actor_id(web_pages_manager), &WebPagesManager::on_pending_web_page_timeout,
|
|
|
|
WebPageId(web_page_id_int));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Handles expiration of the timeout of a pending web page.
// Messages waiting for the web page are requested from the server (except messages in secret chats),
// and all pending requests for the web page are failed with a timeout error.
void WebPagesManager::on_pending_web_page_timeout(WebPageId web_page_id) {
  if (G()->close_flag() || have_web_page(web_page_id)) {
    return;
  }

  // total number of messages and requests that were waiting for the web page
  int32 waiter_count = 0;

  auto messages_it = web_page_messages_.find(web_page_id);
  if (messages_it != web_page_messages_.end()) {
    vector<MessageFullId> reloaded_message_full_ids;
    for (const auto &message_full_id : messages_it->second) {
      // every message is counted, but messages from secret chats aren't requested from the server
      waiter_count++;
      if (message_full_id.get_dialog_id().get_type() == DialogType::SecretChat) {
        continue;
      }
      reloaded_message_full_ids.push_back(message_full_id);
    }
    if (!reloaded_message_full_ids.empty()) {
      send_closure_later(G()->messages_manager(), &MessagesManager::get_messages_from_server,
                         std::move(reloaded_message_full_ids), Promise<Unit>(), "on_pending_web_page_timeout",
                         nullptr);
    }
  }

  auto requests_it = pending_get_web_pages_.find(web_page_id);
  if (requests_it != pending_get_web_pages_.end()) {
    // the requests are detached from the map before being failed
    auto pending_requests = std::move(requests_it->second);
    pending_get_web_pages_.erase(requests_it);
    for (auto &pending_request : pending_requests) {
      pending_request.second.set_error(Status::Error(500, "Request timeout exceeded"));
      waiter_count++;
    }
  }

  if (waiter_count == 0) {
    LOG(WARNING) << "Have no messages and requests waiting for " << web_page_id;
  }
}
|
|
|
|
|
2019-02-04 23:08:15 +01:00
|
|
|
void WebPagesManager::on_get_web_page_instant_view(WebPage *web_page, tl_object_ptr<telegram_api::page> &&page,
|
2018-12-31 20:04:05 +01:00
|
|
|
int32 hash, DialogId owner_dialog_id) {
|
2019-02-04 23:08:15 +01:00
|
|
|
CHECK(page != nullptr);
|
2022-02-11 17:27:32 +01:00
|
|
|
FlatHashMap<int64, unique_ptr<Photo>> photos;
|
2019-02-04 23:08:15 +01:00
|
|
|
for (auto &photo_ptr : page->photos_) {
|
2023-01-20 15:31:33 +01:00
|
|
|
Photo photo = get_photo(td_, std::move(photo_ptr), owner_dialog_id);
|
2020-07-06 14:26:29 +02:00
|
|
|
if (photo.is_empty() || photo.id.get() == 0) {
|
2023-08-18 14:33:33 +02:00
|
|
|
LOG(ERROR) << "Receive empty photo in web page instant view for " << web_page->url_;
|
2019-06-17 01:41:57 +02:00
|
|
|
} else {
|
2020-07-06 14:26:29 +02:00
|
|
|
auto photo_id = photo.id.get();
|
2022-02-11 17:27:32 +01:00
|
|
|
photos.emplace(photo_id, make_unique<Photo>(std::move(photo)));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
}
|
2023-08-18 14:33:33 +02:00
|
|
|
if (!web_page->photo_.is_empty() && web_page->photo_.id.get() != 0) {
|
|
|
|
photos.emplace(web_page->photo_.id.get(), make_unique<Photo>(web_page->photo_));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
2022-02-07 20:41:07 +01:00
|
|
|
FlatHashMap<int64, FileId> animations;
|
|
|
|
FlatHashMap<int64, FileId> audios;
|
|
|
|
FlatHashMap<int64, FileId> documents;
|
|
|
|
FlatHashMap<int64, FileId> videos;
|
|
|
|
FlatHashMap<int64, FileId> voice_notes;
|
|
|
|
FlatHashMap<int64, FileId> others;
|
2020-01-30 02:56:08 +01:00
|
|
|
auto get_map = [&](Document::Type document_type) {
|
2019-11-24 01:32:37 +01:00
|
|
|
switch (document_type) {
|
|
|
|
case Document::Type::Animation:
|
2020-01-30 02:56:08 +01:00
|
|
|
return &animations;
|
2019-11-24 01:32:37 +01:00
|
|
|
case Document::Type::Audio:
|
2020-01-30 02:56:08 +01:00
|
|
|
return &audios;
|
2019-11-24 01:32:37 +01:00
|
|
|
case Document::Type::General:
|
2020-01-30 02:56:08 +01:00
|
|
|
return &documents;
|
2019-11-24 01:32:37 +01:00
|
|
|
case Document::Type::Video:
|
2020-01-30 02:56:08 +01:00
|
|
|
return &videos;
|
2019-11-24 01:32:37 +01:00
|
|
|
case Document::Type::VoiceNote:
|
2020-01-30 02:56:08 +01:00
|
|
|
return &voice_notes;
|
2019-11-24 01:32:37 +01:00
|
|
|
default:
|
2020-01-30 02:56:08 +01:00
|
|
|
return &others;
|
2019-11-24 01:32:37 +01:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-02-04 23:08:15 +01:00
|
|
|
for (auto &document_ptr : page->documents_) {
|
2018-12-31 20:04:05 +01:00
|
|
|
if (document_ptr->get_id() == telegram_api::document::ID) {
|
|
|
|
auto document = move_tl_object_as<telegram_api::document>(document_ptr);
|
|
|
|
auto document_id = document->id_;
|
|
|
|
auto parsed_document = td_->documents_manager_->on_get_document(std::move(document), owner_dialog_id);
|
2022-02-09 22:59:52 +01:00
|
|
|
if (!parsed_document.empty() && document_id != 0) {
|
2020-01-30 02:56:08 +01:00
|
|
|
get_map(parsed_document.type)->emplace(document_id, parsed_document.file_id);
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-11-24 01:32:37 +01:00
|
|
|
if (!others.empty()) {
|
|
|
|
auto file_view = td_->file_manager_->get_file_view(others.begin()->second);
|
|
|
|
LOG(ERROR) << "Receive document of an unexpected type " << file_view.get_type();
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
2019-11-24 01:32:37 +01:00
|
|
|
|
|
|
|
auto add_document = [&](const Document &document) {
|
|
|
|
auto file_view = td_->file_manager_->get_file_view(document.file_id);
|
2019-09-27 02:21:55 +02:00
|
|
|
if (file_view.has_remote_location()) {
|
2022-02-09 22:59:52 +01:00
|
|
|
auto document_id = file_view.remote_location().get_id();
|
|
|
|
if (document_id != 0) {
|
|
|
|
get_map(document.type)->emplace(document_id, document.file_id);
|
|
|
|
} else {
|
|
|
|
LOG(ERROR) << document.type << " has zero ID";
|
|
|
|
}
|
2019-09-27 02:21:55 +02:00
|
|
|
} else {
|
2019-11-24 01:32:37 +01:00
|
|
|
LOG(ERROR) << document.type << " has no remote location";
|
2019-09-27 02:21:55 +02:00
|
|
|
}
|
2019-11-24 01:32:37 +01:00
|
|
|
};
|
2023-08-18 14:33:33 +02:00
|
|
|
if (!web_page->document_.empty()) {
|
|
|
|
add_document(web_page->document_);
|
2019-11-26 01:45:23 +01:00
|
|
|
}
|
2023-08-18 14:33:33 +02:00
|
|
|
for (auto &document : web_page->documents_) {
|
2019-11-24 01:32:37 +01:00
|
|
|
add_document(document);
|
2019-09-27 02:21:55 +02:00
|
|
|
}
|
2018-12-31 20:04:05 +01:00
|
|
|
|
2019-02-04 23:08:15 +01:00
|
|
|
LOG(INFO) << "Receive a web page instant view with " << page->blocks_.size() << " blocks, " << animations.size()
|
2019-02-05 20:41:25 +01:00
|
|
|
<< " animations, " << audios.size() << " audios, " << documents.size() << " documents, " << photos.size()
|
2019-09-27 02:21:55 +02:00
|
|
|
<< " photos, " << videos.size() << " videos and " << voice_notes.size() << " voice notes";
|
2023-08-18 14:33:33 +02:00
|
|
|
web_page->instant_view_.page_blocks_ =
|
2019-09-27 02:21:55 +02:00
|
|
|
get_web_page_blocks(td_, std::move(page->blocks_), animations, audios, documents, photos, videos, voice_notes);
|
2023-08-18 14:33:33 +02:00
|
|
|
web_page->instant_view_.view_count_ = page->views_;
|
|
|
|
web_page->instant_view_.is_v2_ = page->v2_;
|
|
|
|
web_page->instant_view_.is_rtl_ = page->rtl_;
|
|
|
|
web_page->instant_view_.hash_ = hash;
|
|
|
|
web_page->instant_view_.url_ = std::move(page->url_);
|
|
|
|
web_page->instant_view_.is_empty_ = false;
|
|
|
|
web_page->instant_view_.is_full_ = !page->part_;
|
|
|
|
web_page->instant_view_.is_loaded_ = true;
|
2018-12-31 20:04:05 +01:00
|
|
|
|
|
|
|
LOG(DEBUG) << "Receive web page instant view: "
|
2023-08-18 14:33:33 +02:00
|
|
|
<< to_string(get_web_page_instant_view_object(WebPageId(), &web_page->instant_view_, web_page->url_));
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Log event describing a single web page.
// The same class is used in both directions: store() serializes the web page referenced by web_page_in,
// whereas parse() deserializes a web page into web_page_out.
class WebPagesManager::WebPageLogEvent {
 public:
  WebPageId web_page_id;
  // Non-owning pointer to the web page to be stored; set only by the two-argument constructor.
  // It is explicitly initialized to nullptr, so a default-constructed event, which is used for parsing,
  // never holds an indeterminate pointer.
  const WebPage *web_page_in = nullptr;
  // The web page read by parse().
  unique_ptr<WebPage> web_page_out;

  WebPageLogEvent() = default;

  WebPageLogEvent(WebPageId web_page_id, const WebPage *web_page) : web_page_id(web_page_id), web_page_in(web_page) {
  }

  // Stores the identifier followed by the web page referenced by web_page_in, which must be non-null.
  template <class StorerT>
  void store(StorerT &storer) const {
    td::store(web_page_id, storer);
    td::store(*web_page_in, storer);
  }

  // Parses the identifier followed by the web page, which is placed into web_page_out.
  template <class ParserT>
  void parse(ParserT &parser) {
    td::parse(web_page_id, parser);
    td::parse(web_page_out, parser);
  }
};
|
|
|
|
|
2019-01-21 21:55:26 +01:00
|
|
|
void WebPagesManager::save_web_page(const WebPage *web_page, WebPageId web_page_id, bool from_binlog) {
|
2023-03-13 17:47:38 +01:00
|
|
|
if (!G()->use_message_database()) {
|
2018-12-31 20:04:05 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
CHECK(web_page != nullptr);
|
|
|
|
if (!from_binlog) {
|
2020-09-22 01:15:09 +02:00
|
|
|
WebPageLogEvent log_event(web_page_id, web_page);
|
|
|
|
auto storer = get_log_event_storer(log_event);
|
2023-08-18 14:33:33 +02:00
|
|
|
if (web_page->log_event_id_ == 0) {
|
|
|
|
web_page->log_event_id_ = binlog_add(G()->td_db()->get_binlog(), LogEvent::HandlerType::WebPages, storer);
|
2018-12-31 20:04:05 +01:00
|
|
|
} else {
|
2023-08-18 14:33:33 +02:00
|
|
|
binlog_rewrite(G()->td_db()->get_binlog(), web_page->log_event_id_, LogEvent::HandlerType::WebPages, storer);
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
LOG(INFO) << "Save " << web_page_id << " to database";
|
|
|
|
G()->td_db()->get_sqlite_pmc()->set(
|
|
|
|
get_web_page_database_key(web_page_id), log_event_store(*web_page).as_slice().str(),
|
2021-10-07 11:49:46 +02:00
|
|
|
PromiseCreator::lambda([actor_id = actor_id(this), web_page_id](Result<> result) {
|
|
|
|
send_closure(actor_id, &WebPagesManager::on_save_web_page_to_database, web_page_id, result.is_ok());
|
2018-12-31 20:04:05 +01:00
|
|
|
}));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the database key of the record for the URL: the prefix "wpurl" followed by the URL itself.
string WebPagesManager::get_web_page_url_database_key(const string &url) {
  string database_key = "wpurl";
  database_key += url;
  return database_key;
}
|
|
|
|
|
|
|
|
// Restores a web page from a binlog event.
// The event is erased from the binlog if the message database isn't used or the stored
// web page identifier is invalid; otherwise the parsed web page is passed to update_web_page.
void WebPagesManager::on_binlog_web_page_event(BinlogEvent &&event) {
  if (!G()->use_message_database()) {
    binlog_erase(G()->td_db()->get_binlog(), event.id_);
    return;
  }

  WebPageLogEvent log_event;
  log_event_parse(log_event, event.get_data()).ensure();

  const auto web_page_id = log_event.web_page_id;
  if (!web_page_id.is_valid()) {
    binlog_erase(G()->td_db()->get_binlog(), event.id_);
    return;
  }

  LOG(INFO) << "Add " << web_page_id << " from binlog";
  auto restored_web_page = std::move(log_event.web_page_out);
  CHECK(restored_web_page != nullptr);

  // remember the event, so it can be rewritten or erased later
  restored_web_page->log_event_id_ = event.id_;

  update_web_page(std::move(restored_web_page), web_page_id, true, false);
}
|
|
|
|
|
|
|
|
// Returns the database key of the web page: the prefix "wp" followed by the numeric identifier.
string WebPagesManager::get_web_page_database_key(WebPageId web_page_id) {
  const auto raw_web_page_id = web_page_id.get();
  return PSTRING() << "wp" << raw_web_page_id;
}
|
|
|
|
|
|
|
|
// Handles the result of saving the web page to the database.
// On success the binlog event of the web page, if any, is erased.
// On failure the save is retried through save_web_page.
void WebPagesManager::on_save_web_page_to_database(WebPageId web_page_id, bool success) {
  if (G()->close_flag()) {
    return;
  }

  const WebPage *web_page = get_web_page(web_page_id);
  if (web_page == nullptr) {
    LOG(ERROR) << "Can't find " << (success ? "saved " : "failed to save ") << web_page_id;
    return;
  }

  if (success) {
    LOG(INFO) << "Successfully saved " << web_page_id << " to database";
    if (web_page->log_event_id_ != 0) {
      LOG(INFO) << "Erase " << web_page_id << " from binlog";
      binlog_erase(G()->td_db()->get_binlog(), web_page->log_event_id_);
      web_page->log_event_id_ = 0;
    }
    return;
  }

  LOG(ERROR) << "Failed to save " << web_page_id << " to database";
  // the binlog write is skipped on retry if the web page already has a log event
  const bool has_log_event = web_page->log_event_id_ != 0;
  save_web_page(web_page, web_page_id, has_log_event);
}
|
|
|
|
|
|
|
|
// Asynchronously loads the web page from the database and fulfills the promise afterwards.
// The promise is fulfilled immediately if the message database isn't used, a load of the web page
// was already completed, or the identifier is invalid. Concurrent requests for the same web page
// share a single database query.
void WebPagesManager::load_web_page_from_database(WebPageId web_page_id, Promise<Unit> promise) {
  const bool is_load_unneeded = !G()->use_message_database() ||
                                loaded_from_database_web_pages_.count(web_page_id) != 0 || !web_page_id.is_valid();
  if (is_load_unneeded) {
    promise.set_value(Unit());
    return;
  }

  LOG(INFO) << "Load " << web_page_id << " from database";
  auto &waiting_promises = load_web_page_from_database_queries_[web_page_id];
  waiting_promises.push_back(std::move(promise));
  if (waiting_promises.size() != 1u) {
    // a query for this web page is already in flight
    return;
  }

  auto query_promise = PromiseCreator::lambda([actor_id = actor_id(this), web_page_id](string value) {
    send_closure(actor_id, &WebPagesManager::on_load_web_page_from_database, web_page_id, std::move(value));
  });
  G()->td_db()->get_sqlite_pmc()->get(get_web_page_database_key(web_page_id), std::move(query_promise));
}
|
|
|
|
|
|
|
|
// Processes the serialized web page read from the database.
// The web page is marked as loaded from the database, the value is parsed and applied through
// update_web_page unless the web page is already known, and all promises waiting for the load are fulfilled.
// Nothing is done if closing is in progress or the web page has already been marked as loaded.
void WebPagesManager::on_load_web_page_from_database(WebPageId web_page_id, string value) {
  if (G()->close_flag()) {
    return;
  }
  CHECK(web_page_id.is_valid());
  const bool is_first_load = loaded_from_database_web_pages_.insert(web_page_id).second;
  if (!is_first_load) {
    return;
  }

  // detach the waiting promises; there may be none if the load was synchronous
  vector<Promise<Unit>> promises;
  auto queries_it = load_web_page_from_database_queries_.find(web_page_id);
  if (queries_it != load_web_page_from_database_queries_.end()) {
    promises = std::move(queries_it->second);
    CHECK(!promises.empty());
    load_web_page_from_database_queries_.erase(queries_it);
  }

  LOG(INFO) << "Successfully loaded " << web_page_id << " of size " << value.size() << " from database";
  // G()->td_db()->get_sqlite_pmc()->erase(get_web_page_database_key(web_page_id), Auto());
  // value.clear();

  // if the web page is already known, then it has already been loaded from the server
  if (!have_web_page(web_page_id) && !value.empty()) {
    auto parsed_web_page = make_unique<WebPage>();
    auto status = log_event_parse(*parsed_web_page, value);
    if (status.is_ok()) {
      update_web_page(std::move(parsed_web_page), web_page_id, true, true);
    } else {
      LOG(ERROR) << "Failed to parse web page loaded from database: " << status
                 << ", value = " << format::as_hex_dump<4>(Slice(value));
    }
  }

  set_promises(promises);
}
|
|
|
|
|
|
|
|
// Returns true if get_web_page_force finds the web page.
bool WebPagesManager::have_web_page_force(WebPageId web_page_id) {
  const WebPage *web_page = get_web_page_force(web_page_id);
  return web_page != nullptr;
}
|
|
|
|
|
2019-01-21 21:55:26 +01:00
|
|
|
// Returns the web page, synchronously loading it from the database if it isn't in memory yet.
// Returns nullptr if the web page isn't in memory and the message database isn't used, the identifier
// is invalid, a load of the web page was already completed, or the load didn't produce the web page.
const WebPagesManager::WebPage *WebPagesManager::get_web_page_force(WebPageId web_page_id) {
  const WebPage *cached_web_page = get_web_page(web_page_id);
  if (cached_web_page != nullptr) {
    return cached_web_page;
  }

  if (!G()->use_message_database() || !web_page_id.is_valid() ||
      loaded_from_database_web_pages_.count(web_page_id) != 0) {
    return nullptr;
  }

  LOG(INFO) << "Trying to load " << web_page_id << " from database";
  auto stored_value = G()->td_db()->get_sqlite_sync_pmc()->get(get_web_page_database_key(web_page_id));
  on_load_web_page_from_database(web_page_id, std::move(stored_value));
  return get_web_page(web_page_id);
}
|
|
|
|
|
2019-01-30 22:37:38 +01:00
|
|
|
// Returns the file source identifier stored in the web page, creating it from the
// web page URL and remembering it in the web page on the first request.
FileSourceId WebPagesManager::get_web_page_file_source_id(WebPage *web_page) {
  auto &file_source_id = web_page->file_source_id_;
  if (file_source_id.is_valid()) {
    VLOG(file_references) << "Return " << file_source_id << " for URL " << web_page->url_;
    return file_source_id;
  }

  file_source_id = td_->file_reference_manager_->create_web_page_file_source(web_page->url_);
  VLOG(file_references) << "Create " << file_source_id << " for URL " << web_page->url_;
  return file_source_id;
}
|
|
|
|
|
|
|
|
// Returns the file source identifier for the given URL.
// An empty URL yields an invalid (default-constructed) FileSourceId.
// If a web page is known for the URL, the identifier is stored in and returned from that web page;
// otherwise, it is kept in url_to_file_source_id_. In both cases a missing identifier is created
// through the FileReferenceManager.
FileSourceId WebPagesManager::get_url_file_source_id(const string &url) {
  if (url.empty()) {
    return FileSourceId();
  }

  const auto web_page_id = get_web_page_by_url(url);
  const WebPage *web_page = web_page_id.is_valid() ? get_web_page(web_page_id) : nullptr;
  if (web_page != nullptr) {
    if (web_page->file_source_id_.is_valid()) {
      VLOG(file_references) << "Return " << web_page->file_source_id_ << " for " << web_page_id << " with URL "
                            << url;
    } else {
      // web_page is a pointer to const, so the stored web page is modified through web_pages_
      web_pages_[web_page_id]->file_source_id_ =
          td_->file_reference_manager_->create_web_page_file_source(web_page->url_);
      VLOG(file_references) << "Create " << web_page->file_source_id_ << " for " << web_page_id << " with URL "
                            << url;
    }
    return web_page->file_source_id_;
  }

  // no web page is known for the URL
  auto &url_source_id = url_to_file_source_id_[url];
  if (url_source_id.is_valid()) {
    VLOG(file_references) << "Return " << url_source_id << " for URL " << url;
  } else {
    url_source_id = td_->file_reference_manager_->create_web_page_file_source(url);
    VLOG(file_references) << "Create " << url_source_id << " for URL " << url;
  }
  return url_source_id;
}
|
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
// Returns the text used for searching the web page: its title and description separated by a space.
// Returns an empty string if the web page is unknown.
string WebPagesManager::get_web_page_search_text(WebPageId web_page_id) const {
  const WebPage *web_page = get_web_page(web_page_id);
  if (web_page == nullptr) {
    return "";
  }
  // stream the parts directly instead of concatenating them with operator+ first,
  // which would create two temporary strings before the result is built
  return PSTRING() << web_page->title_ << ' ' << web_page->description_;
}
|
|
|
|
|
2021-08-05 01:09:04 +02:00
|
|
|
// Returns the media duration of the web page with the given identifier, or -1 if the web page is unknown.
int32 WebPagesManager::get_web_page_media_duration(WebPageId web_page_id) const {
  const WebPage *web_page = get_web_page(web_page_id);
  return web_page == nullptr ? -1 : get_web_page_media_duration(web_page);
}
|
|
|
|
|
2023-06-13 14:45:10 +02:00
|
|
|
// Returns the media duration of the given web page:
// - the stored duration for audio, video, video note and voice note documents and for "iframe" embeds;
// - otherwise, for a web page with stories, the duration of the first story if it is non-negative,
//   or the stored duration if it is negative;
// - -1 in all other cases.
// The web page pointer must not be null.
int32 WebPagesManager::get_web_page_media_duration(const WebPage *web_page) const {
  const auto document_type = web_page->document_.type;
  const bool is_timed_document = document_type == Document::Type::Audio || document_type == Document::Type::Video ||
                                 document_type == Document::Type::VideoNote ||
                                 document_type == Document::Type::VoiceNote;
  if (is_timed_document || web_page->embed_type_ == "iframe") {
    return web_page->duration_;
  }

  const auto &story_full_ids = web_page->story_full_ids_;
  if (story_full_ids.empty()) {
    return -1;
  }

  auto story_duration = td_->story_manager_->get_story_duration(story_full_ids[0]);
  if (story_duration < 0) {
    // use the duration stored in the web page instead
    return web_page->duration_;
  }
  return story_duration;
}
|
|
|
|
|
2023-06-22 21:39:47 +02:00
|
|
|
// Returns the full identifier of the first story of the web page,
// or a default-constructed StoryFullId if the web page is unknown or has no stories.
StoryFullId WebPagesManager::get_web_page_story_full_id(WebPageId web_page_id) const {
  const WebPage *web_page = get_web_page(web_page_id);
  if (web_page != nullptr && !web_page->story_full_ids_.empty()) {
    return web_page->story_full_ids_[0];
  }
  return StoryFullId();
}
|
|
|
|
|
2023-06-13 16:14:19 +02:00
|
|
|
// Returns identifiers of the users whose dialogs own the stories of the web page.
// The result is empty if the web page is unknown or has no stories posted in user dialogs.
vector<UserId> WebPagesManager::get_web_page_user_ids(WebPageId web_page_id) const {
  vector<UserId> user_ids;
  const WebPage *web_page = get_web_page(web_page_id);
  if (web_page == nullptr) {
    return user_ids;
  }

  for (auto story_full_id : web_page->story_full_ids_) {
    auto owner_dialog_id = story_full_id.get_dialog_id();
    if (owner_dialog_id.get_type() != DialogType::User) {
      continue;
    }
    user_ids.push_back(owner_dialog_id.get_user_id());
  }
  return user_ids;
}
|
|
|
|
|
2019-02-28 19:40:57 +01:00
|
|
|
// Collects identifiers of the files referenced by the web page: the files of its photo,
// of its main document, of its additional documents, and of the page blocks of a non-empty instant view.
// Returns an empty list for a null web page.
vector<FileId> WebPagesManager::get_web_page_file_ids(const WebPage *web_page) const {
  vector<FileId> file_ids;
  if (web_page == nullptr) {
    return file_ids;
  }

  file_ids = photo_get_file_ids(web_page->photo_);

  if (!web_page->document_.empty()) {
    web_page->document_.append_file_ids(td_, file_ids);
  }
  for (const auto &extra_document : web_page->documents_) {
    extra_document.append_file_ids(td_, file_ids);
  }

  const auto &instant_view = web_page->instant_view_;
  if (!instant_view.is_empty_) {
    for (const auto &block : instant_view.page_blocks_) {
      block->append_file_ids(td_, file_ids);
    }
  }
  return file_ids;
}
|
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
} // namespace td
|