2018-12-31 20:04:05 +01:00
|
|
|
//
|
2024-01-01 01:07:21 +01:00
|
|
|
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2024
|
2018-12-31 20:04:05 +01:00
|
|
|
//
|
|
|
|
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
|
|
|
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
//
|
|
|
|
#pragma once
|
|
|
|
|
2022-10-03 00:26:32 +02:00
|
|
|
#include "td/telegram/CustomEmojiId.h"
|
2018-09-29 03:41:15 +02:00
|
|
|
#include "td/telegram/DialogId.h"
|
2021-09-18 23:47:05 +02:00
|
|
|
#include "td/telegram/secret_api.h"
|
|
|
|
#include "td/telegram/td_api.h"
|
|
|
|
#include "td/telegram/telegram_api.h"
|
2018-12-31 20:04:05 +01:00
|
|
|
#include "td/telegram/UserId.h"
|
|
|
|
|
|
|
|
#include "td/utils/common.h"
|
2022-03-11 19:38:48 +01:00
|
|
|
#include "td/utils/FlatHashSet.h"
|
2023-01-18 18:45:46 +01:00
|
|
|
#include "td/utils/HashTableUtils.h"
|
2018-12-31 20:04:05 +01:00
|
|
|
#include "td/utils/Slice.h"
|
|
|
|
#include "td/utils/Status.h"
|
|
|
|
#include "td/utils/StringBuilder.h"
|
|
|
|
|
|
|
|
#include <utility>
|
|
|
|
|
|
|
|
namespace td {
|
|
|
|
|
2022-03-11 13:10:24 +01:00
|
|
|
class Dependencies;
|
2022-07-27 22:47:16 +02:00
|
|
|
class MultiPromiseActor;
|
2022-07-27 19:35:40 +02:00
|
|
|
class Td;
|
2024-04-02 02:52:34 +02:00
|
|
|
class UserManager;
|
2018-01-30 18:06:54 +01:00
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
class MessageEntity {
|
|
|
|
public:
|
|
|
|
enum class Type : int32 {
|
|
|
|
Mention,
|
|
|
|
Hashtag,
|
|
|
|
BotCommand,
|
|
|
|
Url,
|
|
|
|
EmailAddress,
|
|
|
|
Bold,
|
|
|
|
Italic,
|
|
|
|
Code,
|
|
|
|
Pre,
|
|
|
|
PreCode,
|
|
|
|
TextUrl,
|
2018-03-08 14:28:54 +01:00
|
|
|
MentionName,
|
2018-03-12 22:17:29 +01:00
|
|
|
Cashtag,
|
2019-09-19 21:07:11 +02:00
|
|
|
PhoneNumber,
|
|
|
|
Underline,
|
|
|
|
Strikethrough,
|
2020-02-13 16:07:40 +01:00
|
|
|
BlockQuote,
|
2020-08-23 20:25:06 +02:00
|
|
|
BankCardNumber,
|
2021-07-26 06:53:36 +02:00
|
|
|
MediaTimestamp,
|
2021-12-28 18:41:37 +01:00
|
|
|
Spoiler,
|
2022-07-18 00:03:58 +02:00
|
|
|
CustomEmoji,
|
2020-08-23 20:25:06 +02:00
|
|
|
Size
|
2018-12-31 20:04:05 +01:00
|
|
|
};
|
2021-08-06 08:14:52 +02:00
|
|
|
Type type = Type::Size;
|
|
|
|
int32 offset = -1;
|
|
|
|
int32 length = -1;
|
|
|
|
int32 media_timestamp = -1;
|
2018-12-31 20:04:05 +01:00
|
|
|
string argument;
|
|
|
|
UserId user_id;
|
2022-10-03 00:26:32 +02:00
|
|
|
CustomEmojiId custom_emoji_id;
|
2018-12-31 20:04:05 +01:00
|
|
|
|
|
|
|
MessageEntity() = default;
|
|
|
|
|
|
|
|
MessageEntity(Type type, int32 offset, int32 length, string argument = "")
|
2021-08-13 12:10:54 +02:00
|
|
|
: type(type), offset(offset), length(length), argument(std::move(argument)) {
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
MessageEntity(int32 offset, int32 length, UserId user_id)
|
2021-08-13 12:10:54 +02:00
|
|
|
: type(Type::MentionName), offset(offset), length(length), user_id(user_id) {
|
2021-08-06 08:14:52 +02:00
|
|
|
}
|
|
|
|
MessageEntity(Type type, int32 offset, int32 length, int32 media_timestamp)
|
2021-08-13 12:10:54 +02:00
|
|
|
: type(type), offset(offset), length(length), media_timestamp(media_timestamp) {
|
2021-08-06 08:14:52 +02:00
|
|
|
CHECK(type == Type::MediaTimestamp);
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
2022-10-03 00:26:32 +02:00
|
|
|
MessageEntity(Type type, int32 offset, int32 length, CustomEmojiId custom_emoji_id)
|
|
|
|
: type(type), offset(offset), length(length), custom_emoji_id(custom_emoji_id) {
|
2022-07-18 00:03:58 +02:00
|
|
|
CHECK(type == Type::CustomEmoji);
|
|
|
|
}
|
2018-12-31 20:04:05 +01:00
|
|
|
|
|
|
|
tl_object_ptr<td_api::textEntity> get_text_entity_object() const;
|
|
|
|
|
|
|
|
bool operator==(const MessageEntity &other) const {
|
2021-08-09 18:37:47 +02:00
|
|
|
return offset == other.offset && length == other.length && type == other.type &&
|
2022-07-18 22:40:57 +02:00
|
|
|
media_timestamp == other.media_timestamp && argument == other.argument && user_id == other.user_id &&
|
2022-10-03 00:26:32 +02:00
|
|
|
custom_emoji_id == other.custom_emoji_id;
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
bool operator<(const MessageEntity &other) const {
|
2019-09-23 21:57:02 +02:00
|
|
|
if (offset != other.offset) {
|
|
|
|
return offset < other.offset;
|
|
|
|
}
|
|
|
|
if (length != other.length) {
|
|
|
|
return length > other.length;
|
|
|
|
}
|
|
|
|
auto priority = get_type_priority(type);
|
|
|
|
auto other_priority = get_type_priority(other.type);
|
|
|
|
return priority < other_priority;
|
2018-12-31 20:04:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
bool operator!=(const MessageEntity &rhs) const {
|
|
|
|
return !(*this == rhs);
|
|
|
|
}
|
|
|
|
|
|
|
|
template <class StorerT>
|
2018-04-02 00:45:51 +02:00
|
|
|
void store(StorerT &storer) const;
|
2018-12-31 20:04:05 +01:00
|
|
|
|
|
|
|
template <class ParserT>
|
2018-04-02 00:45:51 +02:00
|
|
|
void parse(ParserT &parser);
|
2019-09-23 21:57:02 +02:00
|
|
|
|
|
|
|
private:
|
|
|
|
tl_object_ptr<td_api::TextEntityType> get_text_entity_type_object() const;
|
|
|
|
|
|
|
|
static int get_type_priority(Type type);
|
2018-12-31 20:04:05 +01:00
|
|
|
};
|
|
|
|
|
2019-10-03 01:31:06 +02:00
|
|
|
// Stream-style output of an entity type into a StringBuilder.
// Returns a StringBuilder reference (presumably string_builder itself, to allow chaining).
StringBuilder &operator<<(StringBuilder &string_builder, const MessageEntity::Type &message_entity_type);
|
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
// Stream-style output of a whole entity into a StringBuilder.
// Returns a StringBuilder reference (presumably string_builder itself, to allow chaining).
StringBuilder &operator<<(StringBuilder &string_builder, const MessageEntity &message_entity);
|
|
|
|
|
2018-01-30 18:06:54 +01:00
|
|
|
struct FormattedText {
|
|
|
|
string text;
|
|
|
|
vector<MessageEntity> entities;
|
|
|
|
|
|
|
|
template <class StorerT>
|
2018-04-02 00:45:51 +02:00
|
|
|
void store(StorerT &storer) const;
|
2018-01-30 18:06:54 +01:00
|
|
|
|
|
|
|
template <class ParserT>
|
2018-04-02 00:45:51 +02:00
|
|
|
void parse(ParserT &parser);
|
2018-01-30 18:06:54 +01:00
|
|
|
};
|
|
|
|
|
2023-01-18 18:45:46 +01:00
|
|
|
struct FormattedTextHash {
|
|
|
|
uint32 operator()(const FormattedText &formatted_text) const {
|
|
|
|
auto hash = Hash<string>()(formatted_text.text);
|
|
|
|
for (auto &entity : formatted_text.entities) {
|
2023-07-27 18:05:15 +02:00
|
|
|
hash = combine_hashes(hash, Hash<int32>()(static_cast<int32>(entity.type)));
|
|
|
|
hash = combine_hashes(hash, Hash<int32>()(entity.length));
|
|
|
|
hash = combine_hashes(hash, Hash<int32>()(entity.offset));
|
2023-01-18 18:45:46 +01:00
|
|
|
}
|
|
|
|
return hash;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2020-03-11 02:09:23 +01:00
|
|
|
// Stream-style output of a FormattedText into a StringBuilder.
// Returns a StringBuilder reference (presumably string_builder itself, to allow chaining).
StringBuilder &operator<<(StringBuilder &string_builder, const FormattedText &text);
|
|
|
|
|
2018-01-30 18:06:54 +01:00
|
|
|
// Two formatted texts are equal when both the text and the entity lists are equal.
inline bool operator==(const FormattedText &lhs, const FormattedText &rhs) {
  if (!(lhs.text == rhs.text)) {
    return false;
  }
  return lhs.entities == rhs.entities;
}
|
|
|
|
|
|
|
|
// Negation of operator== for FormattedText.
inline bool operator!=(const FormattedText &lhs, const FormattedText &rhs) {
  const bool are_equal = (lhs == rhs);
  return !are_equal;
}
|
|
|
|
|
2022-03-11 19:38:48 +01:00
|
|
|
// Returns a reference to a set of Slice values.
// NOTE(review): by name these are the short usernames treated as valid; the contents and the
// lifetime of the returned set are defined by the implementation - confirm there.
const FlatHashSet<Slice, SliceHash> &get_valid_short_usernames();
|
2018-12-31 20:04:05 +01:00
|
|
|
|
2024-04-02 02:52:34 +02:00
|
|
|
// Builds MessageEntity values from td_api text entities; input_entities is taken by rvalue reference.
// The result is wrapped in Result, so the call can fail. The failure conditions and the effect
// of allow_all (false by default) are defined by the implementation.
Result<vector<MessageEntity>> get_message_entities(
    const UserManager *user_manager, vector<tl_object_ptr<td_api::textEntity>> &&input_entities,
    bool allow_all = false);
|
2018-01-30 18:06:54 +01:00
|
|
|
|
2021-07-22 04:39:16 +02:00
|
|
|
// Produces td_api::textEntity objects for the given entities.
// NOTE(review): skip_bot_commands and max_media_timestamp presumably filter what is emitted;
// confirm the exact rules in the implementation.
vector<tl_object_ptr<td_api::textEntity>> get_text_entities_object(
    const vector<MessageEntity> &entities, bool skip_bot_commands, int32 max_media_timestamp);
|
2018-12-31 20:04:05 +01:00
|
|
|
|
2021-08-04 08:28:53 +02:00
|
|
|
// Produces a td_api::formattedText object for the given FormattedText.
// NOTE(review): skip_bot_commands and max_media_timestamp presumably filter the exported
// entities; confirm the exact rules in the implementation.
td_api::object_ptr<td_api::formattedText> get_formatted_text_object(
    const FormattedText &text, bool skip_bot_commands, int32 max_media_timestamp);
|
2018-01-30 18:06:54 +01:00
|
|
|
|
2022-07-27 22:36:44 +02:00
|
|
|
// Modifies entities in place (taken by non-const reference).
// NOTE(review): by name this drops custom emoji entities that require Premium; the meaning of
// remove_unknown is not visible here - confirm against the implementation.
void remove_premium_custom_emoji_entities(const Td *td, vector<MessageEntity> &entities, bool remove_unknown);
|
|
|
|
|
2022-07-27 19:35:40 +02:00
|
|
|
// Modifies text in place (taken by non-const reference).
// NOTE(review): by name this removes entities that are not allowed for dialog_id; which
// entities count as unallowed is decided in the implementation.
void remove_unallowed_entities(const Td *td, FormattedText &text, DialogId dialog_id);
|
2022-07-22 15:47:58 +02:00
|
|
|
|
2022-07-16 15:10:06 +02:00
|
|
|
// Returns entities found in text.
// NOTE(review): the two flags presumably suppress bot command and media timestamp entities
// respectively; confirm in the implementation.
vector<MessageEntity> find_entities(Slice text, bool skip_bot_commands, bool skip_media_timestamps);
|
2018-12-31 20:04:05 +01:00
|
|
|
|
|
|
|
// Returns slices for the mentions found in str (matching rules live in the implementation).
// NOTE(review): the slices presumably refer into str's buffer - confirm before storing them.
vector<Slice> find_mentions(Slice str);
|
|
|
|
// Returns slices for the bot commands found in str (matching rules live in the implementation).
// NOTE(review): the slices presumably refer into str's buffer - confirm before storing them.
vector<Slice> find_bot_commands(Slice str);
|
|
|
|
// Returns slices for the hashtags found in str (matching rules live in the implementation).
// NOTE(review): the slices presumably refer into str's buffer - confirm before storing them.
vector<Slice> find_hashtags(Slice str);
|
2018-03-07 18:29:33 +01:00
|
|
|
// Returns slices for the cashtags found in str (matching rules live in the implementation).
// NOTE(review): the slices presumably refer into str's buffer - confirm before storing them.
vector<Slice> find_cashtags(Slice str);
|
2020-02-13 16:07:40 +01:00
|
|
|
// Returns slices for the bank card numbers found in str (matching rules live in the implementation).
// NOTE(review): the slices presumably refer into str's buffer - confirm before storing them.
vector<Slice> find_bank_card_numbers(Slice str);
|
2021-06-03 17:27:40 +02:00
|
|
|
// Returns slices for the "tg" URLs found in str (matching rules live in the implementation).
// NOTE(review): the slices presumably refer into str's buffer - confirm before storing them.
vector<Slice> find_tg_urls(Slice str);
|
2018-12-31 20:04:05 +01:00
|
|
|
// Predicate on str; by name, tells whether it is an email address (rules live in the implementation).
bool is_email_address(Slice str);
|
2021-07-28 07:30:22 +02:00
|
|
|
// Returns one pair per match found in str: the matched slice and a flag telling whether the
// match is an email address.
vector<std::pair<Slice, bool>> find_urls(Slice str); // slice + is_email_address
|
|
|
|
// Returns one pair per match found in str: the matched slice and its media timestamp value.
// NOTE(review): the unit of the timestamp is not visible here - confirm in the implementation.
vector<std::pair<Slice, int32>> find_media_timestamps(Slice str); // slice + media_timestamp
|
2018-12-31 20:04:05 +01:00
|
|
|
|
2022-07-30 23:34:30 +02:00
|
|
|
// Modifies entities in place (taken by non-const reference).
// NOTE(review): by name this erases empty entities; what counts as empty is decided in the implementation.
void remove_empty_entities(vector<MessageEntity> &entities);
|
|
|
|
|
2023-11-01 20:13:29 +01:00
|
|
|
// Returns a Slice for the first URL of text, going by the name.
// NOTE(review): the result when there is no URL, and what the Slice refers to, are not
// visible here - confirm in the implementation.
Slice get_first_url(const FormattedText &text);
|
2018-12-31 20:04:05 +01:00
|
|
|
|
2023-10-19 12:20:48 +02:00
|
|
|
// Predicate on (text, url); by name, tells whether url is visible in text.
// NOTE(review): the definition of "visible" lives in the implementation.
bool is_visible_url(const FormattedText &text, const string &url);
|
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
// Parses Markdown in text and returns the resulting entities, or an error via Result.
// text is taken by non-const reference, so it may be rewritten (presumably with the markup
// stripped - confirm in the implementation).
Result<vector<MessageEntity>> parse_markdown(string &text);
|
|
|
|
|
2019-10-03 01:31:06 +02:00
|
|
|
// Parses "Markdown v2" in text and returns the resulting entities, or an error via Result.
// text is taken by non-const reference, so it may be rewritten (presumably with the markup
// stripped - confirm in the implementation).
Result<vector<MessageEntity>> parse_markdown_v2(string &text);
|
|
|
|
|
2020-03-09 21:55:32 +01:00
|
|
|
// Takes a FormattedText by value and returns a FormattedText; unlike the other Markdown
// parsers it has no error channel in its signature.
// NOTE(review): presumably converts "Markdown v3" markup in the text into entities - confirm.
FormattedText parse_markdown_v3(FormattedText text);
|
|
|
|
|
2020-03-12 04:22:14 +01:00
|
|
|
// Takes a FormattedText by value and returns a FormattedText.
// NOTE(review): by name this is the inverse direction of parse_markdown_v3 (entities rendered
// back as markup) - confirm in the implementation.
FormattedText get_markdown_v3(FormattedText text);
|
|
|
|
|
2023-01-09 10:59:14 +01:00
|
|
|
// Parses HTML markup in str and returns the resulting entities, or an error via Result.
// str is taken by non-const reference, so it may be rewritten (presumably with the markup
// stripped - confirm in the implementation).
Result<vector<MessageEntity>> parse_html(string &str);
|
2018-12-31 20:04:05 +01:00
|
|
|
|
2024-04-02 02:52:34 +02:00
|
|
|
// Produces telegram_api::MessageEntity objects for the given entities.
// NOTE(review): source is presumably a caller tag used for diagnostics - confirm.
vector<tl_object_ptr<telegram_api::MessageEntity>> get_input_message_entities(
    const UserManager *user_manager, const vector<MessageEntity> &entities, const char *source);
|
2018-12-31 20:04:05 +01:00
|
|
|
|
2024-04-02 02:52:34 +02:00
|
|
|
// Overload taking a pointer to a FormattedText instead of an entity list.
// NOTE(review): whether text may be nullptr is not visible here; source is presumably a
// caller tag used for diagnostics - confirm both.
vector<tl_object_ptr<telegram_api::MessageEntity>> get_input_message_entities(
    const UserManager *user_manager, const FormattedText *text, const char *source);
|
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
// Produces secret_api::MessageEntity objects for the given entities.
// NOTE(review): layer presumably selects which entity kinds the secret chat layer supports - confirm.
vector<tl_object_ptr<secret_api::MessageEntity>> get_input_secret_message_entities(const vector<MessageEntity> &entities,
                                                                                   int32 layer);
|
2018-12-31 20:04:05 +01:00
|
|
|
|
2022-07-27 22:47:16 +02:00
|
|
|
// Builds MessageEntity values from secret_api entities; secret_entities is taken by rvalue reference.
// NOTE(review): the roles of is_premium and load_data_multipromise are not visible here -
// confirm in the implementation.
vector<MessageEntity> get_message_entities(
    Td *td, vector<tl_object_ptr<secret_api::MessageEntity>> &&secret_entities, bool is_premium,
    MultiPromiseActor &load_data_multipromise);
|
2018-12-31 20:04:05 +01:00
|
|
|
|
2024-04-02 02:52:34 +02:00
|
|
|
// Produces a telegram_api::textWithEntities object for the given FormattedText.
// NOTE(review): source is presumably a caller tag used for diagnostics - confirm.
telegram_api::object_ptr<telegram_api::textWithEntities> get_input_text_with_entities(
    const UserManager *user_manager, const FormattedText &text, const char *source);
|
2023-01-18 13:38:36 +01:00
|
|
|
|
2024-04-23 18:23:02 +02:00
|
|
|
// Builds a FormattedText from a text and telegram_api entities; text and server_entities are
// taken by rvalue reference.
// NOTE(review): skip_media_timestamps, skip_trim and source are interpreted by the
// implementation - confirm their exact effect there.
FormattedText get_formatted_text(
    const UserManager *user_manager, string &&text,
    vector<telegram_api::object_ptr<telegram_api::MessageEntity>> &&server_entities,
    bool skip_media_timestamps, bool skip_trim, const char *source);
|
2024-04-23 18:23:02 +02:00
|
|
|
|
2024-04-02 02:52:34 +02:00
|
|
|
// Overload taking a telegram_api::textWithEntities object, passed by value.
// NOTE(review): skip_media_timestamps, skip_trim and source are interpreted by the
// implementation - confirm their exact effect there.
FormattedText get_formatted_text(
    const UserManager *user_manager,
    telegram_api::object_ptr<telegram_api::textWithEntities> text_with_entities,
    bool skip_media_timestamps, bool skip_trim, const char *source);
|
2023-01-18 13:38:36 +01:00
|
|
|
|
2024-04-25 15:30:33 +02:00
|
|
|
// Modifies entities in place (taken by non-const reference).
// NOTE(review): what "fixing" covers is decided in the implementation.
void fix_entities(vector<MessageEntity> &entities);
|
|
|
|
|
2018-02-20 03:41:17 +01:00
|
|
|
// like clean_input_string but also validates entities
|
|
|
|
Status fix_formatted_text(string &text, vector<MessageEntity> &entities, bool allow_empty, bool skip_new_entities,
|
2023-12-04 14:27:15 +01:00
|
|
|
bool skip_bot_commands, bool skip_media_timestamps, bool skip_trim,
|
|
|
|
int32 *ltrim_count = nullptr) TD_WARN_UNUSED_RESULT;
|
2018-02-20 03:41:17 +01:00
|
|
|
|
2024-04-02 02:52:34 +02:00
|
|
|
// Builds the FormattedText of a message from its text (taken by value) and its telegram_api
// entities (taken by rvalue reference).
// NOTE(review): the effect of skip_new_entities, skip_media_timestamps, send_date, from_album
// and source is defined by the implementation - confirm there.
FormattedText get_message_text(
    const UserManager *user_manager, string message_text,
    vector<tl_object_ptr<telegram_api::MessageEntity>> &&server_entities, bool skip_new_entities,
    bool skip_media_timestamps, int32 send_date, bool from_album, const char *source);
|
2018-09-28 22:57:34 +02:00
|
|
|
|
2023-10-29 19:22:25 +01:00
|
|
|
// Modifies text in place (taken by non-const reference).
// NOTE(review): by name this shortens text to at most length; the unit of length and how
// entities crossing the cut are handled are not visible here - confirm.
void truncate_formatted_text(FormattedText &text, size_t length);
|
|
|
|
|
2022-08-15 14:37:17 +02:00
|
|
|
// Builds a FormattedText from a td_api::formattedText object taken by rvalue reference.
// The result is wrapped in Result, so the call can fail. ltrim_count is an optional
// out-parameter (nullptr by default).
// NOTE(review): failure conditions, the value written to ltrim_count and the effect of the
// boolean flags are defined by the implementation.
Result<FormattedText> get_formatted_text(
    const Td *td, DialogId dialog_id, td_api::object_ptr<td_api::formattedText> &&text,
    bool is_bot, bool allow_empty, bool skip_media_timestamps, bool skip_trim,
    int32 *ltrim_count = nullptr);
|
2022-08-15 14:37:17 +02:00
|
|
|
|
2018-09-28 04:09:28 +02:00
|
|
|
// Updates dependencies (taken by non-const reference) based on text.
// NOTE(review): which objects are registered, and whether text may be nullptr, are not
// visible here - confirm in the implementation.
void add_formatted_text_dependencies(Dependencies &dependencies, const FormattedText *text);
|
2018-09-28 03:21:20 +02:00
|
|
|
|
2021-08-06 08:14:52 +02:00
|
|
|
// Predicate on text; by name, tells whether it contains media timestamp entities.
// NOTE(review): presumably only timestamps within [min_media_timestamp, max_media_timestamp]
// count; bound inclusivity and nullptr handling should be confirmed.
bool has_media_timestamps(const FormattedText *text, int32 min_media_timestamp, int32 max_media_timestamp);
|
2021-08-05 04:41:24 +02:00
|
|
|
|
2021-07-22 05:54:43 +02:00
|
|
|
// Predicate on text; by name, tells whether it contains bot command entities.
// NOTE(review): whether text may be nullptr is not visible here - confirm.
bool has_bot_commands(const FormattedText *text);
|
|
|
|
|
2024-04-02 02:52:34 +02:00
|
|
|
// Predicate; by name, tells whether bot commands must always be skipped for dialog_id.
// NOTE(review): the decision rules, including the role of is_bot, live in the implementation.
bool need_always_skip_bot_commands(const UserManager *user_manager, DialogId dialog_id, bool is_bot);
|
2018-09-28 22:57:34 +02:00
|
|
|
|
2018-12-31 20:04:05 +01:00
|
|
|
} // namespace td
|