Allow characters from the Sinhala Unicode block (U+0D80–U+0DFF) in hashtags.

This commit is contained in:
levlam 2022-07-29 16:23:05 +03:00
parent 8072fe673e
commit 8f81a46ede
2 changed files with 11 additions and 3 deletions

View File

@ -350,7 +350,7 @@ static vector<Slice> match_bot_commands(Slice str) {
static bool is_hashtag_letter(uint32 c, UnicodeSimpleCategory &category) {
category = get_unicode_simple_category(c);
if (c == '_' || c == 0x200c || c == 0xb7) {
if (c == '_' || c == 0x200c || c == 0xb7 || (0xd80 <= c && c <= 0xdff)) {
return true;
}
switch (category) {
@ -368,7 +368,7 @@ static vector<Slice> match_hashtags(Slice str) {
const unsigned char *end = str.uend();
const unsigned char *ptr = begin;
// '/(?<=^|[^\d_\pL\x{200c}])#([\d_\pL\x{200c}]{1,256})(?![\d_\pL\x{200c}]*#)/u'
// '/(?<=^|[^\d_\pL\x{200c}\x{0d80}-\x{0dff}])#([\d_\pL\x{200c}\x{0d80}-\x{0dff}]{1,256})(?![\d_\pL\x{200c}\x{0d80}-\x{0dff}]*#)/u'
// and at least one letter
UnicodeSimpleCategory category;
@ -431,7 +431,7 @@ static vector<Slice> match_cashtags(Slice str) {
const unsigned char *end = str.uend();
const unsigned char *ptr = begin;
// '/(?<=^|[^$\d_\pL\x{200c}])\$(1INCH|[A-Z]{1,8})(?![$\d_\pL\x{200c}])/u'
// '/(?<=^|[^$\d_\pL\x{200c}\x{0d80}-\x{0dff}])\$(1INCH|[A-Z]{1,8})(?![$\d_\pL\x{200c}\x{0d80}-\x{0dff}])/u'
UnicodeSimpleCategory category;
while (true) {

View File

@ -114,6 +114,8 @@ TEST(MessageEntities, hashtag) {
"ООО" + td::string(200, '2'),
{"#" + td::string(200, '1') + "ООО" + td::string(53, '2')});
check_hashtag(u8"#a\u2122", {"#a"});
check_hashtag("#a൹", {"#a"});
check_hashtag("#aඁ෴ก฿", {"#aඁ෴ก"});
}
static void check_cashtag(const td::string &str, const td::vector<td::string> &expected) {
@ -173,6 +175,12 @@ TEST(MessageEntities, cashtag) {
check_cashtag(u8"$ABC\u2122", {"$ABC"});
check_cashtag(u8"\u2122$ABC", {"$ABC"});
check_cashtag(u8"\u2122$ABC\u2122", {"$ABC"});
check_cashtag("$ABC൹", {"$ABC"});
check_cashtag("$ABCඁ", {});
check_cashtag("$ABC", {});
check_cashtag("$ABC෴", {});
check_cashtag("$ABCก", {});
check_cashtag("$ABC฿", {"$ABC"});
}
static void check_media_timestamp(const td::string &str, const td::vector<std::pair<td::string, td::int32>> &expected) {