Allow characters from the Sinhala Unicode block (U+0D80–U+0DFF) in hashtags.
This commit is contained in:
parent
8072fe673e
commit
8f81a46ede
@ -350,7 +350,7 @@ static vector<Slice> match_bot_commands(Slice str) {
|
||||
|
||||
static bool is_hashtag_letter(uint32 c, UnicodeSimpleCategory &category) {
|
||||
category = get_unicode_simple_category(c);
|
||||
if (c == '_' || c == 0x200c || c == 0xb7) {
|
||||
if (c == '_' || c == 0x200c || c == 0xb7 || (0xd80 <= c && c <= 0xdff)) {
|
||||
return true;
|
||||
}
|
||||
switch (category) {
|
||||
@ -368,7 +368,7 @@ static vector<Slice> match_hashtags(Slice str) {
|
||||
const unsigned char *end = str.uend();
|
||||
const unsigned char *ptr = begin;
|
||||
|
||||
// '/(?<=^|[^\d_\pL\x{200c}])#([\d_\pL\x{200c}]{1,256})(?![\d_\pL\x{200c}]*#)/u'
|
||||
// '/(?<=^|[^\d_\pL\x{200c}\x{0d80}-\x{0dff}])#([\d_\pL\x{200c}\x{0d80}-\x{0dff}]{1,256})(?![\d_\pL\x{200c}\x{0d80}-\x{0dff}]*#)/u'
|
||||
// and at least one letter
|
||||
|
||||
UnicodeSimpleCategory category;
|
||||
@ -431,7 +431,7 @@ static vector<Slice> match_cashtags(Slice str) {
|
||||
const unsigned char *end = str.uend();
|
||||
const unsigned char *ptr = begin;
|
||||
|
||||
// '/(?<=^|[^$\d_\pL\x{200c}])\$(1INCH|[A-Z]{1,8})(?![$\d_\pL\x{200c}])/u'
|
||||
// '/(?<=^|[^$\d_\pL\x{200c}\x{0d80}-\x{0dff}])\$(1INCH|[A-Z]{1,8})(?![$\d_\pL\x{200c}\x{0d80}-\x{0dff}])/u'
|
||||
|
||||
UnicodeSimpleCategory category;
|
||||
while (true) {
|
||||
|
@ -114,6 +114,8 @@ TEST(MessageEntities, hashtag) {
|
||||
"ООО" + td::string(200, '2'),
|
||||
{"#" + td::string(200, '1') + "ООО" + td::string(53, '2')});
|
||||
check_hashtag(u8"#a\u2122", {"#a"});
|
||||
check_hashtag("#a൹", {"#a"});
|
||||
check_hashtag("#aඁං෴ก฿", {"#aඁං෴ก"});
|
||||
}
|
||||
|
||||
static void check_cashtag(const td::string &str, const td::vector<td::string> &expected) {
|
||||
@ -173,6 +175,12 @@ TEST(MessageEntities, cashtag) {
|
||||
check_cashtag(u8"$ABC\u2122", {"$ABC"});
|
||||
check_cashtag(u8"\u2122$ABC", {"$ABC"});
|
||||
check_cashtag(u8"\u2122$ABC\u2122", {"$ABC"});
|
||||
check_cashtag("$ABC൹", {"$ABC"});
|
||||
check_cashtag("$ABCඁ", {});
|
||||
check_cashtag("$ABCං", {});
|
||||
check_cashtag("$ABC෴", {});
|
||||
check_cashtag("$ABCก", {});
|
||||
check_cashtag("$ABC฿", {"$ABC"});
|
||||
}
|
||||
|
||||
static void check_media_timestamp(const td::string &str, const td::vector<std::pair<td::string, td::int32>> &expected) {
|
||||
|
Loading…
Reference in New Issue
Block a user