From 13d5854e2f99f959dbf38b2fb769b299e79d7bde Mon Sep 17 00:00:00 2001 From: levlam Date: Tue, 3 Aug 2021 02:58:12 +0300 Subject: [PATCH] Update match_cashtags. --- td/telegram/MessageEntity.cpp | 12 ++++++++---- test/message_entities.cpp | 11 +++++++++-- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/td/telegram/MessageEntity.cpp b/td/telegram/MessageEntity.cpp index c284b8ad7..e4dfd459f 100644 --- a/td/telegram/MessageEntity.cpp +++ b/td/telegram/MessageEntity.cpp @@ -384,7 +384,7 @@ static vector match_cashtags(Slice str) { const unsigned char *end = str.uend(); const unsigned char *ptr = begin; - // '/(?<=^|[^$\d_\pL\x{200c}])\$([A-Z]{3,8})(?![$\d_\pL\x{200c}])/u' + // '/(?<=^|[^$\d_\pL\x{200c}])\$(1INCH|[A-Z]{1,8})(?![$\d_\pL\x{200c}])/u' UnicodeSimpleCategory category; while (true) { @@ -404,12 +404,16 @@ static vector match_cashtags(Slice str) { } auto cashtag_begin = ++ptr; - while (ptr != end && 'Z' >= *ptr && *ptr >= 'A') { - ptr++; + if (end - ptr >= 5 && Slice(ptr, ptr + 5) == Slice("1INCH")) { + ptr += 5; + } else { + while (ptr != end && 'Z' >= *ptr && *ptr >= 'A') { + ptr++; + } } auto cashtag_end = ptr; auto cashtag_size = cashtag_end - cashtag_begin; - if (cashtag_size < 3 || cashtag_size > 8) { + if (cashtag_size < 1 || cashtag_size > 8) { continue; } diff --git a/test/message_entities.cpp b/test/message_entities.cpp index fd6ad997a..6583b9129 100644 --- a/test/message_entities.cpp +++ b/test/message_entities.cpp @@ -137,8 +137,9 @@ TEST(MessageEntities, cashtag) { check_cashtag("$ab", {}); check_cashtag("$abc", {}); check_cashtag("$", {}); - check_cashtag("$A", {}); - check_cashtag("$AB", {}); + check_cashtag("$A", {"$A"}); + check_cashtag("$AB", {"$AB"}); + check_cashtag("$ABС", {}); check_cashtag("$АBC", {}); check_cashtag("$АВС", {}); check_cashtag("$ABC", {"$ABC"}); @@ -160,6 +161,12 @@ TEST(MessageEntities, cashtag) { check_cashtag(" А$ABC ", {}); check_cashtag("$ABC$DEF $GHI $KLM", {"$GHI", "$KLM"}); check_cashtag("$TEST", {"$TEST"}); + check_cashtag("$1INC", {}); + check_cashtag("$1INCH", {"$1INCH"}); + check_cashtag("...$1INCH...", {"$1INCH"}); + check_cashtag("$1inch", {}); + check_cashtag("$1INCHA", {}); + check_cashtag("$1INCHА", {}); check_cashtag(u8"$ABC\u2122", {"$ABC"}); check_cashtag(u8"\u2122$ABC", {"$ABC"}); check_cashtag(u8"\u2122$ABC\u2122", {"$ABC"});