Update match_cashtags: allow cashtags of 1 to 8 uppercase Latin letters and the special cashtag $1INCH.
This commit is contained in:
parent
0dc42e4410
commit
13d5854e2f
@ -384,7 +384,7 @@ static vector<Slice> match_cashtags(Slice str) {
|
||||
const unsigned char *end = str.uend();
|
||||
const unsigned char *ptr = begin;
|
||||
|
||||
// '/(?<=^|[^$\d_\pL\x{200c}])\$([A-Z]{3,8})(?![$\d_\pL\x{200c}])/u'
|
||||
// '/(?<=^|[^$\d_\pL\x{200c}])\$(1INCH|[A-Z]{1,8})(?![$\d_\pL\x{200c}])/u'
|
||||
|
||||
UnicodeSimpleCategory category;
|
||||
while (true) {
|
||||
@ -404,12 +404,16 @@ static vector<Slice> match_cashtags(Slice str) {
|
||||
}
|
||||
|
||||
auto cashtag_begin = ++ptr;
|
||||
while (ptr != end && 'Z' >= *ptr && *ptr >= 'A') {
|
||||
ptr++;
|
||||
if (end - ptr >= 5 && Slice(ptr, ptr + 5) == Slice("1INCH")) {
|
||||
ptr += 5;
|
||||
} else {
|
||||
while (ptr != end && 'Z' >= *ptr && *ptr >= 'A') {
|
||||
ptr++;
|
||||
}
|
||||
}
|
||||
auto cashtag_end = ptr;
|
||||
auto cashtag_size = cashtag_end - cashtag_begin;
|
||||
if (cashtag_size < 3 || cashtag_size > 8) {
|
||||
if (cashtag_size < 1 || cashtag_size > 8) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -137,8 +137,9 @@ TEST(MessageEntities, cashtag) {
|
||||
check_cashtag("$ab", {});
|
||||
check_cashtag("$abc", {});
|
||||
check_cashtag("$", {});
|
||||
check_cashtag("$A", {});
|
||||
check_cashtag("$AB", {});
|
||||
check_cashtag("$A", {"$A"});
|
||||
check_cashtag("$AB", {"$AB"});
|
||||
check_cashtag("$ABС", {});
|
||||
check_cashtag("$АBC", {});
|
||||
check_cashtag("$АВС", {});
|
||||
check_cashtag("$ABC", {"$ABC"});
|
||||
@ -160,6 +161,12 @@ TEST(MessageEntities, cashtag) {
|
||||
check_cashtag(" А$ABC ", {});
|
||||
check_cashtag("$ABC$DEF $GHI $KLM", {"$GHI", "$KLM"});
|
||||
check_cashtag("$TEST", {"$TEST"});
|
||||
check_cashtag("$1INC", {});
|
||||
check_cashtag("$1INCH", {"$1INCH"});
|
||||
check_cashtag("...$1INCH...", {"$1INCH"});
|
||||
check_cashtag("$1inch", {});
|
||||
check_cashtag("$1INCHA", {});
|
||||
check_cashtag("$1INCHА", {});
|
||||
check_cashtag(u8"$ABC\u2122", {"$ABC"});
|
||||
check_cashtag(u8"\u2122$ABC", {"$ABC"});
|
||||
check_cashtag(u8"\u2122$ABC\u2122", {"$ABC"});
|
||||
|
Loading…
Reference in New Issue
Block a user