Improve match_bank_card_numbers.
GitOrigin-RevId: 1df7c2fc487a66ba12e92d80d63e355d1490cef5
This commit is contained in:
parent
033ae55a46
commit
390d66e3d2
@ -422,7 +422,7 @@ static vector<Slice> match_bank_card_numbers(Slice str) {
|
||||
const unsigned char *end = str.uend();
|
||||
const unsigned char *ptr = begin;
|
||||
|
||||
// '/[\d- ]{13,}/'
|
||||
// '/(?<=^|[^+_\pL\d-])[\d -]{13,}([^_\pL\d-]|$)/'
|
||||
|
||||
while (true) {
|
||||
while (ptr != end && !is_digit(*ptr)) {
|
||||
@ -431,6 +431,18 @@ static vector<Slice> match_bank_card_numbers(Slice str) {
|
||||
if (ptr == end) {
|
||||
break;
|
||||
}
|
||||
if (ptr != begin) {
|
||||
uint32 prev;
|
||||
next_utf8_unsafe(prev_utf8_unsafe(ptr), &prev, "match_bank_card_numbers");
|
||||
|
||||
if (prev == '+' || prev == '-' || prev == '_' ||
|
||||
get_unicode_simple_category(prev) == UnicodeSimpleCategory::Letter) {
|
||||
while (ptr != end && (is_digit(*ptr) || *ptr == ' ' || *ptr == '-')) {
|
||||
ptr++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
auto card_number_begin = ptr;
|
||||
size_t digit_count = 0;
|
||||
@ -455,6 +467,13 @@ static vector<Slice> match_bank_card_numbers(Slice str) {
|
||||
if (card_number_size > 2 * digit_count - 1) {
|
||||
continue;
|
||||
}
|
||||
if (card_number_end != end) {
|
||||
uint32 next;
|
||||
next_utf8_unsafe(card_number_end, &next, "match_bank_card_numbers 2");
|
||||
if (next == '-' || next == '_' || get_unicode_simple_category(next) == UnicodeSimpleCategory::Letter) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
result.emplace_back(card_number_begin, card_number_end);
|
||||
}
|
||||
|
@ -197,12 +197,21 @@ TEST(MessageEntities, bank_card_number) {
|
||||
check_bank_card_number("123456789015009100", {"123456789015009100"});
|
||||
check_bank_card_number("1234567890128000000", {"1234567890128000000"});
|
||||
check_bank_card_number("12345678901500910000", {});
|
||||
check_bank_card_number(" - - - -1 - -- 2 - - -- 34 - - - 56- - 7890150000 - - - -", {});
|
||||
check_bank_card_number(" - - - -1 - -- 234 - - 56- - 7890150000 - - - -", {"1 - -- 234 - - 56- - 7890150000"});
|
||||
check_bank_card_number(" - - - - 1 - -- 2 - - -- 34 - - - 56- - 7890150000 - - - -", {});
|
||||
check_bank_card_number(" - - - - 1 - -- 234 - - 56- - 7890150000 - - - -", {"1 - -- 234 - - 56- - 7890150000"});
|
||||
check_bank_card_number("4916-3385-0608-2832; 5280 9342 8317 1080 ;345936346788903",
|
||||
{"4916-3385-0608-2832", "5280 9342 8317 1080", "345936346788903"});
|
||||
check_bank_card_number("4556728228023269,4916141675244747020,49161416752447470,4556728228023269",
|
||||
{"4556728228023269", "4916141675244747020", "4556728228023269"});
|
||||
check_bank_card_number("a1234567890128", {});
|
||||
check_bank_card_number("1234567890128a", {});
|
||||
check_bank_card_number("1234567890128а", {});
|
||||
check_bank_card_number("а1234567890128", {});
|
||||
check_bank_card_number("1234567890128_", {});
|
||||
check_bank_card_number("_1234567890128", {});
|
||||
check_bank_card_number("1234567890128/", {"1234567890128"});
|
||||
check_bank_card_number(",1234567890128", {"1234567890128"});
|
||||
check_bank_card_number("+1234567890128", {});
|
||||
}
|
||||
|
||||
static void check_is_email_address(const td::string &str, bool expected) {
|
||||
|
Loading…
Reference in New Issue
Block a user