Improve match_bank_card_numbers.

GitOrigin-RevId: 1df7c2fc487a66ba12e92d80d63e355d1490cef5
This commit is contained in:
levlam 2020-05-05 03:15:30 +03:00
parent 033ae55a46
commit 390d66e3d2
2 changed files with 31 additions and 3 deletions

View File

@ -422,7 +422,7 @@ static vector<Slice> match_bank_card_numbers(Slice str) {
const unsigned char *end = str.uend();
const unsigned char *ptr = begin;
// '/[\d- ]{13,}/'
// '/(?<=^|[^+_\pL\d-])[\d -]{13,}([^_\pL\d-]|$)/'
while (true) {
while (ptr != end && !is_digit(*ptr)) {
@ -431,6 +431,18 @@ static vector<Slice> match_bank_card_numbers(Slice str) {
if (ptr == end) {
break;
}
// Left-boundary check, performed only when the digit run does not start at the
// very beginning of the input (there is no preceding character otherwise).
if (ptr != begin) {
uint32 prev;
// Decode the code point located just before `ptr` into `prev`.
// NOTE(review): presumably prev_utf8_unsafe() steps back to the first byte of
// the previous UTF-8 sequence - confirm against its definition.
next_utf8_unsafe(prev_utf8_unsafe(ptr), &prev, "match_bank_card_numbers");
// A run preceded by '+', '-', '_' or a letter is not treated as a card number.
if (prev == '+' || prev == '-' || prev == '_' ||
get_unicode_simple_category(prev) == UnicodeSimpleCategory::Letter) {
// Skip the rest of this rejected run (digits, spaces and hyphens) so that
// scanning resumes after it rather than in the middle of it.
while (ptr != end && (is_digit(*ptr) || *ptr == ' ' || *ptr == '-')) {
ptr++;
}
continue;
}
}
auto card_number_begin = ptr;
size_t digit_count = 0;
@ -455,6 +467,13 @@ static vector<Slice> match_bank_card_numbers(Slice str) {
if (card_number_size > 2 * digit_count - 1) {
continue;
}
// Right-boundary check, performed only when the candidate does not extend to
// the end of the input (there is no following character otherwise).
if (card_number_end != end) {
uint32 next;
// Decode the code point that starts at `card_number_end` into `next`.
next_utf8_unsafe(card_number_end, &next, "match_bank_card_numbers 2");
// A candidate immediately followed by '-', '_' or a letter is discarded:
// `continue` skips the emplace_back below and moves on to the next iteration
// of the enclosing loop.
if (next == '-' || next == '_' || get_unicode_simple_category(next) == UnicodeSimpleCategory::Letter) {
continue;
}
}
result.emplace_back(card_number_begin, card_number_end);
}

View File

@ -197,12 +197,21 @@ TEST(MessageEntities, bank_card_number) {
check_bank_card_number("123456789015009100", {"123456789015009100"});
check_bank_card_number("1234567890128000000", {"1234567890128000000"});
check_bank_card_number("12345678901500910000", {});
check_bank_card_number(" - - - -1 - -- 2 - - -- 34 - - - 56- - 7890150000 - - - -", {});
check_bank_card_number(" - - - -1 - -- 234 - - 56- - 7890150000 - - - -", {"1 - -- 234 - - 56- - 7890150000"});
check_bank_card_number(" - - - - 1 - -- 2 - - -- 34 - - - 56- - 7890150000 - - - -", {});
check_bank_card_number(" - - - - 1 - -- 234 - - 56- - 7890150000 - - - -", {"1 - -- 234 - - 56- - 7890150000"});
check_bank_card_number("4916-3385-0608-2832; 5280 9342 8317 1080 ;345936346788903",
{"4916-3385-0608-2832", "5280 9342 8317 1080", "345936346788903"});
check_bank_card_number("4556728228023269,4916141675244747020,49161416752447470,4556728228023269",
{"4556728228023269", "4916141675244747020", "4556728228023269"});
check_bank_card_number("a1234567890128", {});
check_bank_card_number("1234567890128a", {});
check_bank_card_number("1234567890128а", {});
check_bank_card_number("а1234567890128", {});
check_bank_card_number("1234567890128_", {});
check_bank_card_number("_1234567890128", {});
check_bank_card_number("1234567890128/", {"1234567890128"});
check_bank_card_number(",1234567890128", {"1234567890128"});
check_bank_card_number("+1234567890128", {});
}
static void check_is_email_address(const td::string &str, bool expected) {