Allow only domain symbols in user data after first dot to improve performance.

This commit is contained in:
levlam 2022-08-19 21:01:39 +03:00
parent 1afbbe5fca
commit 0d2ddc2d56
2 changed files with 4 additions and 6 deletions

View File

@@ -755,13 +755,11 @@ static vector<Slice> match_urls(Slice str) {
auto next_ptr = next_utf8_unsafe(domain_end_ptr, &code); auto next_ptr = next_utf8_unsafe(domain_end_ptr, &code);
if (code == '@') { if (code == '@') {
last_at_ptr = domain_end_ptr; last_at_ptr = domain_end_ptr;
} } else if (!is_domain_symbol(code)) {
if (!is_user_data_symbol(code)) {
break; break;
} }
domain_end_ptr = next_ptr; domain_end_ptr = next_ptr;
} }
domain_end_ptr = last_at_ptr == nullptr ? begin + dot_pos : last_at_ptr + 1;
} }
while (domain_end_ptr != end) { while (domain_end_ptr != end) {
uint32 code = 0; uint32 code = 0;

View File

@@ -655,11 +655,11 @@ TEST(MessageEntities, url) {
check_url("https://a.de}bc@c.com", {"https://a.de"}, {"bc@c.com"}); check_url("https://a.de}bc@c.com", {"https://a.de"}, {"bc@c.com"});
check_url("https://a.de(bc@c.com", {"https://a.de"}, {"bc@c.com"}); check_url("https://a.de(bc@c.com", {"https://a.de"}, {"bc@c.com"});
check_url("https://a.de)bc@c.com", {"https://a.de"}, {"bc@c.com"}); check_url("https://a.de)bc@c.com", {"https://a.de"}, {"bc@c.com"});
check_url("https://a.de\\bc@c.com", {"https://a.de\\bc@c.com"}); check_url("https://a.debc@c.com", {"https://a.debc@c.com"});
check_url("https://a.de'bc@c.com", {"https://a.de"}, {"bc@c.com"}); check_url("https://a.de'bc@c.com", {"https://a.de"}, {"bc@c.com"});
check_url("https://a.de`bc@c.com", {"https://a.de"}, {"bc@c.com"}); check_url("https://a.de`bc@c.com", {"https://a.de"}, {"bc@c.com"});
check_url("https://a.bc:de.fg@c.com", {"https://a.bc:de.fg@c.com"}); check_url("https://a.bcde.fg@c.com", {"https://a.bcde.fg@c.com"});
check_url("https://a:h.bc:de.fg@c.com", {"https://a:h.bc:de.fg@c.com"}); check_url("https://a:h.bcde.fg@c.com", {"https://a:h.bcde.fg@c.com"});
check_url("https://abc@c.com", {"https://abc@c.com"}); check_url("https://abc@c.com", {"https://abc@c.com"});
check_url("https://de[bc@c.com", {}, {"bc@c.com"}); check_url("https://de[bc@c.com", {}, {"bc@c.com"});
check_url("https://de/bc@c.com", {}); check_url("https://de/bc@c.com", {});