Use entity_offset instead of utf16_entity_length.
GitOrigin-RevId: 8e02eac54c534eb33b37ef2b2f345e9c0781164a
This commit is contained in:
parent
38d5be1ee9
commit
3006357f7c
@ -1339,11 +1339,11 @@ Result<vector<MessageEntity>> parse_markdown(string &text) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int32 utf16_entity_length = 0;
|
int32 entity_offset = utf16_offset;
|
||||||
while (i < size && (text[i] != end_character || (is_pre && !(text[i + 1] == '`' && text[i + 2] == '`')))) {
|
while (i < size && (text[i] != end_character || (is_pre && !(text[i + 1] == '`' && text[i + 2] == '`')))) {
|
||||||
auto cur_ch = static_cast<unsigned char>(text[i]);
|
auto cur_ch = static_cast<unsigned char>(text[i]);
|
||||||
if (is_utf8_character_first_code_unit(cur_ch)) {
|
if (is_utf8_character_first_code_unit(cur_ch)) {
|
||||||
utf16_entity_length += 1 + (cur_ch >= 0xf0); // >= 4 bytes in symbol => surrogate pair
|
utf16_offset += 1 + (cur_ch >= 0xf0); // >= 4 bytes in symbol => surrogate pair
|
||||||
}
|
}
|
||||||
result.push_back(text[i++]);
|
result.push_back(text[i++]);
|
||||||
}
|
}
|
||||||
@ -1351,13 +1351,14 @@ Result<vector<MessageEntity>> parse_markdown(string &text) {
|
|||||||
return Status::Error(400, PSLICE() << "Can't find end of the entity starting at byte offset " << begin_pos);
|
return Status::Error(400, PSLICE() << "Can't find end of the entity starting at byte offset " << begin_pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (utf16_entity_length > 0) {
|
if (entity_offset != utf16_offset) {
|
||||||
|
auto entity_length = utf16_offset - entity_offset;
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '_':
|
case '_':
|
||||||
entities.emplace_back(MessageEntity::Type::Italic, utf16_offset, utf16_entity_length);
|
entities.emplace_back(MessageEntity::Type::Italic, entity_offset, entity_length);
|
||||||
break;
|
break;
|
||||||
case '*':
|
case '*':
|
||||||
entities.emplace_back(MessageEntity::Type::Bold, utf16_offset, utf16_entity_length);
|
entities.emplace_back(MessageEntity::Type::Bold, entity_offset, entity_length);
|
||||||
break;
|
break;
|
||||||
case '[': {
|
case '[': {
|
||||||
string url;
|
string url;
|
||||||
@ -1372,12 +1373,11 @@ Result<vector<MessageEntity>> parse_markdown(string &text) {
|
|||||||
}
|
}
|
||||||
auto user_id = get_link_user_id(url);
|
auto user_id = get_link_user_id(url);
|
||||||
if (user_id.is_valid()) {
|
if (user_id.is_valid()) {
|
||||||
entities.emplace_back(utf16_offset, utf16_entity_length, user_id);
|
entities.emplace_back(entity_offset, entity_length, user_id);
|
||||||
} else {
|
} else {
|
||||||
auto r_url = check_url(url);
|
auto r_url = check_url(url);
|
||||||
if (r_url.is_ok()) {
|
if (r_url.is_ok()) {
|
||||||
entities.emplace_back(MessageEntity::Type::TextUrl, utf16_offset, utf16_entity_length,
|
entities.emplace_back(MessageEntity::Type::TextUrl, entity_offset, entity_length, r_url.move_as_ok());
|
||||||
r_url.move_as_ok());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -1385,18 +1385,17 @@ Result<vector<MessageEntity>> parse_markdown(string &text) {
|
|||||||
case '`':
|
case '`':
|
||||||
if (is_pre) {
|
if (is_pre) {
|
||||||
if (language.empty()) {
|
if (language.empty()) {
|
||||||
entities.emplace_back(MessageEntity::Type::Pre, utf16_offset, utf16_entity_length);
|
entities.emplace_back(MessageEntity::Type::Pre, entity_offset, entity_length);
|
||||||
} else {
|
} else {
|
||||||
entities.emplace_back(MessageEntity::Type::PreCode, utf16_offset, utf16_entity_length, language);
|
entities.emplace_back(MessageEntity::Type::PreCode, entity_offset, entity_length, language);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
entities.emplace_back(MessageEntity::Type::Code, utf16_offset, utf16_entity_length);
|
entities.emplace_back(MessageEntity::Type::Code, entity_offset, entity_length);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
utf16_offset += utf16_entity_length;
|
|
||||||
}
|
}
|
||||||
if (is_pre) {
|
if (is_pre) {
|
||||||
i += 2;
|
i += 2;
|
||||||
@ -1572,20 +1571,20 @@ static Result<vector<MessageEntity>> do_parse_html(Slice text, string &result) {
|
|||||||
}
|
}
|
||||||
i++;
|
i++;
|
||||||
|
|
||||||
int32 utf16_entity_length = 0;
|
int32 entity_offset = utf16_offset;
|
||||||
size_t entity_begin_pos = result.size();
|
size_t entity_begin_pos = result.size();
|
||||||
while (text[i] != 0 && text[i] != '<') {
|
while (text[i] != 0 && text[i] != '<') {
|
||||||
auto cur_ch = static_cast<unsigned char>(text[i]);
|
auto cur_ch = static_cast<unsigned char>(text[i]);
|
||||||
if (cur_ch == '&') {
|
if (cur_ch == '&') {
|
||||||
auto ch = decode_html_entity(text, i);
|
auto ch = decode_html_entity(text, i);
|
||||||
if (ch != 0) {
|
if (ch != 0) {
|
||||||
utf16_entity_length += 1 + (ch > 0xffff);
|
utf16_offset += 1 + (ch > 0xffff);
|
||||||
append_utf8_character(result, ch);
|
append_utf8_character(result, ch);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (is_utf8_character_first_code_unit(cur_ch)) {
|
if (is_utf8_character_first_code_unit(cur_ch)) {
|
||||||
utf16_entity_length += 1 + (cur_ch >= 0xf0); // >= 4 bytes in symbol => surrogate pair
|
utf16_offset += 1 + (cur_ch >= 0xf0); // >= 4 bytes in symbol => surrogate pair
|
||||||
}
|
}
|
||||||
result.push_back(text[i++]);
|
result.push_back(text[i++]);
|
||||||
}
|
}
|
||||||
@ -1613,30 +1612,30 @@ static Result<vector<MessageEntity>> do_parse_html(Slice text, string &result) {
|
|||||||
<< ", expected \"</" << tag_name << ">\", found\"</" << end_tag_name << ">\"");
|
<< ", expected \"</" << tag_name << ">\", found\"</" << end_tag_name << ">\"");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (utf16_entity_length > 0) {
|
if (utf16_offset > entity_offset) {
|
||||||
|
auto entity_length = utf16_offset - entity_offset;
|
||||||
if (tag_name == "i" || tag_name == "em") {
|
if (tag_name == "i" || tag_name == "em") {
|
||||||
entities.emplace_back(MessageEntity::Type::Italic, utf16_offset, utf16_entity_length);
|
entities.emplace_back(MessageEntity::Type::Italic, entity_offset, entity_length);
|
||||||
} else if (tag_name == "b" || tag_name == "strong") {
|
} else if (tag_name == "b" || tag_name == "strong") {
|
||||||
entities.emplace_back(MessageEntity::Type::Bold, utf16_offset, utf16_entity_length);
|
entities.emplace_back(MessageEntity::Type::Bold, entity_offset, entity_length);
|
||||||
} else if (tag_name == "a") {
|
} else if (tag_name == "a") {
|
||||||
if (url.empty()) {
|
if (url.empty()) {
|
||||||
url = result.substr(entity_begin_pos);
|
url = result.substr(entity_begin_pos);
|
||||||
}
|
}
|
||||||
auto user_id = get_link_user_id(url);
|
auto user_id = get_link_user_id(url);
|
||||||
if (user_id.is_valid()) {
|
if (user_id.is_valid()) {
|
||||||
entities.emplace_back(utf16_offset, utf16_entity_length, user_id);
|
entities.emplace_back(entity_offset, entity_length, user_id);
|
||||||
} else {
|
} else {
|
||||||
auto r_url = check_url(url);
|
auto r_url = check_url(url);
|
||||||
if (r_url.is_ok()) {
|
if (r_url.is_ok()) {
|
||||||
entities.emplace_back(MessageEntity::Type::TextUrl, utf16_offset, utf16_entity_length, r_url.move_as_ok());
|
entities.emplace_back(MessageEntity::Type::TextUrl, entity_offset, entity_length, r_url.move_as_ok());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (tag_name == "pre") {
|
} else if (tag_name == "pre") {
|
||||||
entities.emplace_back(MessageEntity::Type::Pre, utf16_offset, utf16_entity_length);
|
entities.emplace_back(MessageEntity::Type::Pre, entity_offset, entity_length);
|
||||||
} else if (tag_name == "code") {
|
} else if (tag_name == "code") {
|
||||||
entities.emplace_back(MessageEntity::Type::Code, utf16_offset, utf16_entity_length);
|
entities.emplace_back(MessageEntity::Type::Code, entity_offset, entity_length);
|
||||||
}
|
}
|
||||||
utf16_offset += utf16_entity_length;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return entities;
|
return entities;
|
||||||
|
Reference in New Issue
Block a user