Always pass non-null pointer to next_utf8_unsafe.

This commit is contained in:
levlam 2022-08-19 16:19:58 +03:00
parent 31a4608463
commit a8b3573a00
3 changed files with 10 additions and 18 deletions

View File

@@ -5968,20 +5968,21 @@ void update_used_hashtags(Td *td, const MessageContent *content) {
const unsigned char *ptr = Slice(text->text).ubegin(); const unsigned char *ptr = Slice(text->text).ubegin();
const unsigned char *end = Slice(text->text).uend(); const unsigned char *end = Slice(text->text).uend();
int32 utf16_pos = 0; int32 utf16_pos = 0;
uint32 skipped_code = 0;
for (auto &entity : text->entities) { for (auto &entity : text->entities) {
if (entity.type != MessageEntity::Type::Hashtag) { if (entity.type != MessageEntity::Type::Hashtag) {
continue; continue;
} }
while (utf16_pos < entity.offset && ptr < end) { while (utf16_pos < entity.offset && ptr < end) {
utf16_pos += 1 + (ptr[0] >= 0xf0); utf16_pos += 1 + (ptr[0] >= 0xf0);
ptr = next_utf8_unsafe(ptr, nullptr, "update_used_hashtags"); ptr = next_utf8_unsafe(ptr, &skipped_code, "update_used_hashtags");
} }
CHECK(utf16_pos == entity.offset); CHECK(utf16_pos == entity.offset);
auto from = ptr; auto from = ptr;
while (utf16_pos < entity.offset + entity.length && ptr < end) { while (utf16_pos < entity.offset + entity.length && ptr < end) {
utf16_pos += 1 + (ptr[0] >= 0xf0); utf16_pos += 1 + (ptr[0] >= 0xf0);
ptr = next_utf8_unsafe(ptr, nullptr, "update_used_hashtags 2"); ptr = next_utf8_unsafe(ptr, &skipped_code, "update_used_hashtags 2");
} }
CHECK(utf16_pos == entity.offset + entity.length); CHECK(utf16_pos == entity.offset + entity.length);
auto to = ptr; auto to = ptr;

View File

@@ -1665,10 +1665,11 @@ static void fix_entity_offsets(Slice text, vector<MessageEntity> &entities) {
entity.offset = utf16_pos; entity.offset = utf16_pos;
} }
uint32 skipped_code = 0;
while (ptr != end && cnt > 0) { while (ptr != end && cnt > 0) {
unsigned char c = ptr[0]; unsigned char c = ptr[0];
utf16_pos += 1 + (c >= 0xf0); utf16_pos += 1 + (c >= 0xf0);
ptr = next_utf8_unsafe(ptr, nullptr, "fix_entity_offsets"); ptr = next_utf8_unsafe(ptr, &skipped_code, "fix_entity_offsets");
pos = static_cast<int32>(ptr - begin); pos = static_cast<int32>(ptr - begin);
if (entity_begin == pos) { if (entity_begin == pos) {

View File

@@ -84,30 +84,20 @@ void append_utf8_character(string &str, uint32 ch) {
const unsigned char *next_utf8_unsafe(const unsigned char *ptr, uint32 *code, const char *source) { const unsigned char *next_utf8_unsafe(const unsigned char *ptr, uint32 *code, const char *source) {
uint32 a = ptr[0]; uint32 a = ptr[0];
if ((a & 0x80) == 0) { if ((a & 0x80) == 0) {
if (code) { *code = a;
*code = a;
}
return ptr + 1; return ptr + 1;
} else if ((a & 0x20) == 0) { } else if ((a & 0x20) == 0) {
if (code) { *code = ((a & 0x1f) << 6) | (ptr[1] & 0x3f);
*code = ((a & 0x1f) << 6) | (ptr[1] & 0x3f);
}
return ptr + 2; return ptr + 2;
} else if ((a & 0x10) == 0) { } else if ((a & 0x10) == 0) {
if (code) { *code = ((a & 0x0f) << 12) | ((ptr[1] & 0x3f) << 6) | (ptr[2] & 0x3f);
*code = ((a & 0x0f) << 12) | ((ptr[1] & 0x3f) << 6) | (ptr[2] & 0x3f);
}
return ptr + 3; return ptr + 3;
} else if ((a & 0x08) == 0) { } else if ((a & 0x08) == 0) {
if (code) { *code = ((a & 0x07) << 18) | ((ptr[1] & 0x3f) << 12) | ((ptr[2] & 0x3f) << 6) | (ptr[3] & 0x3f);
*code = ((a & 0x07) << 18) | ((ptr[1] & 0x3f) << 12) | ((ptr[2] & 0x3f) << 6) | (ptr[3] & 0x3f);
}
return ptr + 4; return ptr + 4;
} }
LOG(FATAL) << a << " " << source; LOG(FATAL) << a << " " << source;
if (code) { *code = 0;
*code = 0;
}
return ptr; return ptr;
} }