From 6dbef6ef768d54a984388d304c6030ea891973ce Mon Sep 17 00:00:00 2001
From: levlam
Date: Tue, 9 Oct 2018 16:27:40 +0300
Subject: [PATCH] Fix returning invalid Unicode from parse_html.

GitOrigin-RevId: 9277022a53d23ac5c3eb1427eb48bd886b789d09
---
 td/telegram/MessageEntity.cpp | 6 ++++++
 td/telegram/cli.cpp | 4 ++++
 2 files changed, 10 insertions(+)

diff --git a/td/telegram/MessageEntity.cpp b/td/telegram/MessageEntity.cpp
index 3324400c8..3bf24fb9d 100644
--- a/td/telegram/MessageEntity.cpp
+++ b/td/telegram/MessageEntity.cpp
@@ -1616,6 +1616,12 @@ Result> parse_html(string &text) {
       utf16_offset += utf16_entity_length;
     }
   }
+  if (!check_utf8(result)) {
+    return Status::Error(400,
+                         "Text contains invalid Unicode characters after decoding HTML entities, check for unmatched "
+                         "surrogate code units");
+  }
+
   text = result;
   return entities;
 }
diff --git a/td/telegram/cli.cpp b/td/telegram/cli.cpp
index f17b10456..8bd4bbd06 100644
--- a/td/telegram/cli.cpp
+++ b/td/telegram/cli.cpp
@@ -2365,8 +2365,12 @@ class CliClient final : public Actor {
       execute(make_tl_object(args));
     } else if (op == "pte") {
       send_request(make_tl_object(args, make_tl_object()));
+    } else if (op == "pteh") {
+      send_request(make_tl_object(args, make_tl_object()));
     } else if (op == "ptes") {
       execute(make_tl_object(args, make_tl_object()));
+    } else if (op == "ptehs") {
+      execute(make_tl_object(args, make_tl_object()));
     } else if (op == "gfmt") {
       send_request(make_tl_object(trim(args)));
     } else if (op == "gfe") {