From e2d079860d83aa803ada6cb079cbb8819cb93fe2 Mon Sep 17 00:00:00 2001 From: levlam Date: Mon, 30 Jan 2023 13:40:09 +0300 Subject: [PATCH] Add userinfo characters check. --- tdutils/td/utils/HttpUrl.cpp | 71 ++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/tdutils/td/utils/HttpUrl.cpp b/tdutils/td/utils/HttpUrl.cpp index f5138b20d..09740c58b 100644 --- a/tdutils/td/utils/HttpUrl.cpp +++ b/tdutils/td/utils/HttpUrl.cpp @@ -11,6 +11,7 @@ #include "td/utils/misc.h" #include "td/utils/Parser.h" #include "td/utils/port/IPAddress.h" +#include "td/utils/SliceBuilder.h" #include @@ -138,42 +139,50 @@ Result parse_url(Slice url, HttpUrl::Protocol default_protocol) { } } + auto check_url_part = [](Slice part, Slice name, bool allow_colon) { + for (size_t i = 0; i < part.size(); i++) { + char c = part[i]; + if (is_alnum(c) || c == '.' || c == '-' || c == '_' || c == '!' || c == '$' || c == ',' || c == '~' || c == '*' || + c == '\'' || c == '(' || c == ')' || c == ';' || c == '&' || c == '+' || c == '=' || + (allow_colon && c == ':')) { + // symbols allowed by RFC 7230 and RFC 3986 + continue; + } + if (c == '%') { + c = part[++i]; + if (is_hex_digit(c)) { + c = part[++i]; + if (is_hex_digit(c)) { + // percent encoded symbol as allowed by RFC 7230 and RFC 3986 + continue; + } + } + return Status::Error(PSLICE() << "Wrong percent-encoded symbol in URL " << name); + } + + // all other symbols aren't allowed + auto uc = static_cast(c); + if (uc >= 128) { + // but we allow plain UTF-8 symbols + continue; + } + return Status::Error(PSLICE() << "Disallowed character in URL " << name); + } + return Status::OK(); + }; + string host_str = to_lower(host); - for (size_t i = 0; i < host_str.size(); i++) { - char c = host_str[i]; - if (is_ipv6) { - if (i == 0 || i + 1 == host_str.size() || c == ':' || ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || - c == '.') { + if (is_ipv6) { + for (size_t i = 1; i + 1 < host_str.size(); i++) { + char c = host_str[i]; + if (c == ':' || ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || c == '.') { continue; } return Status::Error("Wrong IPv6 URL host"); } - - if (('a' <= c && c <= 'z') || c == '.' || ('0' <= c && c <= '9') || c == '-' || c == '_' || c == '!' || c == '$' || - c == ',' || c == '~' || c == '*' || c == '\'' || c == '(' || c == ')' || c == ';' || c == '&' || c == '+' || - c == '=') { - // symbols allowed by RFC 7230 and RFC 3986 - continue; - } - if (c == '%') { - c = host_str[++i]; - if (('a' <= c && c <= 'f') || ('0' <= c && c <= '9')) { - c = host_str[++i]; - if (('a' <= c && c <= 'f') || ('0' <= c && c <= '9')) { - // percent encoded symbol as allowed by RFC 7230 and RFC 3986 - continue; - } - } - return Status::Error("Wrong percent-encoded symbol in URL host"); - } - - // all other symbols aren't allowed - auto uc = static_cast(c); - if (uc >= 128) { - // but we allow plain UTF-8 symbols - continue; - } - return Status::Error("Wrong URL host"); + } else { + TRY_STATUS(check_url_part(host_str, "host", false)); + TRY_STATUS(check_url_part(userinfo, "userinfo", true)); } return HttpUrl{protocol, userinfo.str(), std::move(host_str), is_ipv6, specified_port, port, std::move(query_str)};