Note: the added code in IPAddress.cpp and test/misc.cpp was revised after the original commit,
so the post-image blob ids on their "index" lines are stale; hunk line counts were updated to match.

diff --git a/tdutils/CMakeLists.txt b/tdutils/CMakeLists.txt
index 1fbc34df..9ecb02cd 100644
--- a/tdutils/CMakeLists.txt
+++ b/tdutils/CMakeLists.txt
@@ -215,7 +215,7 @@ if (WIN32)
   # find_library(WS2_32_LIBRARY ws2_32)
   # find_library(MSWSOCK_LIBRARY Mswsock)
   # target_link_libraries(tdutils PRIVATE ${WS2_32_LIBRARY} ${MSWSOCK_LIBRARY})
-  target_link_libraries(tdutils PRIVATE ws2_32 Mswsock)
+  target_link_libraries(tdutils PRIVATE ws2_32 Mswsock Normaliz)
 endif()
 if (NOT CMAKE_CROSSCOMPILING)
   add_dependencies(tdutils tdmime_auto)
diff --git a/tdutils/td/utils/port/IPAddress.cpp b/tdutils/td/utils/port/IPAddress.cpp
index 1fa9c5fd..e8d925a1 100644
--- a/tdutils/td/utils/port/IPAddress.cpp
+++ b/tdutils/td/utils/port/IPAddress.cpp
@@ -12,8 +12,11 @@
 #include "td/utils/port/SocketFd.h"
 #include "td/utils/port/thread_local.h"
 #include "td/utils/ScopeGuard.h"
+#include "td/utils/utf8.h"
 
-#if !TD_WINDOWS
+#if TD_WINDOWS
+#include "td/utils/port/wstring_convert.h"
+#else
 #include <netdb.h>
 #include <netinet/tcp.h>
 #include <sys/types.h>
@@ -25,6 +28,49 @@
 
 namespace td {
 
+// Returns true if the character is 7-bit ASCII.
+static bool is_ascii_host_char(char c) {
+  return static_cast<unsigned char>(c) <= 127;
+}
+
+// Returns true if the host consists solely of 7-bit ASCII characters, i.e. it needs no IDN conversion.
+static bool is_ascii_host(Slice host) {
+  for (auto c : host) {
+    if (!is_ascii_host_char(c)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Converts a host name to its ASCII form.
+// An all-ASCII host is returned lower-cased; any other host must be valid UTF-8 and is
+// punycoded through IdnToAscii on Windows. On other platforms such hosts are rejected.
+Result<string> idn_to_ascii(CSlice host) {
+  if (is_ascii_host(host)) {
+    return to_lower(host);
+  }
+  if (!check_utf8(host)) {
+    return Status::Error("Host name must be encoded in UTF-8");
+  }
+
+#if TD_WINDOWS
+  TRY_RESULT(whost, to_wstring(host));
+  wchar_t punycode[256];
+  // IdnToAscii takes the input length as int, so convert explicitly instead of narrowing size_t implicitly
+  int result_length = IdnToAscii(IDN_ALLOW_UNASSIGNED, whost.c_str(), static_cast<int>(whost.size()), punycode, 255);
+  if (result_length == 0) {
+    return Status::Error("Host can't be punycoded");
+  }
+
+  TRY_RESULT(idn_host, from_wstring(punycode, result_length));
+  return idn_host;
+#else
+  // TODO
+  return Status::Error("Internationalized Domain Names are not supported");
+#endif
+}
+
 IPAddress::IPAddress() : is_valid_(false) {
 }
 
@@ -84,12 +130,14 @@ IPAddress IPAddress::get_any_addr() const {
   }
   return res;
 }
+
 void IPAddress::init_ipv4_any() {
   is_valid_ = true;
   ipv4_addr_.sin_family = AF_INET;
   ipv4_addr_.sin_addr.s_addr = INADDR_ANY;
   ipv4_addr_.sin_port = 0;
 }
+
 void IPAddress::init_ipv6_any() {
   is_valid_ = true;
   ipv6_addr_.sin6_family = AF_INET6;
@@ -151,18 +199,21 @@ Status IPAddress::init_host_port(CSlice host, CSlice port) {
     return Status::Error("Host is invalid");
   }
 #endif
+  TRY_RESULT(ascii_host, idn_to_ascii(host));
+  host = ascii_host;
+
   addrinfo hints;
   addrinfo *info = nullptr;
   std::memset(&hints, 0, sizeof(hints));
   hints.ai_family = AF_INET;  // TODO AF_UNSPEC;
   hints.ai_socktype = SOCK_STREAM;
   LOG(INFO) << "Try to init IP address of " << host << " with port " << port;
-  auto s = getaddrinfo(host.c_str(), port.c_str(), &hints, &info);
-  if (s != 0) {
+  auto err = getaddrinfo(host.c_str(), port.c_str(), &hints, &info);
+  if (err != 0) {
 #if TD_WINDOWS
     return OS_SOCKET_ERROR("Failed to resolve host");
 #else
-    return Status::Error(PSLICE() << "Failed to resolve host: " << gai_strerror(s));
+    return Status::Error(PSLICE() << "Failed to resolve host: " << gai_strerror(err));
 #endif
   }
   SCOPE_EXIT {
diff --git a/tdutils/td/utils/port/IPAddress.h b/tdutils/td/utils/port/IPAddress.h
index 116a4c54..f2bab9fc 100644
--- a/tdutils/td/utils/port/IPAddress.h
+++ b/tdutils/td/utils/port/IPAddress.h
@@ -19,7 +19,11 @@
 #endif
 
 namespace td {
+
+Result<string> idn_to_ascii(CSlice host);
+
 class SocketFd;
+
 class IPAddress {
  public:
   IPAddress();
diff --git a/tdutils/test/misc.cpp b/tdutils/test/misc.cpp
index dd1f1ec4..56cd5af0 100644
--- a/tdutils/test/misc.cpp
+++ b/tdutils/test/misc.cpp
@@ -10,6 +10,7 @@
 #include "td/utils/misc.h"
 #include "td/utils/port/EventFd.h"
 #include "td/utils/port/FileFd.h"
+#include "td/utils/port/IPAddress.h"
 #include "td/utils/port/path.h"
 #include "td/utils/port/sleep.h"
 #include "td/utils/port/Stat.h"
@@ -260,3 +261,51 @@ TEST(Misc, get_url_query_file_name) {
     test_get_url_query_file_name_one("/", suffix, "\\a\\1\\2\\3\\a\\s\\a\\das");
   }
 }
+
+// Converts host with idn_to_ascii and logs an error if the conversion fails or does not yield result.
+static void test_idn_to_ascii_one(string host, string result) {
+  // Convert once and inspect the status before reading the value
+  auto r_ascii_host = idn_to_ascii(host);
+  if (r_ascii_host.is_error()) {
+    LOG(ERROR) << "Failed to convert " << host << " to " << result << ": " << r_ascii_host.error();
+    return;
+  }
+  if (result != r_ascii_host.ok()) {
+    LOG(ERROR) << "Failed to convert " << host << " to " << result << ", got \"" << r_ascii_host.ok() << "\"";
+  }
+}
+
+TEST(Misc, idn_to_ascii) {
+  test_idn_to_ascii_one("::::::::::::::::::::::::::::::::::::::@/", "::::::::::::::::::::::::::::::::::::::@/");
+  test_idn_to_ascii_one("%30", "%30");
+  test_idn_to_ascii_one("%30", "%30");
+  test_idn_to_ascii_one("127.0.0.1", "127.0.0.1");
+  test_idn_to_ascii_one("fe80::", "fe80::");
+  test_idn_to_ascii_one("fe80:0:0:0:200:f8ff:fe21:67cf", "fe80:0:0:0:200:f8ff:fe21:67cf");
+  test_idn_to_ascii_one("2001:0db8:11a3:09d7:1f34:8a2e:07a0:765d", "2001:0db8:11a3:09d7:1f34:8a2e:07a0:765d");
+  test_idn_to_ascii_one("::ffff:192.0.2.1", "::ffff:192.0.2.1");
+  test_idn_to_ascii_one("ABCDEF", "abcdef");
+  test_idn_to_ascii_one("abcdef", "abcdef");
+  test_idn_to_ascii_one("my_HOST.local", "my_host.local");
+#if TD_WINDOWS
+  // Non-ASCII hosts can be converted only on Windows; other platforms return an error for them
+  test_idn_to_ascii_one("abæcdöef", "xn--abcdef-qua4k");
+  test_idn_to_ascii_one("schön", "xn--schn-7qa");
+  test_idn_to_ascii_one("ยจฆฟคฏข", "xn--22cdfh1b8fsa");
+  test_idn_to_ascii_one("☺", "xn--74h");
+  test_idn_to_ascii_one("правда", "xn--80aafi6cg");
+  test_idn_to_ascii_one("büücher", "xn--bcher-kvaa");
+  test_idn_to_ascii_one("BüüCHER", "xn--bcher-kvaa");
+  test_idn_to_ascii_one("bücüher", "xn--bcher-kvab");
+  test_idn_to_ascii_one("bücherü", "xn--bcher-kvae");
+  test_idn_to_ascii_one("ýbücher", "xn--bcher-kvaf");
+  test_idn_to_ascii_one("übücher", "xn--bcher-jvab");
+  test_idn_to_ascii_one("bücher.tld", "xn--bcher-kva.tld");
+  test_idn_to_ascii_one("кто.рф", "xn--j1ail.xn--p1ai");
+  test_idn_to_ascii_one("wіkіреdіа.org", "xn--wkd-8cdx9d7hbd.org");
+  test_idn_to_ascii_one("cnwin2k8中国.avol.com", "xn--cnwin2k8-sd0mx14e.avol.com");
+  test_idn_to_ascii_one("win-2k12r2-addc.阿伯测阿伯测ad.hai.com", "win-2k12r2-addc.xn--ad-tl3ca3569aba8944eca.hai.com");
+  test_idn_to_ascii_one("✌️.ws", "xn--7bi.ws");
+  test_idn_to_ascii_one("⛧", "xn--59h");
+#endif
+}