2021-09-20 21:30:23 +02:00
|
|
|
//
|
2024-01-01 01:07:21 +01:00
|
|
|
// Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2024
|
2021-09-20 21:30:23 +02:00
|
|
|
//
|
|
|
|
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
|
|
|
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
//
|
|
|
|
#include "td/utils/emoji.h"
|
|
|
|
|
2022-01-13 21:43:22 +01:00
|
|
|
#include "td/utils/base64.h"
|
2022-03-11 19:38:48 +01:00
|
|
|
#include "td/utils/FlatHashSet.h"
|
2022-01-13 21:43:22 +01:00
|
|
|
#include "td/utils/Gzip.h"
|
2021-09-21 05:32:34 +02:00
|
|
|
|
2021-09-20 21:30:23 +02:00
|
|
|
namespace td {
|
|
|
|
|
2023-05-29 16:15:48 +02:00
|
|
|
// upper bound, in bytes, on the length of a single entry of the built-in emoji table;
// is_emoji_element verifies it with a CHECK for every entry while the table is being built
static constexpr size_t MAX_EMOJI_LENGTH = 28;
|
|
|
|
|
|
|
|
// Checks whether str is a single entry of the built-in emoji table,
// optionally followed by exactly one variation selector-16 (U+FE0F, bytes EF B8 8F).
// The table is built once, on the first call, and is empty if zlib is unavailable.
static bool is_emoji_element(Slice str) {
  static const FlatHashSet<Slice, SliceHash> emojis = [max_emoji_length = MAX_EMOJI_LENGTH] {
#if TD_HAVE_ZLIB
    // base64url-encoded gzip stream; after decoding every table entry is followed by a single space
    Slice packed_table(
        "eJxtmlly40iWRbdCs_rrr56H3WVmzBHE4A6AxCCFqiodcBFOKShSComawqyWglpAl1lvoP0wcdPK2vrDnY-mC8e75_"
        "kkRUzLdjEtu8WUbWIbYwuxbWPbxXZYTOuX2F4XU13GVi2m5mNsn2L7HNuX2JaxJbGlsWWx5bGZ2GxsRWw_FlMbP9s_xRbHb-"
        "P4bRyzjWN2b2N7F1t8tovjd_Hn3T62-N7uLrbH2KL2LGrO4vvO4vvOLhfTeXzneXzmPI57Hp87j3mdrxfT1_j5dRUbccz_"
        "a3z3Nnrbni2mq5jXVbX428VPb2P3Jovd-5TO0Fm6gq6kQ_d-Rbemq-kaujZ2H3-mO0Xf6PZ0B7pbuju673QPdEe6x9h9yul42_InOkZZ_"
        "kL3ho6slu_"
        "o3tN9oPtI94nuM90XuiVdQkf2S3wsGXl5GhkfS3ws8bHExxIfS3ws8bHEx5LsY9Vjd0Z3TveV7oLuj3RXdNd0uFzu6G7o8LvE7xK_S_wu7-"
        "nwu8TvEr_LJ7pnuhe6V7ofsUuwn2A_wX6C_QT7CfYT7CfYT7CfYD_BfoL9BPsJ9hPsJ9hPsJ9gP8F-gv0E-wn2E-wn2E-wn2A_wX6C_"
        "QT7CfYT7CfYT7Cf_Inuz3S_0jm6nm6g83SXdBu6kS7QbelAl4AuAV0CugR0CegS0CWgS0CXMFUS-CXwS-CXwC-BXwK_"
        "BHQJ6FLQpaBLQZeCLgVdCroUdCnoUtCloEtBl4IuBV0KuhR0KehS0KWgS0GXgi7Ffor9FPsp9lPsp9hPsZ9iP8V-iv0U-yn2U-yn2E-xn2I_"
        "xX6K_RT7KfZT7KfYT7GfYj_Ffor9FPsp9lPsp0yfFAYpDDIYZDDIYJDBIINBBoMMBhkMMhhkMMhgkOE3w2-G3wy_GX4z_"
        "Gb4zfCb4TfDb4bfDL8ZfjP8ZvjN8JvhN8Nvht8Mvxl-M_"
        "xmGMxwlOEow1GGowxHGY4yHGU4ynGU4yjHUY6jHEc5jnIc5TjKcZTjKMdRTlVzqppT1Zyq5rjMcZnjMsdljsucBZGzIHIWRM6CyFkQOQsiZ0Hk"
        "LIicBZGzIHIWRM6CyCGUQyiHUA6hHEI5hHII5RDKIZRDKIdQDqEcQjmEcgjlEMohlDMjcmZEzozIAZYzI3JmRM6MyOGXwy-HXw6_HH45_"
        "HL4GdAZqBmoGagZqBmoGagZqBmoGagZqBmoGagZqBmoGagZqBmoGagZqBmoGagZqBmoGagZqBmoGagZqBmoGagZqBmoGagZqBmoGagZqBmoGag"
        "ZqBmoGagZqBmoGagZqBmoGagZqBmoGagZqBmoGagZqBmoGagZqBmoGaiZEzVmnQWdZdZZ-Fn4WfhZ-Fn4WfhZ-Fn4WfhZ-Fn4WfhZ-Fn4WfhZ-"
        "Fn4WfhZ-Fn4WfhZ-Fn4WfhZ-Fn4WfhZ-Fn4WfhZ-Fn4WfhZ-Fn4WfhZ-Fn4WfhZ-Fn4WfhZ-Fn4WfhZ-Fn4WfhZ-Fn4WfhZ-Fn4WfhZ-"
        "Fn4WfhZ-Fn4WfhZ-Fn4FfAr4FfAr4BfAb8CfgX8CvgV8CvgV8CvgF8BvwJ-BfwK-BXwK-BXwK-AXwG_An4F_"
        "Ar4FfAr4FfAr4BfAb8CfgX8CvgV8CvgV8CvgF8BvwJ-BfwK-BXwK-BXwK-AXwG_An4F_"
        "Ar4FfAr4FfAr4BfAb8CfgX8CvgV8CvgV8CvgF8BvwJqJdRKqJVQK6FWQq2EWgm1Emol1EqolVAroVZCrYRaCbUSaiXUSqiVUCuhVkKthFoJtRJ"
        "qJdRKqJVQK6FWQq2EWgm1Emol1EqolVAroVZCrYRaCbUSaiXUSqiVUCuhVkKthFoJtRJqJdRKqJVQK6FWQq2EWgm1Emol1EqolVAroVZCrWTWV"
        "divsF9hv8J-hfMK5xXOK5xXOK9wXuG8wnmF8wrnFc4rnFc4r3Be4bzCeYXzCucVziucVzivcF7hvMJ5RX4r3rHiHSska9Jdk-"
        "6adNcskjXlril3Tblryl1T7ppy15S7ptw15a4pd025a8pd47fGb43fGr815a4xXWO6xnSN6RrTNQnVJFRjusZ0jeka0zWma0zXmK4xXWO6xnSN"
        "6RrTNaZrHNWYrjFdY7qm3DXlril3Tblryl1T7ppy15S7ptw15a4pd025a8pdU-"
        "6acteUu6bcNeWuwVnDr4ZfDb8afjX8Gvg18Gvg18CvgV8DvwZ-Dfwa-DXwa-DXwK-BXwO_Bn4N_"
        "FrGaxmvZbyW8VrGaxmvZbyW8VrGaxmvZbyW8VrGaxmvZbz2NB71aKlHSz1a6tFSj5Z6tNSjpR4t9WipR0s9WurRUo-"
        "WerTUo6UeLfVoqUdLPVrq0VKPlnq01KOlHi31aKlHSz1a6tFSj5Z6tNSjpR4t9WipR0s9WurRUo-"
        "WerTUo6UeLfVoqUdLPVrq0VKPlnq01KOlHh38Ovh18Ovg18Gvg18HoQ4kHUg6kHSA6ADRAaLDdIfpDtMdpjuy78i-"
        "I7WO1DpS60itI7WO1DpS60itI7ULWF3A6gJWF7C6gNUFrC5gdQGrC1hdwOoCVhe87QIujnQdBXUU1FFQR_"
        "aO7B3ZOwrqKKjDh8OHw4ejoI6COgrqKKjDm8Obw5vDmyNJR5KOJB1JOpJ0JOlI0pGkI0lHko4kHUk6kDgK6iioo6DulDgFdRTUUVAHNQc1BzUH"
        "NQc1BzUHNQcwRy0dtXTUsqeWPbXsqWVPLXtq2VPLnmXQswx6lkHPMuhZBj3Ueqj1UOuh1kOth1oPtR5qPdR6qPVQ66HWQ62HWg-"
        "1Hmo91Hqo9VDrodZDrYdaD7Ueaj3Ueqj1UOuh1kOth1oPtR5qPdR6qPVQ66HWQ62HWg-"
        "1Hmo91Hqo9VDrodZDrYdaD7WeudaDrgddD7oedAPoBtANoBtAN4BuAN3ANjLAb4DfAL8BfgP8BvgN8BvgN8BvgN8AvwF-A_wG-A3wG-A3wG-"
        "A3wC_AX4D_Ab4DfAb4DfAb4DfAL8BfgP8BvgN8BvgN8BvgN8AvwF-A_wG-A3wG-A3wG-A3wC_AX4D_"
        "Ab4DfAb4DfAb4DfAL8BfgP8BvgN8BvgN8DPw8_Dz8PPw8_Dz8PPw8_Dz8PPw8_Dz8PPw8_Dz8PPw8_Dz8PPw8_Dz8PPw8_Dz8PPw8_Dz8PPw8_"
        "Dz8PPw8_Dz8PPw8_Dz8PPw8_Dz8PPw8_Dz8PPw8_Dz8PPw8_Dz8PPw8_Dz8PPw8_Dz8PPw8_Dz8PPw8_Dz8PPw8_Dz8PPw2_"
        "DeBvG2zDehvE2jLdhvA3jbRhvw3gbxtsw3obxNow3UoCRAowUYKQAIwUYKcBIAUYKMFKAEbojdEfojtAdoTtCd4TuCN0RuiN0R-"
        "iO0B2hO0J3hO4I3RG6I3RH6I7QHaE7QneE7gjdEbojdEfojtAdoTtCd4TuCN0RGiM0RmiM0BihMUJjhMYIjREaIzRGaIzQGE80oDsCNsAlwCXA"
        "JcAlwCXAJTDhAhMugCSAJIAkgCSAJIAkgCSAJIAkgCSAJIAkYD9gP2A_YD9gP2A_YD9gP2A_4C3gLeAt4C3gLeAt4C3gLTws_rJZ_"
        "OV6Mf30vJh-_rSY3v45tuNiel_GVsW2im0dWx1bE9smtnExLS8XU5bFdhVbfD77FttNbPvYHmKLY2SPiyn-ljeton4VYvu-mNY_"
        "xfYUW3xfvOZO8ZY7xUvuFO-4U7ziTvHSOsWb6hSvolO8dU7xqjnFS-YU75hTvFhO8SI5xYvjFO-"
        "NU7wOTvE2OMXL4BSvfFO87E3NRWzxmSbqmz62-"
        "EwTc23iO5vXxRQvXFO8ak3xpjXFi9YU71lTvGFN8W41xavVFK9SU7wrTfEKNMUb0NR9jC36jFeTqStii_67-LOOn93Gdh9bzCNeNaaz-P6zqD-"
        "LHM-WsSWxxWfP4jvP4jvP4vvOoqezP8V2iC0-fx49n3-ILeZ77hbT1_izeDJP8WCetu9iex_bh8Vff9rF9rL46-d1bMyMd5T4HSV-xwJ_"
        "xzz8yOz7ePqrMhNlyfRYMj2WTI8l02PJ9FiyOpasjiWrY8nqWJ7-"
        "onr6oyHzMGUeprwoZQqmTMGUtZiyFjO21IwtNWNLzZjhGRM5YyJnDJAxQMZEzhglY5SMUTLWdsbazhgvO43HBM2Ym9npT1iYMac_Epx-"
        "j2Mrr9jKKxZuhfOKJyqmdMVjFdlXPFuxXCuW64qtaMVjK9Jdke6KdFcswxVcVtBYAXHFOlmdfhcD4polvGYJr1mua5brmuW6Jvs12a_"
        "Jfg3nNWtxzVBr8luT2vp06-S9Ha_sINSxB3Ss447HOtZxx7MdhepIqKMyHQa7w-IP05tfF3_474eMz384dfOXfzx185d_OnXzl38-"
        "dfOXfzl185d_PXXzl387dfOXfz9185f_OHXzl_88dfOX_zp185e_"
        "XXwYYrtUsFEwKggKtgquFdwo2Cu4VfBdwb2CBwVHBY8KnhW8KGDifPB8UeAVbBSMCoKCrYIrBdcKvim4UbBXcFBwq-"
        "C7gnsFDwqOCp4UPCt4VXBK_lLJXwrvpZK_"
        "VM6XyvlSOV8qZ4KdghsFewUHBbcK7hTcK3hU8KTgWcGLglcFp5w3orpRYhuh2yifjdLY6O0bPT7K8ijLowYcNeAop6NSHYV3FN5RyQfRCEojKI"
        "2gNILSCBpwqzS2mjZbkd8qn61KsFViWyW21Uu3Ir_Vu7YivxXwrWbL9u_f_qDgqOBRwbOCE_"
        "kr2bnSK670iisNeKVxrjTOtfBey9e1fF0r52sNeK0BrwXqWjlf6xXXyvla7_qmAb9pnG96_Ju876TZieFODHdiuNPjO6Wx-_"
        "vH7xU8K3hVcJpRNyrljUp5I-83esWNGN5owBvZuZGdG6EjeFJwetder9hr5L2o7mVwr9myl9O9nO719r3I72V5L8t7odvL-14l2CvnvXLeK-"
        "e9ct4r571AEbwo-N3FidhBdg6yc5CLg1wc5OIghgclf1CqB6V6UIYH5XPQu27l9E4vvdO77vSuO73rTsTuROxOL737-3EOCu4VPCg4KnhWcPL-"
        "XW-_19vv5eJej98r-Xs9_qCnCLyCSwUbBaOCrYIrBdcKvinYKbhRsFdwUHCr4F7Bg4KjgicFLwpeFZzIH5X8UTkflfNR5I_K-"
        "aicj0r1qFSPSvWoVI9K9ahUj0r1qAyPyvAomEcl9qjEHvX2R438qJEfZflRvh71-JMef5KvJ5XgSQM-ifyTBnxScZ_l_VmveJHTV43zKhc_"
        "9K4fyvDHbCe7_Z8__uxj-3NsfWzX8_cf__eHh9h-je32__vhfWzbWfSD34JO_wj-2-fz_Pkyf77Onz_4DeQ33enzef58mT9f588f_Jeg33Snz-"
        "f582X-fJ0_0X2ZdV9m3ZdZ92XWfZl1y1m3nHXLWbecdctZl8y6ZNYlsy6Zdcn8D_vpu_lf_U_Bs4IXBa8KTuR-0X8R-EX_TeAX_VeBX_"
        "TfBX6R-I3EbyR-I_Ebid9I_FbitxK_lfitxG8l_iDxB4k_SPxB4g8Sf5b4s8SfJf4s8WeJv0j8ReIvEn-R-"
        "IvES4mXEi8lXkq8nMVG6IzQGaEzQmeEzgidETojdEbojNCZ9xK_l_i9xO8lfi-x0BmhM0JnhM4Infko8UeJP0r8UeKPEn-S-JPEnyT-"
        "JPEniVUUo6IYFcWoKEZFMSqKUVGMimJUFKOiGBXFqChGRTEqivm9KInEicSJxInEWikmlTiVOJU4lTiVOJM4kziTOJM4kziXOJc4lziXOJd4kH"
        "iQeJB4kHiQ2EvsJfYSe4m9xJcSX0p8KfGlxJcSbyTeSLyReCPxRuIgcZA4SBwkDhJvJd5KvJV4K_FW4iuJryS-kvhK4iuJryW-"
        "lvha4muJryXeSbyTeCfxTuKdxDcS30h8I_GNxDcS7yXeS7yXeC_"
        "xXuKDxAeJDxIfJD5IfCvxrcS3Et9KfCvxncR3Et9JfCfxncTfJf4u8XeJv0v8XeJ7ie8lvpf4XuJ7iR8kfpD4QeIHiR8kfpb4WeJniZ8lfp7F9"
        "udZfAqeFbwoeFVwEmt_ttqfrfZnq_3Zan-22p-t9mer_"
        "dlqf7ban61OWKsT1uqEtTphrU5Yq83cajO32sytNnOrzdxqM7fazK02c6vN3Gozt9qRrHYkqx3Jakey2pGskdhIbCQ2EhuJR4lHiUeJR4nHWVx"
        "pilaaopWmaKUpWmmKVpqilaZopSlaaYpWmqLVo8SPEj9K_"
        "Cjx4yxe5bP4FDwreFHwquAkriSuJK4kriSuJF5JvJJ4JfFK4tUsbjQ3Gs2NRnOj0dxoNDcazY1Gc6PR3Gg0NxrNjUZzo9HcaDQ3Gs2NRnOj0Qn"
        "b6IRtdMI2OmEbnbCNTthGJ2yjE7bRCdvohG10wjY6YRudsI1O2EYnbKMTttEJ2-"
        "iEbXTCNjphG83nRvO50XxuNJ8bzef211l8Cp4VvCh4VXASa4q2mqKtpmirKdpqiraaoq2maKsp2mqKtpqirXbRVrtoq1201S7aahftfprFp-"
        "BZwYuCVwUnsYrSqSiditKpKJ2K4iR2EjuJncTud7E4O3F24uzE2YmzqyWuJa4lriWuJW4kbiRuJG4kbiRuJW4lbiVuJW4l7iTuJO4k7iTuJD6T"
        "-EziM4nPJD6T-Fzic4nPJT6X-FzirxJ_lfirxF8l_irxhcQXEl9IfCHxhcS6BDpdAp0ugU6XQKdLoNtJvJN4J_"
        "FO4p3Eusk43WScbjJONxmnm4zTTcbpJuN0k3G6yTjdZJxuMk43GaebjNNNxukm47QGndag0xp0WoNOa9BpDTqtQac16LQGndag0xp0WoNOa9Bp"
        "DTqtQaebjNNNxukm43STcbrJON1knG4yTjcZp5uM003GHSU-SnyU-CjxUeIXiV8kfpH4ReIXiV8lfpX4VeJXiV9ncS-DvQz2MtjLYC-"
        "DgzgP4jyI8yDOgzgP4jyI8yDOgzgP4jwI3SB0g9ANQjcI3SB0g9ANQjcI3SB0w5PETxI_"
        "Sfwk8dMs9jqtvE4rr9PK67TyOq28Tiuv08rrtPI6rbxOK69d1GsX9dpFvXZRr13U6_bldfvyun153b68bl_"
        "eSmwlthJbia3EhcSFxIXEhcSFxKXEpcSlxKXEpcS6I3ndkbzuSF53JK87ktcdyeuO5HVH8rojed2R_"
        "FritcRridcSryXWAeR1AHkdQF4HkNcB5HUAeR1AXgeQ1wHkdQB5HUBeB5DXAeR1AHkdQF4HkNcB5HUAeR1AXgeQ1wHkdQB5HUBeB5DXAeR1AHk"
        "dQF4HkNcB5HUABf2eEvR7StDvKUG_pwT9nhL0J7igP8EF_Qku6E9wQX-"
        "CC7q4Bl1cgy6uQRfXoItr2Em8k3gn8U7incQ6gIIOoKADKOgACjqAgg6goAMo6AAKOoCCDqCgAyjoAAo6gIIOoKADKOgACjqAgg6goAMo6AAK2"
        "hiDNsagjTFoYwzaGIM2xqCNMWhjDNoYgzbGoP05aH8O2p-D9ueg_TloFw3aRYN20aBdNPy2i_4vsAI1eQ");
    // static, because the Slices stored in the set point into this buffer
    static string unpacked_table = gzdecode(base64url_decode(packed_table).ok()).as_slice().str();
    constexpr size_t EXPECTED_EMOJI_COUNT = 2326;
#else
    string unpacked_table;
    constexpr size_t EXPECTED_EMOJI_COUNT = 0;
#endif
    FlatHashSet<Slice, SliceHash> table;
    table.reserve(EXPECTED_EMOJI_COUNT);

    const size_t total_size = unpacked_table.size();
    size_t entry_begin = 0;
    while (entry_begin < total_size) {
      // every entry is at least two bytes long
      CHECK(unpacked_table[entry_begin] != ' ');
      CHECK(unpacked_table[entry_begin + 1] != ' ');

      // look for the space that terminates the entry
      size_t entry_end = entry_begin + 2;
      while (entry_end < total_size && unpacked_table[entry_end] != ' ') {
        ++entry_end;
      }
      CHECK(entry_end < total_size);

      table.insert(Slice(&unpacked_table[entry_begin], &unpacked_table[entry_end]));
      CHECK(entry_end - entry_begin <= max_emoji_length);

      // continue right after the terminating space
      entry_begin = entry_end + 1;
    }
    CHECK(table.size() == EXPECTED_EMOJI_COUNT);
    return table;
  }();

  const auto length = str.size();
  if (length > MAX_EMOJI_LENGTH + 3) {
    // longer than the longest table entry followed by a 3-byte selector
    return false;
  }
  if (emojis.count(str) != 0) {
    return true;
  }

  // true if the 3 bytes beginning at pos encode the variation selector-16
  const auto is_selector_at = [&str](size_t pos) {
    return str[pos] == '\xEF' && str[pos + 1] == '\xB8' && str[pos + 2] == '\x8F';
  };

  const bool has_trailing_selector = length > 3 && is_selector_at(length - 3);
  if (!has_trailing_selector) {
    return false;
  }
  const bool has_doubled_selector = length >= 6 && is_selector_at(length - 6);
  if (has_doubled_selector) {
    // only a single trailing selector is tolerated
    return false;
  }
  return emojis.count(str.substr(0, length - 3)) != 0;
}
|
|
|
|
|
2023-05-29 16:15:48 +02:00
|
|
|
// Checks whether str is an emoji: either a single emoji element, or several
// emoji elements glued together with zero-width joiners (U+200D, bytes E2 80 8D).
bool is_emoji(Slice str) {
  // all joiners begin with the byte E2; if there is none near the beginning, treat str as one element
  size_t pos = str.substr(0, MAX_EMOJI_LENGTH + 4).find('\xE2');
  if (pos == Slice::npos) {
    return is_emoji_element(str);
  }

  size_t element_begin = 0;
  while (pos + 3 < str.size()) {
    const bool is_joiner = str[pos] == '\xE2' && str[pos + 1] == '\x80' && str[pos + 2] == '\x8D';
    if (!is_joiner) {
      ++pos;
      continue;
    }

    // zero-width joiner \u200D: everything accumulated before it must be a valid element
    if (!is_emoji_element(str.substr(element_begin, pos - element_begin))) {
      return false;
    }
    pos += 3;
    element_begin = pos;
  }

  // the remaining tail is the last element
  return is_emoji_element(str.substr(element_begin));
}
|
|
|
|
|
2021-10-05 15:12:04 +02:00
|
|
|
// Returns a number from 2 to 6 identifying the emoji modifier U+1F3FB..U+1F3FF
// (bytes F0 9F 8F BB..BF) at the very end of the given string, or 0 if the string
// doesn't end with such a modifier.
int get_fitzpatrick_modifier(Slice emoji) {
  const auto size = emoji.size();
  if (size < 4) {
    return 0;
  }

  // the first three bytes are the same for all five modifiers
  const bool has_modifier_prefix =
      emoji[size - 4] == '\xF0' && emoji[size - 3] == '\x9F' && emoji[size - 2] == '\x8F';
  if (!has_modifier_prefix) {
    return 0;
  }

  // the last byte selects the modifier
  const auto last_byte = static_cast<unsigned char>(emoji.back());
  const bool is_modifier_byte = 0xBB <= last_byte && last_byte <= 0xBF;
  if (!is_modifier_byte) {
    return 0;
  }
  return (last_byte - 0xBB) + 2;
}
|
|
|
|
|
2021-10-05 22:20:49 +02:00
|
|
|
// Returns the given string without all Fitzpatrick modifiers at its end;
// the result is a prefix of the argument and refers to the same memory.
Slice remove_fitzpatrick_modifier(Slice emoji) {
  for (;;) {
    if (get_fitzpatrick_modifier(emoji) == 0) {
      return emoji;
    }
    // every modifier is encoded with exactly 4 bytes
    emoji.remove_suffix(4);
  }
}
|
|
|
|
|
2023-01-23 20:10:23 +01:00
|
|
|
// Returns a copy of the given string processed by remove_emoji_modifiers_in_place;
// remove_selectors is passed through unchanged.
string remove_emoji_modifiers(Slice emoji, bool remove_selectors) {
  auto stripped = emoji.str();
  remove_emoji_modifiers_in_place(stripped, remove_selectors);
  return stripped;
}
|
|
|
|
|
2023-01-23 20:10:23 +01:00
|
|
|
// Removes from the string all gender signs attached with a zero-width joiner and all
// Fitzpatrick modifiers; variation selectors-16 are removed only if remove_selectors is true.
// If nothing would be left after the removal, the string is kept unchanged.
void remove_emoji_modifiers_in_place(string &emoji, bool remove_selectors) {
  // the variation selector must stay first, so that it can be excluded by skipping index 0
  static const Slice modifiers[] = {u8"\uFE0F" /* variation selector-16 */,
                                    u8"\u200D\u2640" /* zero width joiner + female sign */,
                                    u8"\u200D\u2642" /* zero width joiner + male sign */,
                                    u8"\U0001F3FB" /* emoji modifier fitzpatrick type-1-2 */,
                                    u8"\U0001F3FC" /* emoji modifier fitzpatrick type-3 */,
                                    u8"\U0001F3FD" /* emoji modifier fitzpatrick type-4 */,
                                    u8"\U0001F3FE" /* emoji modifier fitzpatrick type-5 */,
                                    u8"\U0001F3FF" /* emoji modifier fitzpatrick type-6 */};
  const size_t modifier_count = sizeof(modifiers) / sizeof(*modifiers);
  const size_t first_modifier = remove_selectors ? 0 : 1;

  size_t write_pos = 0;
  size_t read_pos = 0;
  while (read_pos < emoji.size()) {
    // length of the modifier beginning at read_pos; 0 if there is no modifier there
    size_t matched_length = 0;
    for (size_t index = first_modifier; index < modifier_count; index++) {
      const Slice &modifier = modifiers[index];
      const auto modifier_length = modifier.size();
      if (read_pos + modifier_length <= emoji.size() && Slice(&emoji[read_pos], modifier_length) == modifier) {
        matched_length = modifier_length;
        break;
      }
    }

    if (matched_length != 0) {
      // skip the modifier
      read_pos += matched_length;
      continue;
    }

    // keep the byte, compacting the string towards its beginning
    emoji[write_pos] = emoji[read_pos];
    write_pos++;
    read_pos++;
  }

  // nothing has been written if write_pos is 0, so the original string is still intact
  if (write_pos != 0) {
    emoji.resize(write_pos);
  }
}
|
|
|
|
|
2021-12-22 11:40:20 +01:00
|
|
|
// Returns the given emoji without variation selectors-16 (U+FE0F, bytes EF B8 8F);
// strings that aren't recognized by is_emoji are returned as is.
string remove_emoji_selectors(Slice emoji) {
  if (!is_emoji(emoji)) {
    return emoji.str();
  }

  const auto size = emoji.size();
  string result;
  size_t pos = 0;
  while (pos < size) {
    const bool is_selector =
        pos + 3 <= size && emoji[pos] == '\xEF' && emoji[pos + 1] == '\xB8' && emoji[pos + 2] == '\x8F';
    if (is_selector) {
      // skip \uFE0F
      pos += 3;
    } else {
      result += emoji[pos];
      pos++;
    }
  }

  // the result is expected to be still recognized as an emoji
  CHECK(is_emoji(result));
  return result;
}
|
|
|
|
|
2021-09-20 21:30:23 +02:00
|
|
|
} // namespace td
|