From bdb904faf94180695498240259dfe71fbf5ee87c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Rebelo?= Date: Tue, 11 Jul 2023 18:15:49 +0100 Subject: [PATCH] Fix character mark removal before flattening to ASCII --- .../language/impl/FlattenToAsciiTransliterator.java | 7 +++++-- .../gadgetbridge/util/language/LanguageUtilsTest.java | 10 ++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/language/impl/FlattenToAsciiTransliterator.java b/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/language/impl/FlattenToAsciiTransliterator.java index a26e7e10b..01ca0e7e0 100644 --- a/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/language/impl/FlattenToAsciiTransliterator.java +++ b/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/language/impl/FlattenToAsciiTransliterator.java @@ -32,8 +32,11 @@ public class FlattenToAsciiTransliterator implements Transliterator { return txt; } + // Decompose the string into its canonical decomposition (splits base characters from accents/marks) txt = Normalizer.normalize(txt, Normalizer.Form.NFD); - txt = new String(txt.getBytes(StandardCharsets.US_ASCII), StandardCharsets.US_ASCII); - return txt.replaceAll("\\p{M}", ""); + // Remove all marks (characters intended to be combined with another character), keeping the base glyphs + txt = txt.replaceAll("\\p{M}", ""); + // Flatten the resulting string to ASCII + return new String(txt.getBytes(StandardCharsets.US_ASCII), StandardCharsets.US_ASCII); } } diff --git a/app/src/test/java/nodomain/freeyourgadget/gadgetbridge/util/language/LanguageUtilsTest.java b/app/src/test/java/nodomain/freeyourgadget/gadgetbridge/util/language/LanguageUtilsTest.java index fe428818d..68e0e89cf 100644 --- a/app/src/test/java/nodomain/freeyourgadget/gadgetbridge/util/language/LanguageUtilsTest.java +++ b/app/src/test/java/nodomain/freeyourgadget/gadgetbridge/util/language/LanguageUtilsTest.java @@ -7,6 +7,7 @@ import org.junit.Test; import nodomain.freeyourgadget.gadgetbridge.GBApplication; import nodomain.freeyourgadget.gadgetbridge.impl.GBDevice; import nodomain.freeyourgadget.gadgetbridge.test.TestBase; +import nodomain.freeyourgadget.gadgetbridge.util.language.impl.FlattenToAsciiTransliterator; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; @@ -183,6 +184,15 @@ public class LanguageUtilsTest extends TestBase { assertEquals("croatian transliteration failed", expected, output); } + @Test + public void testFlattenToAscii() throws Exception { + final FlattenToAsciiTransliterator transliterator = new FlattenToAsciiTransliterator(); + String input = "ä ș ț ă"; + String output = transliterator.transliterate(input); + String expected = "a s t a"; + assertEquals("flatten to ascii transliteration failed", expected, output); + } + @Test public void testTransliterateOption() throws Exception { enableTransliteration(false);