From b942db166ee871a1bea611167e392d00280049b5 Mon Sep 17 00:00:00 2001
From: Aniruddha Adhikary
Date: Wed, 22 Nov 2017 23:57:05 +0600
Subject: [PATCH] add Bengali Transliteration

---
Review notes: the contents of the joins/symbols tables were swapped back,
hasJoinedInString() now looks for the join character, generic type arguments
were restored and null input is tolerated. The "index" line of the new file
predates these changes.

 .../util/BengaliLanguageUtils.java            | 133 ++++++++++++++++++
 .../gadgetbridge/util/LanguageUtils.java      |   4 +-
 .../gadgetbridge/test/LanguageUtilsTest.java  |  11 ++
 3 files changed, 147 insertions(+), 1 deletion(-)
 create mode 100644 app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/BengaliLanguageUtils.java

diff --git a/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/BengaliLanguageUtils.java b/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/BengaliLanguageUtils.java
new file mode 100644
index 000000000..2df264b5b
--- /dev/null
+++ b/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/BengaliLanguageUtils.java
@@ -0,0 +1,133 @@
+package nodomain.freeyourgadget.gadgetbridge.util;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Romanizes Bengali text one char at a time using fixed lookup tables.
+ */
+public class BengaliLanguageUtils extends LanguageUtils {
+
+    /** Bengali hasanta (virama); the char after it is looked up in {@link #joins}. */
+    private static final char BENGALI_JOIN_CHAR = '্';
+
+    /** Bengali digits and their ASCII counterparts. */
+    private static final Map<Character, String> numbers = new HashMap<Character, String>() {
+        {
+            put('০',"0"); put('১',"1"); put('২',"2"); put('৩',"3"); put('৪',"4");
+            put('৫',"5"); put('৬',"6"); put('৭',"7"); put('৮',"8");
+            put('৯',"9");
+        }
+    };
+
+    /** Independent vowels and dependent vowel signs. */
+    private static final Map<Character, String> vowels = new HashMap<Character, String>() {
+        {
+            put('অ', "o"); put('আ', "a"); put('ই', "i"); put('ঈ', "ee");
+            put('উ', "u"); put('ঊ', "oo"); put('ঋ', "ri"); put('এ', "e");
+            put('ঐ', "oi"); put('ও', "o"); put('ঔ', "ou"); put('া', "a");
+            put('ি', "i"); put('ী', "ee"); put('ু', "u"); put('ূ', "oo");
+            put('ৃ', "ri"); put('ে', "e"); put('ৈ', "oi"); put('ো', "o");
+            put('ৌ', "ou");
+        }
+    };
+
+    /** Consonants, plus the signs that are handled together with them. */
+    private static final Map<Character, String> consonants = new HashMap<Character, String>() {
+        {
+            put('ঁ', ""); put('ং', "ng"); put('ঃ', "");
+            put('ক', "k"); put('খ', "kh"); put('গ', "g"); put('ঘ', "gh"); put('ঙ', "ng");
+            put('চ', "ch"); put('ছ', "ch"); put('জ', "j"); put('ঝ', "jh"); put('ঞ', "ng");
+            put('ট', "t"); put('ঠ', "th"); put('ড', "d"); put('ঢ', "dh"); put('ণ', "n");
+            put('ত', "t"); put('থ', "th"); put('দ', "d"); put('ধ', "dh"); put('ন', "n");
+            put('প', "p"); put('ফ', "f"); put('ব', "b"); put('ভ', "v"); put('ম', "m");
+            put('য', "z"); put('র', "r"); put('ল', "l"); put('শ', "sh");
+            put('ষ', "sh"); put('স', "s"); put('হ', "h");
+            put('ৎ', "t"); put('ড়', "r"); put('ঢ়', "r"); put('য়', "y");
+        }
+    };
+
+    /** Romanization used for a letter when it directly follows {@link #BENGALI_JOIN_CHAR}. */
+    private static final Map<Character, String> joins = new HashMap<Character, String>() {
+        {
+            put('ব', "w");
+            put('য়', "y");
+        }
+    };
+
+    /** Symbols that have an ASCII stand-in. */
+    private static final Map<Character, String> symbols = new HashMap<Character, String>() {
+        {
+            put('৳', "$");
+        }
+    };
+
+    /**
+     * Tells whether the text contains at least one {@link #BENGALI_JOIN_CHAR}.
+     *
+     * @param string text to inspect
+     * @return {@code true} if the join character occurs anywhere in {@code string}
+     */
+    private static boolean hasJoinedInString(String string) {
+        return string.indexOf(BENGALI_JOIN_CHAR) >= 0;
+    }
+
+    /**
+     * Replaces Bengali characters in {@code txt} with Latin approximations.
+     * Characters found in none of the lookup tables are copied through unchanged.
+     *
+     * @param txt text to romanize; may be {@code null} or empty
+     * @return the romanized text, or {@code txt} itself when it is {@code null} or empty
+     */
+    public static String transliterate(String txt) {
+        if (txt == null || txt.isEmpty()) {
+            return txt;
+        }
+
+        char[] charArray = txt.toCharArray();
+
+        StringBuilder romanizedBuilder = new StringBuilder();
+        char last = '\0';
+
+        for (int i = 0; i < charArray.length; i++) {
+            char currentChar = charArray[i];
+
+            if (symbols.containsKey(currentChar)) {
+                romanizedBuilder.append(symbols.get(currentChar));
+            } else if (numbers.containsKey(currentChar)) {
+                romanizedBuilder.append(numbers.get(currentChar));
+            } else if (vowels.containsKey(currentChar)) {
+                romanizedBuilder.append(vowels.get(currentChar));
+            } else if (consonants.containsKey(currentChar)) {
+                // A consonant directly after another consonant gets an 'o' inserted between them.
+                if (last != '\0' && consonants.containsKey(last)) {
+                    romanizedBuilder.append('o');
+                }
+                romanizedBuilder.append(consonants.get(currentChar));
+            } else if (currentChar == BENGALI_JOIN_CHAR) {
+                if (i + 1 < charArray.length && joins.containsKey(charArray[i + 1])) {
+                    romanizedBuilder.append(joins.get(charArray[i + 1]));
+                    // The joined letter is consumed here; "last" keeps its previous value.
+                    i++;
+                    continue;
+                }
+            } else {
+                romanizedBuilder.append(currentChar);
+            }
+
+            last = currentChar;
+        }
+
+        String romanized = romanizedBuilder.toString();
+
+        // NOTE(review): no value in "vowels" ends in 'y', so this looks unreachable - confirm intent.
+        if (vowels.containsKey(charArray[charArray.length - 1])
+                && hasJoinedInString(txt)
+                && romanized.charAt(romanized.length() - 1) == 'y') {
+            romanizedBuilder.append('o');
+        }
+
+        return romanizedBuilder.toString();
+    }
+
+}
\ No newline at end of file
diff --git a/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/LanguageUtils.java b/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/LanguageUtils.java
index 21246c436..1f321932c 100644
--- a/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/LanguageUtils.java
+++ b/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/LanguageUtils.java
@@ -111,7 +111,9 @@ public class LanguageUtils {
             message.append(transliterate(c));
         }
 
-        return flattenToAscii(message.toString());
+        String messageString = BengaliLanguageUtils.transliterate(message.toString());
+
+        return flattenToAscii(messageString);
     }
 
     /**
diff --git a/app/src/test/java/nodomain/freeyourgadget/gadgetbridge/test/LanguageUtilsTest.java b/app/src/test/java/nodomain/freeyourgadget/gadgetbridge/test/LanguageUtilsTest.java
index 5b09c026a..483f30406 100644
--- a/app/src/test/java/nodomain/freeyourgadget/gadgetbridge/test/LanguageUtilsTest.java
+++ b/app/src/test/java/nodomain/freeyourgadget/gadgetbridge/test/LanguageUtilsTest.java
@@ -57,6 +57,17 @@ public class LanguageUtilsTest extends TestBase {
         assertEquals("Farsi transiteration failed", farsiExpected, farsiActual);
     }
 
+    // NOTE(review): unlike testTransliterateOption below, this method has no @Test annotation;
+    // confirm whether it is meant to run and whether the expected value matches the implementation.
+    public void testStringTransliterateBengali() throws Exception {
+        // input in Bengali script
+        String input = "অনিরুদ্ধ";
+        String output = LanguageUtils.transliterate(input);
+        String result = "oniruddho";
+
+        assertEquals("Transliteration failed", result, output);
+    }
+
     @Test
     public void testTransliterateOption() throws Exception {
         setDefaultTransliteration();