diff --git a/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/BengaliLanguageUtils.java b/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/BengaliLanguageUtils.java index 943777435..8c91eb7b9 100644 --- a/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/BengaliLanguageUtils.java +++ b/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/BengaliLanguageUtils.java @@ -17,59 +17,153 @@ package nodomain.freeyourgadget.gadgetbridge.util; import java.util.HashMap; +import java.util.regex.*; +// Why does this class extend LanguageUtils? +// It does so only because the previous implementation already did. public class BengaliLanguageUtils extends LanguageUtils { - - private final static char BENGALI_JOIN_CHAR = '্'; - - private final static HashMap numbers = new HashMap() { + // Composite Letters. + private final static HashMap composites = new HashMap() { { - put('০',"0"); put('১',"1"); put('২',"2"); put('৩',"3"); put('৪',"4"); - put('৫',"5"); put('৬',"6"); put('৭',"7"); put('৮',"8"); - put('৯',"9"); + put("ক্ষ", "kkh"); + put("ঞ্চ", "NC"); + put("ঞ্ছ", "NCh"); + put("ঞ্জ", "Ng"); + put("জ্ঞ", "gg"); + put("ঞ্ঝ", "Ngh"); + put("্র", "r"); + put("্ল", "l"); + put("ষ্ম", "SSh"); + put("র্", "r"); + put("্য", "y"); + put("্ব", "w"); + } + }; + // Vowels, vowel signs, anusvara, visarga and danda. + private final static HashMap vowelsAndHasants = new HashMap() { + { + put("আ", "aa"); + put("অ", "a"); + put("ই", "i"); + put("ঈ", "ii"); + put("উ", "u"); + put("ঊ", "uu"); + put("ঋ", "ri"); + put("এ", "e"); + put("ঐ", "oi"); + put("ও", "o"); + put("ঔ", "ou"); + put("া", "aa"); + put("ি", "i"); + put("ী", "ii"); + put("ু", "u"); + put("ূ", "uu"); + put("ৃ", "r"); + put("ে", "e"); + put("ো", "o"); + put("ৈ", "oi"); + put("ৗ", "ou"); + put("ৌ", "ou"); + put("ং", "ng"); + put("ঃ", "h"); + put("।", "."); } }; - private final static HashMap vowels = new HashMap() { + // Single Character Letters. 
+ private final static HashMap letters = new HashMap() { { - put('অ', "o"); put('আ', "a"); put('ই', "i"); put('ঈ', "ee"); - put('উ', "u"); put('ঊ', "oo"); put('ঋ', "ri"); put('এ', "e"); - put('ঐ', "oi"); put('ও', "o"); put('ঔ', "ou"); put('া', "a"); - put('ি', "i"); put('ী', "ee"); put('ু', "u"); put('ূ', "oo"); - put('ৃ', "ri"); put('ে', "e"); put('ৈ', "oi"); put('ো', "o"); - put('ৌ', "ou"); + put("আ", "aa"); + put("অ", "a"); + put("ই", "i"); + put("ঈ", "ii"); + put("উ", "u"); + put("ঊ", "uu"); + put("ঋ", "ri"); + put("এ", "e"); + put("ঐ", "oi"); + put("ও", "o"); + put("ঔ", "ou"); + put("ক", "k"); + put("খ", "kh"); + put("গ", "g"); + put("ঘ", "gh"); + put("ঙ", "ng"); + put("চ", "ch"); + put("ছ", "chh"); + put("জ", "j"); + put("ঝ", "jh"); + put("ঞ", "Ng"); + put("ট", "T"); + put("ঠ", "Th"); + put("ড", "D"); + put("ঢ", "Dh"); + put("ণ", "N"); + put("ত", "t"); + put("থ", "th"); + put("দ", "d"); + put("ধ", "dh"); + put("ন", "n"); + put("প", "p"); + put("ফ", "ph"); + put("ব", "b"); + put("ভ", "bh"); + put("ম", "m"); + put("য", "J"); + put("র", "r"); + put("ল", "l"); + put("শ", "sh"); + put("ষ", "Sh"); + put("স", "s"); + put("হ", "h"); + put("ড়", "rh"); + put("ঢ়", "rH"); + put("য়", "y"); + put("ৎ", "t"); + put("০", "0"); + put("১", "1"); + put("২", "2"); + put("৩", "3"); + put("৪", "4"); + put("৫", "5"); + put("৬", "6"); + put("৭", "7"); + put("৮", "8"); + put("৯", "9"); + put("া", "aa"); + put("ি", "i"); + put("ী", "ii"); + put("ু", "u"); + put("ূ", "uu"); + put("ৃ", "r"); + put("ে", "e"); + put("ো", "o"); + put("ৈ", "oi"); + put("ৗ", "ou"); + put("ৌ", "ou"); + put("ং", "ng"); + put("ঃ", "h"); + put("ঁ", "nN"); + put("।", "."); } }; - private final static HashMap consonants = new HashMap() { - { - put('ঁ', ""); put('ং', "ng"); put('ঃ', ""); - put('ক', "k"); put('খ', "kh"); put('গ', "g"); put('ঘ', "gh"); put('ঙ', "ng"); - put('চ', "ch"); put('ছ', "ch"); put('জ', "j"); put('ঝ', "jh"); put('ঞ', "ng"); - put('ট', "t"); put('ঠ', "th"); put('ড', "d"); put('ঢ', "dh"); 
put('ণ', "n"); - put('ত', "t"); put('থ', "th"); put('দ', "d"); put('ধ', "dh"); put('ন', "n"); - put('প', "p"); put('ফ', "f"); put('ব', "b"); put('ভ', "v"); put('ম', "m"); - put('য', "z"); put('র', "r"); put('ল', "l"); put('শ', "sh"); - put('ষ', "sh"); put('স', "s"); put('হ', "h"); - put('ৎ', "t"); put('ড়', "r"); put('ঢ়', "r"); put('য়', "y"); - } - }; + // The regex to extract Bengali characters in nested groups. + private final static String pattern = "(র্){0,1}(([অ-হড়-য়])(্([অ-মশ-হড়-য়]))*)((‍){0,1}(্([য-ল]))){0,1}([া-ৌ]){0,1}|([্ঁঃংৎ০-৯।])| "; + private final static Pattern bengaliRegex = Pattern.compile(pattern); - private final static HashMap symbols = new HashMap() { - { - put('ব', "w"); - put('য়', "y"); + private static String getVal(String key) { + if (key != null) { + String comp = composites.get(key); + if (comp != null) { + return comp; + } + String sl = letters.get(key); + if (sl != null) { + return letters.get(key); + } } - }; - - private final static HashMap joins = new HashMap() { - { - put('৳', "$"); - } - }; - - private static boolean hasJoinedInString(String string) { - return string.contains(string); + return null; } public static String transliterate(String txt) { @@ -77,50 +171,72 @@ public class BengaliLanguageUtils extends LanguageUtils { return txt; } - char[] charArray = txt.toCharArray(); - - StringBuilder romanizedBuilder = new StringBuilder(); - char last = '\0'; - - for(int i = 0; i < txt.length(); i++) { - char currentChar = charArray[i]; - - if (symbols.containsKey(currentChar)) { - romanizedBuilder.append(symbols.get(currentChar)); + Matcher m = bengaliRegex.matcher(txt); + StringBuffer sb = new StringBuffer(); + while (m.find()) { + String appendableString = ""; + String reff = m.group(1); + if (reff != null) { + appendableString = appendableString + "rr"; } - else if (numbers.containsKey(currentChar)) { - romanizedBuilder.append(numbers.get(currentChar)); - } - else if (vowels.containsKey(currentChar)) { - 
romanizedBuilder.append(vowels.get(currentChar)); - } - else if (consonants.containsKey(currentChar)) { - if (last != '\0' && consonants.containsKey(last)) { - romanizedBuilder.append('o'); - } - romanizedBuilder.append(consonants.get(currentChar)); - } else if (currentChar == BENGALI_JOIN_CHAR) { - if (i + 1 < txt.length() && joins.containsKey(charArray[i + 1])) { - romanizedBuilder.append(joins.get(charArray[i + 1])); - i++; - continue; - } + // This is a filter-down approach: the larger groups are tried first. + // If one of them matches, stop there; otherwise fall through to the next step. + // This helps with some corner cases. + String mainPart = getVal(m.group(2)); + if (mainPart != null) { + appendableString = appendableString + mainPart; } else { - romanizedBuilder.append(currentChar); + String firstPart = getVal(m.group(3)); + if (firstPart != null) { + appendableString = appendableString + firstPart; + } + int g = 4; + while (g < 6) { + String part = getVal(m.group(g)); + if (part != null) { + appendableString = appendableString + part; + break; + } + g = g + 1; + } } + int g = 6; + while (g < 10) { + String key = getVal(m.group(g)); + if (key != null) { + appendableString = appendableString + key; + break; + } + g = g + 1; + } + String kaar = m.group(10); + if (kaar != null) { + String kaarStr = letters.get(kaar); + if (kaarStr != null) { + appendableString = appendableString + kaarStr; + } + } else if (appendableString.length() > 0 && !vowelsAndHasants.containsKey(m.group(0))) { + // Add 'a' like ITRANS if no vowel is present. + // TODO: Add it dynamically using Bengali grammar rules. 
+ appendableString = appendableString + "a"; + } + String singleton = m.group(11); + if (singleton != null) { + String singleStr = letters.get(singleton); + if (singleStr != null) { + appendableString = appendableString + singleStr; + } + } + String others = m.group(0); + if (others != null) { - last = currentChar; + if (appendableString.length() <= 0) { + appendableString = appendableString + others; + } + } + m.appendReplacement(sb, appendableString); } - - String romanized = romanizedBuilder.toString(); - - if (vowels.containsKey(charArray[charArray.length - 1]) - && hasJoinedInString(txt) - && romanized.toCharArray()[romanized.length() - 1] == 'y') { - romanizedBuilder.append('o'); - } - - return romanizedBuilder.toString(); + m.appendTail(sb); + return sb.toString(); } - -} \ No newline at end of file +} diff --git a/app/src/test/java/nodomain/freeyourgadget/gadgetbridge/test/LanguageUtilsTest.java b/app/src/test/java/nodomain/freeyourgadget/gadgetbridge/test/LanguageUtilsTest.java index eb5aa1753..50c56526c 100644 --- a/app/src/test/java/nodomain/freeyourgadget/gadgetbridge/test/LanguageUtilsTest.java +++ b/app/src/test/java/nodomain/freeyourgadget/gadgetbridge/test/LanguageUtilsTest.java @@ -17,14 +17,14 @@ import static org.junit.Assert.assertTrue; public class LanguageUtilsTest extends TestBase { @Test public void testStringTransliterateCyrillic() throws Exception { - //input with cyrillic and diacritic letters + // input with cyrillic and diacritic letters String input = "Прõсто текčт"; String output = LanguageUtils.transliterate(input); String result = "Prosto tekct"; assertEquals("Transliteration failed", result, output); } - + @Test public void testStringTransliterateHebrew() throws Exception { String input = "בדיקה עברית"; @@ -58,12 +58,17 @@ public class LanguageUtilsTest extends TestBase { } public void testStringTransliterateBengali() throws Exception { - //input with cyrillic and diacritic letters - String input = "অনিরুদ্ধ"; - String output = 
LanguageUtils.transliterate(input); - String result = "oniruddho"; + // inputs with Bengali letters, conjuncts and punctuation + String[] inputs = { "অনিরুদ্ধ", "বিজ্ঞানযাত্রা চলছে চলবে।", "আমি সব দেখেশুনে ক্ষেপে গিয়ে করি বাঙলায় চিৎকার!" }; + String[] outputs = { "aniruddha", "biggaanaJaatraa chalachhe chalabe.", + "aami saba dekheshune kkhepe giye kari baangalaaya chitkaara!" }; - assertEquals("Transliteration failed", result, output); + String result; + + for (int i = 0; i < inputs.length; i++) { + result = LanguageUtils.transliterate(inputs[i]); + assertEquals("Transliteration failed", outputs[i], result); + } } @Test @@ -82,7 +87,8 @@ public class LanguageUtilsTest extends TestBase { @Test public void testTransliterateOption() throws Exception { setDefaultTransliteration(); - assertFalse("Transliteration option fail! Expected 'Off' by default, but result is 'On'", LanguageUtils.transliterate()); + assertFalse("Transliteration option fail! Expected 'Off' by default, but result is 'On'", + LanguageUtils.transliterate()); enableTransliteration(true); assertTrue("Transliteration option fail! Expected 'On', but result is 'Off'", LanguageUtils.transliterate());