1
0
mirror of https://codeberg.org/Freeyourgadget/Gadgetbridge synced 2024-07-06 13:41:35 +02:00

Various Fixes

1. Updated test.
2. Fixed a composite letter issue.
3. Moved some redundant code into a function.
This commit is contained in:
Utsob Roy 2018-08-24 22:33:25 +06:00
parent 8f12f10bfe
commit 98e747c5f5
2 changed files with 75 additions and 37 deletions

View File

@ -23,6 +23,37 @@ public class BengaliLanguageUtils extends LanguageUtils {
put("্ব", "w"); put("্ব", "w");
} }
}; };
// Vowels Only
// Lookup table from a Bengali string to its Latin transliteration. Despite the
// label above, the values also include "ng", "h" and ".", so the table
// presumably covers vowels, vowel signs and a few other marks (the field name
// suggests hasant-related signs as well) - TODO confirm.
// The only use visible here is a containsKey() test on the whole regex match
// (m.group(0)) in transliterate(): a match that is a key of this table does
// not get the implicit trailing "a" appended.
// NOTE(review): almost every key shows up as an empty string in this view,
// presumably because the Bengali characters were lost in extraction. As
// written, identical keys would simply overwrite one another, so verify the
// keys against the repository copy before relying on this table.
private final static Hashtable<String, String> vowelsAndHasants = new Hashtable<String, String>() {
{
put("", "aa");
put("", "a");
put("", "i");
put("", "ii");
put("", "u");
put("", "uu");
put("", "ri");
put("", "e");
put("", "oi");
put("", "o");
put("", "ou");
put("", "aa");
put("ি", "i");
put("", "ii");
put("", "u");
put("", "uu");
put("", "r");
put("", "e");
put("", "o");
put("", "oi");
put("", "ou");
put("", "ou");
put("", "ng");
put("", "h");
put("", ".");
}
};
// Single Character Letters. // Single Character Letters.
private final static Hashtable<String, String> letters = new Hashtable<String, String>() { private final static Hashtable<String, String> letters = new Hashtable<String, String>() {
{ {
@ -102,7 +133,21 @@ public class BengaliLanguageUtils extends LanguageUtils {
}; };
// The regex to extract Bengali characters in nested groups. // The regex to extract Bengali characters in nested groups.
private final static String pattern = "(র্){0,1}([অ-হড়-য়](্([অ-মশ-হড়-য়]))*)((){0,1}(্([য-ল]))){0,1}([া-ৌ]){0,1}|[্ঁঃংৎ০-৯।]| "; private final static String pattern = "(র্){0,1}(([অ-হড়-য়])(্([অ-মশ-হড়-য়]))*)((){0,1}(্([য-ল]))){0,1}([া-ৌ]){0,1}|[্ঁঃংৎ০-৯।]| ";
/**
 * Resolves the transliteration of a matched fragment.
 *
 * <p>The {@code composites} table is consulted first; {@code letters} is
 * only used when {@code composites} has no entry for the key.
 *
 * @param key the fragment to look up; may be {@code null} (e.g. a regex
 *            group that did not participate in the match)
 * @return the mapped value, or {@code null} when {@code key} is
 *         {@code null} or neither table contains it
 */
private static String getVal(String key) {
    // Nothing to look up for an absent fragment.
    if (key == null) {
        return null;
    }
    // Composite entries take precedence over single letters.
    if (composites.containsKey(key)) {
        return composites.get(key);
    }
    return letters.containsKey(key) ? letters.get(key) : null;
}
public static String transliterate(String txt) { public static String transliterate(String txt) {
if (txt.isEmpty()) { if (txt.isEmpty()) {
@ -118,50 +163,43 @@ public class BengaliLanguageUtils extends LanguageUtils {
if (reff != null) { if (reff != null) {
appendableString = appendableString + "rr"; appendableString = appendableString + "rr";
} }
int g = 0;
// This is a filter-down approach. First considering larger groups, // This is a filter-down approach. First considering larger groups,
// If any match is found, stop there; otherwise go to the next step. // If any match is found, stop there; otherwise go to the next step.
// Helpful to solve some corner-cases. // Helpful to solve some corner-cases.
while (g < 5) { String mainPart = getVal(m.group(2));
String key = m.group(g); if (mainPart != null) {
if (key != null) { appendableString = appendableString + mainPart;
boolean hasKey = composites.containsKey(key); } else {
if (hasKey) { String firstPart = getVal(m.group(3));
appendableString = appendableString + composites.get(key); if (firstPart != null){
break; appendableString = appendableString + firstPart;
} }
hasKey = letters.containsKey(key); int g = 4;
if (hasKey) { while (g < 6){
appendableString = appendableString + letters.get(key); String part = getVal(m.group(g));
if (part != null){
appendableString = appendableString + part;
break; break;
} }
}
g = g + 1; g = g + 1;
} }
g = 5; }
while (g < 9) { int g = 6;
String key = m.group(g); while (g < 10) {
String key = getVal(m.group(g));
if (key != null){ if (key != null){
boolean hasKey = composites.containsKey(key); appendableString = appendableString + key;
if (hasKey) {
appendableString = appendableString + composites.get(key);
break; break;
} }
hasKey = letters.containsKey(key);
if (hasKey) {
appendableString = appendableString + letters.get(key);
break;
}
}
g = g + 1; g = g + 1;
} }
String kaar = m.group(9); String kaar = m.group(10);
if (kaar != null) { if (kaar != null) {
boolean hasKey = letters.containsKey(kaar); boolean hasKey = letters.containsKey(kaar);
if (hasKey) { if (hasKey) {
appendableString = appendableString + letters.get(kaar); appendableString = appendableString + letters.get(kaar);
} }
} else if (appendableString.length() > 0 && !appendableString.equals(".")) { } else if (appendableString.length() > 0 && !vowelsAndHasants.containsKey(m.group(0))) {
// Adding 'a' like ITRANS if no vowel is present. // Adding 'a' like ITRANS if no vowel is present.
// TODO: Have to add it dynamically using Bengali grammar rules. // TODO: Have to add it dynamically using Bengali grammar rules.
appendableString = appendableString + "a"; appendableString = appendableString + "a";

View File

@ -61,7 +61,7 @@ public class LanguageUtilsTest extends TestBase {
//input with cyrillic and diacritic letters //input with cyrillic and diacritic letters
String input = "অনিরুদ্ধ"; String input = "অনিরুদ্ধ";
String output = LanguageUtils.transliterate(input); String output = LanguageUtils.transliterate(input);
String result = "oniruddho"; String result = "aniruddha";
assertEquals("Transliteration failed", result, output); assertEquals("Transliteration failed", result, output);
} }