1
0
mirror of https://codeberg.org/Freeyourgadget/Gadgetbridge synced 2024-12-28 03:25:49 +01:00

better bengali transliteration. [master]

This commit is contained in:
Utsob Roy 2018-08-24 14:42:27 +06:00
parent 52b6b8db73
commit 8ca1b6e864

View File

@ -1,110 +1,171 @@
package nodomain.freeyourgadget.gadgetbridge.util;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.regex.*;
public class BengaliLanguageUtils extends LanguageUtils {
private final static char BENGALI_JOIN_CHAR = '্';
private final static HashMap<Character, String> numbers = new HashMap<Character, String>() {
private final static Hashtable<String, String> composites = new Hashtable<String, String>() {
{
put('',"0"); put('১',"1"); put('২',"2"); put('৩',"3"); put('',"4");
put('৫',"5"); put('৬',"6"); put('',"7"); put('৮',"8");
put('৯',"9");
put("ক্ষ", "kkh");
put("ঞ্চ", "NC");
put("ঞ্ছ", "NCh");
put("ঞ্জ", "Ng");
put("জ্ঞ", "gg");
put("ঞ্ঝ", "Ngh");
put("্র", "r");
put("্ল", "l");
put("ষ্ম", "SSh");
put("র্", "r");
put("্য", "y");
put("্ব", "w");
}
};
private final static Hashtable<String, String> letters = new Hashtable<String, String>() {
{
put("", "aa");
put("", "a");
put("", "i");
put("", "ii");
put("", "u");
put("", "uu");
put("", "ri");
put("", "e");
put("", "oi");
put("", "o");
put("", "ou");
put("", "k");
put("", "kh");
put("", "g");
put("", "gh");
put("", "ng");
put("", "ch");
put("", "chh");
put("", "j");
put("", "jh");
put("", "Ng");
put("", "T");
put("", "Th");
put("", "D");
put("", "Dh");
put("", "N");
put("", "t");
put("", "th");
put("", "d");
put("", "dh");
put("", "n");
put("", "p");
put("", "ph");
put("", "b");
put("", "bh");
put("", "m");
put("", "J");
put("", "r");
put("", "l");
put("", "sh");
put("", "Sh");
put("", "s");
put("", "h");
put("", "rh");
put("", "rH");
put("", "y");
put("", "t");
put("", "0");
put("", "1");
put("", "2");
put("", "3");
put("", "4");
put("", "5");
put("", "6");
put("", "7");
put("", "8");
put("", "9");
put("", "aa");
put("ি", "i");
put("", "ii");
put("", "u");
put("", "uu");
put("", "r");
put("", "e");
put("", "o");
put("", "oi");
put("", "ou");
put("", "ou");
put("", "ng");
put("", "h");
put("", "nN");
put("", ".");
}
};
/**
 * Lookup table from a single Bengali vowel character to its Latin rendering.
 *
 * Keys include the standalone vowel letters (অ … ঔ) as well as the vowel
 * signs (া … ৌ); where a vowel has both forms, both map to the same string.
 * The table is filled by the instance initializer of an anonymous HashMap
 * subclass and is consulted per character by transliterate.
 */
private final static HashMap<Character, String> vowels = new HashMap<Character, String>() {
{
put('অ', "o"); put('আ', "a"); put('ই', "i"); put('ঈ', "ee");
put('উ', "u"); put('ঊ', "oo"); put('ঋ', "ri"); put('এ', "e");
put('ঐ', "oi"); put('ও', "o"); put('ঔ', "ou"); put('া', "a");
put('ি', "i"); put('ী', "ee"); put('ু', "u"); put('ূ', "oo");
put('ৃ', "ri"); put('ে', "e"); put('ৈ', "oi"); put('ো', "o");
put('ৌ', "ou");
}
};
/**
 * Lookup table from a single Bengali consonant (or consonant-like sign) to
 * its Latin rendering.
 *
 * The mapping is lossy: several distinct letters share one output (for
 * example চ and ছ both give "ch", ট and ত both give "t"), and ঁ and ঃ map to
 * the empty string, so they produce no output at all.
 * transliterate looks up both the current and the previous character here.
 */
private final static HashMap<Character, String> consonants = new HashMap<Character, String>() {
{
put('ঁ', ""); put('ং', "ng"); put('ঃ', "");
put('ক', "k"); put('খ', "kh"); put('গ', "g"); put('ঘ', "gh"); put('ঙ', "ng");
put('চ', "ch"); put('ছ', "ch"); put('জ', "j"); put('ঝ', "jh"); put('ঞ', "ng");
put('ট', "t"); put('ঠ', "th"); put('ড', "d"); put('ঢ', "dh"); put('ণ', "n");
put('ত', "t"); put('থ', "th"); put('দ', "d"); put('ধ', "dh"); put('ন', "n");
put('প', "p"); put('ফ', "f"); put('ব', "b"); put('ভ', "v"); put('ম', "m");
put('য', "z"); put('র', "r"); put('ল', "l"); put('শ', "sh");
put('ষ', "sh"); put('স', "s"); put('হ', "h");
put('ৎ', "t"); put('ড়', "r"); put('ঢ়', "r"); put('য়', "y");
}
};
/**
 * Characters that transliterate checks before any other table; when the
 * current character is a key here, the mapped string is appended as-is.
 *
 * NOTE(review): ব and য় are also keys of the consonants table, and the only
 * currency sign (৳) sits in the joins table instead. The contents of the two
 * tables look like they may have been swapped — confirm against the
 * upstream file before relying on this mapping.
 */
private final static HashMap<Character, String> symbols = new HashMap<Character, String>() {
{
put('ব', "w");
put('য়', "y");
}
};
/**
 * Characters that get a special rendering when they directly follow
 * BENGALI_JOIN_CHAR: transliterate appends the mapped string and skips the
 * following character.
 *
 * NOTE(review): the single entry is the currency sign ৳, which would not
 * normally follow a join character; the entries of this table and of symbols
 * may have been swapped — confirm against the upstream file.
 */
private final static HashMap<Character, String> joins = new HashMap<Character, String>() {
{
put('৳', "$");
}
};
/**
 * Reports whether the given text contains the Bengali join character
 * (U+09CD, the same code point that BENGALI_JOIN_CHAR holds).
 *
 * The previous body was string.contains(string), which is true for every
 * non-null input and so never actually inspected the text.
 *
 * @param string text to inspect; must not be null
 * @return true if the join character occurs anywhere in string
 */
private static boolean hasJoinedInString(String string) {
    // Written as an escape so the check survives tools that strip or
    // re-encode non-ASCII literals; keep in sync with BENGALI_JOIN_CHAR.
    return string.indexOf('\u09CD') >= 0;
}
private final static String pattern = "(র্){0,1}([অ-হড়-য়](্([অ-মশ-হড়-য়]))*)((){0,1}(্([য-ল]))){0,1}([া-ৌ]){0,1}|[্ঁঃংৎ০-৯।]| ";
public static String transliterate(String txt) {
if (txt.isEmpty()) {
return txt;
}
char[] charArray = txt.toCharArray();
StringBuilder romanizedBuilder = new StringBuilder();
char last = '\0';
for(int i = 0; i < txt.length(); i++) {
char currentChar = charArray[i];
if (symbols.containsKey(currentChar)) {
romanizedBuilder.append(symbols.get(currentChar));
Pattern r = Pattern.compile(pattern);
Matcher m = r.matcher(txt);
StringBuffer sb = new StringBuffer();
while (m.find()) {
String appendableString = "";
String reff = m.group(1);
if (reff != null) {
appendableString = appendableString + "rr";
}
else if (numbers.containsKey(currentChar)) {
romanizedBuilder.append(numbers.get(currentChar));
}
else if (vowels.containsKey(currentChar)) {
romanizedBuilder.append(vowels.get(currentChar));
}
else if (consonants.containsKey(currentChar)) {
if (last != '\0' && consonants.containsKey(last)) {
romanizedBuilder.append('o');
int g = 0;
while (g < 5) {
String key = m.group(g);
if (key != null) {
boolean hasKey = composites.containsKey(key);
if (hasKey) {
appendableString = appendableString + composites.get(key);
break;
}
hasKey = letters.containsKey(key);
if (hasKey) {
appendableString = appendableString + letters.get(key);
break;
}
}
romanizedBuilder.append(consonants.get(currentChar));
} else if (currentChar == BENGALI_JOIN_CHAR) {
if (i + 1 < txt.length() && joins.containsKey(charArray[i + 1])) {
romanizedBuilder.append(joins.get(charArray[i + 1]));
i++;
continue;
}
} else {
romanizedBuilder.append(currentChar);
g = g + 1;
}
g = 5;
while (g < 9) {
String key = m.group(g);
if (key != null) {
boolean hasKey = composites.containsKey(key);
if (hasKey) {
appendableString = appendableString + composites.get(key);
break;
}
hasKey = letters.containsKey(key);
if (hasKey) {
appendableString = appendableString + letters.get(key);
break;
}
}
g = g + 1;
}
String kaar = m.group(9);
if (kaar != null) {
boolean hasKey = letters.containsKey(kaar);
if (hasKey) {
appendableString = appendableString + letters.get(kaar);
}
} else if (appendableString.length() > 0 && !appendableString.equals(".")) {
appendableString = appendableString + "a";
}
String others = m.group(0);
if (others != null) {
last = currentChar;
if (appendableString.length() <= 0) {
appendableString = appendableString + others;
}
}
m.appendReplacement(sb, appendableString);
}
String romanized = romanizedBuilder.toString();
if (vowels.containsKey(charArray[charArray.length - 1])
&& hasJoinedInString(txt)
&& romanized.toCharArray()[romanized.length() - 1] == 'y') {
romanizedBuilder.append('o');
}
return romanizedBuilder.toString();
m.appendTail(sb);
return sb.toString();
}
}
}