mirror of
https://codeberg.org/Freeyourgadget/Gadgetbridge
synced 2024-11-27 04:16:49 +01:00
Enhancement and Fixes for Bengali Transliteration. (#1263)
* Added various fixes and enhancements for Bengali transliteration. * various fixes and enhancements for Bengali transliteration * fixed a coding typo [master] * Boolean lowercase and added .project in .gitignore * Boolean lowercase and added .project in .gitignore * typo fix [master] * fixed negative index error [master] * fixed negative index error [master] * unprinted character fix [master] * enhanced transliteration [master] * lowercased boolean and replaced Integer with int [master] * removed .settings, .classpath and .project and added them to .gitignore too. * bug fix and multilingual testcase [master]
This commit is contained in:
parent
56477efcdf
commit
e66d0a2d10
3
.gitignore
vendored
3
.gitignore
vendored
@ -31,3 +31,6 @@ proguard/
|
||||
MPChartLib
|
||||
|
||||
fw.dirs
|
||||
**/.project
|
||||
**/.settings
|
||||
**/.classpath
|
||||
|
2
.settings/org.eclipse.buildship.core.prefs
Normal file
2
.settings/org.eclipse.buildship.core.prefs
Normal file
@ -0,0 +1,2 @@
|
||||
connection.project.dir=
|
||||
eclipse.preferences.version=1
|
@ -1,11 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<classpath>
|
||||
<classpathentry kind="src" path="src"/>
|
||||
<classpathentry kind="src" path="gen"/>
|
||||
<classpathentry kind="src" path="java"/>
|
||||
<classpathentry kind="src" path="aidl"/>
|
||||
<classpathentry kind="con" path="com.android.ide.eclipse.adt.ANDROID_FRAMEWORK"/>
|
||||
<classpathentry exported="true" kind="con" path="com.android.ide.eclipse.adt.LIBRARIES"/>
|
||||
<classpathentry exported="true" kind="con" path="com.android.ide.eclipse.adt.DEPENDENCIES"/>
|
||||
<classpathentry kind="output" path="bin/classes"/>
|
||||
</classpath>
|
@ -1,33 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>Gadgetbridge</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>com.android.ide.eclipse.adt.ResourceManagerBuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>com.android.ide.eclipse.adt.PreCompilerBuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.jdt.core.javabuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>com.android.ide.eclipse.adt.ApkBuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>com.android.ide.eclipse.adt.AndroidNature</nature>
|
||||
<nature>org.eclipse.jdt.core.javanature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
@ -1,12 +0,0 @@
|
||||
eclipse.preferences.version=1
|
||||
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
|
||||
org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
|
||||
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
|
||||
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
|
||||
org.eclipse.jdt.core.compiler.compliance=1.7
|
||||
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
|
||||
org.eclipse.jdt.core.compiler.debug.localVariable=generate
|
||||
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
|
||||
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
|
||||
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
|
||||
org.eclipse.jdt.core.compiler.source=1.7
|
@ -22,7 +22,7 @@ import java.util.regex.*;
|
||||
// What's the reason for extending LanguageUtils?
|
||||
// Just doing it because it was already done in the previous code.
|
||||
public class BengaliLanguageUtils extends LanguageUtils {
|
||||
// Composite Letters.
|
||||
// Composite Letters.
|
||||
private final static HashMap<String, String> composites = new HashMap<String, String>() {
|
||||
{
|
||||
put("ক্ষ", "kkh");
|
||||
@ -39,7 +39,25 @@ public class BengaliLanguageUtils extends LanguageUtils {
|
||||
put("্ব", "w");
|
||||
}
|
||||
};
|
||||
|
||||
// Vowels Only
|
||||
private final static HashMap<String, String> vowels = new HashMap<String, String>() {
|
||||
{
|
||||
put("আ", "aa");
|
||||
put("অ", "a");
|
||||
put("ই", "i");
|
||||
put("ঈ", "ii");
|
||||
put("উ", "u");
|
||||
put("ঊ", "uu");
|
||||
put("ঋ", "ri");
|
||||
put("এ", "e");
|
||||
put("ঐ", "oi");
|
||||
put("ও", "o");
|
||||
put("ঔ", "ou");
|
||||
}
|
||||
};
|
||||
|
||||
// Vowels and Hasants
|
||||
private final static HashMap<String, String> vowelsAndHasants = new HashMap<String, String>() {
|
||||
{
|
||||
put("আ", "aa");
|
||||
@ -149,7 +167,8 @@ public class BengaliLanguageUtils extends LanguageUtils {
|
||||
};
|
||||
|
||||
// The regex to extract Bengali characters in nested groups.
|
||||
private final static String pattern = "(র্){0,1}(([অ-হড়-য়])(্([অ-মশ-হড়-য়]))*)((){0,1}(্([য-ল]))){0,1}([া-ৌ]){0,1}|([্ঁঃংৎ০-৯।])| ";
|
||||
private final static String pattern = "(র্){0,1}(([অ-হড়-য়])(্([অ-মশ-হড়-য়]))*)((){0,1}(্([য-ল]))){0,1}([া-ৌ]){0,1}|([্ঁঃংৎ০-৯।])|(\\s)";
|
||||
|
||||
private final static Pattern bengaliRegex = Pattern.compile(pattern);
|
||||
|
||||
private static String getVal(String key) {
|
||||
@ -173,7 +192,15 @@ public class BengaliLanguageUtils extends LanguageUtils {
|
||||
|
||||
Matcher m = bengaliRegex.matcher(txt);
|
||||
StringBuffer sb = new StringBuffer();
|
||||
String lastChar = "";
|
||||
boolean lastHadComposition = false;
|
||||
boolean lastHadKaar = false;
|
||||
boolean nextNeedsO = false;
|
||||
int lastHadO = 0;
|
||||
while (m.find()) {
|
||||
boolean thisNeedsO = false;
|
||||
boolean changePronounciation = false;
|
||||
boolean thisHadKaar = false;
|
||||
String appendableString = "";
|
||||
String reff = m.group(1);
|
||||
if (reff != null) {
|
||||
@ -200,6 +227,10 @@ public class BengaliLanguageUtils extends LanguageUtils {
|
||||
g = g + 1;
|
||||
}
|
||||
}
|
||||
if (m.group(2) != null && m.group(2).equals("ক্ষ")) {
|
||||
changePronounciation = true;
|
||||
thisNeedsO = true;
|
||||
}
|
||||
int g = 6;
|
||||
while (g < 10) {
|
||||
String key = getVal(m.group(g));
|
||||
@ -209,16 +240,24 @@ public class BengaliLanguageUtils extends LanguageUtils {
|
||||
}
|
||||
g = g + 1;
|
||||
}
|
||||
String phala = m.group(8);
|
||||
if (phala != null && phala.equals("্য")) {
|
||||
changePronounciation = true;
|
||||
thisNeedsO = true;
|
||||
}
|
||||
String jukto = m.group(4);
|
||||
if (jukto != null) {
|
||||
thisNeedsO = true;
|
||||
}
|
||||
String kaar = m.group(10);
|
||||
if (kaar != null) {
|
||||
String kaarStr = letters.get(kaar);
|
||||
if (kaarStr != null) {
|
||||
appendableString = appendableString + kaarStr;
|
||||
}
|
||||
} else if (appendableString.length() > 0 && !vowelsAndHasants.containsKey(m.group(0))) {
|
||||
// Adding 'a' like ITRANS if no vowel is present.
|
||||
// TODO: Have to add it dynamically using Bengali grammar rules.
|
||||
appendableString = appendableString + "a";
|
||||
if (kaarStr.equals("i") || kaarStr.equals("ii") || kaarStr.equals("u") || kaarStr.equals("uu")) {
|
||||
changePronounciation = true;
|
||||
}
|
||||
}
|
||||
String singleton = m.group(11);
|
||||
if (singleton != null) {
|
||||
@ -227,6 +266,9 @@ public class BengaliLanguageUtils extends LanguageUtils {
|
||||
appendableString = appendableString + singleStr;
|
||||
}
|
||||
}
|
||||
if (changePronounciation && lastChar.equals("a")) {
|
||||
sb.setCharAt(sb.length() - 1, 'o');
|
||||
}
|
||||
String others = m.group(0);
|
||||
if (others != null) {
|
||||
|
||||
@ -234,7 +276,41 @@ public class BengaliLanguageUtils extends LanguageUtils {
|
||||
appendableString = appendableString + others;
|
||||
}
|
||||
}
|
||||
String whitespace = m.group(12);
|
||||
if (nextNeedsO && kaar == null && whitespace == null && !vowels.containsKey(m.group(0))) {
|
||||
appendableString = appendableString + "o";
|
||||
lastHadO++;
|
||||
thisNeedsO = false;
|
||||
}
|
||||
|
||||
if (((kaar != null && lastHadO > 1) || whitespace != null) && !lastHadKaar && sb.length() > 0
|
||||
&& sb.charAt(sb.length() - 1) == 'o' && !lastHadComposition) {
|
||||
sb.deleteCharAt(sb.length() - 1);
|
||||
lastHadO = 0;
|
||||
}
|
||||
nextNeedsO = false;
|
||||
if (thisNeedsO && kaar == null && whitespace == null && !vowels.containsKey(m.group(0))) {
|
||||
appendableString = appendableString + "o";
|
||||
lastHadO++;
|
||||
}
|
||||
if (appendableString.length() > 0 && !vowelsAndHasants.containsKey(m.group(0)) && kaar == null) {
|
||||
nextNeedsO = true;
|
||||
}
|
||||
if (reff != null || m.group(4) != null || m.group(6) != null) {
|
||||
lastHadComposition = true;
|
||||
} else {
|
||||
lastHadComposition = false;
|
||||
}
|
||||
if (kaar != null) {
|
||||
lastHadKaar = true;
|
||||
} else {
|
||||
lastHadKaar = false;
|
||||
}
|
||||
m.appendReplacement(sb, appendableString);
|
||||
lastChar = appendableString;
|
||||
}
|
||||
if (!lastHadKaar && sb.length() > 0 && sb.charAt(sb.length() - 1) == 'o' && !lastHadComposition) {
|
||||
sb.deleteCharAt(sb.length() - 1);
|
||||
}
|
||||
m.appendTail(sb);
|
||||
return sb.toString();
|
||||
|
@ -57,11 +57,14 @@ public class LanguageUtilsTest extends TestBase {
|
||||
assertEquals("Farsi transiteration failed", farsiExpected, farsiActual);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStringTransliterateBengali() throws Exception {
|
||||
// input with Bengali letters
|
||||
String[] inputs = { "অনিরুদ্ধ", "বিজ্ঞানযাত্রা চলছে চলবে।", "আমি সব দেখেশুনে ক্ষেপে গিয়ে করি বাঙলায় চিৎকার!" };
|
||||
String[] outputs = { "aniruddha", "biggaanaJaatraa chalachhe chalabe.",
|
||||
"aami saba dekheshune kkhepe giye kari baangalaaya chitkaara!" };
|
||||
String[] inputs = { "অনিরুদ্ধ", "বিজ্ঞানযাত্রা চলছে চলবে।", "আমি সব দেখেশুনে ক্ষেপে গিয়ে করি বাঙলায় চিৎকার!",
|
||||
"আমার জাভা কোড is so bad! কী আর বলবো!" };
|
||||
String[] outputs = { "oniruddho", "biggaanJaatraa cholchhe cholbe.",
|
||||
"aami sob dekheshune kkhepe giye kori baanglaay chitkaar!",
|
||||
"aamaar jaabhaa koD is so bad! kii aar bolbo!"};
|
||||
|
||||
String result;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user