From deee908fb0531149c5622a7aeaf41eaedcba3d3f Mon Sep 17 00:00:00 2001 From: Roi Greenberg Date: Sat, 25 Aug 2018 22:29:46 +0300 Subject: [PATCH] Convert Arabic to contextual form --- .../gadgetbridge/util/LanguageUtils.java | 143 +----- .../gadgetbridge/util/RtlUtils.java | 425 ++++++++++++++++++ app/src/main/res/values/strings.xml | 3 + app/src/main/res/xml/preferences.xml | 39 +- 4 files changed, 470 insertions(+), 140 deletions(-) create mode 100644 app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/RtlUtils.java diff --git a/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/LanguageUtils.java b/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/LanguageUtils.java index 444a33ef9..80f0d3b61 100644 --- a/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/LanguageUtils.java +++ b/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/LanguageUtils.java @@ -18,7 +18,6 @@ package nodomain.freeyourgadget.gadgetbridge.util; import android.text.TextUtils; -import android.util.Pair; import org.apache.commons.lang3.text.WordUtils; @@ -31,6 +30,15 @@ import java.util.Map; import nodomain.freeyourgadget.gadgetbridge.GBApplication; public class LanguageUtils { + /** + * Checks the status of right-to-left option + * @return true if right-to-left option is On, and false, if Off or not exist + */ + public static boolean rtlSupport() + { + return GBApplication.getPrefs().getBoolean("rtl", false); + } + //transliteration map with english equivalent for unsupported chars private static Map transliterateMap = new HashMap(){ { @@ -156,124 +164,7 @@ public class LanguageUtils { return string.replaceAll("\\p{M}", ""); } - /** - * Checks the status of right-to-left option - * @return true if right-to-left option is On, and false, if Off or not exist - */ - public static boolean rtlSupport() - { - return GBApplication.getPrefs().getBoolean("rtl", false); - } - //map with brackets chars to change there direction - private static Map directionSignsMap = new HashMap(){ - { - put('(', ')'); put(')', '('); put('[', ']'); put(']', '['); put('{','}'); put('}','{'); - - - } - }; - - //list of unicode ranges of rtl chars - private static ArrayList > rtlRange = new ArrayList>() { - { - add(new Pair('\u0590', '\u05F4')); - add(new Pair('\uFB1D', '\uFB4F')); - add(new Pair('\u0600', '\u06FF')); - add(new Pair('\u0750', '\u077F')); - add(new Pair('\u08A0', '\u08FF')); - add(new Pair('\uFB50', '\uFDFF')); - add(new Pair('\uFE70', '\uFEFF')); - } - }; - - /** - * @return true if the char is in the rtl range, otherwise false - */ - private static Boolean isRtl(char c){ - for (Pair rang: rtlRange) { - if (rang.first <= c && c <= rang.second) { - return true; - } - } - return false; - } - - //list of unicode ranges of punctuations chars - private static ArrayList > punctuationsRange = new ArrayList>() { - { - add(new Pair('\u0021', '\u002F')); - add(new Pair('\u003A', '\u0040')); - add(new Pair('\u005B', '\u0060')); - add(new Pair('\u007B', '\u007E')); - } - }; - - /** - * @return true if the char is in the punctuations range, otherwise false - */ - private static Boolean isPunctuations(char c){ - for (Pair rang: punctuationsRange) { - if (rang.first <= c && c <= rang.second) { - return true; - } - } - return false; - } - - //list of sign that ends a word - private static ArrayList wordEndSigns = new ArrayList() { - { - add('\0'); - add('\n'); - add(' '); - } - }; - - /** - * @return true if the char is in the end of word list, otherwise false - */ - private static Boolean isWordEndSign(char c){ - for (char sign: wordEndSigns){ - if (c == sign){ - return true; - } - } - - return false; - } - - /** - * The function get a string and reverse it. - * in case of end-of-word sign, it will leave it at the end. - * in case of sign with direction like brackets, it will change the direction. - * @param s - the string to reverse - * @return reversed string - */ - private static String reverse(String s) { - int j = s.length(); - int startWithSpace = 0; - char[] newWord = new char[j]; - - if (j == 0) { - return s; - } - - // remain end-of-word sign at the end - if (isWordEndSign(s.charAt(s.length() - 1))){ - startWithSpace = 1; - newWord[--j] = s.charAt(s.length() - 1); - } - - for (int i = 0; i < s.length() - startWithSpace; i++) { - if (LanguageUtils.directionSignsMap.containsKey(s.charAt(i))) { - newWord[--j] = LanguageUtils.directionSignsMap.get(s.charAt(i)); - } else { - newWord[--j] = s.charAt(i); - } - } - return new String(newWord); - } /** * The function get a string and fix the rtl words. @@ -295,27 +186,29 @@ public class LanguageUtils { int startPos = 0; int endPos = 0; - Boolean isRtlState = LanguageUtils.isRtl(oldString.charAt(0)); + Boolean isRtlState = RtlUtils.isRtl(oldString.charAt(0)); char c; String line = ""; for (int i = 0; i < length; i++) { c = oldString.charAt(i); - if ((LanguageUtils.isRtl(c) == isRtlState || LanguageUtils.isPunctuations(c)) && i < length - 1) { + if ((RtlUtils.isRtl(c) == isRtlState || RtlUtils.isPunctuations(c)) && i < length - 1) { endPos++; } else { String word; - if (isWordEndSign(c)){ + if (RtlUtils.isWordEndSign(c)){ endPos++; } if (i == length - 1){ endPos = length; } + word = oldString.substring(startPos, endPos); if (isRtlState) { - word = reverse(oldString.substring(startPos, endPos)); - } else { - word = (oldString.substring(startPos, endPos)); + if(RtlUtils.contextualSupport()){ + word = RtlUtils.converToContextual(word); + } + word = RtlUtils.reverse(word); } if (line.length() + word.length() > line_max_size) { lines.add(line + "\n"); @@ -327,7 +220,7 @@ public class LanguageUtils { line = ""; } startPos = endPos; - if (!isWordEndSign(c)){ + if (!RtlUtils.isWordEndSign(c)){ endPos++; isRtlState = !isRtlState; } diff --git a/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/RtlUtils.java b/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/RtlUtils.java new file mode 100644 index 000000000..5d5f0d446 --- /dev/null +++ b/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/RtlUtils.java @@ -0,0 +1,425 @@ +package nodomain.freeyourgadget.gadgetbridge.util; + +import android.util.Log; +import android.util.Pair; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +import nodomain.freeyourgadget.gadgetbridge.GBApplication; + +class RtlUtils { + /** + * Checks the status of right-to-left option + * @return true if right-to-left option is On, and false, if Off or not exist + */ + public static boolean contextualSupport() + { + return GBApplication.getPrefs().getBoolean("contextualArabic", false); + } + + //map with brackets chars to change there direction + private static Map directionSignsMap = new HashMap(){ + { + put('(', ')'); put(')', '('); put('[', ']'); put(']', '['); put('{','}'); put('}','{'); + + + } + }; + + //list of unicode ranges of rtl chars + private static ArrayList> hebrewRange = new ArrayList>() { + { + add(new Pair('\u0590', '\u05F4')); + add(new Pair('\uFB1D', '\uFB4F')); + } + }; + + //list of unicode ranges of rtl chars + private static ArrayList> arabicRange = new ArrayList>() { + { + add(new Pair('\u0600', '\u06FF')); + add(new Pair('\u0750', '\u077F')); + add(new Pair('\u08A0', '\u08FF')); + add(new Pair('\uFB50', '\uFDFF')); + add(new Pair('\uFE70', '\uFEFF')); + } + }; + + //list of unicode ranges of rtl chars + private static ArrayList> rtlRange = new ArrayList>() { + { + addAll(hebrewRange); + addAll(arabicRange); + } + }; + + /** + * @return true if the char is in the rtl range, otherwise false + */ + static Boolean isHebrew(char c){ + for (Pair rang: hebrewRange) { + if (rang.first <= c && c <= rang.second) { + return true; + } + } + return false; + } + + /** + * @return true if the char is in the rtl range, otherwise false + */ + static Boolean isArabic(char c){ + for (Pair rang: arabicRange) { + if (rang.first <= c && c <= rang.second) { + return true; + } + } + return false; + } + + /** + * @return true if the char is in the rtl range, otherwise false + */ + static Boolean isRtl(char c){ + for (Pair rang: rtlRange) { + if (rang.first <= c && c <= rang.second) { + return true; + } + } + return false; + } + + //list of unicode ranges of punctuations chars + private static ArrayList > punctuationsRange = new ArrayList>() { + { + add(new Pair('\u0021', '\u002F')); + add(new Pair('\u003A', '\u0040')); + add(new Pair('\u005B', '\u0060')); + add(new Pair('\u007B', '\u007E')); + } + }; + + /** + * @return true if the char is in the punctuations range, otherwise false + */ + static Boolean isPunctuations(char c){ + for (Pair rang: punctuationsRange) { + if (rang.first <= c && c <= rang.second) { + return true; + } + } + return false; + } + + //list of sign that ends a word + private static ArrayList wordEndSigns = new ArrayList() { + { + add('\0'); + add('\n'); + add(' '); + } + }; + + /** + * @return true if the char is in the end of word list, otherwise false + */ + static Boolean isWordEndSign(char c){ + for (char sign: wordEndSigns){ + if (c == sign){ + return true; + } + } + + return false; + } + + //map from Arabian characters to their contextual form in the beginning of the word + private static Map contextualArabicIsolated = new HashMap(){ + { + put('ا', '\uFE8D'); + put('ب', '\uFE8F'); + put('ت', '\uFE95'); + put('ث', '\uFE99'); + put('ج', '\uFE9D'); + put('ح', '\uFEA1'); + put('خ', '\uFEA5'); + put('د', '\uFEA9'); + put('ذ', '\uFEAB'); + put('ر', '\uFEAD'); + put('ز', '\uFEAF'); + put('س', '\uFEB1'); + put('ش', '\uFEB5'); + put('ص', '\uFEB9'); + put('ض', '\uFEBD'); + put('ط', '\uFEC1'); + put('ظ', '\uFEC5'); + put('ع', '\uFEC9'); + put('غ', '\uFECD'); + put('ف', '\uFED1'); + put('ق', '\uFED5'); + put('ك', '\uFED9'); + put('ل', '\uFEDD'); + put('م', '\uFEE1'); + put('ن', '\uFEE5'); + put('ه', '\uFEE9'); + put('و', '\uFEED'); + put('ي', '\uFEF1'); + put('آ', '\uFE81'); + put('ة', '\uFE93'); + put('ى', '\uFEEF'); + put('ئ', '\uFE89'); + put('إ', '\uFE87'); + put('أ', '\uFE83'); + put('ء', '\uFE80'); + put('ؤ', '\uFE85'); + put((char)('ل' + 'آ'), '\uFEF5'); + put((char)('ل' + 'أ'), '\uFEF7'); + put((char)('ل' + 'إ'), '\uFEF9'); + put((char)('ل' + 'ا'), '\uFEFB'); + + } + }; + + //map from Arabian characters to their contextual form in the beginning of the word + private static Map contextualArabicBeginning = new HashMap(){ + { + put('ب', '\uFE91'); + put('ت', '\uFE97'); + put('ث', '\uFE9B'); + put('ج', '\uFE9F'); + put('ح', '\uFEA3'); + put('خ', '\uFEA7'); + put('س', '\uFEB3'); + put('ش', '\uFEB7'); + put('ص', '\uFEBB'); + put('ض', '\uFEBF'); + put('ط', '\uFEC3'); + put('ظ', '\uFEC7'); + put('ع', '\uFECB'); + put('غ', '\uFECF'); + put('ف', '\uFED3'); + put('ق', '\uFED7'); + put('ك', '\uFEDB'); + put('ل', '\uFEDF'); + put('م', '\uFEE3'); + put('ن', '\uFEE7'); + put('ه', '\uFEEB'); + put('ي', '\uFEF3'); + put('ئ', '\uFE8B'); + } + }; + + //map from Arabian characters to their contextual form in the middle of the word + private static Map contextualArabicMiddle = new HashMap(){ + { + put('ب', '\uFE92'); + put('ت', '\uFE98'); + put('ث', '\uFE9C'); + put('ج', '\uFEA0'); + put('ح', '\uFEA4'); + put('خ', '\uFEA8'); + put('س', '\uFEB4'); + put('ش', '\uFEB8'); + put('ص', '\uFEBC'); + put('ض', '\uFEC0'); + put('ط', '\uFEC4'); + put('ظ', '\uFEC8'); + put('ع', '\uFECC'); + put('غ', '\uFED0'); + put('ف', '\uFED4'); + put('ق', '\uFED8'); + put('ك', '\uFEDC'); + put('ل', '\uFEE0'); + put('م', '\uFEE4'); + put('ن', '\uFEE8'); + put('ه', '\uFEEC'); + put('ي', '\uFEF4'); + put('ئ', '\uFE8C'); + } + }; + + //map from Arabian characters to their contextual form in the end of the word + private static Map contextualArabicEnd = new HashMap(){ + { + put('ا', '\uFE8E'); + put('ب', '\uFE90'); + put('ت', '\uFE96'); + put('ث', '\uFE9A'); + put('ج', '\uFE9E'); + put('ح', '\uFEA2'); + put('خ', '\uFEA6'); + put('د', '\uFEAA'); + put('ذ', '\uFEAC'); + put('ر', '\uFEAE'); + put('ز', '\uFEB0'); + put('س', '\uFEB2'); + put('ش', '\uFEB6'); + put('ص', '\uFEBA'); + put('ض', '\uFEBE'); + put('ط', '\uFEC2'); + put('ظ', '\uFEC6'); + put('ع', '\uFECA'); + put('غ', '\uFECE'); + put('ف', '\uFED2'); + put('ق', '\uFED6'); + put('ك', '\uFEDA'); + put('ل', '\uFEDE'); + put('م', '\uFEE2'); + put('ن', '\uFEE6'); + put('ه', '\uFEEA'); + put('و', '\uFEEE'); + put('ي', '\uFEF2'); + put('آ', '\uFE82'); + put('ة', '\uFE94'); + put('ى', '\uFEF0'); + put('ئ', '\uFE8A'); + put('إ', '\uFE88'); + put('أ', '\uFE84'); + put('ؤ', '\uFE86'); + put((char)('ل' + 'آ'), '\uFEF6'); + put((char)('ل' + 'أ'), '\uFEF8'); + put((char)('ل' + 'إ'), '\uFEFA'); + put((char)('ل' + 'ا'), '\uFEFC'); + } + }; + enum contextualState{ + isolate, + begin, + middle, + end + } + + private static boolean exceptionAfterLam(char c){ + switch (c){ + case '\u0622': + case '\u0623': + case '\u0625': + case '\u0627': + return true; + default: + return false; + + } + } + + private static char getContextualSymbol(Character c, contextualState state) { + Character newChar; + switch (state){ + case begin: + newChar = contextualArabicBeginning.get(c); + break; + case middle: + newChar = contextualArabicMiddle.get(c); + break; + case end: + newChar = contextualArabicEnd.get(c); + break; + case isolate: + default: + newChar = contextualArabicIsolated.get(c);; + } + if (newChar != null){ + return newChar; + } else{ + return c; + } + } + + static String converToContextual(String s){ + if (s == null || s.isEmpty() || s.length() == 1){ + return s; + } + + int length = s.length(); + StringBuilder newWord = new StringBuilder(length); + + Character curChar, nextChar = s.charAt(0); + contextualState prevState = contextualState.isolate; + contextualState curState = contextualState.isolate; + + for (int i = 0; i < length - 1; i++){ + curChar = nextChar; + nextChar = s.charAt(i + 1); + + if (curChar == 'ل' && exceptionAfterLam(nextChar)){ + i++; + curChar = (char)(nextChar + curChar); + if (i < length - 1) { + nextChar = s.charAt(i + 1); + }else{ + nextChar = curChar; + prevState = curState; + break; + } + + } + + curState = getCharContextualState(prevState, curChar, nextChar); + newWord.append(getContextualSymbol(curChar, curState)); + prevState = curState; + + + } + curState = getCharContextualState(prevState, nextChar, null); + newWord.append(getContextualSymbol(nextChar, curState)); + + return newWord.toString(); + } + + private static contextualState getCharContextualState(contextualState prevState, Character curChar, Character nextChar) { + contextualState curState; + if ((prevState == contextualState.isolate || prevState == contextualState.end) && + contextualArabicBeginning.containsKey(curChar) && + contextualArabicEnd.containsKey(nextChar)){ + + curState = contextualState.begin; + + } else if ((prevState == contextualState.begin || prevState == contextualState.middle) && + contextualArabicEnd.containsKey(curChar)){ + + if (contextualArabicMiddle.containsKey(curChar) && contextualArabicEnd.containsKey(nextChar)){ + curState = contextualState.middle; + }else{ + curState = contextualState.end; + } + }else{ + curState = contextualState.isolate; + } + return curState; + } + + + /** + * The function get a string and reverse it. + * in case of end-of-word sign, it will leave it at the end. + * in case of sign with direction like brackets, it will change the direction. + * @param s - the string to reverse + * @return reversed string + */ + static String reverse(String s) { + int j = s.length(); + int startWithSpace = 0; + char[] newWord = new char[j]; + + if (j == 0) { + return s; + } + + // remain end-of-word sign at the end + if (isWordEndSign(s.charAt(s.length() - 1))){ + startWithSpace = 1; + newWord[--j] = s.charAt(s.length() - 1); + } + + for (int i = 0; i < s.length() - startWithSpace; i++) { + if (directionSignsMap.containsKey(s.charAt(i))) { + newWord[--j] = directionSignsMap.get(s.charAt(i)); + } else { + newWord[--j] = s.charAt(i); + } + } + return new String(newWord); + } +} diff --git a/app/src/main/res/values/strings.xml b/app/src/main/res/values/strings.xml index a2dcb14dd..51a2a6984 100644 --- a/app/src/main/res/values/strings.xml +++ b/app/src/main/res/values/strings.xml @@ -631,4 +631,7 @@ Calibrate Watch 9 pairing Watch 9 calibration + Contextual Arabic + Enable this to support contextual Arabic + Right To Left Support diff --git a/app/src/main/res/xml/preferences.xml b/app/src/main/res/xml/preferences.xml index bf351b0c3..99b30390b 100644 --- a/app/src/main/res/xml/preferences.xml +++ b/app/src/main/res/xml/preferences.xml @@ -170,23 +170,32 @@ android:summary="@string/pref_summary_transliteration" android:title="@string/pref_title_transliteration" /> + + - - - + + +