diff --git a/app/build.gradle b/app/build.gradle
index 96f898654..753e891da 100644
--- a/app/build.gradle
+++ b/app/build.gradle
@@ -276,6 +276,9 @@ dependencies {
 
     // Fix Duplicate class build error
     implementation(platform("org.jetbrains.kotlin:kotlin-bom:1.8.0"))
+
+    // Needed for Armenian transliteration
+    implementation group: 'org.ahocorasick', name: 'ahocorasick', version: '0.6.3'
 }
 
 preBuild.dependsOn(":GBDaoGenerator:genSources")
diff --git a/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/language/LanguageUtils.java b/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/language/LanguageUtils.java
index 1283ee63a..f3d66b511 100644
--- a/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/language/LanguageUtils.java
+++ b/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/language/LanguageUtils.java
@@ -35,6 +35,7 @@ import nodomain.freeyourgadget.gadgetbridge.devices.DeviceCoordinator;
 import nodomain.freeyourgadget.gadgetbridge.impl.GBDevice;
 import nodomain.freeyourgadget.gadgetbridge.util.Prefs;
 import nodomain.freeyourgadget.gadgetbridge.util.language.impl.ArabicTransliterator;
+import nodomain.freeyourgadget.gadgetbridge.util.language.impl.ArmenianTransliterator;
 import nodomain.freeyourgadget.gadgetbridge.util.language.impl.BengaliTransliterator;
 import nodomain.freeyourgadget.gadgetbridge.util.language.impl.CommonSymbolsTransliterator;
 import nodomain.freeyourgadget.gadgetbridge.util.language.impl.CroatianTransliterator;
@@ -86,6 +87,7 @@ public class LanguageUtils {
         put("scandinavian", new ScandinavianTransliterator());
         put("turkish", new TurkishTransliterator());
         put("ukranian", new UkranianTransliterator());
+        put("armenian", new ArmenianTransliterator());
     }};
 
     /**
diff --git a/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/language/impl/ArmenianTransliterator.java b/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/language/impl/ArmenianTransliterator.java
new file mode 100644
index 000000000..550b61bd4
--- /dev/null
+++ b/app/src/main/java/nodomain/freeyourgadget/gadgetbridge/util/language/impl/ArmenianTransliterator.java
@@ -0,0 +1,272 @@
+/*  Copyright (C) 2021-2024 Alik Aslanyan
+
+    This file is part of Gadgetbridge.
+
+    Gadgetbridge is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published
+    by the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    Gadgetbridge is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>. */
+package nodomain.freeyourgadget.gadgetbridge.util.language.impl;
+
+import nodomain.freeyourgadget.gadgetbridge.util.language.Transliterator;
+
+import org.ahocorasick.trie.Emit;
+import org.ahocorasick.trie.Trie;
+import org.apache.commons.lang3.text.WordUtils;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+/**
+ * Transliterates Armenian text into Latin characters.
+ * <p>
+ * All keywords of the substitution table are located in the input with an Aho-Corasick trie.
+ * Matches are consumed from left to right and, where several keywords start at the same
+ * position, the longest one wins, so context-dependent readings take precedence over the
+ * single-letter fallbacks.
+ */
+public class ArmenianTransliterator implements Transliterator {
+    // Substitution table.
+    // Armenian has some rules regarding the reading of 'ո': in the middle of a word it reads as the English 'o',
+    // but if a word starts with it, it is read as 'vo'.
+    // If it has the 'ւ' symbol after it, the pair is read as 'u' (as the double o in "booze").
+    private static final Map<String, String> transliterateMap = new LinkedHashMap<String, String>() {
+        {
+            // Letter + 'ու'
+            put("աու","au");
+            put("բու","bu");
+            put("գու","gu");
+            put("դու","du");
+            put("եու","eu");
+            put("զու","zu");
+            put("էու","eu");
+            put("ըու","yu");
+            put("թու","tu");
+            put("ժու","ju");
+            put("իու","iu");
+            put("լու","lu");
+            put("խու","xu");
+            put("ծու","cu");
+            put("կու","ku");
+            put("հու","hu");
+            put("ձու","dzu");
+            put("ղու","xu");
+            put("ճու","cu");
+            put("մու","mu");
+            put("յու","yu");
+            put("նու","nu");
+            put("շու","shu");
+            put("չու","chu");
+            put("պու","pu");
+            put("ջու","ju");
+            put("ռու","ru");
+            put("սու","su");
+            put("վու","vu");
+            put("տու","tu");
+            put("րու","ru");
+            put("ցու","cu");
+            put("փու","pu");
+            put("քու","qu");
+            put("օու","ou");
+            put("ևու","eu");
+            put("ֆու","fu");
+            put("ոու","vou");
+
+            put("ու","u");
+
+            // Letter + 'ո'
+            put("բո","bo");
+            put("գո","go");
+            put("դո","do");
+            put("զո","zo");
+            put("թո","to");
+            put("ժո","jo");
+            put("լո","lo");
+            put("խո","xo");
+            put("ծո","co");
+            put("կո","ko");
+            put("հո","ho");
+            put("ձո","dzo");
+            put("ղո","xo");
+            put("ճո","co");
+            put("մո","mo");
+            put("յո","yo");
+            put("նո","no");
+            put("շո","sho");
+            put("չո","cho");
+            put("պո","po");
+            put("ջո","jo");
+            put("ռո","ro");
+            put("սո","so");
+            put("վո","vo");
+            put("տո","to");
+            put("րո","ro");
+            put("ցո","co");
+            put("փո","po");
+            put("քո","qo");
+            put("ևո","eo");
+            put("ֆո","fo");
+            put("ո","vo");
+
+            // Two different ways to write, we support all.
+            put("եւ","ev");
+            put("եվ","ev");
+
+            // Simple substitutions
+            put("ա","a");
+            put("բ","b");
+            put("գ","g");
+            put("դ","d");
+            put("ե","e");
+            put("զ","z");
+            put("է","e");
+            put("ը","y");
+            put("թ","t");
+            put("ժ","j");
+            put("ի","i");
+            put("լ","l");
+            put("խ","x");
+            put("ծ","c");
+            put("կ","k");
+            put("հ","h");
+            put("ձ","dz");
+            put("ղ","x");
+            put("ճ","c");
+            put("մ","m");
+            put("յ","y");
+            put("ն","n");
+            put("շ","sh");
+            put("չ","ch");
+            put("պ","p");
+            put("ջ","j");
+            put("ռ","r");
+            put("ս","s");
+            put("վ","v");
+            put("տ","t");
+            put("ր","r");
+            put("ց","c");
+            put("փ","p");
+            put("ք","q");
+            put("օ","o");
+            put("և","ev");
+            put("ֆ","f");
+
+            // If this symbol wasn't used in the combination with others, then it's meaningless
+            put("ւ","");
+
+            // Snapshot of the lower-case table: the loops below add entries to this map,
+            // which must not happen while iterating over the map itself.
+            final Map<String, String> lowerCase = new LinkedHashMap<String, String>(this);
+
+            // Add support for capitalized words
+            for (final Map.Entry<String, String> entry : lowerCase.entrySet()) {
+                final String capitalKey = WordUtils.capitalize(entry.getKey());
+                if (!capitalKey.equals(entry.getKey())) {
+                    put(capitalKey, WordUtils.capitalize(entry.getValue()));
+                }
+            }
+
+            // Add support for upper-case words ("ՈՒ" -> "U", "ԲՈ" -> "BO").
+            // Single letters are already covered by the capitalized pass, and 'և' is skipped because
+            // its upper-case form is the two-letter "ԵՒ", which the "եւ" entry provides.
+            for (final Map.Entry<String, String> entry : lowerCase.entrySet()) {
+                final String key = entry.getKey();
+                final String upperKey = key.toUpperCase(Locale.ROOT);
+                if (upperKey.length() == key.length() && !containsKey(upperKey)) {
+                    put(upperKey, entry.getValue().toUpperCase(Locale.ROOT));
+                }
+            }
+        }
+    };
+
+    // Orders matches by start position; among matches starting at the same position the longest comes first.
+    private static final Comparator<Emit> emitComparator = new Comparator<Emit>() {
+        @Override
+        public int compare(final Emit a, final Emit b) {
+            final int byStart = Integer.compare(a.getStart(), b.getStart());
+            if (byStart != 0) {
+                return byStart;
+            }
+            return Integer.compare(b.getEnd(), a.getEnd());
+        }
+    };
+
+    // Aho-Corasick trie holding every key of the substitution table
+    private static final Trie transliterationTrie;
+    static {
+        final Trie.TrieBuilder builder = Trie.builder();
+        for (final String key : transliterateMap.keySet()) {
+            builder.addKeyword(key);
+        }
+        transliterationTrie = builder.build();
+    }
+
+    /**
+     * Replaces every keyword of the substitution table found in {@code text}.
+     * Characters that are not covered by any keyword are copied unchanged.
+     *
+     * @param text the text to transliterate, must not be {@code null}
+     * @return the transliterated text
+     */
+    private static String ahoCorasick(final String text) {
+        // Most letters map to one or two Latin characters, so leave some headroom to minimize re-allocations.
+        final StringBuilder sb = new StringBuilder(text.length() + text.length() / 2);
+
+        // The trie reports every occurrence of every keyword, including overlapping ones.
+        final List<Emit> emits = new ArrayList<Emit>(transliterationTrie.parseText(text));
+        Collections.sort(emits, emitComparator);
+
+        int prevIndex = 0;
+
+        for (final Emit emit : emits) {
+            final int matchIndex = emit.getStart();
+
+            // Skip if we already substituted this part
+            if (matchIndex < prevIndex) {
+                continue;
+            }
+
+            // Add the part which shouldn't be substituted
+            sb.append(text, prevIndex, matchIndex);
+
+            // Substitute and append to the builder
+            sb.append(transliterateMap.get(emit.getKeyword()));
+
+            // getEnd() is the index of the last matched character
+            prevIndex = emit.getEnd() + 1;
+        }
+
+        // Add the remainder of the string (contains no more matches).
+        sb.append(text, prevIndex, text.length());
+
+        return sb.toString();
+    }
+
+    /**
+     * Transliterates Armenian text into Latin characters.
+     *
+     * @param txt the text to transliterate, may be {@code null} or empty
+     * @return the transliterated text, or {@code txt} itself when it is {@code null} or empty
+     */
+    @Override
+    public String transliterate(String txt) {
+        if (txt == null || txt.isEmpty()) {
+            return txt;
+        }
+
+        return ahoCorasick(txt);
+    }
+}
diff --git a/app/src/main/res/values/arrays.xml b/app/src/main/res/values/arrays.xml
index ea747b502..95ba80293 100644
--- a/app/src/main/res/values/arrays.xml
+++ b/app/src/main/res/values/arrays.xml
@@ -3495,6 +3495,7 @@
         @string/turkish
         @string/ukranian
         @string/hungarian
+        @string/armenian
@@ -3521,6 +3522,7 @@
         turkish
         ukranian
         hungarian
+        armenian
diff --git a/app/src/main/res/values/strings.xml b/app/src/main/res/values/strings.xml
index 9f1947626..5ae3a95ad 100644
--- a/app/src/main/res/values/strings.xml
+++ b/app/src/main/res/values/strings.xml
@@ -1061,6 +1061,7 @@
     Persian
     Scandinavian
     Ukranian
+    Armenian
     Italian
     French
     French (Canada)
diff --git a/app/src/test/java/nodomain/freeyourgadget/gadgetbridge/test/ArmenianTransliteratorTest.java b/app/src/test/java/nodomain/freeyourgadget/gadgetbridge/test/ArmenianTransliteratorTest.java
new file mode 100644
index 000000000..ac08324ce
--- /dev/null
+++ b/app/src/test/java/nodomain/freeyourgadget/gadgetbridge/test/ArmenianTransliteratorTest.java
@@ -0,0 +1,165 @@
+package nodomain.freeyourgadget.gadgetbridge.test;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.lang3.text.WordUtils;
+import org.junit.Test;
+import org.junit.Assert;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import nodomain.freeyourgadget.gadgetbridge.util.language.impl.ArmenianTransliterator;
+
+public class ArmenianTransliteratorTest extends TestCase {
+    @Test
+    public void testSimpleCases() {
+        Assert.assertEquals("aybuben", new ArmenianTransliterator().transliterate("այբուբեն"));
+        Assert.assertEquals("vorotan", new ArmenianTransliterator().transliterate("որոտան"));
+        Assert.assertEquals("voroshel", new ArmenianTransliterator().transliterate("որոշել"));
+        Assert.assertEquals("uzox", new ArmenianTransliterator().transliterate("ուզող"));
+    }
+
+    @Test
+    public void testShAndChBeforeO() {
+        // 'շ' and 'չ' keep their "sh" / "ch" reading in front of 'ո'
+        Assert.assertEquals("shor", new ArmenianTransliterator().transliterate("շոր"));
+        Assert.assertEquals("chor", new ArmenianTransliterator().transliterate("չոր"));
+        Assert.assertEquals("Shor", new ArmenianTransliterator().transliterate("Շոր"));
+    }
+
+    @Test
+    public void testUpperCaseWords() {
+        // 'ՈՒ' is one sound in upper case too, and 'Ո' after a letter reads as 'O'
+        Assert.assertEquals("TUN", new ArmenianTransliterator().transliterate("ՏՈՒՆ"));
+        Assert.assertEquals("UXI", new ArmenianTransliterator().transliterate("ՈՒՂԻ"));
+    }
+
+    @Test
+    public void testMultipleWords() {
+        Assert.assertEquals("vory karucum en Viqipedia kayqic ogtvoxnery azat xmbagrman dzevachapov",
+                new ArmenianTransliterator().transliterate("որը կառուցում են Վիքիպեդիա կայքից օգտվողները ազատ խմբագրման ձևաչափով"));
+    }
+
+    @Test
+    public void testMixedStrings() {
+        Assert.assertEquals("vor1voshel 12 uzox", new ArmenianTransliterator().transliterate("որ1ոշել 12 ուզող"));
+        Assert.assertEquals("vory jet iridescent karucum en sheen Viqipedia kayqic ogtvoxnery and a distinctive azat xmbagrman dzevachapov",
+                new ArmenianTransliterator().transliterate("որը jet iridescent կառուցում են sheen Վիքիպեդիա կայքից օգտվողները and a distinctive ազատ խմբագրման ձևաչափով"));
+    }
+
+    @Test
+    public void testTop100Words() {
+        final Map<String, String> topWords = new LinkedHashMap<String, String>() {{
+            put("ինչպես", "inchpes");
+            put("ես", "es");
+            put("նրա", "nra");
+            put("որ", "vor");
+            put("նա", "na");
+            put("էր", "er");
+            put("համար", "hamar");
+            put("ին", "in");
+            put("հետ", "het");
+            put("նրանք", "nranq");
+            put("լինել", "linel");
+            put("մեկ", "mek");
+            put("ունենալ", "unenal");
+            put("այս", "ays");
+            put("ից", "ic");
+            put("ի", "i");
+            put("տաք", "taq");
+            put("բառ", "bar");
+            put("բայց", "bayc");
+            put("ինչ", "inch");
+            put("մի", "mi");
+            put("քանի", "qani");
+            put("է", "e");
+            put("այն", "ayn");
+            put("դուք", "duq");
+            put("կամ", "kam");
+            put("եւ", "ev");
+            put("մինչեւ", "minchev");
+            put("իսկ", "isk");
+            put("ա", "a");
+            put("մենք", "menq");
+            put("կարող", "karox");
+            put("այլ", "ayl");
+            put("են", "en");
+            put("որը", "vory");
+            put("անել", "anel");
+            put("իրենց", "irenc");
+            put("ժամանակ", "jamanak");
+            put("եթե", "ete");
+            put("կամք", "kamq");
+            put("յուրաքանչյուր", "yuraqanchyur");
+            put("ասել", "asel");
+            put("շարք", "sharq");
+            put("երեք", "ereq");
+            put("ուզում", "uzum");
+            put("օդի", "odi");
+            put("լավ", "lav");
+            put("նույնպես", "nuynpes");
+            put("խաղալ", "xaxal");
+            put("փոքր", "poqr");
+            put("վերջ", "verj");
+            put("կարդալ", "kardal");
+            put("ձեռք", "dzerq");
+            put("նավահանգիստ", "navahangist");
+            put("տառ", "tar");
+            put("առ", "ar");
+            put("ավելացնել", "avelacnel");
+            put("նույնիսկ", "nuynisk");
+            put("այստեղ", "aystex");
+            put("պետք", "petq");
+            put("մեծ", "mec");
+            put("բարձր", "bardzr");
+            put("այդպիսի", "aydpisi");
+            put("հետեւել", "hetevel");
+            put("գործ", "gorc");
+            put("ինչու", "inchu");
+            put("խնդրել", "xndrel");
+            put("տղամարդիկ", "txamardik");
+            put("փոփոխություն", "popoxutyun");
+            put("գնաց", "gnac");
+            put("լույս", "luys");
+            put("բարի", "bari");
+            put("դուրս", "durs");
+            put("անհրաժեշտ", "anhrajesht");
+            put("տուն", "tun");
+            put("նկար", "nkar");
+            put("փորձել", "pordzel");
+            put("մեզ", "mez");
+            put("կրկին", "krkin");
+            put("կենդանի", "kendani");
+            put("կետ", "ket");
+            put("մայր", "mayr");
+            put("աշխարհ", "ashxarh");
+            put("մոտ", "mot");
+            put("կառուցել", "karucel");
+            put("ինքնուրույն", "inqnuruyn");
+            put("երկիր", "erkir");
+            put("հայր", "hayr");
+            put("ցանկացած", "cankacac");
+            put("նոր", "nor");
+            put("աշխատանք", "ashxatanq");
+            put("մաս", "mas");
+            put("վերցնել", "vercnel");
+            put("ստանալ", "stanal");
+            put("տեղ", "tex");
+            put("ապրել", "aprel");
+            put("որտեղ", "vortex");
+            put("երբ", "erb");
+            put("Վերադառնալ", "Veradarnal");
+            put("միայն", "miayn");
+        }};
+
+        for (final Map.Entry<String, String> entry : topWords.entrySet()) {
+            Assert.assertEquals(entry.getValue(), new ArmenianTransliterator().transliterate(entry.getKey()));
+        }
+
+        // Capitalized input must produce capitalized output
+        for (final Map.Entry<String, String> entry : topWords.entrySet()) {
+            Assert.assertEquals(WordUtils.capitalize(entry.getValue()), new ArmenianTransliterator().transliterate(WordUtils.capitalize(entry.getKey())));
+        }
+    }
+}