From f10060fe8f6566c2bf11b9d4f9244707924a36f4 Mon Sep 17 00:00:00 2001 From: Comnir Date: Fri, 2 Apr 2021 21:29:03 +0300 Subject: [PATCH 1/2] fix: when decoding with UTF-8 fails, create a new buffer for the retry with CESU-8. If the decoding fails and there are UTF-8 decodable bytes before the bytes that couldn't be decoded, then the read index of the original buffer is incremented and those bytes will be missing from the decode result. Now we create a new buffer and the decoding will start at the original start offset. issue #2546 --- .../brut/androlib/res/decoder/StringBlock.java | 5 +++-- .../StringBlockWithSurrogatePairInUtf8Test.java | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/StringBlock.java b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/StringBlock.java index 9820435b..3d80d341 100644 --- a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/StringBlock.java +++ b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/StringBlock.java @@ -284,8 +284,8 @@ public class StringBlock { @VisibleForTesting String decodeString(int offset, int length) { - final ByteBuffer wrappedBuffer = ByteBuffer.wrap(m_strings, offset, length); try { + final ByteBuffer wrappedBuffer = ByteBuffer.wrap(m_strings, offset, length); return (m_isUTF8 ? UTF8_DECODER : UTF16LE_DECODER).decode(wrappedBuffer).toString(); } catch (CharacterCodingException ex) { if (!m_isUTF8) { @@ -295,9 +295,10 @@ public class StringBlock { } try { + final ByteBuffer wrappedBufferRetry = ByteBuffer.wrap(m_strings, offset, length); // in some places, Android uses 3-byte UTF-8 sequences instead of 4-bytes. // If decoding failed, we try to use CESU-8 decoder, which is closer to what Android actually uses. - return CESU8_DECODER.decode(wrappedBuffer).toString(); + return CESU8_DECODER.decode(wrappedBufferRetry).toString(); } catch (CharacterCodingException e) { LOGGER.warning("Failed to decode a string with CESU-8 decoder."); return null; diff --git a/brut.apktool/apktool-lib/src/test/java/brut/androlib/res/decoder/StringBlockWithSurrogatePairInUtf8Test.java b/brut.apktool/apktool-lib/src/test/java/brut/androlib/res/decoder/StringBlockWithSurrogatePairInUtf8Test.java index d141500c..8294928d 100644 --- a/brut.apktool/apktool-lib/src/test/java/brut/androlib/res/decoder/StringBlockWithSurrogatePairInUtf8Test.java +++ b/brut.apktool/apktool-lib/src/test/java/brut/androlib/res/decoder/StringBlockWithSurrogatePairInUtf8Test.java @@ -52,6 +52,20 @@ public class StringBlockWithSurrogatePairInUtf8Test { // See: https://github.com/iBotPeaches/Apktool/issues/2299 final String actual = new StringBlock(new byte[] { (byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xB4, (byte) 0x86}, true).decodeString(0, 6); assertEquals("Incorrect decoding", "\uD83D\uDD06", actual); + + // See: https://github.com/iBotPeaches/Apktool/issues/2546 + final byte[] bytesWithCharactersBeforeSurrogatePair = {'G', 'o', 'o', 'd', ' ', 'm', 'o', 'r', 'n', 'i', 'n', 'g', '!', ' ', + (byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xB1, (byte) 0x8B, + ' ', 'S', 'u', 'n', ' ', + (byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xBC, (byte) 0x9E + }; + final String actual2 = new StringBlock(bytesWithCharactersBeforeSurrogatePair, true).decodeString(0, 31); + + // D83D -> ED 0xA0 0xBD + // DC4B -> 0xED 0xB1 0x8B + // DF1E -> 0xED 0xBC 0x9E + assertEquals("Incorrect decoding when there are valid characters before the surrogate pair", + "Good morning! \uD83D\uDC4B Sun \uD83C\uDF1E", actual2); } @Test From 491d18058a763a1fed1f6edfd4c27b5cc92f5827 Mon Sep 17 00:00:00 2001 From: Comnir Date: Sat, 3 Apr 2021 13:47:13 +0300 Subject: [PATCH 2/2] test: fix incorrect 3-byte encoding. --- .../res/decoder/StringBlockWithSurrogatePairInUtf8Test.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/brut.apktool/apktool-lib/src/test/java/brut/androlib/res/decoder/StringBlockWithSurrogatePairInUtf8Test.java b/brut.apktool/apktool-lib/src/test/java/brut/androlib/res/decoder/StringBlockWithSurrogatePairInUtf8Test.java index 8294928d..23e96ca8 100644 --- a/brut.apktool/apktool-lib/src/test/java/brut/androlib/res/decoder/StringBlockWithSurrogatePairInUtf8Test.java +++ b/brut.apktool/apktool-lib/src/test/java/brut/androlib/res/decoder/StringBlockWithSurrogatePairInUtf8Test.java @@ -57,12 +57,13 @@ public class StringBlockWithSurrogatePairInUtf8Test { final byte[] bytesWithCharactersBeforeSurrogatePair = {'G', 'o', 'o', 'd', ' ', 'm', 'o', 'r', 'n', 'i', 'n', 'g', '!', ' ', (byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xB1, (byte) 0x8B, ' ', 'S', 'u', 'n', ' ', - (byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xBC, (byte) 0x9E + (byte) 0xED, (byte) 0xA0, (byte) 0xBC, (byte) 0xED, (byte) 0xBC, (byte) 0x9E }; final String actual2 = new StringBlock(bytesWithCharactersBeforeSurrogatePair, true).decodeString(0, 31); - // D83D -> ED 0xA0 0xBD + // D83D -> 0xED 0xA0 0xBD // DC4B -> 0xED 0xB1 0x8B + // D83C -> 0xED 0xA0 0xBC // DF1E -> 0xED 0xBC 0x9E assertEquals("Incorrect decoding when there are valid characters before the surrogate pair", "Good morning! \uD83D\uDC4B Sun \uD83C\uDF1E", actual2);