diff --git a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/StringBlock.java b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/StringBlock.java
index 9820435b..3d80d341 100644
--- a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/StringBlock.java
+++ b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/StringBlock.java
@@ -284,8 +284,8 @@ public class StringBlock {
 
     @VisibleForTesting
     String decodeString(int offset, int length) {
-        final ByteBuffer wrappedBuffer = ByteBuffer.wrap(m_strings, offset, length);
         try {
+            final ByteBuffer wrappedBuffer = ByteBuffer.wrap(m_strings, offset, length);
             return (m_isUTF8 ? UTF8_DECODER : UTF16LE_DECODER).decode(wrappedBuffer).toString();
         } catch (CharacterCodingException ex) {
             if (!m_isUTF8) {
@@ -295,9 +295,10 @@ public class StringBlock {
         }
 
         try {
+            final ByteBuffer wrappedBufferRetry = ByteBuffer.wrap(m_strings, offset, length);
             // in some places, Android uses 3-byte UTF-8 sequences instead of 4-bytes.
             // If decoding failed, we try to use CESU-8 decoder, which is closer to what Android actually uses.
-            return CESU8_DECODER.decode(wrappedBuffer).toString();
+            return CESU8_DECODER.decode(wrappedBufferRetry).toString();
         } catch (CharacterCodingException e) {
             LOGGER.warning("Failed to decode a string with CESU-8 decoder.");
             return null;
diff --git a/brut.apktool/apktool-lib/src/test/java/brut/androlib/res/decoder/StringBlockWithSurrogatePairInUtf8Test.java b/brut.apktool/apktool-lib/src/test/java/brut/androlib/res/decoder/StringBlockWithSurrogatePairInUtf8Test.java
index d141500c..8294928d 100644
--- a/brut.apktool/apktool-lib/src/test/java/brut/androlib/res/decoder/StringBlockWithSurrogatePairInUtf8Test.java
+++ b/brut.apktool/apktool-lib/src/test/java/brut/androlib/res/decoder/StringBlockWithSurrogatePairInUtf8Test.java
@@ -52,6 +52,20 @@ public class StringBlockWithSurrogatePairInUtf8Test {
         // See: https://github.com/iBotPeaches/Apktool/issues/2299
         final String actual = new StringBlock(new byte[] { (byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xB4, (byte) 0x86}, true).decodeString(0, 6);
         assertEquals("Incorrect decoding", "\uD83D\uDD06", actual);
+
+        // See: https://github.com/iBotPeaches/Apktool/issues/2546
+        final byte[] bytesWithCharactersBeforeSurrogatePair = {'G', 'o', 'o', 'd', ' ', 'm', 'o', 'r', 'n', 'i', 'n', 'g', '!', ' ',
+                (byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xB1, (byte) 0x8B,
+                ' ', 'S', 'u', 'n', ' ',
+                (byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xBC, (byte) 0x9E
+        };
+        final String actual2 = new StringBlock(bytesWithCharactersBeforeSurrogatePair, true).decodeString(0, 31);
+
+        // D83D -> ED 0xA0 0xBD
+        // DC4B -> 0xED 0xB1 0x8B
+        // DF1E -> 0xED 0xBC 0x9E
+        assertEquals("Incorrect decoding when there are valid characters before the surrogate pair",
+                "Good morning! \uD83D\uDC4B Sun \uD83D\uDF1E", actual2);
     }
 
     @Test