fix: when decoding with UTF-8 fails, create a new buffer for the retry with CESU-8.

If UTF-8 decoding fails partway through a string, the bytes that were decoded successfully before the failure have already been consumed: the position of the original buffer has advanced past them, so retrying on that same buffer starts after those bytes and they are missing from the decoded result. The CESU-8 retry now wraps the same offset and length in a new buffer, so decoding restarts at the original start offset. Fixes issue #2546.
2025-02-14 12:56:46 +01:00 · 2021-04-02 21:29:03 +03:00 · 2021-04-02 21:29:03 +03:00 · f10060fe8f
commit f10060fe8f
parent 0a7b843786
2 changed files with 17 additions and 2 deletions
--- a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/StringBlock.java
+++ b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/StringBlock.java
@ -284,8 +284,8 @@ public class StringBlock {

    @VisibleForTesting
    String decodeString(int offset, int length) {
-        final ByteBuffer wrappedBuffer = ByteBuffer.wrap(m_strings, offset, length);
        try {
+            final ByteBuffer wrappedBuffer = ByteBuffer.wrap(m_strings, offset, length);
            return (m_isUTF8 ? UTF8_DECODER : UTF16LE_DECODER).decode(wrappedBuffer).toString();
        } catch (CharacterCodingException ex) {
            if (!m_isUTF8) {
@ -295,9 +295,10 @@ public class StringBlock {
        }

        try {
+            final ByteBuffer wrappedBufferRetry = ByteBuffer.wrap(m_strings, offset, length);
            // in some places, Android uses 3-byte UTF-8 sequences instead of 4-bytes.
            // If decoding failed, we try to use CESU-8 decoder, which is closer to what Android actually uses.
-            return CESU8_DECODER.decode(wrappedBuffer).toString();
+            return CESU8_DECODER.decode(wrappedBufferRetry).toString();
        } catch (CharacterCodingException e) {
            LOGGER.warning("Failed to decode a string with CESU-8 decoder.");
            return null;
--- a/brut.apktool/apktool-lib/src/test/java/brut/androlib/res/decoder/StringBlockWithSurrogatePairInUtf8Test.java
+++ b/brut.apktool/apktool-lib/src/test/java/brut/androlib/res/decoder/StringBlockWithSurrogatePairInUtf8Test.java
@ -52,6 +52,20 @@ public class StringBlockWithSurrogatePairInUtf8Test {
        // See: https://github.com/iBotPeaches/Apktool/issues/2299
        final String actual = new StringBlock(new byte[] {	(byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xB4, (byte) 0x86}, true).decodeString(0, 6);
        assertEquals("Incorrect decoding", "\uD83D\uDD06", actual);
+
+        // See: https://github.com/iBotPeaches/Apktool/issues/2546
+        final byte[] bytesWithCharactersBeforeSurrogatePair = {'G', 'o', 'o', 'd', ' ', 'm', 'o', 'r', 'n', 'i', 'n', 'g', '!', ' ',
+                (byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xB1, (byte) 0x8B,
+                ' ', 'S', 'u', 'n', ' ',
+                (byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xBC, (byte) 0x9E
+        };
+        final String actual2 = new StringBlock(bytesWithCharactersBeforeSurrogatePair, true).decodeString(0, 31);
+
+        // D83D -> 0xED 0xA0 0xBD
+        // DC4B -> 0xED 0xB1 0x8B
+        // DF1E -> 0xED 0xBC 0x9E
+        assertEquals("Incorrect decoding when there are valid characters before the surrogate pair",
+                "Good morning! \uD83D\uDC4B Sun \uD83C\uDF1E", actual2);
    }

    @Test