mirror of
https://github.com/revanced/Apktool.git
synced 2025-02-14 12:56:46 +01:00
fix: when decoding with UTF-8 fails, create a new buffer for the retry with CESU-8.
If UTF-8 decoding fails and there are UTF-8-decodable bytes before the bytes that could not be decoded, the read position of the original buffer has already been advanced past them, so those bytes are missing from the result of the CESU-8 retry. We now create a new buffer for the retry, so decoding starts again at the original start offset. See issue #2546.
This commit is contained in:
parent
0a7b843786
commit
f10060fe8f
@ -284,8 +284,8 @@ public class StringBlock {
|
||||
|
||||
@VisibleForTesting
|
||||
String decodeString(int offset, int length) {
|
||||
final ByteBuffer wrappedBuffer = ByteBuffer.wrap(m_strings, offset, length);
|
||||
try {
|
||||
final ByteBuffer wrappedBuffer = ByteBuffer.wrap(m_strings, offset, length);
|
||||
return (m_isUTF8 ? UTF8_DECODER : UTF16LE_DECODER).decode(wrappedBuffer).toString();
|
||||
} catch (CharacterCodingException ex) {
|
||||
if (!m_isUTF8) {
|
||||
@ -295,9 +295,10 @@ public class StringBlock {
|
||||
}
|
||||
|
||||
try {
|
||||
final ByteBuffer wrappedBufferRetry = ByteBuffer.wrap(m_strings, offset, length);
|
||||
// in some places, Android uses 3-byte UTF-8 sequences instead of 4-bytes.
|
||||
// If decoding failed, we try to use CESU-8 decoder, which is closer to what Android actually uses.
|
||||
return CESU8_DECODER.decode(wrappedBuffer).toString();
|
||||
return CESU8_DECODER.decode(wrappedBufferRetry).toString();
|
||||
} catch (CharacterCodingException e) {
|
||||
LOGGER.warning("Failed to decode a string with CESU-8 decoder.");
|
||||
return null;
|
||||
|
@ -52,6 +52,20 @@ public class StringBlockWithSurrogatePairInUtf8Test {
|
||||
// See: https://github.com/iBotPeaches/Apktool/issues/2299
|
||||
final String actual = new StringBlock(new byte[] { (byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xB4, (byte) 0x86}, true).decodeString(0, 6);
|
||||
assertEquals("Incorrect decoding", "\uD83D\uDD06", actual);
|
||||
|
||||
// See: https://github.com/iBotPeaches/Apktool/issues/2546
|
||||
final byte[] bytesWithCharactersBeforeSurrogatePair = {'G', 'o', 'o', 'd', ' ', 'm', 'o', 'r', 'n', 'i', 'n', 'g', '!', ' ',
|
||||
(byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xB1, (byte) 0x8B,
|
||||
' ', 'S', 'u', 'n', ' ',
|
||||
(byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xBC, (byte) 0x9E
|
||||
};
|
||||
final String actual2 = new StringBlock(bytesWithCharactersBeforeSurrogatePair, true).decodeString(0, 31);
|
||||
|
||||
// D83D -> 0xED 0xA0 0xBD
|
||||
// DC4B -> 0xED 0xB1 0x8B
|
||||
// DF1E -> 0xED 0xBC 0x9E
|
||||
assertEquals("Incorrect decoding when there are valid characters before the surrogate pair",
|
||||
"Good morning! \uD83D\uDC4B Sun \uD83C\uDF1E", actual2);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
Loading…
x
Reference in New Issue
Block a user