Merge pull request #2548 from Comnir/2546-create-new-buffer-when-retrying-decoding

Create a new ByteBuffer for decoding when retrying with CESU-8
This commit is contained in:
Connor Tumbleson 2021-04-03 08:44:17 -04:00 committed by GitHub
commit baf8bb592a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 18 additions and 2 deletions

View File

@@ -284,8 +284,8 @@ public class StringBlock {
@VisibleForTesting @VisibleForTesting
String decodeString(int offset, int length) { String decodeString(int offset, int length) {
final ByteBuffer wrappedBuffer = ByteBuffer.wrap(m_strings, offset, length);
try { try {
final ByteBuffer wrappedBuffer = ByteBuffer.wrap(m_strings, offset, length);
return (m_isUTF8 ? UTF8_DECODER : UTF16LE_DECODER).decode(wrappedBuffer).toString(); return (m_isUTF8 ? UTF8_DECODER : UTF16LE_DECODER).decode(wrappedBuffer).toString();
} catch (CharacterCodingException ex) { } catch (CharacterCodingException ex) {
if (!m_isUTF8) { if (!m_isUTF8) {
@@ -295,9 +295,10 @@ public class StringBlock {
} }
try { try {
final ByteBuffer wrappedBufferRetry = ByteBuffer.wrap(m_strings, offset, length);
// in some places, Android uses 3-byte UTF-8 sequences instead of 4-bytes. // in some places, Android uses 3-byte UTF-8 sequences instead of 4-bytes.
// If decoding failed, we try to use CESU-8 decoder, which is closer to what Android actually uses. // If decoding failed, we try to use CESU-8 decoder, which is closer to what Android actually uses.
return CESU8_DECODER.decode(wrappedBuffer).toString(); return CESU8_DECODER.decode(wrappedBufferRetry).toString();
} catch (CharacterCodingException e) { } catch (CharacterCodingException e) {
LOGGER.warning("Failed to decode a string with CESU-8 decoder."); LOGGER.warning("Failed to decode a string with CESU-8 decoder.");
return null; return null;

View File

@@ -52,6 +52,21 @@ public class StringBlockWithSurrogatePairInUtf8Test {
// See: https://github.com/iBotPeaches/Apktool/issues/2299 // See: https://github.com/iBotPeaches/Apktool/issues/2299
final String actual = new StringBlock(new byte[] { (byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xB4, (byte) 0x86}, true).decodeString(0, 6); final String actual = new StringBlock(new byte[] { (byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xB4, (byte) 0x86}, true).decodeString(0, 6);
assertEquals("Incorrect decoding", "\uD83D\uDD06", actual); assertEquals("Incorrect decoding", "\uD83D\uDD06", actual);
// See: https://github.com/iBotPeaches/Apktool/issues/2546
final byte[] bytesWithCharactersBeforeSurrogatePair = {'G', 'o', 'o', 'd', ' ', 'm', 'o', 'r', 'n', 'i', 'n', 'g', '!', ' ',
(byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xB1, (byte) 0x8B,
' ', 'S', 'u', 'n', ' ',
(byte) 0xED, (byte) 0xA0, (byte) 0xBC, (byte) 0xED, (byte) 0xBC, (byte) 0x9E
};
final String actual2 = new StringBlock(bytesWithCharactersBeforeSurrogatePair, true).decodeString(0, 31);
// D83D -> 0xED 0xA0 0xBD
// DC4B -> 0xED 0xB1 0x8B
// D83C -> 0xED 0xA0 0xBC
// DF1E -> 0xED 0xBC 0x9E
assertEquals("Incorrect decoding when there are valid characters before the surrogate pair",
"Good morning! \uD83D\uDC4B Sun \uD83C\uDF1E", actual2);
} }
@Test @Test