mirror of
https://github.com/revanced/Apktool.git
synced 2024-12-04 18:12:54 +01:00
Merge branch 'Comnir-2299-use-CESU-8-instead-of-UTF-8'
This commit is contained in:
commit
83c754cb3f
@ -37,7 +37,7 @@ public class ResResSpec {
|
|||||||
|
|
||||||
ResResSpec resResSpec = type.getResSpecUnsafe(name);
|
ResResSpec resResSpec = type.getResSpecUnsafe(name);
|
||||||
if (resResSpec != null) {
|
if (resResSpec != null) {
|
||||||
cleanName = name + "_APKTOOL_DUPLICATENAME_" + id.toString();
|
cleanName = String.format("APKTOOL_DUPLICATE_%s_%s", type.toString(), id.toString());
|
||||||
} else {
|
} else {
|
||||||
cleanName = ((name == null || name.isEmpty()) ? ("APKTOOL_DUMMYVAL_" + id.toString()) : name);
|
cleanName = ((name == null || name.isEmpty()) ? ("APKTOOL_DUMMYVAL_" + id.toString()) : name);
|
||||||
}
|
}
|
||||||
|
@ -18,6 +18,8 @@ package brut.androlib.res.decoder;
|
|||||||
|
|
||||||
import brut.androlib.res.xml.ResXmlEncoders;
|
import brut.androlib.res.xml.ResXmlEncoders;
|
||||||
import brut.util.ExtDataInput;
|
import brut.util.ExtDataInput;
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.charset.*;
|
import java.nio.charset.*;
|
||||||
@ -254,6 +256,12 @@ public class StringBlock {
|
|||||||
private StringBlock() {
|
private StringBlock() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
StringBlock(byte[] strings, boolean isUTF8) {
|
||||||
|
m_strings = strings;
|
||||||
|
m_isUTF8 = isUTF8;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns style information - array of int triplets, where in each triplet:
|
* Returns style information - array of int triplets, where in each triplet:
|
||||||
* * first int is index of tag name ('b','i', etc.) * second int is tag
|
* * first int is index of tag name ('b','i', etc.) * second int is tag
|
||||||
@ -288,11 +296,24 @@ public class StringBlock {
|
|||||||
return style;
|
return style;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String decodeString(int offset, int length) {
|
@VisibleForTesting
|
||||||
|
String decodeString(int offset, int length) {
|
||||||
|
final ByteBuffer wrappedBuffer = ByteBuffer.wrap(m_strings, offset, length);
|
||||||
try {
|
try {
|
||||||
return (m_isUTF8 ? UTF8_DECODER : UTF16LE_DECODER).decode(
|
return (m_isUTF8 ? UTF8_DECODER : UTF16LE_DECODER).decode(wrappedBuffer).toString();
|
||||||
ByteBuffer.wrap(m_strings, offset, length)).toString();
|
|
||||||
} catch (CharacterCodingException ex) {
|
} catch (CharacterCodingException ex) {
|
||||||
|
LOGGER.warning("Failed to decode a string at offset " + offset + " of length " + length);
|
||||||
|
if (!m_isUTF8) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// In some places, Android encodes each UTF-16 surrogate as its own 3-byte sequence instead of using a single 4-byte UTF-8 sequence.
|
||||||
|
// If decoding failed, we try to use CESU-8 decoder, which is closer to what Android actually uses.
|
||||||
|
return CESU8_DECODER.decode(wrappedBuffer).toString();
|
||||||
|
} catch (CharacterCodingException e) {
|
||||||
|
LOGGER.warning("Failed to decode a string with CESU-8 decoder.");
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -353,6 +374,7 @@ public class StringBlock {
|
|||||||
|
|
||||||
private final CharsetDecoder UTF16LE_DECODER = Charset.forName("UTF-16LE").newDecoder();
|
private final CharsetDecoder UTF16LE_DECODER = Charset.forName("UTF-16LE").newDecoder();
|
||||||
private final CharsetDecoder UTF8_DECODER = Charset.forName("UTF-8").newDecoder();
|
private final CharsetDecoder UTF8_DECODER = Charset.forName("UTF-8").newDecoder();
|
||||||
|
private final CharsetDecoder CESU8_DECODER = Charset.forName("CESU8").newDecoder();
|
||||||
private static final Logger LOGGER = Logger.getLogger(StringBlock.class.getName());
|
private static final Logger LOGGER = Logger.getLogger(StringBlock.class.getName());
|
||||||
|
|
||||||
// ResChunk_header = header.type (0x0001) + header.headerSize (0x001C)
|
// ResChunk_header = header.type (0x0001) + header.headerSize (0x001C)
|
||||||
|
@ -0,0 +1,49 @@
|
|||||||
|
package brut.androlib.res.decoder;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
||||||
|
public class StringBlockWithSurrogatePairInUtf8Test {
|
||||||
|
@Test
|
||||||
|
public void decodeSingleOctet() {
|
||||||
|
final String actual = new StringBlock("abcDEF123".getBytes(StandardCharsets.UTF_8), true).decodeString(0, 9);
|
||||||
|
assertEquals("Incorrect decoding", "abcDEF123", actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void decodeTwoOctets() {
|
||||||
|
final String actual0 = new StringBlock(new byte[] { (byte) 0xC2, (byte) 0x80}, true).decodeString(0, 2);
|
||||||
|
assertEquals("Incorrect decoding", "\u0080", actual0);
|
||||||
|
|
||||||
|
final String actual1 = new StringBlock(new byte[] { (byte) 0xDF, (byte) 0xBF}, true).decodeString(0, 2);
|
||||||
|
assertEquals("Incorrect decoding", "\u07FF", actual1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void decodeThreeOctets() {
|
||||||
|
final String actual0 = new StringBlock(new byte[] { (byte) 0xE0, (byte) 0xA0, (byte) 0x80}, true).decodeString(0, 3);
|
||||||
|
assertEquals("Incorrect decoding", "\u0800", actual0);
|
||||||
|
|
||||||
|
final String actual1 = new StringBlock(new byte[] { (byte) 0xEF, (byte) 0xBF, (byte) 0xBF}, true).decodeString(0, 3);
|
||||||
|
assertEquals("Incorrect decoding", "\uFFFF", actual1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void decodeSurrogatePair_when_givesAsThreeOctetsFromInvalidRangeOfUtf8() {
|
||||||
|
// See: https://github.com/iBotPeaches/Apktool/issues/2299
|
||||||
|
final String actual = new StringBlock(new byte[] { (byte) 0xED, (byte) 0xA0, (byte) 0xBD, (byte) 0xED, (byte) 0xB4, (byte) 0x86}, true).decodeString(0, 6);
|
||||||
|
assertEquals("Incorrect decoding", "\uD83D\uDD06", actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void decodeSurrogatePair_when_givesAsThreeOctetsFromTheValidRangeOfUtf8() {
|
||||||
|
// U+10FFFF is the surrogate pair 0xDBFF 0xDFFF in UTF-16 (standard UTF-8 would use a single 4-byte sequence),
|
||||||
|
// but when used in Android resources which are encoded in UTF-8, 3-byte encoding is used,
|
||||||
|
// so each surrogate of the pair is encoded as its own 3-byte sequence
|
||||||
|
final String actual = new StringBlock(new byte[] { (byte) 0xED, (byte) 0xAF, (byte) 0xBF, (byte) 0xED, (byte) 0xBF, (byte) 0xBF}, true).decodeString(0, 6);
|
||||||
|
assertEquals("Incorrect decoding", "\uDBFF\uDFFF", actual);
|
||||||
|
}
|
||||||
|
}
|
@ -41,4 +41,5 @@ bar"</string>
|
|||||||
<string name="test_string39"><font size="17">[Ţåþ ţö ţýþé þåššŵöŕð one two three</font>]</string>
|
<string name="test_string39"><font size="17">[Ţåþ ţö ţýþé þåššŵöŕð one two three</font>]</string>
|
||||||
<string name="test_string40">[<font size="17">]Ţåþ ţö ţýþé þåššŵöŕð one two three</font></string>
|
<string name="test_string40">[<font size="17">]Ţåþ ţö ţýþé þåššŵöŕð one two three</font></string>
|
||||||
<string name="test_string41"><font size="17">[Ţåþ ţö ţýþé þåššŵöŕð one two three]</font></string>
|
<string name="test_string41"><font size="17">[Ţåþ ţö ţýþé þåššŵöŕð one two three]</font></string>
|
||||||
|
<string name="test_string42">🔆</string>
|
||||||
</resources>
|
</resources>
|
||||||
|
@ -2,4 +2,5 @@
|
|||||||
<resources>
|
<resources>
|
||||||
<string name="app_name">testapp</string>
|
<string name="app_name">testapp</string>
|
||||||
<string name="long_string_32767" />
|
<string name="long_string_32767" />
|
||||||
|
<string name="surrogate_issue_2299">🔆</string>
|
||||||
</resources>
|
</resources>
|
||||||
|
Loading…
Reference in New Issue
Block a user