From 7c2cb5b30a15f64c4c41f1cd881aa51618e1d673 Mon Sep 17 00:00:00 2001 From: Connor Tumbleson Date: Tue, 1 Aug 2023 06:23:14 -0400 Subject: [PATCH] Hardened String Block Parser (#3239) * fix: add headerSize to stringBlock to detect larger headers * fix: handle app with style offset, but 0 styles * refactor: split counting stream into CountingDataInput * fix: read strings till end of string pool chunk * fix: support out of bound string reading * fix: don't read string/style offset out of bounds * refactor: cleanup comments for string parser * style: comment on 4 byte alignment * fix: only warn if utf16 string --- .../androlib/res/data/arsc/ARSCHeader.java | 12 ++-- .../androlib/res/decoder/ARSCDecoder.java | 42 +++++------ .../res/decoder/AXmlResourceParser.java | 19 ++--- .../androlib/res/decoder/StringBlock.java | 63 ++++++++++------ brut.j.util/build.gradle | 1 + .../java/brut/util/ExtCountingDataInput.java | 72 +++++++++++++++++++ 6 files changed, 146 insertions(+), 63 deletions(-) create mode 100644 brut.j.util/src/main/java/brut/util/ExtCountingDataInput.java diff --git a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/data/arsc/ARSCHeader.java b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/data/arsc/ARSCHeader.java index 5d5f3a87..5fe443dd 100644 --- a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/data/arsc/ARSCHeader.java +++ b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/data/arsc/ARSCHeader.java @@ -16,8 +16,8 @@ */ package brut.androlib.res.data.arsc; +import brut.util.ExtCountingDataInput; import brut.util.ExtDataInput; -import org.apache.commons.io.input.CountingInputStream; import java.io.EOFException; import java.io.IOException; @@ -39,23 +39,23 @@ public class ARSCHeader { this.endPosition = headerStart + chunkSize; } - public static ARSCHeader read(ExtDataInput in, CountingInputStream countIn) throws IOException { + public static ARSCHeader read(ExtCountingDataInput in) throws IOException { short type; - int start = countIn.getCount(); + int start = in.position(); try { type = in.readShort(); } catch (EOFException ex) { - return new ARSCHeader(RES_NONE_TYPE, 0, 0, countIn.getCount()); + return new ARSCHeader(RES_NONE_TYPE, 0, 0, in.position()); } return new ARSCHeader(type, in.readShort(), in.readInt(), start); } - public void checkForUnreadHeader(ExtDataInput in, CountingInputStream countIn) throws IOException { + public void checkForUnreadHeader(ExtCountingDataInput in) throws IOException { // Some applications lie about the reported size of their chunk header. Trusting the chunkSize is misleading // So compare to what we actually read in the header vs reported and skip the rest. // However, this runs after each chunk and not every chunk reading has a specific distinction between the // header and the body. - int actualHeaderSize = countIn.getCount() - this.startPosition; + int actualHeaderSize = in.position() - this.startPosition; int exceedingSize = this.headerSize - actualHeaderSize; if (exceedingSize > 0) { byte[] buf = new byte[exceedingSize]; diff --git a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/ARSCDecoder.java b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/ARSCDecoder.java index 605d82ef..62604d07 100644 --- a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/ARSCDecoder.java +++ b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/ARSCDecoder.java @@ -22,9 +22,8 @@ import brut.androlib.res.data.*; import brut.androlib.res.data.arsc.*; import brut.androlib.res.data.value.*; import brut.util.Duo; -import brut.util.ExtDataInput; +import brut.util.ExtCountingDataInput; import com.google.common.io.LittleEndianDataInputStream; -import org.apache.commons.io.input.CountingInputStream; import java.io.*; import java.math.BigInteger; @@ -52,16 +51,12 @@ public class ARSCDecoder { } private ARSCDecoder(InputStream arscStream, ResTable resTable, boolean storeFlagsOffsets, boolean keepBroken) { - arscStream = mCountIn = new CountingInputStream(arscStream); if (storeFlagsOffsets) { mFlagsOffsets = new ArrayList<>(); } else { mFlagsOffsets = null; } - // We need to explicitly cast to DataInput as otherwise the constructor is ambiguous. - // We choose DataInput instead of InputStream as ExtDataInput wraps an InputStream in - // a DataInputStream which is big-endian and ignores the little-endian behavior. - mIn = new ExtDataInput((DataInput) new LittleEndianDataInputStream(arscStream)); + mIn = new ExtCountingDataInput(new LittleEndianDataInputStream(arscStream)); mResTable = resTable; mKeepBroken = keepBroken; } @@ -126,20 +121,20 @@ public class ARSCDecoder { private void readStringPoolChunk() throws IOException, AndrolibException { checkChunkType(ARSCHeader.RES_STRING_POOL_TYPE); - mTableStrings = StringBlock.readWithoutChunk(mIn, mHeader.chunkSize); + mTableStrings = StringBlock.readWithoutChunk(mIn, mHeader.startPosition, mHeader.headerSize, mHeader.chunkSize); } private void readTableChunk() throws IOException, AndrolibException { checkChunkType(ARSCHeader.RES_TABLE_TYPE); mIn.skipInt(); // packageCount - mHeader.checkForUnreadHeader(mIn, mCountIn); + mHeader.checkForUnreadHeader(mIn); } private void readUnknownChunk() throws IOException, AndrolibException { checkChunkType(ARSCHeader.RES_NULL_TYPE); - mHeader.checkForUnreadHeader(mIn, mCountIn); + mHeader.checkForUnreadHeader(mIn); LOGGER.warning("Skipping unknown chunk data of size " + mHeader.chunkSize); mHeader.skipChunk(mIn); @@ -177,7 +172,7 @@ public class ARSCDecoder { LOGGER.warning("Please report this application to Apktool for a fix: https://github.com/iBotPeaches/Apktool/issues/1728"); } - mHeader.checkForUnreadHeader(mIn, mCountIn); + mHeader.checkForUnreadHeader(mIn); mTypeNames = StringBlock.readWithChunk(mIn); mSpecNames = StringBlock.readWithChunk(mIn); @@ -195,7 +190,7 @@ public class ARSCDecoder { int packageId; String packageName; - mHeader.checkForUnreadHeader(mIn, mCountIn); + mHeader.checkForUnreadHeader(mIn); for (int i = 0; i < libraryCount; i++) { packageId = mIn.readInt(); @@ -207,7 +202,7 @@ public class ARSCDecoder { private void readStagedAliasSpec() throws IOException { int count = mIn.readInt(); - mHeader.checkForUnreadHeader(mIn, mCountIn); + mHeader.checkForUnreadHeader(mIn); for (int i = 0; i < count; i++) { LOGGER.fine(String.format("Skipping staged alias stagedId (%h) finalId: %h", mIn.readInt(), mIn.readInt())); @@ -219,7 +214,7 @@ public class ARSCDecoder { String name = mIn.readNullEndedString(256, true); String actor = mIn.readNullEndedString(256, true); - mHeader.checkForUnreadHeader(mIn, mCountIn); + mHeader.checkForUnreadHeader(mIn); LOGGER.fine(String.format("Overlay name: \"%s\", actor: \"%s\")", name, actor)); } @@ -229,7 +224,7 @@ public class ARSCDecoder { mIn.skipInt(); // policyFlags int count = mIn.readInt(); - mHeader.checkForUnreadHeader(mIn, mCountIn); + mHeader.checkForUnreadHeader(mIn); for (int i = 0; i < count; i++) { LOGGER.fine(String.format("Skipping overlay (%h)", mIn.readInt())); @@ -244,10 +239,10 @@ public class ARSCDecoder { int entryCount = mIn.readInt(); if (mFlagsOffsets != null) { - mFlagsOffsets.add(new FlagsOffset(mCountIn.getCount(), entryCount)); + mFlagsOffsets.add(new FlagsOffset(mIn.position(), entryCount)); } - mHeader.checkForUnreadHeader(mIn, mCountIn); + mHeader.checkForUnreadHeader(mIn); mIn.skipBytes(entryCount * 4); // flags mTypeSpec = new ResTypeSpec(mTypeNames.getString(id - 1), mResTable, mPkg, id, entryCount); @@ -272,7 +267,7 @@ public class ARSCDecoder { mMissingResSpecMap = new LinkedHashMap<>(); ResConfigFlags flags = readConfigFlags(); - mHeader.checkForUnreadHeader(mIn, mCountIn); + mHeader.checkForUnreadHeader(mIn); if ((typeFlags & 0x01) != 0) { LOGGER.fine("Sparse type flags detected: " + mTypeSpec.getName()); @@ -310,7 +305,7 @@ public class ARSCDecoder { mResId = (mResId & 0xffff0000) | i; // As seen in some recent APKs - there are more entries reported than can fit in the chunk. - if (mCountIn.getCount() == mHeader.endPosition) { + if (mIn.position() == mHeader.endPosition) { int remainingEntries = entryCount - i; LOGGER.warning(String.format("End of chunk hit. Skipping remaining entries (%d) in type: %s", remainingEntries, mTypeSpec.getName() @@ -325,8 +320,8 @@ public class ARSCDecoder { } // skip "TYPE 8 chunks" and/or padding data at the end of this chunk - if (mCountIn.getCount() < mHeader.endPosition) { - long bytesSkipped = mCountIn.skip(mHeader.endPosition - mCountIn.getCount()); + if (mIn.position() < mHeader.endPosition) { + long bytesSkipped = mIn.skip(mHeader.endPosition - mIn.position()); LOGGER.warning("Unknown data detected. Skipping: " + bytesSkipped + " byte(s)"); } @@ -630,7 +625,7 @@ public class ARSCDecoder { } private ARSCHeader nextChunk() throws IOException { - return mHeader = ARSCHeader.read(mIn, mCountIn); + return mHeader = ARSCHeader.read(mIn); } private void checkChunkType(int expectedType) throws AndrolibException { @@ -640,9 +635,8 @@ public class ARSCDecoder { } } - private final ExtDataInput mIn; + private final ExtCountingDataInput mIn; private final ResTable mResTable; - private final CountingInputStream mCountIn; private final List mFlagsOffsets; private final boolean mKeepBroken; diff --git a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/AXmlResourceParser.java b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/AXmlResourceParser.java index 4acd3da5..679af932 100644 --- a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/AXmlResourceParser.java +++ b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/AXmlResourceParser.java @@ -24,9 +24,8 @@ import brut.androlib.res.data.ResTable; import brut.androlib.res.data.arsc.ARSCHeader; import brut.androlib.res.data.axml.NamespaceStack; import brut.androlib.res.xml.ResXmlEncoders; -import brut.util.ExtDataInput; +import brut.util.ExtCountingDataInput; import com.google.common.io.LittleEndianDataInputStream; -import org.apache.commons.io.input.CountingInputStream; import org.xmlpull.v1.XmlPullParserException; import java.io.*; @@ -64,11 +63,7 @@ public class AXmlResourceParser implements XmlResourceParser { public void open(InputStream stream) { close(); if (stream != null) { - stream = mCountIn = new CountingInputStream(stream); - // We need to explicitly cast to DataInput as otherwise the constructor is ambiguous. - // We choose DataInput instead of InputStream as ExtDataInput wraps an InputStream in - // a DataInputStream which is big-endian and ignores the little-endian behavior. - mIn = new ExtDataInput((DataInput) new LittleEndianDataInputStream(stream)); + mIn = new ExtCountingDataInput(new LittleEndianDataInputStream(stream)); } } @@ -79,7 +74,6 @@ public class AXmlResourceParser implements XmlResourceParser { } isOperational = false; mIn = null; - mCountIn = null; mStringBlock = null; mResourceIds = null; mNamespaces.reset(); @@ -687,8 +681,8 @@ public class AXmlResourceParser implements XmlResourceParser { } // #2070 - Some applications have 2 start namespaces, but only 1 end namespace. - if (mCountIn.available() == 0) { - LOGGER.warning(String.format("AXML hit unexpected end of file at byte: 0x%X", mCountIn.getCount())); + if (mIn.remaining() == 0) { + LOGGER.warning(String.format("AXML hit unexpected end of file at byte: 0x%X", mIn.position())); mEvent = END_DOCUMENT; break; } @@ -715,7 +709,7 @@ public class AXmlResourceParser implements XmlResourceParser { if (chunkType < ARSCHeader.RES_XML_FIRST_CHUNK_TYPE || chunkType > ARSCHeader.RES_XML_LAST_CHUNK_TYPE) { int chunkSize = mIn.readInt(); mIn.skipBytes(chunkSize - 8); - LOGGER.warning(String.format("Unknown chunk type at: (0x%08x) skipping...", mCountIn.getCount())); + LOGGER.warning(String.format("Unknown chunk type at: (0x%08x) skipping...", mIn.position())); break; } @@ -806,8 +800,7 @@ public class AXmlResourceParser implements XmlResourceParser { } } - private ExtDataInput mIn; - private CountingInputStream mCountIn; + private ExtCountingDataInput mIn; private ResAttrDecoder mAttrDecoder; private AndrolibException mFirstError; diff --git a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/StringBlock.java b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/StringBlock.java index 112e8936..5793df87 100644 --- a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/StringBlock.java +++ b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/StringBlock.java @@ -18,7 +18,7 @@ package brut.androlib.res.decoder; import brut.androlib.res.data.arsc.ARSCHeader; import brut.androlib.res.xml.ResXmlEncoders; -import brut.util.ExtDataInput; +import brut.util.ExtCountingDataInput; import com.google.common.annotations.VisibleForTesting; import java.io.IOException; @@ -30,15 +30,18 @@ import java.util.List; import java.util.logging.Logger; public class StringBlock { - public static StringBlock readWithChunk(ExtDataInput reader) throws IOException { + public static StringBlock readWithChunk(ExtCountingDataInput reader) throws IOException { + int startPosition = reader.position(); reader.skipCheckShort(ARSCHeader.RES_STRING_POOL_TYPE); - reader.skipShort(); // headerSize + int headerSize = reader.readShort(); int chunkSize = reader.readInt(); - return readWithoutChunk(reader, chunkSize); + return readWithoutChunk(reader, startPosition, headerSize, chunkSize); } - public static StringBlock readWithoutChunk(ExtDataInput reader, int chunkSize) throws IOException { + public static StringBlock readWithoutChunk(ExtCountingDataInput reader, int startPosition, int headerSize, + int chunkSize) throws IOException + { // ResStringPool_header int stringCount = reader.readInt(); int styleCount = reader.readInt(); @@ -46,28 +49,42 @@ public class StringBlock { int stringsOffset = reader.readInt(); int stylesOffset = reader.readInt(); - StringBlock block = new StringBlock(); - block.m_isUTF8 = (flags & UTF8_FLAG) != 0; - block.m_stringOffsets = reader.readIntArray(stringCount); - - if (styleCount != 0) { - block.m_styleOffsets = reader.readIntArray(styleCount); + // For some applications they pack the StringBlock header with more unused data at end. + if (headerSize > STRING_BLOCK_HEADER_SIZE) { + reader.skipBytes(headerSize - STRING_BLOCK_HEADER_SIZE); + } + + StringBlock block = new StringBlock(); + block.m_isUTF8 = (flags & UTF8_FLAG) != 0; + block.m_stringOffsets = reader.readSafeIntArray(stringCount, startPosition + stringsOffset); + + if (styleCount != 0) { + block.m_styleOffsets = reader.readSafeIntArray(styleCount, startPosition + stylesOffset); + } + + // #3236 - Some applications give a style offset, but have 0 styles. Make this check more robust. + boolean hasStyles = stylesOffset != 0 && styleCount != 0; + int size = chunkSize - stringsOffset; + + // If we have both strings and even just a lying style offset - lets calculate the size of the strings without + // accidentally parsing all the styles. + if (styleCount > 0) { + size = stylesOffset - stringsOffset; } - int size = ((stylesOffset == 0) ? chunkSize : stylesOffset) - stringsOffset; block.m_strings = new byte[size]; reader.readFully(block.m_strings); - if (stylesOffset != 0) { - size = (chunkSize - stylesOffset); + if (hasStyles) { + size = chunkSize - stylesOffset; block.m_styles = reader.readIntArray(size / 4); + } - // read remaining bytes - int remaining = size % 4; - if (remaining >= 1) { - while (remaining-- > 0) { - reader.readByte(); - } + // In case we aren't 4 byte aligned we need to skip the remaining bytes. + int remaining = size % 4; + if (remaining >= 1) { + while (remaining-- > 0) { + reader.readByte(); } } @@ -211,6 +228,11 @@ public class StringBlock { LOGGER.warning("Failed to decode a string at offset " + offset + " of length " + length); return null; } + } catch (IndexOutOfBoundsException ex) { + if (!m_isUTF8) { + LOGGER.warning("String extends outside of pool at " + offset + " of length " + length); + return null; + } } try { @@ -275,4 +297,5 @@ public class StringBlock { private static final Logger LOGGER = Logger.getLogger(StringBlock.class.getName()); private static final int UTF8_FLAG = 0x00000100; + private static final int STRING_BLOCK_HEADER_SIZE = 28; } diff --git a/brut.j.util/build.gradle b/brut.j.util/build.gradle index 92d7c923..b63c82b5 100644 --- a/brut.j.util/build.gradle +++ b/brut.j.util/build.gradle @@ -17,4 +17,5 @@ dependencies { implementation project(':brut.j.common') implementation depends.commons_io + implementation depends.guava } diff --git a/brut.j.util/src/main/java/brut/util/ExtCountingDataInput.java b/brut.j.util/src/main/java/brut/util/ExtCountingDataInput.java new file mode 100644 index 00000000..95e13182 --- /dev/null +++ b/brut.j.util/src/main/java/brut/util/ExtCountingDataInput.java @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2010 Ryszard Wiśniewski + * Copyright (C) 2010 Connor Tumbleson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package brut.util; + +import org.apache.commons.io.input.CountingInputStream; +import com.google.common.io.LittleEndianDataInputStream; + +import java.io.DataInput; +import java.io.IOException; +import java.util.logging.Logger; + +public class ExtCountingDataInput extends ExtDataInput { + private final CountingInputStream mCountIn; + + public ExtCountingDataInput(LittleEndianDataInputStream in) { + this(new CountingInputStream(in)); + } + + public ExtCountingDataInput(CountingInputStream countIn) { + // We need to explicitly cast to DataInput as otherwise the constructor is ambiguous. + // We choose DataInput instead of InputStream as ExtDataInput wraps an InputStream in + // a DataInputStream which is big-endian and ignores the little-endian behavior. + super((DataInput) new LittleEndianDataInputStream(countIn)); + mCountIn = countIn; + } + + public int position() { + return mCountIn.getCount(); + } + + public int remaining() throws IOException { + return mCountIn.available(); + } + + public long skip(int bytes) throws IOException { + return mCountIn.skip(bytes); + } + + public int[] readSafeIntArray(int length, long maxPosition) throws IOException { + int[] array = new int[length]; + + for (int i = 0; i < length; i++) { + // #3236 - In some applications we have more strings than fit into the block. This function takes + // an expected max position and if we are past it, we return early during processing. + if (position() >= maxPosition) { + LOGGER.warning(String.format("Bad string block: string entry is at %d, past end at %d", + position(), maxPosition) + ); + return array; + } + + array[i] = readInt(); + } + return array; + } + + private static final Logger LOGGER = Logger.getLogger(ExtCountingDataInput.class.getName()); +}