mirror of
https://github.com/revanced/Apktool.git
synced 2025-01-27 12:17:35 +01:00
Truncate filenames based on their utf-8 length
This commit is contained in:
parent
838b35e477
commit
83e63dab7a
@ -33,7 +33,9 @@ import ds.tree.RadixTreeImpl;
|
|||||||
|
|
||||||
import javax.annotation.Nonnull;
|
import javax.annotation.Nonnull;
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.CharBuffer;
|
import java.nio.CharBuffer;
|
||||||
|
import java.nio.IntBuffer;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -87,8 +89,9 @@ public class ClassFileNameHandler {
|
|||||||
packageElement += "#";
|
packageElement += "#";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (packageElement.length() > MAX_FILENAME_LENGTH) {
|
int utf8Length = utf8Length(packageElement);
|
||||||
packageElement = shortenPathComponent(packageElement, MAX_FILENAME_LENGTH);
|
if (utf8Length > MAX_FILENAME_LENGTH) {
|
||||||
|
packageElement = shortenPathComponent(packageElement, utf8Length - MAX_FILENAME_LENGTH);
|
||||||
}
|
}
|
||||||
|
|
||||||
packageElements[elementIndex++] = packageElement;
|
packageElements[elementIndex++] = packageElement;
|
||||||
@ -109,8 +112,9 @@ public class ClassFileNameHandler {
|
|||||||
packageElement += "#";
|
packageElement += "#";
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((packageElement.length() + fileExtension.length()) > MAX_FILENAME_LENGTH) {
|
int utf8Length = utf8Length(packageElement) + utf8Length(fileExtension);
|
||||||
packageElement = shortenPathComponent(packageElement, MAX_FILENAME_LENGTH - fileExtension.length());
|
if (utf8Length > MAX_FILENAME_LENGTH) {
|
||||||
|
packageElement = shortenPathComponent(packageElement, utf8Length - MAX_FILENAME_LENGTH);
|
||||||
}
|
}
|
||||||
|
|
||||||
packageElements[elementIndex] = packageElement;
|
packageElements[elementIndex] = packageElement;
|
||||||
@ -118,12 +122,87 @@ public class ClassFileNameHandler {
|
|||||||
return top.addUniqueChild(packageElements, 0);
|
return top.addUniqueChild(packageElements, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
/**
 * Computes the length of the given string in utf-8 bytes by walking it one code point at a
 * time and summing the per-code-point lengths reported by utf8Length(int).
 *
 * @param str the string to measure
 * @return the sum of utf8Length(codePoint) over every code point in str
 */
private static int utf8Length(String str) {
|
||||||
static String shortenPathComponent(@Nonnull String pathComponent, int maxLength) {
|
int utf8Length = 0;
|
||||||
int toRemove = pathComponent.length() - maxLength + 1;
|
int i=0;
|
||||||

while (i<str.length()) {
|
||||||

// codePointAt combines a surrogate pair starting at i into a single code point
int c = str.codePointAt(i);
|
||||||

utf8Length += utf8Length(c);
|
||||||

// advance by 1 or 2 chars, depending on how many chars the code point occupies
i += Character.charCount(c);
|
||||||

}
|
||||||

return utf8Length;
|
||||||

}
|
||||||
|
|
||||||
int firstIndex = (pathComponent.length()/2) - (toRemove/2);
|
/**
 * Returns the number of bytes used to count a single code point towards a utf-8 length.
 *
 * The value is chosen purely by range: below 0x80 is 1 byte, below 0x800 is 2 bytes,
 * below 0x10000 is 3 bytes, and everything else is 4 bytes. The code point is not validated,
 * so a negative value falls into the first range and is counted as 1 byte.
 *
 * @param codePoint the code point to measure
 * @return 1, 2, 3 or 4
 */
private static int utf8Length(int codePoint) {
|
||||||
return pathComponent.substring(0, firstIndex) + "#" + pathComponent.substring(firstIndex+toRemove);
|
if (codePoint < 0x80) {
|
||||||

return 1;
|
||||||

} else if (codePoint < 0x800) {
|
||||||

return 2;
|
||||||

} else if (codePoint < 0x10000) {
|
||||||

return 3;
|
||||||

} else {
|
||||||

// 0x10000 and above: counted as 4 bytes
return 4;
|
||||||

}
|
||||||

}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shortens an individual file/directory name, removing the necessary number of code points
|
||||||
|
* from the middle of the string such that the utf-8 encoding of the string is at least
|
||||||
|
* bytesToRemove bytes shorter than the original.
|
||||||
|
*
|
||||||
|
* The removed codePoints in the middle of the string will be replaced with a # character.
|
||||||
|
*/
|
||||||
|
@Nonnull
|
||||||
|
static String shortenPathComponent(@Nonnull String pathComponent, int bytesToRemove) {
|
||||||
|
// We replace the removed part with a #, so we need to remove 1 extra char
|
||||||
|
bytesToRemove++;
|
||||||
|
|
||||||
|
int[] codePoints;
|
||||||
|
try {
|
||||||
|
IntBuffer intBuffer = ByteBuffer.wrap(pathComponent.getBytes("UTF-32BE")).asIntBuffer();
|
||||||
|
codePoints = new int[intBuffer.limit()];
|
||||||
|
intBuffer.get(codePoints);
|
||||||
|
} catch (UnsupportedEncodingException ex) {
|
||||||
|
throw new RuntimeException(ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
int midPoint = codePoints.length/2;
|
||||||
|
int delta = 0;
|
||||||
|
|
||||||
|
int firstEnd = midPoint; // exclusive
|
||||||
|
int secondStart = midPoint+1; // inclusive
|
||||||
|
int bytesRemoved = utf8Length(codePoints[midPoint]);
|
||||||
|
|
||||||
|
// if we have an even number of codepoints, start by removing both middle characters,
|
||||||
|
// unless just removing the first already removes enough bytes
|
||||||
|
if (((codePoints.length % 2) == 0) && bytesRemoved < bytesToRemove) {
|
||||||
|
bytesRemoved += utf8Length(codePoints[secondStart]);
|
||||||
|
secondStart++;
|
||||||
|
}
|
||||||
|
|
||||||
|
while ((bytesRemoved < bytesToRemove) &&
|
||||||
|
(firstEnd > 0 || secondStart < codePoints.length)) {
|
||||||
|
if (firstEnd > 0) {
|
||||||
|
firstEnd--;
|
||||||
|
bytesRemoved += utf8Length(codePoints[firstEnd]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bytesRemoved < bytesToRemove && secondStart < codePoints.length) {
|
||||||
|
bytesRemoved += utf8Length(codePoints[secondStart]);
|
||||||
|
secondStart++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (int i=0; i<firstEnd; i++) {
|
||||||
|
sb.appendCodePoint(codePoints[i]);
|
||||||
|
}
|
||||||
|
sb.append('#');
|
||||||
|
for (int i=secondStart; i<codePoints.length; i++) {
|
||||||
|
sb.appendCodePoint(codePoints[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean testForWindowsReservedFileNames(File path) {
|
private static boolean testForWindowsReservedFileNames(File path) {
|
||||||
|
@ -34,16 +34,79 @@ package org.jf.util;
|
|||||||
import junit.framework.Assert;
|
import junit.framework.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
|
||||||
public class ClassFileNameHandlerTest {
|
public class ClassFileNameHandlerTest {
|
||||||
|
private final Charset UTF8 = Charset.forName("UTF-8");
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testShortedPathComponent() {
|
public void test1ByteEncodings() {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
for (int i=0; i<300; i++) {
|
for (int i=0; i<100; i++) {
|
||||||
sb.append((char)i);
|
sb.append((char)i);
|
||||||
}
|
}
|
||||||
|
|
||||||
String result = ClassFileNameHandler.shortenPathComponent(sb.toString(), 255);
|
String result = ClassFileNameHandler.shortenPathComponent(sb.toString(), 5);
|
||||||
|
Assert.assertEquals(95, result.getBytes(UTF8).length);
|
||||||
|
Assert.assertEquals(95, result.length());
|
||||||
|
}
|
||||||
|
|
||||||
Assert.assertEquals(255, result.length());
|
@Test
|
||||||
|
public void test2ByteEncodings() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (int i=0x80; i<0x80+100; i++) {
|
||||||
|
sb.append((char)i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// remove a total of 3 2-byte characters, and then add back in the 1-byte '#'
|
||||||
|
String result = ClassFileNameHandler.shortenPathComponent(sb.toString(), 4);
|
||||||
|
Assert.assertEquals(200, sb.toString().getBytes(UTF8).length);
|
||||||
|
Assert.assertEquals(195, result.getBytes(UTF8).length);
|
||||||
|
Assert.assertEquals(98, result.length());
|
||||||
|
|
||||||
|
// remove a total of 3 2-byte characters, and then add back in the 1-byte '#'
|
||||||
|
result = ClassFileNameHandler.shortenPathComponent(sb.toString(), 5);
|
||||||
|
Assert.assertEquals(200, sb.toString().getBytes(UTF8).length);
|
||||||
|
Assert.assertEquals(195, result.getBytes(UTF8).length);
|
||||||
|
Assert.assertEquals(98, result.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test3ByteEncodings() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (int i=0x800; i<0x800+100; i++) {
|
||||||
|
sb.append((char)i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// remove a total of 3 3-byte characters, and then add back in the 1-byte '#'
|
||||||
|
String result = ClassFileNameHandler.shortenPathComponent(sb.toString(), 6);
|
||||||
|
Assert.assertEquals(300, sb.toString().getBytes(UTF8).length);
|
||||||
|
Assert.assertEquals(292, result.getBytes(UTF8).length);
|
||||||
|
Assert.assertEquals(98, result.length());
|
||||||
|
|
||||||
|
// remove a total of 3 3-byte characters, and then add back in the 1-byte '#'
|
||||||
|
result = ClassFileNameHandler.shortenPathComponent(sb.toString(), 7);
|
||||||
|
Assert.assertEquals(300, sb.toString().getBytes(UTF8).length);
|
||||||
|
Assert.assertEquals(292, result.getBytes(UTF8).length);
|
||||||
|
Assert.assertEquals(98, result.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test4ByteEncodings() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (int i=0x10000; i<0x10000+100; i++) {
|
||||||
|
sb.appendCodePoint(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// we remove 3 codepoints == 6 characters == 12 bytes, and then add back in the 1-byte '#'
|
||||||
|
String result = ClassFileNameHandler.shortenPathComponent(sb.toString(), 8);
|
||||||
|
Assert.assertEquals(400, sb.toString().getBytes(UTF8).length);
|
||||||
|
Assert.assertEquals(389, result.getBytes(UTF8).length);
|
||||||
|
Assert.assertEquals(98, result.length());
|
||||||
|
|
||||||
|
// we remove 3 codepoints == 6 characters == 12 bytes, and then add back in the 1-byte '#'
|
||||||
|
result = ClassFileNameHandler.shortenPathComponent(sb.toString(), 7);
|
||||||
|
Assert.assertEquals(400, sb.toString().getBytes(UTF8).length);
|
||||||
|
Assert.assertEquals(3892, result.getBytes(UTF8).length);
|
||||||
|
Assert.assertEquals(98, result.length());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user