decode three byte characters

This commit is contained in:
REAndroid 2023-03-21 07:12:06 -04:00
parent 3ee3e2b92f
commit 8e5afcd4ae
2 changed files with 235 additions and 3 deletions

View File

@ -0,0 +1,222 @@
/*
* Copyright (C) 2022 github.com/REAndroid
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.reandroid.arsc.decoder;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.StandardCharsets;
/**
 * Decoder for UTF-8 style byte sequences of at most three bytes per character.
 *
 * <p>One-, two- and three-byte sequences are each decoded to a single
 * {@code char}. Unlike a strict UTF-8 decoder, no surrogate check is made on
 * the result of a three-byte sequence, so surrogate halves that were encoded
 * as separate three-byte sequences (presumably CESU-8 / modified UTF-8 style
 * input) are passed through unchanged. Every other lead byte, including
 * four-byte leads, is reported as malformed input of length 1.
 *
 * <p>{@link #INSTANCE} is one shared decoder object. {@link CharsetDecoder}
 * instances are documented as not safe for concurrent use, so the shared
 * instance must not be used by several threads at the same time.
 */
public class ThreeByteCharsetDecoder extends CharsetDecoder {
    /** Shared decoder instance; see the class comment regarding concurrent use. */
    public static final ThreeByteCharsetDecoder INSTANCE = new ThreeByteCharsetDecoder();

    /** Creates a decoder that reports UTF-8 as its charset and yields at most one char per byte. */
    public ThreeByteCharsetDecoder() {
        super(StandardCharsets.UTF_8, 1.0F, 1.0F);
    }

    /**
     * Decodes as much of {@code src} into {@code dst} as possible, using the
     * array based loop when both buffers expose a backing array and the
     * relative get/put loop otherwise.
     *
     * @param src input bytes
     * @param dst output characters
     * @return underflow, overflow or a malformed-input result
     */
    @Override
    protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
        return src.hasArray() && dst.hasArray() ? this.decodeArrayLoop(src, dst) : this.decodeBufferLoop(src, dst);
    }

    /**
     * Decodes directly on the backing arrays. All local positions are absolute
     * array indices (buffer position plus array offset); the buffer positions
     * are written back by the helper that builds the returned result.
     */
    private CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
        byte[] srcBytes = src.array();
        int sourcePosition = src.arrayOffset() + src.position();
        int sourceLimit = src.arrayOffset() + src.limit();
        char[] dstChars = dst.array();
        int dstPosition = dst.arrayOffset() + dst.position();
        int dstLimit = dst.arrayOffset() + dst.limit();

        // Fast path: copy the leading run of ASCII bytes, bounded by whichever buffer is shorter.
        int asciiEnd = dstPosition + Math.min(sourceLimit - sourcePosition, dstLimit - dstPosition);
        while (dstPosition < asciiEnd && srcBytes[sourcePosition] >= 0) {
            dstChars[dstPosition++] = (char) srcBytes[sourcePosition++];
        }

        while (sourcePosition < sourceLimit) {
            int b1 = srcBytes[sourcePosition];
            if (b1 >= 0) {
                // One byte: 0xxxxxxx
                if (dstPosition >= dstLimit) {
                    return xFlow(src, sourcePosition, sourceLimit, dst, dstPosition, 1);
                }
                dstChars[dstPosition++] = (char) b1;
                ++sourcePosition;
            } else if (b1 >> 5 == -2 && (b1 & 0x1E) != 0) {
                // Two bytes: 110xxxxx 10xxxxxx (the 0x1E test rejects the overlong leads 0xC0/0xC1)
                if (sourceLimit - sourcePosition < 2 || dstPosition >= dstLimit) {
                    return xFlow(src, sourcePosition, sourceLimit, dst, dstPosition, 2);
                }
                int b2 = srcBytes[sourcePosition + 1];
                if (isNotContinuation(b2)) {
                    return malformedForLength(src, sourcePosition, dst, dstPosition);
                }
                // 0x0F80 cancels the sign-extended marker bits of both bytes.
                dstChars[dstPosition++] = (char) (b1 << 6 ^ b2 ^ 0x0F80);
                sourcePosition += 2;
            } else if (b1 >> 4 == -2) {
                // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
                int srcRemaining = sourceLimit - sourcePosition;
                if (srcRemaining < 3 || dstPosition >= dstLimit) {
                    // Report a bad second byte right away instead of waiting for more input.
                    if (srcRemaining > 1 && isMalformed3_2(b1, srcBytes[sourcePosition + 1])) {
                        return malformedForLength(src, sourcePosition, dst, dstPosition);
                    }
                    return xFlow(src, sourcePosition, sourceLimit, dst, dstPosition, 3);
                }
                int b2 = srcBytes[sourcePosition + 1];
                int b3 = srcBytes[sourcePosition + 2];
                if (isMalformed3(b1, b2, b3)) {
                    return malformed(src, sourcePosition, dst, dstPosition, 3);
                }
                // 0xFFFE1F80 cancels the sign-extended marker bits of all three bytes.
                // The result is stored without a surrogate check.
                dstChars[dstPosition++] = (char) (b1 << 12 ^ b2 << 6 ^ b3 ^ 0xFFFE1F80);
                sourcePosition += 3;
            } else {
                // Stray continuation byte, overlong two-byte lead, or a lead of four or more bytes.
                return malformed(src, sourcePosition, dst, dstPosition, 1);
            }
        }
        return xFlow(src, sourcePosition, sourceLimit, dst, dstPosition, 0);
    }

    /**
     * Decodes through relative {@code get}/{@code put} calls. {@code mark} is
     * the buffer position where the sequence currently being decoded starts;
     * the source is rewound to it before any result is returned.
     */
    private CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
        int mark = src.position();
        int limit = src.limit();
        while (mark < limit) {
            int b1 = src.get();
            if (b1 >= 0) {
                // One byte: 0xxxxxxx
                if (dst.remaining() < 1) {
                    return xFlow(src, mark, 1);
                }
                dst.put((char) b1);
                ++mark;
            } else if (b1 >> 5 == -2 && (b1 & 0x1E) != 0) {
                // Two bytes: 110xxxxx 10xxxxxx
                if (limit - mark < 2 || dst.remaining() < 1) {
                    return xFlow(src, mark, 2);
                }
                int b2 = src.get();
                if (isNotContinuation(b2)) {
                    return malformedForLength(src, mark);
                }
                dst.put((char) (b1 << 6 ^ b2 ^ 0x0F80));
                mark += 2;
            } else if (b1 >> 4 == -2) {
                // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
                int srcRemaining = limit - mark;
                if (srcRemaining < 3 || dst.remaining() < 1) {
                    if (srcRemaining > 1 && isMalformed3_2(b1, src.get())) {
                        return malformedForLength(src, mark);
                    }
                    return xFlow(src, mark, 3);
                }
                int b2 = src.get();
                int b3 = src.get();
                if (isMalformed3(b1, b2, b3)) {
                    return malformed(src, mark, 3);
                }
                // Stored without a surrogate check.
                dst.put((char) (b1 << 12 ^ b2 << 6 ^ b3 ^ 0xFFFE1F80));
                mark += 3;
            } else {
                return malformed(src, mark, 1);
            }
        }
        return xFlow(src, mark, 0);
    }

    /** Converts absolute array indices back to buffer positions and stores them. */
    private static void updatePositions(Buffer src, int sourcePosition, Buffer dst, int dstPosition) {
        src.position(sourcePosition - src.arrayOffset());
        dst.position(dstPosition - dst.arrayOffset());
    }

    /** Returns true when {@code b} does not have the form 10xxxxxx. */
    private static boolean isNotContinuation(int b) {
        return (b & 0xC0) != 0x80;
    }

    /**
     * Returns true when the three bytes are not a valid sequence: an overlong
     * form (lead 0xE0 followed by a second byte below 0xA0) or a second or
     * third byte that is not a continuation byte.
     */
    private static boolean isMalformed3(int b1, int b2, int b3) {
        return b1 == -32 && (b2 & 0xE0) == 0x80 || (b2 & 0xC0) != 0x80 || (b3 & 0xC0) != 0x80;
    }

    /** Same check as {@link #isMalformed3} restricted to the first two bytes. */
    private static boolean isMalformed3_2(int b1, int b2) {
        return b1 == -32 && (b2 & 0xE0) == 0x80 || (b2 & 0xC0) != 0x80;
    }

    /**
     * Builds the malformed-input result for a sequence whose expected length
     * is {@code nb}, reading from the current position of {@code src}.
     *
     * <p>Only lengths 1 and 3 are supported because those are the only values
     * this decoder passes in. For length 3 the reported length is 2 when the
     * first two bytes are acceptable (so the third byte is the bad one) and 1
     * otherwise.
     *
     * @throws IllegalArgumentException for any other {@code nb}
     */
    private static CoderResult malformedN(ByteBuffer src, int nb) {
        switch (nb) {
            case 1:
                return CoderResult.malformedForLength(1);
            case 3:
                int b1 = src.get();
                int b2 = src.get();
                boolean firstTwoValid = (b1 != -32 || (b2 & 0xE0) != 0x80) && !isNotContinuation(b2);
                return CoderResult.malformedForLength(firstTwoValid ? 2 : 1);
            default:
                throw new IllegalArgumentException("Unsupported sequence length: " + nb);
        }
    }

    /** Array-loop variant: computes the malformed length, then stores both buffer positions. */
    private static CoderResult malformed(ByteBuffer src, int sourcePosition, CharBuffer dst, int dstPosition, int numBytes) {
        // malformedN reads relative to the buffer position, so point it at the sequence start first.
        src.position(sourcePosition - src.arrayOffset());
        CoderResult cr = malformedN(src, numBytes);
        updatePositions(src, sourcePosition, dst, dstPosition);
        return cr;
    }

    /** Buffer-loop variant: computes the malformed length and rewinds the source to {@code mark}. */
    private static CoderResult malformed(ByteBuffer src, int mark, int nb) {
        src.position(mark);
        CoderResult cr = malformedN(src, nb);
        src.position(mark);
        return cr;
    }

    /** Array-loop variant: stores both buffer positions and reports one malformed byte. */
    private static CoderResult malformedForLength(ByteBuffer src, int sourcePosition, CharBuffer dst, int dstPosition) {
        updatePositions(src, sourcePosition, dst, dstPosition);
        return CoderResult.malformedForLength(1);
    }

    /** Buffer-loop variant: rewinds the source to {@code mark} and reports one malformed byte. */
    private static CoderResult malformedForLength(ByteBuffer src, int mark) {
        src.position(mark);
        return CoderResult.malformedForLength(1);
    }

    /**
     * Array-loop variant: stores both buffer positions and returns OVERFLOW
     * when {@code numBytes} is non-zero and at least that many source bytes
     * remain, UNDERFLOW otherwise.
     */
    private static CoderResult xFlow(Buffer src, int sourcePosition, int sourceLimit, Buffer dst, int dstPosition, int numBytes) {
        updatePositions(src, sourcePosition, dst, dstPosition);
        return numBytes != 0 && sourceLimit - sourcePosition >= numBytes ? CoderResult.OVERFLOW : CoderResult.UNDERFLOW;
    }

    /**
     * Buffer-loop variant: rewinds the source to {@code mark} and returns
     * OVERFLOW when {@code nb} is non-zero and at least that many source bytes
     * remain, UNDERFLOW otherwise.
     */
    private static CoderResult xFlow(Buffer src, int mark, int nb) {
        src.position(mark);
        return nb != 0 && src.remaining() >= nb ? CoderResult.OVERFLOW : CoderResult.UNDERFLOW;
    }
}

View File

@ -15,7 +15,7 @@
*/
package com.reandroid.arsc.item;
import com.reandroid.arsc.base.Block;
import com.reandroid.arsc.decoder.ThreeByteCharsetDecoder;
import com.reandroid.arsc.io.BlockReader;
import com.reandroid.arsc.pool.StringPool;
import com.reandroid.json.JSONConvert;
@ -212,11 +212,20 @@ public class StringItem extends BlockItem implements JSONConvert<JSONObject> {
return charBuffer.toString();
} catch (CharacterCodingException ex) {
if(isUtf8){
return new String(allStringBytes, offLen[0], offLen[1], StandardCharsets.UTF_8);
return tryThreeByteDecoder(allStringBytes, offLen[0], offLen[1]);
}
return new String(allStringBytes, offLen[0], offLen[1], StandardCharsets.UTF_16LE);
}
}
/**
 * Decodes a range of {@code bytes} with {@code DECODER_3B}.
 * If that decoder raises a {@link CharacterCodingException}, the same range
 * is converted with the {@code String} constructor using UTF-8 instead.
 *
 * @param bytes  array holding the encoded string
 * @param offset index of the first byte to decode
 * @param length number of bytes to decode
 * @return the decoded string
 */
private String tryThreeByteDecoder(byte[] bytes, int offset, int length){
    ByteBuffer wrapped = ByteBuffer.wrap(bytes, offset, length);
    try {
        return DECODER_3B.decode(wrapped).toString();
    } catch (CharacterCodingException ignored) {
        // Fall back to the String constructor for input the three-byte decoder rejects.
        return new String(bytes, offset, length, StandardCharsets.UTF_8);
    }
}
public boolean hasStyle(){
StyleItem styleItem=getStyle();
if(styleItem==null){
@ -314,7 +323,7 @@ public class StringItem extends BlockItem implements JSONConvert<JSONObject> {
private static byte[] encodeUtf8ToBytes(String str){
byte[] bts=new byte[0];
byte[] bts;
byte[] lenBytes=new byte[2];
if(str!=null){
bts=str.getBytes(StandardCharsets.UTF_8);
@ -400,6 +409,7 @@ public class StringItem extends BlockItem implements JSONConvert<JSONObject> {
private static final CharsetDecoder UTF16LE_DECODER = StandardCharsets.UTF_16LE.newDecoder();
private static final CharsetDecoder UTF8_DECODER = StandardCharsets.UTF_8.newDecoder();
private static final CharsetDecoder DECODER_3B = ThreeByteCharsetDecoder.INSTANCE;
public static final String NAME_string="string";
public static final String NAME_style="style";