Move Utf8Utils to util module, and split out StringUtils

This commit is contained in:
Ben Gruver
2012-10-21 21:05:09 -07:00
parent ac7a94a295
commit 9f1d05eb44
15 changed files with 139 additions and 103 deletions

View File

@ -31,6 +31,7 @@ package org.jf.dexlib;
import org.jf.dexlib.Util.*;
import org.jf.util.AlignmentUtils;
import org.jf.util.ExceptionWithContext;
import org.jf.util.Hex;
import java.io.*;
import java.security.DigestException;

View File

@ -33,7 +33,7 @@ import org.jf.dexlib.StringIdItem;
import org.jf.dexlib.Util.AnnotatedOutput;
import org.jf.dexlib.Util.EncodedValueUtils;
import org.jf.dexlib.Util.Input;
import org.jf.dexlib.Util.Utf8Utils;
import org.jf.util.StringUtils;
public class StringEncodedValue extends EncodedValue {
public final StringIdItem value;
@ -65,7 +65,7 @@ public class StringEncodedValue extends EncodedValue {
if (out.annotates()) {
out.annotate(1, "value_type=" + ValueType.VALUE_STRING.name() + ",value_arg=" + (bytes.length - 1));
out.annotate(bytes.length, "value: \"" + Utf8Utils.escapeString(value.getStringValue()) + "\"");
out.annotate(bytes.length, "value: \"" + StringUtils.escapeString(value.getStringValue()) + "\"");
}
out.writeByte(ValueType.VALUE_STRING.value | ((bytes.length - 1) << 5));

View File

@ -31,7 +31,7 @@ package org.jf.dexlib;
import com.google.common.base.Preconditions;
import org.jf.dexlib.Util.AnnotatedOutput;
import org.jf.dexlib.Util.Input;
import org.jf.dexlib.Util.Utf8Utils;
import org.jf.util.StringUtils;
public class HeaderItem extends Item<HeaderItem> {
/**
@ -182,7 +182,7 @@ public class HeaderItem extends Item<HeaderItem> {
magicBuilder.append((char)MAGIC_VALUES[magic_index][i]);
}
out.annotate("magic: " + Utf8Utils.escapeString(magicBuilder.toString()));
out.annotate("magic: " + StringUtils.escapeString(magicBuilder.toString()));
out.write(MAGIC_VALUES[magic_index]);
out.annotate("checksum");

View File

@ -31,7 +31,8 @@ package org.jf.dexlib;
import org.jf.dexlib.Util.AnnotatedOutput;
import org.jf.dexlib.Util.Input;
import org.jf.dexlib.Util.Leb128Utils;
import org.jf.dexlib.Util.Utf8Utils;
import org.jf.util.StringUtils;
import org.jf.util.Utf8Utils;
public class StringDataItem extends Item<StringDataItem> {
private int hashCode = 0;
@ -103,7 +104,7 @@ public class StringDataItem extends Item<StringDataItem> {
")");
out.writeUnsignedLeb128(stringValue.length());
out.annotate(encodedValue.length + 1, "string_data: \"" + Utf8Utils.escapeString(stringValue) + "\"");
out.annotate(encodedValue.length + 1, "string_data: \"" + StringUtils.escapeString(stringValue) + "\"");
} else {
out.writeUnsignedLeb128(stringValue.length());
}
@ -118,7 +119,7 @@ public class StringDataItem extends Item<StringDataItem> {
/** {@inheritDoc} */
public String getConciseIdentity() {
return "string_data_item: \"" + Utf8Utils.escapeString(getStringValue()) + "\"";
return "string_data_item: \"" + StringUtils.escapeString(getStringValue()) + "\"";
}
/** {@inheritDoc} */

View File

@ -30,7 +30,7 @@ package org.jf.dexlib;
import org.jf.dexlib.Util.AnnotatedOutput;
import org.jf.dexlib.Util.Input;
import org.jf.dexlib.Util.Utf8Utils;
import org.jf.util.StringUtils;
import javax.annotation.Nullable;
@ -118,7 +118,7 @@ public class StringIdItem extends Item<StringIdItem> {
/** {@inheritDoc} */
public String getConciseIdentity() {
return "string_id_item: " + Utf8Utils.escapeString(getStringValue());
return "string_id_item: " + StringUtils.escapeString(getStringValue());
}
/** {@inheritDoc} */

View File

@ -25,6 +25,7 @@
package org.jf.dexlib.Util;
import org.jf.util.ExceptionWithContext;
import org.jf.util.Hex;
import java.io.IOException;
import java.io.Writer;

View File

@ -26,6 +26,7 @@ package org.jf.dexlib.Util;
import org.jf.util.AlignmentUtils;
import org.jf.util.ExceptionWithContext;
import org.jf.util.Utf8Utils;
/**
* Implementation of {@link Input} which reads the data from a

View File

@ -1,315 +0,0 @@
/*
* [The "BSD licence"]
* Copyright (c) 2010 Ben Gruver (JesusFreke)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.jf.dexlib.Util;
/**
* Utilities for formatting numbers as hexadecimal.
*/
public final class Hex {
/**
* This class is uninstantiable.
*/
private Hex() {
// This space intentionally left blank.
}
/**
* Formats a <code>long</code> as an 8-byte unsigned hex value.
*
* @param v value to format
* @return non-null; formatted form
*/
public static String u8(long v) {
char[] result = new char[16];
for (int i = 0; i < 16; i++) {
result[15 - i] = Character.forDigit((int) v & 0x0f, 16);
v >>= 4;
}
return new String(result);
}
/**
* Formats an <code>int</code> as a 4-byte unsigned hex value.
*
* @param v value to format
* @return non-null; formatted form
*/
public static String u4(int v) {
char[] result = new char[8];
for (int i = 0; i < 8; i++) {
result[7 - i] = Character.forDigit(v & 0x0f, 16);
v >>= 4;
}
return new String(result);
}
/**
* Formats an <code>int</code> as a 3-byte unsigned hex value.
*
* @param v value to format
* @return non-null; formatted form
*/
public static String u3(int v) {
char[] result = new char[6];
for (int i = 0; i < 6; i++) {
result[5 - i] = Character.forDigit(v & 0x0f, 16);
v >>= 4;
}
return new String(result);
}
/**
* Formats an <code>int</code> as a 2-byte unsigned hex value.
*
* @param v value to format
* @return non-null; formatted form
*/
public static String u2(int v) {
char[] result = new char[4];
for (int i = 0; i < 4; i++) {
result[3 - i] = Character.forDigit(v & 0x0f, 16);
v >>= 4;
}
return new String(result);
}
/**
* Formats an <code>int</code> as either a 2-byte unsigned hex value
* (if the value is small enough) or a 4-byte unsigned hex value (if
* not).
*
* @param v value to format
* @return non-null; formatted form
*/
public static String u2or4(int v) {
if (v == (char) v) {
return u2(v);
} else {
return u4(v);
}
}
/**
* Formats an <code>int</code> as a 1-byte unsigned hex value.
*
* @param v value to format
* @return non-null; formatted form
*/
public static String u1(int v) {
char[] result = new char[2];
for (int i = 0; i < 2; i++) {
result[1 - i] = Character.forDigit(v & 0x0f, 16);
v >>= 4;
}
return new String(result);
}
/**
* Formats an <code>int</code> as a 4-bit unsigned hex nibble.
*
* @param v value to format
* @return non-null; formatted form
*/
public static String uNibble(int v) {
char[] result = new char[1];
result[0] = Character.forDigit(v & 0x0f, 16);
return new String(result);
}
/**
* Formats a <code>long</code> as an 8-byte signed hex value.
*
* @param v value to format
* @return non-null; formatted form
*/
public static String s8(long v) {
char[] result = new char[17];
if (v < 0) {
result[0] = '-';
v = -v;
} else {
result[0] = '+';
}
for (int i = 0; i < 16; i++) {
result[16 - i] = Character.forDigit((int) v & 0x0f, 16);
v >>= 4;
}
return new String(result);
}
/**
* Formats an <code>int</code> as a 4-byte signed hex value.
*
* @param v value to format
* @return non-null; formatted form
*/
public static String s4(int v) {
char[] result = new char[9];
if (v < 0) {
result[0] = '-';
v = -v;
} else {
result[0] = '+';
}
for (int i = 0; i < 8; i++) {
result[8 - i] = Character.forDigit(v & 0x0f, 16);
v >>= 4;
}
return new String(result);
}
/**
* Formats an <code>int</code> as a 2-byte signed hex value.
*
* @param v value to format
* @return non-null; formatted form
*/
public static String s2(int v) {
char[] result = new char[5];
if (v < 0) {
result[0] = '-';
v = -v;
} else {
result[0] = '+';
}
for (int i = 0; i < 4; i++) {
result[4 - i] = Character.forDigit(v & 0x0f, 16);
v >>= 4;
}
return new String(result);
}
/**
* Formats an <code>int</code> as a 1-byte signed hex value.
*
* @param v value to format
* @return non-null; formatted form
*/
public static String s1(int v) {
char[] result = new char[3];
if (v < 0) {
result[0] = '-';
v = -v;
} else {
result[0] = '+';
}
for (int i = 0; i < 2; i++) {
result[2 - i] = Character.forDigit(v & 0x0f, 16);
v >>= 4;
}
return new String(result);
}
/**
* Formats a hex dump of a portion of a <code>byte[]</code>. The result
* is always newline-terminated, unless the passed-in length was zero,
* in which case the result is always the empty string (<code>""</code>).
*
* @param arr non-null; array to format
* @param offset &gt;= 0; offset to the part to dump
* @param length &gt;= 0; number of bytes to dump
* @param outOffset &gt;= 0; first output offset to print
* @param bpl &gt;= 0; number of bytes of output per line
* @param addressLength {2,4,6,8}; number of characters for each address
* header
* @return non-null; a string of the dump
*/
public static String dump(byte[] arr, int offset, int length,
int outOffset, int bpl, int addressLength) {
int end = offset + length;
// twos-complement math trick: ((x < 0) || (y < 0)) <=> ((x|y) < 0)
if (((offset | length | end) < 0) || (end > arr.length)) {
throw new IndexOutOfBoundsException("arr.length " +
arr.length + "; " +
offset + "..!" + end);
}
if (outOffset < 0) {
throw new IllegalArgumentException("outOffset < 0");
}
if (length == 0) {
return "";
}
StringBuffer sb = new StringBuffer(length * 4 + 6);
boolean bol = true;
int col = 0;
while (length > 0) {
if (col == 0) {
String astr;
switch (addressLength) {
case 2: astr = Hex.u1(outOffset); break;
case 4: astr = Hex.u2(outOffset); break;
case 6: astr = Hex.u3(outOffset); break;
default: astr = Hex.u4(outOffset); break;
}
sb.append(astr);
sb.append(": ");
} else if ((col & 1) == 0) {
sb.append(' ');
}
sb.append(Hex.u1(arr[offset]));
outOffset++;
offset++;
col++;
if (col == bpl) {
sb.append('\n');
col = 0;
}
length--;
}
if (col != 0) {
sb.append('\n');
}
return sb.toString();
}
}

View File

@ -1,260 +0,0 @@
/*
* Copyright (C) 2007 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* As per the Apache license requirements, this file has been modified
* from its original state.
*
* Such modifications are Copyright (C) 2010 Ben Gruver, and are released
* under the original license
*/
package org.jf.dexlib.Util;
import java.io.IOException;
import java.io.Writer;
/**
* Constants of type <code>CONSTANT_Utf8_info</code>.
*/
public final class Utf8Utils {
/**
* Converts a string into its Java-style UTF-8 form. Java-style UTF-8
* differs from normal UTF-8 in the handling of character '\0' and
* surrogate pairs.
*
* @param string non-null; the string to convert
* @return non-null; the UTF-8 bytes for it
*/
public static byte[] stringToUtf8Bytes(String string) {
int len = string.length();
byte[] bytes = new byte[len * 3]; // Avoid having to reallocate.
int outAt = 0;
for (int i = 0; i < len; i++) {
char c = string.charAt(i);
if ((c != 0) && (c < 0x80)) {
bytes[outAt] = (byte) c;
outAt++;
} else if (c < 0x800) {
bytes[outAt] = (byte) (((c >> 6) & 0x1f) | 0xc0);
bytes[outAt + 1] = (byte) ((c & 0x3f) | 0x80);
outAt += 2;
} else {
bytes[outAt] = (byte) (((c >> 12) & 0x0f) | 0xe0);
bytes[outAt + 1] = (byte) (((c >> 6) & 0x3f) | 0x80);
bytes[outAt + 2] = (byte) ((c & 0x3f) | 0x80);
outAt += 3;
}
}
byte[] result = new byte[outAt];
System.arraycopy(bytes, 0, result, 0, outAt);
return result;
}
private static char[] tempBuffer = null;
/**
* Converts an array of UTF-8 bytes into a string.
*
* This method uses a global buffer to avoid having to allocate one every time, so it is *not* thread-safe
*
* @param bytes non-null; the bytes to convert
* @param start the start index of the utf8 string to convert
* @param length the length of the utf8 string to convert, not including any null-terminator that might be present
* @return non-null; the converted string
*/
public static String utf8BytesToString(byte[] bytes, int start, int length) {
if (tempBuffer == null || tempBuffer.length < length) {
tempBuffer = new char[length];
}
char[] chars = tempBuffer;
int outAt = 0;
for (int at = start; length > 0; /*at*/) {
int v0 = bytes[at] & 0xFF;
char out;
switch (v0 >> 4) {
case 0x00: case 0x01: case 0x02: case 0x03:
case 0x04: case 0x05: case 0x06: case 0x07: {
// 0XXXXXXX -- single-byte encoding
length--;
if (v0 == 0) {
// A single zero byte is illegal.
return throwBadUtf8(v0, at);
}
out = (char) v0;
at++;
break;
}
case 0x0c: case 0x0d: {
// 110XXXXX -- two-byte encoding
length -= 2;
if (length < 0) {
return throwBadUtf8(v0, at);
}
int v1 = bytes[at + 1] & 0xFF;
if ((v1 & 0xc0) != 0x80) {
return throwBadUtf8(v1, at + 1);
}
int value = ((v0 & 0x1f) << 6) | (v1 & 0x3f);
if ((value != 0) && (value < 0x80)) {
/*
* This should have been represented with
* one-byte encoding.
*/
return throwBadUtf8(v1, at + 1);
}
out = (char) value;
at += 2;
break;
}
case 0x0e: {
// 1110XXXX -- three-byte encoding
length -= 3;
if (length < 0) {
return throwBadUtf8(v0, at);
}
int v1 = bytes[at + 1] & 0xFF;
if ((v1 & 0xc0) != 0x80) {
return throwBadUtf8(v1, at + 1);
}
int v2 = bytes[at + 2] & 0xFF;
if ((v2 & 0xc0) != 0x80) {
return throwBadUtf8(v2, at + 2);
}
int value = ((v0 & 0x0f) << 12) | ((v1 & 0x3f) << 6) |
(v2 & 0x3f);
if (value < 0x800) {
/*
* This should have been represented with one- or
* two-byte encoding.
*/
return throwBadUtf8(v2, at + 2);
}
out = (char) value;
at += 3;
break;
}
default: {
// 10XXXXXX, 1111XXXX -- illegal
return throwBadUtf8(v0, at);
}
}
chars[outAt] = out;
outAt++;
}
return new String(chars, 0, outAt);
}
/**
* Helper for {@link #utf8BytesToString}, which throws the right
* exception for a bogus utf-8 byte.
*
* @param value the byte value
* @param offset the file offset
* @return never
* @throws IllegalArgumentException always thrown
*/
private static String throwBadUtf8(int value, int offset) {
throw new IllegalArgumentException("bad utf-8 byte " + Hex.u1(value) +
" at offset " + Hex.u4(offset));
}
public static void writeEscapedChar(Writer writer, char c) throws IOException {
if ((c >= ' ') && (c < 0x7f)) {
if ((c == '\'') || (c == '\"') || (c == '\\')) {
writer.write('\\');
}
writer.write(c);
return;
} else if (c <= 0x7f) {
switch (c) {
case '\n': writer.write("\\n"); return;
case '\r': writer.write("\\r"); return;
case '\t': writer.write("\\t"); return;
}
}
writer.write("\\u");
writer.write(Character.forDigit(c >> 12, 16));
writer.write(Character.forDigit((c >> 8) & 0x0f, 16));
writer.write(Character.forDigit((c >> 4) & 0x0f, 16));
writer.write(Character.forDigit(c & 0x0f, 16));
}
public static void writeEscapedString(Writer writer, String value) throws IOException {
for (int i = 0; i < value.length(); i++) {
char c = value.charAt(i);
if ((c >= ' ') && (c < 0x7f)) {
if ((c == '\'') || (c == '\"') || (c == '\\')) {
writer.write('\\');
}
writer.write(c);
continue;
} else if (c <= 0x7f) {
switch (c) {
case '\n': writer.write("\\n"); continue;
case '\r': writer.write("\\r"); continue;
case '\t': writer.write("\\t"); continue;
}
}
writer.write("\\u");
writer.write(Character.forDigit(c >> 12, 16));
writer.write(Character.forDigit((c >> 8) & 0x0f, 16));
writer.write(Character.forDigit((c >> 4) & 0x0f, 16));
writer.write(Character.forDigit(c & 0x0f, 16));
}
}
public static String escapeString(String value) {
int len = value.length();
StringBuilder sb = new StringBuilder(len * 3 / 2);
for (int i = 0; i < len; i++) {
char c = value.charAt(i);
if ((c >= ' ') && (c < 0x7f)) {
if ((c == '\'') || (c == '\"') || (c == '\\')) {
sb.append('\\');
}
sb.append(c);
continue;
} else if (c <= 0x7f) {
switch (c) {
case '\n': sb.append("\\n"); continue;
case '\r': sb.append("\\r"); continue;
case '\t': sb.append("\\t"); continue;
}
}
sb.append("\\u");
sb.append(Character.forDigit(c >> 12, 16));
sb.append(Character.forDigit((c >> 8) & 0x0f, 16));
sb.append(Character.forDigit((c >> 4) & 0x0f, 16));
sb.append(Character.forDigit(c & 0x0f, 16));
}
return sb.toString();
}
}