From 2fd677db8811d33d3c77cb3e157c9eeb01cf1412 Mon Sep 17 00:00:00 2001 From: Ben Gruver Date: Wed, 26 Mar 2014 20:23:15 -0700 Subject: [PATCH] Tweak how array descriptors are parsed This is needed to allow smalidea to have an outer array type element to represent the type as a whole, and an inner class type element for the actual element type --- smali/src/main/antlr3/smaliParser.g | 25 +++--- smali/src/main/antlr3/smaliTreeWalker.g | 22 ++--- smali/src/main/jflex/smaliLexer.flex | 21 ++++- .../LexerTest/RealSmaliFileTest.tokens | 6 +- .../LexerTest/TypeAndIdentifierTest.smali | 6 +- .../LexerTest/TypeAndIdentifierTest.tokens | 81 +++++++++++++------ smalidea/src/main/antlr3/smalideaParser.g | 2 +- .../java/org/jf/smalidea/SmaliTokens.java | 4 +- .../java/org/jf/smalidea/SmaliLexerTest.java | 3 +- 9 files changed, 110 insertions(+), 60 deletions(-) diff --git a/smali/src/main/antlr3/smaliParser.g b/smali/src/main/antlr3/smaliParser.g index d057b4a4..0e8c2aa4 100644 --- a/smali/src/main/antlr3/smaliParser.g +++ b/smali/src/main/antlr3/smaliParser.g @@ -39,7 +39,7 @@ tokens { ANNOTATION_DIRECTIVE; ANNOTATION_VISIBILITY; ARRAY_DATA_DIRECTIVE; - ARRAY_DESCRIPTOR; + ARRAY_TYPE_PREFIX; ARROW; BOOL_LITERAL; BYTE_LITERAL; @@ -369,16 +369,12 @@ import org.jf.dexlib2.Opcodes; case '[': { int i = typeStartIndex; - while (str.charAt(++i) == '['); + while (str.charAt(++i) == '['); - if (str.charAt(i++) == 'L') { - while (str.charAt(i++) != ';'); - } - - token.setType(ARRAY_DESCRIPTOR); - token.setText(str.substring(typeStartIndex, i)); - token.setStopIndex(baseToken.getStartIndex() + i - 1); - break; + token.setType(ARRAY_TYPE_PREFIX); + token.setText(str.substring(typeStartIndex, i)); + token.setStopIndex(baseToken.getStartIndex() + i - 1); + break; } default: throw new RuntimeException(String.format("Invalid character '\%c' in param list \"\%s\" at position \%d", str.charAt(typeStartIndex), str, typeStartIndex)); @@ -591,20 +587,23 @@ param_list | PARAM_LIST_OR_ID_START PRIMITIVE_TYPE* PARAM_LIST_OR_ID_END -> PRIMITIVE_TYPE* | nonvoid_type_descriptor*; +array_descriptor + : ARRAY_TYPE_PREFIX (PRIMITIVE_TYPE | CLASS_DESCRIPTOR); + type_descriptor : VOID_TYPE | PRIMITIVE_TYPE | CLASS_DESCRIPTOR - | ARRAY_DESCRIPTOR; + | array_descriptor; nonvoid_type_descriptor : PRIMITIVE_TYPE | CLASS_DESCRIPTOR - | ARRAY_DESCRIPTOR; + | array_descriptor; reference_type_descriptor : CLASS_DESCRIPTOR - | ARRAY_DESCRIPTOR; + | array_descriptor; integer_literal : POSITIVE_INTEGER_LITERAL -> INTEGER_LITERAL[$POSITIVE_INTEGER_LITERAL] diff --git a/smali/src/main/antlr3/smaliTreeWalker.g b/smali/src/main/antlr3/smaliTreeWalker.g index f49f10ef..c5b27768 100644 --- a/smali/src/main/antlr3/smaliTreeWalker.g +++ b/smali/src/main/antlr3/smaliTreeWalker.g @@ -1146,20 +1146,20 @@ insn_sparse_switch_directive $method::methodBuilder.addInstruction(new BuilderSparseSwitchPayload($sparse_switch_elements.elements)); }; +array_descriptor returns [String type] + : ARRAY_TYPE_PREFIX ( PRIMITIVE_TYPE { $type = $ARRAY_TYPE_PREFIX.text + $PRIMITIVE_TYPE.text; } + | CLASS_DESCRIPTOR { $type = $ARRAY_TYPE_PREFIX.text + $CLASS_DESCRIPTOR.text; }); + nonvoid_type_descriptor returns [String type] - : (PRIMITIVE_TYPE - | CLASS_DESCRIPTOR - | ARRAY_DESCRIPTOR) - { - $type = $start.getText(); - }; + : (PRIMITIVE_TYPE { $type = $text; } + | CLASS_DESCRIPTOR { $type = $text; } + | array_descriptor { $type = $array_descriptor.type; }) + ; reference_type_descriptor returns [String type] - : (CLASS_DESCRIPTOR - | ARRAY_DESCRIPTOR) - { - $type = $start.getText(); - }; + : (CLASS_DESCRIPTOR { $type = $text; } + | array_descriptor { $type = $array_descriptor.type; }) + ; type_descriptor returns [String type] : VOID_TYPE {$type = "V";} diff --git a/smali/src/main/jflex/smaliLexer.flex b/smali/src/main/jflex/smaliLexer.flex index 8b7c66e3..2849ab89 100644 --- a/smali/src/main/jflex/smaliLexer.flex +++ b/smali/src/main/jflex/smaliLexer.flex @@ -231,13 +231,14 @@ PrimitiveType = [ZBSCIJFD] ClassDescriptor = L ({SimpleName} "/")* {SimpleName} ; -ArrayDescriptor = "[" + ({PrimitiveType} | {ClassDescriptor}) +ArrayPrefix = "["+ -Type = {PrimitiveType} | {ClassDescriptor} | {ArrayDescriptor} +Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} | {PrimitiveType}) %state PARAM_LIST_OR_ID %state PARAM_LIST +%state ARRAY_DESCRIPTOR %state STRING %state CHAR @@ -314,7 +315,7 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayDescriptor} { {PrimitiveType} { return newToken(PRIMITIVE_TYPE); } {ClassDescriptor} { return newToken(CLASS_DESCRIPTOR); } - {ArrayDescriptor} { return newToken(ARRAY_DESCRIPTOR); } + {ArrayPrefix} { return newToken(ARRAY_TYPE_PREFIX); } [^] { yypushback(1); yybegin(YYINITIAL); return newToken(PARAM_LIST_END); } <> { yybegin(YYINITIAL); return newToken(PARAM_LIST_END); } } @@ -611,12 +612,24 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayDescriptor} } } + { + {PrimitiveType} { yybegin(YYINITIAL); return newToken(PRIMITIVE_TYPE); } + {ClassDescriptor} { yybegin(YYINITIAL); return newToken(CLASS_DESCRIPTOR); } + [^] { yypushback(1); yybegin(YYINITIAL); } +} + /*Types*/ { {PrimitiveType} { return newToken(PRIMITIVE_TYPE); } V { return newToken(VOID_TYPE); } {ClassDescriptor} { return newToken(CLASS_DESCRIPTOR); } - {ArrayDescriptor} { return newToken(ARRAY_DESCRIPTOR); } + + // we have to drop into a separate state so that we don't parse something like + // "[I->" as "[" followed by "I-" as a SIMPLE_NAME + {ArrayPrefix} { + yybegin(ARRAY_DESCRIPTOR); + return newToken(ARRAY_TYPE_PREFIX); + } {PrimitiveType} {PrimitiveType}+ { yypushback(yylength()); diff --git a/smali/src/test/resources/LexerTest/RealSmaliFileTest.tokens b/smali/src/test/resources/LexerTest/RealSmaliFileTest.tokens index ba40c2f2..71f9b169 100644 --- a/smali/src/test/resources/LexerTest/RealSmaliFileTest.tokens +++ b/smali/src/test/resources/LexerTest/RealSmaliFileTest.tokens @@ -184,7 +184,8 @@ REGISTER("v0") COMMA(",") REGISTER("v0") COMMA(",") -ARRAY_DESCRIPTOR("[I") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("I") INSTRUCTION_FORMAT31t("fill-array-data") REGISTER("v0") COMMA(",") @@ -197,7 +198,8 @@ CLASS_DESCRIPTOR("Lcom/android/internal/os/BatteryStatsImpl;") ARROW("->") SIMPLE_NAME("PROC_WAKELOCKS_FORMAT") COLON(":") -ARRAY_DESCRIPTOR("[I") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("I") LINE_DIRECTIVE(".line") POSITIVE_INTEGER_LITERAL("3495") INSTRUCTION_FORMAT21c_TYPE("new-instance") diff --git a/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.smali b/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.smali index 2120d33f..9becb916 100644 --- a/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.smali +++ b/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.smali @@ -23,6 +23,8 @@ LI/I/I; [D [Ljava/lang/String; [LI/I/I; +[[LI/I/I; +[[I IIIII ZBSCIJFD @@ -49,4 +51,6 @@ L[Ljava/lang/String; -III \ No newline at end of file +III + +[I->clone()Ljava/lang/Object; \ No newline at end of file diff --git a/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.tokens b/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.tokens index d99d2c29..03e79f03 100644 --- a/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.tokens +++ b/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.tokens @@ -13,16 +13,30 @@ CLASS_DESCRIPTOR("LI;") CLASS_DESCRIPTOR("LV;") CLASS_DESCRIPTOR("LI/I/I;") -ARRAY_DESCRIPTOR("[Z") -ARRAY_DESCRIPTOR("[B") -ARRAY_DESCRIPTOR("[S") -ARRAY_DESCRIPTOR("[C") -ARRAY_DESCRIPTOR("[I") -ARRAY_DESCRIPTOR("[J") -ARRAY_DESCRIPTOR("[F") -ARRAY_DESCRIPTOR("[D") -ARRAY_DESCRIPTOR("[Ljava/lang/String;") -ARRAY_DESCRIPTOR("[LI/I/I;") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("Z") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("B") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("S") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("C") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("I") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("J") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("F") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("D") +ARRAY_TYPE_PREFIX("[") +CLASS_DESCRIPTOR("Ljava/lang/String;") +ARRAY_TYPE_PREFIX("[") +CLASS_DESCRIPTOR("LI/I/I;") +ARRAY_TYPE_PREFIX("[[") +CLASS_DESCRIPTOR("LI/I/I;") +ARRAY_TYPE_PREFIX("[[") +PRIMITIVE_TYPE("I") PARAM_LIST_OR_ID_START("") PRIMITIVE_TYPE("I") @@ -46,8 +60,10 @@ PARAM_LIST_OR_ID_END("") PARAM_LIST_START("") PRIMITIVE_TYPE("I") CLASS_DESCRIPTOR("La;") -ARRAY_DESCRIPTOR("[La;") -ARRAY_DESCRIPTOR("[I") +ARRAY_TYPE_PREFIX("[") +CLASS_DESCRIPTOR("La;") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("I") PARAM_LIST_END("") PARAM_LIST_START("") @@ -56,19 +72,26 @@ CLASS_DESCRIPTOR("Ljava/lang/String;") PARAM_LIST_END("") PARAM_LIST_START("") -ARRAY_DESCRIPTOR("[I") -ARRAY_DESCRIPTOR("[I") -ARRAY_DESCRIPTOR("[I") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("I") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("I") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("I") PARAM_LIST_END("") PARAM_LIST_START("") -ARRAY_DESCRIPTOR("[I") -ARRAY_DESCRIPTOR("[Z") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("I") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("Z") PARAM_LIST_END("") PARAM_LIST_START("") -ARRAY_DESCRIPTOR("[I") -ARRAY_DESCRIPTOR("[Ljava/lang/String;") +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("I") +ARRAY_TYPE_PREFIX("[") +CLASS_DESCRIPTOR("Ljava/lang/String;") PARAM_LIST_END("") MEMBER_NAME("") @@ -79,12 +102,12 @@ MEMBER_NAME("") SIMPLE_NAME("Ljava") INVALID_TOKEN("/") SIMPLE_NAME("lang") INVALID_TOKEN("/") SIMPLE_NAME("String") SIMPLE_NAME("L") INVALID_TOKEN(";") SIMPLE_NAME("LI") -SIMPLE_NAME("L") ARRAY_DESCRIPTOR("[Ljava/lang/String;") +SIMPLE_NAME("L") ARRAY_TYPE_PREFIX("[") CLASS_DESCRIPTOR("Ljava/lang/String;") -INVALID_TOKEN("[") -INVALID_TOKEN("[") VOID_TYPE("V") -INVALID_TOKEN("[") SIMPLE_NAME("java") INVALID_TOKEN("/") SIMPLE_NAME("lang") INVALID_TOKEN("/") SIMPLE_NAME("String") INVALID_TOKEN(";") -INVALID_TOKEN("[") INVALID_TOKEN(";") +ARRAY_TYPE_PREFIX("[") +ARRAY_TYPE_PREFIX("[") VOID_TYPE("V") +ARRAY_TYPE_PREFIX("[") SIMPLE_NAME("java") INVALID_TOKEN("/") SIMPLE_NAME("lang") INVALID_TOKEN("/") SIMPLE_NAME("String") INVALID_TOKEN(";") +ARRAY_TYPE_PREFIX("[") INVALID_TOKEN(";") MEMBER_NAME("") @@ -92,4 +115,12 @@ PARAM_LIST_OR_ID_START("") PRIMITIVE_TYPE("I") PRIMITIVE_TYPE("I") PRIMITIVE_TYPE("I") -PARAM_LIST_OR_ID_END("") \ No newline at end of file +PARAM_LIST_OR_ID_END("") + +ARRAY_TYPE_PREFIX("[") +PRIMITIVE_TYPE("I") +ARROW("->") +SIMPLE_NAME("clone") +OPEN_PAREN("(") +CLOSE_PAREN(")") +CLASS_DESCRIPTOR("Ljava/lang/Object;") \ No newline at end of file diff --git a/smalidea/src/main/antlr3/smalideaParser.g b/smalidea/src/main/antlr3/smalideaParser.g index 823046a5..43449326 100644 --- a/smalidea/src/main/antlr3/smalideaParser.g +++ b/smalidea/src/main/antlr3/smalideaParser.g @@ -347,7 +347,7 @@ class_descriptor array_descriptor @init { Marker marker = mark(); } - : ARRAY_DESCRIPTOR; + : ARRAY_TYPE_PREFIX (primitive_type | class_descriptor); finally { marker.done(SmaliElementTypes.ARRAY_TYPE); } void_type diff --git a/smalidea/src/main/java/org/jf/smalidea/SmaliTokens.java b/smalidea/src/main/java/org/jf/smalidea/SmaliTokens.java index c009a884..1e79ad72 100644 --- a/smalidea/src/main/java/org/jf/smalidea/SmaliTokens.java +++ b/smalidea/src/main/java/org/jf/smalidea/SmaliTokens.java @@ -50,7 +50,7 @@ public class SmaliTokens { @SuppressWarnings({"UnusedDeclaration"}) public static IElementType ANNOTATION_DIRECTIVE; @SuppressWarnings({"UnusedDeclaration"}) public static IElementType ANNOTATION_VISIBILITY; @SuppressWarnings({"UnusedDeclaration"}) public static IElementType ARRAY_DATA_DIRECTIVE; - @SuppressWarnings({"UnusedDeclaration"}) public static IElementType ARRAY_DESCRIPTOR; + @SuppressWarnings({"UnusedDeclaration"}) public static IElementType ARRAY_TYPE_PREFIX; @SuppressWarnings({"UnusedDeclaration"}) public static IElementType ARROW; @SuppressWarnings({"UnusedDeclaration"}) public static IElementType BOOL_LITERAL; @SuppressWarnings({"UnusedDeclaration"}) public static IElementType BYTE_LITERAL; @@ -169,7 +169,7 @@ public class SmaliTokens { tokenColors.put("ANNOTATION_DIRECTIVE", SmaliHighlightingColors.DIRECTIVE); tokenColors.put("ANNOTATION_VISIBILITY", SmaliHighlightingColors.ACCESS); tokenColors.put("ARRAY_DATA_DIRECTIVE", SmaliHighlightingColors.DIRECTIVE); - tokenColors.put("ARRAY_DESCRIPTOR", SmaliHighlightingColors.TYPE); + tokenColors.put("ARRAY_TYPE_PREFIX", SmaliHighlightingColors.TYPE); tokenColors.put("ARROW", SmaliHighlightingColors.ARROW); tokenColors.put("BOOL_LITERAL", SmaliHighlightingColors.LITERAL); tokenColors.put("BYTE_LITERAL", SmaliHighlightingColors.NUMBER); diff --git a/smalidea/src/test/java/org/jf/smalidea/SmaliLexerTest.java b/smalidea/src/test/java/org/jf/smalidea/SmaliLexerTest.java index 14e424ef..8383f966 100644 --- a/smalidea/src/test/java/org/jf/smalidea/SmaliLexerTest.java +++ b/smalidea/src/test/java/org/jf/smalidea/SmaliLexerTest.java @@ -72,7 +72,8 @@ public class SmaliLexerTest extends LexerTestCase { "WHITE_SPACE (' ')\n" + "SIMPLE_NAME ('main')\n" + "OPEN_PAREN ('(')\n" + - "ARRAY_DESCRIPTOR ('[Ljava/lang/String;')\n" + + "ARRAY_TYPE_PREFIX ('[')\n" + + "CLASS_DESCRIPTOR ('Ljava/lang/String;')\n" + "CLOSE_PAREN (')')\n" + "VOID_TYPE ('V')\n" + "WHITE_SPACE ('\\n ')\n" +