diff --git a/pom.xml b/pom.xml index b125bd7b..eb469de0 100644 --- a/pom.xml +++ b/pom.xml @@ -23,12 +23,21 @@ 3.1.3-1 - smali + smaliLexer antlr - - org/JesusFreke/smali/smaliTreeWalker.g + + org/JesusFreke/smali/smaliLexer.g + + + + smaliParser + + antlr + + + org/JesusFreke/smali/smaliParser.g @@ -37,7 +46,7 @@ antlr - org/JesusFreke/smali/smali.g + org/JesusFreke/smali/smaliTreeWalker.g diff --git a/src/main/antlr3/org/JesusFreke/smali/smali.g b/src/main/antlr3/org/JesusFreke/smali/smali.g deleted file mode 100644 index 9c7d9802..00000000 --- a/src/main/antlr3/org/JesusFreke/smali/smali.g +++ /dev/null @@ -1,677 +0,0 @@ -/* - * The comment lexical rule, and the number, string and character constant - * lexical rules are derived from rules from the Java 1.6 grammar which can be - * found here: http://openjdk.java.net/projects/compiler-grammar/antlrworks/Java.g - * - * Specifically, these rules: - * - * COMMENT, LONG_LITERAL, INT_LITERAL, Integer_number, Hex_prefix, Hex_digit, - * Long_suffix, Non_integer_number_SIMPLE_NAME, Non_integer_number, - * Decimal_exponent, Hex_exponent, Float_suffix, Double_suffix, - * FLOAT_LITERAL_SIMPLE_NAME, FLOAT_LITERAL, DOUBLE_LITERAL_SIMPLE_NAME, - * DOUBLE_LITERAL, CHAR_LITERAL, STRING_LITERAL, EscapeSequence - * - * These rules were originally copyrighted by Terence Parr, and are used here in - * accordance with the following license - * - * [The "BSD licence"] - * Copyright (c) 2007-2008 Terence Parr - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form mmaven-2.0.9" -Didea.launcher.port=7538 "-Didea.launcher.bin.path=C:\Program Files\JetBrains\IntelliJ IDEA 8.1\bin" -Dfile.encoding=windows-1252 -classpath "C:\Program Files\Apache Software Foundation\apache-maven-2.0.9\boot\classworlds-1.1.jar;C:\Program Files\JetBrains\IntelliJ IDEA 8.1\lib\idea_rt.jar" com.intellij.rt.execution.application.AppMain org.codehaus.classworlds.Launcher --no-plugin-registry --fail-fast --no-plugin-updates --strict-checksums -f D:\Android\smali\pom.xml compile -+ Enabling strict checksum verification on all artifact downloads. -[INFO] Scanning for projects... -[INFO] ------------------------------------------------------------------------ -[INFO] Building Unnamed - smali:smali:jar:1.0 -[INFO] task-segment: [compile] -[INFO] ------------------------------------------------------------------------ -[INFO] [antlr3:antlr {execution: smali}] -[INFO] ANTLR: Processing source directory D:\Android\smali\src\main\antlr3 -ANTLR Parser Generator Version 3.1.3 Mar 17, 2009 19:23:44 -org\JesusFreke\smali\smali.g -[INFO] [antlr3:antlr {execution: smaliTreeWalker}] -[INFO] ANTLR: Processing source directory D:\Android\smali\src\main\antlr3 -ANTLR Parser Generator Version 3.1.3 Mar 17, 2009 19:23:44 -org\JesusFreke\smali\smaliTreeWalker.g -[INFO] [resources:resources]ust reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * - * The remainder of this grammar is released by me (Ben Gruver) under the - * following license: - * - * [The "BSD licence"] - * Copyright (c) 2009 Ben Gruver - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -grammar smali; - -options { - output=AST; - ASTLabelType=CommonTree; -} - -tokens { - //I_* tokens are imaginary tokens used as parent AST nodes - I_CLASS_DEF; - I_SUPER; - I_ACCESS_LIST; - I_METHODS; - I_FIELDS; - I_FIELD; - I_FIELD_TYPE; - I_FIELD_INITIAL_VALUE; - I_METHOD; - I_METHOD_PROTOTYPE; - I_METHOD_RETURN_TYPE; - I_REGISTERS; - I_STATEMENTS; - I_STATEMENT_FORMAT10x; - I_STATEMENT_FORMAT11x; - I_STATEMENT_FORMAT12x; - I_STATEMENT_FORMAT21c_TYPE; - I_STATEMENT_FORMAT21c_FIELD; - I_STATEMENT_FORMAT22c_FIELD; - I_STATEMENT_FORMAT21c_STRING; - I_STATEMENT_FORMAT35c_METHOD; - I_STATEMENT_FORMAT3rc_METHOD; - I_REGISTER_RANGE; - I_REGISTER_LIST; -} - -@parser::header { -package org.JesusFreke.smali; -} - -@lexer::header { -package org.JesusFreke.smali; - -import java.util.ArrayDeque; -} - -@lexer::init { - state.token = Token.INVALID_TOKEN; -} - -@lexer::members { - protected ArrayDeque tokens = new ArrayDeque(); - - public void reset() { - super.reset(); - state.token = Token.INVALID_TOKEN; - tokens.clear(); - } - - public Token nextToken() { - while (true) { - if (tokens.size() > 0) { - Token token = tokens.poll(); - if (token == Token.SKIP_TOKEN) { - continue; - } - - return token; - } - - state.channel = Token.DEFAULT_CHANNEL; - state.tokenStartCharIndex = input.index(); - state.tokenStartCharPositionInLine = input.getCharPositionInLine(); - state.tokenStartLine = input.getLine(); - state.text = null; - if ( input.LA(1)==CharStream.EOF ) { - 
return Token.EOF_TOKEN; - } - try { - mTokens(); - - if (tokens.size() == 0) { - emit(); - } - } - catch (NoViableAltException nva) { - reportError(nva); - recover(nva); // throw out current char and try again - } - catch (RecognitionException re) { - reportError(re); - // match() routine has already called recover() - } - } - } - - public void skip() { - tokens.add(Token.SKIP_TOKEN); - } - - public void emit(Token token) { - tokens.add(token); - } -} - - -smali_file: header methods_and_fields -> ^(I_CLASS_DEF header methods_and_fields); - -header : class_spec super_spec; - -class_spec - : '.class' access_list class_name -> class_name access_list; - -super_spec - : first_token='.super' class_name -> ^(I_SUPER[$first_token, "I_SUPER"] class_name); - -access_list - : first_token=ACCESS_SPEC ACCESS_SPEC* -> ^(I_ACCESS_LIST[$first_token,"I_ACCESS_LIST"] ACCESS_SPEC+); - -methods_and_fields - : (method | field)* -> ^(I_METHODS method*) ^(I_FIELDS field*); - -field : first_token='.field' access_list member_name field_type_descriptor ('=' literal)? 
- -> ^(I_FIELD[$first_token, "I_FIELD"] member_name access_list ^(I_FIELD_TYPE field_type_descriptor) ^(I_FIELD_INITIAL_VALUE literal)?); - -method : first_token='.method' access_list method_name_and_prototype locals_directive statements '.end method' - -> ^(I_METHOD[$first_token, "I_METHOD"] method_name_and_prototype access_list locals_directive statements); - -method_prototype - : first_token='(' field_type_list ')' type_descriptor - -> ^(I_METHOD_PROTOTYPE[$first_token, "I_METHOD_PROTOTYPE"] ^(I_METHOD_RETURN_TYPE type_descriptor) field_type_list?); - -method_name_and_prototype - : member_name method_prototype; - -field_type_list - : field_type_descriptor*; - -locals_directive - : first_token='.registers' INT_LITERAL - -> ^(I_REGISTERS[$first_token, "I_REGISTERS"] INT_LITERAL); - - -full_method_name_and_prototype - : QUALIFIED_MEMBER__CLASS_NAME QUALIFIED_MEMBER__MEMBER_NAME method_prototype; - -full_field_name_and_type - : QUALIFIED_MEMBER__CLASS_NAME QUALIFIED_MEMBER__MEMBER_NAME field_type_descriptor; - -statements - : statement* -> ^(I_STATEMENTS statement*); - -statement - : instruction; - -instruction - //e.g. return - : INSTRUCTION_NAME_FORMAT10x - -> ^(I_STATEMENT_FORMAT10x[$start, "I_STATEMENT_FORMAT10x"] INSTRUCTION_NAME_FORMAT10x) - | //e.g. move-result-object v1 - INSTRUCTION_NAME_FORMAT11x REGISTER - -> ^(I_STATEMENT_FORMAT11x[$start, "I_STATEMENT_FORMAT11x"] INSTRUCTION_NAME_FORMAT11x REGISTER) - | //e.g. move v1 v2 - INSTRUCTION_NAME_FORMAT12x REGISTER ',' REGISTER - -> ^(I_STATEMENT_FORMAT12x[$start, "I_STATEMENT_FORMAT12x"] INSTRUCTION_NAME_FORMAT12x REGISTER REGISTER) - | //e.g. sget_object v0 java/lang/System/out LJava/io/PrintStream; - INSTRUCTION_NAME_FORMAT21c_FIELD REGISTER ',' full_field_name_and_type - -> ^(I_STATEMENT_FORMAT21c_FIELD[$start, "I_STATEMENT_FORMAT21c_FIELD"] INSTRUCTION_NAME_FORMAT21c_FIELD REGISTER full_field_name_and_type) - | //e.g. const-string v1 "Hello World!" 
- INSTRUCTION_NAME_FORMAT21c_STRING REGISTER ',' STRING_LITERAL - -> ^(I_STATEMENT_FORMAT21c_STRING[$start, "I_STATEMENT_FORMAT21c_STRING"] INSTRUCTION_NAME_FORMAT21c_STRING REGISTER STRING_LITERAL) - | //e.g. const-class v2 org/JesusFreke/HelloWorld2/HelloWorld2 - INSTRUCTION_NAME_FORMAT21c_TYPE REGISTER ',' class_or_array_type_descriptor - -> ^(I_STATEMENT_FORMAT21c_TYPE[$start, "I_STATEMENT_FORMAT21c"] INSTRUCTION_NAME_FORMAT21c_TYPE REGISTER class_or_array_type_descriptor) - | //e.g. iput-object v1 v0 org/JesusFreke/HelloWorld2/HelloWorld2.helloWorld Ljava/lang/String; - INSTRUCTION_NAME_FORMAT22c_FIELD REGISTER ',' REGISTER ',' full_field_name_and_type - -> ^(I_STATEMENT_FORMAT22c_FIELD[$start, "I_INSTANCE_FIELD_STATEMENT"] INSTRUCTION_NAME_FORMAT22c_FIELD REGISTER REGISTER full_field_name_and_type) - | //e.g. invoke-virtual {v0,v1} java/io/PrintStream/print(Ljava/lang/Stream;)V - INSTRUCTION_NAME_FORMAT35c_METHOD '{' register_list '}' ',' full_method_name_and_prototype - -> ^(I_STATEMENT_FORMAT35c_METHOD[$start, "I_STATEMENT_FORMAT35c_METHOD"] INSTRUCTION_NAME_FORMAT35c_METHOD register_list full_method_name_and_prototype) - | //e.g. invoke-virtual/range {v25..v26} java/lang/StringBuilder/append(Ljava/lang/String;)Ljava/lang/StringBuilder; - INSTRUCTION_NAME_FORMAT3rc_METHOD '{' register_range '}' ',' full_method_name_and_prototype - -> ^(I_STATEMENT_FORMAT3rc_METHOD[$start, "I_STATEMENT_FORMAT3rc_METHOD"] INSTRUCTION_NAME_FORMAT3rc_METHOD register_range full_method_name_and_prototype) - ; - - -register_list - : first_token=REGISTER? (',' REGISTER)* -> ^(I_REGISTER_LIST[$first_token, "I_REGISTER_LIST"] REGISTER*); - -register_range - : first_token=REGISTER ('..' REGISTER)? -> ^(I_REGISTER_RANGE[$first_token, "I_REGISTER_RANGE"] REGISTER REGISTER?); - -/*since there are no reserved words in the dex specification, there are a -number of tokens that can be a valid simple_name, in addition to just -SIMPLE_NAME. 
We need to match any token that could also be considered a valid -SIMPLE_NAME. In the case of floating point literals, some could be considered -a valid SIMPLE_NAME while others couldn't. The lexer will generate a separate -FLOAT_LITERAL_SIMPLE_NAME OR DOUBLE_LITERAL_SIMPLE_NAME token for literals -that can be considered a valid SIMPLE_NAME*/ -simple_name - : SIMPLE_NAME - | ACCESS_SPEC - | instruction_name - | INT_LITERAL - | LONG_LITERAL - | FLOAT_LITERAL_SIMPLE_NAME - | DOUBLE_LITERAL_SIMPLE_NAME - | BOOL_LITERAL - | PRIMITIVE_TYPE - ; - -instruction_name - : INSTRUCTION_NAME_FORMAT10x - | INSTRUCTION_NAME_FORMAT11x - | INSTRUCTION_NAME_FORMAT12x - | INSTRUCTION_NAME_FORMAT21c_FIELD - | INSTRUCTION_NAME_FORMAT21c_STRING - | INSTRUCTION_NAME_FORMAT21c_TYPE - | INSTRUCTION_NAME_FORMAT22c_FIELD - | INSTRUCTION_NAME_FORMAT35c_METHOD - | INSTRUCTION_NAME_FORMAT3rc_METHOD - ; - -member_name - : simple_name - | MEMBER_NAME - ; - -class_name - : SIMPLE_NAME | CLASS_WITH_PACKAGE_NAME; - -field_type_descriptor - : PRIMITIVE_TYPE - | CLASS_DESCRIPTOR - | ARRAY_TYPE - ; - -class_or_array_type_descriptor - : CLASS_DESCRIPTOR - | ARRAY_TYPE; - -type_descriptor - : VOID_TYPE - | field_type_descriptor - ; - -literal : INT_LITERAL - | LONG_LITERAL - | float_literal - | double_literal - | CHAR_LITERAL - | STRING_LITERAL - | BOOL_LITERAL; - -float_literal - : FLOAT_LITERAL -> FLOAT_LITERAL - | FLOAT_LITERAL_SIMPLE_NAME -> FLOAT_LITERAL[$FLOAT_LITERAL_SIMPLE_NAME, $FLOAT_LITERAL_SIMPLE_NAME.text]; - -double_literal - : DOUBLE_LITERAL -> DOUBLE_LITERAL - | DOUBLE_LITERAL_SIMPLE_NAME -> DOUBLE_LITERAL[$DOUBLE_LITERAL_SIMPLE_NAME, $DOUBLE_LITERAL_SIMPLE_NAME.text]; - -ACCESS_SPEC - : 'public' | 'private' | 'static' | 'constructor' | 'final'; - -INSTRUCTION_NAME_FORMAT10x - : 'return-void' - | 'nop'; - -INSTRUCTION_NAME_FORMAT11x - : 'move-result' - | 'move-result-wide' - | 'move-result-object' - | 'move-exception' - | 'return' - | 'return-wide' - | 'return-object' - | 'monitor-enter' 
- | 'monitor-exit' - | 'throw'; - -INSTRUCTION_NAME_FORMAT12x - : 'move' - | 'move-wide' - | 'move-object' - | 'array-length' - | 'neg-int' - | 'not-int' - | 'neg-long' - | 'not-long' - | 'neg-float' - | 'neg-double' - | 'int-to-long' - | 'int-to-float' - | 'int-to-double' - | 'long-to-int' - | 'long-to-float' - | 'long-to-double' - | 'float-to-int' - | 'float-to-long' - | 'float-to-double' - | 'double-to-int' - | 'double-to-long' - | 'double-to-float' - | 'int-to-byte' - | 'int-to-char' - | 'int-to-short' - | 'add-int/2addr' - | 'sub-int/2addr' - | 'mul-int/2addr' - | 'div-int/2addr' - | 'rem-int/2addr' - | 'and-int/2addr' - | 'or-int/2addr' - | 'xor-int/2addr' - | 'shl-int/2addr' - | 'shr-int/2addr' - | 'ushr-int/2addr' - | 'add-long/2addr' - | 'sub-long/2addr' - | 'mul-long/2addr' - | 'div-long/2addr' - | 'rem-long/2addr' - | 'and-long/2addr' - | 'or-long/2addr' - | 'xor-long/2addr' - | 'shl-long/2addr' - | 'shr-long/2addr' - | 'ushr-long/2addr' - | 'add-float/2addr' - | 'sub-float/2addr' - | 'mul-float/2addr' - | 'div-float/2addr' - | 'rem-float/2addr' - | 'add-double/2addr' - | 'sub-double/2addr' - | 'mul-double/2addr' - | 'div-double/2addr' - | 'rem-double/2addr'; - -INSTRUCTION_NAME_FORMAT21c_FIELD - : 'sget' - | 'sget-wide' - | 'sget-object' - | 'sget-boolean' - | 'sget-byte' - | 'sget-char' - | 'sget-short' - | 'sput' - | 'sput-wide' - | 'sput-object' - | 'sput-boolean' - | 'sput-byte' - | 'sput-char' - | 'sput-short' - ; - -INSTRUCTION_NAME_FORMAT21c_STRING - : 'const-string'; - -INSTRUCTION_NAME_FORMAT21c_TYPE - : 'check-cast' - | 'new-instance' - | 'const-class'; - -INSTRUCTION_NAME_FORMAT22c_FIELD - : 'iget' - | 'iget-wide' - | 'iget-object' - | 'iget-boolean' - | 'iget-byte' - | 'iget-char' - | 'iget-short' - | 'iput' - | 'iput-wide' - | 'iput-object' - | 'iput-boolean' - | 'iput-byte' - | 'iput-char' - | 'iput-short' - ; - -INSTRUCTION_NAME_FORMAT35c_METHOD - : 'invoke-virtual' - | 'invoke-super' - | 'invoke-direct' - | 'invoke-static' - | 
'invoke-interface' - ; - -INSTRUCTION_NAME_FORMAT3rc_METHOD - : 'invoke-virtual/range' - | 'invoke-super/range' - | 'invoke-direct/range' - | 'invoke-static/range' - | 'invoke-interface/range' - ; - -/*since SIMPLE_NAME is so all-encompassing, it includes all integer literals -and a subset of the possible floating point literals. For floating point -literals, we need to generate a separate token depending on whether the token -could also be considered a SIMPLE_NAME or not. - -The floating point related tokens with a _SIMPLE_NAME suffix could also be -considered valid SIMPLE_NAME tokens, while the plain version of the token -(without the suffix) could not be considered a valid SIMPLE_NAME token*/ - -LONG_LITERAL - : Integer_number Long_suffix; - -INT_LITERAL - : Integer_number; - -fragment Integer_number - : '-'? '0' - | '-'? ('1'..'9') ('0'..'9')* - | '0' ('0'..'7')+ - | Hex_prefix Hex_digit+ - ; - -fragment Hex_prefix - : '0x'|'0X'; - -fragment Hex_digit - : ('0'..'9'|'a'..'f'|'A'..'F'); - -fragment Long_suffix - : 'l'|'L'; - -fragment Non_integer_number_SIMPLE_NAME - : ('0'..'9')+ Decimal_exponent - | ('0'..'9')+ - | Hex_prefix (Hex_digit)* Hex_exponent - ; - - -fragment Non_integer_number - : ('0'..'9')+ '.' ('0'..'9')* Decimal_exponent? - | '.' ('0'..'9')+ Decimal_exponent? - | Hex_prefix (Hex_digit)* '.' (Hex_digit)* Hex_exponent - ; - -fragment Decimal_exponent - : ('e'|'E') '-'? ('0'..'9')+; - -fragment Hex_exponent - : ('p'|'P') '-'? 
('0'..'9')+; - -fragment Float_suffix - : 'f'|'F'; - -fragment Double_suffix - : 'd'|'D'; - -FLOAT_LITERAL_SIMPLE_NAME - : Non_integer_number_SIMPLE_NAME Float_suffix; - -FLOAT_LITERAL - : Non_integer_number Float_suffix; - -DOUBLE_LITERAL_SIMPLE_NAME - : Non_integer_number_SIMPLE_NAME Double_suffix?; - -DOUBLE_LITERAL - : Non_integer_number Double_suffix?; - -CHAR_LITERAL - - : '\'' {StringBuilder sb = new StringBuilder();} - ( Escape_sequence[sb] {setText(sb.toString());} - | ~( '\'' | '\\' | '\r' | '\n' ) - ) - '\'' - ; - -STRING_LITERAL - : '"' {StringBuilder sb = new StringBuilder();} - ( Escape_sequence[sb] - | ~( '\\' | '"' | '\r' | '\n' ) {sb.append((char)input.LA(-1));} - )* - '"' {setText(sb.toString());} - ; - - -Hex_digits - : Hex_digit Hex_digit Hex_digit Hex_digit; - -fragment -Escape_sequence[StringBuilder sb] - : '\\' - ( - 'b' {sb.append("\b");} - | 't' {sb.append("\t");} - | 'n' {sb.append("\n");} - | 'f' {sb.append("\f");} - | 'r' {sb.append("\r");} - | '\"' {sb.append("\"");} - | '\'' {sb.append("'");} - | '\\' {sb.append("\\");} - | 'u' Hex_digits {sb.append((char)Integer.parseInt($Hex_digits.text, 16));} -/* | octdigits=(('0'..'3') ('0'..'7') ('0'..'7')) {$value = (char)Integer.parseInt("0" + $octdigits.text);} - | octdigits=(('0'..'7') ('0'..'7')) {$value = (char)Integer.parseInt("0" + $octdigits.text);} - | octdigits=(('0'..'7')) {$value = (char)Integer.parseInt("0" + $octdigits.text);}*/ - ); - -BOOL_LITERAL - : 'true'|'false'; - - - -WHITESPACE - : (' '|'\t'|'\n'|'\r')+ {$channel = HIDDEN;}; - -REGISTER: 'v' ('0'..'9')+; - - -/*a token of type QUALIFIED_MEMBER is never generated. This rule emits 2 sub-tokens -that represent the class name and the member name, so that they don't have to be -parsed out later*/ -QUALIFIED_MEMBER - : class_name=QUALIFIED_MEMBER__CLASS_NAME '.' 
member_name=QUALIFIED_MEMBER__MEMBER_NAME - { - $class_name.setType(QUALIFIED_MEMBER__CLASS_NAME); - $member_name.setType(QUALIFIED_MEMBER__MEMBER_NAME); - emit($class_name); - emit($member_name); - }; - -fragment QUALIFIED_MEMBER__CLASS_NAME - : (SIMPLE_NAME '/')* SIMPLE_NAME; - -fragment QUALIFIED_MEMBER__MEMBER_NAME - : MEMBER_NAME | SIMPLE_NAME; - - -ARRAY_TYPE - : - ARRAY_CHAR_LIST[255] (PRIMITIVE_TYPE | CLASS_DESCRIPTOR); - - -//match from 1 to maxCount '[' characters -fragment -ARRAY_CHAR_LIST[int maxCount] - : {$maxCount > 1}?=> '[' ARRAY_CHAR_LIST[$maxCount - 1] - | '[' - ; - -MEMBER_NAME - : '<' SIMPLE_NAME '>'; - -VOID_TYPE - : 'V'; - -PRIMITIVE_TYPE - : 'Z' - | 'B' - | 'S' - | 'C' - | 'I' - | 'J' - | 'F' - | 'D' - ; - -CLASS_WITH_PACKAGE_NAME - : (SIMPLE_NAME '/')+ SIMPLE_NAME; - -CLASS_DESCRIPTOR - : 'L' (SIMPLE_NAME | CLASS_WITH_PACKAGE_NAME) ';'; - -SIMPLE_NAME: - ( 'A'..'Z' - | 'a'..'z' - | '0'..'9' - | '$' - | '-' - | '_' - | '\u00a1'..'\u1fff' - | '\u2010'..'\u2027' - | '\u2030'..'\ud7ff' - | '\ue000'..'\uffef' - )+; - -COMMENT - : (';' ~('\n'|'\r')* ('\r\n' | '\r' | '\n') - | ';' ~('\n'|'\r')*) - { - $channel = HIDDEN; - } - ; diff --git a/src/main/antlr3/org/JesusFreke/smali/smaliLexer.g b/src/main/antlr3/org/JesusFreke/smali/smaliLexer.g new file mode 100644 index 00000000..6c7f3ae8 --- /dev/null +++ b/src/main/antlr3/org/JesusFreke/smali/smaliLexer.g @@ -0,0 +1,749 @@ +/* + * The number, string and character constant lexical rules are derived from rules + * from the Java 1.6 grammar which can be found here: + * http://openjdk.java.net/projects/compiler-grammar/antlrworks/Java.g + * + * Specifically, these rules: + * + * STRING_LITERAL, ESCAPE_SEQUENCE, HEX_DIGITS, HEX_DIGIT, INTEGER_LITERAL, + * HEX_PREFIX, LONG_LITERAL, FLOATING_POINT_NUMBER, DECIMAL_EXPONENT, + * HEX_EXPONENT, FLOAT_LITERAL, DOUBLE_LITERAL, CHAR_LITERAL + * + * These rules were originally copyrighted by Terence Parr, and are used here in + * accordance with the following 
license + * + * [The "BSD licence"] + * Copyright (c) 2007-2008 Terence Parr + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * The remainder of this grammar is released by me (Ben Gruver) under the + * following license: + * + * [The "BSD licence"] + * Copyright (c) 2009 Ben Gruver + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + + +/*smali files are particularly hard to tokenize, because of dex's +identifiers, which are much more all-encompassing than most languages'. +One reasonable possibility would be to limit the identifiers to what Java +supports. But I want the syntax to expose the full functionality of the dex +format, so that means supporting the wide range of identifiers that it +supports. + +This makes tokenizing a much more context-sensitive operation than usual. To +address this, I've extended the base lexer class to support multiple +token emissions per rule. The top level *_PHRASE lexical rules generally +match a "phrase". Each phrase has a specific format, and a unique starting +sequence - typically a directive or opcode. 
Each phrase rule doesn't generate +a token that represents itself, like a typical lexical rule, rather, it emits +all of its children tokens. + +For example, a phrase may consist of ".field private helloWorld Ljava/lang/String;". + +The corresponding rule (without the supporting emission code) would look something like + +FIELD_PHRASE : '.field' ACCESS_SPEC+ MEMBER_NAME FIELD_TYPE_DESCRIPTOR + +There would never be a "FIELD_PHRASE" token in the output token stream. Instead, +it would emit a token for each of its children tokens.*/ + + +lexer grammar smaliLexer; + +@lexer::header { +package org.JesusFreke.smali; + +import java.util.ArrayDeque; +} + +@lexer::init { + state.token = Token.INVALID_TOKEN; +} + +@lexer::members { + /* Tokens queued by the rule being matched and not yet returned by nextToken(). */ + protected ArrayDeque<Token> tokens = new ArrayDeque<Token>(); + + /* Resets the base lexer and drops any tokens still waiting in the queue. */ + public void reset() { + super.reset(); + state.token = Token.INVALID_TOKEN; + tokens.clear(); + } + + /* Returns the next queued token, discarding SKIP_TOKEN placeholders. When the queue is empty it returns Token.EOF_TOKEN at end of input, otherwise it runs mTokens() to match the next rule; if that rule queued nothing, the default emit() is called so the rule's own token is queued. Recognition errors are reported and the loop continues. */ + public Token nextToken() { + while (true) { + if (tokens.size() > 0) { + Token token = tokens.poll(); + if (token == Token.SKIP_TOKEN) { + continue; + } + + return token; + } + + state.channel = Token.DEFAULT_CHANNEL; + state.tokenStartCharIndex = input.index(); + state.tokenStartCharPositionInLine = input.getCharPositionInLine(); + state.tokenStartLine = input.getLine(); + state.text = null; + if ( input.LA(1)==CharStream.EOF ) { + return Token.EOF_TOKEN; + } + try { + mTokens(); + + if (tokens.size() == 0) { + emit(); + } + } + catch (NoViableAltException nva) { + reportError(nva); + recover(nva); // throw out current char and try again + } + catch (RecognitionException re) { + reportError(re); + // match() routine has already called recover() + } + } + } + + /* Queues the SKIP_TOKEN placeholder, which nextToken() discards. */ + public void skip() { + tokens.add(Token.SKIP_TOKEN); + } + + /* Queues the token unchanged. */ + public void emit(Token token) { + tokens.add(token); + } + + /* Sets the token's type, then queues it. */ + public void emit(Token token, int type) { + token.setType(type); + tokens.add(token); + } + + /* Sets the token's type and channel, then queues it. */ + public void emit(Token token, int type, int channel) { + token.setType(type); + 
token.setChannel(channel); + tokens.add(token); + } + +/*protected void mismatch(IntStream input, int ttype, BitSet follow) throws RecognitionException +{ + throw new MismatchedTokenException(ttype, input); +} + +public Object recoverFromMismatchedSet(IntStream input, RecognitionException e, BitSet follow) throws RecognitionException +{ + throw e; +}*/ + +} + +/*@rulecatch { +catch (RecognitionException e) { +throw e; +} +}*/ + + +CLASS_PHRASE + : CLASS_DIRECTIVE_EMIT + WS + (ACCESS_SPEC_EMIT WS)+ + CLASS_DESCRIPTOR_EMIT; + +SUPER_PHRASE + : SUPER_DIRECTIVE_EMIT + WS + CLASS_DESCRIPTOR_EMIT; + +FIELD_PHRASE + : FIELD_DIRECTIVE_EMIT + WS + (ACCESS_SPEC_EMIT WS)+ + MEMBER_NAME_EMIT + WS + FIELD_TYPE_DESCRIPTOR_EMITCHILD + WS? + ('=' WS? LITERAL_EMITCHILD)?; + +METHOD_PHRASE + : METHOD_DIRECTIVE_EMIT + WS + (ACCESS_SPEC_EMIT WS)+ + MEMBER_NAME_EMIT + METHOD_PROTOTYPE_EMITCHILDREN; + +END_METHOD_PHRASE + : END_METHOD_DIRECTIVE_EMIT; + +REGISTERS_PHRASE + : REGISTERS_DIRECTIVE_EMIT + WS + INTEGER_LITERAL_EMIT; + +INSTRUCTION_FORMAT10x_PHRASE + : INSTRUCTION_FORMAT10x_EMIT; + +INSTRUCTION_FORMAT11x_PHRASE + : INSTRUCTION_FORMAT11x_EMIT + WS + REGISTER_EMIT; + +INSTRUCTION_FORMAT12x_PHRASE + : INSTRUCTION_FORMAT12x_EMIT + WS + REGISTER_EMIT + WS? ',' WS? + REGISTER_EMIT; + +INSTRUCTION_FORMAT21c_FIELD_PHRASE + : INSTRUCTION_FORMAT21c_FIELD_EMIT + WS + REGISTER_EMIT + WS? ',' WS? + FULLY_QUALIFIED_MEMBER_NAME_EMITCHILDREN + WS + FIELD_TYPE_DESCRIPTOR_EMITCHILD; + +INSTRUCTION_FORMAT21c_STRING_PHRASE + : INSTRUCTION_FORMAT21c_STRING_EMIT + WS + REGISTER_EMIT + WS? ',' WS? + STRING_LITERAL_EMIT; + +INSTRUCTION_FORMAT21c_TYPE_PHRASE + : INSTRUCTION_FORMAT21c_TYPE_EMIT + WS + REGISTER_EMIT + WS? ',' WS? + CLASS_OR_ARRAY_TYPE_DESCRIPTOR_EMITCHILD; + +INSTRUCTION_FORMAT22c_FIELD_PHRASE + : INSTRUCTION_FORMAT22c_FIELD_EMIT + WS + REGISTER_EMIT + WS? ',' WS? + REGISTER_EMIT + WS? ',' WS? 
+ FULLY_QUALIFIED_MEMBER_NAME_EMITCHILDREN + WS + FIELD_TYPE_DESCRIPTOR_EMITCHILD; + +INSTRUCTION_FORMAT35c_METHOD_PHRASE + : INSTRUCTION_FORMAT35c_METHOD_EMIT + WS + REGISTER_LIST_EMITCHILDREN + WS? ',' WS? + FULLY_QUALIFIED_MEMBER_NAME_EMITCHILDREN + METHOD_PROTOTYPE_EMITCHILDREN; + +INSTRUCTION_FORMAT3rc_METHOD_PHRASE + : INSTRUCTION_FORMAT3rc_METHOD_EMIT + WS + REGISTER_RANGE_EMITCHILDREN + WS? ',' WS? + FULLY_QUALIFIED_MEMBER_NAME_EMITCHILDREN + METHOD_PROTOTYPE_EMITCHILDREN; + + +fragment CLASS_DIRECTIVE_EMIT + : CLASS_DIRECTIVE {emit($CLASS_DIRECTIVE, CLASS_DIRECTIVE);}; +fragment CLASS_DIRECTIVE + : '.class'; + +fragment SUPER_DIRECTIVE_EMIT + : SUPER_DIRECTIVE {emit($SUPER_DIRECTIVE, SUPER_DIRECTIVE);}; +fragment SUPER_DIRECTIVE + : '.super'; + +fragment FIELD_DIRECTIVE_EMIT + : FIELD_DIRECTIVE {emit($FIELD_DIRECTIVE, FIELD_DIRECTIVE);}; +fragment FIELD_DIRECTIVE + : '.field'; + +fragment METHOD_DIRECTIVE_EMIT + : METHOD_DIRECTIVE {emit($METHOD_DIRECTIVE, METHOD_DIRECTIVE);}; +fragment METHOD_DIRECTIVE + : '.method'; + +fragment END_METHOD_DIRECTIVE_EMIT + : END_METHOD_DIRECTIVE {emit($END_METHOD_DIRECTIVE, END_METHOD_DIRECTIVE);}; +fragment END_METHOD_DIRECTIVE + : '.end method'; + +fragment REGISTERS_DIRECTIVE_EMIT + : REGISTERS_DIRECTIVE {emit($REGISTERS_DIRECTIVE, REGISTERS_DIRECTIVE);}; +fragment REGISTERS_DIRECTIVE + : '.registers'; + +fragment REGISTER_EMIT + : REGISTER {emit($REGISTER, REGISTER);}; +fragment REGISTER + : 'v' ('0'..'9')+; + + +fragment REGISTER_LIST_EMITCHILDREN + : OPEN_BRACKET_EMIT + ( WS? + REGISTER_EMIT (WS? ',' WS? REGISTER_EMIT)* + WS? + | WS?) + CLOSE_BRACKET_EMIT; + + +fragment REGISTER_RANGE_EMITCHILDREN + : OPEN_BRACKET_EMIT + WS? + REGISTER_EMIT + WS? + ('..' WS? + REGISTER_EMIT)? + CLOSE_BRACKET_EMIT; + + +fragment METHOD_PROTOTYPE_EMITCHILDREN + : OPEN_PAREN_EMIT + (FIELD_TYPE_DESCRIPTOR_EMITCHILD+)? 
+ CLOSE_PAREN_EMIT + TYPE_DESCRIPTOR_EMITCHILD; + +fragment FULLY_QUALIFIED_MEMBER_NAME_EMITCHILDREN +@init {int startPos;} + : {startPos = getCharIndex();} (SIMPLE_NAME '/')* token=SIMPLE_NAME {((CommonToken)$token).setStartIndex(startPos); emit($token, CLASS_NAME);} + '/' + MEMBER_NAME_EMIT; + +fragment TYPE_DESCRIPTOR_EMITCHILD + : PRIMITIVE_TYPE_EMIT + | VOID_TYPE_EMIT + | CLASS_DESCRIPTOR_EMIT + | ARRAY_DESCRIPTOR_EMIT; + + +fragment FIELD_TYPE_DESCRIPTOR_EMITCHILD + : PRIMITIVE_TYPE_EMIT + | CLASS_DESCRIPTOR_EMIT + | ARRAY_DESCRIPTOR_EMIT; + +fragment CLASS_OR_ARRAY_TYPE_DESCRIPTOR_EMITCHILD + : CLASS_DESCRIPTOR_EMIT + | ARRAY_DESCRIPTOR_EMIT; + +fragment PRIMITIVE_TYPE_EMIT + : PRIMITIVE_TYPE {emit($PRIMITIVE_TYPE, PRIMITIVE_TYPE);}; + +fragment PRIMITIVE_TYPE + : 'Z' + | 'B' + | 'S' + | 'C' + | 'I' + | 'J' + | 'F' + | 'D' + ; + + +fragment VOID_TYPE_EMIT + : VOID_TYPE {emit($VOID_TYPE, VOID_TYPE);}; +fragment VOID_TYPE + : 'V'; + + +fragment CLASS_DESCRIPTOR_EMIT + : CLASS_DESCRIPTOR {emit($CLASS_DESCRIPTOR, CLASS_DESCRIPTOR);}; + +fragment CLASS_DESCRIPTOR + : 'L' CLASS_NAME ';'; + +fragment CLASS_NAME + : (SIMPLE_NAME '/')* SIMPLE_NAME; + + +fragment ARRAY_DESCRIPTOR_EMIT + : ARRAY_DESCRIPTOR {emit($ARRAY_DESCRIPTOR, ARRAY_DESCRIPTOR);}; + +fragment ARRAY_DESCRIPTOR + : ARRAY_TYPE_PREFIX (PRIMITIVE_TYPE | CLASS_DESCRIPTOR); + +fragment ARRAY_TYPE_PREFIX + : ARRAY_CHAR_LIST[255]; + +fragment ARRAY_CHAR_LIST[int maxCount] + : {$maxCount > 1}?=> '[' ARRAY_CHAR_LIST[$maxCount - 1] + | '[' + ; + + +fragment ACCESS_SPEC_EMIT + : ACCESS_SPEC {emit($ACCESS_SPEC, ACCESS_SPEC);}; + +fragment ACCESS_SPEC + : 'public' + | 'private' + | 'static' + | 'constructor' + | 'final'; + + + +fragment MEMBER_NAME_EMIT + : MEMBER_NAME {emit($MEMBER_NAME, MEMBER_NAME);}; + +fragment MEMBER_NAME + : '<'? 
SIMPLE_NAME '>'?; + + +fragment SIMPLE_NAME: + ( 'A'..'Z' + | 'a'..'z' + | '0'..'9' + | '$' + | '-' + | '_' + | '\u00a1'..'\u1fff' + | '\u2010'..'\u2027' + | '\u2030'..'\ud7ff' + | '\ue000'..'\uffef' + )+; + + +fragment LITERAL_EMITCHILD + : STRING_LITERAL_EMIT + | INTEGER_LITERAL_EMIT + | LONG_LITERAL_EMIT + | FLOAT_LITERAL_EMIT + | DOUBLE_LITERAL_EMIT + | CHAR_LITERAL_EMIT + | BOOL_LITERAL_EMIT; + + +fragment STRING_LITERAL_EMIT + @init {StringBuilder sb = new StringBuilder();} + : STRING_LITERAL[sb] + { + $STRING_LITERAL.setText(sb.toString()); + emit($STRING_LITERAL, STRING_LITERAL); + }; + +fragment STRING_LITERAL [StringBuilder sb] + : '"' {sb.append('"');} + ( ESCAPE_SEQUENCE[sb] + | ~( '\\' | '"' | '\r' | '\n' ) {sb.append((char)input.LA(-1));} + )* + '"' {sb.append('"');} + ; + +fragment +ESCAPE_SEQUENCE[StringBuilder sb] + : '\\' + ( + 'b' {sb.append("\b");} + | 't' {sb.append("\t");} + | 'n' {sb.append("\n");} + | 'f' {sb.append("\f");} + | 'r' {sb.append("\r");} + | '\"' {sb.append("\"");} + | '\'' {sb.append("'");} + | '\\' {sb.append("\\");} + | 'u' HEX_DIGITS {sb.append((char)Integer.parseInt($HEX_DIGITS.text, 16));} + ); + +fragment HEX_DIGITS + : HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT; + +fragment HEX_DIGIT + : ('0'..'9'|'a'..'f'|'A'..'F'); + + +fragment INTEGER_LITERAL_EMIT + : INTEGER_LITERAL {emit($INTEGER_LITERAL, INTEGER_LITERAL);}; + +fragment INTEGER_LITERAL + : '-'? '0' + | '-'? ('1'..'9') ('0'..'9')* + | '0' ('0'..'7')+ + | HEX_PREFIX HEX_DIGIT+ + ; + +fragment HEX_PREFIX + : '0x'|'0X'; + + +fragment LONG_LITERAL_EMIT + : LONG_LITERAL {emit($LONG_LITERAL, LONG_LITERAL);}; +fragment LONG_LITERAL + : INTEGER_LITERAL ('l' | 'L'); + + +fragment FLOATING_POINT_NUMBER + : ('0' .. '9')+ '.' ('0' .. '9')* DECIMAL_EXPONENT? + | '.' ( '0' .. '9' )+ DECIMAL_EXPONENT? + | ('0' .. '9')+ DECIMAL_EXPONENT + | HEX_PREFIX + ( HEX_DIGIT+ ('.' HEX_DIGIT*)? + | '.' HEX_DIGIT+ + ) + BINARY_EXPONENT + ; + +fragment DECIMAL_EXPONENT + : ('e'|'E') '-'? 
('0'..'9')+; + +fragment BINARY_EXPONENT + : ('p'|'P') '-'? ('0'..'9')+; + + +fragment FLOAT_LITERAL_EMIT + : FLOAT_LITERAL {emit($FLOAT_LITERAL, FLOAT_LITERAL);}; +fragment FLOAT_LITERAL + : (FLOATING_POINT_NUMBER | ('0' .. '9')+) ('f' | 'F'); + +fragment DOUBLE_LITERAL_EMIT + : DOUBLE_LITERAL {emit($DOUBLE_LITERAL, DOUBLE_LITERAL);}; +fragment DOUBLE_LITERAL + : FLOATING_POINT_NUMBER ('d' | 'D')? + | ('0' .. '9')+ ('d' | 'D'); + + +fragment CHAR_LITERAL_EMIT + : CHAR_LITERAL {emit($CHAR_LITERAL, CHAR_LITERAL);}; +fragment CHAR_LITERAL + : '\'' {StringBuilder sb = new StringBuilder("'");} + ( ESCAPE_SEQUENCE[sb] {sb.append("'"); setText(sb.toString());} + | ~( '\'' | '\\' | '\r' | '\n' ) + ) + '\'' + ; + +fragment BOOL_LITERAL_EMIT + : BOOL_LITERAL {emit($BOOL_LITERAL, BOOL_LITERAL);}; +fragment BOOL_LITERAL + : 'true'|'false'; + +fragment INSTRUCTION_FORMAT10x_EMIT + : INSTRUCTION_FORMAT10x {emit($INSTRUCTION_FORMAT10x, INSTRUCTION_FORMAT10x);}; +fragment INSTRUCTION_FORMAT10x + : 'return-void' + | 'nop'; + +fragment INSTRUCTION_FORMAT11x_EMIT + : INSTRUCTION_FORMAT11x {emit($INSTRUCTION_FORMAT11x, INSTRUCTION_FORMAT11x);}; +fragment INSTRUCTION_FORMAT11x + : 'move-result' + | 'move-result-wide' + | 'move-result-object' + | 'move-exception' + | 'return' + | 'return-wide' + | 'return-object' + | 'monitor-enter' + | 'monitor-exit' + | 'throw'; + +fragment INSTRUCTION_FORMAT12x_EMIT + : INSTRUCTION_FORMAT12x {emit($INSTRUCTION_FORMAT12x, INSTRUCTION_FORMAT12x);}; +fragment INSTRUCTION_FORMAT12x + : 'move' + | 'move-wide' + | 'move-object' + | 'array-length' + | 'neg-int' + | 'not-int' + | 'neg-long' + | 'not-long' + | 'neg-float' + | 'neg-double' + | 'int-to-long' + | 'int-to-float' + | 'int-to-double' + | 'long-to-int' + | 'long-to-float' + | 'long-to-double' + | 'float-to-int' + | 'float-to-long' + | 'float-to-double' + | 'double-to-int' + | 'double-to-long' + | 'double-to-float' + | 'int-to-byte' + | 'int-to-char' + | 'int-to-short' + | 'add-int/2addr' + | 
'sub-int/2addr' + | 'mul-int/2addr' + | 'div-int/2addr' + | 'rem-int/2addr' + | 'and-int/2addr' + | 'or-int/2addr' + | 'xor-int/2addr' + | 'shl-int/2addr' + | 'shr-int/2addr' + | 'ushr-int/2addr' + | 'add-long/2addr' + | 'sub-long/2addr' + | 'mul-long/2addr' + | 'div-long/2addr' + | 'rem-long/2addr' + | 'and-long/2addr' + | 'or-long/2addr' + | 'xor-long/2addr' + | 'shl-long/2addr' + | 'shr-long/2addr' + | 'ushr-long/2addr' + | 'add-float/2addr' + | 'sub-float/2addr' + | 'mul-float/2addr' + | 'div-float/2addr' + | 'rem-float/2addr' + | 'add-double/2addr' + | 'sub-double/2addr' + | 'mul-double/2addr' + | 'div-double/2addr' + | 'rem-double/2addr'; + +fragment INSTRUCTION_FORMAT21c_FIELD_EMIT + : INSTRUCTION_FORMAT21c_FIELD {emit($INSTRUCTION_FORMAT21c_FIELD, INSTRUCTION_FORMAT21c_FIELD);}; +fragment INSTRUCTION_FORMAT21c_FIELD + : 'sget' + | 'sget-wide' + | 'sget-object' + | 'sget-boolean' + | 'sget-byte' + | 'sget-char' + | 'sget-short' + | 'sput' + | 'sput-wide' + | 'sput-object' + | 'sput-boolean' + | 'sput-byte' + | 'sput-char' + | 'sput-short' + ; + +fragment INSTRUCTION_FORMAT21c_STRING_EMIT + : INSTRUCTION_FORMAT21c_STRING {emit($INSTRUCTION_FORMAT21c_STRING, INSTRUCTION_FORMAT21c_STRING);}; +fragment INSTRUCTION_FORMAT21c_STRING + : 'const-string'; + +fragment INSTRUCTION_FORMAT21c_TYPE_EMIT + : INSTRUCTION_FORMAT21c_TYPE {emit($INSTRUCTION_FORMAT21c_TYPE, INSTRUCTION_FORMAT21c_TYPE);}; +fragment INSTRUCTION_FORMAT21c_TYPE + : 'check-cast' + | 'new-instance' + | 'const-class'; + +fragment INSTRUCTION_FORMAT22c_FIELD_EMIT + : INSTRUCTION_FORMAT22c_FIELD {emit($INSTRUCTION_FORMAT22c_FIELD, INSTRUCTION_FORMAT22c_FIELD);}; +fragment INSTRUCTION_FORMAT22c_FIELD + : 'iget' + | 'iget-wide' + | 'iget-object' + | 'iget-boolean' + | 'iget-byte' + | 'iget-char' + | 'iget-short' + | 'iput' + | 'iput-wide' + | 'iput-object' + | 'iput-boolean' + | 'iput-byte' + | 'iput-char' + | 'iput-short' + ; + +fragment INSTRUCTION_FORMAT35c_METHOD_EMIT + : INSTRUCTION_FORMAT35c_METHOD 
{emit($INSTRUCTION_FORMAT35c_METHOD, INSTRUCTION_FORMAT35c_METHOD);}; +fragment INSTRUCTION_FORMAT35c_METHOD + : 'invoke-virtual' + | 'invoke-super' + | 'invoke-direct' + | 'invoke-static' + | 'invoke-interface' + ; + +fragment INSTRUCTION_FORMAT3rc_METHOD_EMIT + : INSTRUCTION_FORMAT3rc_METHOD {emit($INSTRUCTION_FORMAT3rc_METHOD, INSTRUCTION_FORMAT3rc_METHOD);}; +fragment INSTRUCTION_FORMAT3rc_METHOD + : 'invoke-virtual/range' + | 'invoke-super/range' + | 'invoke-direct/range' + | 'invoke-static/range' + | 'invoke-interface/range' + ; + + +fragment OPEN_PAREN_EMIT + : OPEN_PAREN {emit($OPEN_PAREN, OPEN_PAREN);}; +fragment OPEN_PAREN + : '('; + +fragment CLOSE_PAREN_EMIT + : CLOSE_PAREN {emit($CLOSE_PAREN, CLOSE_PAREN);}; +fragment CLOSE_PAREN + : ')'; + +fragment OPEN_BRACKET_EMIT + : OPEN_BRACKET {emit($OPEN_BRACKET, OPEN_BRACKET);}; +fragment OPEN_BRACKET + : '{'; + +fragment CLOSE_BRACKET_EMIT + : CLOSE_BRACKET {emit($CLOSE_BRACKET, CLOSE_BRACKET);}; +fragment CLOSE_BRACKET + : '}'; + +fragment WS + : WHITE_SPACE {emit($WHITE_SPACE, WHITE_SPACE, Token.HIDDEN_CHANNEL);}; + +WHITE_SPACE + : (' '|'\t'|'\n'|'\r')+ {$channel = HIDDEN;}; diff --git a/src/main/antlr3/org/JesusFreke/smali/smaliParser.g b/src/main/antlr3/org/JesusFreke/smali/smaliParser.g new file mode 100644 index 00000000..eb57a184 --- /dev/null +++ b/src/main/antlr3/org/JesusFreke/smali/smaliParser.g @@ -0,0 +1,182 @@ +/* + * [The "BSD licence"] + * Copyright (c) 2009 Ben Gruver + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +parser grammar smaliParser; + +options { + tokenVocab=smaliLexer; + output=AST; + ASTLabelType=CommonTree; +} + +tokens { + //I_* tokens are imaginary tokens used as parent AST nodes + I_CLASS_DEF; + I_SUPER; + I_ACCESS_LIST; + I_METHODS; + I_FIELDS; + I_FIELD; + I_FIELD_TYPE; + I_FIELD_INITIAL_VALUE; + I_METHOD; + I_METHOD_PROTOTYPE; + I_METHOD_RETURN_TYPE; + I_REGISTERS; + I_STATEMENTS; + I_STATEMENT_FORMAT10x; + I_STATEMENT_FORMAT11x; + I_STATEMENT_FORMAT12x; + I_STATEMENT_FORMAT21c_TYPE; + I_STATEMENT_FORMAT21c_FIELD; + I_STATEMENT_FORMAT22c_FIELD; + I_STATEMENT_FORMAT21c_STRING; + I_STATEMENT_FORMAT35c_METHOD; + I_STATEMENT_FORMAT3rc_METHOD; + I_REGISTER_RANGE; + I_REGISTER_LIST; + + CLASS_NAME; + MEMBER_NAME; +} + +@header { +package org.JesusFreke.smali; +} + + +smali_file: header methods_and_fields -> ^(I_CLASS_DEF header methods_and_fields); + +header : class_spec super_spec; + +class_spec + : CLASS_DIRECTIVE access_list CLASS_DESCRIPTOR -> CLASS_DESCRIPTOR access_list; + +super_spec + : SUPER_DIRECTIVE CLASS_DESCRIPTOR -> ^(I_SUPER[$start, "I_SUPER"] CLASS_DESCRIPTOR); + +access_list + : ACCESS_SPEC+ -> ^(I_ACCESS_LIST[$start,"I_ACCESS_LIST"] ACCESS_SPEC+); + +methods_and_fields + : (method | field)* -> ^(I_METHODS method*) ^(I_FIELDS field*); + +field : FIELD_DIRECTIVE access_list MEMBER_NAME field_type_descriptor literal? 
+ -> ^(I_FIELD[$start, "I_FIELD"] MEMBER_NAME access_list ^(I_FIELD_TYPE field_type_descriptor) ^(I_FIELD_INITIAL_VALUE literal)?); + +method : METHOD_DIRECTIVE access_list MEMBER_NAME method_prototype + registers_directive + statements + END_METHOD_DIRECTIVE + -> ^(I_METHOD[$start, "I_METHOD"] MEMBER_NAME method_prototype access_list registers_directive statements); + +method_prototype + : OPEN_PAREN field_type_descriptor* CLOSE_PAREN type_descriptor + -> ^(I_METHOD_PROTOTYPE[$start, "I_METHOD_PROTOTYPE"] ^(I_METHOD_RETURN_TYPE type_descriptor) field_type_descriptor*); + + + +registers_directive + : REGISTERS_DIRECTIVE INTEGER_LITERAL + -> ^(I_REGISTERS[$start, "I_REGISTERS"] INTEGER_LITERAL); + + +fully_qualified_method + : CLASS_NAME MEMBER_NAME method_prototype; + +fully_qualified_field + : CLASS_NAME MEMBER_NAME field_type_descriptor; + +statements + : statement* -> ^(I_STATEMENTS statement*); + +statement + : instruction; + +instruction + //e.g. return-void + : INSTRUCTION_FORMAT10x + -> ^(I_STATEMENT_FORMAT10x[$start, "I_STATEMENT_FORMAT10x"] INSTRUCTION_FORMAT10x) + | //e.g. move-result-object v1 + INSTRUCTION_FORMAT11x REGISTER + -> ^(I_STATEMENT_FORMAT11x[$start, "I_STATEMENT_FORMAT11x"] INSTRUCTION_FORMAT11x REGISTER) + | //e.g. move v1 v2 + INSTRUCTION_FORMAT12x REGISTER REGISTER + -> ^(I_STATEMENT_FORMAT12x[$start, "I_STATEMENT_FORMAT12x"] INSTRUCTION_FORMAT12x REGISTER REGISTER) + | //e.g. sget-object v0, java/lang/System/out Ljava/io/PrintStream; + INSTRUCTION_FORMAT21c_FIELD REGISTER fully_qualified_field + -> ^(I_STATEMENT_FORMAT21c_FIELD[$start, "I_STATEMENT_FORMAT21c_FIELD"] INSTRUCTION_FORMAT21c_FIELD REGISTER fully_qualified_field) + | //e.g. const-string v1, "Hello World!" + INSTRUCTION_FORMAT21c_STRING REGISTER STRING_LITERAL + -> ^(I_STATEMENT_FORMAT21c_STRING[$start, "I_STATEMENT_FORMAT21c_STRING"] INSTRUCTION_FORMAT21c_STRING REGISTER STRING_LITERAL) + | //e.g.
const-class v2, Lorg/JesusFreke/HelloWorld2/HelloWorld2; + INSTRUCTION_FORMAT21c_TYPE REGISTER class_or_array_type_descriptor + -> ^(I_STATEMENT_FORMAT21c_TYPE[$start, "I_STATEMENT_FORMAT21c_TYPE"] INSTRUCTION_FORMAT21c_TYPE REGISTER class_or_array_type_descriptor) + | //e.g. iput-object v1, v0, org/JesusFreke/HelloWorld2/HelloWorld2/helloWorld Ljava/lang/String; + INSTRUCTION_FORMAT22c_FIELD REGISTER REGISTER fully_qualified_field + -> ^(I_STATEMENT_FORMAT22c_FIELD[$start, "I_STATEMENT_FORMAT22c_FIELD"] INSTRUCTION_FORMAT22c_FIELD REGISTER REGISTER fully_qualified_field) + | //e.g. invoke-virtual {v0,v1}, java/io/PrintStream/print(Ljava/lang/String;)V + INSTRUCTION_FORMAT35c_METHOD OPEN_BRACKET register_list CLOSE_BRACKET fully_qualified_method + -> ^(I_STATEMENT_FORMAT35c_METHOD[$start, "I_STATEMENT_FORMAT35c_METHOD"] INSTRUCTION_FORMAT35c_METHOD register_list fully_qualified_method) + | //e.g. invoke-virtual/range {v25..v26}, java/lang/StringBuilder/append(Ljava/lang/String;)Ljava/lang/StringBuilder; + INSTRUCTION_FORMAT3rc_METHOD OPEN_BRACKET register_range CLOSE_BRACKET fully_qualified_method + -> ^(I_STATEMENT_FORMAT3rc_METHOD[$start, "I_STATEMENT_FORMAT3rc_METHOD"] INSTRUCTION_FORMAT3rc_METHOD register_range fully_qualified_method) + ; + + +register_list + : REGISTER* -> ^(I_REGISTER_LIST[$start, "I_REGISTER_LIST"] REGISTER*); + +register_range + : REGISTER REGISTER?
-> ^(I_REGISTER_RANGE[$start, "I_REGISTER_RANGE"] REGISTER REGISTER?); + + +field_type_descriptor + : PRIMITIVE_TYPE + | CLASS_DESCRIPTOR + | ARRAY_DESCRIPTOR + ; + +class_or_array_type_descriptor + : CLASS_DESCRIPTOR + | ARRAY_DESCRIPTOR; + +type_descriptor + : VOID_TYPE + | PRIMITIVE_TYPE + | CLASS_DESCRIPTOR + | ARRAY_DESCRIPTOR + ; + +literal : INTEGER_LITERAL + | LONG_LITERAL + | FLOAT_LITERAL + | DOUBLE_LITERAL + | CHAR_LITERAL + | STRING_LITERAL + | BOOL_LITERAL; diff --git a/src/main/antlr3/org/JesusFreke/smali/smaliTreeWalker.g b/src/main/antlr3/org/JesusFreke/smali/smaliTreeWalker.g index 76ab1f7e..53a2c585 100644 --- a/src/main/antlr3/org/JesusFreke/smali/smaliTreeWalker.g +++ b/src/main/antlr3/org/JesusFreke/smali/smaliTreeWalker.g @@ -29,7 +29,7 @@ tree grammar smaliTreeWalker; options { - tokenVocab=smali; + tokenVocab=smaliParser; ASTLabelType=CommonTree; } @@ -96,16 +96,16 @@ header : class_spec super_spec }; class_spec returns[TypeIdItem type, int accessFlags] - : class_name access_list + : class_type_descriptor access_list { - $type = $class_name.type; + $type = $class_type_descriptor.type; $accessFlags = $access_list.value; }; super_spec returns[TypeIdItem type] - : ^(I_SUPER class_name) + : ^(I_SUPER class_type_descriptor) { - $type = $class_name.type; + $type = $class_type_descriptor.type; }; access_list returns [int value] @@ -134,10 +134,10 @@ methods : ^(I_METHODS })*); field returns[ClassDataItem.EncodedField encodedField, EncodedValue encodedValue] - :^(I_FIELD member_name access_list ^(I_FIELD_TYPE field_type_descriptor) field_initial_value) + :^(I_FIELD MEMBER_NAME access_list ^(I_FIELD_TYPE field_type_descriptor) field_initial_value) { TypeIdItem classType = classDefItem.getClassType(); - StringIdItem memberName = new StringIdItem(dexFile, $member_name.memberName); + StringIdItem memberName = new StringIdItem(dexFile, $MEMBER_NAME.text); TypeIdItem fieldType = $field_type_descriptor.type; FieldIdItem fieldIdItem = new 
FieldIdItem(dexFile, classType, memberName, fieldType); @@ -157,7 +157,7 @@ field returns[ClassDataItem.EncodedField encodedField, EncodedValue encodedValue field_initial_value returns[EncodedValue encodedValue] : ^(I_FIELD_INITIAL_VALUE - ( int_literal { $encodedValue = new EncodedValue(dexFile, new IntEncodedValueSubField($int_literal.value)); } + ( integer_literal { $encodedValue = new EncodedValue(dexFile, new IntEncodedValueSubField($integer_literal.value)); } | long_literal { $encodedValue = new EncodedValue(dexFile, new LongEncodedValueSubField($long_literal.value)); } | float_literal { $encodedValue = new EncodedValue(dexFile, new FloatEncodedValueSubField($float_literal.value)); } | double_literal { $encodedValue = new EncodedValue(dexFile, new DoubleEncodedValueSubField($double_literal.value)); } @@ -169,10 +169,10 @@ field_initial_value returns[EncodedValue encodedValue] method returns[ClassDataItem.EncodedMethod encodedMethod] - : ^(I_METHOD method_name_and_prototype access_list locals_directive statements) + : ^(I_METHOD method_name_and_prototype access_list registers_directive statements) { MethodIdItem methodIdItem = $method_name_and_prototype.methodIdItem; - int registers = $locals_directive.registers; + int registers = $registers_directive.registers; int access = $access_list.value; boolean isStatic = (access & AccessFlags.STATIC) != 0; ArrayList instructions = $statements.instructions; @@ -192,10 +192,10 @@ method_prototype returns[ProtoIdItem protoIdItem] }; method_name_and_prototype returns[MethodIdItem methodIdItem] - : member_name method_prototype + : MEMBER_NAME method_prototype { TypeIdItem classType = classDefItem.getClassType(); - String methodNameString = $member_name.memberName; + String methodNameString = $MEMBER_NAME.text; StringIdItem methodName = new StringIdItem(dexFile, methodNameString); ProtoIdItem protoIdItem = $method_prototype.protoIdItem; @@ -214,23 +214,25 @@ field_type_list returns[ArrayList types] } )*; -locals_directive 
returns[int registers] - : ^(I_REGISTERS INT_LITERAL) {$registers = Integer.parseInt($INT_LITERAL.text);}; +registers_directive returns[int registers] + : ^(I_REGISTERS INTEGER_LITERAL) {$registers = Integer.parseInt($INTEGER_LITERAL.text);}; -full_method_name_and_prototype returns[MethodIdItem methodIdItem] - : QUALIFIED_MEMBER__CLASS_NAME QUALIFIED_MEMBER__MEMBER_NAME method_prototype + + +fully_qualified_method returns[MethodIdItem methodIdItem] + : CLASS_NAME MEMBER_NAME method_prototype { - TypeIdItem classType = new TypeIdItem(dexFile, "L" + $QUALIFIED_MEMBER__CLASS_NAME.text + ";"); - StringIdItem methodName = new StringIdItem(dexFile, $QUALIFIED_MEMBER__MEMBER_NAME.text); + TypeIdItem classType = new TypeIdItem(dexFile, "L" + $CLASS_NAME.text + ";"); + StringIdItem methodName = new StringIdItem(dexFile, $MEMBER_NAME.text); ProtoIdItem prototype = $method_prototype.protoIdItem; $methodIdItem = new MethodIdItem(dexFile, classType, methodName, prototype); }; -full_field_name_and_type returns[FieldIdItem fieldIdItem] - : QUALIFIED_MEMBER__CLASS_NAME QUALIFIED_MEMBER__MEMBER_NAME field_type_descriptor +fully_qualified_field returns[FieldIdItem fieldIdItem] + : CLASS_NAME MEMBER_NAME field_type_descriptor { - TypeIdItem classType = new TypeIdItem(dexFile, "L" + $QUALIFIED_MEMBER__CLASS_NAME.text + ";"); - StringIdItem fieldName = new StringIdItem(dexFile, $QUALIFIED_MEMBER__MEMBER_NAME.text); + TypeIdItem classType = new TypeIdItem(dexFile, "L" + $CLASS_NAME.text + ";"); + StringIdItem fieldName = new StringIdItem(dexFile, $MEMBER_NAME.text); TypeIdItem fieldType = $field_type_descriptor.type; $fieldIdItem = new FieldIdItem(dexFile, classType, fieldName, fieldType); }; @@ -249,42 +251,42 @@ statements returns[ArrayList instructions] instruction returns[Instruction instruction] //e.g. 
return - : ^(I_STATEMENT_FORMAT10x INSTRUCTION_NAME_FORMAT10x) + : ^(I_STATEMENT_FORMAT10x INSTRUCTION_FORMAT10x) { - Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT10x.text); + Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT10x.text); $instruction = Format10x.Format.make(dexFile, opcode.value); } | //e.g. move-result-object v1 - ^(I_STATEMENT_FORMAT11x INSTRUCTION_NAME_FORMAT11x REGISTER) + ^(I_STATEMENT_FORMAT11x INSTRUCTION_FORMAT11x REGISTER) { - Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT11x.text); + Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT11x.text); short regA = parseRegister_byte($REGISTER.text); $instruction = Format11x.Format.make(dexFile, opcode.value, regA); } | //e.g. move v1 v2 - ^(I_STATEMENT_FORMAT12x INSTRUCTION_NAME_FORMAT12x registerA=REGISTER registerB=REGISTER) + ^(I_STATEMENT_FORMAT12x INSTRUCTION_FORMAT12x registerA=REGISTER registerB=REGISTER) { - Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT12x.text); + Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT12x.text); byte regA = parseRegister_nibble($registerA.text); byte regB = parseRegister_nibble($registerB.text); $instruction = Format12x.Format.make(dexFile, opcode.value, regA, regB); } | //e.g. sget_object v0 java/lang/System/out LJava/io/PrintStream; - ^(I_STATEMENT_FORMAT21c_FIELD INSTRUCTION_NAME_FORMAT21c_FIELD REGISTER full_field_name_and_type) + ^(I_STATEMENT_FORMAT21c_FIELD INSTRUCTION_FORMAT21c_FIELD REGISTER fully_qualified_field) { - Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT21c_FIELD.text); + Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT21c_FIELD.text); short regA = parseRegister_byte($REGISTER.text); - FieldIdItem fieldIdItem = $full_field_name_and_type.fieldIdItem; + FieldIdItem fieldIdItem = $fully_qualified_field.fieldIdItem; $instruction = Format21c.Format.make(dexFile, opcode.value, regA, fieldIdItem); } | //e.g. const-string v1 "Hello World!" 
- ^(I_STATEMENT_FORMAT21c_STRING INSTRUCTION_NAME_FORMAT21c_STRING REGISTER string_literal) + ^(I_STATEMENT_FORMAT21c_STRING INSTRUCTION_FORMAT21c_STRING REGISTER string_literal) { - Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT21c_STRING.text); + Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT21c_STRING.text); short regA = parseRegister_byte($REGISTER.text); StringIdItem stringIdItem = new StringIdItem(dexFile, $string_literal.value); @@ -292,9 +294,9 @@ instruction returns[Instruction instruction] $instruction = Format21c.Format.make(dexFile, opcode.value, regA, stringIdItem); } | //e.g. const-class v2 org/JesusFreke/HelloWorld2/HelloWorld2 - ^(I_STATEMENT_FORMAT21c_TYPE INSTRUCTION_NAME_FORMAT21c_TYPE REGISTER class_or_array_type_descriptor) + ^(I_STATEMENT_FORMAT21c_TYPE INSTRUCTION_FORMAT21c_TYPE REGISTER class_or_array_type_descriptor) { - Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT21c_TYPE.text); + Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT21c_TYPE.text); short regA = parseRegister_byte($REGISTER.text); TypeIdItem typeIdItem = $class_or_array_type_descriptor.type; @@ -302,22 +304,22 @@ instruction returns[Instruction instruction] $instruction = Format21c.Format.make(dexFile, opcode.value, regA, typeIdItem); } | //e.g. 
invoke-virtual {v0,v1} java/io/PrintStream/print(Ljava/lang/Stream;)V - ^(I_STATEMENT_FORMAT35c_METHOD INSTRUCTION_NAME_FORMAT35c_METHOD register_list full_method_name_and_prototype) + ^(I_STATEMENT_FORMAT35c_METHOD INSTRUCTION_FORMAT35c_METHOD register_list fully_qualified_method) { - Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT35c_METHOD.text); + Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT35c_METHOD.text); //this depends on the fact that register_list returns a byte[5] byte[] registers = $register_list.registers; byte registerCount = $register_list.registerCount; - MethodIdItem methodIdItem = $full_method_name_and_prototype.methodIdItem; + MethodIdItem methodIdItem = $fully_qualified_method.methodIdItem; $instruction = Format35c.Format.make(dexFile, opcode.value, registerCount, registers[0], registers[1], registers[2], registers[3], registers[4], methodIdItem); } | //e.g. invoke-virtual/range {v25..v26} java/lang/StringBuilder/append(Ljava/lang/String;)Ljava/lang/StringBuilder; - ^(I_STATEMENT_FORMAT3rc_METHOD INSTRUCTION_NAME_FORMAT3rc_METHOD register_range full_method_name_and_prototype) + ^(I_STATEMENT_FORMAT3rc_METHOD INSTRUCTION_FORMAT3rc_METHOD register_range fully_qualified_method) { - Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT3rc_METHOD.text); + Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT3rc_METHOD.text); int startRegister = $register_range.startRegister; int endRegister = $register_range.endRegister; @@ -331,19 +333,19 @@ instruction returns[Instruction instruction] throw new RuntimeException("A register range must have the lower register listed first"); } - MethodIdItem methodIdItem = $full_method_name_and_prototype.methodIdItem; + MethodIdItem methodIdItem = $fully_qualified_method.methodIdItem; //not supported yet $instruction = Format3rc.Format.make(dexFile, opcode.value, (short)registerCount, startRegister, methodIdItem); } | //e.g. 
iput-object v1 v0 org/JesusFreke/HelloWorld2/HelloWorld2.helloWorld Ljava/lang/String; - ^(I_STATEMENT_FORMAT22c_FIELD INSTRUCTION_NAME_FORMAT22c_FIELD registerA=REGISTER registerB=REGISTER full_field_name_and_type) + ^(I_STATEMENT_FORMAT22c_FIELD INSTRUCTION_FORMAT22c_FIELD registerA=REGISTER registerB=REGISTER fully_qualified_field) { - Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT22c_FIELD.text); + Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT22c_FIELD.text); byte regA = parseRegister_nibble($registerA.text); byte regB = parseRegister_nibble($registerB.text); - FieldIdItem fieldIdItem = $full_field_name_and_type.fieldIdItem; + FieldIdItem fieldIdItem = $fully_qualified_field.fieldIdItem; $instruction = Format22c.Format.make(dexFile, opcode.value, regA, regB, fieldIdItem); } @@ -378,54 +380,19 @@ register_range returns[int startRegister, int endRegister] } ; -simple_name - : SIMPLE_NAME - | ACCESS_SPEC - | INT_LITERAL - | LONG_LITERAL - | FLOAT_LITERAL_SIMPLE_NAME - | DOUBLE_LITERAL_SIMPLE_NAME - | BOOL_LITERAL - | PRIMITIVE_TYPE - | instruction_name - ; - -instruction_name returns[String value] - : INSTRUCTION_NAME_FORMAT10x - | INSTRUCTION_NAME_FORMAT11x - | INSTRUCTION_NAME_FORMAT12x - | INSTRUCTION_NAME_FORMAT21c_FIELD - | INSTRUCTION_NAME_FORMAT21c_STRING - | INSTRUCTION_NAME_FORMAT21c_TYPE - | INSTRUCTION_NAME_FORMAT22c_FIELD - | INSTRUCTION_NAME_FORMAT35c_METHOD - | INSTRUCTION_NAME_FORMAT3rc_METHOD - ; - -member_name returns[String memberName] - : (simple_name - | MEMBER_NAME) {$memberName = $start.getText();} - ; - -class_name returns [TypeIdItem type] - : token=(SIMPLE_NAME | CLASS_WITH_PACKAGE_NAME) - { - $type = new TypeIdItem(dexFile, 'L'+$token.text+';'); - }; - field_type_descriptor returns [TypeIdItem type] - : token=(PRIMITIVE_TYPE + : (PRIMITIVE_TYPE | CLASS_DESCRIPTOR - | ARRAY_TYPE) + | ARRAY_DESCRIPTOR) { - $type = new TypeIdItem(dexFile, $token.text); + $type = new TypeIdItem(dexFile, $start.getText()); }; 
class_or_array_type_descriptor returns [TypeIdItem type] - : token=(CLASS_DESCRIPTOR - | ARRAY_TYPE) + : (CLASS_DESCRIPTOR + | ARRAY_DESCRIPTOR) { - $type = new TypeIdItem(dexFile, $token.text); + $type = new TypeIdItem(dexFile, $start.getText()); }; class_type_descriptor returns [TypeIdItem type] @@ -439,8 +406,8 @@ type_descriptor returns [TypeIdItem type] | field_type_descriptor {$type = $field_type_descriptor.type;} ; -int_literal returns[int value] - : INT_LITERAL { $value = Integer.parseInt($INT_LITERAL.text); }; +integer_literal returns[int value] + : INTEGER_LITERAL { $value = Integer.parseInt($INTEGER_LITERAL.text); }; long_literal returns[long value] : LONG_LITERAL { $value = Long.parseLong($LONG_LITERAL.text); }; @@ -455,7 +422,11 @@ char_literal returns[char value] : CHAR_LITERAL { $value = $CHAR_LITERAL.text.charAt(0); }; string_literal returns[String value] - : STRING_LITERAL { $value = $STRING_LITERAL.text; }; + : STRING_LITERAL + { + $value = $STRING_LITERAL.text; + $value = $value.substring(1,$value.length()-1); + }; bool_literal returns[boolean value] : BOOL_LITERAL { $value = Boolean.parseBoolean($BOOL_LITERAL.text); }; diff --git a/src/main/java/org/JesusFreke/smali/smali.java b/src/main/java/org/JesusFreke/smali/smali.java index ddc57cc4..215564a0 100644 --- a/src/main/java/org/JesusFreke/smali/smali.java +++ b/src/main/java/org/JesusFreke/smali/smali.java @@ -32,6 +32,7 @@ import org.JesusFreke.dexlib.DexFile; import org.JesusFreke.dexlib.util.ByteArrayOutput; import org.antlr.runtime.ANTLRInputStream; import org.antlr.runtime.CommonTokenStream; +import org.antlr.runtime.Token; import org.antlr.runtime.tree.CommonTree; import org.antlr.runtime.tree.CommonTreeNodeStream; @@ -48,11 +49,12 @@ public class smali List l = tokens.getTokens();*/ - ANTLRInputStream input = new ANTLRInputStream(new FileInputStream(args[0])); smaliLexer lexer = new smaliLexer(input); + CommonTokenStream tokens = new CommonTokenStream(lexer); smaliParser parser = new 
smaliParser(tokens); + smaliParser.smali_file_return result = parser.smali_file(); CommonTree t = (CommonTree) result.getTree(); diff --git a/src/test/resources/examples/HelloWorld.smali b/src/test/resources/examples/HelloWorld.smali index 4bdac992..8f653a35 100644 --- a/src/test/resources/examples/HelloWorld.smali +++ b/src/test/resources/examples/HelloWorld.smali @@ -1,10 +1,10 @@ -.class public HelloWorld -.super java/lang/Object +.class public LHelloWorld; +.super Ljava/lang/Object; .method public ()V .registers 1 - invoke-direct {v0} java/lang/Object.()V + invoke-direct {v0}, java/lang/Object/()V return-void .end method @@ -12,10 +12,10 @@ .method public static main([Ljava/lang/String;)V .registers 4 - sget-object v0 java/lang/System.out Ljava/io/PrintStream; - const-string v1 "Hello World!" + sget-object v0, java/lang/System/out Ljava/io/PrintStream; + const-string v1, "Hello World!" - invoke-virtual {v0, v1} java/io/PrintStream.print(Ljava/Lang/Stream;)V + invoke-virtual {v0, v1}, java/io/PrintStream/print(Ljava/lang/String;)V return-void .end method diff --git a/src/test/resources/examples/HelloWorld2.smali b/src/test/resources/examples/HelloWorld2.smali index c9e7a107..948ae8d0 100644 --- a/src/test/resources/examples/HelloWorld2.smali +++ b/src/test/resources/examples/HelloWorld2.smali @@ -1,102 +1,102 @@ -.class public org/JesusFreke/HelloWorld2/HelloWorld2 -.super android/app/Activity - -.field private helloWorld Ljava/lang/String; -.field private static helloWorldStatic Ljava/lang/String; - -.field private static helloWorldStatic2 Ljava/lang/String; = "Static Initializer Hello World!" - -.method static constructor ()V - .registers 1 - - const-string v0, "Static Hello World!" - sput-object v0, org/JesusFreke/HelloWorld2/HelloWorld2.helloWorldStatic Ljava/lang/String; - - return-void -.end method - -.method public constructor ()V - .registers 2 - invoke-direct {v1}, android/app/Activity.()V - - const-string v0, "Hello World!"
- iput-object v0, v1, org/JesusFreke/HelloWorld2/HelloWorld2.helloWorld Ljava/lang/String; - - return-void -.end method - -.method public onCreate(Landroid/os/Bundle;)V - .registers 6 - - invoke-super {v4,v5}, android/app/Activity.onCreate(Landroid/os/Bundle;)V - - const-string v3, "\n" - - new-instance v0, Landroid/widget/TextView; - invoke-direct {v0,v4}, android/widget/TextView.(Landroid/content/Context;)V - - iget-object v1, v4, org/JesusFreke/HelloWorld2/HelloWorld2.helloWorld Ljava/lang/String; - - invoke-virtual {v1, v3}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; - move-result-object v1 - - sget-object v2, org/JesusFreke/HelloWorld2/HelloWorld2.helloWorldStatic Ljava/lang/String; - invoke-virtual {v1, v2}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; - move-result-object v1 - - invoke-virtual {v1, v3}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; - move-result-object v1 - - sget-object v2, org/JesusFreke/HelloWorld2/HelloWorld2.helloWorldStatic2 Ljava/lang/String; - invoke-virtual/range {v1 .. v2}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; - move-result-object v1 - - - invoke-virtual {v1, v3}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; - move-result-object v1 - - const-class v2, Lorg/JesusFreke/HelloWorld2/HelloWorld2; - invoke-virtual {v2}, java/lang/Class.getName()Ljava/lang/String; - move-result-object v2 - - invoke-virtual/range {v1 .. v2}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; - move-result-object v1 - - - - invoke-virtual {v1, v3}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; - move-result-object v1 - - const-class v2, [Lorg/JesusFreke/HelloWorld2/HelloWorld2; - invoke-virtual {v2}, java/lang/Class.getName()Ljava/lang/String; - move-result-object v2 - - invoke-virtual/range {v1 .. 
v2}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; - move-result-object v1 - - - - invoke-virtual {v1, v3}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; - move-result-object v1 - - const-class v2, [I - invoke-virtual {v2}, java/lang/Class.getName()Ljava/lang/String; - move-result-object v2 - - invoke-virtual/range {v1 .. v2}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; - move-result-object v1 - - move-object v2, v1 - - - - - check-cast v4, Landroid/app/Activity; - - invoke-virtual {v0,v2}, android/widget/TextView.setText(Ljava/lang/CharSequence;)V - invoke-virtual {v4,v0}, org/JesusFreke/HelloWorld2/HelloWorld2.setContentView(Landroid/view/View;)V - - return-void -.end method - - +.class public Lorg/JesusFreke/HelloWorld2/HelloWorld2; +.super Landroid/app/Activity; + +.field private helloWorld Ljava/lang/String; +.field private static helloWorldStatic Ljava/lang/String; + +.field private static helloWorldStatic2 Ljava/lang/String; = "Static Initializer Hello World!" + +.method static constructor ()V + .registers 1 + + const-string v0, "Static Hello World!" + sput-object v0, org/JesusFreke/HelloWorld2/HelloWorld2/helloWorldStatic Ljava/lang/String; + + return-void +.end method + +.method public constructor ()V + .registers 2 + invoke-direct {v1}, android/app/Activity/()V + + const-string v0, "Hello World!" 
+ iput-object v0, v1, org/JesusFreke/HelloWorld2/HelloWorld2/helloWorld Ljava/lang/String; + + return-void +.end method + +.method public onCreate(Landroid/os/Bundle;)V + .registers 6 + + invoke-super {v4,v5}, android/app/Activity/onCreate(Landroid/os/Bundle;)V + + const-string v3, "\n" + + new-instance v0, Landroid/widget/TextView; + invoke-direct {v0,v4}, android/widget/TextView/(Landroid/content/Context;)V + + iget-object v1, v4, org/JesusFreke/HelloWorld2/HelloWorld2/helloWorld Ljava/lang/String; + + invoke-virtual {v1, v3}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String; + move-result-object v1 + + sget-object v2, org/JesusFreke/HelloWorld2/HelloWorld2/helloWorldStatic Ljava/lang/String; + invoke-virtual {v1, v2}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String; + move-result-object v1 + + invoke-virtual {v1, v3}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String; + move-result-object v1 + + sget-object v2, org/JesusFreke/HelloWorld2/HelloWorld2/helloWorldStatic2 Ljava/lang/String; + invoke-virtual/range {v1 .. v2}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String; + move-result-object v1 + + + invoke-virtual {v1, v3}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String; + move-result-object v1 + + const-class v2, Lorg/JesusFreke/HelloWorld2/HelloWorld2; + invoke-virtual {v2}, java/lang/Class/getName()Ljava/lang/String; + move-result-object v2 + + invoke-virtual/range {v1 .. v2}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String; + move-result-object v1 + + + + invoke-virtual {v1, v3}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String; + move-result-object v1 + + const-class v2, [Lorg/JesusFreke/HelloWorld2/HelloWorld2; + invoke-virtual {v2}, java/lang/Class/getName()Ljava/lang/String; + move-result-object v2 + + invoke-virtual/range {v1 .. 
v2}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String; + move-result-object v1 + + + + invoke-virtual {v1, v3}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String; + move-result-object v1 + + const-class v2, [I + invoke-virtual {v2}, java/lang/Class/getName()Ljava/lang/String; + move-result-object v2 + + invoke-virtual/range {v1 .. v2}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String; + move-result-object v1 + + move-object v2, v1 + + + + + check-cast v4, Landroid/app/Activity; + + invoke-virtual {v0,v2}, android/widget/TextView/setText(Ljava/lang/CharSequence;)V + invoke-virtual {v4,v0}, org/JesusFreke/HelloWorld2/HelloWorld2/setContentView(Landroid/view/View;)V + + return-void +.end method + +