diff --git a/pom.xml b/pom.xml
index b125bd7b..eb469de0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -23,12 +23,21 @@
3.1.3-1
- smali
+ smaliLexer
antlr
-
- org/JesusFreke/smali/smaliTreeWalker.g
+
+ org/JesusFreke/smali/smaliLexer.g
+
+
+
+ smaliParser
+
+ antlr
+
+
+ org/JesusFreke/smali/smaliParser.g
@@ -37,7 +46,7 @@
antlr
- org/JesusFreke/smali/smali.g
+ org/JesusFreke/smali/smaliTreeWalker.g
diff --git a/src/main/antlr3/org/JesusFreke/smali/smali.g b/src/main/antlr3/org/JesusFreke/smali/smali.g
deleted file mode 100644
index 9c7d9802..00000000
--- a/src/main/antlr3/org/JesusFreke/smali/smali.g
+++ /dev/null
@@ -1,677 +0,0 @@
-/*
- * The comment lexical rule, and the number, string and character constant
- * lexical rules are derived from rules from the Java 1.6 grammar which can be
- * found here: http://openjdk.java.net/projects/compiler-grammar/antlrworks/Java.g
- *
- * Specifically, these rules:
- *
- * COMMENT, LONG_LITERAL, INT_LITERAL, Integer_number, Hex_prefix, Hex_digit,
- * Long_suffix, Non_integer_number_SIMPLE_NAME, Non_integer_number,
- * Decimal_exponent, Hex_exponent, Float_suffix, Double_suffix,
- * FLOAT_LITERAL_SIMPLE_NAME, FLOAT_LITERAL, DOUBLE_LITERAL_SIMPLE_NAME,
- * DOUBLE_LITERAL, CHAR_LITERAL, STRING_LITERAL, EscapeSequence
- *
- * These rules were originally copyrighted by Terence Parr, and are used here in
- * accordance with the following license
- *
- * [The "BSD licence"]
- * Copyright (c) 2007-2008 Terence Parr
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form mmaven-2.0.9" -Didea.launcher.port=7538 "-Didea.launcher.bin.path=C:\Program Files\JetBrains\IntelliJ IDEA 8.1\bin" -Dfile.encoding=windows-1252 -classpath "C:\Program Files\Apache Software Foundation\apache-maven-2.0.9\boot\classworlds-1.1.jar;C:\Program Files\JetBrains\IntelliJ IDEA 8.1\lib\idea_rt.jar" com.intellij.rt.execution.application.AppMain org.codehaus.classworlds.Launcher --no-plugin-registry --fail-fast --no-plugin-updates --strict-checksums -f D:\Android\smali\pom.xml compile
-+ Enabling strict checksum verification on all artifact downloads.
-[INFO] Scanning for projects...
-[INFO] ------------------------------------------------------------------------
-[INFO] Building Unnamed - smali:smali:jar:1.0
-[INFO] task-segment: [compile]
-[INFO] ------------------------------------------------------------------------
-[INFO] [antlr3:antlr {execution: smali}]
-[INFO] ANTLR: Processing source directory D:\Android\smali\src\main\antlr3
-ANTLR Parser Generator Version 3.1.3 Mar 17, 2009 19:23:44
-org\JesusFreke\smali\smali.g
-[INFO] [antlr3:antlr {execution: smaliTreeWalker}]
-[INFO] ANTLR: Processing source directory D:\Android\smali\src\main\antlr3
-ANTLR Parser Generator Version 3.1.3 Mar 17, 2009 19:23:44
-org\JesusFreke\smali\smaliTreeWalker.g
-[INFO] [resources:resources]ust reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * The remainder of this grammar is released by me (Ben Gruver) under the
- * following license:
- *
- * [The "BSD licence"]
- * Copyright (c) 2009 Ben Gruver
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-grammar smali;
-
-options {
- output=AST;
- ASTLabelType=CommonTree;
-}
-
-tokens {
- //I_* tokens are imaginary tokens used as parent AST nodes
- I_CLASS_DEF;
- I_SUPER;
- I_ACCESS_LIST;
- I_METHODS;
- I_FIELDS;
- I_FIELD;
- I_FIELD_TYPE;
- I_FIELD_INITIAL_VALUE;
- I_METHOD;
- I_METHOD_PROTOTYPE;
- I_METHOD_RETURN_TYPE;
- I_REGISTERS;
- I_STATEMENTS;
- I_STATEMENT_FORMAT10x;
- I_STATEMENT_FORMAT11x;
- I_STATEMENT_FORMAT12x;
- I_STATEMENT_FORMAT21c_TYPE;
- I_STATEMENT_FORMAT21c_FIELD;
- I_STATEMENT_FORMAT22c_FIELD;
- I_STATEMENT_FORMAT21c_STRING;
- I_STATEMENT_FORMAT35c_METHOD;
- I_STATEMENT_FORMAT3rc_METHOD;
- I_REGISTER_RANGE;
- I_REGISTER_LIST;
-}
-
-@parser::header {
-package org.JesusFreke.smali;
-}
-
-@lexer::header {
-package org.JesusFreke.smali;
-
-import java.util.ArrayDeque;
-}
-
-@lexer::init {
- state.token = Token.INVALID_TOKEN;
-}
-
-@lexer::members {
- protected ArrayDeque tokens = new ArrayDeque();
-
- public void reset() {
- super.reset();
- state.token = Token.INVALID_TOKEN;
- tokens.clear();
- }
-
- public Token nextToken() {
- while (true) {
- if (tokens.size() > 0) {
- Token token = tokens.poll();
- if (token == Token.SKIP_TOKEN) {
- continue;
- }
-
- return token;
- }
-
- state.channel = Token.DEFAULT_CHANNEL;
- state.tokenStartCharIndex = input.index();
- state.tokenStartCharPositionInLine = input.getCharPositionInLine();
- state.tokenStartLine = input.getLine();
- state.text = null;
- if ( input.LA(1)==CharStream.EOF ) {
- return Token.EOF_TOKEN;
- }
- try {
- mTokens();
-
- if (tokens.size() == 0) {
- emit();
- }
- }
- catch (NoViableAltException nva) {
- reportError(nva);
- recover(nva); // throw out current char and try again
- }
- catch (RecognitionException re) {
- reportError(re);
- // match() routine has already called recover()
- }
- }
- }
-
- public void skip() {
- tokens.add(Token.SKIP_TOKEN);
- }
-
- public void emit(Token token) {
- tokens.add(token);
- }
-}
-
-
-smali_file: header methods_and_fields -> ^(I_CLASS_DEF header methods_and_fields);
-
-header : class_spec super_spec;
-
-class_spec
- : '.class' access_list class_name -> class_name access_list;
-
-super_spec
- : first_token='.super' class_name -> ^(I_SUPER[$first_token, "I_SUPER"] class_name);
-
-access_list
- : first_token=ACCESS_SPEC ACCESS_SPEC* -> ^(I_ACCESS_LIST[$first_token,"I_ACCESS_LIST"] ACCESS_SPEC+);
-
-methods_and_fields
- : (method | field)* -> ^(I_METHODS method*) ^(I_FIELDS field*);
-
-field : first_token='.field' access_list member_name field_type_descriptor ('=' literal)?
- -> ^(I_FIELD[$first_token, "I_FIELD"] member_name access_list ^(I_FIELD_TYPE field_type_descriptor) ^(I_FIELD_INITIAL_VALUE literal)?);
-
-method : first_token='.method' access_list method_name_and_prototype locals_directive statements '.end method'
- -> ^(I_METHOD[$first_token, "I_METHOD"] method_name_and_prototype access_list locals_directive statements);
-
-method_prototype
- : first_token='(' field_type_list ')' type_descriptor
- -> ^(I_METHOD_PROTOTYPE[$first_token, "I_METHOD_PROTOTYPE"] ^(I_METHOD_RETURN_TYPE type_descriptor) field_type_list?);
-
-method_name_and_prototype
- : member_name method_prototype;
-
-field_type_list
- : field_type_descriptor*;
-
-locals_directive
- : first_token='.registers' INT_LITERAL
- -> ^(I_REGISTERS[$first_token, "I_REGISTERS"] INT_LITERAL);
-
-
-full_method_name_and_prototype
- : QUALIFIED_MEMBER__CLASS_NAME QUALIFIED_MEMBER__MEMBER_NAME method_prototype;
-
-full_field_name_and_type
- : QUALIFIED_MEMBER__CLASS_NAME QUALIFIED_MEMBER__MEMBER_NAME field_type_descriptor;
-
-statements
- : statement* -> ^(I_STATEMENTS statement*);
-
-statement
- : instruction;
-
-instruction
- //e.g. return
- : INSTRUCTION_NAME_FORMAT10x
- -> ^(I_STATEMENT_FORMAT10x[$start, "I_STATEMENT_FORMAT10x"] INSTRUCTION_NAME_FORMAT10x)
- | //e.g. move-result-object v1
- INSTRUCTION_NAME_FORMAT11x REGISTER
- -> ^(I_STATEMENT_FORMAT11x[$start, "I_STATEMENT_FORMAT11x"] INSTRUCTION_NAME_FORMAT11x REGISTER)
- | //e.g. move v1 v2
- INSTRUCTION_NAME_FORMAT12x REGISTER ',' REGISTER
- -> ^(I_STATEMENT_FORMAT12x[$start, "I_STATEMENT_FORMAT12x"] INSTRUCTION_NAME_FORMAT12x REGISTER REGISTER)
- | //e.g. sget_object v0 java/lang/System/out LJava/io/PrintStream;
- INSTRUCTION_NAME_FORMAT21c_FIELD REGISTER ',' full_field_name_and_type
- -> ^(I_STATEMENT_FORMAT21c_FIELD[$start, "I_STATEMENT_FORMAT21c_FIELD"] INSTRUCTION_NAME_FORMAT21c_FIELD REGISTER full_field_name_and_type)
- | //e.g. const-string v1 "Hello World!"
- INSTRUCTION_NAME_FORMAT21c_STRING REGISTER ',' STRING_LITERAL
- -> ^(I_STATEMENT_FORMAT21c_STRING[$start, "I_STATEMENT_FORMAT21c_STRING"] INSTRUCTION_NAME_FORMAT21c_STRING REGISTER STRING_LITERAL)
- | //e.g. const-class v2 org/JesusFreke/HelloWorld2/HelloWorld2
- INSTRUCTION_NAME_FORMAT21c_TYPE REGISTER ',' class_or_array_type_descriptor
- -> ^(I_STATEMENT_FORMAT21c_TYPE[$start, "I_STATEMENT_FORMAT21c"] INSTRUCTION_NAME_FORMAT21c_TYPE REGISTER class_or_array_type_descriptor)
- | //e.g. iput-object v1 v0 org/JesusFreke/HelloWorld2/HelloWorld2.helloWorld Ljava/lang/String;
- INSTRUCTION_NAME_FORMAT22c_FIELD REGISTER ',' REGISTER ',' full_field_name_and_type
- -> ^(I_STATEMENT_FORMAT22c_FIELD[$start, "I_INSTANCE_FIELD_STATEMENT"] INSTRUCTION_NAME_FORMAT22c_FIELD REGISTER REGISTER full_field_name_and_type)
- | //e.g. invoke-virtual {v0,v1} java/io/PrintStream/print(Ljava/lang/Stream;)V
- INSTRUCTION_NAME_FORMAT35c_METHOD '{' register_list '}' ',' full_method_name_and_prototype
- -> ^(I_STATEMENT_FORMAT35c_METHOD[$start, "I_STATEMENT_FORMAT35c_METHOD"] INSTRUCTION_NAME_FORMAT35c_METHOD register_list full_method_name_and_prototype)
- | //e.g. invoke-virtual/range {v25..v26} java/lang/StringBuilder/append(Ljava/lang/String;)Ljava/lang/StringBuilder;
- INSTRUCTION_NAME_FORMAT3rc_METHOD '{' register_range '}' ',' full_method_name_and_prototype
- -> ^(I_STATEMENT_FORMAT3rc_METHOD[$start, "I_STATEMENT_FORMAT3rc_METHOD"] INSTRUCTION_NAME_FORMAT3rc_METHOD register_range full_method_name_and_prototype)
- ;
-
-
-register_list
- : first_token=REGISTER? (',' REGISTER)* -> ^(I_REGISTER_LIST[$first_token, "I_REGISTER_LIST"] REGISTER*);
-
-register_range
- : first_token=REGISTER ('..' REGISTER)? -> ^(I_REGISTER_RANGE[$first_token, "I_REGISTER_RANGE"] REGISTER REGISTER?);
-
-/*since there are no reserved words in the dex specification, there are a
-number of tokens that can be a valid simple_name, in addition to just
-SIMPLE_NAME. We need to match any token that could also be considered a valid
-SIMPLE_NAME. In the case of floating point literals, some could be considered
-a valid SIMPLE_NAME while others couldn't. The lexer will generate a separate
-FLOAT_LITERAL_SIMPLE_NAME OR DOUBLE_LITERAL_SIMPLE_NAME token for literals
-that can be considered a valid SIMPLE_NAME*/
-simple_name
- : SIMPLE_NAME
- | ACCESS_SPEC
- | instruction_name
- | INT_LITERAL
- | LONG_LITERAL
- | FLOAT_LITERAL_SIMPLE_NAME
- | DOUBLE_LITERAL_SIMPLE_NAME
- | BOOL_LITERAL
- | PRIMITIVE_TYPE
- ;
-
-instruction_name
- : INSTRUCTION_NAME_FORMAT10x
- | INSTRUCTION_NAME_FORMAT11x
- | INSTRUCTION_NAME_FORMAT12x
- | INSTRUCTION_NAME_FORMAT21c_FIELD
- | INSTRUCTION_NAME_FORMAT21c_STRING
- | INSTRUCTION_NAME_FORMAT21c_TYPE
- | INSTRUCTION_NAME_FORMAT22c_FIELD
- | INSTRUCTION_NAME_FORMAT35c_METHOD
- | INSTRUCTION_NAME_FORMAT3rc_METHOD
- ;
-
-member_name
- : simple_name
- | MEMBER_NAME
- ;
-
-class_name
- : SIMPLE_NAME | CLASS_WITH_PACKAGE_NAME;
-
-field_type_descriptor
- : PRIMITIVE_TYPE
- | CLASS_DESCRIPTOR
- | ARRAY_TYPE
- ;
-
-class_or_array_type_descriptor
- : CLASS_DESCRIPTOR
- | ARRAY_TYPE;
-
-type_descriptor
- : VOID_TYPE
- | field_type_descriptor
- ;
-
-literal : INT_LITERAL
- | LONG_LITERAL
- | float_literal
- | double_literal
- | CHAR_LITERAL
- | STRING_LITERAL
- | BOOL_LITERAL;
-
-float_literal
- : FLOAT_LITERAL -> FLOAT_LITERAL
- | FLOAT_LITERAL_SIMPLE_NAME -> FLOAT_LITERAL[$FLOAT_LITERAL_SIMPLE_NAME, $FLOAT_LITERAL_SIMPLE_NAME.text];
-
-double_literal
- : DOUBLE_LITERAL -> DOUBLE_LITERAL
- | DOUBLE_LITERAL_SIMPLE_NAME -> DOUBLE_LITERAL[$DOUBLE_LITERAL_SIMPLE_NAME, $DOUBLE_LITERAL_SIMPLE_NAME.text];
-
-ACCESS_SPEC
- : 'public' | 'private' | 'static' | 'constructor' | 'final';
-
-INSTRUCTION_NAME_FORMAT10x
- : 'return-void'
- | 'nop';
-
-INSTRUCTION_NAME_FORMAT11x
- : 'move-result'
- | 'move-result-wide'
- | 'move-result-object'
- | 'move-exception'
- | 'return'
- | 'return-wide'
- | 'return-object'
- | 'monitor-enter'
- | 'monitor-exit'
- | 'throw';
-
-INSTRUCTION_NAME_FORMAT12x
- : 'move'
- | 'move-wide'
- | 'move-object'
- | 'array-length'
- | 'neg-int'
- | 'not-int'
- | 'neg-long'
- | 'not-long'
- | 'neg-float'
- | 'neg-double'
- | 'int-to-long'
- | 'int-to-float'
- | 'int-to-double'
- | 'long-to-int'
- | 'long-to-float'
- | 'long-to-double'
- | 'float-to-int'
- | 'float-to-long'
- | 'float-to-double'
- | 'double-to-int'
- | 'double-to-long'
- | 'double-to-float'
- | 'int-to-byte'
- | 'int-to-char'
- | 'int-to-short'
- | 'add-int/2addr'
- | 'sub-int/2addr'
- | 'mul-int/2addr'
- | 'div-int/2addr'
- | 'rem-int/2addr'
- | 'and-int/2addr'
- | 'or-int/2addr'
- | 'xor-int/2addr'
- | 'shl-int/2addr'
- | 'shr-int/2addr'
- | 'ushr-int/2addr'
- | 'add-long/2addr'
- | 'sub-long/2addr'
- | 'mul-long/2addr'
- | 'div-long/2addr'
- | 'rem-long/2addr'
- | 'and-long/2addr'
- | 'or-long/2addr'
- | 'xor-long/2addr'
- | 'shl-long/2addr'
- | 'shr-long/2addr'
- | 'ushr-long/2addr'
- | 'add-float/2addr'
- | 'sub-float/2addr'
- | 'mul-float/2addr'
- | 'div-float/2addr'
- | 'rem-float/2addr'
- | 'add-double/2addr'
- | 'sub-double/2addr'
- | 'mul-double/2addr'
- | 'div-double/2addr'
- | 'rem-double/2addr';
-
-INSTRUCTION_NAME_FORMAT21c_FIELD
- : 'sget'
- | 'sget-wide'
- | 'sget-object'
- | 'sget-boolean'
- | 'sget-byte'
- | 'sget-char'
- | 'sget-short'
- | 'sput'
- | 'sput-wide'
- | 'sput-object'
- | 'sput-boolean'
- | 'sput-byte'
- | 'sput-char'
- | 'sput-short'
- ;
-
-INSTRUCTION_NAME_FORMAT21c_STRING
- : 'const-string';
-
-INSTRUCTION_NAME_FORMAT21c_TYPE
- : 'check-cast'
- | 'new-instance'
- | 'const-class';
-
-INSTRUCTION_NAME_FORMAT22c_FIELD
- : 'iget'
- | 'iget-wide'
- | 'iget-object'
- | 'iget-boolean'
- | 'iget-byte'
- | 'iget-char'
- | 'iget-short'
- | 'iput'
- | 'iput-wide'
- | 'iput-object'
- | 'iput-boolean'
- | 'iput-byte'
- | 'iput-char'
- | 'iput-short'
- ;
-
-INSTRUCTION_NAME_FORMAT35c_METHOD
- : 'invoke-virtual'
- | 'invoke-super'
- | 'invoke-direct'
- | 'invoke-static'
- | 'invoke-interface'
- ;
-
-INSTRUCTION_NAME_FORMAT3rc_METHOD
- : 'invoke-virtual/range'
- | 'invoke-super/range'
- | 'invoke-direct/range'
- | 'invoke-static/range'
- | 'invoke-interface/range'
- ;
-
-/*since SIMPLE_NAME is so all-encompassing, it includes all integer literals
-and a subset of the possible floating point literals. For floating point
-literals, we need to generate a separate token depending on whether the token
-could also be considered a SIMPLE_NAME or not.
-
-The floating point related tokens with a _SIMPLE_NAME suffix could also be
-considered valid SIMPLE_NAME tokens, while the plain version of the token
-(without the suffix) could not be considered a valid SIMPLE_NAME token*/
-
-LONG_LITERAL
- : Integer_number Long_suffix;
-
-INT_LITERAL
- : Integer_number;
-
-fragment Integer_number
- : '-'? '0'
- | '-'? ('1'..'9') ('0'..'9')*
- | '0' ('0'..'7')+
- | Hex_prefix Hex_digit+
- ;
-
-fragment Hex_prefix
- : '0x'|'0X';
-
-fragment Hex_digit
- : ('0'..'9'|'a'..'f'|'A'..'F');
-
-fragment Long_suffix
- : 'l'|'L';
-
-fragment Non_integer_number_SIMPLE_NAME
- : ('0'..'9')+ Decimal_exponent
- | ('0'..'9')+
- | Hex_prefix (Hex_digit)* Hex_exponent
- ;
-
-
-fragment Non_integer_number
- : ('0'..'9')+ '.' ('0'..'9')* Decimal_exponent?
- | '.' ('0'..'9')+ Decimal_exponent?
- | Hex_prefix (Hex_digit)* '.' (Hex_digit)* Hex_exponent
- ;
-
-fragment Decimal_exponent
- : ('e'|'E') '-'? ('0'..'9')+;
-
-fragment Hex_exponent
- : ('p'|'P') '-'? ('0'..'9')+;
-
-fragment Float_suffix
- : 'f'|'F';
-
-fragment Double_suffix
- : 'd'|'D';
-
-FLOAT_LITERAL_SIMPLE_NAME
- : Non_integer_number_SIMPLE_NAME Float_suffix;
-
-FLOAT_LITERAL
- : Non_integer_number Float_suffix;
-
-DOUBLE_LITERAL_SIMPLE_NAME
- : Non_integer_number_SIMPLE_NAME Double_suffix?;
-
-DOUBLE_LITERAL
- : Non_integer_number Double_suffix?;
-
-CHAR_LITERAL
-
- : '\'' {StringBuilder sb = new StringBuilder();}
- ( Escape_sequence[sb] {setText(sb.toString());}
- | ~( '\'' | '\\' | '\r' | '\n' )
- )
- '\''
- ;
-
-STRING_LITERAL
- : '"' {StringBuilder sb = new StringBuilder();}
- ( Escape_sequence[sb]
- | ~( '\\' | '"' | '\r' | '\n' ) {sb.append((char)input.LA(-1));}
- )*
- '"' {setText(sb.toString());}
- ;
-
-
-Hex_digits
- : Hex_digit Hex_digit Hex_digit Hex_digit;
-
-fragment
-Escape_sequence[StringBuilder sb]
- : '\\'
- (
- 'b' {sb.append("\b");}
- | 't' {sb.append("\t");}
- | 'n' {sb.append("\n");}
- | 'f' {sb.append("\f");}
- | 'r' {sb.append("\r");}
- | '\"' {sb.append("\"");}
- | '\'' {sb.append("'");}
- | '\\' {sb.append("\\");}
- | 'u' Hex_digits {sb.append((char)Integer.parseInt($Hex_digits.text, 16));}
-/* | octdigits=(('0'..'3') ('0'..'7') ('0'..'7')) {$value = (char)Integer.parseInt("0" + $octdigits.text);}
- | octdigits=(('0'..'7') ('0'..'7')) {$value = (char)Integer.parseInt("0" + $octdigits.text);}
- | octdigits=(('0'..'7')) {$value = (char)Integer.parseInt("0" + $octdigits.text);}*/
- );
-
-BOOL_LITERAL
- : 'true'|'false';
-
-
-
-WHITESPACE
- : (' '|'\t'|'\n'|'\r')+ {$channel = HIDDEN;};
-
-REGISTER: 'v' ('0'..'9')+;
-
-
-/*a token of type QUALIFIED_MEMBER is never generated. This rule emits 2 sub-tokens
-that represent the class name and the member name, so that they don't have to be
-parsed out later*/
-QUALIFIED_MEMBER
- : class_name=QUALIFIED_MEMBER__CLASS_NAME '.' member_name=QUALIFIED_MEMBER__MEMBER_NAME
- {
- $class_name.setType(QUALIFIED_MEMBER__CLASS_NAME);
- $member_name.setType(QUALIFIED_MEMBER__MEMBER_NAME);
- emit($class_name);
- emit($member_name);
- };
-
-fragment QUALIFIED_MEMBER__CLASS_NAME
- : (SIMPLE_NAME '/')* SIMPLE_NAME;
-
-fragment QUALIFIED_MEMBER__MEMBER_NAME
- : MEMBER_NAME | SIMPLE_NAME;
-
-
-ARRAY_TYPE
- :
- ARRAY_CHAR_LIST[255] (PRIMITIVE_TYPE | CLASS_DESCRIPTOR);
-
-
-//match from 1 to maxCount '[' characters
-fragment
-ARRAY_CHAR_LIST[int maxCount]
- : {$maxCount > 1}?=> '[' ARRAY_CHAR_LIST[$maxCount - 1]
- | '['
- ;
-
-MEMBER_NAME
- : '<' SIMPLE_NAME '>';
-
-VOID_TYPE
- : 'V';
-
-PRIMITIVE_TYPE
- : 'Z'
- | 'B'
- | 'S'
- | 'C'
- | 'I'
- | 'J'
- | 'F'
- | 'D'
- ;
-
-CLASS_WITH_PACKAGE_NAME
- : (SIMPLE_NAME '/')+ SIMPLE_NAME;
-
-CLASS_DESCRIPTOR
- : 'L' (SIMPLE_NAME | CLASS_WITH_PACKAGE_NAME) ';';
-
-SIMPLE_NAME:
- ( 'A'..'Z'
- | 'a'..'z'
- | '0'..'9'
- | '$'
- | '-'
- | '_'
- | '\u00a1'..'\u1fff'
- | '\u2010'..'\u2027'
- | '\u2030'..'\ud7ff'
- | '\ue000'..'\uffef'
- )+;
-
-COMMENT
- : (';' ~('\n'|'\r')* ('\r\n' | '\r' | '\n')
- | ';' ~('\n'|'\r')*)
- {
- $channel = HIDDEN;
- }
- ;
diff --git a/src/main/antlr3/org/JesusFreke/smali/smaliLexer.g b/src/main/antlr3/org/JesusFreke/smali/smaliLexer.g
new file mode 100644
index 00000000..6c7f3ae8
--- /dev/null
+++ b/src/main/antlr3/org/JesusFreke/smali/smaliLexer.g
@@ -0,0 +1,749 @@
+/*
+ * The number, string and character constant lexical rules are derived from rules
+ * from the Java 1.6 grammar which can be found here:
+ * http://openjdk.java.net/projects/compiler-grammar/antlrworks/Java.g
+ *
+ * Specifically, these rules:
+ *
+ * STRING_LITERAL, ESCAPE_SEQUENCE, HEX_DIGITS, HEX_DIGIT, INTEGER_LITERAL,
+ * HEX_PREFIX, LONG_LITERAL, FLOATING_POINT_NUMBER, DECIMAL_EXPONENT,
+ * HEX_EXPONENT, FLOAT_LITERAL, DOUBLE_LITERAL, CHAR_LITERAL
+ *
+ * These rules were originally copyrighted by Terence Parr, and are used here in
+ * accordance with the following license
+ *
+ * [The "BSD licence"]
+ * Copyright (c) 2007-2008 Terence Parr
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * The remainder of this grammar is released by me (Ben Gruver) under the
+ * following license:
+ *
+ * [The "BSD licence"]
+ * Copyright (c) 2009 Ben Gruver
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+
+/*smali files are particularly hard to tokenize, because of dex's
+identifiers, which are much more all-encompassing than most languages'.
+One reasonable possibility would be to limit the identifiers to what Java
+supports. But I want the syntax to expose the full functionality of the dex
+format, so that means supporting the wide range of identifiers that it
+supports.
+
+This makes tokenizing a much more context sensitive operation than usual. To
+address this, I've extended the base lexer class to support multiple
+token emissions per rule. The top level *_PHRASE lexical rules generally
+match a "phrase". Each phrase has a specific format, and a unique starting
+sequence - typically a directive or opcode. Each phrase rule doesn't generate
+a token that represents itself, like a typical lexical rule, rather, it emits
+all of its children tokens.
+
+For example, a phrase may consist of ".field private helloWorld Ljava/lang/String;".
+
+The corresponding rule (without the supporting emission code) would look something like
+
+FIELD_PHRASE : '.field' ACCESS_SPEC+ MEMBER_NAME FIELD_TYPE_DESCRIPTOR
+
+There would never be a "FIELD_PHRASE" token in the output token stream. Instead,
+it would emit a token for each of its children tokens.*/
+
+
+lexer grammar smaliLexer;
+
+@lexer::header {
+package org.JesusFreke.smali;
+
+import java.util.ArrayDeque;
+}
+
+/* Grammar-level init action: assigns Token.INVALID_TOKEN to state.token, the
+   same value that reset() in the members section assigns.
+   NOTE(review): confirm that the ANTLR target in use actually splices a
+   lexer-scope "init" action into the generated lexer. */
+@lexer::init {
+ state.token = Token.INVALID_TOKEN;
+}
+
+@lexer::members {
+    /* Pending tokens. Phrase rules queue several tokens per match, so
+       nextToken() drains this queue before it lexes any further input. */
+    protected ArrayDeque<Token> tokens = new ArrayDeque<Token>();
+
+    /* Resets the lexer state and drops every token that is still queued. */
+    public void reset() {
+        super.reset();
+        state.token = Token.INVALID_TOKEN;
+        tokens.clear();
+    }
+
+    /* Returns the next token for the parser.
+       Queued tokens are handed out first (SKIP_TOKEN markers are dropped).
+       When the queue is empty one more top-level rule is matched through
+       mTokens(); recognition errors are reported and lexing continues.
+       Returns Token.EOF_TOKEN once the input is exhausted. */
+    public Token nextToken() {
+        while (true) {
+            if (tokens.size() > 0) {
+                Token token = tokens.poll();
+                if (token == Token.SKIP_TOKEN) {
+                    continue;
+                }
+
+                return token;
+            }
+
+            state.channel = Token.DEFAULT_CHANNEL;
+            state.tokenStartCharIndex = input.index();
+            state.tokenStartCharPositionInLine = input.getCharPositionInLine();
+            state.tokenStartLine = input.getLine();
+            state.text = null;
+            if ( input.LA(1)==CharStream.EOF ) {
+                return Token.EOF_TOKEN;
+            }
+            try {
+                mTokens();
+
+                // The matched rule queued nothing itself, so emit its own
+                // token; presumably the inherited emit() routes through
+                // emit(Token) below and lands in the queue - TODO confirm.
+                if (tokens.size() == 0) {
+                    emit();
+                }
+            }
+            catch (NoViableAltException nva) {
+                reportError(nva);
+                recover(nva); // throw out current char and try again
+            }
+            catch (RecognitionException re) {
+                reportError(re);
+                // match() routine has already called recover()
+            }
+        }
+    }
+
+    /* Queues a marker that nextToken() silently discards. */
+    public void skip() {
+        tokens.add(Token.SKIP_TOKEN);
+    }
+
+    /* Queues the token unchanged. */
+    public void emit(Token token) {
+        tokens.add(token);
+    }
+
+    /* Retypes the token, then queues it. */
+    public void emit(Token token, int type) {
+        token.setType(type);
+        tokens.add(token);
+    }
+
+    /* Retypes the token, moves it to the given channel, then queues it. */
+    public void emit(Token token, int type, int channel) {
+        token.setType(type);
+        token.setChannel(channel);
+        tokens.add(token);
+    }
+
+/*protected void mismatch(IntStream input, int ttype, BitSet follow) throws RecognitionException
+{
+ throw new MismatchedTokenException(ttype, input);
+}
+
+public Object recoverFromMismatchedSet(IntStream input, RecognitionException e, BitSet follow) throws RecognitionException
+{
+ throw e;
+}*/
+
+}
+
+/*@rulecatch {
+catch (RecognitionException e) {
+throw e;
+}
+}*/
+
+
+// .class directive, one or more access specifiers, then the class descriptor.
+CLASS_PHRASE
+    :   CLASS_DIRECTIVE_EMIT WS (ACCESS_SPEC_EMIT WS)+ CLASS_DESCRIPTOR_EMIT
+    ;
+
+// .super directive followed by the superclass descriptor.
+SUPER_PHRASE
+    :   SUPER_DIRECTIVE_EMIT WS CLASS_DESCRIPTOR_EMIT
+    ;
+
+// .field directive, access specifiers, name, type and an optional
+// "= literal" initial value.
+FIELD_PHRASE
+    :   FIELD_DIRECTIVE_EMIT WS
+        (ACCESS_SPEC_EMIT WS)+
+        MEMBER_NAME_EMIT WS
+        FIELD_TYPE_DESCRIPTOR_EMITCHILD WS?
+        ('=' WS? LITERAL_EMITCHILD)?
+    ;
+
+// .method directive, access specifiers, then the name immediately followed
+// by the prototype (no whitespace between the two).
+METHOD_PHRASE
+    :   METHOD_DIRECTIVE_EMIT WS
+        (ACCESS_SPEC_EMIT WS)+
+        MEMBER_NAME_EMIT METHOD_PROTOTYPE_EMITCHILDREN
+    ;
+
+// The directive that closes a method body.
+END_METHOD_PHRASE
+    :   END_METHOD_DIRECTIVE_EMIT
+    ;
+
+// .registers directive followed by an integer literal.
+REGISTERS_PHRASE
+    :   REGISTERS_DIRECTIVE_EMIT WS INTEGER_LITERAL_EMIT
+    ;
+
+// Opcode with no operands.
+INSTRUCTION_FORMAT10x_PHRASE
+    :   INSTRUCTION_FORMAT10x_EMIT
+    ;
+
+// Opcode followed by a single register.
+INSTRUCTION_FORMAT11x_PHRASE
+    :   INSTRUCTION_FORMAT11x_EMIT WS REGISTER_EMIT
+    ;
+
+// Opcode followed by two comma-separated registers.
+INSTRUCTION_FORMAT12x_PHRASE
+    :   INSTRUCTION_FORMAT12x_EMIT WS
+        REGISTER_EMIT WS? ',' WS? REGISTER_EMIT
+    ;
+
+// Opcode, register, then a fully qualified field name and its type.
+INSTRUCTION_FORMAT21c_FIELD_PHRASE
+    :   INSTRUCTION_FORMAT21c_FIELD_EMIT WS
+        REGISTER_EMIT WS? ',' WS?
+        FULLY_QUALIFIED_MEMBER_NAME_EMITCHILDREN WS
+        FIELD_TYPE_DESCRIPTOR_EMITCHILD
+    ;
+
+// Opcode, register, then a string literal.
+INSTRUCTION_FORMAT21c_STRING_PHRASE
+    :   INSTRUCTION_FORMAT21c_STRING_EMIT WS
+        REGISTER_EMIT WS? ',' WS?
+        STRING_LITERAL_EMIT
+    ;
+
+// Opcode, register, then a class or array type descriptor.
+INSTRUCTION_FORMAT21c_TYPE_PHRASE
+    :   INSTRUCTION_FORMAT21c_TYPE_EMIT WS
+        REGISTER_EMIT WS? ',' WS?
+        CLASS_OR_ARRAY_TYPE_DESCRIPTOR_EMITCHILD
+    ;
+
+// Opcode, two registers, then a fully qualified field name and its type.
+INSTRUCTION_FORMAT22c_FIELD_PHRASE
+    :   INSTRUCTION_FORMAT22c_FIELD_EMIT WS
+        REGISTER_EMIT WS? ',' WS?
+        REGISTER_EMIT WS? ',' WS?
+        FULLY_QUALIFIED_MEMBER_NAME_EMITCHILDREN WS
+        FIELD_TYPE_DESCRIPTOR_EMITCHILD
+    ;
+
+// Opcode, bracketed register list, then a fully qualified method name
+// immediately followed by its prototype.
+INSTRUCTION_FORMAT35c_METHOD_PHRASE
+    :   INSTRUCTION_FORMAT35c_METHOD_EMIT WS
+        REGISTER_LIST_EMITCHILDREN WS? ',' WS?
+        FULLY_QUALIFIED_MEMBER_NAME_EMITCHILDREN METHOD_PROTOTYPE_EMITCHILDREN
+    ;
+
+// Opcode, bracketed register range, then a fully qualified method name
+// immediately followed by its prototype.
+INSTRUCTION_FORMAT3rc_METHOD_PHRASE
+    :   INSTRUCTION_FORMAT3rc_METHOD_EMIT WS
+        REGISTER_RANGE_EMITCHILDREN WS? ',' WS?
+        FULLY_QUALIFIED_MEMBER_NAME_EMITCHILDREN METHOD_PROTOTYPE_EMITCHILDREN
+    ;
+
+
+// Each *_EMIT fragment below matches its plain counterpart and queues the
+// matched text as a token of that counterpart's type.
+
+fragment CLASS_DIRECTIVE_EMIT
+    :   CLASS_DIRECTIVE
+        { emit($CLASS_DIRECTIVE, CLASS_DIRECTIVE); }
+    ;
+fragment CLASS_DIRECTIVE : '.class' ;
+
+fragment SUPER_DIRECTIVE_EMIT
+    :   SUPER_DIRECTIVE
+        { emit($SUPER_DIRECTIVE, SUPER_DIRECTIVE); }
+    ;
+fragment SUPER_DIRECTIVE : '.super' ;
+
+fragment FIELD_DIRECTIVE_EMIT
+    :   FIELD_DIRECTIVE
+        { emit($FIELD_DIRECTIVE, FIELD_DIRECTIVE); }
+    ;
+fragment FIELD_DIRECTIVE : '.field' ;
+
+fragment METHOD_DIRECTIVE_EMIT
+    :   METHOD_DIRECTIVE
+        { emit($METHOD_DIRECTIVE, METHOD_DIRECTIVE); }
+    ;
+fragment METHOD_DIRECTIVE : '.method' ;
+
+fragment END_METHOD_DIRECTIVE_EMIT
+    :   END_METHOD_DIRECTIVE
+        { emit($END_METHOD_DIRECTIVE, END_METHOD_DIRECTIVE); }
+    ;
+fragment END_METHOD_DIRECTIVE : '.end method' ;
+
+fragment REGISTERS_DIRECTIVE_EMIT
+    :   REGISTERS_DIRECTIVE
+        { emit($REGISTERS_DIRECTIVE, REGISTERS_DIRECTIVE); }
+    ;
+fragment REGISTERS_DIRECTIVE : '.registers' ;
+
+fragment REGISTER_EMIT
+    :   REGISTER
+        { emit($REGISTER, REGISTER); }
+    ;
+// 'v' followed by one or more decimal digits
+fragment REGISTER : 'v' ('0'..'9')+ ;
+
+
+// Bracketed, possibly empty, comma-separated list of registers. Optional
+// whitespace is accepted after the opening bracket, around each comma and
+// before the closing bracket.
+fragment REGISTER_LIST_EMITCHILDREN
+    :   OPEN_BRACKET_EMIT
+        WS?
+        ( REGISTER_EMIT (WS? ',' WS? REGISTER_EMIT)* WS? )?
+        CLOSE_BRACKET_EMIT
+    ;
+
+
+// Bracketed register range: a first register, optionally followed by '..'
+// and a last register. Optional whitespace is accepted after the opening
+// bracket, around '..' and before the closing bracket. The trailing WS?
+// after the last register makes the two-register form as lenient as the
+// single-register form and as REGISTER_LIST_EMITCHILDREN.
+fragment REGISTER_RANGE_EMITCHILDREN
+    :   OPEN_BRACKET_EMIT
+        WS?
+        REGISTER_EMIT
+        WS?
+        ( '..' WS? REGISTER_EMIT WS? )?
+        CLOSE_BRACKET_EMIT
+    ;
+
+
+// Parenthesised run of zero or more parameter type descriptors (no
+// separators between them), followed by the return type descriptor.
+fragment METHOD_PROTOTYPE_EMITCHILDREN
+    :   OPEN_PAREN_EMIT
+        FIELD_TYPE_DESCRIPTOR_EMITCHILD*
+        CLOSE_PAREN_EMIT
+        TYPE_DESCRIPTOR_EMITCHILD
+    ;
+
+/* Matches a slash-separated class path, a further '/', and a member name.
+   Two tokens are queued: a CLASS_NAME token and then whatever
+   MEMBER_NAME_EMIT queues.
+   startPos records the character index before the first path segment. The
+   labelled token only spans the last SIMPLE_NAME segment, so its start index
+   is moved back to startPos before it is emitted as CLASS_NAME.
+   NOTE(review): presumably the token text is derived from the start/stop
+   indexes, so the emitted CLASS_NAME covers the whole path - confirm. */
+fragment FULLY_QUALIFIED_MEMBER_NAME_EMITCHILDREN
+@init {int startPos;}
+ : {startPos = getCharIndex();} (SIMPLE_NAME '/')* token=SIMPLE_NAME {((CommonToken)$token).setStartIndex(startPos); emit($token, CLASS_NAME);}
+ '/'
+ MEMBER_NAME_EMIT;
+
+// Any type descriptor, including void.
+fragment TYPE_DESCRIPTOR_EMITCHILD
+    :   PRIMITIVE_TYPE_EMIT | VOID_TYPE_EMIT | CLASS_DESCRIPTOR_EMIT | ARRAY_DESCRIPTOR_EMIT
+    ;
+
+// Any type descriptor except void.
+fragment FIELD_TYPE_DESCRIPTOR_EMITCHILD
+    :   PRIMITIVE_TYPE_EMIT | CLASS_DESCRIPTOR_EMIT | ARRAY_DESCRIPTOR_EMIT
+    ;
+
+// Reference types only: a class descriptor or an array descriptor.
+fragment CLASS_OR_ARRAY_TYPE_DESCRIPTOR_EMITCHILD
+    :   CLASS_DESCRIPTOR_EMIT | ARRAY_DESCRIPTOR_EMIT
+    ;
+
+fragment PRIMITIVE_TYPE_EMIT
+    :   PRIMITIVE_TYPE
+        { emit($PRIMITIVE_TYPE, PRIMITIVE_TYPE); }
+    ;
+
+// One of the single-letter primitive type codes.
+fragment PRIMITIVE_TYPE
+    :   'Z' | 'B' | 'S' | 'C' | 'I' | 'J' | 'F' | 'D'
+    ;
+
+
+fragment VOID_TYPE_EMIT
+    :   VOID_TYPE
+        { emit($VOID_TYPE, VOID_TYPE); }
+    ;
+
+// The single-letter void type code.
+fragment VOID_TYPE : 'V' ;
+
+
+fragment CLASS_DESCRIPTOR_EMIT
+ : CLASS_DESCRIPTOR {emit($CLASS_DESCRIPTOR, CLASS_DESCRIPTOR);};
+
+fragment CLASS_DESCRIPTOR
+ : 'L' CLASS_NAME ';';
+
+fragment CLASS_NAME
+ : (SIMPLE_NAME '/')* SIMPLE_NAME;
+
+
+fragment ARRAY_DESCRIPTOR_EMIT
+ : ARRAY_DESCRIPTOR {emit($ARRAY_DESCRIPTOR, ARRAY_DESCRIPTOR);};
+
+fragment ARRAY_DESCRIPTOR
+ : ARRAY_TYPE_PREFIX (PRIMITIVE_TYPE | CLASS_DESCRIPTOR);
+
+fragment ARRAY_TYPE_PREFIX
+ : ARRAY_CHAR_LIST[255];
+
+fragment ARRAY_CHAR_LIST[int maxCount] // matches at least one '[' and, for maxCount >= 1, at most maxCount of them
+ : {$maxCount > 1}?=> '[' ARRAY_CHAR_LIST[$maxCount - 1] // gated predicate: this recursive alternative is only viable while more than one '[' is still allowed
+ | '[' // base case: a single '['
+ ;
+
+
+fragment ACCESS_SPEC_EMIT
+ : ACCESS_SPEC {emit($ACCESS_SPEC, ACCESS_SPEC);};
+
+fragment ACCESS_SPEC
+ : 'public'
+ | 'private'
+ | 'static'
+ | 'constructor'
+ | 'final';
+
+
+
+fragment MEMBER_NAME_EMIT
+ : MEMBER_NAME {emit($MEMBER_NAME, MEMBER_NAME);};
+
+fragment MEMBER_NAME // a plain name, or a name wrapped in a matched '<' '>' pair (e.g. <init>); a lone '<' or '>' is not accepted as part of the name
+ : '<' SIMPLE_NAME '>' | SIMPLE_NAME;
+
+
+fragment SIMPLE_NAME:
+ ( 'A'..'Z'
+ | 'a'..'z'
+ | '0'..'9'
+ | '$'
+ | '-'
+ | '_'
+ | '\u00a1'..'\u1fff'
+ | '\u2010'..'\u2027'
+ | '\u2030'..'\ud7ff'
+ | '\ue000'..'\uffef'
+ )+;
+
+
+fragment LITERAL_EMITCHILD
+ : STRING_LITERAL_EMIT
+ | INTEGER_LITERAL_EMIT
+ | LONG_LITERAL_EMIT
+ | FLOAT_LITERAL_EMIT
+ | DOUBLE_LITERAL_EMIT
+ | CHAR_LITERAL_EMIT
+ | BOOL_LITERAL_EMIT;
+
+
+fragment STRING_LITERAL_EMIT
+ @init {StringBuilder sb = new StringBuilder();}
+ : STRING_LITERAL[sb]
+ {
+ $STRING_LITERAL.setText(sb.toString());
+ emit($STRING_LITERAL, STRING_LITERAL);
+ };
+
+fragment STRING_LITERAL [StringBuilder sb] // matches a double-quoted string; sb receives the text with escapes decoded and both quotes included
+ : '"' {sb.append('"');}
+ ( ESCAPE_SEQUENCE[sb]
+ | ~( '\\' | '"' | '\r' | '\n' ) {sb.append((char)input.LA(-1));} // LA(-1) is the character that was just consumed
+ )*
+ '"' {sb.append('"');}
+ ;
+
+fragment
+ESCAPE_SEQUENCE[StringBuilder sb] // consumes one backslash escape and appends the character it denotes to sb
+ : '\\'
+ (
+ 'b' {sb.append("\b");}
+ | 't' {sb.append("\t");}
+ | 'n' {sb.append("\n");}
+ | 'f' {sb.append("\f");}
+ | 'r' {sb.append("\r");}
+ | '\"' {sb.append("\"");}
+ | '\'' {sb.append("'");}
+ | '\\' {sb.append("\\");}
+ | 'u' HEX_DIGITS {sb.append((char)Integer.parseInt($HEX_DIGITS.text, 16));} // exactly four hex digits, appended as a single char
+ );
+
+fragment HEX_DIGITS
+ : HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT;
+
+fragment HEX_DIGIT // a single hexadecimal digit in either letter case
+ : '0'..'9' | 'A'..'F' | 'a'..'f';
+
+
+fragment INTEGER_LITERAL_EMIT
+ : INTEGER_LITERAL {emit($INTEGER_LITERAL, INTEGER_LITERAL);};
+
+fragment INTEGER_LITERAL // integer text in decimal (optionally negative), octal or hexadecimal form
+ : '-'? '0' // zero, optionally signed
+ | '-'? ('1'..'9') ('0'..'9')* // decimal without a leading zero, optionally signed
+ | '0' ('0'..'7')+ // octal: leading zero; no sign is accepted on this alternative
+ | HEX_PREFIX HEX_DIGIT+ // hexadecimal: 0x/0X prefix; no sign is accepted on this alternative
+ ;
+
+fragment HEX_PREFIX
+ : '0x'|'0X';
+
+
+fragment LONG_LITERAL_EMIT
+ : LONG_LITERAL {emit($LONG_LITERAL, LONG_LITERAL);};
+fragment LONG_LITERAL
+ : INTEGER_LITERAL ('l' | 'L');
+
+
+fragment FLOATING_POINT_NUMBER
+ : ('0' .. '9')+ '.' ('0' .. '9')* DECIMAL_EXPONENT?
+ | '.' ( '0' .. '9' )+ DECIMAL_EXPONENT?
+ | ('0' .. '9')+ DECIMAL_EXPONENT
+ | HEX_PREFIX
+ ( HEX_DIGIT+ ('.' HEX_DIGIT*)?
+ | '.' HEX_DIGIT+
+ )
+ BINARY_EXPONENT
+ ;
+
+fragment DECIMAL_EXPONENT // e/E followed by an optionally signed run of decimal digits, e.g. e10, E-3, e+5
+ : ('e'|'E') ('+'|'-')? ('0'..'9')+;
+
+fragment BINARY_EXPONENT // p/P followed by an optionally signed run of decimal digits, e.g. p4, P-2, p+1
+ : ('p'|'P') ('+'|'-')? ('0'..'9')+;
+
+
+fragment FLOAT_LITERAL_EMIT
+ : FLOAT_LITERAL {emit($FLOAT_LITERAL, FLOAT_LITERAL);};
+fragment FLOAT_LITERAL
+ : (FLOATING_POINT_NUMBER | ('0' .. '9')+) ('f' | 'F');
+
+fragment DOUBLE_LITERAL_EMIT
+ : DOUBLE_LITERAL {emit($DOUBLE_LITERAL, DOUBLE_LITERAL);};
+fragment DOUBLE_LITERAL
+ : FLOATING_POINT_NUMBER ('d' | 'D')?
+ | ('0' .. '9')+ ('d' | 'D');
+
+
+fragment CHAR_LITERAL_EMIT
+ : CHAR_LITERAL {emit($CHAR_LITERAL, CHAR_LITERAL);};
+fragment CHAR_LITERAL // single-quoted character: either one escape sequence or one ordinary character
+ : '\'' {StringBuilder sb = new StringBuilder("'");}
+ ( ESCAPE_SEQUENCE[sb] {sb.append("'"); setText(sb.toString());} // NOTE(review): setText runs only on this branch and before the closing quote is matched; confirm CHAR_LITERAL_EMIT actually sees the decoded text
+ | ~( '\'' | '\\' | '\r' | '\n' )
+ )
+ '\''
+ ;
+
+fragment BOOL_LITERAL_EMIT
+ : BOOL_LITERAL {emit($BOOL_LITERAL, BOOL_LITERAL);};
+fragment BOOL_LITERAL
+ : 'true'|'false';
+
+fragment INSTRUCTION_FORMAT10x_EMIT
+ : INSTRUCTION_FORMAT10x {emit($INSTRUCTION_FORMAT10x, INSTRUCTION_FORMAT10x);};
+fragment INSTRUCTION_FORMAT10x
+ : 'return-void'
+ | 'nop';
+
+fragment INSTRUCTION_FORMAT11x_EMIT
+ : INSTRUCTION_FORMAT11x {emit($INSTRUCTION_FORMAT11x, INSTRUCTION_FORMAT11x);};
+fragment INSTRUCTION_FORMAT11x
+ : 'move-result'
+ | 'move-result-wide'
+ | 'move-result-object'
+ | 'move-exception'
+ | 'return'
+ | 'return-wide'
+ | 'return-object'
+ | 'monitor-enter'
+ | 'monitor-exit'
+ | 'throw';
+
+fragment INSTRUCTION_FORMAT12x_EMIT
+ : INSTRUCTION_FORMAT12x {emit($INSTRUCTION_FORMAT12x, INSTRUCTION_FORMAT12x);};
+fragment INSTRUCTION_FORMAT12x
+ : 'move'
+ | 'move-wide'
+ | 'move-object'
+ | 'array-length'
+ | 'neg-int'
+ | 'not-int'
+ | 'neg-long'
+ | 'not-long'
+ | 'neg-float'
+ | 'neg-double'
+ | 'int-to-long'
+ | 'int-to-float'
+ | 'int-to-double'
+ | 'long-to-int'
+ | 'long-to-float'
+ | 'long-to-double'
+ | 'float-to-int'
+ | 'float-to-long'
+ | 'float-to-double'
+ | 'double-to-int'
+ | 'double-to-long'
+ | 'double-to-float'
+ | 'int-to-byte'
+ | 'int-to-char'
+ | 'int-to-short'
+ | 'add-int/2addr'
+ | 'sub-int/2addr'
+ | 'mul-int/2addr'
+ | 'div-int/2addr'
+ | 'rem-int/2addr'
+ | 'and-int/2addr'
+ | 'or-int/2addr'
+ | 'xor-int/2addr'
+ | 'shl-int/2addr'
+ | 'shr-int/2addr'
+ | 'ushr-int/2addr'
+ | 'add-long/2addr'
+ | 'sub-long/2addr'
+ | 'mul-long/2addr'
+ | 'div-long/2addr'
+ | 'rem-long/2addr'
+ | 'and-long/2addr'
+ | 'or-long/2addr'
+ | 'xor-long/2addr'
+ | 'shl-long/2addr'
+ | 'shr-long/2addr'
+ | 'ushr-long/2addr'
+ | 'add-float/2addr'
+ | 'sub-float/2addr'
+ | 'mul-float/2addr'
+ | 'div-float/2addr'
+ | 'rem-float/2addr'
+ | 'add-double/2addr'
+ | 'sub-double/2addr'
+ | 'mul-double/2addr'
+ | 'div-double/2addr'
+ | 'rem-double/2addr';
+
+fragment INSTRUCTION_FORMAT21c_FIELD_EMIT
+ : INSTRUCTION_FORMAT21c_FIELD {emit($INSTRUCTION_FORMAT21c_FIELD, INSTRUCTION_FORMAT21c_FIELD);};
+fragment INSTRUCTION_FORMAT21c_FIELD
+ : 'sget'
+ | 'sget-wide'
+ | 'sget-object'
+ | 'sget-boolean'
+ | 'sget-byte'
+ | 'sget-char'
+ | 'sget-short'
+ | 'sput'
+ | 'sput-wide'
+ | 'sput-object'
+ | 'sput-boolean'
+ | 'sput-byte'
+ | 'sput-char'
+ | 'sput-short'
+ ;
+
+fragment INSTRUCTION_FORMAT21c_STRING_EMIT
+ : INSTRUCTION_FORMAT21c_STRING {emit($INSTRUCTION_FORMAT21c_STRING, INSTRUCTION_FORMAT21c_STRING);};
+fragment INSTRUCTION_FORMAT21c_STRING
+ : 'const-string';
+
+fragment INSTRUCTION_FORMAT21c_TYPE_EMIT
+ : INSTRUCTION_FORMAT21c_TYPE {emit($INSTRUCTION_FORMAT21c_TYPE, INSTRUCTION_FORMAT21c_TYPE);};
+fragment INSTRUCTION_FORMAT21c_TYPE
+ : 'check-cast'
+ | 'new-instance'
+ | 'const-class';
+
+fragment INSTRUCTION_FORMAT22c_FIELD_EMIT
+ : INSTRUCTION_FORMAT22c_FIELD {emit($INSTRUCTION_FORMAT22c_FIELD, INSTRUCTION_FORMAT22c_FIELD);};
+fragment INSTRUCTION_FORMAT22c_FIELD
+ : 'iget'
+ | 'iget-wide'
+ | 'iget-object'
+ | 'iget-boolean'
+ | 'iget-byte'
+ | 'iget-char'
+ | 'iget-short'
+ | 'iput'
+ | 'iput-wide'
+ | 'iput-object'
+ | 'iput-boolean'
+ | 'iput-byte'
+ | 'iput-char'
+ | 'iput-short'
+ ;
+
+fragment INSTRUCTION_FORMAT35c_METHOD_EMIT
+ : INSTRUCTION_FORMAT35c_METHOD {emit($INSTRUCTION_FORMAT35c_METHOD, INSTRUCTION_FORMAT35c_METHOD);};
+fragment INSTRUCTION_FORMAT35c_METHOD
+ : 'invoke-virtual'
+ | 'invoke-super'
+ | 'invoke-direct'
+ | 'invoke-static'
+ | 'invoke-interface'
+ ;
+
+fragment INSTRUCTION_FORMAT3rc_METHOD_EMIT
+ : INSTRUCTION_FORMAT3rc_METHOD {emit($INSTRUCTION_FORMAT3rc_METHOD, INSTRUCTION_FORMAT3rc_METHOD);};
+fragment INSTRUCTION_FORMAT3rc_METHOD
+ : 'invoke-virtual/range'
+ | 'invoke-super/range'
+ | 'invoke-direct/range'
+ | 'invoke-static/range'
+ | 'invoke-interface/range'
+ ;
+
+
+fragment OPEN_PAREN_EMIT
+ : OPEN_PAREN {emit($OPEN_PAREN, OPEN_PAREN);};
+fragment OPEN_PAREN
+ : '(';
+
+fragment CLOSE_PAREN_EMIT
+ : CLOSE_PAREN {emit($CLOSE_PAREN, CLOSE_PAREN);};
+fragment CLOSE_PAREN
+ : ')';
+
+fragment OPEN_BRACKET_EMIT
+ : OPEN_BRACKET {emit($OPEN_BRACKET, OPEN_BRACKET);};
+fragment OPEN_BRACKET
+ : '{';
+
+fragment CLOSE_BRACKET_EMIT
+ : CLOSE_BRACKET {emit($CLOSE_BRACKET, CLOSE_BRACKET);};
+fragment CLOSE_BRACKET
+ : '}';
+
+fragment WS // whitespace inside a composite rule: explicitly emitted as a WHITE_SPACE token on the hidden channel
+ : WHITE_SPACE {emit($WHITE_SPACE, WHITE_SPACE, Token.HIDDEN_CHANNEL);};
+
+WHITE_SPACE // non-fragment rule: a run of spaces, tabs, line feeds or carriage returns, placed on the hidden channel
+ : (' '|'\t'|'\n'|'\r')+ {$channel = HIDDEN;};
diff --git a/src/main/antlr3/org/JesusFreke/smali/smaliParser.g b/src/main/antlr3/org/JesusFreke/smali/smaliParser.g
new file mode 100644
index 00000000..eb57a184
--- /dev/null
+++ b/src/main/antlr3/org/JesusFreke/smali/smaliParser.g
@@ -0,0 +1,182 @@
+/*
+ * [The "BSD licence"]
+ * Copyright (c) 2009 Ben Gruver
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+parser grammar smaliParser;
+
+options {
+ tokenVocab=smaliLexer;
+ output=AST;
+ ASTLabelType=CommonTree;
+}
+
+tokens {
+ //I_* tokens are imaginary tokens used as parent AST nodes
+ I_CLASS_DEF;
+ I_SUPER;
+ I_ACCESS_LIST;
+ I_METHODS;
+ I_FIELDS;
+ I_FIELD;
+ I_FIELD_TYPE;
+ I_FIELD_INITIAL_VALUE;
+ I_METHOD;
+ I_METHOD_PROTOTYPE;
+ I_METHOD_RETURN_TYPE;
+ I_REGISTERS;
+ I_STATEMENTS;
+ I_STATEMENT_FORMAT10x;
+ I_STATEMENT_FORMAT11x;
+ I_STATEMENT_FORMAT12x;
+ I_STATEMENT_FORMAT21c_TYPE;
+ I_STATEMENT_FORMAT21c_FIELD;
+ I_STATEMENT_FORMAT22c_FIELD;
+ I_STATEMENT_FORMAT21c_STRING;
+ I_STATEMENT_FORMAT35c_METHOD;
+ I_STATEMENT_FORMAT3rc_METHOD;
+ I_REGISTER_RANGE;
+ I_REGISTER_LIST;
+
+ CLASS_NAME;
+ MEMBER_NAME;
+}
+
+@header {
+package org.JesusFreke.smali;
+}
+
+
+smali_file: header methods_and_fields -> ^(I_CLASS_DEF header methods_and_fields);
+
+header : class_spec super_spec;
+
+class_spec // class directive, access flags, class descriptor; the rewrite drops the directive token and puts the descriptor first
+ : CLASS_DIRECTIVE access_list CLASS_DESCRIPTOR -> CLASS_DESCRIPTOR access_list;
+
+super_spec
+ : SUPER_DIRECTIVE CLASS_DESCRIPTOR -> ^(I_SUPER[$start, "I_SUPER"] CLASS_DESCRIPTOR);
+
+access_list
+ : ACCESS_SPEC+ -> ^(I_ACCESS_LIST[$start,"I_ACCESS_LIST"] ACCESS_SPEC+);
+
+methods_and_fields // methods and fields may be interleaved in the input; the rewrite groups them under I_METHODS and I_FIELDS
+ : (method | field)* -> ^(I_METHODS method*) ^(I_FIELDS field*);
+
+field : FIELD_DIRECTIVE access_list MEMBER_NAME field_type_descriptor literal?
+ -> ^(I_FIELD[$start, "I_FIELD"] MEMBER_NAME access_list ^(I_FIELD_TYPE field_type_descriptor) ^(I_FIELD_INITIAL_VALUE literal)?);
+
+method : METHOD_DIRECTIVE access_list MEMBER_NAME method_prototype
+ registers_directive
+ statements
+ END_METHOD_DIRECTIVE
+ -> ^(I_METHOD[$start, "I_METHOD"] MEMBER_NAME method_prototype access_list registers_directive statements);
+
+method_prototype
+ : OPEN_PAREN field_type_descriptor* CLOSE_PAREN type_descriptor
+ -> ^(I_METHOD_PROTOTYPE[$start, "I_METHOD_PROTOTYPE"] ^(I_METHOD_RETURN_TYPE type_descriptor) field_type_descriptor*);
+
+
+
+registers_directive
+ : REGISTERS_DIRECTIVE INTEGER_LITERAL
+ -> ^(I_REGISTERS[$start, "I_REGISTERS"] INTEGER_LITERAL);
+
+
+fully_qualified_method
+ : CLASS_NAME MEMBER_NAME method_prototype;
+
+fully_qualified_field
+ : CLASS_NAME MEMBER_NAME field_type_descriptor;
+
+statements
+ : statement* -> ^(I_STATEMENTS statement*);
+
+statement
+ : instruction;
+
+instruction // one alternative per instruction format; each result is rooted at an imaginary I_STATEMENT_* node created from the first token
+ //e.g. return-void
+ : INSTRUCTION_FORMAT10x
+ -> ^(I_STATEMENT_FORMAT10x[$start, "I_STATEMENT_FORMAT10x"] INSTRUCTION_FORMAT10x)
+ | //e.g. move-result-object v1
+ INSTRUCTION_FORMAT11x REGISTER
+ -> ^(I_STATEMENT_FORMAT11x[$start, "I_STATEMENT_FORMAT11x"] INSTRUCTION_FORMAT11x REGISTER)
+ | //e.g. move v1 v2
+ INSTRUCTION_FORMAT12x REGISTER REGISTER
+ -> ^(I_STATEMENT_FORMAT12x[$start, "I_STATEMENT_FORMAT12x"] INSTRUCTION_FORMAT12x REGISTER REGISTER)
+ | //e.g. sget-object v0 java/lang/System/out Ljava/io/PrintStream;
+ INSTRUCTION_FORMAT21c_FIELD REGISTER fully_qualified_field
+ -> ^(I_STATEMENT_FORMAT21c_FIELD[$start, "I_STATEMENT_FORMAT21c_FIELD"] INSTRUCTION_FORMAT21c_FIELD REGISTER fully_qualified_field)
+ | //e.g. const-string v1 "Hello World!"
+ INSTRUCTION_FORMAT21c_STRING REGISTER STRING_LITERAL
+ -> ^(I_STATEMENT_FORMAT21c_STRING[$start, "I_STATEMENT_FORMAT21c_STRING"] INSTRUCTION_FORMAT21c_STRING REGISTER STRING_LITERAL)
+ | //e.g. const-class v2 Lorg/JesusFreke/HelloWorld2/HelloWorld2;
+ INSTRUCTION_FORMAT21c_TYPE REGISTER class_or_array_type_descriptor
+ -> ^(I_STATEMENT_FORMAT21c_TYPE[$start, "I_STATEMENT_FORMAT21c_TYPE"] INSTRUCTION_FORMAT21c_TYPE REGISTER class_or_array_type_descriptor)
+ | //e.g. iput-object v1 v0 org/JesusFreke/HelloWorld2/HelloWorld2/helloWorld Ljava/lang/String;
+ INSTRUCTION_FORMAT22c_FIELD REGISTER REGISTER fully_qualified_field
+ -> ^(I_STATEMENT_FORMAT22c_FIELD[$start, "I_STATEMENT_FORMAT22c_FIELD"] INSTRUCTION_FORMAT22c_FIELD REGISTER REGISTER fully_qualified_field)
+ | //e.g. invoke-virtual {v0,v1} java/io/PrintStream/print(Ljava/lang/String;)V
+ INSTRUCTION_FORMAT35c_METHOD OPEN_BRACKET register_list CLOSE_BRACKET fully_qualified_method
+ -> ^(I_STATEMENT_FORMAT35c_METHOD[$start, "I_STATEMENT_FORMAT35c_METHOD"] INSTRUCTION_FORMAT35c_METHOD register_list fully_qualified_method)
+ | //e.g. invoke-virtual/range {v25..v26} java/lang/StringBuilder/append(Ljava/lang/String;)Ljava/lang/StringBuilder;
+ INSTRUCTION_FORMAT3rc_METHOD OPEN_BRACKET register_range CLOSE_BRACKET fully_qualified_method
+ -> ^(I_STATEMENT_FORMAT3rc_METHOD[$start, "I_STATEMENT_FORMAT3rc_METHOD"] INSTRUCTION_FORMAT3rc_METHOD register_range fully_qualified_method)
+ ;
+
+
+register_list
+ : REGISTER* -> ^(I_REGISTER_LIST[$start, "I_REGISTER_LIST"] REGISTER*);
+
+register_range // one register, or a start and an end register; both forms become children of an I_REGISTER_RANGE node
+ : REGISTER REGISTER? -> ^(I_REGISTER_RANGE[$start, "I_REGISTER_RANGE"] REGISTER REGISTER?);
+
+
+field_type_descriptor
+ : PRIMITIVE_TYPE
+ | CLASS_DESCRIPTOR
+ | ARRAY_DESCRIPTOR
+ ;
+
+class_or_array_type_descriptor
+ : CLASS_DESCRIPTOR
+ | ARRAY_DESCRIPTOR;
+
+type_descriptor
+ : VOID_TYPE
+ | PRIMITIVE_TYPE
+ | CLASS_DESCRIPTOR
+ | ARRAY_DESCRIPTOR
+ ;
+
+literal : INTEGER_LITERAL
+ | LONG_LITERAL
+ | FLOAT_LITERAL
+ | DOUBLE_LITERAL
+ | CHAR_LITERAL
+ | STRING_LITERAL
+ | BOOL_LITERAL;
diff --git a/src/main/antlr3/org/JesusFreke/smali/smaliTreeWalker.g b/src/main/antlr3/org/JesusFreke/smali/smaliTreeWalker.g
index 76ab1f7e..53a2c585 100644
--- a/src/main/antlr3/org/JesusFreke/smali/smaliTreeWalker.g
+++ b/src/main/antlr3/org/JesusFreke/smali/smaliTreeWalker.g
@@ -29,7 +29,7 @@
tree grammar smaliTreeWalker;
options {
- tokenVocab=smali;
+ tokenVocab=smaliParser;
ASTLabelType=CommonTree;
}
@@ -96,16 +96,16 @@ header : class_spec super_spec
};
class_spec returns[TypeIdItem type, int accessFlags]
- : class_name access_list
+ : class_type_descriptor access_list
{
- $type = $class_name.type;
+ $type = $class_type_descriptor.type;
$accessFlags = $access_list.value;
};
super_spec returns[TypeIdItem type]
- : ^(I_SUPER class_name)
+ : ^(I_SUPER class_type_descriptor)
{
- $type = $class_name.type;
+ $type = $class_type_descriptor.type;
};
access_list returns [int value]
@@ -134,10 +134,10 @@ methods : ^(I_METHODS
})*);
field returns[ClassDataItem.EncodedField encodedField, EncodedValue encodedValue]
- :^(I_FIELD member_name access_list ^(I_FIELD_TYPE field_type_descriptor) field_initial_value)
+ :^(I_FIELD MEMBER_NAME access_list ^(I_FIELD_TYPE field_type_descriptor) field_initial_value)
{
TypeIdItem classType = classDefItem.getClassType();
- StringIdItem memberName = new StringIdItem(dexFile, $member_name.memberName);
+ StringIdItem memberName = new StringIdItem(dexFile, $MEMBER_NAME.text);
TypeIdItem fieldType = $field_type_descriptor.type;
FieldIdItem fieldIdItem = new FieldIdItem(dexFile, classType, memberName, fieldType);
@@ -157,7 +157,7 @@ field returns[ClassDataItem.EncodedField encodedField, EncodedValue encodedValue
field_initial_value returns[EncodedValue encodedValue]
: ^(I_FIELD_INITIAL_VALUE
- ( int_literal { $encodedValue = new EncodedValue(dexFile, new IntEncodedValueSubField($int_literal.value)); }
+ ( integer_literal { $encodedValue = new EncodedValue(dexFile, new IntEncodedValueSubField($integer_literal.value)); }
| long_literal { $encodedValue = new EncodedValue(dexFile, new LongEncodedValueSubField($long_literal.value)); }
| float_literal { $encodedValue = new EncodedValue(dexFile, new FloatEncodedValueSubField($float_literal.value)); }
| double_literal { $encodedValue = new EncodedValue(dexFile, new DoubleEncodedValueSubField($double_literal.value)); }
@@ -169,10 +169,10 @@ field_initial_value returns[EncodedValue encodedValue]
method returns[ClassDataItem.EncodedMethod encodedMethod]
- : ^(I_METHOD method_name_and_prototype access_list locals_directive statements)
+ : ^(I_METHOD method_name_and_prototype access_list registers_directive statements)
{
MethodIdItem methodIdItem = $method_name_and_prototype.methodIdItem;
- int registers = $locals_directive.registers;
+ int registers = $registers_directive.registers;
int access = $access_list.value;
boolean isStatic = (access & AccessFlags.STATIC) != 0;
ArrayList instructions = $statements.instructions;
@@ -192,10 +192,10 @@ method_prototype returns[ProtoIdItem protoIdItem]
};
method_name_and_prototype returns[MethodIdItem methodIdItem]
- : member_name method_prototype
+ : MEMBER_NAME method_prototype
{
TypeIdItem classType = classDefItem.getClassType();
- String methodNameString = $member_name.memberName;
+ String methodNameString = $MEMBER_NAME.text;
StringIdItem methodName = new StringIdItem(dexFile, methodNameString);
ProtoIdItem protoIdItem = $method_prototype.protoIdItem;
@@ -214,23 +214,25 @@ field_type_list returns[ArrayList types]
}
)*;
-locals_directive returns[int registers]
- : ^(I_REGISTERS INT_LITERAL) {$registers = Integer.parseInt($INT_LITERAL.text);};
+registers_directive returns[int registers] // reads the register count carried by the I_REGISTERS node
+ : ^(I_REGISTERS INTEGER_LITERAL) {$registers = Integer.decode($INTEGER_LITERAL.text);}; // decode() also handles the 0x/0X hex and leading-zero octal forms the lexer admits for INTEGER_LITERAL
-full_method_name_and_prototype returns[MethodIdItem methodIdItem]
- : QUALIFIED_MEMBER__CLASS_NAME QUALIFIED_MEMBER__MEMBER_NAME method_prototype
+
+
+fully_qualified_method returns[MethodIdItem methodIdItem]
+ : CLASS_NAME MEMBER_NAME method_prototype
{
- TypeIdItem classType = new TypeIdItem(dexFile, "L" + $QUALIFIED_MEMBER__CLASS_NAME.text + ";");
- StringIdItem methodName = new StringIdItem(dexFile, $QUALIFIED_MEMBER__MEMBER_NAME.text);
+ TypeIdItem classType = new TypeIdItem(dexFile, "L" + $CLASS_NAME.text + ";");
+ StringIdItem methodName = new StringIdItem(dexFile, $MEMBER_NAME.text);
ProtoIdItem prototype = $method_prototype.protoIdItem;
$methodIdItem = new MethodIdItem(dexFile, classType, methodName, prototype);
};
-full_field_name_and_type returns[FieldIdItem fieldIdItem]
- : QUALIFIED_MEMBER__CLASS_NAME QUALIFIED_MEMBER__MEMBER_NAME field_type_descriptor
+fully_qualified_field returns[FieldIdItem fieldIdItem]
+ : CLASS_NAME MEMBER_NAME field_type_descriptor
{
- TypeIdItem classType = new TypeIdItem(dexFile, "L" + $QUALIFIED_MEMBER__CLASS_NAME.text + ";");
- StringIdItem fieldName = new StringIdItem(dexFile, $QUALIFIED_MEMBER__MEMBER_NAME.text);
+ TypeIdItem classType = new TypeIdItem(dexFile, "L" + $CLASS_NAME.text + ";");
+ StringIdItem fieldName = new StringIdItem(dexFile, $MEMBER_NAME.text);
TypeIdItem fieldType = $field_type_descriptor.type;
$fieldIdItem = new FieldIdItem(dexFile, classType, fieldName, fieldType);
};
@@ -249,42 +251,42 @@ statements returns[ArrayList instructions]
instruction returns[Instruction instruction]
//e.g. return
- : ^(I_STATEMENT_FORMAT10x INSTRUCTION_NAME_FORMAT10x)
+ : ^(I_STATEMENT_FORMAT10x INSTRUCTION_FORMAT10x)
{
- Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT10x.text);
+ Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT10x.text);
$instruction = Format10x.Format.make(dexFile, opcode.value);
}
| //e.g. move-result-object v1
- ^(I_STATEMENT_FORMAT11x INSTRUCTION_NAME_FORMAT11x REGISTER)
+ ^(I_STATEMENT_FORMAT11x INSTRUCTION_FORMAT11x REGISTER)
{
- Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT11x.text);
+ Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT11x.text);
short regA = parseRegister_byte($REGISTER.text);
$instruction = Format11x.Format.make(dexFile, opcode.value, regA);
}
| //e.g. move v1 v2
- ^(I_STATEMENT_FORMAT12x INSTRUCTION_NAME_FORMAT12x registerA=REGISTER registerB=REGISTER)
+ ^(I_STATEMENT_FORMAT12x INSTRUCTION_FORMAT12x registerA=REGISTER registerB=REGISTER)
{
- Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT12x.text);
+ Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT12x.text);
byte regA = parseRegister_nibble($registerA.text);
byte regB = parseRegister_nibble($registerB.text);
$instruction = Format12x.Format.make(dexFile, opcode.value, regA, regB);
}
| //e.g. sget_object v0 java/lang/System/out LJava/io/PrintStream;
- ^(I_STATEMENT_FORMAT21c_FIELD INSTRUCTION_NAME_FORMAT21c_FIELD REGISTER full_field_name_and_type)
+ ^(I_STATEMENT_FORMAT21c_FIELD INSTRUCTION_FORMAT21c_FIELD REGISTER fully_qualified_field)
{
- Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT21c_FIELD.text);
+ Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT21c_FIELD.text);
short regA = parseRegister_byte($REGISTER.text);
- FieldIdItem fieldIdItem = $full_field_name_and_type.fieldIdItem;
+ FieldIdItem fieldIdItem = $fully_qualified_field.fieldIdItem;
$instruction = Format21c.Format.make(dexFile, opcode.value, regA, fieldIdItem);
}
| //e.g. const-string v1 "Hello World!"
- ^(I_STATEMENT_FORMAT21c_STRING INSTRUCTION_NAME_FORMAT21c_STRING REGISTER string_literal)
+ ^(I_STATEMENT_FORMAT21c_STRING INSTRUCTION_FORMAT21c_STRING REGISTER string_literal)
{
- Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT21c_STRING.text);
+ Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT21c_STRING.text);
short regA = parseRegister_byte($REGISTER.text);
StringIdItem stringIdItem = new StringIdItem(dexFile, $string_literal.value);
@@ -292,9 +294,9 @@ instruction returns[Instruction instruction]
$instruction = Format21c.Format.make(dexFile, opcode.value, regA, stringIdItem);
}
| //e.g. const-class v2 org/JesusFreke/HelloWorld2/HelloWorld2
- ^(I_STATEMENT_FORMAT21c_TYPE INSTRUCTION_NAME_FORMAT21c_TYPE REGISTER class_or_array_type_descriptor)
+ ^(I_STATEMENT_FORMAT21c_TYPE INSTRUCTION_FORMAT21c_TYPE REGISTER class_or_array_type_descriptor)
{
- Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT21c_TYPE.text);
+ Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT21c_TYPE.text);
short regA = parseRegister_byte($REGISTER.text);
TypeIdItem typeIdItem = $class_or_array_type_descriptor.type;
@@ -302,22 +304,22 @@ instruction returns[Instruction instruction]
$instruction = Format21c.Format.make(dexFile, opcode.value, regA, typeIdItem);
}
| //e.g. invoke-virtual {v0,v1} java/io/PrintStream/print(Ljava/lang/Stream;)V
- ^(I_STATEMENT_FORMAT35c_METHOD INSTRUCTION_NAME_FORMAT35c_METHOD register_list full_method_name_and_prototype)
+ ^(I_STATEMENT_FORMAT35c_METHOD INSTRUCTION_FORMAT35c_METHOD register_list fully_qualified_method)
{
- Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT35c_METHOD.text);
+ Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT35c_METHOD.text);
//this depends on the fact that register_list returns a byte[5]
byte[] registers = $register_list.registers;
byte registerCount = $register_list.registerCount;
- MethodIdItem methodIdItem = $full_method_name_and_prototype.methodIdItem;
+ MethodIdItem methodIdItem = $fully_qualified_method.methodIdItem;
$instruction = Format35c.Format.make(dexFile, opcode.value, registerCount, registers[0], registers[1], registers[2], registers[3], registers[4], methodIdItem);
}
| //e.g. invoke-virtual/range {v25..v26} java/lang/StringBuilder/append(Ljava/lang/String;)Ljava/lang/StringBuilder;
- ^(I_STATEMENT_FORMAT3rc_METHOD INSTRUCTION_NAME_FORMAT3rc_METHOD register_range full_method_name_and_prototype)
+ ^(I_STATEMENT_FORMAT3rc_METHOD INSTRUCTION_FORMAT3rc_METHOD register_range fully_qualified_method)
{
- Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT3rc_METHOD.text);
+ Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT3rc_METHOD.text);
int startRegister = $register_range.startRegister;
int endRegister = $register_range.endRegister;
@@ -331,19 +333,19 @@ instruction returns[Instruction instruction]
throw new RuntimeException("A register range must have the lower register listed first");
}
- MethodIdItem methodIdItem = $full_method_name_and_prototype.methodIdItem;
+ MethodIdItem methodIdItem = $fully_qualified_method.methodIdItem;
//not supported yet
$instruction = Format3rc.Format.make(dexFile, opcode.value, (short)registerCount, startRegister, methodIdItem);
}
| //e.g. iput-object v1 v0 org/JesusFreke/HelloWorld2/HelloWorld2.helloWorld Ljava/lang/String;
- ^(I_STATEMENT_FORMAT22c_FIELD INSTRUCTION_NAME_FORMAT22c_FIELD registerA=REGISTER registerB=REGISTER full_field_name_and_type)
+ ^(I_STATEMENT_FORMAT22c_FIELD INSTRUCTION_FORMAT22c_FIELD registerA=REGISTER registerB=REGISTER fully_qualified_field)
{
- Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT22c_FIELD.text);
+ Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT22c_FIELD.text);
byte regA = parseRegister_nibble($registerA.text);
byte regB = parseRegister_nibble($registerB.text);
- FieldIdItem fieldIdItem = $full_field_name_and_type.fieldIdItem;
+ FieldIdItem fieldIdItem = $fully_qualified_field.fieldIdItem;
$instruction = Format22c.Format.make(dexFile, opcode.value, regA, regB, fieldIdItem);
}
@@ -378,54 +380,19 @@ register_range returns[int startRegister, int endRegister]
}
;
-simple_name
- : SIMPLE_NAME
- | ACCESS_SPEC
- | INT_LITERAL
- | LONG_LITERAL
- | FLOAT_LITERAL_SIMPLE_NAME
- | DOUBLE_LITERAL_SIMPLE_NAME
- | BOOL_LITERAL
- | PRIMITIVE_TYPE
- | instruction_name
- ;
-
-instruction_name returns[String value]
- : INSTRUCTION_NAME_FORMAT10x
- | INSTRUCTION_NAME_FORMAT11x
- | INSTRUCTION_NAME_FORMAT12x
- | INSTRUCTION_NAME_FORMAT21c_FIELD
- | INSTRUCTION_NAME_FORMAT21c_STRING
- | INSTRUCTION_NAME_FORMAT21c_TYPE
- | INSTRUCTION_NAME_FORMAT22c_FIELD
- | INSTRUCTION_NAME_FORMAT35c_METHOD
- | INSTRUCTION_NAME_FORMAT3rc_METHOD
- ;
-
-member_name returns[String memberName]
- : (simple_name
- | MEMBER_NAME) {$memberName = $start.getText();}
- ;
-
-class_name returns [TypeIdItem type]
- : token=(SIMPLE_NAME | CLASS_WITH_PACKAGE_NAME)
- {
- $type = new TypeIdItem(dexFile, 'L'+$token.text+';');
- };
-
field_type_descriptor returns [TypeIdItem type]
- : token=(PRIMITIVE_TYPE
+ : (PRIMITIVE_TYPE
| CLASS_DESCRIPTOR
- | ARRAY_TYPE)
+ | ARRAY_DESCRIPTOR)
{
- $type = new TypeIdItem(dexFile, $token.text);
+ $type = new TypeIdItem(dexFile, $start.getText());
};
class_or_array_type_descriptor returns [TypeIdItem type]
- : token=(CLASS_DESCRIPTOR
- | ARRAY_TYPE)
+ : (CLASS_DESCRIPTOR
+ | ARRAY_DESCRIPTOR)
{
- $type = new TypeIdItem(dexFile, $token.text);
+ $type = new TypeIdItem(dexFile, $start.getText());
};
class_type_descriptor returns [TypeIdItem type]
@@ -439,8 +406,8 @@ type_descriptor returns [TypeIdItem type]
| field_type_descriptor {$type = $field_type_descriptor.type;}
;
-int_literal returns[int value]
- : INT_LITERAL { $value = Integer.parseInt($INT_LITERAL.text); };
+integer_literal returns[int value] // numeric value of an INTEGER_LITERAL token
+ : INTEGER_LITERAL { $value = Integer.decode($INTEGER_LITERAL.text); }; // decode() understands 0x/0X hex and leading-zero octal, which parseInt rejects or reads as decimal
long_literal returns[long value]
: LONG_LITERAL { $value = Long.parseLong($LONG_LITERAL.text); };
@@ -455,7 +422,11 @@ char_literal returns[char value]
: CHAR_LITERAL { $value = $CHAR_LITERAL.text.charAt(0); };
string_literal returns[String value]
- : STRING_LITERAL { $value = $STRING_LITERAL.text; };
+ : STRING_LITERAL
+ {
+ $value = $STRING_LITERAL.text;
+ $value = $value.substring(1,$value.length()-1); // drop the first and last character, i.e. the quotes the lexer keeps in the token text
+ };
bool_literal returns[boolean value]
: BOOL_LITERAL { $value = Boolean.parseBoolean($BOOL_LITERAL.text); };
diff --git a/src/main/java/org/JesusFreke/smali/smali.java b/src/main/java/org/JesusFreke/smali/smali.java
index ddc57cc4..215564a0 100644
--- a/src/main/java/org/JesusFreke/smali/smali.java
+++ b/src/main/java/org/JesusFreke/smali/smali.java
@@ -32,6 +32,7 @@ import org.JesusFreke.dexlib.DexFile;
import org.JesusFreke.dexlib.util.ByteArrayOutput;
import org.antlr.runtime.ANTLRInputStream;
import org.antlr.runtime.CommonTokenStream;
+import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.CommonTreeNodeStream;
@@ -48,11 +49,12 @@ public class smali
List l = tokens.getTokens();*/
-
ANTLRInputStream input = new ANTLRInputStream(new FileInputStream(args[0]));
smaliLexer lexer = new smaliLexer(input);
+
CommonTokenStream tokens = new CommonTokenStream(lexer);
smaliParser parser = new smaliParser(tokens);
+
smaliParser.smali_file_return result = parser.smali_file();
CommonTree t = (CommonTree) result.getTree();
diff --git a/src/test/resources/examples/HelloWorld.smali b/src/test/resources/examples/HelloWorld.smali
index 4bdac992..8f653a35 100644
--- a/src/test/resources/examples/HelloWorld.smali
+++ b/src/test/resources/examples/HelloWorld.smali
@@ -1,10 +1,10 @@
-.class public HelloWorld
-.super java/lang/Object
+.class public LHelloWorld;
+.super Ljava/lang/Object;
.method public ()V
.registers 1
- invoke-direct {v0} java/lang/Object.()V
+ invoke-direct {v0}, java/lang/Object.()V
return-void
.end method
@@ -12,10 +12,10 @@
.method public static main([Ljava/lang/String;)V
.registers 4
- sget-object v0 java/lang/System.out Ljava/io/PrintStream;
- const-string v1 "Hello World!"
+ sget-object v0, java/lang/System.out Ljava/io/PrintStream;
+ const-string v1, "Hello World!"
- invoke-virtual {v0, v1} java/io/PrintStream.print(Ljava/Lang/Stream;)V
+ invoke-virtual {v0, v1}, java/io/PrintStream.print(Ljava/Lang/Stream;)V
return-void
.end method
diff --git a/src/test/resources/examples/HelloWorld2.smali b/src/test/resources/examples/HelloWorld2.smali
index c9e7a107..948ae8d0 100644
--- a/src/test/resources/examples/HelloWorld2.smali
+++ b/src/test/resources/examples/HelloWorld2.smali
@@ -1,102 +1,102 @@
-.class public org/JesusFreke/HelloWorld2/HelloWorld2
-.super android/app/Activity
-
-.field private helloWorld Ljava/lang/String;
-.field private static helloWorldStatic Ljava/lang/String;
-
-.field private static helloWorldStatic2 Ljava/lang/String; = "Static Initializer Hello World!"
-
-.method static constructor <clinit>()V
- .registers 1
-
- const-string v0, "Static Hello World!"
- sput-object v0, org/JesusFreke/HelloWorld2/HelloWorld2.helloWorldStatic Ljava/lang/String;
-
- return-void
-.end method
-
-.method public constructor <init>()V
- .registers 2
- invoke-direct {v1}, android/app/Activity.<init>()V
-
- const-string v0, "Hello World!"
- iput-object v0, v1, org/JesusFreke/HelloWorld2/HelloWorld2.helloWorld Ljava/lang/String;
-
- return-void
-.end method
-
-.method public onCreate(Landroid/os/Bundle;)V
- .registers 6
-
- invoke-super {v4,v5}, android/app/Activity.onCreate(Landroid/os/Bundle;)V
-
- const-string v3, "\n"
-
- new-instance v0, Landroid/widget/TextView;
- invoke-direct {v0,v4}, android/widget/TextView.<init>(Landroid/content/Context;)V
-
- iget-object v1, v4, org/JesusFreke/HelloWorld2/HelloWorld2.helloWorld Ljava/lang/String;
-
- invoke-virtual {v1, v3}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String;
- move-result-object v1
-
- sget-object v2, org/JesusFreke/HelloWorld2/HelloWorld2.helloWorldStatic Ljava/lang/String;
- invoke-virtual {v1, v2}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String;
- move-result-object v1
-
- invoke-virtual {v1, v3}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String;
- move-result-object v1
-
- sget-object v2, org/JesusFreke/HelloWorld2/HelloWorld2.helloWorldStatic2 Ljava/lang/String;
- invoke-virtual/range {v1 .. v2}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String;
- move-result-object v1
-
-
- invoke-virtual {v1, v3}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String;
- move-result-object v1
-
- const-class v2, Lorg/JesusFreke/HelloWorld2/HelloWorld2;
- invoke-virtual {v2}, java/lang/Class.getName()Ljava/lang/String;
- move-result-object v2
-
- invoke-virtual/range {v1 .. v2}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String;
- move-result-object v1
-
-
-
- invoke-virtual {v1, v3}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String;
- move-result-object v1
-
- const-class v2, [Lorg/JesusFreke/HelloWorld2/HelloWorld2;
- invoke-virtual {v2}, java/lang/Class.getName()Ljava/lang/String;
- move-result-object v2
-
- invoke-virtual/range {v1 .. v2}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String;
- move-result-object v1
-
-
-
- invoke-virtual {v1, v3}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String;
- move-result-object v1
-
- const-class v2, [I
- invoke-virtual {v2}, java/lang/Class.getName()Ljava/lang/String;
- move-result-object v2
-
- invoke-virtual/range {v1 .. v2}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String;
- move-result-object v1
-
- move-object v2, v1
-
-
-
-
- check-cast v4, Landroid/app/Activity;
-
- invoke-virtual {v0,v2}, android/widget/TextView.setText(Ljava/lang/CharSequence;)V
- invoke-virtual {v4,v0}, org/JesusFreke/HelloWorld2/HelloWorld2.setContentView(Landroid/view/View;)V
-
- return-void
-.end method
-
-
+.class public Lorg/JesusFreke/HelloWorld2/HelloWorld2;
+.super Landroid/app/Activity;
+
+.field private helloWorld Ljava/lang/String;
+.field private static helloWorldStatic Ljava/lang/String;
+
+.field private static helloWorldStatic2 Ljava/lang/String; = "Static Initializer Hello World!"
+
+.method static constructor <clinit>()V
+ .registers 1
+
+ const-string v0, "Static Hello World!"
+ sput-object v0, org/JesusFreke/HelloWorld2/HelloWorld2/helloWorldStatic Ljava/lang/String;
+
+ return-void
+.end method
+
+.method public constructor <init>()V
+ .registers 2
+ invoke-direct {v1}, android/app/Activity/<init>()V
+
+ const-string v0, "Hello World!"
+ iput-object v0, v1, org/JesusFreke/HelloWorld2/HelloWorld2/helloWorld Ljava/lang/String;
+
+ return-void
+.end method
+
+.method public onCreate(Landroid/os/Bundle;)V
+ .registers 6
+
+ invoke-super {v4,v5}, android/app/Activity/onCreate(Landroid/os/Bundle;)V
+
+ const-string v3, "\n"
+
+ new-instance v0, Landroid/widget/TextView;
+ invoke-direct {v0,v4}, android/widget/TextView/<init>(Landroid/content/Context;)V
+
+ iget-object v1, v4, org/JesusFreke/HelloWorld2/HelloWorld2/helloWorld Ljava/lang/String;
+
+ invoke-virtual {v1, v3}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
+ move-result-object v1
+
+ sget-object v2, org/JesusFreke/HelloWorld2/HelloWorld2/helloWorldStatic Ljava/lang/String;
+ invoke-virtual {v1, v2}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
+ move-result-object v1
+
+ invoke-virtual {v1, v3}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
+ move-result-object v1
+
+ sget-object v2, org/JesusFreke/HelloWorld2/HelloWorld2/helloWorldStatic2 Ljava/lang/String;
+ invoke-virtual/range {v1 .. v2}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
+ move-result-object v1
+
+
+ invoke-virtual {v1, v3}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
+ move-result-object v1
+
+ const-class v2, Lorg/JesusFreke/HelloWorld2/HelloWorld2;
+ invoke-virtual {v2}, java/lang/Class/getName()Ljava/lang/String;
+ move-result-object v2
+
+ invoke-virtual/range {v1 .. v2}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
+ move-result-object v1
+
+
+
+ invoke-virtual {v1, v3}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
+ move-result-object v1
+
+ const-class v2, [Lorg/JesusFreke/HelloWorld2/HelloWorld2;
+ invoke-virtual {v2}, java/lang/Class/getName()Ljava/lang/String;
+ move-result-object v2
+
+ invoke-virtual/range {v1 .. v2}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
+ move-result-object v1
+
+
+
+ invoke-virtual {v1, v3}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
+ move-result-object v1
+
+ const-class v2, [I
+ invoke-virtual {v2}, java/lang/Class/getName()Ljava/lang/String;
+ move-result-object v2
+
+ invoke-virtual/range {v1 .. v2}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
+ move-result-object v1
+
+ move-object v2, v1
+
+
+
+
+ check-cast v4, Landroid/app/Activity;
+
+ invoke-virtual {v0,v2}, android/widget/TextView/setText(Ljava/lang/CharSequence;)V
+ invoke-virtual {v4,v0}, org/JesusFreke/HelloWorld2/HelloWorld2/setContentView(Landroid/view/View;)V
+
+ return-void
+.end method
+
+