Significant refactoring of the lexer code, so that the lexing is done on a phrase-by-phrase basis, with supporting changes in the parser and tree walker, and a few other misc changes

git-svn-id: https://smali.googlecode.com/svn/trunk@9 55b6fa8a-2a1e-11de-a435-ffa8d773f76a
This commit is contained in:
JesusFreke@JesusFreke.com 2009-04-21 07:51:11 +00:00
parent 8e019f3ae0
commit becf535a32
8 changed files with 1114 additions and 878 deletions

15
pom.xml
View File

@ -23,12 +23,21 @@
<version>3.1.3-1</version> <version>3.1.3-1</version>
<executions> <executions>
<execution> <execution>
<id>smali</id> <id>smaliLexer</id>
<goals> <goals>
<goal>antlr</goal> <goal>antlr</goal>
</goals> </goals>
<configuration> <configuration>
<excludes><exclude>org/JesusFreke/smali/smaliTreeWalker.g</exclude></excludes> <includes><include>org/JesusFreke/smali/smaliLexer.g</include></includes>
</configuration>
</execution>
<execution>
<id>smaliParser</id>
<goals>
<goal>antlr</goal>
</goals>
<configuration>
<includes><include>org/JesusFreke/smali/smaliParser.g</include></includes>
</configuration> </configuration>
</execution> </execution>
<execution> <execution>
@ -37,7 +46,7 @@
<goal>antlr</goal> <goal>antlr</goal>
</goals> </goals>
<configuration> <configuration>
<excludes><exclude>org/JesusFreke/smali/smali.g</exclude></excludes> <includes><include>org/JesusFreke/smali/smaliTreeWalker.g</include></includes>
</configuration> </configuration>
</execution> </execution>
</executions> </executions>

View File

@ -1,677 +0,0 @@
/*
* The comment lexical rule, and the number, string and character constant
* lexical rules are derived from rules from the Java 1.6 grammar which can be
* found here: http://openjdk.java.net/projects/compiler-grammar/antlrworks/Java.g
*
* Specifically, these rules:
*
* COMMENT, LONG_LITERAL, INT_LITERAL, Integer_number, Hex_prefix, Hex_digit,
* Long_suffix, Non_integer_number_SIMPLE_NAME, Non_integer_number,
* Decimal_exponent, Hex_exponent, Float_suffix, Double_suffix,
* FLOAT_LITERAL_SIMPLE_NAME, FLOAT_LITERAL, DOUBLE_LITERAL_SIMPLE_NAME,
* DOUBLE_LITERAL, CHAR_LITERAL, STRING_LITERAL, EscapeSequence
*
* These rules were originally copyrighted by Terence Parr, and are used here in
* accordance with the following license
*
* [The "BSD licence"]
* Copyright (c) 2007-2008 Terence Parr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
* The remainder of this grammar is released by me (Ben Gruver) under the
* following license:
*
* [The "BSD licence"]
* Copyright (c) 2009 Ben Gruver
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Combined (lexer + parser) grammar for smali assembly files.
grammar smali;
options {
// The parser produces an AST, shaped by the "->" rewrite rules below.
output=AST;
// Rule labels and rewrite elements are typed as CommonTree.
ASTLabelType=CommonTree;
}
// Token types that are declared here rather than by lexer rules. They are only
// ever created by the parser's rewrite rules.
tokens {
//I_* tokens are imaginary tokens used as parent AST nodes
// Class-level structure
I_CLASS_DEF;
I_SUPER;
I_ACCESS_LIST;
I_METHODS;
I_FIELDS;
// Field nodes
I_FIELD;
I_FIELD_TYPE;
I_FIELD_INITIAL_VALUE;
// Method nodes
I_METHOD;
I_METHOD_PROTOTYPE;
I_METHOD_RETURN_TYPE;
I_REGISTERS;
I_STATEMENTS;
// One node type per instruction format handled by the "instruction" rule
I_STATEMENT_FORMAT10x;
I_STATEMENT_FORMAT11x;
I_STATEMENT_FORMAT12x;
I_STATEMENT_FORMAT21c_TYPE;
I_STATEMENT_FORMAT21c_FIELD;
I_STATEMENT_FORMAT22c_FIELD;
I_STATEMENT_FORMAT21c_STRING;
I_STATEMENT_FORMAT35c_METHOD;
I_STATEMENT_FORMAT3rc_METHOD;
// Register operand groupings used by the invoke-style instructions
I_REGISTER_RANGE;
I_REGISTER_LIST;
}
// Java package for the generated parser class.
@parser::header {
package org.JesusFreke.smali;
}
// Java package and imports for the generated lexer class. ArrayDeque is needed
// by the pending-token queue declared in @lexer::members.
@lexer::header {
package org.JesusFreke.smali;
import java.util.ArrayDeque;
}
// NOTE(review): presumably intended to run when the lexer is constructed, so
// that state.token starts out as INVALID_TOKEN (reset() below does the same) -
// confirm that the Java target actually emits an "init" named action.
@lexer::init {
state.token = Token.INVALID_TOKEN;
}
@lexer::members {
    /* Tokens that have been emitted by lexer rules but not yet handed to the
       caller of nextToken(). A single rule may queue several tokens. */
    protected ArrayDeque<Token> tokens = new ArrayDeque<Token>();

    /* Resets the base lexer and discards any tokens that are still queued. */
    public void reset() {
        super.reset();
        state.token = Token.INVALID_TOKEN;
        tokens.clear();
    }

    /* Returns the next queued token, running the lexer rules to refill the
       queue whenever it is empty. SKIP_TOKEN entries are dropped silently. */
    public Token nextToken() {
        for (;;) {
            // Drain the queue first; poll() yields null once it is empty.
            Token pending;
            while ((pending = tokens.poll()) != null) {
                if (pending != Token.SKIP_TOKEN) {
                    return pending;
                }
            }

            // Nothing queued: prepare the shared state for a fresh match.
            state.channel = Token.DEFAULT_CHANNEL;
            state.tokenStartCharIndex = input.index();
            state.tokenStartCharPositionInLine = input.getCharPositionInLine();
            state.tokenStartLine = input.getLine();
            state.text = null;

            if (input.LA(1) == CharStream.EOF) {
                return Token.EOF_TOKEN;
            }

            try {
                mTokens();
                // A rule that queued nothing itself gets the default token,
                // which lands in the queue via emit(Token) below.
                if (tokens.isEmpty()) {
                    emit();
                }
            } catch (NoViableAltException noAlternative) {
                reportError(noAlternative);
                recover(noAlternative); // throw out current char and try again
            } catch (RecognitionException mismatch) {
                reportError(mismatch);
                // match() routine has already called recover()
            }
        }
    }

    /* Queues a marker so that nextToken() discards the current match. */
    public void skip() {
        tokens.add(Token.SKIP_TOKEN);
    }

    /* Queues a token instead of storing it in the single-token lexer state. */
    public void emit(Token token) {
        tokens.add(token);
    }
}
// Entry rule: a header followed by the methods and fields, all placed under a
// single I_CLASS_DEF root node.
smali_file: header methods_and_fields -> ^(I_CLASS_DEF header methods_and_fields);
// The header is the .class line followed by the .super line.
header : class_spec super_spec;
// '.class' <access flags> <class name>. The rewrite drops the directive token
// and puts the class name before the access list.
class_spec
: '.class' access_list class_name -> class_name access_list;
// '.super' <class name>, rooted at an I_SUPER node created from the directive token.
super_spec
: first_token='.super' class_name -> ^(I_SUPER[$first_token, "I_SUPER"] class_name);
// One or more ACCESS_SPEC tokens, rooted at an I_ACCESS_LIST node.
access_list
: first_token=ACCESS_SPEC ACCESS_SPEC* -> ^(I_ACCESS_LIST[$first_token,"I_ACCESS_LIST"] ACCESS_SPEC+);
// Methods and fields may be interleaved in the input; the rewrite regroups them
// into an I_METHODS subtree followed by an I_FIELDS subtree.
methods_and_fields
: (method | field)* -> ^(I_METHODS method*) ^(I_FIELDS field*);
// '.field' <access flags> <name> <type> with an optional '= <literal>' initial
// value. In the tree the name comes first, and the type and initial value each
// get their own wrapper node.
field : first_token='.field' access_list member_name field_type_descriptor ('=' literal)?
-> ^(I_FIELD[$first_token, "I_FIELD"] member_name access_list ^(I_FIELD_TYPE field_type_descriptor) ^(I_FIELD_INITIAL_VALUE literal)?);
// '.method' ... '.end method'. The .registers directive is mandatory and must
// precede the statements.
method : first_token='.method' access_list method_name_and_prototype locals_directive statements '.end method'
-> ^(I_METHOD[$first_token, "I_METHOD"] method_name_and_prototype access_list locals_directive statements);
// '(' <parameter types> ')' <return type>. The tree lists the return type
// (under I_METHOD_RETURN_TYPE) before the parameter types.
method_prototype
: first_token='(' field_type_list ')' type_descriptor
-> ^(I_METHOD_PROTOTYPE[$first_token, "I_METHOD_PROTOTYPE"] ^(I_METHOD_RETURN_TYPE type_descriptor) field_type_list?);
// Unqualified method name immediately followed by its prototype.
method_name_and_prototype
: member_name method_prototype;
// Zero or more parameter type descriptors.
field_type_list
: field_type_descriptor*;
// '.registers' <int>, rooted at an I_REGISTERS node.
locals_directive
: first_token='.registers' INT_LITERAL
-> ^(I_REGISTERS[$first_token, "I_REGISTERS"] INT_LITERAL);
// Qualified method reference: the class-name and member-name tokens that the
// QUALIFIED_MEMBER lexer rule emits, followed by the method prototype.
full_method_name_and_prototype
: QUALIFIED_MEMBER__CLASS_NAME QUALIFIED_MEMBER__MEMBER_NAME method_prototype;
// Qualified field reference: class name, member name, then the field's type.
full_field_name_and_type
: QUALIFIED_MEMBER__CLASS_NAME QUALIFIED_MEMBER__MEMBER_NAME field_type_descriptor;
// Method body: zero or more statements under an I_STATEMENTS node.
statements
: statement* -> ^(I_STATEMENTS statement*);
// Currently the only kind of statement is an instruction.
statement
: instruction;
// One alternative per supported instruction format. Each alternative is
// rewritten to an imaginary I_STATEMENT_FORMAT* node (positioned at the rule's
// first token) whose children are the opcode token and its operands; the ','
// and '{' '}' punctuation is dropped. The text of every imaginary node matches
// the name of its token type.
instruction
//e.g. return-void
: INSTRUCTION_NAME_FORMAT10x
-> ^(I_STATEMENT_FORMAT10x[$start, "I_STATEMENT_FORMAT10x"] INSTRUCTION_NAME_FORMAT10x)
| //e.g. move-result-object v1
INSTRUCTION_NAME_FORMAT11x REGISTER
-> ^(I_STATEMENT_FORMAT11x[$start, "I_STATEMENT_FORMAT11x"] INSTRUCTION_NAME_FORMAT11x REGISTER)
| //e.g. move v1, v2
INSTRUCTION_NAME_FORMAT12x REGISTER ',' REGISTER
-> ^(I_STATEMENT_FORMAT12x[$start, "I_STATEMENT_FORMAT12x"] INSTRUCTION_NAME_FORMAT12x REGISTER REGISTER)
| //e.g. sget-object v0, java/lang/System.out Ljava/io/PrintStream;
INSTRUCTION_NAME_FORMAT21c_FIELD REGISTER ',' full_field_name_and_type
-> ^(I_STATEMENT_FORMAT21c_FIELD[$start, "I_STATEMENT_FORMAT21c_FIELD"] INSTRUCTION_NAME_FORMAT21c_FIELD REGISTER full_field_name_and_type)
| //e.g. const-string v1, "Hello World!"
INSTRUCTION_NAME_FORMAT21c_STRING REGISTER ',' STRING_LITERAL
-> ^(I_STATEMENT_FORMAT21c_STRING[$start, "I_STATEMENT_FORMAT21c_STRING"] INSTRUCTION_NAME_FORMAT21c_STRING REGISTER STRING_LITERAL)
| //e.g. const-class v2, Lorg/JesusFreke/HelloWorld2/HelloWorld2;
INSTRUCTION_NAME_FORMAT21c_TYPE REGISTER ',' class_or_array_type_descriptor
-> ^(I_STATEMENT_FORMAT21c_TYPE[$start, "I_STATEMENT_FORMAT21c_TYPE"] INSTRUCTION_NAME_FORMAT21c_TYPE REGISTER class_or_array_type_descriptor)
| //e.g. iput-object v1, v0, org/JesusFreke/HelloWorld2/HelloWorld2.helloWorld Ljava/lang/String;
INSTRUCTION_NAME_FORMAT22c_FIELD REGISTER ',' REGISTER ',' full_field_name_and_type
-> ^(I_STATEMENT_FORMAT22c_FIELD[$start, "I_STATEMENT_FORMAT22c_FIELD"] INSTRUCTION_NAME_FORMAT22c_FIELD REGISTER REGISTER full_field_name_and_type)
| //e.g. invoke-virtual {v0,v1}, java/io/PrintStream.print(Ljava/lang/Stream;)V
INSTRUCTION_NAME_FORMAT35c_METHOD '{' register_list '}' ',' full_method_name_and_prototype
-> ^(I_STATEMENT_FORMAT35c_METHOD[$start, "I_STATEMENT_FORMAT35c_METHOD"] INSTRUCTION_NAME_FORMAT35c_METHOD register_list full_method_name_and_prototype)
| //e.g. invoke-virtual/range {v25..v26}, java/lang/StringBuilder.append(Ljava/lang/String;)Ljava/lang/StringBuilder;
INSTRUCTION_NAME_FORMAT3rc_METHOD '{' register_range '}' ',' full_method_name_and_prototype
-> ^(I_STATEMENT_FORMAT3rc_METHOD[$start, "I_STATEMENT_FORMAT3rc_METHOD"] INSTRUCTION_NAME_FORMAT3rc_METHOD register_range full_method_name_and_prototype)
;
// A possibly empty, comma-separated list of registers (the contents of the
// braces of a format 35c instruction). The commas are dropped from the tree.
// A comma is only accepted after a register, so "{, v1}" is rejected. The
// I_REGISTER_LIST node is positioned at $start (the first token seen by the
// rule), which is always available, even when the list is empty.
register_list
: (REGISTER (',' REGISTER)*)? -> ^(I_REGISTER_LIST[$start, "I_REGISTER_LIST"] REGISTER*);
// A single register, or a "first..last" pair (the contents of the braces of a
// format 3rc instruction). The '..' is dropped from the tree.
register_range
: first_token=REGISTER ('..' REGISTER)? -> ^(I_REGISTER_RANGE[$first_token, "I_REGISTER_RANGE"] REGISTER REGISTER?);
/*since there are no reserved words in the dex specification, there are a
number of tokens that can be a valid simple_name, in addition to just
SIMPLE_NAME. We need to match any token that could also be considered a valid
SIMPLE_NAME. In the case of floating point literals, some could be considered
a valid SIMPLE_NAME while others couldn't. The lexer will generate a separate
FLOAT_LITERAL_SIMPLE_NAME OR DOUBLE_LITERAL_SIMPLE_NAME token for literals
that can be considered a valid SIMPLE_NAME*/
// Every token type that the parser accepts wherever a simple name is expected.
simple_name
: SIMPLE_NAME
| ACCESS_SPEC
| instruction_name
| INT_LITERAL
| LONG_LITERAL
| FLOAT_LITERAL_SIMPLE_NAME
| DOUBLE_LITERAL_SIMPLE_NAME
| BOOL_LITERAL
| PRIMITIVE_TYPE
;
// Any opcode token, regardless of format. Used by simple_name so that an
// opcode mnemonic can also serve as an identifier.
instruction_name
: INSTRUCTION_NAME_FORMAT10x
| INSTRUCTION_NAME_FORMAT11x
| INSTRUCTION_NAME_FORMAT12x
| INSTRUCTION_NAME_FORMAT21c_FIELD
| INSTRUCTION_NAME_FORMAT21c_STRING
| INSTRUCTION_NAME_FORMAT21c_TYPE
| INSTRUCTION_NAME_FORMAT22c_FIELD
| INSTRUCTION_NAME_FORMAT35c_METHOD
| INSTRUCTION_NAME_FORMAT3rc_METHOD
;
// Field or method name: a simple name, or the '<' ... '>' form matched by the
// MEMBER_NAME lexer rule.
member_name
: simple_name
| MEMBER_NAME
;
// Class name, either bare or with a '/'-separated package prefix.
class_name
: SIMPLE_NAME | CLASS_WITH_PACKAGE_NAME;
// Any type that a field or parameter may have (everything except void).
field_type_descriptor
: PRIMITIVE_TYPE
| CLASS_DESCRIPTOR
| ARRAY_TYPE
;
// Reference types only; used by the format 21c type instructions.
class_or_array_type_descriptor
: CLASS_DESCRIPTOR
| ARRAY_TYPE;
// Any type including void; used for method return types.
type_descriptor
: VOID_TYPE
| field_type_descriptor
;
// Any literal that may appear as a field's initial value.
literal : INT_LITERAL
| LONG_LITERAL
| float_literal
| double_literal
| CHAR_LITERAL
| STRING_LITERAL
| BOOL_LITERAL;
// Normalizes both lexer spellings of a float literal to a FLOAT_LITERAL node.
// The *_SIMPLE_NAME variant is re-created as a FLOAT_LITERAL token that keeps
// the original token's text.
float_literal
: FLOAT_LITERAL -> FLOAT_LITERAL
| FLOAT_LITERAL_SIMPLE_NAME -> FLOAT_LITERAL[$FLOAT_LITERAL_SIMPLE_NAME, $FLOAT_LITERAL_SIMPLE_NAME.text];
// Same normalization for double literals.
double_literal
: DOUBLE_LITERAL -> DOUBLE_LITERAL
| DOUBLE_LITERAL_SIMPLE_NAME -> DOUBLE_LITERAL[$DOUBLE_LITERAL_SIMPLE_NAME, $DOUBLE_LITERAL_SIMPLE_NAME.text];
// Access flag keywords accepted on classes, fields and methods.
ACCESS_SPEC
: 'public' | 'private' | 'static' | 'constructor' | 'final';
// Opcodes that the "instruction" rule accepts with no operands.
INSTRUCTION_NAME_FORMAT10x
: 'return-void'
| 'nop';
// Opcodes that the "instruction" rule accepts with a single register operand.
INSTRUCTION_NAME_FORMAT11x
: 'move-result'
| 'move-result-wide'
| 'move-result-object'
| 'move-exception'
| 'return'
| 'return-wide'
| 'return-object'
| 'monitor-enter'
| 'monitor-exit'
| 'throw';
// Opcodes that the "instruction" rule accepts with two register operands:
// moves, unary operations, conversions and the "/2addr" binary operations.
INSTRUCTION_NAME_FORMAT12x
: 'move'
| 'move-wide'
| 'move-object'
| 'array-length'
| 'neg-int'
| 'not-int'
| 'neg-long'
| 'not-long'
| 'neg-float'
| 'neg-double'
| 'int-to-long'
| 'int-to-float'
| 'int-to-double'
| 'long-to-int'
| 'long-to-float'
| 'long-to-double'
| 'float-to-int'
| 'float-to-long'
| 'float-to-double'
| 'double-to-int'
| 'double-to-long'
| 'double-to-float'
| 'int-to-byte'
| 'int-to-char'
| 'int-to-short'
| 'add-int/2addr'
| 'sub-int/2addr'
| 'mul-int/2addr'
| 'div-int/2addr'
| 'rem-int/2addr'
| 'and-int/2addr'
| 'or-int/2addr'
| 'xor-int/2addr'
| 'shl-int/2addr'
| 'shr-int/2addr'
| 'ushr-int/2addr'
| 'add-long/2addr'
| 'sub-long/2addr'
| 'mul-long/2addr'
| 'div-long/2addr'
| 'rem-long/2addr'
| 'and-long/2addr'
| 'or-long/2addr'
| 'xor-long/2addr'
| 'shl-long/2addr'
| 'shr-long/2addr'
| 'ushr-long/2addr'
| 'add-float/2addr'
| 'sub-float/2addr'
| 'mul-float/2addr'
| 'div-float/2addr'
| 'rem-float/2addr'
| 'add-double/2addr'
| 'sub-double/2addr'
| 'mul-double/2addr'
| 'div-double/2addr'
| 'rem-double/2addr';
// Static field accessors: one register plus a qualified field reference.
INSTRUCTION_NAME_FORMAT21c_FIELD
: 'sget'
| 'sget-wide'
| 'sget-object'
| 'sget-boolean'
| 'sget-byte'
| 'sget-char'
| 'sget-short'
| 'sput'
| 'sput-wide'
| 'sput-object'
| 'sput-boolean'
| 'sput-byte'
| 'sput-char'
| 'sput-short'
;
// One register plus a string literal.
INSTRUCTION_NAME_FORMAT21c_STRING
: 'const-string';
// One register plus a class or array type descriptor.
INSTRUCTION_NAME_FORMAT21c_TYPE
: 'check-cast'
| 'new-instance'
| 'const-class';
// Instance field accessors: two registers plus a qualified field reference.
INSTRUCTION_NAME_FORMAT22c_FIELD
: 'iget'
| 'iget-wide'
| 'iget-object'
| 'iget-boolean'
| 'iget-byte'
| 'iget-char'
| 'iget-short'
| 'iput'
| 'iput-wide'
| 'iput-object'
| 'iput-boolean'
| 'iput-byte'
| 'iput-char'
| 'iput-short'
;
// Method invocations that take a braced register list.
INSTRUCTION_NAME_FORMAT35c_METHOD
: 'invoke-virtual'
| 'invoke-super'
| 'invoke-direct'
| 'invoke-static'
| 'invoke-interface'
;
// Method invocations that take a braced register range.
INSTRUCTION_NAME_FORMAT3rc_METHOD
: 'invoke-virtual/range'
| 'invoke-super/range'
| 'invoke-direct/range'
| 'invoke-static/range'
| 'invoke-interface/range'
;
/*since SIMPLE_NAME is so all-encompassing, it includes all integer literals
and a subset of the possible floating point literals. For floating point
literals, we need to generate a separate token depending on whether the token
could also be considered a SIMPLE_NAME or not.
The floating point related tokens with a _SIMPLE_NAME suffix could also be
considered valid SIMPLE_NAME tokens, while the plain version of the token
(without the suffix) could not be considered a valid SIMPLE_NAME token*/
// Integer followed by an 'l'/'L' suffix.
LONG_LITERAL
: Integer_number Long_suffix;
// Integer without a suffix.
INT_LITERAL
: Integer_number;
// Zero, decimal, octal (leading 0) or hexadecimal (0x/0X) digits. Only the
// zero and decimal forms accept a leading '-'.
fragment Integer_number
: '-'? '0'
| '-'? ('1'..'9') ('0'..'9')*
| '0' ('0'..'7')+
| Hex_prefix Hex_digit+
;
fragment Hex_prefix
: '0x'|'0X';
fragment Hex_digit
: ('0'..'9'|'a'..'f'|'A'..'F');
fragment Long_suffix
: 'l'|'L';
// Floating point spellings that contain no '.', i.e. those that consist
// solely of characters that SIMPLE_NAME also accepts.
fragment Non_integer_number_SIMPLE_NAME
: ('0'..'9')+ Decimal_exponent
| ('0'..'9')+
| Hex_prefix (Hex_digit)* Hex_exponent
;
// Floating point spellings that contain a '.', which SIMPLE_NAME does not accept.
fragment Non_integer_number
: ('0'..'9')+ '.' ('0'..'9')* Decimal_exponent?
| '.' ('0'..'9')+ Decimal_exponent?
| Hex_prefix (Hex_digit)* '.' (Hex_digit)* Hex_exponent
;
fragment Decimal_exponent
: ('e'|'E') '-'? ('0'..'9')+;
fragment Hex_exponent
: ('p'|'P') '-'? ('0'..'9')+;
fragment Float_suffix
: 'f'|'F';
fragment Double_suffix
: 'd'|'D';
// Float literal (mandatory f/F suffix) that could also be read as a simple name.
FLOAT_LITERAL_SIMPLE_NAME
: Non_integer_number_SIMPLE_NAME Float_suffix;
// Float literal (mandatory f/F suffix) that contains a '.'.
FLOAT_LITERAL
: Non_integer_number Float_suffix;
// Double literal (optional d/D suffix) that could also be read as a simple name.
// NOTE(review): with the suffix omitted, the plain-digits alternative also
// matches ordinary integers; presumably the earlier INT_LITERAL rule takes
// precedence for those - confirm against the generated lexer.
DOUBLE_LITERAL_SIMPLE_NAME
: Non_integer_number_SIMPLE_NAME Double_suffix?;
// Double literal (optional d/D suffix) that contains a '.'.
DOUBLE_LITERAL
: Non_integer_number Double_suffix?;
// A single character or escape sequence between single quotes.
// NOTE(review): the token text is only replaced when an escape sequence was
// matched, in which case it becomes the decoded character without quotes. For
// a plain character no setText() call is made - confirm that consumers cope
// with both shapes.
CHAR_LITERAL
: '\'' {StringBuilder sb = new StringBuilder();}
( Escape_sequence[sb] {setText(sb.toString());}
| ~( '\'' | '\\' | '\r' | '\n' )
)
'\''
;
// A double-quoted string that may not span lines. The token text is replaced
// by the decoded contents, without the surrounding quotes.
STRING_LITERAL
: '"' {StringBuilder sb = new StringBuilder();}
( Escape_sequence[sb]
// input.LA(-1) is the character that was just consumed
| ~( '\\' | '"' | '\r' | '\n' ) {sb.append((char)input.LA(-1));}
)*
'"' {setText(sb.toString());}
;
// Exactly four hexadecimal digits: the payload of a \uXXXX escape sequence.
// Declared as a fragment so that it is only ever matched as part of
// Escape_sequence. Without "fragment" it would be a token rule of its own and
// compete with SIMPLE_NAME for four-character names made of hex digits.
fragment Hex_digits
: Hex_digit Hex_digit Hex_digit Hex_digit;
// Matches one backslash escape and appends the character it denotes to sb.
// Supported: \b \t \n \f \r \" \' \\ and \uXXXX (four hex digits). The octal
// escapes are commented out.
fragment
Escape_sequence[StringBuilder sb]
: '\\'
(
'b' {sb.append("\b");}
| 't' {sb.append("\t");}
| 'n' {sb.append("\n");}
| 'f' {sb.append("\f");}
| 'r' {sb.append("\r");}
| '\"' {sb.append("\"");}
| '\'' {sb.append("'");}
| '\\' {sb.append("\\");}
| 'u' Hex_digits {sb.append((char)Integer.parseInt($Hex_digits.text, 16));}
/* | octdigits=(('0'..'3') ('0'..'7') ('0'..'7')) {$value = (char)Integer.parseInt("0" + $octdigits.text);}
| octdigits=(('0'..'7') ('0'..'7')) {$value = (char)Integer.parseInt("0" + $octdigits.text);}
| octdigits=(('0'..'7')) {$value = (char)Integer.parseInt("0" + $octdigits.text);}*/
);
BOOL_LITERAL
: 'true'|'false';
// Whitespace is sent to the hidden channel, so the parser never sees it.
WHITESPACE
: (' '|'\t'|'\n'|'\r')+ {$channel = HIDDEN;};
// Register reference: 'v' followed by a decimal register number.
REGISTER: 'v' ('0'..'9')+;
/*a token of type QUALIFIED_MEMBER is never generated. This rule emits 2 sub-tokens
that represent the class name and the member name, so that they don't have to be
parsed out later*/
// Matches "<class name>.<member name>" and queues the two halves as separate
// tokens through the emit(Token) override in @lexer::members; the '.' itself
// is not emitted.
QUALIFIED_MEMBER
: class_name=QUALIFIED_MEMBER__CLASS_NAME '.' member_name=QUALIFIED_MEMBER__MEMBER_NAME
{
$class_name.setType(QUALIFIED_MEMBER__CLASS_NAME);
$member_name.setType(QUALIFIED_MEMBER__MEMBER_NAME);
emit($class_name);
emit($member_name);
};
// Class part: simple names separated by '/'.
fragment QUALIFIED_MEMBER__CLASS_NAME
: (SIMPLE_NAME '/')* SIMPLE_NAME;
// Member part: either the '<' ... '>' form or a simple name.
fragment QUALIFIED_MEMBER__MEMBER_NAME
: MEMBER_NAME | SIMPLE_NAME;
// Array type descriptor: 1 to 255 '[' characters followed by the element type.
ARRAY_TYPE
:
ARRAY_CHAR_LIST[255] (PRIMITIVE_TYPE | CLASS_DESCRIPTOR);
//match from 1 to maxCount '[' characters
// The gated predicate only allows the recursive alternative while more than
// one '[' is still permitted, which bounds the recursion depth by maxCount.
fragment
ARRAY_CHAR_LIST[int maxCount]
: {$maxCount > 1}?=> '[' ARRAY_CHAR_LIST[$maxCount - 1]
| '['
;
// A simple name wrapped in angle brackets.
MEMBER_NAME
: '<' SIMPLE_NAME '>';
// The void return type.
VOID_TYPE
: 'V';
// Single-letter primitive type descriptors.
PRIMITIVE_TYPE
: 'Z'
| 'B'
| 'S'
| 'C'
| 'I'
| 'J'
| 'F'
| 'D'
;
// Class name with at least one '/'-separated package component.
CLASS_WITH_PACKAGE_NAME
: (SIMPLE_NAME '/')+ SIMPLE_NAME;
// Class type descriptor: 'L' <class name> ';'.
CLASS_DESCRIPTOR
: 'L' (SIMPLE_NAME | CLASS_WITH_PACKAGE_NAME) ';';
// One or more characters from the set below: ASCII letters and digits, '$',
// '-', '_', and the listed Unicode ranges.
SIMPLE_NAME:
( 'A'..'Z'
| 'a'..'z'
| '0'..'9'
| '$'
| '-'
| '_'
| '\u00a1'..'\u1fff'
| '\u2010'..'\u2027'
| '\u2030'..'\ud7ff'
| '\ue000'..'\uffef'
)+;
// Line comment: ';' up to and including the line terminator. The second
// alternative covers a comment that is not followed by a line terminator.
// Comments are sent to the hidden channel.
COMMENT
: (';' ~('\n'|'\r')* ('\r\n' | '\r' | '\n')
| ';' ~('\n'|'\r')*)
{
$channel = HIDDEN;
}
;

View File

@ -0,0 +1,749 @@
/*
* The number, string and character constant lexical rules are derived from rules
* from the Java 1.6 grammar which can be found here:
* http://openjdk.java.net/projects/compiler-grammar/antlrworks/Java.g
*
* Specifically, these rules:
*
* STRING_LITERAL, ESCAPE_SEQUENCE, HEX_DIGITS, HEX_DIGIT, INTEGER_LITERAL,
* HEX_PREFIX, LONG_LITERAL, FLOATING_POINT_NUMBER, DECIMAL_EXPONENT,
* HEX_EXPONENT, FLOAT_LITERAL, DOUBLE_LITERAL, CHAR_LITERAL
*
* These rules were originally copyrighted by Terence Parr, and are used here in
* accordance with the following license
*
* [The "BSD licence"]
* Copyright (c) 2007-2008 Terence Parr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
* The remainder of this grammar is released by me (Ben Gruver) under the
* following license:
*
* [The "BSD licence"]
* Copyright (c) 2009 Ben Gruver
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*smali files are particularly hard to tokenize, because of dex's
identifiers, which are much more all-encompassing than most languages'.
One reasonable possibility would be to limit the identifiers to what Java
supports. But I want the syntax to expose the full functionality of the dex
format, so that means supporting the wide range of identifiers that it
supports.
This makes tokenizing a much more context sensitive operation than usual. To
address this, I've extended the base lexer class to support multiple
token emissions per rule. The top level *_PHRASE lexical rules generally
match a "phrase". Each phrase has a specific format, and a unique starting
sequence - typically a directive or opcode. Each phrase rule doesn't generate
a token that represents itself, like a typical lexical rule, rather, it emits
all of its children tokens.
For example, a phrase may consist of ".field private helloWorld Ljava/lang/String;".
The corresponding rule (without the supporting emission code) would look something like
FIELD_PHRASE : '.field' ACCESS_SPEC+ MEMBER_NAME FIELD_TYPE_DESCRIPTOR
There would never be a "FIELD_PHRASE" token in the output token stream. Instead,
it would emit a token for each of its children tokens.*/
// Stand-alone lexer grammar.
lexer grammar smaliLexer;
// Java package and imports for the generated lexer class. ArrayDeque is needed
// by the pending-token queue declared in @lexer::members.
@lexer::header {
package org.JesusFreke.smali;
import java.util.ArrayDeque;
}
// NOTE(review): presumably intended to run when the lexer is constructed, so
// that state.token starts out as INVALID_TOKEN (reset() below does the same) -
// confirm that the Java target actually emits an "init" named action.
@lexer::init {
state.token = Token.INVALID_TOKEN;
}
@lexer::members {
    /* Tokens that have been emitted by the phrase rules but not yet handed to
       the caller of nextToken(). A single rule may queue several tokens. */
    protected ArrayDeque<Token> tokens = new ArrayDeque<Token>();

    /* Resets the base lexer and discards any tokens that are still queued. */
    public void reset() {
        super.reset();
        state.token = Token.INVALID_TOKEN;
        tokens.clear();
    }

    /* Returns the next queued token, running the lexer rules to refill the
       queue whenever it is empty. SKIP_TOKEN entries are dropped silently.
       Tokens are returned without being printed; the lexer writes nothing to
       standard output. */
    public Token nextToken() {
        while (true) {
            if (tokens.size() > 0) {
                Token token = tokens.poll();
                if (token == Token.SKIP_TOKEN) {
                    continue;
                }
                return token;
            }

            // Nothing queued: prepare the shared state for a fresh match.
            state.channel = Token.DEFAULT_CHANNEL;
            state.tokenStartCharIndex = input.index();
            state.tokenStartCharPositionInLine = input.getCharPositionInLine();
            state.tokenStartLine = input.getLine();
            state.text = null;
            if ( input.LA(1)==CharStream.EOF ) {
                return Token.EOF_TOKEN;
            }
            try {
                mTokens();
                // A rule that queued nothing itself gets the default token,
                // which lands in the queue via emit(Token) below.
                if (tokens.size() == 0) {
                    emit();
                }
            }
            catch (NoViableAltException nva) {
                reportError(nva);
                recover(nva); // throw out current char and try again
            }
            catch (RecognitionException re) {
                reportError(re);
                // match() routine has already called recover()
            }
        }
    }

    /* Queues a marker so that nextToken() discards the current match. */
    public void skip() {
        tokens.add(Token.SKIP_TOKEN);
    }

    /* Queues a token instead of storing it in the single-token lexer state. */
    public void emit(Token token) {
        tokens.add(token);
    }

    /* Sets the token's type, then queues it. */
    public void emit(Token token, int type) {
        token.setType(type);
        tokens.add(token);
    }

    /* Sets the token's type and channel, then queues it. */
    public void emit(Token token, int type, int channel) {
        token.setType(type);
        token.setChannel(channel);
        tokens.add(token);
    }

    /*protected void mismatch(IntStream input, int ttype, BitSet follow) throws RecognitionException
    {
    throw new MismatchedTokenException(ttype, input);
    }
    public Object recoverFromMismatchedSet(IntStream input, RecognitionException e, BitSet follow) throws RecognitionException
    {
    throw e;
    }*/
}
/*@rulecatch {
catch (RecognitionException e) {
throw e;
}
}*/
// Top-level phrase rules for directives. The *_EMIT fragments they reference
// queue the individual tokens; WS is matched between the pieces.

// .class <one or more access flags> <class descriptor>
CLASS_PHRASE
: CLASS_DIRECTIVE_EMIT
WS
(ACCESS_SPEC_EMIT WS)+
CLASS_DESCRIPTOR_EMIT;
// .super <class descriptor>
SUPER_PHRASE
: SUPER_DIRECTIVE_EMIT
WS
CLASS_DESCRIPTOR_EMIT;
// .field <one or more access flags> <name> <type> [= <literal>]
FIELD_PHRASE
: FIELD_DIRECTIVE_EMIT
WS
(ACCESS_SPEC_EMIT WS)+
MEMBER_NAME_EMIT
WS
FIELD_TYPE_DESCRIPTOR_EMITCHILD
WS?
('=' WS? LITERAL_EMITCHILD)?;
// .method <one or more access flags> <name><prototype>
// No whitespace is allowed between the name and the prototype.
METHOD_PHRASE
: METHOD_DIRECTIVE_EMIT
WS
(ACCESS_SPEC_EMIT WS)+
MEMBER_NAME_EMIT
METHOD_PROTOTYPE_EMITCHILDREN;
// .end method
END_METHOD_PHRASE
: END_METHOD_DIRECTIVE_EMIT;
// .registers <integer>
REGISTERS_PHRASE
: REGISTERS_DIRECTIVE_EMIT
WS
INTEGER_LITERAL_EMIT;
// Top-level phrase rules for instructions, one per instruction format. The ','
// separators are matched but not emitted.

// <opcode>
INSTRUCTION_FORMAT10x_PHRASE
: INSTRUCTION_FORMAT10x_EMIT;
// <opcode> <register>
INSTRUCTION_FORMAT11x_PHRASE
: INSTRUCTION_FORMAT11x_EMIT
WS
REGISTER_EMIT;
// <opcode> <register>, <register>
INSTRUCTION_FORMAT12x_PHRASE
: INSTRUCTION_FORMAT12x_EMIT
WS
REGISTER_EMIT
WS? ',' WS?
REGISTER_EMIT;
// <opcode> <register>, <qualified field name> <field type>
INSTRUCTION_FORMAT21c_FIELD_PHRASE
: INSTRUCTION_FORMAT21c_FIELD_EMIT
WS
REGISTER_EMIT
WS? ',' WS?
FULLY_QUALIFIED_MEMBER_NAME_EMITCHILDREN
WS
FIELD_TYPE_DESCRIPTOR_EMITCHILD;
// <opcode> <register>, <string literal>
INSTRUCTION_FORMAT21c_STRING_PHRASE
: INSTRUCTION_FORMAT21c_STRING_EMIT
WS
REGISTER_EMIT
WS? ',' WS?
STRING_LITERAL_EMIT;
// <opcode> <register>, <class or array type descriptor>
INSTRUCTION_FORMAT21c_TYPE_PHRASE
: INSTRUCTION_FORMAT21c_TYPE_EMIT
WS
REGISTER_EMIT
WS? ',' WS?
CLASS_OR_ARRAY_TYPE_DESCRIPTOR_EMITCHILD;
// <opcode> <register>, <register>, <qualified field name> <field type>
INSTRUCTION_FORMAT22c_FIELD_PHRASE
: INSTRUCTION_FORMAT22c_FIELD_EMIT
WS
REGISTER_EMIT
WS? ',' WS?
REGISTER_EMIT
WS? ',' WS?
FULLY_QUALIFIED_MEMBER_NAME_EMITCHILDREN
WS
FIELD_TYPE_DESCRIPTOR_EMITCHILD;
// <opcode> <register list>, <qualified method name><prototype>
INSTRUCTION_FORMAT35c_METHOD_PHRASE
: INSTRUCTION_FORMAT35c_METHOD_EMIT
WS
REGISTER_LIST_EMITCHILDREN
WS? ',' WS?
FULLY_QUALIFIED_MEMBER_NAME_EMITCHILDREN
METHOD_PROTOTYPE_EMITCHILDREN;
// <opcode> <register range>, <qualified method name><prototype>
INSTRUCTION_FORMAT3rc_METHOD_PHRASE
: INSTRUCTION_FORMAT3rc_METHOD_EMIT
WS
REGISTER_RANGE_EMITCHILDREN
WS? ',' WS?
FULLY_QUALIFIED_MEMBER_NAME_EMITCHILDREN
METHOD_PROTOTYPE_EMITCHILDREN;
// Each X_EMIT fragment matches the X fragment and queues the matched token
// with its type set to X, using the emit(Token, int) helper from
// @lexer::members. The plain X fragments only describe the text to match.
fragment CLASS_DIRECTIVE_EMIT
: CLASS_DIRECTIVE {emit($CLASS_DIRECTIVE, CLASS_DIRECTIVE);};
fragment CLASS_DIRECTIVE
: '.class';
fragment SUPER_DIRECTIVE_EMIT
: SUPER_DIRECTIVE {emit($SUPER_DIRECTIVE, SUPER_DIRECTIVE);};
fragment SUPER_DIRECTIVE
: '.super';
fragment FIELD_DIRECTIVE_EMIT
: FIELD_DIRECTIVE {emit($FIELD_DIRECTIVE, FIELD_DIRECTIVE);};
fragment FIELD_DIRECTIVE
: '.field';
fragment METHOD_DIRECTIVE_EMIT
: METHOD_DIRECTIVE {emit($METHOD_DIRECTIVE, METHOD_DIRECTIVE);};
fragment METHOD_DIRECTIVE
: '.method';
fragment END_METHOD_DIRECTIVE_EMIT
: END_METHOD_DIRECTIVE {emit($END_METHOD_DIRECTIVE, END_METHOD_DIRECTIVE);};
// The literal contains exactly one space between ".end" and "method".
fragment END_METHOD_DIRECTIVE
: '.end method';
fragment REGISTERS_DIRECTIVE_EMIT
: REGISTERS_DIRECTIVE {emit($REGISTERS_DIRECTIVE, REGISTERS_DIRECTIVE);};
fragment REGISTERS_DIRECTIVE
: '.registers';
fragment REGISTER_EMIT
: REGISTER {emit($REGISTER, REGISTER);};
// Register reference: 'v' followed by a decimal register number.
fragment REGISTER
: 'v' ('0'..'9')+;
// Bracketed, comma-separated register list. Emits the opening bracket, each
// register and the closing bracket; commas and whitespace are not emitted.
// The second alternative accepts an empty list.
fragment REGISTER_LIST_EMITCHILDREN
: OPEN_BRACKET_EMIT
( WS?
REGISTER_EMIT (WS? ',' WS? REGISTER_EMIT)*
WS?
| WS?)
CLOSE_BRACKET_EMIT;
// Bracketed register range: a single register, or "first .. last". Emits the
// opening bracket, the register(s) and the closing bracket; the '..' and any
// whitespace are not emitted. Optional whitespace is accepted around every
// element, including between the last register and the closing bracket, so
// that the range form is as lenient as REGISTER_LIST_EMITCHILDREN.
fragment REGISTER_RANGE_EMITCHILDREN
: OPEN_BRACKET_EMIT
WS?
REGISTER_EMIT
WS?
('..' WS?
REGISTER_EMIT
WS?)?
CLOSE_BRACKET_EMIT;
// Method prototype: '(' <zero or more parameter types> ')' <return type>.
// No whitespace is accepted anywhere inside the prototype.
fragment METHOD_PROTOTYPE_EMITCHILDREN
: OPEN_PAREN_EMIT
(FIELD_TYPE_DESCRIPTOR_EMITCHILD+)?
CLOSE_PAREN_EMIT
TYPE_DESCRIPTOR_EMITCHILD;
// Matches "<class name>/<member name>" and emits two tokens: a CLASS_NAME
// token and a MEMBER_NAME token. The '/' between them is not emitted.
// The CLASS_NAME token is built from the last SIMPLE_NAME match, whose start
// index is moved back to the position where the whole class name began.
// NOTE(review): only the start index is adjusted - confirm that the token's
// text is derived from its start/stop indexes, so that it covers the full
// class name and not just the last component.
fragment FULLY_QUALIFIED_MEMBER_NAME_EMITCHILDREN
@init {int startPos;}
: {startPos = getCharIndex();} (SIMPLE_NAME '/')* token=SIMPLE_NAME {((CommonToken)$token).setStartIndex(startPos); emit($token, CLASS_NAME);}
'/'
MEMBER_NAME_EMIT;
// Any type including void; emits exactly one token of the matching type.
fragment TYPE_DESCRIPTOR_EMITCHILD
: PRIMITIVE_TYPE_EMIT
| VOID_TYPE_EMIT
| CLASS_DESCRIPTOR_EMIT
| ARRAY_DESCRIPTOR_EMIT;
// Any type except void.
fragment FIELD_TYPE_DESCRIPTOR_EMITCHILD
: PRIMITIVE_TYPE_EMIT
| CLASS_DESCRIPTOR_EMIT
| ARRAY_DESCRIPTOR_EMIT;
// Reference types only.
fragment CLASS_OR_ARRAY_TYPE_DESCRIPTOR_EMITCHILD
: CLASS_DESCRIPTOR_EMIT
| ARRAY_DESCRIPTOR_EMIT;
fragment PRIMITIVE_TYPE_EMIT
: PRIMITIVE_TYPE {emit($PRIMITIVE_TYPE, PRIMITIVE_TYPE);};
// Single-letter primitive type descriptors.
fragment PRIMITIVE_TYPE
: 'Z'
| 'B'
| 'S'
| 'C'
| 'I'
| 'J'
| 'F'
| 'D'
;
fragment VOID_TYPE_EMIT
: VOID_TYPE {emit($VOID_TYPE, VOID_TYPE);};
// The void return type.
fragment VOID_TYPE
: 'V';
fragment CLASS_DESCRIPTOR_EMIT
: CLASS_DESCRIPTOR {emit($CLASS_DESCRIPTOR, CLASS_DESCRIPTOR);};
// Class type descriptor: 'L' <class name> ';'.
fragment CLASS_DESCRIPTOR
: 'L' CLASS_NAME ';';
// Simple names separated by '/'.
fragment CLASS_NAME
: (SIMPLE_NAME '/')* SIMPLE_NAME;
fragment ARRAY_DESCRIPTOR_EMIT
: ARRAY_DESCRIPTOR {emit($ARRAY_DESCRIPTOR, ARRAY_DESCRIPTOR);};
// Array type descriptor: the '[' prefix followed by the element type.
fragment ARRAY_DESCRIPTOR
: ARRAY_TYPE_PREFIX (PRIMITIVE_TYPE | CLASS_DESCRIPTOR);
// Between 1 and 255 '[' characters.
fragment ARRAY_TYPE_PREFIX
: ARRAY_CHAR_LIST[255];
// Matches from 1 to maxCount '[' characters. The gated predicate only allows
// the recursive alternative while more than one '[' is still permitted.
fragment ARRAY_CHAR_LIST[int maxCount]
: {$maxCount > 1}?=> '[' ARRAY_CHAR_LIST[$maxCount - 1]
| '['
;
fragment ACCESS_SPEC_EMIT
: ACCESS_SPEC {emit($ACCESS_SPEC, ACCESS_SPEC);};
// Access flag keywords accepted on classes, fields and methods.
fragment ACCESS_SPEC
: 'public'
| 'private'
| 'static'
| 'constructor'
| 'final';
// Matches a member name and queues it as a MEMBER_NAME token.
fragment MEMBER_NAME_EMIT
: MEMBER_NAME {emit($MEMBER_NAME, MEMBER_NAME);};
// Field or method name: either a simple name wrapped in a matching pair of
// angle brackets, or a bare simple name. Requiring the brackets as a pair
// rejects unbalanced spellings such as "<init" and "init>".
fragment MEMBER_NAME
: '<' SIMPLE_NAME '>'
| SIMPLE_NAME;
// One or more characters from the set below: ASCII letters and digits, '$',
// '-', '_', and the listed Unicode ranges.
fragment SIMPLE_NAME:
( 'A'..'Z'
| 'a'..'z'
| '0'..'9'
| '$'
| '-'
| '_'
| '\u00a1'..'\u1fff'
| '\u2010'..'\u2027'
| '\u2030'..'\ud7ff'
| '\ue000'..'\uffef'
)+;
// Any literal that may follow '=' in a .field phrase; emits exactly one token
// of the matching literal type.
fragment LITERAL_EMITCHILD
: STRING_LITERAL_EMIT
| INTEGER_LITERAL_EMIT
| LONG_LITERAL_EMIT
| FLOAT_LITERAL_EMIT
| DOUBLE_LITERAL_EMIT
| CHAR_LITERAL_EMIT
| BOOL_LITERAL_EMIT;
// Matches a string literal and emits it as a STRING_LITERAL token whose text
// is the decoded form collected in sb (escape sequences already replaced by
// the characters they denote, surrounding double quotes kept).
fragment STRING_LITERAL_EMIT
@init {StringBuilder sb = new StringBuilder();}
: STRING_LITERAL[sb]
{
$STRING_LITERAL.setText(sb.toString());
emit($STRING_LITERAL, STRING_LITERAL);
};
// Matches a double-quoted string and appends its decoded content to sb,
// including the opening and closing quote characters.
// Raw backslashes, double quotes, CR and LF are not allowed inside the string;
// a backslash must start an ESCAPE_SEQUENCE.
fragment STRING_LITERAL [StringBuilder sb]
: '"' {sb.append('"');}
( ESCAPE_SEQUENCE[sb]
// input.LA(-1) is the character that was just consumed by the set match
| ~( '\\' | '"' | '\r' | '\n' ) {sb.append((char)input.LA(-1));}
)*
'"' {sb.append('"');}
;
// Matches one backslash escape and appends the character it denotes to sb.
// Supported escapes: \b \t \n \f \r \" \' \\ and \u followed by exactly four
// hex digits, which are parsed as a base-16 code unit.
fragment
ESCAPE_SEQUENCE[StringBuilder sb]
: '\\'
(
'b' {sb.append("\b");}
| 't' {sb.append("\t");}
| 'n' {sb.append("\n");}
| 'f' {sb.append("\f");}
| 'r' {sb.append("\r");}
| '\"' {sb.append("\"");}
| '\'' {sb.append("'");}
| '\\' {sb.append("\\");}
| 'u' HEX_DIGITS {sb.append((char)Integer.parseInt($HEX_DIGITS.text, 16));}
);
// Exactly four hexadecimal digits (the payload of a \u escape).
fragment HEX_DIGITS
    : HEX_DIGIT HEX_DIGIT
      HEX_DIGIT HEX_DIGIT
    ;

// A single hexadecimal digit, upper or lower case.
fragment HEX_DIGIT
    : '0'..'9' | 'a'..'f' | 'A'..'F'
    ;
// Emits the matched integer literal as an INTEGER_LITERAL token.
fragment INTEGER_LITERAL_EMIT
: INTEGER_LITERAL {emit($INTEGER_LITERAL, INTEGER_LITERAL);};
// An integer literal in one of four forms: zero, a decimal number without a
// leading zero, an octal number (leading '0'), or a hexadecimal number with a
// 0x/0X prefix. Only the zero and decimal forms accept a leading '-'.
fragment INTEGER_LITERAL
: '-'? '0'
| '-'? ('1'..'9') ('0'..'9')*
| '0' ('0'..'7')+
| HEX_PREFIX HEX_DIGIT+
;
// The prefix that introduces a hexadecimal number: "0x" or "0X".
fragment HEX_PREFIX
    : '0' ( 'x' | 'X' )
    ;
// Emits the matched long literal as a LONG_LITERAL token.
fragment LONG_LITERAL_EMIT
    : matched=LONG_LITERAL { emit($matched, LONG_LITERAL); }
    ;

// An integer literal followed by an 'l' or 'L' suffix.
fragment LONG_LITERAL
    : INTEGER_LITERAL ( 'l' | 'L' )
    ;
// The numeric part of a float/double literal, without any type suffix:
//   - digits '.' optional digits, optional decimal exponent
//   - '.' digits, optional decimal exponent
//   - digits with a mandatory decimal exponent
//   - hexadecimal form: 0x prefix, hex mantissa, mandatory binary exponent
// No leading sign is accepted by this rule.
fragment FLOATING_POINT_NUMBER
: ('0' .. '9')+ '.' ('0' .. '9')* DECIMAL_EXPONENT?
| '.' ( '0' .. '9' )+ DECIMAL_EXPONENT?
| ('0' .. '9')+ DECIMAL_EXPONENT
| HEX_PREFIX
( HEX_DIGIT+ ('.' HEX_DIGIT*)?
| '.' HEX_DIGIT+
)
BINARY_EXPONENT
;
// Decimal exponent: 'e' or 'E', an optional '-', then one or more digits.
// A '+' sign is not accepted.
fragment DECIMAL_EXPONENT
: ('e'|'E') '-'? ('0'..'9')+;
// Binary exponent of a hexadecimal floating point number: 'p' or 'P', an
// optional '-', then one or more decimal digits. A '+' sign is not accepted.
fragment BINARY_EXPONENT
: ('p'|'P') '-'? ('0'..'9')+;
// Emits the matched float literal as a FLOAT_LITERAL token.
fragment FLOAT_LITERAL_EMIT
    : matched=FLOAT_LITERAL { emit($matched, FLOAT_LITERAL); }
    ;

// A floating point number or a plain run of digits, followed by a mandatory
// 'f' or 'F' suffix.
fragment FLOAT_LITERAL
    : ( FLOATING_POINT_NUMBER | ('0' .. '9')+ ) ( 'f' | 'F' )
    ;
// Emits the matched double literal as a DOUBLE_LITERAL token.
fragment DOUBLE_LITERAL_EMIT
    : matched=DOUBLE_LITERAL { emit($matched, DOUBLE_LITERAL); }
    ;

// Either a floating point number with an optional 'd'/'D' suffix, or a plain
// run of digits with a mandatory 'd'/'D' suffix.
fragment DOUBLE_LITERAL
    : FLOATING_POINT_NUMBER ( 'd' | 'D' )?
    | ('0' .. '9')+ ( 'd' | 'D' )
    ;
// Emits the matched character literal as a CHAR_LITERAL token.
fragment CHAR_LITERAL_EMIT
: CHAR_LITERAL {emit($CHAR_LITERAL, CHAR_LITERAL);};
// Matches a single-quoted character literal holding either one escape
// sequence or one character other than a single quote, backslash, CR or LF.
// In the escape case the decoded text is rebuilt in sb (opening quote,
// decoded character, closing quote) and handed to setText().
// NOTE(review): STRING_LITERAL_EMIT calls setText() on the labelled token it
// emits, whereas this rule calls the lexer-level setText() from inside a
// fragment - confirm the decoded text actually reaches the token emitted by
// CHAR_LITERAL_EMIT.
fragment CHAR_LITERAL
: '\'' {StringBuilder sb = new StringBuilder("'");}
( ESCAPE_SEQUENCE[sb] {sb.append("'"); setText(sb.toString());}
| ~( '\'' | '\\' | '\r' | '\n' )
)
'\''
;
// Emits the matched boolean keyword as a BOOL_LITERAL token.
fragment BOOL_LITERAL_EMIT
    : matched=BOOL_LITERAL { emit($matched, BOOL_LITERAL); }
    ;

// The boolean keywords.
fragment BOOL_LITERAL
    : 'true'
    | 'false'
    ;
// Emits the matched opcode name as an INSTRUCTION_FORMAT10x token.
fragment INSTRUCTION_FORMAT10x_EMIT
    : matched=INSTRUCTION_FORMAT10x { emit($matched, INSTRUCTION_FORMAT10x); }
    ;

// Opcode names grouped under format 10x (used by the parser without operands).
fragment INSTRUCTION_FORMAT10x
    : 'return-void' | 'nop'
    ;
// Emits the matched opcode name as an INSTRUCTION_FORMAT11x token.
fragment INSTRUCTION_FORMAT11x_EMIT
    : matched=INSTRUCTION_FORMAT11x { emit($matched, INSTRUCTION_FORMAT11x); }
    ;

// Opcode names grouped under format 11x (the parser expects one register).
fragment INSTRUCTION_FORMAT11x
    : 'move-result' | 'move-result-wide' | 'move-result-object'
    | 'move-exception'
    | 'return' | 'return-wide' | 'return-object'
    | 'monitor-enter' | 'monitor-exit'
    | 'throw'
    ;
// Emits the matched opcode name as an INSTRUCTION_FORMAT12x token.
fragment INSTRUCTION_FORMAT12x_EMIT
    : matched=INSTRUCTION_FORMAT12x { emit($matched, INSTRUCTION_FORMAT12x); }
    ;

// Opcode names grouped under format 12x (the parser expects two registers).
fragment INSTRUCTION_FORMAT12x
      // moves and array length
    : 'move' | 'move-wide' | 'move-object'
    | 'array-length'
      // unary negate / not
    | 'neg-int' | 'not-int' | 'neg-long' | 'not-long'
    | 'neg-float' | 'neg-double'
      // primitive conversions
    | 'int-to-long' | 'int-to-float' | 'int-to-double'
    | 'long-to-int' | 'long-to-float' | 'long-to-double'
    | 'float-to-int' | 'float-to-long' | 'float-to-double'
    | 'double-to-int' | 'double-to-long' | 'double-to-float'
    | 'int-to-byte' | 'int-to-char' | 'int-to-short'
      // int binary operations, 2addr form
    | 'add-int/2addr' | 'sub-int/2addr' | 'mul-int/2addr'
    | 'div-int/2addr' | 'rem-int/2addr' | 'and-int/2addr'
    | 'or-int/2addr' | 'xor-int/2addr' | 'shl-int/2addr'
    | 'shr-int/2addr' | 'ushr-int/2addr'
      // long binary operations, 2addr form
    | 'add-long/2addr' | 'sub-long/2addr' | 'mul-long/2addr'
    | 'div-long/2addr' | 'rem-long/2addr' | 'and-long/2addr'
    | 'or-long/2addr' | 'xor-long/2addr' | 'shl-long/2addr'
    | 'shr-long/2addr' | 'ushr-long/2addr'
      // float binary operations, 2addr form
    | 'add-float/2addr' | 'sub-float/2addr' | 'mul-float/2addr'
    | 'div-float/2addr' | 'rem-float/2addr'
      // double binary operations, 2addr form
    | 'add-double/2addr' | 'sub-double/2addr' | 'mul-double/2addr'
    | 'div-double/2addr' | 'rem-double/2addr'
    ;
// Emits the matched opcode name as an INSTRUCTION_FORMAT21c_FIELD token.
fragment INSTRUCTION_FORMAT21c_FIELD_EMIT
    : matched=INSTRUCTION_FORMAT21c_FIELD { emit($matched, INSTRUCTION_FORMAT21c_FIELD); }
    ;

// The sget*/sput* opcode names (the parser expects a register and a fully
// qualified field).
fragment INSTRUCTION_FORMAT21c_FIELD
    : 'sget' | 'sget-wide' | 'sget-object' | 'sget-boolean'
    | 'sget-byte' | 'sget-char' | 'sget-short'
    | 'sput' | 'sput-wide' | 'sput-object' | 'sput-boolean'
    | 'sput-byte' | 'sput-char' | 'sput-short'
    ;
// Emits the matched opcode name as an INSTRUCTION_FORMAT21c_STRING token.
fragment INSTRUCTION_FORMAT21c_STRING_EMIT
    : matched=INSTRUCTION_FORMAT21c_STRING { emit($matched, INSTRUCTION_FORMAT21c_STRING); }
    ;

// The opcode name whose operand is a string literal.
fragment INSTRUCTION_FORMAT21c_STRING
    : 'const-string'
    ;
// Emits the matched opcode name as an INSTRUCTION_FORMAT21c_TYPE token.
fragment INSTRUCTION_FORMAT21c_TYPE_EMIT
    : matched=INSTRUCTION_FORMAT21c_TYPE { emit($matched, INSTRUCTION_FORMAT21c_TYPE); }
    ;

// Opcode names whose operand is a class or array type descriptor.
fragment INSTRUCTION_FORMAT21c_TYPE
    : 'check-cast' | 'new-instance' | 'const-class'
    ;
// Emits the matched opcode name as an INSTRUCTION_FORMAT22c_FIELD token.
fragment INSTRUCTION_FORMAT22c_FIELD_EMIT
    : matched=INSTRUCTION_FORMAT22c_FIELD { emit($matched, INSTRUCTION_FORMAT22c_FIELD); }
    ;

// The iget*/iput* opcode names (the parser expects two registers and a fully
// qualified field).
fragment INSTRUCTION_FORMAT22c_FIELD
    : 'iget' | 'iget-wide' | 'iget-object' | 'iget-boolean'
    | 'iget-byte' | 'iget-char' | 'iget-short'
    | 'iput' | 'iput-wide' | 'iput-object' | 'iput-boolean'
    | 'iput-byte' | 'iput-char' | 'iput-short'
    ;
// Emits the matched opcode name as an INSTRUCTION_FORMAT35c_METHOD token.
fragment INSTRUCTION_FORMAT35c_METHOD_EMIT
    : matched=INSTRUCTION_FORMAT35c_METHOD { emit($matched, INSTRUCTION_FORMAT35c_METHOD); }
    ;

// The invoke-* opcode names that take an explicit register list.
fragment INSTRUCTION_FORMAT35c_METHOD
    : 'invoke-virtual' | 'invoke-super' | 'invoke-direct'
    | 'invoke-static' | 'invoke-interface'
    ;
// Emits the matched opcode name as an INSTRUCTION_FORMAT3rc_METHOD token.
fragment INSTRUCTION_FORMAT3rc_METHOD_EMIT
    : matched=INSTRUCTION_FORMAT3rc_METHOD { emit($matched, INSTRUCTION_FORMAT3rc_METHOD); }
    ;

// The invoke-*/range opcode names that take a register range.
fragment INSTRUCTION_FORMAT3rc_METHOD
    : 'invoke-virtual/range' | 'invoke-super/range' | 'invoke-direct/range'
    | 'invoke-static/range' | 'invoke-interface/range'
    ;
// Emits the matched '(' as an OPEN_PAREN token.
fragment OPEN_PAREN_EMIT
    : matched=OPEN_PAREN { emit($matched, OPEN_PAREN); }
    ;

fragment OPEN_PAREN
    : '('
    ;

// Emits the matched ')' as a CLOSE_PAREN token.
fragment CLOSE_PAREN_EMIT
    : matched=CLOSE_PAREN { emit($matched, CLOSE_PAREN); }
    ;

fragment CLOSE_PAREN
    : ')'
    ;
// Emits the matched '{' as an OPEN_BRACKET token.
fragment OPEN_BRACKET_EMIT
    : matched=OPEN_BRACKET { emit($matched, OPEN_BRACKET); }
    ;

fragment OPEN_BRACKET
    : '{'
    ;

// Emits the matched '}' as a CLOSE_BRACKET token.
fragment CLOSE_BRACKET_EMIT
    : matched=CLOSE_BRACKET { emit($matched, CLOSE_BRACKET); }
    ;

fragment CLOSE_BRACKET
    : '}'
    ;
// Matches a run of whitespace and emits it as a WHITE_SPACE token on the
// hidden channel, via the three-argument emit helper.
fragment WS
: WHITE_SPACE {emit($WHITE_SPACE, WHITE_SPACE, Token.HIDDEN_CHANNEL);};
// One or more spaces, tabs, line feeds or carriage returns, placed on the
// hidden channel. Unlike the other rules in this part of the grammar, this
// rule is not marked as a fragment.
WHITE_SPACE
: (' '|'\t'|'\n'|'\r')+ {$channel = HIDDEN;};

View File

@ -0,0 +1,182 @@
/*
* [The "BSD licence"]
* Copyright (c) 2009 Ben Gruver
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
parser grammar smaliParser;
options {
tokenVocab=smaliLexer;
output=AST;
ASTLabelType=CommonTree;
}
// Token types declared by the parser in addition to those imported from the
// smaliLexer vocabulary.
tokens {
//I_* tokens are imaginary tokens used as parent AST nodes
I_CLASS_DEF;
I_SUPER;
I_ACCESS_LIST;
I_METHODS;
I_FIELDS;
I_FIELD;
I_FIELD_TYPE;
I_FIELD_INITIAL_VALUE;
I_METHOD;
I_METHOD_PROTOTYPE;
I_METHOD_RETURN_TYPE;
I_REGISTERS;
I_STATEMENTS;
I_STATEMENT_FORMAT10x;
I_STATEMENT_FORMAT11x;
I_STATEMENT_FORMAT12x;
I_STATEMENT_FORMAT21c_TYPE;
I_STATEMENT_FORMAT21c_FIELD;
I_STATEMENT_FORMAT22c_FIELD;
I_STATEMENT_FORMAT21c_STRING;
I_STATEMENT_FORMAT35c_METHOD;
I_STATEMENT_FORMAT3rc_METHOD;
I_REGISTER_RANGE;
I_REGISTER_LIST;
//CLASS_NAME and MEMBER_NAME are not imaginary: the parser rules below match
//them as input tokens.
//NOTE(review): presumably declared here because the lexer defines them only
//as fragment rules - confirm they are still needed alongside tokenVocab.
CLASS_NAME;
MEMBER_NAME;
}
@header {
package org.JesusFreke.smali;
}
// Entry rule: a class header followed by its methods and fields, all placed
// under a single I_CLASS_DEF root node.
smali_file: header methods_and_fields -> ^(I_CLASS_DEF header methods_and_fields);
// The class header: the class specification followed by the superclass
// specification.
header : class_spec super_spec;
// The class directive: directive token, access list, then the class
// descriptor. The rewrite drops the directive token and puts the class
// descriptor before the access list in the AST.
class_spec
: CLASS_DIRECTIVE access_list CLASS_DESCRIPTOR -> CLASS_DESCRIPTOR access_list;
// The superclass directive; produces an I_SUPER node (positioned at the rule's
// first token) with the superclass descriptor as its only child.
super_spec
: SUPER_DIRECTIVE CLASS_DESCRIPTOR -> ^(I_SUPER[$start, "I_SUPER"] CLASS_DESCRIPTOR);
// One or more access keywords, gathered under an I_ACCESS_LIST node.
access_list
: ACCESS_SPEC+ -> ^(I_ACCESS_LIST[$start,"I_ACCESS_LIST"] ACCESS_SPEC+);
// Any mix of method and field definitions in any order. The rewrite sorts
// them into two sibling subtrees: all methods under I_METHODS, followed by all
// fields under I_FIELDS.
methods_and_fields
: (method | field)* -> ^(I_METHODS method*) ^(I_FIELDS field*);
// A field definition: directive, access list, name, type and an optional
// literal initial value. In the AST the name comes first, followed by the
// access list, the type under I_FIELD_TYPE and, only when a literal was
// given, the value under I_FIELD_INITIAL_VALUE.
field : FIELD_DIRECTIVE access_list MEMBER_NAME field_type_descriptor literal?
-> ^(I_FIELD[$start, "I_FIELD"] MEMBER_NAME access_list ^(I_FIELD_TYPE field_type_descriptor) ^(I_FIELD_INITIAL_VALUE literal)?);
// A method definition: directive, access list, name, prototype, registers
// directive, statements and the end-of-method directive. In the AST the name
// and prototype precede the access list, and both directive tokens are
// dropped.
method : METHOD_DIRECTIVE access_list MEMBER_NAME method_prototype
registers_directive
statements
END_METHOD_DIRECTIVE
-> ^(I_METHOD[$start, "I_METHOD"] MEMBER_NAME method_prototype access_list registers_directive statements);
// A method prototype: parenthesised parameter types followed by the return
// type. In the AST the return type (under I_METHOD_RETURN_TYPE) comes first,
// followed by the parameter types; the parentheses are dropped.
method_prototype
: OPEN_PAREN field_type_descriptor* CLOSE_PAREN type_descriptor
-> ^(I_METHOD_PROTOTYPE[$start, "I_METHOD_PROTOTYPE"] ^(I_METHOD_RETURN_TYPE type_descriptor) field_type_descriptor*);
// The registers directive followed by an integer literal; only the literal is
// kept, under an I_REGISTERS node.
registers_directive
: REGISTERS_DIRECTIVE INTEGER_LITERAL
-> ^(I_REGISTERS[$start, "I_REGISTERS"] INTEGER_LITERAL);
// A method reference: class name, member name and method prototype. There is
// no rewrite, so the children are added to the AST in this order.
fully_qualified_method
: CLASS_NAME MEMBER_NAME method_prototype;
// A field reference: class name, member name and field type. There is no
// rewrite, so the children are added to the AST in this order.
fully_qualified_field
: CLASS_NAME MEMBER_NAME field_type_descriptor;
// Zero or more statements, gathered under an I_STATEMENTS node.
statements
: statement* -> ^(I_STATEMENTS statement*);
// A single statement; currently an instruction is the only kind.
statement
: instruction;
// Parses one instruction and rewrites it into a subtree rooted at the
// I_STATEMENT_* node for its format. Each root is positioned at the rule's
// first token ($start) and carries its own token name as text, so printed
// trees identify the format unambiguously. The opcode token is always the
// first child, followed by the operands; the brackets around register
// lists/ranges are dropped.
instruction
//e.g. return-void
: INSTRUCTION_FORMAT10x
-> ^(I_STATEMENT_FORMAT10x[$start, "I_STATEMENT_FORMAT10x"] INSTRUCTION_FORMAT10x)
| //e.g. move-result-object v1
INSTRUCTION_FORMAT11x REGISTER
-> ^(I_STATEMENT_FORMAT11x[$start, "I_STATEMENT_FORMAT11x"] INSTRUCTION_FORMAT11x REGISTER)
| //e.g. move v1 v2
INSTRUCTION_FORMAT12x REGISTER REGISTER
-> ^(I_STATEMENT_FORMAT12x[$start, "I_STATEMENT_FORMAT12x"] INSTRUCTION_FORMAT12x REGISTER REGISTER)
| //e.g. sget-object v0 java/lang/System/out Ljava/io/PrintStream;
INSTRUCTION_FORMAT21c_FIELD REGISTER fully_qualified_field
-> ^(I_STATEMENT_FORMAT21c_FIELD[$start, "I_STATEMENT_FORMAT21c_FIELD"] INSTRUCTION_FORMAT21c_FIELD REGISTER fully_qualified_field)
| //e.g. const-string v1 "Hello World!"
INSTRUCTION_FORMAT21c_STRING REGISTER STRING_LITERAL
-> ^(I_STATEMENT_FORMAT21c_STRING[$start, "I_STATEMENT_FORMAT21c_STRING"] INSTRUCTION_FORMAT21c_STRING REGISTER STRING_LITERAL)
| //e.g. const-class v2 Lorg/JesusFreke/HelloWorld2/HelloWorld2;
INSTRUCTION_FORMAT21c_TYPE REGISTER class_or_array_type_descriptor
-> ^(I_STATEMENT_FORMAT21c_TYPE[$start, "I_STATEMENT_FORMAT21c_TYPE"] INSTRUCTION_FORMAT21c_TYPE REGISTER class_or_array_type_descriptor)
| //e.g. iput-object v1 v0 org/JesusFreke/HelloWorld2/HelloWorld2/helloWorld Ljava/lang/String;
INSTRUCTION_FORMAT22c_FIELD REGISTER REGISTER fully_qualified_field
-> ^(I_STATEMENT_FORMAT22c_FIELD[$start, "I_STATEMENT_FORMAT22c_FIELD"] INSTRUCTION_FORMAT22c_FIELD REGISTER REGISTER fully_qualified_field)
| //e.g. invoke-virtual {v0,v1} java/io/PrintStream/print(Ljava/lang/Stream;)V
INSTRUCTION_FORMAT35c_METHOD OPEN_BRACKET register_list CLOSE_BRACKET fully_qualified_method
-> ^(I_STATEMENT_FORMAT35c_METHOD[$start, "I_STATEMENT_FORMAT35c_METHOD"] INSTRUCTION_FORMAT35c_METHOD register_list fully_qualified_method)
| //e.g. invoke-virtual/range {v25..v26} java/lang/StringBuilder/append(Ljava/lang/String;)Ljava/lang/StringBuilder;
INSTRUCTION_FORMAT3rc_METHOD OPEN_BRACKET register_range CLOSE_BRACKET fully_qualified_method
-> ^(I_STATEMENT_FORMAT3rc_METHOD[$start, "I_STATEMENT_FORMAT3rc_METHOD"] INSTRUCTION_FORMAT3rc_METHOD register_range fully_qualified_method)
;
// Zero or more registers, gathered under an I_REGISTER_LIST node.
register_list
: REGISTER* -> ^(I_REGISTER_LIST[$start, "I_REGISTER_LIST"] REGISTER*);
// One register optionally followed by a second one, gathered under an
// I_REGISTER_RANGE node.
// NOTE(review): no separator token appears between the two registers here -
// presumably the lexer consumes the ".." of a range; confirm.
register_range
: REGISTER REGISTER? -> ^(I_REGISTER_RANGE[$start, "I_REGISTER_RANGE"] REGISTER REGISTER?);
// A type that may be used for a field or parameter: anything except void.
field_type_descriptor
    : PRIMITIVE_TYPE | CLASS_DESCRIPTOR | ARRAY_DESCRIPTOR
    ;

// A reference type: a class or an array.
class_or_array_type_descriptor
    : CLASS_DESCRIPTOR | ARRAY_DESCRIPTOR
    ;

// Any type, including void (used for method return types).
type_descriptor
    : VOID_TYPE | PRIMITIVE_TYPE | CLASS_DESCRIPTOR | ARRAY_DESCRIPTOR
    ;
// Any literal token that may be used as a field's initial value.
literal
    : INTEGER_LITERAL | LONG_LITERAL
    | FLOAT_LITERAL | DOUBLE_LITERAL
    | CHAR_LITERAL | STRING_LITERAL
    | BOOL_LITERAL
    ;

View File

@ -29,7 +29,7 @@
tree grammar smaliTreeWalker; tree grammar smaliTreeWalker;
options { options {
tokenVocab=smali; tokenVocab=smaliParser;
ASTLabelType=CommonTree; ASTLabelType=CommonTree;
} }
@ -96,16 +96,16 @@ header : class_spec super_spec
}; };
class_spec returns[TypeIdItem type, int accessFlags] class_spec returns[TypeIdItem type, int accessFlags]
: class_name access_list : class_type_descriptor access_list
{ {
$type = $class_name.type; $type = $class_type_descriptor.type;
$accessFlags = $access_list.value; $accessFlags = $access_list.value;
}; };
super_spec returns[TypeIdItem type] super_spec returns[TypeIdItem type]
: ^(I_SUPER class_name) : ^(I_SUPER class_type_descriptor)
{ {
$type = $class_name.type; $type = $class_type_descriptor.type;
}; };
access_list returns [int value] access_list returns [int value]
@ -134,10 +134,10 @@ methods : ^(I_METHODS
})*); })*);
field returns[ClassDataItem.EncodedField encodedField, EncodedValue encodedValue] field returns[ClassDataItem.EncodedField encodedField, EncodedValue encodedValue]
:^(I_FIELD member_name access_list ^(I_FIELD_TYPE field_type_descriptor) field_initial_value) :^(I_FIELD MEMBER_NAME access_list ^(I_FIELD_TYPE field_type_descriptor) field_initial_value)
{ {
TypeIdItem classType = classDefItem.getClassType(); TypeIdItem classType = classDefItem.getClassType();
StringIdItem memberName = new StringIdItem(dexFile, $member_name.memberName); StringIdItem memberName = new StringIdItem(dexFile, $MEMBER_NAME.text);
TypeIdItem fieldType = $field_type_descriptor.type; TypeIdItem fieldType = $field_type_descriptor.type;
FieldIdItem fieldIdItem = new FieldIdItem(dexFile, classType, memberName, fieldType); FieldIdItem fieldIdItem = new FieldIdItem(dexFile, classType, memberName, fieldType);
@ -157,7 +157,7 @@ field returns[ClassDataItem.EncodedField encodedField, EncodedValue encodedValue
field_initial_value returns[EncodedValue encodedValue] field_initial_value returns[EncodedValue encodedValue]
: ^(I_FIELD_INITIAL_VALUE : ^(I_FIELD_INITIAL_VALUE
( int_literal { $encodedValue = new EncodedValue(dexFile, new IntEncodedValueSubField($int_literal.value)); } ( integer_literal { $encodedValue = new EncodedValue(dexFile, new IntEncodedValueSubField($integer_literal.value)); }
| long_literal { $encodedValue = new EncodedValue(dexFile, new LongEncodedValueSubField($long_literal.value)); } | long_literal { $encodedValue = new EncodedValue(dexFile, new LongEncodedValueSubField($long_literal.value)); }
| float_literal { $encodedValue = new EncodedValue(dexFile, new FloatEncodedValueSubField($float_literal.value)); } | float_literal { $encodedValue = new EncodedValue(dexFile, new FloatEncodedValueSubField($float_literal.value)); }
| double_literal { $encodedValue = new EncodedValue(dexFile, new DoubleEncodedValueSubField($double_literal.value)); } | double_literal { $encodedValue = new EncodedValue(dexFile, new DoubleEncodedValueSubField($double_literal.value)); }
@ -169,10 +169,10 @@ field_initial_value returns[EncodedValue encodedValue]
method returns[ClassDataItem.EncodedMethod encodedMethod] method returns[ClassDataItem.EncodedMethod encodedMethod]
: ^(I_METHOD method_name_and_prototype access_list locals_directive statements) : ^(I_METHOD method_name_and_prototype access_list registers_directive statements)
{ {
MethodIdItem methodIdItem = $method_name_and_prototype.methodIdItem; MethodIdItem methodIdItem = $method_name_and_prototype.methodIdItem;
int registers = $locals_directive.registers; int registers = $registers_directive.registers;
int access = $access_list.value; int access = $access_list.value;
boolean isStatic = (access & AccessFlags.STATIC) != 0; boolean isStatic = (access & AccessFlags.STATIC) != 0;
ArrayList<Instruction> instructions = $statements.instructions; ArrayList<Instruction> instructions = $statements.instructions;
@ -192,10 +192,10 @@ method_prototype returns[ProtoIdItem protoIdItem]
}; };
method_name_and_prototype returns[MethodIdItem methodIdItem] method_name_and_prototype returns[MethodIdItem methodIdItem]
: member_name method_prototype : MEMBER_NAME method_prototype
{ {
TypeIdItem classType = classDefItem.getClassType(); TypeIdItem classType = classDefItem.getClassType();
String methodNameString = $member_name.memberName; String methodNameString = $MEMBER_NAME.text;
StringIdItem methodName = new StringIdItem(dexFile, methodNameString); StringIdItem methodName = new StringIdItem(dexFile, methodNameString);
ProtoIdItem protoIdItem = $method_prototype.protoIdItem; ProtoIdItem protoIdItem = $method_prototype.protoIdItem;
@ -214,23 +214,25 @@ field_type_list returns[ArrayList<TypeIdItem> types]
} }
)*; )*;
locals_directive returns[int registers] registers_directive returns[int registers]
: ^(I_REGISTERS INT_LITERAL) {$registers = Integer.parseInt($INT_LITERAL.text);}; : ^(I_REGISTERS INTEGER_LITERAL) {$registers = Integer.parseInt($INTEGER_LITERAL.text);};
full_method_name_and_prototype returns[MethodIdItem methodIdItem]
: QUALIFIED_MEMBER__CLASS_NAME QUALIFIED_MEMBER__MEMBER_NAME method_prototype
fully_qualified_method returns[MethodIdItem methodIdItem]
: CLASS_NAME MEMBER_NAME method_prototype
{ {
TypeIdItem classType = new TypeIdItem(dexFile, "L" + $QUALIFIED_MEMBER__CLASS_NAME.text + ";"); TypeIdItem classType = new TypeIdItem(dexFile, "L" + $CLASS_NAME.text + ";");
StringIdItem methodName = new StringIdItem(dexFile, $QUALIFIED_MEMBER__MEMBER_NAME.text); StringIdItem methodName = new StringIdItem(dexFile, $MEMBER_NAME.text);
ProtoIdItem prototype = $method_prototype.protoIdItem; ProtoIdItem prototype = $method_prototype.protoIdItem;
$methodIdItem = new MethodIdItem(dexFile, classType, methodName, prototype); $methodIdItem = new MethodIdItem(dexFile, classType, methodName, prototype);
}; };
full_field_name_and_type returns[FieldIdItem fieldIdItem] fully_qualified_field returns[FieldIdItem fieldIdItem]
: QUALIFIED_MEMBER__CLASS_NAME QUALIFIED_MEMBER__MEMBER_NAME field_type_descriptor : CLASS_NAME MEMBER_NAME field_type_descriptor
{ {
TypeIdItem classType = new TypeIdItem(dexFile, "L" + $QUALIFIED_MEMBER__CLASS_NAME.text + ";"); TypeIdItem classType = new TypeIdItem(dexFile, "L" + $CLASS_NAME.text + ";");
StringIdItem fieldName = new StringIdItem(dexFile, $QUALIFIED_MEMBER__MEMBER_NAME.text); StringIdItem fieldName = new StringIdItem(dexFile, $MEMBER_NAME.text);
TypeIdItem fieldType = $field_type_descriptor.type; TypeIdItem fieldType = $field_type_descriptor.type;
$fieldIdItem = new FieldIdItem(dexFile, classType, fieldName, fieldType); $fieldIdItem = new FieldIdItem(dexFile, classType, fieldName, fieldType);
}; };
@ -249,42 +251,42 @@ statements returns[ArrayList<Instruction> instructions]
instruction returns[Instruction instruction] instruction returns[Instruction instruction]
//e.g. return //e.g. return
: ^(I_STATEMENT_FORMAT10x INSTRUCTION_NAME_FORMAT10x) : ^(I_STATEMENT_FORMAT10x INSTRUCTION_FORMAT10x)
{ {
Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT10x.text); Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT10x.text);
$instruction = Format10x.Format.make(dexFile, opcode.value); $instruction = Format10x.Format.make(dexFile, opcode.value);
} }
| //e.g. move-result-object v1 | //e.g. move-result-object v1
^(I_STATEMENT_FORMAT11x INSTRUCTION_NAME_FORMAT11x REGISTER) ^(I_STATEMENT_FORMAT11x INSTRUCTION_FORMAT11x REGISTER)
{ {
Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT11x.text); Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT11x.text);
short regA = parseRegister_byte($REGISTER.text); short regA = parseRegister_byte($REGISTER.text);
$instruction = Format11x.Format.make(dexFile, opcode.value, regA); $instruction = Format11x.Format.make(dexFile, opcode.value, regA);
} }
| //e.g. move v1 v2 | //e.g. move v1 v2
^(I_STATEMENT_FORMAT12x INSTRUCTION_NAME_FORMAT12x registerA=REGISTER registerB=REGISTER) ^(I_STATEMENT_FORMAT12x INSTRUCTION_FORMAT12x registerA=REGISTER registerB=REGISTER)
{ {
Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT12x.text); Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT12x.text);
byte regA = parseRegister_nibble($registerA.text); byte regA = parseRegister_nibble($registerA.text);
byte regB = parseRegister_nibble($registerB.text); byte regB = parseRegister_nibble($registerB.text);
$instruction = Format12x.Format.make(dexFile, opcode.value, regA, regB); $instruction = Format12x.Format.make(dexFile, opcode.value, regA, regB);
} }
| //e.g. sget_object v0 java/lang/System/out LJava/io/PrintStream; | //e.g. sget_object v0 java/lang/System/out LJava/io/PrintStream;
^(I_STATEMENT_FORMAT21c_FIELD INSTRUCTION_NAME_FORMAT21c_FIELD REGISTER full_field_name_and_type) ^(I_STATEMENT_FORMAT21c_FIELD INSTRUCTION_FORMAT21c_FIELD REGISTER fully_qualified_field)
{ {
Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT21c_FIELD.text); Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT21c_FIELD.text);
short regA = parseRegister_byte($REGISTER.text); short regA = parseRegister_byte($REGISTER.text);
FieldIdItem fieldIdItem = $full_field_name_and_type.fieldIdItem; FieldIdItem fieldIdItem = $fully_qualified_field.fieldIdItem;
$instruction = Format21c.Format.make(dexFile, opcode.value, regA, fieldIdItem); $instruction = Format21c.Format.make(dexFile, opcode.value, regA, fieldIdItem);
} }
| //e.g. const-string v1 "Hello World!" | //e.g. const-string v1 "Hello World!"
^(I_STATEMENT_FORMAT21c_STRING INSTRUCTION_NAME_FORMAT21c_STRING REGISTER string_literal) ^(I_STATEMENT_FORMAT21c_STRING INSTRUCTION_FORMAT21c_STRING REGISTER string_literal)
{ {
Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT21c_STRING.text); Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT21c_STRING.text);
short regA = parseRegister_byte($REGISTER.text); short regA = parseRegister_byte($REGISTER.text);
StringIdItem stringIdItem = new StringIdItem(dexFile, $string_literal.value); StringIdItem stringIdItem = new StringIdItem(dexFile, $string_literal.value);
@ -292,9 +294,9 @@ instruction returns[Instruction instruction]
$instruction = Format21c.Format.make(dexFile, opcode.value, regA, stringIdItem); $instruction = Format21c.Format.make(dexFile, opcode.value, regA, stringIdItem);
} }
| //e.g. const-class v2 org/JesusFreke/HelloWorld2/HelloWorld2 | //e.g. const-class v2 org/JesusFreke/HelloWorld2/HelloWorld2
^(I_STATEMENT_FORMAT21c_TYPE INSTRUCTION_NAME_FORMAT21c_TYPE REGISTER class_or_array_type_descriptor) ^(I_STATEMENT_FORMAT21c_TYPE INSTRUCTION_FORMAT21c_TYPE REGISTER class_or_array_type_descriptor)
{ {
Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT21c_TYPE.text); Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT21c_TYPE.text);
short regA = parseRegister_byte($REGISTER.text); short regA = parseRegister_byte($REGISTER.text);
TypeIdItem typeIdItem = $class_or_array_type_descriptor.type; TypeIdItem typeIdItem = $class_or_array_type_descriptor.type;
@ -302,22 +304,22 @@ instruction returns[Instruction instruction]
$instruction = Format21c.Format.make(dexFile, opcode.value, regA, typeIdItem); $instruction = Format21c.Format.make(dexFile, opcode.value, regA, typeIdItem);
} }
| //e.g. invoke-virtual {v0,v1} java/io/PrintStream/print(Ljava/lang/Stream;)V | //e.g. invoke-virtual {v0,v1} java/io/PrintStream/print(Ljava/lang/Stream;)V
^(I_STATEMENT_FORMAT35c_METHOD INSTRUCTION_NAME_FORMAT35c_METHOD register_list full_method_name_and_prototype) ^(I_STATEMENT_FORMAT35c_METHOD INSTRUCTION_FORMAT35c_METHOD register_list fully_qualified_method)
{ {
Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT35c_METHOD.text); Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT35c_METHOD.text);
//this depends on the fact that register_list returns a byte[5] //this depends on the fact that register_list returns a byte[5]
byte[] registers = $register_list.registers; byte[] registers = $register_list.registers;
byte registerCount = $register_list.registerCount; byte registerCount = $register_list.registerCount;
MethodIdItem methodIdItem = $full_method_name_and_prototype.methodIdItem; MethodIdItem methodIdItem = $fully_qualified_method.methodIdItem;
$instruction = Format35c.Format.make(dexFile, opcode.value, registerCount, registers[0], registers[1], registers[2], registers[3], registers[4], methodIdItem); $instruction = Format35c.Format.make(dexFile, opcode.value, registerCount, registers[0], registers[1], registers[2], registers[3], registers[4], methodIdItem);
} }
| //e.g. invoke-virtual/range {v25..v26} java/lang/StringBuilder/append(Ljava/lang/String;)Ljava/lang/StringBuilder; | //e.g. invoke-virtual/range {v25..v26} java/lang/StringBuilder/append(Ljava/lang/String;)Ljava/lang/StringBuilder;
^(I_STATEMENT_FORMAT3rc_METHOD INSTRUCTION_NAME_FORMAT3rc_METHOD register_range full_method_name_and_prototype) ^(I_STATEMENT_FORMAT3rc_METHOD INSTRUCTION_FORMAT3rc_METHOD register_range fully_qualified_method)
{ {
Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT3rc_METHOD.text); Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT3rc_METHOD.text);
int startRegister = $register_range.startRegister; int startRegister = $register_range.startRegister;
int endRegister = $register_range.endRegister; int endRegister = $register_range.endRegister;
@ -331,19 +333,19 @@ instruction returns[Instruction instruction]
throw new RuntimeException("A register range must have the lower register listed first"); throw new RuntimeException("A register range must have the lower register listed first");
} }
MethodIdItem methodIdItem = $full_method_name_and_prototype.methodIdItem; MethodIdItem methodIdItem = $fully_qualified_method.methodIdItem;
//not supported yet //not supported yet
$instruction = Format3rc.Format.make(dexFile, opcode.value, (short)registerCount, startRegister, methodIdItem); $instruction = Format3rc.Format.make(dexFile, opcode.value, (short)registerCount, startRegister, methodIdItem);
} }
| //e.g. iput-object v1 v0 org/JesusFreke/HelloWorld2/HelloWorld2.helloWorld Ljava/lang/String; | //e.g. iput-object v1 v0 org/JesusFreke/HelloWorld2/HelloWorld2.helloWorld Ljava/lang/String;
^(I_STATEMENT_FORMAT22c_FIELD INSTRUCTION_NAME_FORMAT22c_FIELD registerA=REGISTER registerB=REGISTER full_field_name_and_type) ^(I_STATEMENT_FORMAT22c_FIELD INSTRUCTION_FORMAT22c_FIELD registerA=REGISTER registerB=REGISTER fully_qualified_field)
{ {
Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_NAME_FORMAT22c_FIELD.text); Opcode opcode = Opcode.getOpcodeByName($INSTRUCTION_FORMAT22c_FIELD.text);
byte regA = parseRegister_nibble($registerA.text); byte regA = parseRegister_nibble($registerA.text);
byte regB = parseRegister_nibble($registerB.text); byte regB = parseRegister_nibble($registerB.text);
FieldIdItem fieldIdItem = $full_field_name_and_type.fieldIdItem; FieldIdItem fieldIdItem = $fully_qualified_field.fieldIdItem;
$instruction = Format22c.Format.make(dexFile, opcode.value, regA, regB, fieldIdItem); $instruction = Format22c.Format.make(dexFile, opcode.value, regA, regB, fieldIdItem);
} }
@ -378,54 +380,19 @@ register_range returns[int startRegister, int endRegister]
} }
; ;
simple_name
: SIMPLE_NAME
| ACCESS_SPEC
| INT_LITERAL
| LONG_LITERAL
| FLOAT_LITERAL_SIMPLE_NAME
| DOUBLE_LITERAL_SIMPLE_NAME
| BOOL_LITERAL
| PRIMITIVE_TYPE
| instruction_name
;
instruction_name returns[String value]
: INSTRUCTION_NAME_FORMAT10x
| INSTRUCTION_NAME_FORMAT11x
| INSTRUCTION_NAME_FORMAT12x
| INSTRUCTION_NAME_FORMAT21c_FIELD
| INSTRUCTION_NAME_FORMAT21c_STRING
| INSTRUCTION_NAME_FORMAT21c_TYPE
| INSTRUCTION_NAME_FORMAT22c_FIELD
| INSTRUCTION_NAME_FORMAT35c_METHOD
| INSTRUCTION_NAME_FORMAT3rc_METHOD
;
member_name returns[String memberName]
: (simple_name
| MEMBER_NAME) {$memberName = $start.getText();}
;
class_name returns [TypeIdItem type]
: token=(SIMPLE_NAME | CLASS_WITH_PACKAGE_NAME)
{
$type = new TypeIdItem(dexFile, 'L'+$token.text+';');
};
field_type_descriptor returns [TypeIdItem type] field_type_descriptor returns [TypeIdItem type]
: token=(PRIMITIVE_TYPE : (PRIMITIVE_TYPE
| CLASS_DESCRIPTOR | CLASS_DESCRIPTOR
| ARRAY_TYPE) | ARRAY_DESCRIPTOR)
{ {
$type = new TypeIdItem(dexFile, $token.text); $type = new TypeIdItem(dexFile, $start.getText());
}; };
class_or_array_type_descriptor returns [TypeIdItem type] class_or_array_type_descriptor returns [TypeIdItem type]
: token=(CLASS_DESCRIPTOR : (CLASS_DESCRIPTOR
| ARRAY_TYPE) | ARRAY_DESCRIPTOR)
{ {
$type = new TypeIdItem(dexFile, $token.text); $type = new TypeIdItem(dexFile, $start.getText());
}; };
class_type_descriptor returns [TypeIdItem type] class_type_descriptor returns [TypeIdItem type]
@ -439,8 +406,8 @@ type_descriptor returns [TypeIdItem type]
| field_type_descriptor {$type = $field_type_descriptor.type;} | field_type_descriptor {$type = $field_type_descriptor.type;}
; ;
int_literal returns[int value] integer_literal returns[int value]
: INT_LITERAL { $value = Integer.parseInt($INT_LITERAL.text); }; : INTEGER_LITERAL { $value = Integer.parseInt($INTEGER_LITERAL.text); };
long_literal returns[long value] long_literal returns[long value]
: LONG_LITERAL { $value = Long.parseLong($LONG_LITERAL.text); }; : LONG_LITERAL { $value = Long.parseLong($LONG_LITERAL.text); };
@ -455,7 +422,11 @@ char_literal returns[char value]
: CHAR_LITERAL { $value = $CHAR_LITERAL.text.charAt(0); }; : CHAR_LITERAL { $value = $CHAR_LITERAL.text.charAt(0); };
string_literal returns[String value] string_literal returns[String value]
: STRING_LITERAL { $value = $STRING_LITERAL.text; }; : STRING_LITERAL
{
$value = $STRING_LITERAL.text;
$value = $value.substring(1,$value.length()-1);
};
bool_literal returns[boolean value] bool_literal returns[boolean value]
: BOOL_LITERAL { $value = Boolean.parseBoolean($BOOL_LITERAL.text); }; : BOOL_LITERAL { $value = Boolean.parseBoolean($BOOL_LITERAL.text); };

View File

@ -32,6 +32,7 @@ import org.JesusFreke.dexlib.DexFile;
import org.JesusFreke.dexlib.util.ByteArrayOutput; import org.JesusFreke.dexlib.util.ByteArrayOutput;
import org.antlr.runtime.ANTLRInputStream; import org.antlr.runtime.ANTLRInputStream;
import org.antlr.runtime.CommonTokenStream; import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTree; import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.CommonTreeNodeStream; import org.antlr.runtime.tree.CommonTreeNodeStream;
@ -48,11 +49,12 @@ public class smali
List l = tokens.getTokens();*/ List l = tokens.getTokens();*/
ANTLRInputStream input = new ANTLRInputStream(new FileInputStream(args[0])); ANTLRInputStream input = new ANTLRInputStream(new FileInputStream(args[0]));
smaliLexer lexer = new smaliLexer(input); smaliLexer lexer = new smaliLexer(input);
CommonTokenStream tokens = new CommonTokenStream(lexer); CommonTokenStream tokens = new CommonTokenStream(lexer);
smaliParser parser = new smaliParser(tokens); smaliParser parser = new smaliParser(tokens);
smaliParser.smali_file_return result = parser.smali_file(); smaliParser.smali_file_return result = parser.smali_file();
CommonTree t = (CommonTree) result.getTree(); CommonTree t = (CommonTree) result.getTree();

View File

@ -1,10 +1,10 @@
.class public HelloWorld .class Lpublic HelloWorld;
.super java/lang/Object .super Ljava/lang/Object;
.method public <init>()V .method public <init>()V
.registers 1 .registers 1
invoke-direct {v0} java/lang/Object.<init>()V invoke-direct {v0}, java/lang/Object.<init>()V
return-void return-void
.end method .end method
@ -12,10 +12,10 @@
.method public static main([Ljava/lang/String;)V .method public static main([Ljava/lang/String;)V
.registers 4 .registers 4
sget-object v0 java/lang/System.out Ljava/io/PrintStream; sget-object v0, java/lang/System.out Ljava/io/PrintStream;
const-string v1 "Hello World!" const-string v1, "Hello World!"
invoke-virtual {v0, v1} java/io/PrintStream.print(Ljava/Lang/Stream;)V invoke-virtual {v0, v1}, java/io/PrintStream.print(Ljava/Lang/Stream;)V
return-void return-void
.end method .end method

View File

@ -1,5 +1,5 @@
.class public org/JesusFreke/HelloWorld2/HelloWorld2 .class public Lorg/JesusFreke/HelloWorld2/HelloWorld2;
.super android/app/Activity .super Landroid/app/Activity;
.field private helloWorld Ljava/lang/String; .field private helloWorld Ljava/lang/String;
.field private static helloWorldStatic Ljava/lang/String; .field private static helloWorldStatic Ljava/lang/String;
@ -10,17 +10,17 @@
.registers 1 .registers 1
const-string v0, "Static Hello World!" const-string v0, "Static Hello World!"
sput-object v0, org/JesusFreke/HelloWorld2/HelloWorld2.helloWorldStatic Ljava/lang/String; sput-object v0, org/JesusFreke/HelloWorld2/HelloWorld2/helloWorldStatic Ljava/lang/String;
return-void return-void
.end method .end method
.method public constructor <init>()V .method public constructor <init>()V
.registers 2 .registers 2
invoke-direct {v1}, android/app/Activity.<init>()V invoke-direct {v1}, android/app/Activity/<init>()V
const-string v0, "Hello World!" const-string v0, "Hello World!"
iput-object v0, v1, org/JesusFreke/HelloWorld2/HelloWorld2.helloWorld Ljava/lang/String; iput-object v0, v1, org/JesusFreke/HelloWorld2/HelloWorld2/helloWorld Ljava/lang/String;
return-void return-void
.end method .end method
@ -28,62 +28,62 @@
.method public onCreate(Landroid/os/Bundle;)V .method public onCreate(Landroid/os/Bundle;)V
.registers 6 .registers 6
invoke-super {v4,v5}, android/app/Activity.onCreate(Landroid/os/Bundle;)V invoke-super {v4,v5}, android/app/Activity/onCreate(Landroid/os/Bundle;)V
const-string v3, "\n" const-string v3, "\n"
new-instance v0, Landroid/widget/TextView; new-instance v0, Landroid/widget/TextView;
invoke-direct {v0,v4}, android/widget/TextView.<init>(Landroid/content/Context;)V invoke-direct {v0,v4}, android/widget/TextView/<init>(Landroid/content/Context;)V
iget-object v1, v4, org/JesusFreke/HelloWorld2/HelloWorld2.helloWorld Ljava/lang/String; iget-object v1, v4, org/JesusFreke/HelloWorld2/HelloWorld2/helloWorld Ljava/lang/String;
invoke-virtual {v1, v3}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; invoke-virtual {v1, v3}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
move-result-object v1 move-result-object v1
sget-object v2, org/JesusFreke/HelloWorld2/HelloWorld2.helloWorldStatic Ljava/lang/String; sget-object v2, org/JesusFreke/HelloWorld2/HelloWorld2/helloWorldStatic Ljava/lang/String;
invoke-virtual {v1, v2}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; invoke-virtual {v1, v2}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
move-result-object v1 move-result-object v1
invoke-virtual {v1, v3}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; invoke-virtual {v1, v3}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
move-result-object v1 move-result-object v1
sget-object v2, org/JesusFreke/HelloWorld2/HelloWorld2.helloWorldStatic2 Ljava/lang/String; sget-object v2, org/JesusFreke/HelloWorld2/HelloWorld2/helloWorldStatic2 Ljava/lang/String;
invoke-virtual/range {v1 .. v2}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; invoke-virtual/range {v1 .. v2}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
move-result-object v1 move-result-object v1
invoke-virtual {v1, v3}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; invoke-virtual {v1, v3}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
move-result-object v1 move-result-object v1
const-class v2, Lorg/JesusFreke/HelloWorld2/HelloWorld2; const-class v2, Lorg/JesusFreke/HelloWorld2/HelloWorld2;
invoke-virtual {v2}, java/lang/Class.getName()Ljava/lang/String; invoke-virtual {v2}, java/lang/Class/getName()Ljava/lang/String;
move-result-object v2 move-result-object v2
invoke-virtual/range {v1 .. v2}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; invoke-virtual/range {v1 .. v2}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
move-result-object v1 move-result-object v1
invoke-virtual {v1, v3}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; invoke-virtual {v1, v3}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
move-result-object v1 move-result-object v1
const-class v2, [Lorg/JesusFreke/HelloWorld2/HelloWorld2; const-class v2, [Lorg/JesusFreke/HelloWorld2/HelloWorld2;
invoke-virtual {v2}, java/lang/Class.getName()Ljava/lang/String; invoke-virtual {v2}, java/lang/Class/getName()Ljava/lang/String;
move-result-object v2 move-result-object v2
invoke-virtual/range {v1 .. v2}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; invoke-virtual/range {v1 .. v2}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
move-result-object v1 move-result-object v1
invoke-virtual {v1, v3}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; invoke-virtual {v1, v3}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
move-result-object v1 move-result-object v1
const-class v2, [I const-class v2, [I
invoke-virtual {v2}, java/lang/Class.getName()Ljava/lang/String; invoke-virtual {v2}, java/lang/Class/getName()Ljava/lang/String;
move-result-object v2 move-result-object v2
invoke-virtual/range {v1 .. v2}, java/lang/String.concat(Ljava/lang/String;)Ljava/lang/String; invoke-virtual/range {v1 .. v2}, java/lang/String/concat(Ljava/lang/String;)Ljava/lang/String;
move-result-object v1 move-result-object v1
move-object v2, v1 move-object v2, v1
@ -93,8 +93,8 @@
check-cast v4, Landroid/app/Activity; check-cast v4, Landroid/app/Activity;
invoke-virtual {v0,v2}, android/widget/TextView.setText(Ljava/lang/CharSequence;)V invoke-virtual {v0,v2}, android/widget/TextView/setText(Ljava/lang/CharSequence;)V
invoke-virtual {v4,v0}, org/JesusFreke/HelloWorld2/HelloWorld2.setContentView(Landroid/view/View;)V invoke-virtual {v4,v0}, org/JesusFreke/HelloWorld2/HelloWorld2/setContentView(Landroid/view/View;)V
return-void return-void
.end method .end method