Remove ANTLR lexer

This commit is contained in:
Ben Gruver 2012-09-23 17:52:14 -07:00
parent b88e9c3752
commit 48d5b73027
5 changed files with 151 additions and 826 deletions

View File

@ -1,791 +0,0 @@
/*
* The comment, number, string and character constant lexical rules are
* derived from rules from the Java 1.6 grammar which can be found here:
* http://openjdk.java.net/projects/compiler-grammar/antlrworks/Java.g
*
* Specifically, these rules:
*
* BASE_INTEGER, DECIMAL_EXPONENT, BINARY_EXPONENT, HEX_PREFIX, HEX_DIGIT,
* BASE_FLOAT_OR_ID, BASE_FLOAT, ESCAPE_SEQUENCE, POSITIVE_INTEGER_LITERAL,
* NEGATIVE_INTEGER_LITERAL, LONG_LITERAL, SHORT_LITERAL, BYTE_LITERAL,
* FLOAT_LITERAL_OR_ID, DOUBLE_LITERAL_OR_ID, FLOAT_LITERAL, DOUBLE_LITERAL,
* BOOL_LITERAL, STRING_LITERAL, BASE_STRING_LITERAL, CHAR_LITERAL,
* BASE_CHAR_LITERAL
*
* These rules were originally copyrighted by Terence Parr, and are used here in
* accordance with the following license
*
* [The "BSD licence"]
* Copyright (c) 2007-2008 Terence Parr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
* The remainder of this grammar is released by me (Ben Gruver) under the
* following license:
*
* [The "BSD licence"]
* Copyright (c) 2010 Ben Gruver
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Lexer grammar for smali source text. The generated lexer class extends
// ANTLRLexerWithErrorInterface (see the superClass option below).
lexer grammar smaliLexer;
options {
superClass=ANTLRLexerWithErrorInterface;
}
@lexer::header {
package org.jf.smali;
import static org.jf.smali.LexerErrorInterface.ANTLRLexerWithErrorInterface;
}
@lexer::members {
// Channel number that error tokens are placed on, so they can be told apart
// from tokens on the default and HIDDEN channels.
public static final int ERROR_CHANNEL = 100;
// Builds the location prefix for an error message, in the form
// <source name>[<line>,<char position in line>].
public String getErrorHeader(RecognitionException e) {
return getSourceName()+"["+ e.line+","+e.charPositionInLine+"]";
}
}
/**********************************************************
* DIRECTIVES
**********************************************************/
// Every directive token is a fixed literal that starts with '.'.
// The two-word forms ('.end <name>' and '.restart local') are matched as a
// single token and require exactly one space character between the words.
CLASS_DIRECTIVE
: '.class';
SUPER_DIRECTIVE
: '.super';
IMPLEMENTS_DIRECTIVE
: '.implements';
SOURCE_DIRECTIVE
: '.source';
FIELD_DIRECTIVE
: '.field';
END_FIELD_DIRECTIVE
: '.end field';
SUBANNOTATION_DIRECTIVE
: '.subannotation';
END_SUBANNOTATION_DIRECTIVE
: '.end subannotation';
ANNOTATION_DIRECTIVE
: '.annotation';
END_ANNOTATION_DIRECTIVE
: '.end annotation';
ENUM_DIRECTIVE
: '.enum';
METHOD_DIRECTIVE
: '.method';
END_METHOD_DIRECTIVE
: '.end method';
REGISTERS_DIRECTIVE
: '.registers';
LOCALS_DIRECTIVE
: '.locals';
ARRAY_DATA_DIRECTIVE
: '.array-data';
END_ARRAY_DATA_DIRECTIVE
: '.end array-data';
PACKED_SWITCH_DIRECTIVE
: '.packed-switch';
END_PACKED_SWITCH_DIRECTIVE
: '.end packed-switch';
SPARSE_SWITCH_DIRECTIVE
: '.sparse-switch';
END_SPARSE_SWITCH_DIRECTIVE
: '.end sparse-switch';
CATCH_DIRECTIVE
: '.catch';
CATCHALL_DIRECTIVE
: '.catchall';
LINE_DIRECTIVE
: '.line';
PARAMETER_DIRECTIVE
: '.parameter';
END_PARAMETER_DIRECTIVE
: '.end parameter';
LOCAL_DIRECTIVE
: '.local';
END_LOCAL_DIRECTIVE
: '.end local';
RESTART_LOCAL_DIRECTIVE
: '.restart local';
PROLOGUE_DIRECTIVE
: '.prologue';
EPILOGUE_DIRECTIVE
: '.epilogue';
/**********************************************************
* LITERALS
**********************************************************/
// Unsigned integer body shared by the integer literal tokens and OFFSET:
// a lone 0, a decimal number with no leading zero, an octal number
// (leading 0 followed by octal digits), or a hexadecimal number (0x/0X
// prefix). Signs and type suffixes are added by the rules that use this
// fragment.
fragment BASE_INTEGER
: '0'
| ('1'..'9') ('0'..'9')*
| '0' ('0'..'7')+
| HEX_PREFIX HEX_DIGIT+;
// Exponent part of a decimal float: e/E, an optional '-', then one or more
// decimal digits. A '+' sign is not accepted.
fragment DECIMAL_EXPONENT
: ('e'|'E') '-'? ('0'..'9')+;
// Exponent part of a hexadecimal float: p/P, an optional '-', then one or
// more decimal digits. A '+' sign is not accepted.
fragment BINARY_EXPONENT
: ('p'|'P') '-'? ('0'..'9')+;
fragment HEX_PREFIX
: '0x'|'0X';
fragment HEX_DIGIT
: ('0'..'9')|('A'..'F')|('a'..'f');
// Exactly four hex digits; used for the \uXXXX form in ESCAPE_SEQUENCE.
fragment HEX_DIGITS
: HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT;
/*These forms contain no decimal point, so the same text can be either a
floating point number or an identifier: a decimal number with an exponent,
a hex number with a binary exponent, "infinity" (any letter case, optional
leading '-'), or "nan" (any letter case).
NOTE(review): the hex alternative does not accept a leading '-', unlike the
decimal and infinity alternatives here and the hex alternatives of
BASE_FLOAT - confirm whether that is intentional.*/
fragment BASE_FLOAT_OR_ID
: '-'? ('0'..'9')+ DECIMAL_EXPONENT
| HEX_PREFIX HEX_DIGIT+ BINARY_EXPONENT
| '-'? ('i' | 'I') ('n' | 'N') ('f' | 'F') ('i' | 'I') ('n' | 'N') ('i' | 'I') ('t' | 'T') ('y' | 'Y')
| ('n' | 'N') ('a' | 'A') ('n' | 'N');
/*These can't be identifiers, due to the decimal point. The decimal forms
take an optional exponent; the hex forms require a binary exponent.*/
fragment BASE_FLOAT
: '-'? ('0'..'9')+ '.' ('0'..'9')* DECIMAL_EXPONENT?
| '-'? '.' ('0'..'9')+ DECIMAL_EXPONENT?
| '-'? HEX_PREFIX HEX_DIGIT+ '.' HEX_DIGIT* BINARY_EXPONENT
| '-'? HEX_PREFIX '.' HEX_DIGIT+ BINARY_EXPONENT;
// Matches one backslash escape and appends the character it denotes to sb.
// Supported escapes: \b \t \n \f \r \" \' \\ and \u followed by exactly
// four hex digits, which are parsed as a base-16 number and cast to char.
fragment ESCAPE_SEQUENCE[StringBuilder sb]
: '\\'
(
'b' {sb.append("\b");}
| 't' {sb.append("\t");}
| 'n' {sb.append("\n");}
| 'f' {sb.append("\f");}
| 'r' {sb.append("\r");}
| '\"' {sb.append("\"");}
| '\'' {sb.append("'");}
| '\\' {sb.append("\\");}
| 'u' HEX_DIGITS {sb.append((char)Integer.parseInt($HEX_DIGITS.text, 16));}
);
// Integer literal tokens. An unsuffixed integer is split into a positive and
// a negative token; the suffixed forms take an optional leading '-' and a
// one-letter, case-insensitive suffix: l = long, s = short, t = byte.
POSITIVE_INTEGER_LITERAL
: BASE_INTEGER;
NEGATIVE_INTEGER_LITERAL
: '-' BASE_INTEGER;
LONG_LITERAL
: '-'? BASE_INTEGER ('l'|'L');
SHORT_LITERAL
: '-'? BASE_INTEGER ('s'|'S');
BYTE_LITERAL
: '-'? BASE_INTEGER ('t'|'T');
// Float literal with no decimal point (so the text may also be an
// identifier): a BASE_FLOAT_OR_ID form with an f/F suffix, or plain decimal
// digits with an f/F suffix.
FLOAT_LITERAL_OR_ID
: BASE_FLOAT_OR_ID ('f'|'F')
| '-'? ('0'..'9')+ ('f'|'F');
// Double literal with no decimal point. The d/D suffix is optional after a
// BASE_FLOAT_OR_ID form but required after plain decimal digits.
DOUBLE_LITERAL_OR_ID
: BASE_FLOAT_OR_ID ('d'|'D')?
| '-'? ('0'..'9')+ ('d'|'D');
// Literals containing a decimal point: an f/F suffix makes a float; a d/D
// suffix or no suffix at all makes a double.
FLOAT_LITERAL
: BASE_FLOAT ('f'|'F');
DOUBLE_LITERAL
: BASE_FLOAT ('d'|'D')?;
BOOL_LITERAL
: 'true'
| 'false';
NULL_LITERAL
: 'null';
// Double-quoted string. The token text is replaced by the contents of sb,
// i.e. the string with escape sequences already decoded, still wrapped in
// its surrounding double quotes.
STRING_LITERAL
@init {StringBuilder sb = new StringBuilder();}
: BASE_STRING_LITERAL[sb] {setText(sb.toString());};
// Body of a string literal: zero or more escape sequences or ordinary
// characters between double quotes. Backslash, double quote, CR and LF
// cannot appear unescaped. input.LA(-1) is used to read back the character
// that was just matched so it can be copied into sb.
fragment BASE_STRING_LITERAL[StringBuilder sb]
: '"' {sb.append('"');}
( ESCAPE_SEQUENCE[sb]
| ~( '\\' | '"' | '\r' | '\n' ) {sb.append((char)input.LA(-1));}
)*
'"' {sb.append('"');};
// Single-quoted character. As with STRING_LITERAL, the token text becomes
// the decoded character wrapped in its single quotes.
CHAR_LITERAL
@init {StringBuilder sb = new StringBuilder();}
: BASE_CHAR_LITERAL[sb] {setText(sb.toString());};
// Body of a character literal: exactly one escape sequence or one ordinary
// character (not backslash, single quote, CR or LF) between single quotes.
fragment BASE_CHAR_LITERAL[StringBuilder sb]
: '\'' {sb.append('\'');}
( ESCAPE_SEQUENCE[sb]
| ~( '\\' | '\'' | '\r' | '\n' ) {sb.append((char)input.LA(-1));}
)
'\'' { sb.append('\''); };
/**********************************************************
* MISC
**********************************************************/
// Register reference: the letter v or p followed by one or more decimal
// digits.
REGISTER
: ('v'|'p') ('0'..'9')+;
// Keyword sets. Each alternative is a fixed, case-sensitive literal.
ANNOTATION_VISIBILITY
: 'build'
| 'runtime'
| 'system';
ACCESS_SPEC
: 'public'
| 'private'
| 'protected'
| 'static'
| 'final'
| 'synchronized'
| 'bridge'
| 'varargs'
| 'native'
| 'abstract'
| 'strictfp'
| 'synthetic'
| 'constructor'
| 'declared-synchronized'
| 'interface'
| 'enum'
| 'annotation'
| 'volatile'
| 'transient';
VERIFICATION_ERROR_TYPE
: 'no-error'
| 'generic-error'
| 'no-such-class'
| 'no-such-field'
| 'no-such-method'
| 'illegal-class-access'
| 'illegal-field-access'
| 'illegal-method-access'
| 'class-change-error'
| 'instantiation-error';
// Index/offset references of the form <kind>@0x<hex digits>. The '0x' is
// part of the literal prefix, so only the lower-case form is accepted here.
INLINE_INDEX
: 'inline@0x' HEX_DIGIT+;
VTABLE_INDEX
: 'vtable@0x' HEX_DIGIT+;
FIELD_OFFSET
: 'field@0x' HEX_DIGIT+;
// A '+' immediately followed by an unsigned integer.
OFFSET
: '+' BASE_INTEGER;
// Comment running from '#' to the end of the line. The line terminator
// (CRLF, CR or LF) is included in the token when present; the second
// alternative handles a comment that is not followed by a line terminator.
// The token is sent to the HIDDEN channel.
LINE_COMMENT
: '#'
(
~('\n'|'\r')* ('\r\n' | '\r' | '\n')
| ~('\n'|'\r')*
)
{$channel = HIDDEN;};
/**********************************************************
* Instructions
**********************************************************/
// Instruction mnemonic tokens. Mnemonics are grouped into one token type per
// INSTRUCTION_FORMAT<id> rule, so the token type alone tells which group a
// mnemonic belongs to.
// NOTE(review): the <id> part (10t, 10x, 11n, ...) presumably names the
// Dalvik instruction format of the opcode, and the _ODEX suffix presumably
// marks opcodes that only occur in odex files - confirm against the dex
// format documentation.
INSTRUCTION_FORMAT10t
: 'goto';
INSTRUCTION_FORMAT10x
: 'return-void'
| 'nop';
INSTRUCTION_FORMAT10x_ODEX
: 'return-void-barrier';
INSTRUCTION_FORMAT11n
: 'const/4';
INSTRUCTION_FORMAT11x
: 'move-result'
| 'move-result-wide'
| 'move-result-object'
| 'move-exception'
| 'return'
| 'return-wide'
| 'return-object'
| 'monitor-enter'
| 'monitor-exit'
| 'throw';
// The 12x mnemonics are split over two token types. The _OR_ID rule holds
// the mnemonics made only of letters and '-'; the plain rule holds the
// '/2addr' mnemonics, which contain a '/'.
// NOTE(review): the _OR_ID suffix presumably signals that the text could
// also be lexed as an identifier, mirroring the *_LITERAL_OR_ID tokens -
// confirm against the parser grammar.
INSTRUCTION_FORMAT12x_OR_ID
: 'move'
| 'move-wide'
| 'move-object'
| 'array-length'
| 'neg-int'
| 'not-int'
| 'neg-long'
| 'not-long'
| 'neg-float'
| 'neg-double'
| 'int-to-long'
| 'int-to-float'
| 'int-to-double'
| 'long-to-int'
| 'long-to-float'
| 'long-to-double'
| 'float-to-int'
| 'float-to-long'
| 'float-to-double'
| 'double-to-int'
| 'double-to-long'
| 'double-to-float'
| 'int-to-byte'
| 'int-to-char'
| 'int-to-short';
INSTRUCTION_FORMAT12x
: 'add-int/2addr'
| 'sub-int/2addr'
| 'mul-int/2addr'
| 'div-int/2addr'
| 'rem-int/2addr'
| 'and-int/2addr'
| 'or-int/2addr'
| 'xor-int/2addr'
| 'shl-int/2addr'
| 'shr-int/2addr'
| 'ushr-int/2addr'
| 'add-long/2addr'
| 'sub-long/2addr'
| 'mul-long/2addr'
| 'div-long/2addr'
| 'rem-long/2addr'
| 'and-long/2addr'
| 'or-long/2addr'
| 'xor-long/2addr'
| 'shl-long/2addr'
| 'shr-long/2addr'
| 'ushr-long/2addr'
| 'add-float/2addr'
| 'sub-float/2addr'
| 'mul-float/2addr'
| 'div-float/2addr'
| 'rem-float/2addr'
| 'add-double/2addr'
| 'sub-double/2addr'
| 'mul-double/2addr'
| 'div-double/2addr'
| 'rem-double/2addr';
// Mnemonic tokens for the 20bc, 20t and 21* groups. The 21c group is split
// further by a _FIELD / _STRING / _TYPE suffix: the _FIELD rules hold the
// static field get/put mnemonics, _STRING holds const-string, and _TYPE
// holds check-cast, new-instance and const-class.
// NOTE(review): the suffix presumably names the kind of item the instruction
// references, and _ODEX presumably marks odex-only opcodes - confirm.
INSTRUCTION_FORMAT20bc
: 'throw-verification-error';
INSTRUCTION_FORMAT20t
: 'goto/16';
INSTRUCTION_FORMAT21c_FIELD
: 'sget'
| 'sget-wide'
| 'sget-object'
| 'sget-boolean'
| 'sget-byte'
| 'sget-char'
| 'sget-short'
| 'sput'
| 'sput-wide'
| 'sput-object'
| 'sput-boolean'
| 'sput-byte'
| 'sput-char'
| 'sput-short';
INSTRUCTION_FORMAT21c_FIELD_ODEX
: 'sget-volatile'
| 'sget-wide-volatile'
| 'sget-object-volatile'
| 'sput-volatile'
| 'sput-wide-volatile'
| 'sput-object-volatile';
INSTRUCTION_FORMAT21c_STRING
: 'const-string';
INSTRUCTION_FORMAT21c_TYPE
: 'check-cast'
| 'new-instance'
| 'const-class';
INSTRUCTION_FORMAT21h
: 'const/high16'
| 'const-wide/high16';
INSTRUCTION_FORMAT21s
: 'const/16'
| 'const-wide/16';
INSTRUCTION_FORMAT21t
: 'if-eqz'
| 'if-nez'
| 'if-ltz'
| 'if-gez'
| 'if-gtz'
| 'if-lez';
// Mnemonic tokens for the 22* groups: the '/lit8' and '/lit16' arithmetic
// forms, instance field get/put (plain, -volatile and -quick variants),
// instance-of / new-array, the two-register conditional branches, and the
// '/from16' moves. 'rsub-int' is the only 22s mnemonic without a '/' and has
// its own _OR_ID token type.
INSTRUCTION_FORMAT22b
: 'add-int/lit8'
| 'rsub-int/lit8'
| 'mul-int/lit8'
| 'div-int/lit8'
| 'rem-int/lit8'
| 'and-int/lit8'
| 'or-int/lit8'
| 'xor-int/lit8'
| 'shl-int/lit8'
| 'shr-int/lit8'
| 'ushr-int/lit8';
INSTRUCTION_FORMAT22c_FIELD
: 'iget'
| 'iget-wide'
| 'iget-object'
| 'iget-boolean'
| 'iget-byte'
| 'iget-char'
| 'iget-short'
| 'iput'
| 'iput-wide'
| 'iput-object'
| 'iput-boolean'
| 'iput-byte'
| 'iput-char'
| 'iput-short';
INSTRUCTION_FORMAT22c_FIELD_ODEX
: 'iget-volatile'
| 'iget-wide-volatile'
| 'iget-object-volatile'
| 'iput-volatile'
| 'iput-wide-volatile'
| 'iput-object-volatile';
INSTRUCTION_FORMAT22c_TYPE
: 'instance-of'
| 'new-array';
INSTRUCTION_FORMAT22cs_FIELD
: 'iget-quick'
| 'iget-wide-quick'
| 'iget-object-quick'
| 'iput-quick'
| 'iput-wide-quick'
| 'iput-object-quick';
INSTRUCTION_FORMAT22s_OR_ID
: 'rsub-int';
INSTRUCTION_FORMAT22s
: 'add-int/lit16'
| 'mul-int/lit16'
| 'div-int/lit16'
| 'rem-int/lit16'
| 'and-int/lit16'
| 'or-int/lit16'
| 'xor-int/lit16';
INSTRUCTION_FORMAT22t
: 'if-eq'
| 'if-ne'
| 'if-lt'
| 'if-ge'
| 'if-gt'
| 'if-le';
INSTRUCTION_FORMAT22x
: 'move/from16'
| 'move-wide/from16'
| 'move-object/from16';
// Mnemonic token for the 23x group: the cmp* comparisons, array element
// get/put, and the int/long/float/double binary arithmetic mnemonics in
// their plain form (no '/2addr' or '/lit' suffix).
INSTRUCTION_FORMAT23x
: 'cmpl-float'
| 'cmpg-float'
| 'cmpl-double'
| 'cmpg-double'
| 'cmp-long'
| 'aget'
| 'aget-wide'
| 'aget-object'
| 'aget-boolean'
| 'aget-byte'
| 'aget-char'
| 'aget-short'
| 'aput'
| 'aput-wide'
| 'aput-object'
| 'aput-boolean'
| 'aput-byte'
| 'aput-char'
| 'aput-short'
| 'add-int'
| 'sub-int'
| 'mul-int'
| 'div-int'
| 'rem-int'
| 'and-int'
| 'or-int'
| 'xor-int'
| 'shl-int'
| 'shr-int'
| 'ushr-int'
| 'add-long'
| 'sub-long'
| 'mul-long'
| 'div-long'
| 'rem-long'
| 'and-long'
| 'or-long'
| 'xor-long'
| 'shl-long'
| 'shr-long'
| 'ushr-long'
| 'add-float'
| 'sub-float'
| 'mul-float'
| 'div-float'
| 'rem-float'
| 'add-double'
| 'sub-double'
| 'mul-double'
| 'div-double'
| 'rem-double';
// Mnemonic tokens for the 30t, 31*, 32x, 35*, 3r* and 51l groups: wide
// gotos and constants, switch / fill-array-data, '/16' moves, and the
// invoke / filled-new-array / execute-inline mnemonics in both their plain
// and '/range' forms. 'const' has its own _OR_ID token type, separate from
// 'const-wide/32'.
// NOTE(review): the _METHOD / _TYPE suffixes presumably name the kind of
// item the instruction references, and _ODEX presumably marks odex-only
// opcodes - confirm.
INSTRUCTION_FORMAT30t
: 'goto/32';
INSTRUCTION_FORMAT31c
: 'const-string/jumbo';
INSTRUCTION_FORMAT31i_OR_ID
: 'const';
INSTRUCTION_FORMAT31i
: 'const-wide/32';
INSTRUCTION_FORMAT31t
: 'fill-array-data'
| 'packed-switch'
| 'sparse-switch';
INSTRUCTION_FORMAT32x
: 'move/16'
| 'move-wide/16'
| 'move-object/16';
INSTRUCTION_FORMAT35c_METHOD
: 'invoke-virtual'
| 'invoke-super'
| 'invoke-direct'
| 'invoke-static'
| 'invoke-interface';
INSTRUCTION_FORMAT35c_METHOD_ODEX
: 'invoke-direct-empty';
INSTRUCTION_FORMAT35c_TYPE
: 'filled-new-array';
INSTRUCTION_FORMAT35mi_METHOD
: 'execute-inline';
INSTRUCTION_FORMAT35ms_METHOD
: 'invoke-virtual-quick'
| 'invoke-super-quick';
INSTRUCTION_FORMAT3rc_METHOD
: 'invoke-virtual/range'
| 'invoke-super/range'
| 'invoke-direct/range'
| 'invoke-static/range'
| 'invoke-interface/range';
INSTRUCTION_FORMAT3rc_METHOD_ODEX
: 'invoke-object-init/range';
INSTRUCTION_FORMAT3rc_TYPE
: 'filled-new-array/range';
INSTRUCTION_FORMAT3rmi_METHOD
: 'execute-inline/range';
INSTRUCTION_FORMAT3rms_METHOD
: 'invoke-virtual-quick/range'
| 'invoke-super-quick/range';
INSTRUCTION_FORMAT51l
: 'const-wide';
/**********************************************************
* Types
**********************************************************/
// One or more name characters: ASCII letters and digits, '$', '-', '_', and
// the listed non-ASCII ranges. The gaps between the ranges exclude
// U+2000..U+200F, U+2028..U+202F, the surrogate range U+D800..U+DFFF and
// U+FFF0..U+FFFF.
fragment BASE_SIMPLE_NAME:
( 'A'..'Z'
| 'a'..'z'
| '0'..'9'
| '$'
| '-'
| '_'
| '\u00a1'..'\u1fff'
| '\u2010'..'\u2027'
| '\u2030'..'\ud7ff'
| '\ue000'..'\uffef'
)+;
// Single-letter primitive type codes. 'V' is not included here; it has its
// own VOID_TYPE token.
fragment BASE_PRIMITIVE_TYPE
: 'Z'|'B'|'S'|'C'|'I'|'J'|'F'|'D';
// Class descriptor: 'L', zero or more '<name>/' segments, a final name, and
// a closing ';'.
fragment BASE_CLASS_DESCRIPTOR
: 'L' (BASE_SIMPLE_NAME '/')* BASE_SIMPLE_NAME ';';
// Array descriptor: one or more '[' followed by a primitive type code or a
// class descriptor.
fragment BASE_ARRAY_DESCRIPTOR
: '['+ (BASE_PRIMITIVE_TYPE | BASE_CLASS_DESCRIPTOR);
fragment BASE_TYPE
: BASE_PRIMITIVE_TYPE
| BASE_CLASS_DESCRIPTOR
| BASE_ARRAY_DESCRIPTOR;
PRIMITIVE_TYPE
: BASE_PRIMITIVE_TYPE;
VOID_TYPE
: 'V';
CLASS_DESCRIPTOR
: BASE_CLASS_DESCRIPTOR;
ARRAY_DESCRIPTOR
: BASE_ARRAY_DESCRIPTOR;
// Two or more primitive type codes with nothing between them. Such text is
// made only of letters, so it could also be an identifier.
PARAM_LIST_OR_ID
: BASE_PRIMITIVE_TYPE BASE_PRIMITIVE_TYPE+;
// Two or more type descriptors of any kind with nothing between them.
PARAM_LIST
: BASE_TYPE BASE_TYPE+;
SIMPLE_NAME
: BASE_SIMPLE_NAME;
// The two special method names written with angle brackets, which
// BASE_SIMPLE_NAME cannot match.
METHOD_NAME
: '<init>'
| '<clinit>';
/**********************************************************
* Symbols
**********************************************************/
// Punctuation tokens; each is a fixed literal.
DOTDOT
: '..';
ARROW
: '->';
EQUAL
: '=';
COLON
: ':';
COMMA
: ',';
OPEN_BRACE
: '{';
CLOSE_BRACE
: '}';
OPEN_PAREN
: '(';
CLOSE_PAREN
: ')';
// A run of spaces, tabs, line feeds and carriage returns is collapsed into
// one token and sent to the HIDDEN channel.
WHITE_SPACE
: (' '|'\t'|'\n'|'\r')+ {$channel = HIDDEN;};

View File

@ -29,12 +29,148 @@
parser grammar smaliParser; parser grammar smaliParser;
options { options {
tokenVocab=smaliLexer;
output=AST; output=AST;
ASTLabelType=CommonTree; ASTLabelType=CommonTree;
} }
tokens { tokens {
//Lexer tokens
ACCESS_SPEC;
ANNOTATION_DIRECTIVE;
ANNOTATION_VISIBILITY;
ARRAY_DATA_DIRECTIVE;
ARRAY_DESCRIPTOR;
ARROW;
BASE_ARRAY_DESCRIPTOR;
BASE_CHAR_LITERAL;
BASE_CLASS_DESCRIPTOR;
BASE_FLOAT;
BASE_FLOAT_OR_ID;
BASE_INTEGER;
BASE_PRIMITIVE_TYPE;
BASE_SIMPLE_NAME;
BASE_STRING_LITERAL;
BASE_TYPE;
BINARY_EXPONENT;
BOOL_LITERAL;
BYTE_LITERAL;
CATCH_DIRECTIVE;
CATCHALL_DIRECTIVE;
CHAR_LITERAL;
CLASS_DESCRIPTOR;
CLASS_DIRECTIVE;
CLOSE_BRACE;
CLOSE_PAREN;
COLON;
COMMA;
DECIMAL_EXPONENT;
DOTDOT;
DOUBLE_LITERAL;
DOUBLE_LITERAL_OR_ID;
END_ANNOTATION_DIRECTIVE;
END_ARRAY_DATA_DIRECTIVE;
END_FIELD_DIRECTIVE;
END_LOCAL_DIRECTIVE;
END_METHOD_DIRECTIVE;
END_PACKED_SWITCH_DIRECTIVE;
END_PARAMETER_DIRECTIVE;
END_SPARSE_SWITCH_DIRECTIVE;
END_SUBANNOTATION_DIRECTIVE;
ENUM_DIRECTIVE;
EPILOGUE_DIRECTIVE;
EQUAL;
ESCAPE_SEQUENCE;
FIELD_DIRECTIVE;
FIELD_OFFSET;
FLOAT_LITERAL;
FLOAT_LITERAL_OR_ID;
HEX_DIGIT;
HEX_DIGITS;
HEX_PREFIX;
IMPLEMENTS_DIRECTIVE;
INLINE_INDEX;
INSTRUCTION_FORMAT10t;
INSTRUCTION_FORMAT10x;
INSTRUCTION_FORMAT10x_ODEX;
INSTRUCTION_FORMAT11n;
INSTRUCTION_FORMAT11x;
INSTRUCTION_FORMAT12x;
INSTRUCTION_FORMAT12x_OR_ID;
INSTRUCTION_FORMAT20bc;
INSTRUCTION_FORMAT20t;
INSTRUCTION_FORMAT21c_FIELD;
INSTRUCTION_FORMAT21c_FIELD_ODEX;
INSTRUCTION_FORMAT21c_STRING;
INSTRUCTION_FORMAT21c_TYPE;
INSTRUCTION_FORMAT21h;
INSTRUCTION_FORMAT21s;
INSTRUCTION_FORMAT21t;
INSTRUCTION_FORMAT22b;
INSTRUCTION_FORMAT22c_FIELD;
INSTRUCTION_FORMAT22c_FIELD_ODEX;
INSTRUCTION_FORMAT22c_TYPE;
INSTRUCTION_FORMAT22cs_FIELD;
INSTRUCTION_FORMAT22s;
INSTRUCTION_FORMAT22s_OR_ID;
INSTRUCTION_FORMAT22t;
INSTRUCTION_FORMAT22x;
INSTRUCTION_FORMAT23x;
INSTRUCTION_FORMAT30t;
INSTRUCTION_FORMAT31c;
INSTRUCTION_FORMAT31i;
INSTRUCTION_FORMAT31i_OR_ID;
INSTRUCTION_FORMAT31t;
INSTRUCTION_FORMAT32x;
INSTRUCTION_FORMAT35c_METHOD;
INSTRUCTION_FORMAT35c_METHOD_ODEX;
INSTRUCTION_FORMAT35c_TYPE;
INSTRUCTION_FORMAT35mi_METHOD;
INSTRUCTION_FORMAT35ms_METHOD;
INSTRUCTION_FORMAT3rc_METHOD;
INSTRUCTION_FORMAT3rc_METHOD_ODEX;
INSTRUCTION_FORMAT3rc_TYPE;
INSTRUCTION_FORMAT3rmi_METHOD;
INSTRUCTION_FORMAT3rms_METHOD;
INSTRUCTION_FORMAT51l;
INVALID_TOKEN;
LINE_COMMENT;
LINE_DIRECTIVE;
LOCAL_DIRECTIVE;
LOCALS_DIRECTIVE;
LONG_LITERAL;
METHOD_DIRECTIVE;
METHOD_NAME;
NEGATIVE_INTEGER_LITERAL;
NULL_LITERAL;
OFFSET;
OPEN_BRACE;
OPEN_PAREN;
PACKED_SWITCH_DIRECTIVE;
PARAM_LIST;
PARAM_LIST_OR_ID;
PARAMETER_DIRECTIVE;
POSITIVE_INTEGER_LITERAL;
PRIMITIVE_TYPE;
PROLOGUE_DIRECTIVE;
REGISTER;
REGISTERS_DIRECTIVE;
RESTART_LOCAL_DIRECTIVE;
SHORT_LITERAL;
SIMPLE_NAME;
SOURCE_DIRECTIVE;
SPARSE_SWITCH_DIRECTIVE;
STRING_LITERAL;
SUBANNOTATION_DIRECTIVE;
SUPER_DIRECTIVE;
VERIFICATION_ERROR_TYPE;
VOID_TYPE;
VTABLE_INDEX;
WHITE_SPACE;
//A couple of generated types that we remap other tokens to, to simplify the generated AST
LABEL;
INTEGER_LITERAL;
//I_* tokens are imaginary tokens used as parent AST nodes //I_* tokens are imaginary tokens used as parent AST nodes
I_CLASS_DEF; I_CLASS_DEF;
I_SUPER; I_SUPER;
@ -123,10 +259,6 @@ tokens {
I_STATEMENT_SPARSE_SWITCH; I_STATEMENT_SPARSE_SWITCH;
I_REGISTER_RANGE; I_REGISTER_RANGE;
I_REGISTER_LIST; I_REGISTER_LIST;
LABEL;
INTEGER_LITERAL;
INVALID_TOKEN;
} }
@header { @header {
@ -138,6 +270,8 @@ import org.jf.dexlib.Code.Opcode;
@members { @members {
public static final int ERROR_CHANNEL = 100;
private boolean verboseErrors = false; private boolean verboseErrors = false;
private boolean allowOdex = false; private boolean allowOdex = false;
private int apiLevel; private int apiLevel;

View File

@ -28,7 +28,6 @@
package org.jf.smali; package org.jf.smali;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonToken; import org.antlr.runtime.CommonToken;
public class InvalidToken extends CommonToken { public class InvalidToken extends CommonToken {
@ -37,13 +36,13 @@ public class InvalidToken extends CommonToken {
public InvalidToken(String message) { public InvalidToken(String message) {
super(smaliParser.INVALID_TOKEN); super(smaliParser.INVALID_TOKEN);
this.message = message; this.message = message;
this.channel = smaliLexer.ERROR_CHANNEL; this.channel = smaliParser.ERROR_CHANNEL;
} }
public InvalidToken(String message, String text) { public InvalidToken(String message, String text) {
super(smaliParser.INVALID_TOKEN, text); super(smaliParser.INVALID_TOKEN, text);
this.message = message; this.message = message;
this.channel = smaliLexer.ERROR_CHANNEL; this.channel = smaliParser.ERROR_CHANNEL;
} }
public String getMessage() { public String getMessage() {

View File

@ -103,7 +103,6 @@ public class main {
boolean fixJumbo = true; boolean fixJumbo = true;
boolean fixGoto = true; boolean fixGoto = true;
boolean verboseErrors = false; boolean verboseErrors = false;
boolean oldLexer = false;
boolean printTokens = false; boolean printTokens = false;
boolean apiSet = false; boolean apiSet = false;
@ -158,9 +157,6 @@ public class main {
case 'V': case 'V':
verboseErrors = true; verboseErrors = true;
break; break;
case 'L':
oldLexer = true;
break;
case 'T': case 'T':
printTokens = true; printTokens = true;
break; break;
@ -202,7 +198,7 @@ public class main {
boolean errors = false; boolean errors = false;
for (File file: filesToProcess) { for (File file: filesToProcess) {
if (!assembleSmaliFile(file, dexFile, verboseErrors, oldLexer, printTokens, allowOdex, apiLevel)) { if (!assembleSmaliFile(file, dexFile, verboseErrors, printTokens, allowOdex, apiLevel)) {
errors = true; errors = true;
} }
} }
@ -276,7 +272,7 @@ public class main {
} }
} }
private static boolean assembleSmaliFile(File smaliFile, DexFile dexFile, boolean verboseErrors, boolean oldLexer, private static boolean assembleSmaliFile(File smaliFile, DexFile dexFile, boolean verboseErrors,
boolean printTokens, boolean allowOdex, int apiLevel) boolean printTokens, boolean allowOdex, int apiLevel)
throws Exception { throws Exception {
CommonTokenStream tokens; CommonTokenStream tokens;
@ -285,27 +281,19 @@ public class main {
boolean lexerErrors = false; boolean lexerErrors = false;
LexerErrorInterface lexer; LexerErrorInterface lexer;
if (oldLexer) { FileInputStream fis = new FileInputStream(smaliFile.getAbsolutePath());
ANTLRFileStream input = new ANTLRFileStream(smaliFile.getAbsolutePath(), "UTF-8"); InputStreamReader reader = new InputStreamReader(fis, "UTF-8");
input.name = smaliFile.getAbsolutePath();
lexer = new smaliLexer(input); lexer = new smaliFlexLexer(reader);
tokens = new CommonTokenStream((TokenSource)lexer); ((smaliFlexLexer)lexer).setSourceFile(smaliFile);
} else { tokens = new CommonTokenStream((TokenSource)lexer);
FileInputStream fis = new FileInputStream(smaliFile.getAbsolutePath());
InputStreamReader reader = new InputStreamReader(fis, "UTF-8");
lexer = new smaliFlexLexer(reader);
((smaliFlexLexer)lexer).setSourceFile(smaliFile);
tokens = new CommonTokenStream((TokenSource)lexer);
}
if (printTokens) { if (printTokens) {
tokens.getTokens(); tokens.getTokens();
for (int i=0; i<tokens.size(); i++) { for (int i=0; i<tokens.size(); i++) {
Token token = tokens.get(i); Token token = tokens.get(i);
if (token.getChannel() == smaliLexer.HIDDEN) { if (token.getChannel() == smaliParser.HIDDEN) {
continue; continue;
} }
@ -423,10 +411,6 @@ public class main {
.withDescription("Generate verbose error messages") .withDescription("Generate verbose error messages")
.create("V"); .create("V");
Option oldLexerOption = OptionBuilder.withLongOpt("old-lexer")
.withDescription("Use the old lexer")
.create("L");
Option printTokensOption = OptionBuilder.withLongOpt("print-tokens") Option printTokensOption = OptionBuilder.withLongOpt("print-tokens")
.withDescription("Print the name and text of each token") .withDescription("Print the name and text of each token")
.create("T"); .create("T");
@ -442,7 +426,6 @@ public class main {
debugOptions.addOption(noFixJumboOption); debugOptions.addOption(noFixJumboOption);
debugOptions.addOption(noFixGotoOption); debugOptions.addOption(noFixGotoOption);
debugOptions.addOption(verboseErrorsOption); debugOptions.addOption(verboseErrorsOption);
debugOptions.addOption(oldLexerOption);
debugOptions.addOption(printTokensOption); debugOptions.addOption(printTokensOption);
for (Object option: basicOptions.getOptions()) { for (Object option: basicOptions.getOptions()) {

View File

@ -165,7 +165,7 @@ public class LexerTest {
for (int i=0; i<tokens.size(); i++) { for (int i=0; i<tokens.size(); i++) {
token = (CommonToken)tokens.get(i); token = (CommonToken)tokens.get(i);
if (discardHiddenTokens && token.getChannel() == smaliLexer.HIDDEN) { if (discardHiddenTokens && token.getChannel() == smaliParser.HIDDEN) {
continue; continue;
} }
@ -175,7 +175,7 @@ public class LexerTest {
if (token.getType() == smaliParser.INVALID_TOKEN) { if (token.getType() == smaliParser.INVALID_TOKEN) {
Assert.assertTrue("Encountered an INVALID_TOKEN not on the error channel", Assert.assertTrue("Encountered an INVALID_TOKEN not on the error channel",
token.getChannel() == smaliLexer.ERROR_CHANNEL); token.getChannel() == smaliParser.ERROR_CHANNEL);
} }
ExpectedToken expectedToken = expectedTokens.get(expectedTokenIndex++); ExpectedToken expectedToken = expectedTokens.get(expectedTokenIndex++);