Remove ANTLR lexer

This commit is contained in:
Ben Gruver 2012-09-23 17:52:14 -07:00
parent b88e9c3752
commit 48d5b73027
5 changed files with 151 additions and 826 deletions

View File

@ -1,791 +0,0 @@
/*
* The comment, number, string and character constant lexical rules are
* derived from rules from the Java 1.6 grammar which can be found here:
* http://openjdk.java.net/projects/compiler-grammar/antlrworks/Java.g
*
* Specifically, these rules:
*
* BASE_INTEGER, DECIMAL_EXPONENT, BINARY_EXPONENT, HEX_PREFIX, HEX_DIGIT,
* BASE_FLOAT_OR_ID, BASE_FLOAT, ESCAPE_SEQUENCE, POSITIVE_INTEGER_LITERAL,
* NEGATIVE_INTEGER_LITERAL, LONG_LITERAL, SHORT_LITERAL, BYTE_LITERAL,
* FLOAT_LITERAL_OR_ID, DOUBLE_LITERAL_OR_ID, FLOAT_LITERAL, DOUBLE_LITERAL,
* BOOL_LITERAL, STRING_LITERAL, BASE_STRING_LITERAL, CHAR_LITERAL,
* BASE_CHAR_LITERAL
*
* These rules were originally copyrighted by Terence Parr, and are used here in
* accordance with the following license
*
* [The "BSD licence"]
* Copyright (c) 2007-2008 Terence Parr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
* The remainder of this grammar is released by me (Ben Gruver) under the
* following license:
*
* [The "BSD licence"]
* Copyright (c) 2010 Ben Gruver
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Lexer grammar for smali source text. */
lexer grammar smaliLexer;
/* superClass makes the generated lexer class extend ANTLRLexerWithErrorInterface. */
options {
superClass=ANTLRLexerWithErrorInterface;
}
/*
 * Text emitted at the top of the generated lexer source: its package, plus a
 * static import because ANTLRLexerWithErrorInterface is a member of
 * LexerErrorInterface.
 */
@lexer::header {
package org.jf.smali;
import static org.jf.smali.LexerErrorInterface.ANTLRLexerWithErrorInterface;
}
/* Members copied verbatim into the generated lexer class. */
@lexer::members {
/* Channel number exposed for tokens that represent lexing errors. */
public static final int ERROR_CHANNEL = 100;
/**
 * Builds the location prefix for an error message.
 *
 * @param e the recognition error being reported
 * @return the source name followed by [line,charPositionInLine]
 */
public String getErrorHeader(RecognitionException e) {
return getSourceName()+"["+ e.line+","+e.charPositionInLine+"]";
}
}
/**********************************************************
* DIRECTIVES
**********************************************************/
/*
 * One token per directive; every rule matches a single fixed string that
 * starts with a dot. The ".end ..." and ".restart local" forms embed exactly
 * one space character inside the literal, so both words are matched together
 * as one token.
 */
CLASS_DIRECTIVE
: '.class';
SUPER_DIRECTIVE
: '.super';
IMPLEMENTS_DIRECTIVE
: '.implements';
SOURCE_DIRECTIVE
: '.source';
FIELD_DIRECTIVE
: '.field';
END_FIELD_DIRECTIVE
: '.end field';
SUBANNOTATION_DIRECTIVE
: '.subannotation';
END_SUBANNOTATION_DIRECTIVE
: '.end subannotation';
ANNOTATION_DIRECTIVE
: '.annotation';
END_ANNOTATION_DIRECTIVE
: '.end annotation';
ENUM_DIRECTIVE
: '.enum';
METHOD_DIRECTIVE
: '.method';
END_METHOD_DIRECTIVE
: '.end method';
REGISTERS_DIRECTIVE
: '.registers';
LOCALS_DIRECTIVE
: '.locals';
ARRAY_DATA_DIRECTIVE
: '.array-data';
END_ARRAY_DATA_DIRECTIVE
: '.end array-data';
PACKED_SWITCH_DIRECTIVE
: '.packed-switch';
END_PACKED_SWITCH_DIRECTIVE
: '.end packed-switch';
SPARSE_SWITCH_DIRECTIVE
: '.sparse-switch';
END_SPARSE_SWITCH_DIRECTIVE
: '.end sparse-switch';
CATCH_DIRECTIVE
: '.catch';
CATCHALL_DIRECTIVE
: '.catchall';
LINE_DIRECTIVE
: '.line';
PARAMETER_DIRECTIVE
: '.parameter';
END_PARAMETER_DIRECTIVE
: '.end parameter';
LOCAL_DIRECTIVE
: '.local';
END_LOCAL_DIRECTIVE
: '.end local';
RESTART_LOCAL_DIRECTIVE
: '.restart local';
PROLOGUE_DIRECTIVE
: '.prologue';
EPILOGUE_DIRECTIVE
: '.epilogue';
/**********************************************************
* LITERALS
**********************************************************/
/*
 * Integer body without sign or type suffix: a lone zero, a decimal number
 * with no leading zero, an octal number (leading 0 followed by digits 0-7),
 * or a hexadecimal number.
 */
fragment BASE_INTEGER
: '0'
| ('1'..'9') ('0'..'9')*
| '0' ('0'..'7')+
| HEX_PREFIX HEX_DIGIT+;
/* Decimal exponent: e or E, an optional minus sign (a plus sign is not accepted), then decimal digits. */
fragment DECIMAL_EXPONENT
: ('e'|'E') '-'? ('0'..'9')+;
/* Binary exponent: p or P, an optional minus sign, then decimal digits. */
fragment BINARY_EXPONENT
: ('p'|'P') '-'? ('0'..'9')+;
/* Hexadecimal prefix, with a lower- or upper-case x. */
fragment HEX_PREFIX
: '0x'|'0X';
/* A single hexadecimal digit in either case. */
fragment HEX_DIGIT
: ('0'..'9')|('A'..'F')|('a'..'f');
/* Exactly four hexadecimal digits; ESCAPE_SEQUENCE uses this for the digits of a unicode escape. */
fragment HEX_DIGITS
: HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT;
/*
 * These can be either floating point numbers or identifiers, because none of
 * the alternatives contains a decimal point:
 *   - decimal digits followed by a decimal exponent
 *   - hexadecimal digits followed by a binary exponent
 *   - the word "infinity" in any letter case, optionally negated
 *   - the word "nan" in any letter case
 * NOTE(review): unlike the hexadecimal alternatives of BASE_FLOAT, the
 * hexadecimal alternative here takes no optional leading minus sign, and
 * neither does the NaN alternative - confirm this is intentional.
 */
fragment BASE_FLOAT_OR_ID
: '-'? ('0'..'9')+ DECIMAL_EXPONENT
| HEX_PREFIX HEX_DIGIT+ BINARY_EXPONENT
| '-'? ('i' | 'I') ('n' | 'N') ('f' | 'F') ('i' | 'I') ('n' | 'N') ('i' | 'I') ('t' | 'T') ('y' | 'Y')
| ('n' | 'N') ('a' | 'A') ('n' | 'N');
/*
 * These can't be identifiers, due to the decimal point. All alternatives
 * accept an optional leading minus sign:
 *   - decimal digits, a point, optional fraction digits, optional exponent
 *   - a point followed by fraction digits, optional exponent
 *   - hexadecimal digits, a point, optional hexadecimal fraction digits
 *   - hexadecimal prefix, a point, then hexadecimal fraction digits
 * The binary exponent is mandatory for both hexadecimal forms.
 */
fragment BASE_FLOAT
: '-'? ('0'..'9')+ '.' ('0'..'9')* DECIMAL_EXPONENT?
| '-'? '.' ('0'..'9')+ DECIMAL_EXPONENT?
| '-'? HEX_PREFIX HEX_DIGIT+ '.' HEX_DIGIT* BINARY_EXPONENT
| '-'? HEX_PREFIX '.' HEX_DIGIT+ BINARY_EXPONENT;
/*
 * Matches one backslash escape and appends the character it denotes to sb,
 * so the caller accumulates the unescaped text. The unicode form takes the
 * four digits matched by HEX_DIGITS, parses them as a base-16 integer and
 * appends the result as a single char.
 */
fragment ESCAPE_SEQUENCE[StringBuilder sb]
: '\\'
(
'b' {sb.append("\b");}
| 't' {sb.append("\t");}
| 'n' {sb.append("\n");}
| 'f' {sb.append("\f");}
| 'r' {sb.append("\r");}
| '\"' {sb.append("\"");}
| '\'' {sb.append("'");}
| '\\' {sb.append("\\");}
| 'u' HEX_DIGITS {sb.append((char)Integer.parseInt($HEX_DIGITS.text, 16));}
);
/* An integer with no sign and no type suffix. */
POSITIVE_INTEGER_LITERAL
: BASE_INTEGER;
/* An integer with a mandatory leading minus sign and no type suffix. */
NEGATIVE_INTEGER_LITERAL
: '-' BASE_INTEGER;
/* Optionally negated integer with an l or L suffix. */
LONG_LITERAL
: '-'? BASE_INTEGER ('l'|'L');
/* Optionally negated integer with an s or S suffix. */
SHORT_LITERAL
: '-'? BASE_INTEGER ('s'|'S');
/* Optionally negated integer with a t or T suffix. */
BYTE_LITERAL
: '-'? BASE_INTEGER ('t'|'T');
/*
 * Float text that contains no decimal point: a BASE_FLOAT_OR_ID body with an
 * f or F suffix, or plain decimal digits with that suffix.
 */
FLOAT_LITERAL_OR_ID
: BASE_FLOAT_OR_ID ('f'|'F')
| '-'? ('0'..'9')+ ('f'|'F');
/*
 * Double text that contains no decimal point. The d or D suffix is optional
 * after a BASE_FLOAT_OR_ID body but mandatory after plain decimal digits.
 */
DOUBLE_LITERAL_OR_ID
: BASE_FLOAT_OR_ID ('d'|'D')?
| '-'? ('0'..'9')+ ('d'|'D');
/* A BASE_FLOAT body with a mandatory f or F suffix. */
FLOAT_LITERAL
: BASE_FLOAT ('f'|'F');
/* A BASE_FLOAT body with an optional d or D suffix. */
DOUBLE_LITERAL
: BASE_FLOAT ('d'|'D')?;
BOOL_LITERAL
: 'true'
| 'false';
NULL_LITERAL
: 'null';
/*
 * A double-quoted string. The token text is replaced with the content
 * collected in sb: escapes are already resolved and the surrounding quote
 * characters are kept.
 */
STRING_LITERAL
@init {StringBuilder sb = new StringBuilder();}
: BASE_STRING_LITERAL[sb] {setText(sb.toString());};
/*
 * Appends the opening quote, then zero or more escape sequences or ordinary
 * characters, then the closing quote to sb. An ordinary character is anything
 * except a backslash, a double quote, a carriage return or a line feed; it is
 * read back with input.LA(-1) once it has been matched.
 */
fragment BASE_STRING_LITERAL[StringBuilder sb]
: '"' {sb.append('"');}
( ESCAPE_SEQUENCE[sb]
| ~( '\\' | '"' | '\r' | '\n' ) {sb.append((char)input.LA(-1));}
)*
'"' {sb.append('"');};
/*
 * A single-quoted character. The token text is replaced with the content
 * collected in sb: the escape, if any, is resolved and the surrounding quote
 * characters are kept.
 */
CHAR_LITERAL
@init {StringBuilder sb = new StringBuilder();}
: BASE_CHAR_LITERAL[sb] {setText(sb.toString());};
/*
 * Appends the opening quote, exactly one escape sequence or ordinary
 * character, and the closing quote to sb. An ordinary character is anything
 * except a backslash, a single quote, a carriage return or a line feed.
 */
fragment BASE_CHAR_LITERAL[StringBuilder sb]
: '\'' {sb.append('\'');}
( ESCAPE_SEQUENCE[sb]
| ~( '\\' | '\'' | '\r' | '\n' ) {sb.append((char)input.LA(-1));}
)
'\'' { sb.append('\''); };
/**********************************************************
* MISC
**********************************************************/
/* A register reference: the letter v or p followed by one or more decimal digits. */
REGISTER
: ('v'|'p') ('0'..'9')+;
/* One of the three visibility keywords. */
ANNOTATION_VISIBILITY
: 'build'
| 'runtime'
| 'system';
/*
 * Access and modifier keywords, all matched as the same token type.
 * Note that 'enum' and 'annotation' appear here without a leading dot; the
 * directive tokens of the same names start with a dot.
 */
ACCESS_SPEC
: 'public'
| 'private'
| 'protected'
| 'static'
| 'final'
| 'synchronized'
| 'bridge'
| 'varargs'
| 'native'
| 'abstract'
| 'strictfp'
| 'synthetic'
| 'constructor'
| 'declared-synchronized'
| 'interface'
| 'enum'
| 'annotation'
| 'volatile'
| 'transient';
/*
 * The fixed set of verification error names.
 * NOTE(review): presumably the operand of the throw-verification-error
 * instruction - confirm against the parser grammar.
 */
VERIFICATION_ERROR_TYPE
: 'no-error'
| 'generic-error'
| 'no-such-class'
| 'no-such-field'
| 'no-such-method'
| 'illegal-class-access'
| 'illegal-field-access'
| 'illegal-method-access'
| 'class-change-error'
| 'instantiation-error';
/*
 * The three index/offset tokens below share one shape: a fixed keyword, an
 * at-sign, a lower-case 0x prefix (the upper-case form accepted by HEX_PREFIX
 * is not matched here) and one or more hexadecimal digits.
 */
INLINE_INDEX
: 'inline@0x' HEX_DIGIT+;
VTABLE_INDEX
: 'vtable@0x' HEX_DIGIT+;
FIELD_OFFSET
: 'field@0x' HEX_DIGIT+;
/* A plus sign immediately followed by an unsigned integer body. */
OFFSET
: '+' BASE_INTEGER;
/*
 * A hash sign and everything up to the end of the line. The first alternative
 * also consumes the line terminator (CR LF, CR or LF); the second covers a
 * comment that has no terminator after it. The token is placed on the hidden
 * channel.
 */
LINE_COMMENT
: '#'
(
~('\n'|'\r')* ('\r\n' | '\r' | '\n')
| ~('\n'|'\r')*
)
{$channel = HIDDEN;};
/**********************************************************
* Instructions
**********************************************************/
/*
 * Instruction mnemonics are grouped into one token type per instruction
 * format; the format identifier (10t, 10x, 11n, ...) is the suffix of the
 * rule name.
 * NOTE(review): the _ODEX suffix presumably marks mnemonics that are only
 * valid in optimized dex files - confirm against the parser.
 */
INSTRUCTION_FORMAT10t
: 'goto';
INSTRUCTION_FORMAT10x
: 'return-void'
| 'nop';
INSTRUCTION_FORMAT10x_ODEX
: 'return-void-barrier';
INSTRUCTION_FORMAT11n
: 'const/4';
INSTRUCTION_FORMAT11x
: 'move-result'
| 'move-result-wide'
| 'move-result-object'
| 'move-exception'
| 'return'
| 'return-wide'
| 'return-object'
| 'monitor-enter'
| 'monitor-exit'
| 'throw';
/*
 * Format 12x mnemonics that consist only of letters and hyphens.
 * NOTE(review): the _OR_ID suffix presumably means the parser also accepts
 * this token where an identifier is expected - confirm against the parser.
 */
INSTRUCTION_FORMAT12x_OR_ID
: 'move'
| 'move-wide'
| 'move-object'
| 'array-length'
| 'neg-int'
| 'not-int'
| 'neg-long'
| 'not-long'
| 'neg-float'
| 'neg-double'
| 'int-to-long'
| 'int-to-float'
| 'int-to-double'
| 'long-to-int'
| 'long-to-float'
| 'long-to-double'
| 'float-to-int'
| 'float-to-long'
| 'float-to-double'
| 'double-to-int'
| 'double-to-long'
| 'double-to-float'
| 'int-to-byte'
| 'int-to-char'
| 'int-to-short';
/* The remaining format 12x mnemonics; every one of them ends in /2addr. */
INSTRUCTION_FORMAT12x
: 'add-int/2addr'
| 'sub-int/2addr'
| 'mul-int/2addr'
| 'div-int/2addr'
| 'rem-int/2addr'
| 'and-int/2addr'
| 'or-int/2addr'
| 'xor-int/2addr'
| 'shl-int/2addr'
| 'shr-int/2addr'
| 'ushr-int/2addr'
| 'add-long/2addr'
| 'sub-long/2addr'
| 'mul-long/2addr'
| 'div-long/2addr'
| 'rem-long/2addr'
| 'and-long/2addr'
| 'or-long/2addr'
| 'xor-long/2addr'
| 'shl-long/2addr'
| 'shr-long/2addr'
| 'ushr-long/2addr'
| 'add-float/2addr'
| 'sub-float/2addr'
| 'mul-float/2addr'
| 'div-float/2addr'
| 'rem-float/2addr'
| 'add-double/2addr'
| 'sub-double/2addr'
| 'mul-double/2addr'
| 'div-double/2addr'
| 'rem-double/2addr';
/*
 * Mnemonics for formats 20bc through 21t. Format 21c is split into separate
 * token types by the kind of item named in the rule suffix (FIELD, STRING,
 * TYPE), with the volatile field accessors kept in their own _ODEX token type.
 */
INSTRUCTION_FORMAT20bc
: 'throw-verification-error';
INSTRUCTION_FORMAT20t
: 'goto/16';
INSTRUCTION_FORMAT21c_FIELD
: 'sget'
| 'sget-wide'
| 'sget-object'
| 'sget-boolean'
| 'sget-byte'
| 'sget-char'
| 'sget-short'
| 'sput'
| 'sput-wide'
| 'sput-object'
| 'sput-boolean'
| 'sput-byte'
| 'sput-char'
| 'sput-short';
INSTRUCTION_FORMAT21c_FIELD_ODEX
: 'sget-volatile'
| 'sget-wide-volatile'
| 'sget-object-volatile'
| 'sput-volatile'
| 'sput-wide-volatile'
| 'sput-object-volatile';
INSTRUCTION_FORMAT21c_STRING
: 'const-string';
INSTRUCTION_FORMAT21c_TYPE
: 'check-cast'
| 'new-instance'
| 'const-class';
INSTRUCTION_FORMAT21h
: 'const/high16'
| 'const-wide/high16';
INSTRUCTION_FORMAT21s
: 'const/16'
| 'const-wide/16';
INSTRUCTION_FORMAT21t
: 'if-eqz'
| 'if-nez'
| 'if-ltz'
| 'if-gez'
| 'if-gtz'
| 'if-lez';
/*
 * Mnemonics for formats 22b through 22t. Format 22c is split by the kind of
 * item named in the rule suffix (FIELD, TYPE), with volatile accessors in the
 * _ODEX token type and the -quick accessors in format 22cs.
 */
INSTRUCTION_FORMAT22b
: 'add-int/lit8'
| 'rsub-int/lit8'
| 'mul-int/lit8'
| 'div-int/lit8'
| 'rem-int/lit8'
| 'and-int/lit8'
| 'or-int/lit8'
| 'xor-int/lit8'
| 'shl-int/lit8'
| 'shr-int/lit8'
| 'ushr-int/lit8';
INSTRUCTION_FORMAT22c_FIELD
: 'iget'
| 'iget-wide'
| 'iget-object'
| 'iget-boolean'
| 'iget-byte'
| 'iget-char'
| 'iget-short'
| 'iput'
| 'iput-wide'
| 'iput-object'
| 'iput-boolean'
| 'iput-byte'
| 'iput-char'
| 'iput-short';
INSTRUCTION_FORMAT22c_FIELD_ODEX
: 'iget-volatile'
| 'iget-wide-volatile'
| 'iget-object-volatile'
| 'iput-volatile'
| 'iput-wide-volatile'
| 'iput-object-volatile';
INSTRUCTION_FORMAT22c_TYPE
: 'instance-of'
| 'new-array';
INSTRUCTION_FORMAT22cs_FIELD
: 'iget-quick'
| 'iget-wide-quick'
| 'iget-object-quick'
| 'iput-quick'
| 'iput-wide-quick'
| 'iput-object-quick';
/* The only format 22s mnemonic without a /lit16 suffix; it has its own token type. */
INSTRUCTION_FORMAT22s_OR_ID
: 'rsub-int';
INSTRUCTION_FORMAT22s
: 'add-int/lit16'
| 'mul-int/lit16'
| 'div-int/lit16'
| 'rem-int/lit16'
| 'and-int/lit16'
| 'or-int/lit16'
| 'xor-int/lit16';
INSTRUCTION_FORMAT22t
: 'if-eq'
| 'if-ne'
| 'if-lt'
| 'if-ge'
| 'if-gt'
| 'if-le';
/* Format 22x: the /from16 variants of the move instructions. */
INSTRUCTION_FORMAT22x
: 'move/from16'
| 'move-wide/from16'
| 'move-object/from16';
/*
 * Format 23x: comparisons, array element get/put, and the binary arithmetic,
 * bitwise and shift operations without a /2addr or /lit suffix.
 */
INSTRUCTION_FORMAT23x
: 'cmpl-float'
| 'cmpg-float'
| 'cmpl-double'
| 'cmpg-double'
| 'cmp-long'
| 'aget'
| 'aget-wide'
| 'aget-object'
| 'aget-boolean'
| 'aget-byte'
| 'aget-char'
| 'aget-short'
| 'aput'
| 'aput-wide'
| 'aput-object'
| 'aput-boolean'
| 'aput-byte'
| 'aput-char'
| 'aput-short'
| 'add-int'
| 'sub-int'
| 'mul-int'
| 'div-int'
| 'rem-int'
| 'and-int'
| 'or-int'
| 'xor-int'
| 'shl-int'
| 'shr-int'
| 'ushr-int'
| 'add-long'
| 'sub-long'
| 'mul-long'
| 'div-long'
| 'rem-long'
| 'and-long'
| 'or-long'
| 'xor-long'
| 'shl-long'
| 'shr-long'
| 'ushr-long'
| 'add-float'
| 'sub-float'
| 'mul-float'
| 'div-float'
| 'rem-float'
| 'add-double'
| 'sub-double'
| 'mul-double'
| 'div-double'
| 'rem-double';
/*
 * Mnemonics for formats 30t through 51l. The invoke and filled-new-array
 * families appear twice: the plain forms under the 35 formats and the /range
 * forms under the corresponding 3r formats.
 */
INSTRUCTION_FORMAT30t
: 'goto/32';
INSTRUCTION_FORMAT31c
: 'const-string/jumbo';
/* The bare 'const' mnemonic has its own token type, separate from 'const-wide/32'. */
INSTRUCTION_FORMAT31i_OR_ID
: 'const';
INSTRUCTION_FORMAT31i
: 'const-wide/32';
INSTRUCTION_FORMAT31t
: 'fill-array-data'
| 'packed-switch'
| 'sparse-switch';
INSTRUCTION_FORMAT32x
: 'move/16'
| 'move-wide/16'
| 'move-object/16';
INSTRUCTION_FORMAT35c_METHOD
: 'invoke-virtual'
| 'invoke-super'
| 'invoke-direct'
| 'invoke-static'
| 'invoke-interface';
INSTRUCTION_FORMAT35c_METHOD_ODEX
: 'invoke-direct-empty';
INSTRUCTION_FORMAT35c_TYPE
: 'filled-new-array';
INSTRUCTION_FORMAT35mi_METHOD
: 'execute-inline';
INSTRUCTION_FORMAT35ms_METHOD
: 'invoke-virtual-quick'
| 'invoke-super-quick';
INSTRUCTION_FORMAT3rc_METHOD
: 'invoke-virtual/range'
| 'invoke-super/range'
| 'invoke-direct/range'
| 'invoke-static/range'
| 'invoke-interface/range';
INSTRUCTION_FORMAT3rc_METHOD_ODEX
: 'invoke-object-init/range';
INSTRUCTION_FORMAT3rc_TYPE
: 'filled-new-array/range';
INSTRUCTION_FORMAT3rmi_METHOD
: 'execute-inline/range';
INSTRUCTION_FORMAT3rms_METHOD
: 'invoke-virtual-quick/range'
| 'invoke-super-quick/range';
INSTRUCTION_FORMAT51l
: 'const-wide';
/**********************************************************
* Types
**********************************************************/
/*
 * One or more name characters: ASCII letters and digits, dollar sign, hyphen,
 * underscore, and the listed ranges above U+00A0. The ranges leave out
 * U+2000..U+200F, U+2028..U+202F, U+D800..U+DFFF and U+FFF0..U+FFFF.
 * A name may start with a digit or a hyphen, so text matched here can overlap
 * with numeric literals and instruction mnemonics.
 */
fragment BASE_SIMPLE_NAME:
( 'A'..'Z'
| 'a'..'z'
| '0'..'9'
| '$'
| '-'
| '_'
| '\u00a1'..'\u1fff'
| '\u2010'..'\u2027'
| '\u2030'..'\ud7ff'
| '\ue000'..'\uffef'
)+;
/* One of the eight single-letter primitive type codes; V is not included (see VOID_TYPE). */
fragment BASE_PRIMITIVE_TYPE
: 'Z'|'B'|'S'|'C'|'I'|'J'|'F'|'D';
/* The letter L, zero or more name segments each followed by a slash, a final name, then a semicolon. */
fragment BASE_CLASS_DESCRIPTOR
: 'L' (BASE_SIMPLE_NAME '/')* BASE_SIMPLE_NAME ';';
/* One or more opening square brackets followed by a primitive type code or a class descriptor. */
fragment BASE_ARRAY_DESCRIPTOR
: '['+ (BASE_PRIMITIVE_TYPE | BASE_CLASS_DESCRIPTOR);
/* Any single type: primitive, class or array. */
fragment BASE_TYPE
: BASE_PRIMITIVE_TYPE
| BASE_CLASS_DESCRIPTOR
| BASE_ARRAY_DESCRIPTOR;
/* Token-producing wrappers around the type fragments. */
PRIMITIVE_TYPE
: BASE_PRIMITIVE_TYPE;
VOID_TYPE
: 'V';
CLASS_DESCRIPTOR
: BASE_CLASS_DESCRIPTOR;
ARRAY_DESCRIPTOR
: BASE_ARRAY_DESCRIPTOR;
/*
 * Two or more primitive type codes with nothing in between. Such text
 * consists only of upper-case letters, so it also fits BASE_SIMPLE_NAME.
 */
PARAM_LIST_OR_ID
: BASE_PRIMITIVE_TYPE BASE_PRIMITIVE_TYPE+;
/* Two or more types of any kind with nothing in between. */
PARAM_LIST
: BASE_TYPE BASE_TYPE+;
SIMPLE_NAME
: BASE_SIMPLE_NAME;
/* The two special method names; angle brackets are not valid BASE_SIMPLE_NAME characters. */
METHOD_NAME
: '<init>'
| '<clinit>';
/**********************************************************
* Symbols
**********************************************************/
/* Fixed punctuation tokens. */
DOTDOT
: '..';
ARROW
: '->';
EQUAL
: '=';
COLON
: ':';
COMMA
: ',';
OPEN_BRACE
: '{';
CLOSE_BRACE
: '}';
OPEN_PAREN
: '(';
CLOSE_PAREN
: ')';
/* A run of spaces, tabs, line feeds and carriage returns, placed on the hidden channel. */
WHITE_SPACE
: (' '|'\t'|'\n'|'\r')+ {$channel = HIDDEN;};

View File

@ -29,12 +29,148 @@
parser grammar smaliParser;
options {
tokenVocab=smaliLexer;
output=AST;
ASTLabelType=CommonTree;
}
tokens {
//Lexer tokens
ACCESS_SPEC;
ANNOTATION_DIRECTIVE;
ANNOTATION_VISIBILITY;
ARRAY_DATA_DIRECTIVE;
ARRAY_DESCRIPTOR;
ARROW;
BASE_ARRAY_DESCRIPTOR;
BASE_CHAR_LITERAL;
BASE_CLASS_DESCRIPTOR;
BASE_FLOAT;
BASE_FLOAT_OR_ID;
BASE_INTEGER;
BASE_PRIMITIVE_TYPE;
BASE_SIMPLE_NAME;
BASE_STRING_LITERAL;
BASE_TYPE;
BINARY_EXPONENT;
BOOL_LITERAL;
BYTE_LITERAL;
CATCH_DIRECTIVE;
CATCHALL_DIRECTIVE;
CHAR_LITERAL;
CLASS_DESCRIPTOR;
CLASS_DIRECTIVE;
CLOSE_BRACE;
CLOSE_PAREN;
COLON;
COMMA;
DECIMAL_EXPONENT;
DOTDOT;
DOUBLE_LITERAL;
DOUBLE_LITERAL_OR_ID;
END_ANNOTATION_DIRECTIVE;
END_ARRAY_DATA_DIRECTIVE;
END_FIELD_DIRECTIVE;
END_LOCAL_DIRECTIVE;
END_METHOD_DIRECTIVE;
END_PACKED_SWITCH_DIRECTIVE;
END_PARAMETER_DIRECTIVE;
END_SPARSE_SWITCH_DIRECTIVE;
END_SUBANNOTATION_DIRECTIVE;
ENUM_DIRECTIVE;
EPILOGUE_DIRECTIVE;
EQUAL;
ESCAPE_SEQUENCE;
FIELD_DIRECTIVE;
FIELD_OFFSET;
FLOAT_LITERAL;
FLOAT_LITERAL_OR_ID;
HEX_DIGIT;
HEX_DIGITS;
HEX_PREFIX;
IMPLEMENTS_DIRECTIVE;
INLINE_INDEX;
INSTRUCTION_FORMAT10t;
INSTRUCTION_FORMAT10x;
INSTRUCTION_FORMAT10x_ODEX;
INSTRUCTION_FORMAT11n;
INSTRUCTION_FORMAT11x;
INSTRUCTION_FORMAT12x;
INSTRUCTION_FORMAT12x_OR_ID;
INSTRUCTION_FORMAT20bc;
INSTRUCTION_FORMAT20t;
INSTRUCTION_FORMAT21c_FIELD;
INSTRUCTION_FORMAT21c_FIELD_ODEX;
INSTRUCTION_FORMAT21c_STRING;
INSTRUCTION_FORMAT21c_TYPE;
INSTRUCTION_FORMAT21h;
INSTRUCTION_FORMAT21s;
INSTRUCTION_FORMAT21t;
INSTRUCTION_FORMAT22b;
INSTRUCTION_FORMAT22c_FIELD;
INSTRUCTION_FORMAT22c_FIELD_ODEX;
INSTRUCTION_FORMAT22c_TYPE;
INSTRUCTION_FORMAT22cs_FIELD;
INSTRUCTION_FORMAT22s;
INSTRUCTION_FORMAT22s_OR_ID;
INSTRUCTION_FORMAT22t;
INSTRUCTION_FORMAT22x;
INSTRUCTION_FORMAT23x;
INSTRUCTION_FORMAT30t;
INSTRUCTION_FORMAT31c;
INSTRUCTION_FORMAT31i;
INSTRUCTION_FORMAT31i_OR_ID;
INSTRUCTION_FORMAT31t;
INSTRUCTION_FORMAT32x;
INSTRUCTION_FORMAT35c_METHOD;
INSTRUCTION_FORMAT35c_METHOD_ODEX;
INSTRUCTION_FORMAT35c_TYPE;
INSTRUCTION_FORMAT35mi_METHOD;
INSTRUCTION_FORMAT35ms_METHOD;
INSTRUCTION_FORMAT3rc_METHOD;
INSTRUCTION_FORMAT3rc_METHOD_ODEX;
INSTRUCTION_FORMAT3rc_TYPE;
INSTRUCTION_FORMAT3rmi_METHOD;
INSTRUCTION_FORMAT3rms_METHOD;
INSTRUCTION_FORMAT51l;
INVALID_TOKEN;
LINE_COMMENT;
LINE_DIRECTIVE;
LOCAL_DIRECTIVE;
LOCALS_DIRECTIVE;
LONG_LITERAL;
METHOD_DIRECTIVE;
METHOD_NAME;
NEGATIVE_INTEGER_LITERAL;
NULL_LITERAL;
OFFSET;
OPEN_BRACE;
OPEN_PAREN;
PACKED_SWITCH_DIRECTIVE;
PARAM_LIST;
PARAM_LIST_OR_ID;
PARAMETER_DIRECTIVE;
POSITIVE_INTEGER_LITERAL;
PRIMITIVE_TYPE;
PROLOGUE_DIRECTIVE;
REGISTER;
REGISTERS_DIRECTIVE;
RESTART_LOCAL_DIRECTIVE;
SHORT_LITERAL;
SIMPLE_NAME;
SOURCE_DIRECTIVE;
SPARSE_SWITCH_DIRECTIVE;
STRING_LITERAL;
SUBANNOTATION_DIRECTIVE;
SUPER_DIRECTIVE;
VERIFICATION_ERROR_TYPE;
VOID_TYPE;
VTABLE_INDEX;
WHITE_SPACE;
//A couple of generated types that we remap other tokens to, to simplify the generated AST
LABEL;
INTEGER_LITERAL;
//I_* tokens are imaginary tokens used as parent AST nodes
I_CLASS_DEF;
I_SUPER;
@ -123,10 +259,6 @@ tokens {
I_STATEMENT_SPARSE_SWITCH;
I_REGISTER_RANGE;
I_REGISTER_LIST;
LABEL;
INTEGER_LITERAL;
INVALID_TOKEN;
}
@header {
@ -138,6 +270,8 @@ import org.jf.dexlib.Code.Opcode;
@members {
public static final int ERROR_CHANNEL = 100;
private boolean verboseErrors = false;
private boolean allowOdex = false;
private int apiLevel;

View File

@ -28,7 +28,6 @@
package org.jf.smali;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonToken;
public class InvalidToken extends CommonToken {
@ -37,13 +36,13 @@ public class InvalidToken extends CommonToken {
public InvalidToken(String message) {
super(smaliParser.INVALID_TOKEN);
this.message = message;
this.channel = smaliLexer.ERROR_CHANNEL;
this.channel = smaliParser.ERROR_CHANNEL;
}
public InvalidToken(String message, String text) {
super(smaliParser.INVALID_TOKEN, text);
this.message = message;
this.channel = smaliLexer.ERROR_CHANNEL;
this.channel = smaliParser.ERROR_CHANNEL;
}
public String getMessage() {

View File

@ -103,7 +103,6 @@ public class main {
boolean fixJumbo = true;
boolean fixGoto = true;
boolean verboseErrors = false;
boolean oldLexer = false;
boolean printTokens = false;
boolean apiSet = false;
@ -158,9 +157,6 @@ public class main {
case 'V':
verboseErrors = true;
break;
case 'L':
oldLexer = true;
break;
case 'T':
printTokens = true;
break;
@ -202,7 +198,7 @@ public class main {
boolean errors = false;
for (File file: filesToProcess) {
if (!assembleSmaliFile(file, dexFile, verboseErrors, oldLexer, printTokens, allowOdex, apiLevel)) {
if (!assembleSmaliFile(file, dexFile, verboseErrors, printTokens, allowOdex, apiLevel)) {
errors = true;
}
}
@ -276,7 +272,7 @@ public class main {
}
}
private static boolean assembleSmaliFile(File smaliFile, DexFile dexFile, boolean verboseErrors, boolean oldLexer,
private static boolean assembleSmaliFile(File smaliFile, DexFile dexFile, boolean verboseErrors,
boolean printTokens, boolean allowOdex, int apiLevel)
throws Exception {
CommonTokenStream tokens;
@ -285,27 +281,19 @@ public class main {
boolean lexerErrors = false;
LexerErrorInterface lexer;
if (oldLexer) {
ANTLRFileStream input = new ANTLRFileStream(smaliFile.getAbsolutePath(), "UTF-8");
input.name = smaliFile.getAbsolutePath();
lexer = new smaliLexer(input);
tokens = new CommonTokenStream((TokenSource)lexer);
} else {
FileInputStream fis = new FileInputStream(smaliFile.getAbsolutePath());
InputStreamReader reader = new InputStreamReader(fis, "UTF-8");
lexer = new smaliFlexLexer(reader);
((smaliFlexLexer)lexer).setSourceFile(smaliFile);
tokens = new CommonTokenStream((TokenSource)lexer);
}
if (printTokens) {
tokens.getTokens();
for (int i=0; i<tokens.size(); i++) {
Token token = tokens.get(i);
if (token.getChannel() == smaliLexer.HIDDEN) {
if (token.getChannel() == smaliParser.HIDDEN) {
continue;
}
@ -423,10 +411,6 @@ public class main {
.withDescription("Generate verbose error messages")
.create("V");
Option oldLexerOption = OptionBuilder.withLongOpt("old-lexer")
.withDescription("Use the old lexer")
.create("L");
Option printTokensOption = OptionBuilder.withLongOpt("print-tokens")
.withDescription("Print the name and text of each token")
.create("T");
@ -442,7 +426,6 @@ public class main {
debugOptions.addOption(noFixJumboOption);
debugOptions.addOption(noFixGotoOption);
debugOptions.addOption(verboseErrorsOption);
debugOptions.addOption(oldLexerOption);
debugOptions.addOption(printTokensOption);
for (Object option: basicOptions.getOptions()) {

View File

@ -165,7 +165,7 @@ public class LexerTest {
for (int i=0; i<tokens.size(); i++) {
token = (CommonToken)tokens.get(i);
if (discardHiddenTokens && token.getChannel() == smaliLexer.HIDDEN) {
if (discardHiddenTokens && token.getChannel() == smaliParser.HIDDEN) {
continue;
}
@ -175,7 +175,7 @@ public class LexerTest {
if (token.getType() == smaliParser.INVALID_TOKEN) {
Assert.assertTrue("Encountered an INVALID_TOKEN not on the error channel",
token.getChannel() == smaliLexer.ERROR_CHANNEL);
token.getChannel() == smaliParser.ERROR_CHANNEL);
}
ExpectedToken expectedToken = expectedTokens.get(expectedTokenIndex++);