diff --git a/smali/src/main/jflex/smaliLexer.jflex b/smali/src/main/jflex/smaliLexer.jflex index 7b19d3dc..bec71bee 100644 --- a/smali/src/main/jflex/smaliLexer.jflex +++ b/smali/src/main/jflex/smaliLexer.jflex @@ -25,10 +25,10 @@ import static org.jf.smali.smaliParser.*; %{ private StringBuffer sb = new StringBuffer(); - private String stringOrCharError = null; - private int stringStartLine; - private int stringStartCol; - private int stringStartChar; + private String tokenError = null; + private int tokenStartLine; + private int tokenStartCol; + private int tokenStartChar; private int lexerErrors = 0; @@ -136,51 +136,44 @@ import static org.jf.smali.smaliParser.*; return invalidToken(message, yytext()); } - private Token simpleNameToken(String text, boolean quoted) { - if (quoted) { - text = text.substring(1, text.length() - 1); /* strip backticks */ - } - return newToken(SIMPLE_NAME, text); - } - - private void beginStringOrChar(int state) { + private void beginStateBasedToken(int state) { yybegin(state); sb.setLength(0); - stringStartLine = getLine(); - stringStartCol = getColumn(); - stringStartChar = yychar; - stringOrCharError = null; + tokenStartLine = getLine(); + tokenStartCol = getColumn(); + tokenStartChar = yychar; + tokenError = null; } - private Token endStringOrChar(int type) { + private Token endStateBasedToken(int type) { yybegin(YYINITIAL); - if (stringOrCharError != null) { - return invalidStringOrChar(stringOrCharError); + if (tokenError != null) { + return invalidStateBasedToken(tokenError); } CommonToken token = new CommonToken(type, sb.toString()); - token.setStartIndex(stringStartChar); + token.setStartIndex(tokenStartChar); token.setStopIndex(yychar + yylength() - 1); - token.setLine(stringStartLine); - token.setCharPositionInLine(stringStartCol); + token.setLine(tokenStartLine); + token.setCharPositionInLine(tokenStartCol); return token; } - private void setStringOrCharError(String message) { - if (stringOrCharError == null) { - stringOrCharError = message; + private void setStateBasedTokenError(String message) { + if (tokenError == null) { + tokenError = message; } } - private Token invalidStringOrChar(String message) { + private Token invalidStateBasedToken(String message) { yybegin(YYINITIAL); InvalidToken token = new InvalidToken(message, sb.toString()); - token.setStartIndex(stringStartChar); + token.setStartIndex(tokenStartChar); token.setStopIndex(yychar + yylength() - 1); - token.setLine(stringStartLine); - token.setCharPositionInLine(stringStartCol); + token.setLine(tokenStartLine); + token.setCharPositionInLine(tokenStartCol); return token; } @@ -201,6 +194,19 @@ import static org.jf.smali.smaliParser.*; zzAtEOF = false; yybegin(initialState); } + + private String processQuotedSimpleName(String text) { + // strip backticks + return text.substring(1, text.length() - 1); + } + + private String processQuotedSimpleNameWithSpaces(String text) { + if (apiLevel < 30) { + setStateBasedTokenError("spaces in class descriptors and member names are not supported prior to API " + + "level 30/dex version 040"); + } + return processQuotedSimpleName(text); + } %} HexPrefix = 0 [xX] @@ -260,6 +266,8 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} | %state ARRAY_DESCRIPTOR %state STRING %state CHAR +%state CLASS_DESCRIPTOR_BEGINNING +%state CLASS_DESCRIPTOR_REMAINING %% @@ -320,9 +328,9 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} | "true"|"false" { return newToken(BOOL_LITERAL); } "null" { return newToken(NULL_LITERAL); } - "\"" { beginStringOrChar(STRING); sb.append('"'); } + "\"" { beginStateBasedToken(STRING); sb.append('"'); } - ' { beginStringOrChar(CHAR); sb.append('\''); } + ' { beginStateBasedToken(CHAR); sb.append('\''); } } { @@ -333,14 +341,54 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} | { {PrimitiveType} { return newToken(PRIMITIVE_TYPE); } - {ClassDescriptor} { return newToken(CLASS_DESCRIPTOR); } + {ClassDescriptor} { + yypushback(yylength()); + beginStateBasedToken(CLASS_DESCRIPTOR_BEGINNING); + sb.append(yytext()); + } {ArrayPrefix} { return newToken(ARRAY_TYPE_PREFIX); } [^] { yypushback(1); yybegin(YYINITIAL);} <> { yybegin(YYINITIAL);} } + { + "L" {SimpleNameRaw} { + sb.append(yytext()); + yybegin(CLASS_DESCRIPTOR_REMAINING); + } + "L" {SimpleNameQuoted} { + sb.append("L"); + sb.append(processQuotedSimpleName(yytext().substring(1))); + yybegin(CLASS_DESCRIPTOR_REMAINING); + } + "L" {SimpleNameQuotedWithSpaces} { + sb.append("L"); + sb.append(processQuotedSimpleNameWithSpaces(yytext().substring(1))); + yybegin(CLASS_DESCRIPTOR_REMAINING); + } +} + + { + "/" {SimpleNameRaw} { + sb.append(yytext()); + } + "/" {SimpleNameQuoted} { + sb.append("/"); + sb.append(processQuotedSimpleName(yytext().substring(1))); + } + "/" {SimpleNameQuotedWithSpaces} { + sb.append("/"); + sb.append(processQuotedSimpleNameWithSpaces(yytext().substring(1))); + } + + ";" { + sb.append(yytext()); + return endStateBasedToken(CLASS_DESCRIPTOR); + } +} + { - "\"" { sb.append('"'); return endStringOrChar(STRING_LITERAL); } + "\"" { sb.append('"'); return endStateBasedToken(STRING_LITERAL); } [^\r\n\"\\]+ { sb.append(yytext()); } "\\b" { sb.append('\b'); } @@ -355,28 +403,28 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} | "\\u" {FewerHexDigits} { sb.append(yytext()); - setStringOrCharError("Invalid \\u sequence. \\u must be followed by 4 hex digits"); + setStateBasedTokenError("Invalid \\u sequence. \\u must be followed by 4 hex digits"); } "\\" [^btnfr'\"\\u] { sb.append(yytext()); - setStringOrCharError("Invalid escape sequence " + yytext()); + setStateBasedTokenError("Invalid escape sequence " + yytext()); } - [\r\n] { return invalidStringOrChar("Unterminated string literal"); } - <> { return invalidStringOrChar("Unterminated string literal"); } + [\r\n] { return invalidStateBasedToken("Unterminated string literal"); } + <> { return invalidStateBasedToken("Unterminated string literal"); } } { ' { sb.append('\''); if (sb.length() == 2) { - return invalidStringOrChar("Empty character literal"); + return invalidStateBasedToken("Empty character literal"); } else if (sb.length() > 3) { - return invalidStringOrChar("Character literal with multiple chars"); + return invalidStateBasedToken("Character literal with multiple chars"); } - return endStringOrChar(CHAR_LITERAL); + return endStateBasedToken(CHAR_LITERAL); } [^\r\n'\\]+ { sb.append(yytext()); } @@ -392,16 +440,16 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} | "\\u" {HexDigit}* { sb.append(yytext()); - setStringOrCharError("Invalid \\u sequence. \\u must be followed by exactly 4 hex digits"); + setStateBasedTokenError("Invalid \\u sequence. \\u must be followed by exactly 4 hex digits"); } "\\" [^btnfr'\"\\u] { sb.append(yytext()); - setStringOrCharError("Invalid escape sequence " + yytext()); + setStateBasedTokenError("Invalid escape sequence " + yytext()); } - [\r\n] { return invalidStringOrChar("Unterminated character literal"); } - <> { return invalidStringOrChar("Unterminated character literal"); } + [\r\n] { return invalidStateBasedToken("Unterminated character literal"); } + <> { return invalidStateBasedToken("Unterminated character literal"); } } /*Misc*/ @@ -669,17 +717,25 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} | } { - {PrimitiveType} { yybegin(YYINITIAL); return newToken(PRIMITIVE_TYPE); } - {ClassDescriptor} { yybegin(YYINITIAL); return newToken(CLASS_DESCRIPTOR); } - [^] { yypushback(1); yybegin(YYINITIAL); } - <> { yybegin(YYINITIAL); } + {PrimitiveType} { yybegin(YYINITIAL); return newToken(PRIMITIVE_TYPE); } + {ClassDescriptor} { + yypushback(yylength()); + beginStateBasedToken(CLASS_DESCRIPTOR_BEGINNING); + sb.append(yytext()); + } + [^] { yypushback(1); yybegin(YYINITIAL); } + <> { yybegin(YYINITIAL); } } /*Types*/ { {PrimitiveType} { return newToken(PRIMITIVE_TYPE); } V { return newToken(VOID_TYPE); } - {ClassDescriptor} { return newToken(CLASS_DESCRIPTOR); } + {ClassDescriptor} { + yypushback(yylength()); + beginStateBasedToken(CLASS_DESCRIPTOR_BEGINNING); + sb.append(yytext()); + } // we have to drop into a separate state so that we don't parse something like // "[I->" as "[" followed by "I-" as a SIMPLE_NAME @@ -700,15 +756,9 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} | yybegin(PARAM_LIST); } - {SimpleNameRaw} { return simpleNameToken(yytext(), false); } - {SimpleNameQuoted} { return simpleNameToken(yytext(), true); } - {SimpleNameQuotedWithSpaces} { - if (apiLevel < 30) { - String message = "spaces in SimpleName are not allowed prior to API level 30"; - return new InvalidToken(message, yytext()); - } - return simpleNameToken(yytext(), true); - } + {SimpleNameRaw} { return newToken(SIMPLE_NAME, yytext()); } + {SimpleNameQuoted} { return newToken(SIMPLE_NAME, processQuotedSimpleName(yytext())); } + {SimpleNameQuotedWithSpaces} { return newToken(SIMPLE_NAME, processQuotedSimpleNameWithSpaces(yytext())); } "<" {SimpleNameRaw} ">" { return newToken(MEMBER_NAME); } } diff --git a/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.smali b/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.smali index bde031ba..4f76d30a 100644 --- a/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.smali +++ b/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.smali @@ -12,6 +12,10 @@ Ljava/lang/String; LI; LV; LI/I/I; +L`single`; +L`java`/lang/String; +L`java`/`lang`/`String`; +Lspace/test/`20 a0 1680 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 200a 202f 205f 3000 `; [Z [B @@ -22,6 +26,9 @@ LI/I/I; [F [D [Ljava/lang/String; +[L`java`/lang/String; +[L`spaaaace spaaaace`; +[L`spaaaace spaaaace`/`spaaaace spaaaace`; [LI/I/I; [[LI/I/I; [[I @@ -40,9 +47,14 @@ Ljava/lang/String;Ljava/lang/String; Ljava/lang/String +L`java`/lang/String L; +L``; +L`` LI L[Ljava/lang/String; +L`[Ljava/lang/String; +LInvalidCharIn321\`[`; [ [V @@ -55,5 +67,7 @@ III [I->clone()Ljava/lang/Object; +`this is the quote that never ends `simple_name_in_backticks` `simple_name_with_spaces_20 a0 1680 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 200a 202f 205f 3000 ` + diff --git a/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.tokens b/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.tokens index f53e0800..2ec07da3 100644 --- a/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.tokens +++ b/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.tokens @@ -12,6 +12,10 @@ CLASS_DESCRIPTOR("Ljava/lang/String;") CLASS_DESCRIPTOR("LI;") CLASS_DESCRIPTOR("LV;") CLASS_DESCRIPTOR("LI/I/I;") +CLASS_DESCRIPTOR("Lsingle;") +CLASS_DESCRIPTOR("Ljava/lang/String;") +CLASS_DESCRIPTOR("Ljava/lang/String;") +CLASS_DESCRIPTOR("Lspace/test/20 a0\u00a01680\u16802000\u20002001\u20012002\u20022003\u20032004\u20042005\u20052006\u20062007\u20072008\u20082009\u2009200a\u200a202f\u202f205f\u205f3000\u3000;") ARRAY_TYPE_PREFIX("[") PRIMITIVE_TYPE("Z") @@ -32,6 +36,12 @@ PRIMITIVE_TYPE("D") ARRAY_TYPE_PREFIX("[") CLASS_DESCRIPTOR("Ljava/lang/String;") ARRAY_TYPE_PREFIX("[") +CLASS_DESCRIPTOR("Ljava/lang/String;") +ARRAY_TYPE_PREFIX("[") +CLASS_DESCRIPTOR("Lspaaaace spaaaace;") +ARRAY_TYPE_PREFIX("[") +CLASS_DESCRIPTOR("Lspaaaace spaaaace/spaaaace spaaaace;") +ARRAY_TYPE_PREFIX("[") CLASS_DESCRIPTOR("LI/I/I;") ARRAY_TYPE_PREFIX("[[") CLASS_DESCRIPTOR("LI/I/I;") @@ -86,9 +96,14 @@ MEMBER_NAME("") MEMBER_NAME("") SIMPLE_NAME("Ljava") INVALID_TOKEN("/") SIMPLE_NAME("lang") INVALID_TOKEN("/") SIMPLE_NAME("String") +SIMPLE_NAME("L") SIMPLE_NAME("java") INVALID_TOKEN("/") SIMPLE_NAME("lang") INVALID_TOKEN("/") SIMPLE_NAME("String") SIMPLE_NAME("L") INVALID_TOKEN(";") +SIMPLE_NAME("L") INVALID_TOKEN("`") INVALID_TOKEN("`") INVALID_TOKEN(";") +SIMPLE_NAME("L") INVALID_TOKEN("`") INVALID_TOKEN("`") SIMPLE_NAME("LI") SIMPLE_NAME("L") ARRAY_TYPE_PREFIX("[") CLASS_DESCRIPTOR("Ljava/lang/String;") +SIMPLE_NAME("L") INVALID_TOKEN("`") ARRAY_TYPE_PREFIX("[") CLASS_DESCRIPTOR("Ljava/lang/String;") +SIMPLE_NAME("LInvalidCharIn321") INVALID_TOKEN("\\") INVALID_TOKEN("`") ARRAY_TYPE_PREFIX("[") INVALID_TOKEN("`") INVALID_TOKEN(";") ARRAY_TYPE_PREFIX("[") ARRAY_TYPE_PREFIX("[") VOID_TYPE("V") @@ -109,5 +124,6 @@ OPEN_PAREN("(") CLOSE_PAREN(")") CLASS_DESCRIPTOR("Ljava/lang/Object;") +INVALID_TOKEN("`") SIMPLE_NAME("this") SIMPLE_NAME("is") SIMPLE_NAME("the") SIMPLE_NAME("quote") SIMPLE_NAME("that") SIMPLE_NAME("never") SIMPLE_NAME("ends") SIMPLE_NAME("simple_name_in_backticks") -SIMPLE_NAME("simple_name_with_spaces_20 a0 1680 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 200a 202f 205f 3000 ") +SIMPLE_NAME("simple_name_with_spaces_20 a0\u00a01680\u16802000\u20002001\u20012002\u20022003\u20032004\u20042005\u20052006\u20062007\u20072008\u20082009\u2009200a\u200a202f\u202f205f\u205f3000\u3000")