Fix how quoted simple names are handled in class descriptors

This unifies the handling of simple names between class descriptors and
member names, and ensures that spaces are only allowed for the appropriate
API levels in both cases.
This commit is contained in:
Ben Gruver 2019-09-17 13:17:41 -07:00
parent d762deacc9
commit 0e5421cacb
3 changed files with 139 additions and 59 deletions

View File

@ -25,10 +25,10 @@ import static org.jf.smali.smaliParser.*;
%{
private StringBuffer sb = new StringBuffer();
private String stringOrCharError = null;
private int stringStartLine;
private int stringStartCol;
private int stringStartChar;
private String tokenError = null;
private int tokenStartLine;
private int tokenStartCol;
private int tokenStartChar;
private int lexerErrors = 0;
@ -136,51 +136,44 @@ import static org.jf.smali.smaliParser.*;
return invalidToken(message, yytext());
}
private Token simpleNameToken(String text, boolean quoted) {
if (quoted) {
text = text.substring(1, text.length() - 1); /* strip backticks */
}
return newToken(SIMPLE_NAME, text);
}
private void beginStringOrChar(int state) {
private void beginStateBasedToken(int state) {
yybegin(state);
sb.setLength(0);
stringStartLine = getLine();
stringStartCol = getColumn();
stringStartChar = yychar;
stringOrCharError = null;
tokenStartLine = getLine();
tokenStartCol = getColumn();
tokenStartChar = yychar;
tokenError = null;
}
private Token endStringOrChar(int type) {
private Token endStateBasedToken(int type) {
yybegin(YYINITIAL);
if (stringOrCharError != null) {
return invalidStringOrChar(stringOrCharError);
if (tokenError != null) {
return invalidStateBasedToken(tokenError);
}
CommonToken token = new CommonToken(type, sb.toString());
token.setStartIndex(stringStartChar);
token.setStartIndex(tokenStartChar);
token.setStopIndex(yychar + yylength() - 1);
token.setLine(stringStartLine);
token.setCharPositionInLine(stringStartCol);
token.setLine(tokenStartLine);
token.setCharPositionInLine(tokenStartCol);
return token;
}
private void setStringOrCharError(String message) {
if (stringOrCharError == null) {
stringOrCharError = message;
private void setStateBasedTokenError(String message) {
if (tokenError == null) {
tokenError = message;
}
}
private Token invalidStringOrChar(String message) {
private Token invalidStateBasedToken(String message) {
yybegin(YYINITIAL);
InvalidToken token = new InvalidToken(message, sb.toString());
token.setStartIndex(stringStartChar);
token.setStartIndex(tokenStartChar);
token.setStopIndex(yychar + yylength() - 1);
token.setLine(stringStartLine);
token.setCharPositionInLine(stringStartCol);
token.setLine(tokenStartLine);
token.setCharPositionInLine(tokenStartCol);
return token;
}
@ -201,6 +194,19 @@ import static org.jf.smali.smaliParser.*;
zzAtEOF = false;
yybegin(initialState);
}
/**
 * Removes the quoting from a backtick-quoted simple name.
 *
 * @param text the matched name, including its opening and closing backtick
 * @return text with its first and last characters dropped
 */
private String processQuotedSimpleName(String text) {
// strip backticks
return text.substring(1, text.length() - 1);
}
/**
 * Unquotes a name matched by {SimpleNameQuotedWithSpaces}, recording an error
 * when apiLevel is below 30.
 *
 * The error is only recorded (via setStateBasedTokenError); the unquoted name
 * is returned either way, so the caller decides how the error is surfaced.
 *
 * NOTE(review): the recorded error is read back by endStateBasedToken. Confirm
 * it is also reported for callers that pass the result straight to
 * newToken(SIMPLE_NAME, ...) instead of ending a state-based token.
 *
 * @param text the matched name, including its opening and closing backtick
 * @return text with the surrounding backticks removed
 */
private String processQuotedSimpleNameWithSpaces(String text) {
if (apiLevel < 30) {
setStateBasedTokenError("spaces in class descriptors and member names are not supported prior to API " +
"level 30/dex version 040");
}
return processQuotedSimpleName(text);
}
%}
HexPrefix = 0 [xX]
@ -260,6 +266,8 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} |
%state ARRAY_DESCRIPTOR
%state STRING
%state CHAR
%state CLASS_DESCRIPTOR_BEGINNING
%state CLASS_DESCRIPTOR_REMAINING
%%
@ -320,9 +328,9 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} |
"true"|"false" { return newToken(BOOL_LITERAL); }
"null" { return newToken(NULL_LITERAL); }
"\"" { beginStringOrChar(STRING); sb.append('"'); }
"\"" { beginStateBasedToken(STRING); sb.append('"'); }
' { beginStringOrChar(CHAR); sb.append('\''); }
' { beginStateBasedToken(CHAR); sb.append('\''); }
}
<PARAM_LIST_OR_ID> {
@ -333,14 +341,54 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} |
<PARAM_LIST> {
{PrimitiveType} { return newToken(PRIMITIVE_TYPE); }
{ClassDescriptor} { return newToken(CLASS_DESCRIPTOR); }
{ClassDescriptor} {
yypushback(yylength());
beginStateBasedToken(CLASS_DESCRIPTOR_BEGINNING);
sb.append(yytext());
}
{ArrayPrefix} { return newToken(ARRAY_TYPE_PREFIX); }
[^] { yypushback(1); yybegin(YYINITIAL);}
<<EOF>> { yybegin(YYINITIAL);}
}
// Consumes the leading "L" plus the first name segment of a class descriptor.
// Entered from the {ClassDescriptor} rules, which push the whole match back so
// that it is re-scanned here one segment at a time. Each rule appends the
// segment to sb (without backticks for the quoted forms) and then hands over
// to CLASS_DESCRIPTOR_REMAINING.
<CLASS_DESCRIPTOR_BEGINNING> {
"L" {SimpleNameRaw} {
sb.append(yytext());
yybegin(CLASS_DESCRIPTOR_REMAINING);
}
"L" {SimpleNameQuoted} {
sb.append("L");
// substring(1) drops the "L" so only the quoted name is unquoted
sb.append(processQuotedSimpleName(yytext().substring(1)));
yybegin(CLASS_DESCRIPTOR_REMAINING);
}
"L" {SimpleNameQuotedWithSpaces} {
sb.append("L");
// may record an API-level error that endStateBasedToken reports later
sb.append(processQuotedSimpleNameWithSpaces(yytext().substring(1)));
yybegin(CLASS_DESCRIPTOR_REMAINING);
}
}
// Consumes the rest of a class descriptor after its first segment: any number
// of "/"-prefixed name segments, then the terminating ";". Segments are
// appended to sb with backticks removed; the ";" rule emits the token.
<CLASS_DESCRIPTOR_REMAINING> {
"/" {SimpleNameRaw} {
sb.append(yytext());
}
"/" {SimpleNameQuoted} {
sb.append("/");
// substring(1) drops the "/" so only the quoted name is unquoted
sb.append(processQuotedSimpleName(yytext().substring(1)));
}
"/" {SimpleNameQuotedWithSpaces} {
sb.append("/");
// may record an API-level error that endStateBasedToken reports below
sb.append(processQuotedSimpleNameWithSpaces(yytext().substring(1)));
}
";" {
sb.append(yytext());
// returns to YYINITIAL and yields either CLASS_DESCRIPTOR or an invalid
// token if an error was recorded while scanning the segments
return endStateBasedToken(CLASS_DESCRIPTOR);
}
}
<STRING> {
"\"" { sb.append('"'); return endStringOrChar(STRING_LITERAL); }
"\"" { sb.append('"'); return endStateBasedToken(STRING_LITERAL); }
[^\r\n\"\\]+ { sb.append(yytext()); }
"\\b" { sb.append('\b'); }
@ -355,28 +403,28 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} |
"\\u" {FewerHexDigits} {
sb.append(yytext());
setStringOrCharError("Invalid \\u sequence. \\u must be followed by 4 hex digits");
setStateBasedTokenError("Invalid \\u sequence. \\u must be followed by 4 hex digits");
}
"\\" [^btnfr'\"\\u] {
sb.append(yytext());
setStringOrCharError("Invalid escape sequence " + yytext());
setStateBasedTokenError("Invalid escape sequence " + yytext());
}
[\r\n] { return invalidStringOrChar("Unterminated string literal"); }
<<EOF>> { return invalidStringOrChar("Unterminated string literal"); }
[\r\n] { return invalidStateBasedToken("Unterminated string literal"); }
<<EOF>> { return invalidStateBasedToken("Unterminated string literal"); }
}
<CHAR> {
' {
sb.append('\'');
if (sb.length() == 2) {
return invalidStringOrChar("Empty character literal");
return invalidStateBasedToken("Empty character literal");
} else if (sb.length() > 3) {
return invalidStringOrChar("Character literal with multiple chars");
return invalidStateBasedToken("Character literal with multiple chars");
}
return endStringOrChar(CHAR_LITERAL);
return endStateBasedToken(CHAR_LITERAL);
}
[^\r\n'\\]+ { sb.append(yytext()); }
@ -392,16 +440,16 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} |
"\\u" {HexDigit}* {
sb.append(yytext());
setStringOrCharError("Invalid \\u sequence. \\u must be followed by exactly 4 hex digits");
setStateBasedTokenError("Invalid \\u sequence. \\u must be followed by exactly 4 hex digits");
}
"\\" [^btnfr'\"\\u] {
sb.append(yytext());
setStringOrCharError("Invalid escape sequence " + yytext());
setStateBasedTokenError("Invalid escape sequence " + yytext());
}
[\r\n] { return invalidStringOrChar("Unterminated character literal"); }
<<EOF>> { return invalidStringOrChar("Unterminated character literal"); }
[\r\n] { return invalidStateBasedToken("Unterminated character literal"); }
<<EOF>> { return invalidStateBasedToken("Unterminated character literal"); }
}
/*Misc*/
@ -669,17 +717,25 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} |
}
<ARRAY_DESCRIPTOR> {
{PrimitiveType} { yybegin(YYINITIAL); return newToken(PRIMITIVE_TYPE); }
{ClassDescriptor} { yybegin(YYINITIAL); return newToken(CLASS_DESCRIPTOR); }
[^] { yypushback(1); yybegin(YYINITIAL); }
<<EOF>> { yybegin(YYINITIAL); }
{PrimitiveType} { yybegin(YYINITIAL); return newToken(PRIMITIVE_TYPE); }
{ClassDescriptor} {
yypushback(yylength());
beginStateBasedToken(CLASS_DESCRIPTOR_BEGINNING);
sb.append(yytext());
}
[^] { yypushback(1); yybegin(YYINITIAL); }
<<EOF>> { yybegin(YYINITIAL); }
}
/*Types*/
<YYINITIAL> {
{PrimitiveType} { return newToken(PRIMITIVE_TYPE); }
V { return newToken(VOID_TYPE); }
{ClassDescriptor} { return newToken(CLASS_DESCRIPTOR); }
{ClassDescriptor} {
yypushback(yylength());
beginStateBasedToken(CLASS_DESCRIPTOR_BEGINNING);
sb.append(yytext());
}
// we have to drop into a separate state so that we don't parse something like
// "[I->" as "[" followed by "I-" as a SIMPLE_NAME
@ -700,15 +756,9 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} |
yybegin(PARAM_LIST);
}
{SimpleNameRaw} { return simpleNameToken(yytext(), false); }
{SimpleNameQuoted} { return simpleNameToken(yytext(), true); }
{SimpleNameQuotedWithSpaces} {
if (apiLevel < 30) {
String message = "spaces in SimpleName are not allowed prior to API level 30";
return new InvalidToken(message, yytext());
}
return simpleNameToken(yytext(), true);
}
{SimpleNameRaw} { return newToken(SIMPLE_NAME, yytext()); }
{SimpleNameQuoted} { return newToken(SIMPLE_NAME, processQuotedSimpleName(yytext())); }
{SimpleNameQuotedWithSpaces} { return newToken(SIMPLE_NAME, processQuotedSimpleNameWithSpaces(yytext())); }
"<" {SimpleNameRaw} ">" { return newToken(MEMBER_NAME); }
}

View File

@ -12,6 +12,10 @@ Ljava/lang/String;
LI;
LV;
LI/I/I;
L`single`;
L`java`/lang/String;
L`java`/`lang`/`String`;
Lspace/test/`20 a0 16802000 200120022003200420052006200720082009200a202f205f3000 `;
[Z
[B
@ -22,6 +26,9 @@ LI/I/I;
[F
[D
[Ljava/lang/String;
[L`java`/lang/String;
[L`spaaaace spaaaace`;
[L`spaaaace spaaaace`/`spaaaace spaaaace`;
[LI/I/I;
[[LI/I/I;
[[I
@ -40,9 +47,14 @@ Ljava/lang/String;Ljava/lang/String;
<init->
Ljava/lang/String
L`java`/lang/String
L;
L``;
L``
LI
L[Ljava/lang/String;
L`[Ljava/lang/String;
LInvalidCharIn321\`[`;
[
[V
@ -55,5 +67,7 @@ III
[I->clone()Ljava/lang/Object;
`this is the quote that never ends
`simple_name_in_backticks`
`simple_name_with_spaces_20 a0 16802000 200120022003200420052006200720082009200a202f205f3000 `

View File

@ -12,6 +12,10 @@ CLASS_DESCRIPTOR("Ljava/lang/String;")
CLASS_DESCRIPTOR("LI;")
CLASS_DESCRIPTOR("LV;")
CLASS_DESCRIPTOR("LI/I/I;")
CLASS_DESCRIPTOR("Lsingle;")
CLASS_DESCRIPTOR("Ljava/lang/String;")
CLASS_DESCRIPTOR("Ljava/lang/String;")
CLASS_DESCRIPTOR("Lspace/test/20 a0\u00a01680\u16802000\u20002001\u20012002\u20022003\u20032004\u20042005\u20052006\u20062007\u20072008\u20082009\u2009200a\u200a202f\u202f205f\u205f3000\u3000;")
ARRAY_TYPE_PREFIX("[")
PRIMITIVE_TYPE("Z")
@ -32,6 +36,12 @@ PRIMITIVE_TYPE("D")
ARRAY_TYPE_PREFIX("[")
CLASS_DESCRIPTOR("Ljava/lang/String;")
ARRAY_TYPE_PREFIX("[")
CLASS_DESCRIPTOR("Ljava/lang/String;")
ARRAY_TYPE_PREFIX("[")
CLASS_DESCRIPTOR("Lspaaaace spaaaace;")
ARRAY_TYPE_PREFIX("[")
CLASS_DESCRIPTOR("Lspaaaace spaaaace/spaaaace spaaaace;")
ARRAY_TYPE_PREFIX("[")
CLASS_DESCRIPTOR("LI/I/I;")
ARRAY_TYPE_PREFIX("[[")
CLASS_DESCRIPTOR("LI/I/I;")
@ -86,9 +96,14 @@ MEMBER_NAME("<blah>")
MEMBER_NAME("<init->")
SIMPLE_NAME("Ljava") INVALID_TOKEN("/") SIMPLE_NAME("lang") INVALID_TOKEN("/") SIMPLE_NAME("String")
SIMPLE_NAME("L") SIMPLE_NAME("java") INVALID_TOKEN("/") SIMPLE_NAME("lang") INVALID_TOKEN("/") SIMPLE_NAME("String")
SIMPLE_NAME("L") INVALID_TOKEN(";")
SIMPLE_NAME("L") INVALID_TOKEN("`") INVALID_TOKEN("`") INVALID_TOKEN(";")
SIMPLE_NAME("L") INVALID_TOKEN("`") INVALID_TOKEN("`")
SIMPLE_NAME("LI")
SIMPLE_NAME("L") ARRAY_TYPE_PREFIX("[") CLASS_DESCRIPTOR("Ljava/lang/String;")
SIMPLE_NAME("L") INVALID_TOKEN("`") ARRAY_TYPE_PREFIX("[") CLASS_DESCRIPTOR("Ljava/lang/String;")
SIMPLE_NAME("LInvalidCharIn321") INVALID_TOKEN("\\") INVALID_TOKEN("`") ARRAY_TYPE_PREFIX("[") INVALID_TOKEN("`") INVALID_TOKEN(";")
ARRAY_TYPE_PREFIX("[")
ARRAY_TYPE_PREFIX("[") VOID_TYPE("V")
@ -109,5 +124,6 @@ OPEN_PAREN("(")
CLOSE_PAREN(")")
CLASS_DESCRIPTOR("Ljava/lang/Object;")
INVALID_TOKEN("`") SIMPLE_NAME("this") SIMPLE_NAME("is") SIMPLE_NAME("the") SIMPLE_NAME("quote") SIMPLE_NAME("that") SIMPLE_NAME("never") SIMPLE_NAME("ends")
SIMPLE_NAME("simple_name_in_backticks")
SIMPLE_NAME("simple_name_with_spaces_20 a0 16802000 200120022003200420052006200720082009200a202f205f3000 ")
SIMPLE_NAME("simple_name_with_spaces_20 a0\u00a01680\u16802000\u20002001\u20012002\u20022003\u20032004\u20042005\u20052006\u20062007\u20072008\u20082009\u2009200a\u200a202f\u202f205f\u205f3000\u3000")