Fix how quoted simple names are handled in class descriptors

This unifies the handling of simple names between class descriptors and
member names, and ensures that spaces are only allowed for the appropriate
API levels in both cases.
This commit is contained in:
Ben Gruver 2019-09-17 13:17:41 -07:00
parent d762deacc9
commit 0e5421cacb
3 changed files with 139 additions and 59 deletions

View File

@ -25,10 +25,10 @@ import static org.jf.smali.smaliParser.*;
%{ %{
private StringBuffer sb = new StringBuffer(); private StringBuffer sb = new StringBuffer();
private String stringOrCharError = null; private String tokenError = null;
private int stringStartLine; private int tokenStartLine;
private int stringStartCol; private int tokenStartCol;
private int stringStartChar; private int tokenStartChar;
private int lexerErrors = 0; private int lexerErrors = 0;
@ -136,51 +136,44 @@ import static org.jf.smali.smaliParser.*;
return invalidToken(message, yytext()); return invalidToken(message, yytext());
} }
private Token simpleNameToken(String text, boolean quoted) { private void beginStateBasedToken(int state) {
if (quoted) {
text = text.substring(1, text.length() - 1); /* strip backticks */
}
return newToken(SIMPLE_NAME, text);
}
private void beginStringOrChar(int state) {
yybegin(state); yybegin(state);
sb.setLength(0); sb.setLength(0);
stringStartLine = getLine(); tokenStartLine = getLine();
stringStartCol = getColumn(); tokenStartCol = getColumn();
stringStartChar = yychar; tokenStartChar = yychar;
stringOrCharError = null; tokenError = null;
} }
private Token endStringOrChar(int type) { private Token endStateBasedToken(int type) {
yybegin(YYINITIAL); yybegin(YYINITIAL);
if (stringOrCharError != null) { if (tokenError != null) {
return invalidStringOrChar(stringOrCharError); return invalidStateBasedToken(tokenError);
} }
CommonToken token = new CommonToken(type, sb.toString()); CommonToken token = new CommonToken(type, sb.toString());
token.setStartIndex(stringStartChar); token.setStartIndex(tokenStartChar);
token.setStopIndex(yychar + yylength() - 1); token.setStopIndex(yychar + yylength() - 1);
token.setLine(stringStartLine); token.setLine(tokenStartLine);
token.setCharPositionInLine(stringStartCol); token.setCharPositionInLine(tokenStartCol);
return token; return token;
} }
private void setStringOrCharError(String message) { private void setStateBasedTokenError(String message) {
if (stringOrCharError == null) { if (tokenError == null) {
stringOrCharError = message; tokenError = message;
} }
} }
private Token invalidStringOrChar(String message) { private Token invalidStateBasedToken(String message) {
yybegin(YYINITIAL); yybegin(YYINITIAL);
InvalidToken token = new InvalidToken(message, sb.toString()); InvalidToken token = new InvalidToken(message, sb.toString());
token.setStartIndex(stringStartChar); token.setStartIndex(tokenStartChar);
token.setStopIndex(yychar + yylength() - 1); token.setStopIndex(yychar + yylength() - 1);
token.setLine(stringStartLine); token.setLine(tokenStartLine);
token.setCharPositionInLine(stringStartCol); token.setCharPositionInLine(tokenStartCol);
return token; return token;
} }
@ -201,6 +194,19 @@ import static org.jf.smali.smaliParser.*;
zzAtEOF = false; zzAtEOF = false;
yybegin(initialState); yybegin(initialState);
} }
/**
 * Removes the surrounding quote characters from a quoted simple name.
 *
 * The lexer rules call this with text matched by a backtick-quoted simple
 * name pattern, so the first and last characters are the backticks.
 *
 * @param text the matched text, including the opening and closing backtick
 * @return {@code text} without its first and last character
 */
private String processQuotedSimpleName(String text) {
// strip backticks: drop the first and the last character of the match
return text.substring(1, text.length() - 1);
}
/**
 * Handles a backtick-quoted simple name that contains spaces.
 *
 * When {@code apiLevel} is below 30 an error message is recorded through
 * {@code setStateBasedTokenError}; the name is then unquoted exactly like a
 * quoted name without spaces. This method itself never returns an invalid
 * token, it only records the error.
 *
 * NOTE(review): the recorded error is turned into an invalid token by
 * endStateBasedToken. The YYINITIAL rule for SimpleNameQuotedWithSpaces
 * passes this method's result directly to newToken instead - confirm that
 * the error is still surfaced for member names on that path.
 *
 * @param text the matched text, including the opening and closing backtick
 * @return {@code text} without its surrounding backticks
 */
private String processQuotedSimpleNameWithSpaces(String text) {
// spaces are only accepted from API level 30 on; below that, record an error
if (apiLevel < 30) {
setStateBasedTokenError("spaces in class descriptors and member names are not supported prior to API " +
"level 30/dex version 040");
}
return processQuotedSimpleName(text);
}
%} %}
HexPrefix = 0 [xX] HexPrefix = 0 [xX]
@ -260,6 +266,8 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} |
%state ARRAY_DESCRIPTOR %state ARRAY_DESCRIPTOR
%state STRING %state STRING
%state CHAR %state CHAR
%state CLASS_DESCRIPTOR_BEGINNING
%state CLASS_DESCRIPTOR_REMAINING
%% %%
@ -320,9 +328,9 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} |
"true"|"false" { return newToken(BOOL_LITERAL); } "true"|"false" { return newToken(BOOL_LITERAL); }
"null" { return newToken(NULL_LITERAL); } "null" { return newToken(NULL_LITERAL); }
"\"" { beginStringOrChar(STRING); sb.append('"'); } "\"" { beginStateBasedToken(STRING); sb.append('"'); }
' { beginStringOrChar(CHAR); sb.append('\''); } ' { beginStateBasedToken(CHAR); sb.append('\''); }
} }
<PARAM_LIST_OR_ID> { <PARAM_LIST_OR_ID> {
@ -333,14 +341,54 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} |
<PARAM_LIST> { <PARAM_LIST> {
{PrimitiveType} { return newToken(PRIMITIVE_TYPE); } {PrimitiveType} { return newToken(PRIMITIVE_TYPE); }
{ClassDescriptor} { return newToken(CLASS_DESCRIPTOR); } {ClassDescriptor} {
yypushback(yylength());
beginStateBasedToken(CLASS_DESCRIPTOR_BEGINNING);
sb.append(yytext());
}
{ArrayPrefix} { return newToken(ARRAY_TYPE_PREFIX); } {ArrayPrefix} { return newToken(ARRAY_TYPE_PREFIX); }
[^] { yypushback(1); yybegin(YYINITIAL);} [^] { yypushback(1); yybegin(YYINITIAL);}
<<EOF>> { yybegin(YYINITIAL);} <<EOF>> { yybegin(YYINITIAL);}
} }
// First segment of a class descriptor: the leading "L" plus the first simple
// name. This state is entered through beginStateBasedToken after a
// {ClassDescriptor} rule has pushed its whole match back, so the descriptor is
// re-scanned here piece by piece and accumulated in sb. Every rule then
// switches to CLASS_DESCRIPTOR_REMAINING for the rest of the descriptor.
<CLASS_DESCRIPTOR_BEGINNING> {
// unquoted first segment: copied verbatim, including the "L"
"L" {SimpleNameRaw} {
sb.append(yytext());
yybegin(CLASS_DESCRIPTOR_REMAINING);
}
// backtick-quoted first segment: the "L" is split off before unquoting
"L" {SimpleNameQuoted} {
sb.append("L");
sb.append(processQuotedSimpleName(yytext().substring(1)));
yybegin(CLASS_DESCRIPTOR_REMAINING);
}
// quoted first segment containing spaces: records an error for apiLevel < 30
"L" {SimpleNameQuotedWithSpaces} {
sb.append("L");
sb.append(processQuotedSimpleNameWithSpaces(yytext().substring(1)));
yybegin(CLASS_DESCRIPTOR_REMAINING);
}
}
// Remaining segments of a class descriptor: zero or more "/"-prefixed simple
// names followed by the terminating ";". Each segment is appended to sb with
// any backticks removed; the ";" rule finishes the token.
<CLASS_DESCRIPTOR_REMAINING> {
// unquoted segment: copied verbatim, including the "/"
"/" {SimpleNameRaw} {
sb.append(yytext());
}
// backtick-quoted segment: the "/" is split off before unquoting
"/" {SimpleNameQuoted} {
sb.append("/");
sb.append(processQuotedSimpleName(yytext().substring(1)));
}
// quoted segment containing spaces: records an error for apiLevel < 30
"/" {SimpleNameQuotedWithSpaces} {
sb.append("/");
sb.append(processQuotedSimpleNameWithSpaces(yytext().substring(1)));
}
// end of descriptor: emits CLASS_DESCRIPTOR built from sb, or an invalid
// token if an error was recorded while scanning the segments.
// NOTE(review): endStateBasedToken always switches to YYINITIAL, so a
// descriptor that was started from PARAM_LIST does not return to PARAM_LIST
// afterwards - confirm that this is intended.
";" {
sb.append(yytext());
return endStateBasedToken(CLASS_DESCRIPTOR);
}
}
<STRING> { <STRING> {
"\"" { sb.append('"'); return endStringOrChar(STRING_LITERAL); } "\"" { sb.append('"'); return endStateBasedToken(STRING_LITERAL); }
[^\r\n\"\\]+ { sb.append(yytext()); } [^\r\n\"\\]+ { sb.append(yytext()); }
"\\b" { sb.append('\b'); } "\\b" { sb.append('\b'); }
@ -355,28 +403,28 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} |
"\\u" {FewerHexDigits} { "\\u" {FewerHexDigits} {
sb.append(yytext()); sb.append(yytext());
setStringOrCharError("Invalid \\u sequence. \\u must be followed by 4 hex digits"); setStateBasedTokenError("Invalid \\u sequence. \\u must be followed by 4 hex digits");
} }
"\\" [^btnfr'\"\\u] { "\\" [^btnfr'\"\\u] {
sb.append(yytext()); sb.append(yytext());
setStringOrCharError("Invalid escape sequence " + yytext()); setStateBasedTokenError("Invalid escape sequence " + yytext());
} }
[\r\n] { return invalidStringOrChar("Unterminated string literal"); } [\r\n] { return invalidStateBasedToken("Unterminated string literal"); }
<<EOF>> { return invalidStringOrChar("Unterminated string literal"); } <<EOF>> { return invalidStateBasedToken("Unterminated string literal"); }
} }
<CHAR> { <CHAR> {
' { ' {
sb.append('\''); sb.append('\'');
if (sb.length() == 2) { if (sb.length() == 2) {
return invalidStringOrChar("Empty character literal"); return invalidStateBasedToken("Empty character literal");
} else if (sb.length() > 3) { } else if (sb.length() > 3) {
return invalidStringOrChar("Character literal with multiple chars"); return invalidStateBasedToken("Character literal with multiple chars");
} }
return endStringOrChar(CHAR_LITERAL); return endStateBasedToken(CHAR_LITERAL);
} }
[^\r\n'\\]+ { sb.append(yytext()); } [^\r\n'\\]+ { sb.append(yytext()); }
@ -392,16 +440,16 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} |
"\\u" {HexDigit}* { "\\u" {HexDigit}* {
sb.append(yytext()); sb.append(yytext());
setStringOrCharError("Invalid \\u sequence. \\u must be followed by exactly 4 hex digits"); setStateBasedTokenError("Invalid \\u sequence. \\u must be followed by exactly 4 hex digits");
} }
"\\" [^btnfr'\"\\u] { "\\" [^btnfr'\"\\u] {
sb.append(yytext()); sb.append(yytext());
setStringOrCharError("Invalid escape sequence " + yytext()); setStateBasedTokenError("Invalid escape sequence " + yytext());
} }
[\r\n] { return invalidStringOrChar("Unterminated character literal"); } [\r\n] { return invalidStateBasedToken("Unterminated character literal"); }
<<EOF>> { return invalidStringOrChar("Unterminated character literal"); } <<EOF>> { return invalidStateBasedToken("Unterminated character literal"); }
} }
/*Misc*/ /*Misc*/
@ -669,17 +717,25 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} |
} }
<ARRAY_DESCRIPTOR> { <ARRAY_DESCRIPTOR> {
{PrimitiveType} { yybegin(YYINITIAL); return newToken(PRIMITIVE_TYPE); } {PrimitiveType} { yybegin(YYINITIAL); return newToken(PRIMITIVE_TYPE); }
{ClassDescriptor} { yybegin(YYINITIAL); return newToken(CLASS_DESCRIPTOR); } {ClassDescriptor} {
[^] { yypushback(1); yybegin(YYINITIAL); } yypushback(yylength());
<<EOF>> { yybegin(YYINITIAL); } beginStateBasedToken(CLASS_DESCRIPTOR_BEGINNING);
sb.append(yytext());
}
[^] { yypushback(1); yybegin(YYINITIAL); }
<<EOF>> { yybegin(YYINITIAL); }
} }
/*Types*/ /*Types*/
<YYINITIAL> { <YYINITIAL> {
{PrimitiveType} { return newToken(PRIMITIVE_TYPE); } {PrimitiveType} { return newToken(PRIMITIVE_TYPE); }
V { return newToken(VOID_TYPE); } V { return newToken(VOID_TYPE); }
{ClassDescriptor} { return newToken(CLASS_DESCRIPTOR); } {ClassDescriptor} {
yypushback(yylength());
beginStateBasedToken(CLASS_DESCRIPTOR_BEGINNING);
sb.append(yytext());
}
// we have to drop into a separate state so that we don't parse something like // we have to drop into a separate state so that we don't parse something like
// "[I->" as "[" followed by "I-" as a SIMPLE_NAME // "[I->" as "[" followed by "I-" as a SIMPLE_NAME
@ -700,15 +756,9 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} |
yybegin(PARAM_LIST); yybegin(PARAM_LIST);
} }
{SimpleNameRaw} { return simpleNameToken(yytext(), false); } {SimpleNameRaw} { return newToken(SIMPLE_NAME, yytext()); }
{SimpleNameQuoted} { return simpleNameToken(yytext(), true); } {SimpleNameQuoted} { return newToken(SIMPLE_NAME, processQuotedSimpleName(yytext())); }
{SimpleNameQuotedWithSpaces} { {SimpleNameQuotedWithSpaces} { return newToken(SIMPLE_NAME, processQuotedSimpleNameWithSpaces(yytext())); }
if (apiLevel < 30) {
String message = "spaces in SimpleName are not allowed prior to API level 30";
return new InvalidToken(message, yytext());
}
return simpleNameToken(yytext(), true);
}
"<" {SimpleNameRaw} ">" { return newToken(MEMBER_NAME); } "<" {SimpleNameRaw} ">" { return newToken(MEMBER_NAME); }
} }

View File

@ -12,6 +12,10 @@ Ljava/lang/String;
LI; LI;
LV; LV;
LI/I/I; LI/I/I;
L`single`;
L`java`/lang/String;
L`java`/`lang`/`String`;
Lspace/test/`20 a0 16802000 200120022003200420052006200720082009200a202f205f3000 `;
[Z [Z
[B [B
@ -22,6 +26,9 @@ LI/I/I;
[F [F
[D [D
[Ljava/lang/String; [Ljava/lang/String;
[L`java`/lang/String;
[L`spaaaace spaaaace`;
[L`spaaaace spaaaace`/`spaaaace spaaaace`;
[LI/I/I; [LI/I/I;
[[LI/I/I; [[LI/I/I;
[[I [[I
@ -40,9 +47,14 @@ Ljava/lang/String;Ljava/lang/String;
<init-> <init->
Ljava/lang/String Ljava/lang/String
L`java`/lang/String
L; L;
L``;
L``
LI LI
L[Ljava/lang/String; L[Ljava/lang/String;
L`[Ljava/lang/String;
LInvalidCharIn321\`[`;
[ [
[V [V
@ -55,5 +67,7 @@ III
[I->clone()Ljava/lang/Object; [I->clone()Ljava/lang/Object;
`this is the quote that never ends
`simple_name_in_backticks` `simple_name_in_backticks`
`simple_name_with_spaces_20 a0 16802000 200120022003200420052006200720082009200a202f205f3000 ` `simple_name_with_spaces_20 a0 16802000 200120022003200420052006200720082009200a202f205f3000 `

View File

@ -12,6 +12,10 @@ CLASS_DESCRIPTOR("Ljava/lang/String;")
CLASS_DESCRIPTOR("LI;") CLASS_DESCRIPTOR("LI;")
CLASS_DESCRIPTOR("LV;") CLASS_DESCRIPTOR("LV;")
CLASS_DESCRIPTOR("LI/I/I;") CLASS_DESCRIPTOR("LI/I/I;")
CLASS_DESCRIPTOR("Lsingle;")
CLASS_DESCRIPTOR("Ljava/lang/String;")
CLASS_DESCRIPTOR("Ljava/lang/String;")
CLASS_DESCRIPTOR("Lspace/test/20 a0\u00a01680\u16802000\u20002001\u20012002\u20022003\u20032004\u20042005\u20052006\u20062007\u20072008\u20082009\u2009200a\u200a202f\u202f205f\u205f3000\u3000;")
ARRAY_TYPE_PREFIX("[") ARRAY_TYPE_PREFIX("[")
PRIMITIVE_TYPE("Z") PRIMITIVE_TYPE("Z")
@ -32,6 +36,12 @@ PRIMITIVE_TYPE("D")
ARRAY_TYPE_PREFIX("[") ARRAY_TYPE_PREFIX("[")
CLASS_DESCRIPTOR("Ljava/lang/String;") CLASS_DESCRIPTOR("Ljava/lang/String;")
ARRAY_TYPE_PREFIX("[") ARRAY_TYPE_PREFIX("[")
CLASS_DESCRIPTOR("Ljava/lang/String;")
ARRAY_TYPE_PREFIX("[")
CLASS_DESCRIPTOR("Lspaaaace spaaaace;")
ARRAY_TYPE_PREFIX("[")
CLASS_DESCRIPTOR("Lspaaaace spaaaace/spaaaace spaaaace;")
ARRAY_TYPE_PREFIX("[")
CLASS_DESCRIPTOR("LI/I/I;") CLASS_DESCRIPTOR("LI/I/I;")
ARRAY_TYPE_PREFIX("[[") ARRAY_TYPE_PREFIX("[[")
CLASS_DESCRIPTOR("LI/I/I;") CLASS_DESCRIPTOR("LI/I/I;")
@ -86,9 +96,14 @@ MEMBER_NAME("<blah>")
MEMBER_NAME("<init->") MEMBER_NAME("<init->")
SIMPLE_NAME("Ljava") INVALID_TOKEN("/") SIMPLE_NAME("lang") INVALID_TOKEN("/") SIMPLE_NAME("String") SIMPLE_NAME("Ljava") INVALID_TOKEN("/") SIMPLE_NAME("lang") INVALID_TOKEN("/") SIMPLE_NAME("String")
SIMPLE_NAME("L") SIMPLE_NAME("java") INVALID_TOKEN("/") SIMPLE_NAME("lang") INVALID_TOKEN("/") SIMPLE_NAME("String")
SIMPLE_NAME("L") INVALID_TOKEN(";") SIMPLE_NAME("L") INVALID_TOKEN(";")
SIMPLE_NAME("L") INVALID_TOKEN("`") INVALID_TOKEN("`") INVALID_TOKEN(";")
SIMPLE_NAME("L") INVALID_TOKEN("`") INVALID_TOKEN("`")
SIMPLE_NAME("LI") SIMPLE_NAME("LI")
SIMPLE_NAME("L") ARRAY_TYPE_PREFIX("[") CLASS_DESCRIPTOR("Ljava/lang/String;") SIMPLE_NAME("L") ARRAY_TYPE_PREFIX("[") CLASS_DESCRIPTOR("Ljava/lang/String;")
SIMPLE_NAME("L") INVALID_TOKEN("`") ARRAY_TYPE_PREFIX("[") CLASS_DESCRIPTOR("Ljava/lang/String;")
SIMPLE_NAME("LInvalidCharIn321") INVALID_TOKEN("\\") INVALID_TOKEN("`") ARRAY_TYPE_PREFIX("[") INVALID_TOKEN("`") INVALID_TOKEN(";")
ARRAY_TYPE_PREFIX("[") ARRAY_TYPE_PREFIX("[")
ARRAY_TYPE_PREFIX("[") VOID_TYPE("V") ARRAY_TYPE_PREFIX("[") VOID_TYPE("V")
@ -109,5 +124,6 @@ OPEN_PAREN("(")
CLOSE_PAREN(")") CLOSE_PAREN(")")
CLASS_DESCRIPTOR("Ljava/lang/Object;") CLASS_DESCRIPTOR("Ljava/lang/Object;")
INVALID_TOKEN("`") SIMPLE_NAME("this") SIMPLE_NAME("is") SIMPLE_NAME("the") SIMPLE_NAME("quote") SIMPLE_NAME("that") SIMPLE_NAME("never") SIMPLE_NAME("ends")
SIMPLE_NAME("simple_name_in_backticks") SIMPLE_NAME("simple_name_in_backticks")
SIMPLE_NAME("simple_name_with_spaces_20 a0 16802000 200120022003200420052006200720082009200a202f205f3000 ") SIMPLE_NAME("simple_name_with_spaces_20 a0\u00a01680\u16802000\u20002001\u20012002\u20022003\u20032004\u20042005\u20052006\u20062007\u20072008\u20082009\u2009200a\u200a202f\u202f205f\u205f3000\u3000")