From ae2efe146b8c571821365ec7d43e6db070a678d9 Mon Sep 17 00:00:00 2001 From: Ben Gruver Date: Thu, 19 Sep 2019 13:46:33 -0700 Subject: [PATCH] Fix lexing regression in certain types of method parameter lists The lexer was mis-lexing the case where a class descriptor is immediately followed by a primitive type: the primitive type was being lexed as PARAM_LIST_OR_ID_PRIMITIVE_TYPE instead of PRIMITIVE_TYPE. The root cause was an incorrect state change: the CLASS_DESCRIPTOR state can be entered from within the PARAM_LIST state, but when the CLASS_DESCRIPTOR state ended the lexer always went back to YYINITIAL instead of returning to PARAM_LIST. This adds a state stack to track these state changes. --- smali/src/main/jflex/smaliLexer.jflex | 11 +++++++---- .../LexerTest/TypeAndIdentifierTest.smali | 1 + .../LexerTest/TypeAndIdentifierTest.tokens | 14 ++++++++++++++ .../LexerTest/TypeAndIdentifierTest_api29.smali | 1 + .../LexerTest/TypeAndIdentifierTest_api29.tokens | 14 ++++++++++++++ 5 files changed, 37 insertions(+), 4 deletions(-) diff --git a/smali/src/main/jflex/smaliLexer.jflex b/smali/src/main/jflex/smaliLexer.jflex index bec71bee..b48df4f4 100644 --- a/smali/src/main/jflex/smaliLexer.jflex +++ b/smali/src/main/jflex/smaliLexer.jflex @@ -1,6 +1,7 @@ package org.jf.smali; import java.io.*; +import java.util.Stack; import org.antlr.runtime.*; import org.jf.smali.util.*; import org.jf.util.*; @@ -38,6 +39,8 @@ import static org.jf.smali.smaliParser.*; private int apiLevel; + private Stack stateStack = new Stack<>(); + public Token nextToken() { try { Token token = yylex(); @@ -137,6 +140,7 @@ import static org.jf.smali.smaliParser.*; } private void beginStateBasedToken(int state) { + stateStack.push(yystate()); yybegin(state); sb.setLength(0); tokenStartLine = getLine(); @@ -146,12 +150,12 @@ import static org.jf.smali.smaliParser.*; } private Token endStateBasedToken(int type) { - yybegin(YYINITIAL); - if (tokenError != null) { return
invalidStateBasedToken(tokenError); } + yybegin(stateStack.pop()); + CommonToken token = new CommonToken(type, sb.toString()); token.setStartIndex(tokenStartChar); token.setStopIndex(yychar + yylength() - 1); @@ -167,7 +171,7 @@ import static org.jf.smali.smaliParser.*; } private Token invalidStateBasedToken(String message) { - yybegin(YYINITIAL); + yybegin(stateStack.pop()); InvalidToken token = new InvalidToken(message, sb.toString()); token.setStartIndex(tokenStartChar); @@ -734,7 +738,6 @@ Type = {PrimitiveType} | {ClassDescriptor} | {ArrayPrefix} ({ClassDescriptor} | {ClassDescriptor} { yypushback(yylength()); beginStateBasedToken(CLASS_DESCRIPTOR_BEGINNING); - sb.append(yytext()); } // we have to drop into a separate state so that we don't parse something like diff --git a/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.smali b/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.smali index 4f76d30a..f4260ea0 100644 --- a/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.smali +++ b/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.smali @@ -37,6 +37,7 @@ IIIII ZBSCIJFD ILa;[La;[I Ljava/lang/String;Ljava/lang/String; +IIFFIILjava/lang/String;IIFFII [I[I[I [I[Z [I[Ljava/lang/String; diff --git a/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.tokens b/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.tokens index 2ec07da3..52b62ad0 100644 --- a/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.tokens +++ b/smali/src/test/resources/LexerTest/TypeAndIdentifierTest.tokens @@ -73,6 +73,20 @@ PRIMITIVE_TYPE("I") CLASS_DESCRIPTOR("Ljava/lang/String;") CLASS_DESCRIPTOR("Ljava/lang/String;") +PRIMITIVE_TYPE("I") +PRIMITIVE_TYPE("I") +PRIMITIVE_TYPE("F") +PRIMITIVE_TYPE("F") +PRIMITIVE_TYPE("I") +PRIMITIVE_TYPE("I") +CLASS_DESCRIPTOR("Ljava/lang/String;") +PRIMITIVE_TYPE("I") +PRIMITIVE_TYPE("I") +PRIMITIVE_TYPE("F") +PRIMITIVE_TYPE("F") +PRIMITIVE_TYPE("I") +PRIMITIVE_TYPE("I") + ARRAY_TYPE_PREFIX("[") 
PRIMITIVE_TYPE("I") ARRAY_TYPE_PREFIX("[") diff --git a/smali/src/test/resources/LexerTest/TypeAndIdentifierTest_api29.smali b/smali/src/test/resources/LexerTest/TypeAndIdentifierTest_api29.smali index 4f76d30a..f4260ea0 100644 --- a/smali/src/test/resources/LexerTest/TypeAndIdentifierTest_api29.smali +++ b/smali/src/test/resources/LexerTest/TypeAndIdentifierTest_api29.smali @@ -37,6 +37,7 @@ IIIII ZBSCIJFD ILa;[La;[I Ljava/lang/String;Ljava/lang/String; +IIFFIILjava/lang/String;IIFFII [I[I[I [I[Z [I[Ljava/lang/String; diff --git a/smali/src/test/resources/LexerTest/TypeAndIdentifierTest_api29.tokens b/smali/src/test/resources/LexerTest/TypeAndIdentifierTest_api29.tokens index b2bf92e0..69c6dc38 100644 --- a/smali/src/test/resources/LexerTest/TypeAndIdentifierTest_api29.tokens +++ b/smali/src/test/resources/LexerTest/TypeAndIdentifierTest_api29.tokens @@ -73,6 +73,20 @@ PRIMITIVE_TYPE("I") CLASS_DESCRIPTOR("Ljava/lang/String;") CLASS_DESCRIPTOR("Ljava/lang/String;") +PRIMITIVE_TYPE("I") +PRIMITIVE_TYPE("I") +PRIMITIVE_TYPE("F") +PRIMITIVE_TYPE("F") +PRIMITIVE_TYPE("I") +PRIMITIVE_TYPE("I") +CLASS_DESCRIPTOR("Ljava/lang/String;") +PRIMITIVE_TYPE("I") +PRIMITIVE_TYPE("I") +PRIMITIVE_TYPE("F") +PRIMITIVE_TYPE("F") +PRIMITIVE_TYPE("I") +PRIMITIVE_TYPE("I") + ARRAY_TYPE_PREFIX("[") PRIMITIVE_TYPE("I") ARRAY_TYPE_PREFIX("[")