about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Shevek <[email protected]> 2014-09-11 00:16:09 -0700
committer Shevek <[email protected]> 2014-09-11 00:16:09 -0700
commit 2db1eafd535d8db92f76db8ccf45b1b80f6152e3 (patch)
tree 83b5b6a9351a820b96755f02901aefbc31d133eb
parent 9c8391a9591e0332de4c8a9d7662a16c70780ad5 (diff)
LexerSource: Handle invalid number as a single INVALID token and don't consume the entire line.
-rw-r--r-- src/main/java/org/anarres/cpp/LexerSource.java 16
-rw-r--r-- src/test/java/org/anarres/cpp/LexerSourceTest.java 41
-rw-r--r-- src/test/java/org/anarres/cpp/PreprocessorTest.java 2
3 files changed, 44 insertions, 15 deletions
diff --git a/src/main/java/org/anarres/cpp/LexerSource.java b/src/main/java/org/anarres/cpp/LexerSource.java
index ef779b7..82d76b6 100644
--- a/src/main/java/org/anarres/cpp/LexerSource.java
+++ b/src/main/java/org/anarres/cpp/LexerSource.java
@@ -513,13 +513,17 @@ public class LexerSource extends Source {
flags |= NumericValue.F_DOUBLE;
text.append((char) d);
d = read();
- } // This should probably be isPunct() || isWhite().
- else if (Character.isLetter(d) || d == '_') {
+ }
+ else if (Character.isUnicodeIdentifierPart(d)) {
+ String reason = "Invalid suffix \"" + (char) d + "\" on numeric constant";
+ // We've encountered something initially identified as a number.
+ // Read in the rest of this token as an identifier but return it as an invalid.
+ while (Character.isUnicodeIdentifierPart(d)) {
+ text.append((char) d);
+ d = read();
+ }
unread(d);
- value.setFlags(flags);
- return invalid(text,
- "Invalid suffix \"" + (char) d
- + "\" on numeric constant");
+ return new Token(INVALID, text.toString(), reason);
} else {
unread(d);
value.setFlags(flags);
diff --git a/src/test/java/org/anarres/cpp/LexerSourceTest.java b/src/test/java/org/anarres/cpp/LexerSourceTest.java
index b83ddd6..e21aa01 100644
--- a/src/test/java/org/anarres/cpp/LexerSourceTest.java
+++ b/src/test/java/org/anarres/cpp/LexerSourceTest.java
@@ -3,8 +3,8 @@ package org.anarres.cpp;
import java.util.Arrays;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.junit.Ignore;
import org.junit.Test;
+import static org.anarres.cpp.PreprocessorTest.assertType;
import static org.anarres.cpp.Token.*;
import static org.junit.Assert.*;
@@ -22,20 +22,19 @@ public class LexerSourceTest {
for (int i = 0; i < out.length; i++) {
Token tok = s.token();
LOG.info("Token is " + tok);
- assertEquals(out[i], tok.getType());
+ assertType(out[i], tok);
// assertEquals(col, tok.getColumn());
buf.append(tok.getText());
}
Token tok = s.token();
LOG.info("Token is " + tok);
- assertEquals(EOF, tok.getType());
+ assertType(EOF, tok);
if (textmatch)
assertEquals(in, buf.toString());
}
- @Ignore
@Test
public void testLexerSource()
throws Exception {
@@ -85,10 +84,11 @@ public class LexerSourceTest {
SQSTRING, WHITESPACE,
SQSTRING);
- testLexerSource("1i1I1l1L1ui1ul", true,
- NUMBER, NUMBER,
- NUMBER, NUMBER,
- NUMBER, NUMBER);
+ if (false) // Actually, I think this is illegal.
+ testLexerSource("1i1I1l1L1ui1ul", true,
+ NUMBER, NUMBER,
+ NUMBER, NUMBER,
+ NUMBER, NUMBER);
testLexerSource("'' 'x' 'xx'", true,
SQSTRING, WHITESPACE, CHARACTER, WHITESPACE, SQSTRING);
@@ -108,6 +108,31 @@ public class LexerSourceTest {
}
@Test
+ public void testNumbersSuffix() throws Exception {
+ testLexerSource("6f", true, NUMBER);
+ testLexerSource("6d", true, NUMBER);
+ testLexerSource("6l", true, NUMBER);
+ testLexerSource("6ll", true, NUMBER);
+ testLexerSource("6ul", true, NUMBER);
+ testLexerSource("6ull", true, NUMBER);
+ testLexerSource("6e3f", true, NUMBER);
+ testLexerSource("6e3d", true, NUMBER);
+ testLexerSource("6e3l", true, NUMBER);
+ testLexerSource("6e3ll", true, NUMBER);
+ testLexerSource("6e3ul", true, NUMBER);
+ testLexerSource("6e3ull", true, NUMBER);
+ }
+
+ @Test
+ public void testNumbersInvalid() throws Exception {
+ // testLexerSource("0x foo", true, INVALID, WHITESPACE, IDENTIFIER); // FAIL
+ testLexerSource("6x foo", true, INVALID, WHITESPACE, IDENTIFIER);
+ testLexerSource("6g foo", true, INVALID, WHITESPACE, IDENTIFIER);
+ testLexerSource("6xsd foo", true, INVALID, WHITESPACE, IDENTIFIER);
+ testLexerSource("6gsd foo", true, INVALID, WHITESPACE, IDENTIFIER);
+ }
+
+ @Test
public void testUnterminatedComment() throws Exception {
testLexerSource("5 /*", false, NUMBER, WHITESPACE, INVALID); // Bug #15
testLexerSource("5 //", false, NUMBER, WHITESPACE, CPPCOMMENT);
diff --git a/src/test/java/org/anarres/cpp/PreprocessorTest.java b/src/test/java/org/anarres/cpp/PreprocessorTest.java
index fee233d..8a40823 100644
--- a/src/test/java/org/anarres/cpp/PreprocessorTest.java
+++ b/src/test/java/org/anarres/cpp/PreprocessorTest.java
@@ -176,7 +176,7 @@ public class PreprocessorTest {
} while (t.getType() != EOF);
}
- private void assertType(int type, Token t) {
+ public static void assertType(int type, Token t) {
String typeExpect = TokenType.getTokenName(type);
String typeActual = TokenType.getTokenName(t.getType());
assertEquals("Expected " + typeExpect + " but got " + typeActual, type, t.getType());