LexerSource: Handle invalid number as a single INVALID token and don't consume the entire line.

author: Shevek <[email protected]> 2014-09-11 00:16:09 -0700
committer: Shevek <[email protected]> 2014-09-11 00:16:09 -0700
commit: 2db1eafd535d8db92f76db8ccf45b1b80f6152e3 (patch)
tree: 83b5b6a9351a820b96755f02901aefbc31d133eb
parent: 9c8391a9591e0332de4c8a9d7662a16c70780ad5 (diff)
3 files changed, 44 insertions, 15 deletions
diff --git a/src/main/java/org/anarres/cpp/LexerSource.java b/src/main/java/org/anarres/cpp/LexerSource.java
index ef779b7..82d76b6 100644
--- a/src/main/java/org/anarres/cpp/LexerSource.java
+++ b/src/main/java/org/anarres/cpp/LexerSource.java
@@ -513,13 +513,17 @@ public class LexerSource extends Source {
                 flags |= NumericValue.F_DOUBLE;
                 text.append((char) d);
                 d = read();
-            } // This should probably be isPunct() || isWhite().
-            else if (Character.isLetter(d) || d == '_') {
+            }
+            else if (Character.isUnicodeIdentifierPart(d)) {
+                String reason = "Invalid suffix \"" + (char) d + "\" on numeric constant";
+                // We've encountered something initially identified as a number.
+                // Read in the rest of this token as an identifier but return it as an INVALID token.
+                while (Character.isUnicodeIdentifierPart(d)) {
+                    text.append((char) d);
+                    d = read();
+                }
                 unread(d);
-                value.setFlags(flags);
-                return invalid(text,
-                        "Invalid suffix \"" + (char) d
-                        + "\" on numeric constant");
+                return new Token(INVALID, text.toString(), reason);
             } else {
                 unread(d);
                 value.setFlags(flags);
diff --git a/src/test/java/org/anarres/cpp/LexerSourceTest.java b/src/test/java/org/anarres/cpp/LexerSourceTest.java
index b83ddd6..e21aa01 100644
--- a/src/test/java/org/anarres/cpp/LexerSourceTest.java
+++ b/src/test/java/org/anarres/cpp/LexerSourceTest.java
@@ -3,8 +3,8 @@ package org.anarres.cpp;
 import java.util.Arrays;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.junit.Ignore;
 import org.junit.Test;
+import static org.anarres.cpp.PreprocessorTest.assertType;
 import static org.anarres.cpp.Token.*;
 import static org.junit.Assert.*;
 
@@ -22,20 +22,19 @@ public class LexerSourceTest {
         for (int i = 0; i < out.length; i++) {
             Token tok = s.token();
             LOG.info("Token is " + tok);
-            assertEquals(out[i], tok.getType());
+            assertType(out[i], tok);
             // assertEquals(col, tok.getColumn());
             buf.append(tok.getText());
         }
 
         Token tok = s.token();
         LOG.info("Token is " + tok);
-        assertEquals(EOF, tok.getType());
+        assertType(EOF, tok);
 
         if (textmatch)
             assertEquals(in, buf.toString());
     }
 
-    @Ignore
     @Test
     public void testLexerSource()
             throws Exception {
@@ -85,10 +84,11 @@ public class LexerSourceTest {
                 SQSTRING, WHITESPACE,
                 SQSTRING);
 
-        testLexerSource("1i1I1l1L1ui1ul", true,
-                NUMBER, NUMBER,
-                NUMBER, NUMBER,
-                NUMBER, NUMBER);
+        if (false)  // Actually, I think this is illegal.
+            testLexerSource("1i1I1l1L1ui1ul", true,
+                    NUMBER, NUMBER,
+                    NUMBER, NUMBER,
+                    NUMBER, NUMBER);
 
         testLexerSource("'' 'x' 'xx'", true,
                 SQSTRING, WHITESPACE, CHARACTER, WHITESPACE, SQSTRING);
@@ -108,6 +108,31 @@ public class LexerSourceTest {
     }
 
     @Test
+    public void testNumbersSuffix() throws Exception {
+        testLexerSource("6f", true, NUMBER);
+        testLexerSource("6d", true, NUMBER);
+        testLexerSource("6l", true, NUMBER);
+        testLexerSource("6ll", true, NUMBER);
+        testLexerSource("6ul", true, NUMBER);
+        testLexerSource("6ull", true, NUMBER);
+        testLexerSource("6e3f", true, NUMBER);
+        testLexerSource("6e3d", true, NUMBER);
+        testLexerSource("6e3l", true, NUMBER);
+        testLexerSource("6e3ll", true, NUMBER);
+        testLexerSource("6e3ul", true, NUMBER);
+        testLexerSource("6e3ull", true, NUMBER);
+    }
+
+    @Test
+    public void testNumbersInvalid() throws Exception {
+        // testLexerSource("0x foo", true, INVALID, WHITESPACE, IDENTIFIER);   // FAIL
+        testLexerSource("6x foo", true, INVALID, WHITESPACE, IDENTIFIER);
+        testLexerSource("6g foo", true, INVALID, WHITESPACE, IDENTIFIER);
+        testLexerSource("6xsd foo", true, INVALID, WHITESPACE, IDENTIFIER);
+        testLexerSource("6gsd foo", true, INVALID, WHITESPACE, IDENTIFIER);
+    }
+
+    @Test
     public void testUnterminatedComment() throws Exception {
         testLexerSource("5 /*", false, NUMBER, WHITESPACE, INVALID);    // Bug #15
         testLexerSource("5 //", false, NUMBER, WHITESPACE, CPPCOMMENT);
diff --git a/src/test/java/org/anarres/cpp/PreprocessorTest.java b/src/test/java/org/anarres/cpp/PreprocessorTest.java
index fee233d..8a40823 100644
--- a/src/test/java/org/anarres/cpp/PreprocessorTest.java
+++ b/src/test/java/org/anarres/cpp/PreprocessorTest.java
@@ -176,7 +176,7 @@ public class PreprocessorTest {
         } while (t.getType() != EOF);
     }
 
-    private void assertType(int type, Token t) {
+    public static void assertType(int type, Token t) {
         String typeExpect = TokenType.getTokenName(type);
         String typeActual = TokenType.getTokenName(t.getType());
         assertEquals("Expected " + typeExpect + " but got " + typeActual, type, t.getType());
author	Shevek <[email protected]>	2014-09-11 00:16:09 -0700
committer	Shevek <[email protected]>	2014-09-11 00:16:09 -0700
commit	2db1eafd535d8db92f76db8ccf45b1b80f6152e3 (patch)
tree	83b5b6a9351a820b96755f02901aefbc31d133eb
parent	9c8391a9591e0332de4c8a9d7662a16c70780ad5 (diff)