NumericValue: Allow floating point numbers.

author: Shevek <[email protected]> 2012-04-01 16:12:33 -0700
committer: Shevek <[email protected]> 2012-04-01 16:12:33 -0700
commit: 943775515e684e1845abbdd6be52678028272399 (patch)
tree: 9d5e0be16ec04d8e9821d58c9a510aa8ccb83202 /src
parent: 0e8b7209a1d4c7f8387ec7ec763ea1afc3f5fb1a (diff)
7 files changed, 292 insertions, 67 deletions
diff --git a/src/java/org/anarres/cpp/LexerSource.java b/src/java/org/anarres/cpp/LexerSource.java
index c38ff6a..b696c9c 100644
--- a/src/java/org/anarres/cpp/LexerSource.java
+++ b/src/java/org/anarres/cpp/LexerSource.java
@@ -435,58 +435,92 @@ public class LexerSource extends Source {
 						text.toString(), buf.toString());
 	}
 
-	private Token _number(StringBuilder text, long val, int d)
+	private Token _number_suffix(StringBuilder text, NumericValue value, int d)	// d: first char after the digits; suffix letters are consumed from there
 						throws IOException,
 								LexerException {
-		int	bits = 0;
+		int	flags = 0;	// NumericValue.F_* bits collected so far (U, I, L, LL, F, D)
 		for (;;) {
-			/* XXX Error check duplicate bits. */
 			if (d == 'U' || d == 'u') {
-				bits |= 1;
+				if ((flags & NumericValue.F_UNSIGNED) != 0)
+					warning("Duplicate unsigned suffix " + (char)d);	// cast: d is an int code point
+				flags |= NumericValue.F_UNSIGNED;
 				text.append((char)d);
 				d = read();
 			}
 			else if (d == 'L' || d == 'l') {
-				if ((bits & 4) != 0)
-					warning("Conflicting numeric suffices: I and L.");
-				bits |= 2;
+				if ((flags & NumericValue.FF_SIZE) != 0)	// a size/type suffix was already seen
+					warning("Multiple length suffixes after " + text);
 				text.append((char)d);
-				d = read();
+				int e = read();	// one character of lookahead: L versus LL
+				if (e == d) {	// Case must match. Ll is Welsh.
+					flags |= NumericValue.F_LONGLONG;
+					text.append((char)e);
+					d = read();
+				} else {
+					flags |= NumericValue.F_LONG;
+					d = e;	// the lookahead is the next character to classify
+				}
 			}
 			else if (d == 'I' || d == 'i') {
-				if ((bits & 2) != 0)
-					warning("Conflicting numeric suffices: L and I.");
-				bits |= 4;
+				if ((flags & NumericValue.FF_SIZE) != 0)
+					warning("Multiple length suffixes after " + text);
+				flags |= NumericValue.F_INT;
+				text.append((char)d);
+				d = read();
+			} else if (d == 'F' || d == 'f') {
+				if ((flags & NumericValue.FF_SIZE) != 0)
+					warning("Multiple length suffixes after " + text);
+				flags |= NumericValue.F_FLOAT;
+				text.append((char)d);
+				d = read();
+			} else if (d == 'D' || d == 'd') {
+				if ((flags & NumericValue.FF_SIZE) != 0)
+					warning("Multiple length suffixes after " + text);
+				flags |= NumericValue.F_DOUBLE;
 				text.append((char)d);
 				d = read();
 			}
-			else if (Character.isLetter(d)) {
+			// This should probably be isPunct() || isWhite().
+			else if (Character.isLetter(d) || d == '_') {
 				unread(d);
-				return new Token(INVALID, text.toString(),
+				value.setFlags(flags);
+				return invalid(text, 
 						"Invalid suffix \"" + (char)d +
 						"\" on numeric constant");
 			}
 			else {
 				unread(d);
-				return new Token(INTEGER,
-					text.toString(), Long.valueOf(val));
+				value.setFlags(flags);	// record the suffixes on the value before handing it out
+				return new Token(NUMBER,
+					text.toString(), value);
 			}
 		}
 	}
 
-	/* We already chewed a zero, so empty is fine. */
-	private Token number_octal()
+	/* Either a decimal part, or a hex exponent: reads a run of digits valid in base; the run may be empty. */
+	private String _number_part(StringBuilder text, int base)	// returns only the digits read by this call
 						throws IOException,
 								LexerException {
-		StringBuilder	text = new StringBuilder("0");
+		StringBuilder	part = new StringBuilder();	// digits of this part only; text receives them as well
 		int				d = read();
-		long			val = 0;
-		while (Character.digit(d, 8) != -1) {
-			val = (val << 3) + Character.digit(d, 8);
+		while (Character.digit(d, base) != -1) {	// stops at the first char that is not a digit in base
 			text.append((char)d);
+			part.append((char)d);
 			d = read();
 		}
-		return _number(text, val, d);
+		unread(d);	// push the first non-digit back so the caller can read it again
+		return part.toString();
+	}
+
+	/* We already chewed a zero, so empty is fine: a bare 0 has no further octal digits. */
+	private Token number_octal()
+						throws IOException,
+								LexerException {
+		StringBuilder	text = new StringBuilder("0");	// the zero that was already consumed
+		String			integer = _number_part(text, 8);	// digits after that zero; may be ""
+		int				d = read();
+		NumericValue	value = new NumericValue(8, integer);
+		return _number_suffix(text, value, d);	// NOTE(review): no '.'/exponent handling here, unlike number_decimal - confirm how 0.5 is routed
 	}
 
 	/* We do not know whether know the first digit is valid. */
@@ -495,38 +529,44 @@ public class LexerSource extends Source {
 								LexerException {
 		StringBuilder	text = new StringBuilder("0");
 		text.append(x);
+		String			integer = _number_part(text, 16);
+		NumericValue	value = new NumericValue(16, integer);
 		int				d = read();
-		if (Character.digit(d, 16) == -1) {
-			unread(d);
-			// error("Illegal hexadecimal constant " + (char)d);
-			return new Token(INVALID, text.toString(),
-					"Illegal hexadecimal digit " + (char)d +
-					" after "+ text);
+		if (d == '.') {
+			String		fraction = _number_part(text, 16);
+			value.setFractionalPart(fraction);
+			d = read();
 		}
-		long	val = 0;
-		do {
-			val = (val << 4) + Character.digit(d, 16);
-			text.append((char)d);
+		if (d == 'P' || d == 'p') {
+			String		exponent = _number_part(text, 10);
+			value.setExponent(exponent);
 			d = read();
-		} while (Character.digit(d, 16) != -1);
-		return _number(text, val, d);
+		}
+		// XXX Make sure it's got enough parts
+		return _number_suffix(text, value, d);
 	}
 
 	/* We know we have at least one valid digit, but empty is not
 	 * fine. */
-	/* XXX This needs a complete rewrite. */
-	private Token number_decimal(int c)
+	private Token number_decimal()	// lexes integer[.fraction][e exponent][suffix]; the first char must already be unread
 						throws IOException,
 								LexerException {
-		StringBuilder	text = new StringBuilder((char)c);
-		int				d = c;
-		long			val = 0;
-		do {
-			val = val * 10 + Character.digit(d, 10);
-			text.append((char)d);
+		StringBuilder	text = new StringBuilder();	// full token text, built up as each part is read
+		String			integer = _number_part(text, 10);	// may be "" when the literal starts with '.'
+		NumericValue	value = new NumericValue(10, integer);
+		int				d = read();
+		if (d == '.') {
+			String		fraction = _number_part(text.append((char)d), 10);	// append keeps the '.' in the token text
+			value.setFractionalPart(fraction);
+			d = read();
+		}
+		if (d == 'E' || d == 'e') {
+			String		exponent = _number_part(text.append((char)d), 10);	// append keeps the 'e'/'E' in the token text
+			value.setExponent(exponent);
 			d = read();
-		} while (Character.digit(d, 10) != -1);
-		return _number(text, val, d);
+		}
+		// XXX Make sure it's got enough parts; a sign after e/E (as in 1e-5) is not consumed above.
+		return _number_suffix(text, value, d);
 	}
 
 	private Token identifier(int c)
@@ -760,6 +800,10 @@ public class LexerSource extends Source {
 					tok = cond('.', ELLIPSIS, RANGE);
 				else
 					unread(d);
+				if (Character.isDigit(d)) {
+					unread('.');
+					tok = number_decimal();
+				}
 				/* XXX decimal fraction */
 				break;
 
@@ -793,7 +837,8 @@ public class LexerSource extends Source {
 				tok = whitespace(c);
 			}
 			else if (Character.isDigit(c)) {
-				tok = number_decimal(c);
+				unread(c);
+				tok = number_decimal();
 			}
 			else if (Character.isJavaIdentifierStart(c)) {
 				tok = identifier(c);
diff --git a/src/java/org/anarres/cpp/Main.java b/src/java/org/anarres/cpp/Main.java
index da9d3d6..8777c72 100644
--- a/src/java/org/anarres/cpp/Main.java
+++ b/src/java/org/anarres/cpp/Main.java
@@ -350,11 +350,11 @@ public class Main {
 		}
 
 		Macro			m = new Macro("__WORDSIZE");
-		m.addToken(new Token(INTEGER, -1, -1, "32", Integer.valueOf(32)));
+		m.addToken(new Token(NUMBER, -1, -1, "32", new NumericValue(10, "32")));
 		pp.addMacro(m);
 
 		m = new Macro("__STDC__");
-		m.addToken(new Token(INTEGER, -1, -1, "1", Integer.valueOf(1)));
+		m.addToken(new Token(NUMBER, -1, -1, "1", new NumericValue(10, "1")));
 		pp.addMacro(m);
 
 		try {
diff --git a/src/java/org/anarres/cpp/NumericValue.java b/src/java/org/anarres/cpp/NumericValue.java
new file mode 100644
index 0000000..f8b1559
--- /dev/null
+++ b/src/java/org/anarres/cpp/NumericValue.java
@@ -0,0 +1,179 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied.  See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+package org.anarres.cpp;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+public class NumericValue extends Number {	// a numeric literal kept as its textual parts plus suffix flags
+	public static final int F_UNSIGNED = 1;	// U/u suffix
+	public static final int F_INT      = 2;	// I/i suffix
+	public static final int F_LONG     = 4;	// single L/l suffix
+	public static final int F_LONGLONG = 8;	// LL/ll suffix
+	public static final int F_FLOAT    = 16;	// F/f suffix
+	public static final int F_DOUBLE   = 32;	// D/d suffix
+
+	public static final int FF_SIZE = F_INT | F_LONG | F_LONGLONG | F_FLOAT | F_DOUBLE;	// every suffix bit except F_UNSIGNED
+
+	private int base;	// radix of the literal; toString() knows prefixes for 2, 8, 10 and 16
+	private String integer;	// digits before the '.', without any base prefix
+	private String fraction;	// digits after the '.', or null when never set
+	private String exponent;	// exponent digits, or null when never set
+	private int flags;	// bitwise OR of the F_* constants; 0 until setFlags is called
+
+	public NumericValue(int base, String integer) {	// fraction and exponent stay null, flags stay 0
+		this.base = base;
+		this.integer = integer;
+	}
+
+	public int getBase() {	// radix passed to the constructor
+		return base;
+	}
+
+	public String getIntegerPart() {	// integer digits exactly as passed to the constructor
+		return integer;
+	}
+
+	public String getFractionalPart() {	// null when no fractional part was set
+		return fraction;
+	}
+
+	/* pp */ void setFractionalPart(String fraction) {	// no access modifier: package-private
+		this.fraction = fraction;
+	}
+
+	public String getExponent() {	// null when no exponent was set
+		return exponent;
+	}
+
+	/* pp */ void setExponent(String exponent) {	// stored as text; parsed only in toBigDecimal()
+		this.exponent = exponent;
+	}
+
+	public int getFlags() {	// bitwise OR of F_* constants
+		return flags;
+	}
+
+	/* pp */ void setFlags(int flags) {	// replaces, rather than ORs into, the current flags
+		this.flags = flags;
+	}
+
+	/**
+	 * Converts to a BigDecimal: integer and fraction digits are parsed together
+	 * in getBase(), then scaled by the fraction length minus the exponent. Parsing
+	 * arbitrary bases is nontrivial; this routine gets many important cases wrong.
+	 */
+	public BigDecimal toBigDecimal() {
+		int		scale = 0;
+		String	text = getIntegerPart();
+		String	t_fraction = getFractionalPart();
+		if (t_fraction != null) {
+			text += getFractionalPart();
+			// XXX Wrong for anything but base 10: a BigDecimal scale is a power of ten.
+			scale += getFractionalPart().length();
+		}
+		if (getExponent() != null)
+			scale -= Integer.parseInt(getExponent());	// NumberFormatException if the exponent text is empty
+		BigInteger unscaled = new BigInteger(text, getBase());	// NumberFormatException if text is empty
+		return new BigDecimal(unscaled, scale);
+	}
+
+	public Number toJavaLangNumber() {	// boxed value chosen by suffix flags first, then by the literal's shape
+		int flags = getFlags();
+		if ((flags & F_DOUBLE) != 0)
+			return doubleValue();
+		else if ((flags & F_FLOAT) != 0)
+			return floatValue();
+		else if ((flags & (F_LONG | F_LONGLONG)) != 0)	// L and LL both map to a Java long
+			return longValue();
+		else if ((flags & F_INT) != 0)
+			return intValue();
+		else if (getFractionalPart() != null)
+			return doubleValue();	// .1 is a double in Java.
+		else if (getExponent() != null)
+			return doubleValue();
+		else
+			return intValue();	// no suffix, fraction or exponent: plain int
+	}
+
+	@Override
+	public int intValue() {
+		return Integer.parseInt(toString());	// NOTE(review): toString() keeps the "0"/"0x" prefix, so hex throws NumberFormatException and octal is read as decimal
+	}
+
+	@Override
+	public long longValue() {
+		return Long.parseLong(toString());	// NOTE(review): same prefix problem as intValue(); Preprocessor.expr() calls this for NUMBER tokens
+	}
+
+	@Override
+	public float floatValue() {
+		return Float.parseFloat(toString());	// NOTE(review): hex text parses only when it has a 'p' exponent; octal is read as decimal - confirm
+	}
+
+	@Override
+	public double doubleValue() {
+		return Double.parseDouble(toString());	// NOTE(review): same limits as floatValue() - confirm
+	}
+
+	private boolean appendFlags(StringBuilder buf, String suffix, int flag) {	// only referenced from the commented-out block in toString()
+		if ((getFlags() & flag) != flag)	// every bit of flag must be set
+			return false;
+		buf.append(suffix);
+		return true;
+	}
+
+	@Override
+	public String toString() {	// base prefix, integer digits, optional ".fraction", optional exponent; no suffix letters
+		StringBuilder buf = new StringBuilder();
+		switch (base) {
+			case 8:
+				buf.append('0');
+				break;
+			case 10:
+				break;
+			case 16:
+				buf.append("0x");
+				break;
+			case 2:
+				buf.append('b');	// NOTE(review): emits "b", not "0b" - confirm intended
+				break;
+			default:
+				buf.append("[base-").append(base).append("]");
+				break;
+		}
+		buf.append(getIntegerPart());
+		if (getFractionalPart() != null)
+			buf.append('.').append(getFractionalPart());
+		if (getExponent() != null) {
+			buf.append(base > 10 ? 'p' : 'e');	// 'p' for bases above 10 (hex), 'e' otherwise
+			buf.append(getExponent());
+		}
+		/* Disabled suffix rendering; presumably off because intValue() etc. parse this text - confirm.
+		if (appendFlags(buf, "ui", F_UNSIGNED | F_INT));
+		else if (appendFlags(buf, "ul", F_UNSIGNED | F_LONG));
+		else if (appendFlags(buf, "ull", F_UNSIGNED | F_LONGLONG));
+		else if (appendFlags(buf, "i", F_INT));
+		else if (appendFlags(buf, "l", F_LONG));
+		else if (appendFlags(buf, "ll", F_LONGLONG));
+		else if (appendFlags(buf, "f", F_FLOAT));
+		else if (appendFlags(buf, "d", F_DOUBLE));
+		*/
+		return buf.toString();
+	}
+}
diff --git a/src/java/org/anarres/cpp/Preprocessor.java b/src/java/org/anarres/cpp/Preprocessor.java
index 9e8ce89..9bda523 100644
--- a/src/java/org/anarres/cpp/Preprocessor.java
+++ b/src/java/org/anarres/cpp/Preprocessor.java
@@ -772,10 +772,10 @@ public class Preprocessor implements Closeable {
 
 		if (m == __LINE__) {
 			push_source(new FixedTokenSource(
-					new Token[] { new Token(INTEGER,
+					new Token[] { new Token(NUMBER,
 							orig.getLine(), orig.getColumn(),
 							String.valueOf(orig.getLine()),
-							Integer.valueOf(orig.getLine())) }
+							new NumericValue(10, "" + orig.getLine())) }
 						), true);
 		}
 		else if (m == __FILE__) {
@@ -810,10 +810,10 @@ public class Preprocessor implements Closeable {
 			 * a special Macro subclass which overrides getTokens(). */
 			int	value = this.counter++;
 			push_source(new FixedTokenSource(
-					new Token[] { new Token(INTEGER,
+					new Token[] { new Token(NUMBER,
 							orig.getLine(), orig.getColumn(),
 							String.valueOf(value),
-							Integer.valueOf(value)) }
+							new NumericValue(10, "" + value)) }
 						), true);
 		}
 		else {
@@ -1342,21 +1342,21 @@ public class Preprocessor implements Closeable {
 					error(la,
 						"defined() needs identifier, not " +
 						la.getText());
-					tok = new Token(INTEGER,
+					tok = new Token(NUMBER,
 							la.getLine(), la.getColumn(),
-							"0", Integer.valueOf(0));
+							"0", new NumericValue(10, "0"));
 				}
 				else if (macros.containsKey(la.getText())) {
 					// System.out.println("Found macro");
-					tok = new Token(INTEGER,
+					tok = new Token(NUMBER,
 							la.getLine(), la.getColumn(),
-							"1", Integer.valueOf(1));
+							"1", new NumericValue(10, "1"));
 				}
 				else {
 					// System.out.println("Not found macro");
-					tok = new Token(INTEGER,
+					tok = new Token(NUMBER,
 							la.getLine(), la.getColumn(),
-							"0", Integer.valueOf(0));
+							"0", new NumericValue(10, "0"));
 				}
 
 				if (paren) {
@@ -1438,8 +1438,9 @@ public class Preprocessor implements Closeable {
 			case '~': lhs = ~expr(11);              break;
 			case '!': lhs =  expr(11) == 0 ? 1 : 0; break;
 			case '-': lhs = -expr(11);              break;
-			case INTEGER:
-				lhs = ((Number)tok.getValue()).longValue();
+			case NUMBER:
+				NumericValue value = (NumericValue)tok.getValue();
+				lhs = value.longValue();
 				break;
 			case CHARACTER:
 				lhs = (long)((Character)tok.getValue()).charValue();
@@ -1665,7 +1666,7 @@ public class Preprocessor implements Closeable {
 				case XOR_EQ:
 					return tok;
 
-				case INTEGER:
+				case NUMBER:
 					return tok;
 
 				case IDENTIFIER:
diff --git a/src/java/org/anarres/cpp/Token.java b/src/java/org/anarres/cpp/Token.java
index 0dca7ff..9112c25 100644
--- a/src/java/org/anarres/cpp/Token.java
+++ b/src/java/org/anarres/cpp/Token.java
@@ -151,7 +151,7 @@ public final class Token {
 	}
 
 #set ($i = 257)
-#set ($tokens = [ "AND_EQ", "ARROW", "CHARACTER", "CCOMMENT", "CPPCOMMENT", "DEC", "DIV_EQ", "ELLIPSIS", "EOF", "EQ", "GE", "HASH", "HEADER", "IDENTIFIER", "INC", "INTEGER", "LAND", "LAND_EQ", "LE", "LITERAL", "LOR", "LOR_EQ", "LSH", "LSH_EQ", "MOD_EQ", "MULT_EQ", "NE", "NL", "OR_EQ", "PASTE", "PLUS_EQ", "RANGE", "RSH", "RSH_EQ", "STRING", "SUB_EQ", "WHITESPACE", "XOR_EQ", "M_ARG", "M_PASTE", "M_STRING", "P_LINE", "INVALID" ])
+#set ($tokens = [ "AND_EQ", "ARROW", "CHARACTER", "CCOMMENT", "CPPCOMMENT", "DEC", "DIV_EQ", "ELLIPSIS", "EOF", "EQ", "GE", "HASH", "HEADER", "IDENTIFIER", "INC", "NUMBER", "LAND", "LAND_EQ", "LE", "LITERAL", "LOR", "LOR_EQ", "LSH", "LSH_EQ", "MOD_EQ", "MULT_EQ", "NE", "NL", "OR_EQ", "PASTE", "PLUS_EQ", "RANGE", "RSH", "RSH_EQ", "STRING", "SUB_EQ", "WHITESPACE", "XOR_EQ", "M_ARG", "M_PASTE", "M_STRING", "P_LINE", "INVALID" ])
 #foreach ($token in $tokens)
 	/** The token type $token. */
 	public static final int $token = $i;
diff --git a/src/tests/org/anarres/cpp/LexerSourceTestCase.java b/src/tests/org/anarres/cpp/LexerSourceTestCase.java
index adb1862..205176b 100644
--- a/src/tests/org/anarres/cpp/LexerSourceTestCase.java
+++ b/src/tests/org/anarres/cpp/LexerSourceTestCase.java
@@ -31,7 +31,7 @@ public class LexerSourceTestCase extends BaseTestCase implements Test {
 
 		testLexerSource("int a = 5;",
 			IDENTIFIER, WHITESPACE, IDENTIFIER, WHITESPACE,
-			'=', WHITESPACE, INTEGER, ';', EOF
+			'=', WHITESPACE, NUMBER, ';', EOF
 		);
 
 		// \n is WHITESPACE because ppvalid = false
@@ -43,7 +43,7 @@ public class LexerSourceTestCase extends BaseTestCase implements Test {
 		testLexerSource("%:?", '#', '?');
 		testLexerSource("%:%=", '#', MOD_EQ);
 		testLexerSource("0x1234ffdUL 0765I",
-				INTEGER, WHITESPACE, INTEGER);
+				NUMBER, WHITESPACE, NUMBER);
 
 		testLexerSource("+= -= *= /= %= <= >= >>= <<= &= |= ^= x",
 			PLUS_EQ, WHITESPACE,
@@ -73,9 +73,9 @@ public class LexerSourceTestCase extends BaseTestCase implements Test {
 			INVALID);
 
 		testLexerSource("1i1I1l1L1ui1ul", 
-			INTEGER, INTEGER,
-			INTEGER, INTEGER,
-			INTEGER, INTEGER);
+			NUMBER, NUMBER,
+			NUMBER, NUMBER,
+			NUMBER, NUMBER);
 
 	}
 
diff --git a/src/tests/org/anarres/cpp/PreprocessorTestCase.java b/src/tests/org/anarres/cpp/PreprocessorTestCase.java
index 63e4009..217659a 100644
--- a/src/tests/org/anarres/cpp/PreprocessorTestCase.java
+++ b/src/tests/org/anarres/cpp/PreprocessorTestCase.java
@@ -52,7 +52,7 @@ public class PreprocessorTestCase extends BaseTestCase {
 	public void testPreprocessor() throws Exception {
 		/* Magic macros */
 		testInput("line = __LINE__\n",
-			I("line"), WHITESPACE, '=', WHITESPACE, INTEGER
+			I("line"), WHITESPACE, '=', WHITESPACE, NUMBER
 			/*, NL - all nls deferred so as not to block the reader */
 		);
 		testInput("file = __FILE__\n", NL,	/* from before, etc */
author	Shevek <[email protected]>	2012-04-01 16:12:33 -0700
committer	Shevek <[email protected]>	2012-04-01 16:12:33 -0700
commit	943775515e684e1845abbdd6be52678028272399 (patch)
tree	9d5e0be16ec04d8e9821d58c9a510aa8ccb83202 /src
parent	0e8b7209a1d4c7f8387ec7ec763ea1afc3f5fb1a (diff)