diff options
-rw-r--r-- | src/java/org/anarres/cpp/LexerSource.java | 135 | ||||
-rw-r--r-- | src/java/org/anarres/cpp/Main.java | 4 | ||||
-rw-r--r-- | src/java/org/anarres/cpp/NumericValue.java | 179 | ||||
-rw-r--r-- | src/java/org/anarres/cpp/Preprocessor.java | 27 | ||||
-rw-r--r-- | src/java/org/anarres/cpp/Token.java | 2 | ||||
-rw-r--r-- | src/tests/org/anarres/cpp/LexerSourceTestCase.java | 10 | ||||
-rw-r--r-- | src/tests/org/anarres/cpp/PreprocessorTestCase.java | 2 |
7 files changed, 292 insertions, 67 deletions
diff --git a/src/java/org/anarres/cpp/LexerSource.java b/src/java/org/anarres/cpp/LexerSource.java index c38ff6a..b696c9c 100644 --- a/src/java/org/anarres/cpp/LexerSource.java +++ b/src/java/org/anarres/cpp/LexerSource.java @@ -435,58 +435,92 @@ public class LexerSource extends Source { text.toString(), buf.toString()); } - private Token _number(StringBuilder text, long val, int d) + private Token _number_suffix(StringBuilder text, NumericValue value, int d) throws IOException, LexerException { - int bits = 0; + int flags = 0; // U, I, L, LL, F, D, MSB for (;;) { - /* XXX Error check duplicate bits. */ if (d == 'U' || d == 'u') { - bits |= 1; + if ((flags & NumericValue.F_UNSIGNED) != 0) + warning("Duplicate unsigned suffix " + d); + flags |= NumericValue.F_UNSIGNED; text.append((char)d); d = read(); } else if (d == 'L' || d == 'l') { - if ((bits & 4) != 0) - warning("Conflicting numeric suffices: I and L."); - bits |= 2; + if ((flags & NumericValue.FF_SIZE) != 0) + warning("Nultiple length suffixes after " + text); text.append((char)d); - d = read(); + int e = read(); + if (e == d) { // Case must match. Ll is Welsh. + flags |= NumericValue.F_LONGLONG; + text.append((char)e); + d = read(); + } else { + flags |= NumericValue.F_LONG; + d = e; + } } else if (d == 'I' || d == 'i') { - if ((bits & 2) != 0) - warning("Conflicting numeric suffices: L and I."); - bits |= 4; + if ((flags & NumericValue.FF_SIZE) != 0) + warning("Nultiple length suffixes after " + text); + flags |= NumericValue.F_INT; + text.append((char)d); + d = read(); + } else if (d == 'F' || d == 'f') { + if ((flags & NumericValue.FF_SIZE) != 0) + warning("Nultiple length suffixes after " + text); + flags |= NumericValue.F_FLOAT; + text.append((char)d); + d = read(); + } else if (d == 'D' || d == 'd') { + if ((flags & NumericValue.FF_SIZE) != 0) + warning("Nultiple length suffixes after " + text); + flags |= NumericValue.F_DOUBLE; text.append((char)d); d = read(); } - else if (Character.isLetter(d)) { + // This should probably be isPunct() || isWhite(). + else if (Character.isLetter(d) || d == '_') { unread(d); - return new Token(INVALID, text.toString(), + value.setFlags(flags); + return invalid(text, "Invalid suffix \"" + (char)d + "\" on numeric constant"); } else { unread(d); - return new Token(INTEGER, - text.toString(), Long.valueOf(val)); + value.setFlags(flags); + return new Token(NUMBER, + text.toString(), value); } } } - /* We already chewed a zero, so empty is fine. */ - private Token number_octal() + /* Either a decimal part, or a hex exponent. */ + private String _number_part(StringBuilder text, int base) throws IOException, LexerException { - StringBuilder text = new StringBuilder("0"); + StringBuilder part = new StringBuilder(); int d = read(); - long val = 0; - while (Character.digit(d, 8) != -1) { - val = (val << 3) + Character.digit(d, 8); + while (Character.digit(d, base) != -1) { text.append((char)d); + part.append((char)d); d = read(); } - return _number(text, val, d); + unread(d); + return part.toString(); + } + + /* We already chewed a zero, so empty is fine. */ + private Token number_octal() + throws IOException, + LexerException { + StringBuilder text = new StringBuilder("0"); + String integer = _number_part(text, 8); + int d = read(); + NumericValue value = new NumericValue(8, integer); + return _number_suffix(text, value, d); } /* We do not know whether know the first digit is valid. */ @@ -495,38 +529,44 @@ public class LexerSource extends Source { LexerException { StringBuilder text = new StringBuilder("0"); text.append(x); + String integer = _number_part(text, 16); + NumericValue value = new NumericValue(16, integer); int d = read(); - if (Character.digit(d, 16) == -1) { - unread(d); - // error("Illegal hexadecimal constant " + (char)d); - return new Token(INVALID, text.toString(), - "Illegal hexadecimal digit " + (char)d + - " after "+ text); + if (d == '.') { + String fraction = _number_part(text, 16); + value.setFractionalPart(fraction); + d = read(); } - long val = 0; - do { - val = (val << 4) + Character.digit(d, 16); - text.append((char)d); + if (d == 'P' || d == 'p') { + String exponent = _number_part(text, 10); + value.setExponent(exponent); d = read(); - } while (Character.digit(d, 16) != -1); - return _number(text, val, d); + } + // XXX Make sure it's got enough parts + return _number_suffix(text, value, d); } /* We know we have at least one valid digit, but empty is not * fine. */ - /* XXX This needs a complete rewrite. */ - private Token number_decimal(int c) + private Token number_decimal() throws IOException, LexerException { - StringBuilder text = new StringBuilder((char)c); - int d = c; - long val = 0; - do { - val = val * 10 + Character.digit(d, 10); - text.append((char)d); + StringBuilder text = new StringBuilder(); + String integer = _number_part(text, 10); + NumericValue value = new NumericValue(10, integer); + int d = read(); + if (d == '.') { + String fraction = _number_part(text, 10); + value.setFractionalPart(fraction); + d = read(); + } + if (d == 'E' || d == 'e') { + String exponent = _number_part(text, 10); + value.setExponent(exponent); d = read(); - } while (Character.digit(d, 10) != -1); - return _number(text, val, d); + } + // XXX Make sure it's got enough parts + return _number_suffix(text, value, d); } private Token identifier(int c) @@ -760,6 +800,10 @@ public class LexerSource extends Source { tok = cond('.', ELLIPSIS, RANGE); else unread(d); + if (Character.isDigit(d)) { + unread('.'); + tok = number_decimal(); + } /* XXX decimal fraction */ break; @@ -793,7 +837,8 @@ public class LexerSource extends Source { tok = whitespace(c); } else if (Character.isDigit(c)) { - tok = number_decimal(c); + unread(c); + tok = number_decimal(); } else if (Character.isJavaIdentifierStart(c)) { tok = identifier(c); diff --git a/src/java/org/anarres/cpp/Main.java b/src/java/org/anarres/cpp/Main.java index da9d3d6..8777c72 100644 --- a/src/java/org/anarres/cpp/Main.java +++ b/src/java/org/anarres/cpp/Main.java @@ -350,11 +350,11 @@ public class Main { } Macro m = new Macro("__WORDSIZE"); - m.addToken(new Token(INTEGER, -1, -1, "32", Integer.valueOf(32))); + m.addToken(new Token(NUMBER, -1, -1, "32", new NumericValue(10, "32"))); pp.addMacro(m); m = new Macro("__STDC__"); - m.addToken(new Token(INTEGER, -1, -1, "1", Integer.valueOf(1))); + m.addToken(new Token(NUMBER, -1, -1, "1", new NumericValue(10, "1"))); pp.addMacro(m); try { diff --git a/src/java/org/anarres/cpp/NumericValue.java b/src/java/org/anarres/cpp/NumericValue.java new file mode 100644 index 0000000..f8b1559 --- /dev/null +++ b/src/java/org/anarres/cpp/NumericValue.java @@ -0,0 +1,179 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.anarres.cpp; + +import java.math.BigDecimal; +import java.math.BigInteger; + +public class NumericValue extends Number { + public static final int F_UNSIGNED = 1; + public static final int F_INT = 2; + public static final int F_LONG = 4; + public static final int F_LONGLONG = 8; + public static final int F_FLOAT = 16; + public static final int F_DOUBLE = 32; + + public static final int FF_SIZE = F_INT | F_LONG | F_LONGLONG | F_FLOAT | F_DOUBLE; + + private int base; + private String integer; + private String fraction; + private String exponent; + private int flags; + + public NumericValue(int base, String integer) { + this.base = base; + this.integer = integer; + } + + public int getBase() { + return base; + } + + public String getIntegerPart() { + return integer; + } + + public String getFractionalPart() { + return fraction; + } + + /* pp */ void setFractionalPart(String fraction) { + this.fraction = fraction; + } + + public String getExponent() { + return exponent; + } + + /* pp */ void setExponent(String exponent) { + this.exponent = exponent; + } + + public int getFlags() { + return flags; + } + + /* pp */ void setFlags(int flags) { + this.flags = flags; + } + + /** + * So, it turns out that parsing arbitrary bases into arbitrary + * precision numbers is nontrivial, and this routine gets it wrong + * in many important cases. + */ + public BigDecimal toBigDecimal() { + int scale = 0; + String text = getIntegerPart(); + String t_fraction = getFractionalPart(); + if (t_fraction != null) { + text += getFractionalPart(); + // XXX Wrong for anything but base 10. + scale += getFractionalPart().length(); + } + if (getExponent() != null) + scale -= Integer.parseInt(getExponent()); + BigInteger unscaled = new BigInteger(text, getBase()); + return new BigDecimal(unscaled, scale); + } + + public Number toJavaLangNumber() { + int flags = getFlags(); + if ((flags & F_DOUBLE) != 0) + return doubleValue(); + else if ((flags & F_FLOAT) != 0) + return floatValue(); + else if ((flags & (F_LONG | F_LONGLONG)) != 0) + return longValue(); + else if ((flags & F_INT) != 0) + return intValue(); + else if (getFractionalPart() != null) + return doubleValue(); // .1 is a double in Java. + else if (getExponent() != null) + return doubleValue(); + else + return intValue(); + } + + @Override + public int intValue() { + return Integer.parseInt(toString()); + } + + @Override + public long longValue() { + return Long.parseLong(toString()); + } + + @Override + public float floatValue() { + return Float.parseFloat(toString()); + } + + @Override + public double doubleValue() { + return Double.parseDouble(toString()); + } + + private boolean appendFlags(StringBuilder buf, String suffix, int flag) { + if ((getFlags() & flag) != flag) + return false; + buf.append(suffix); + return true; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + switch (base) { + case 8: + buf.append('0'); + break; + case 10: + break; + case 16: + buf.append("0x"); + break; + case 2: + buf.append('b'); + break; + default: + buf.append("[base-").append(base).append("]"); + break; + } + buf.append(getIntegerPart()); + if (getFractionalPart() != null) + buf.append('.').append(getFractionalPart()); + if (getExponent() != null) { + buf.append(base > 10 ? 'p' : 'e'); + buf.append(getExponent()); + } + /* + if (appendFlags(buf, "ui", F_UNSIGNED | F_INT)); + else if (appendFlags(buf, "ul", F_UNSIGNED | F_LONG)); + else if (appendFlags(buf, "ull", F_UNSIGNED | F_LONGLONG)); + else if (appendFlags(buf, "i", F_INT)); + else if (appendFlags(buf, "l", F_LONG)); + else if (appendFlags(buf, "ll", F_LONGLONG)); + else if (appendFlags(buf, "f", F_FLOAT)); + else if (appendFlags(buf, "d", F_DOUBLE)); + */ + return buf.toString(); + } +} diff --git a/src/java/org/anarres/cpp/Preprocessor.java b/src/java/org/anarres/cpp/Preprocessor.java index 9e8ce89..9bda523 100644 --- a/src/java/org/anarres/cpp/Preprocessor.java +++ b/src/java/org/anarres/cpp/Preprocessor.java @@ -772,10 +772,10 @@ public class Preprocessor implements Closeable { if (m == __LINE__) { push_source(new FixedTokenSource( - new Token[] { new Token(INTEGER, + new Token[] { new Token(NUMBER, orig.getLine(), orig.getColumn(), String.valueOf(orig.getLine()), - Integer.valueOf(orig.getLine())) } + new NumericValue(10, "" + orig.getLine())) } ), true); } else if (m == __FILE__) { @@ -810,10 +810,10 @@ public class Preprocessor implements Closeable { * a special Macro subclass which overrides getTokens(). */ int value = this.counter++; push_source(new FixedTokenSource( - new Token[] { new Token(INTEGER, + new Token[] { new Token(NUMBER, orig.getLine(), orig.getColumn(), String.valueOf(value), - Integer.valueOf(value)) } + new NumericValue(10, "" + value)) } ), true); } else { @@ -1342,21 +1342,21 @@ public class Preprocessor implements Closeable { error(la, "defined() needs identifier, not " + la.getText()); - tok = new Token(INTEGER, + tok = new Token(NUMBER, la.getLine(), la.getColumn(), - "0", Integer.valueOf(0)); + "0", new NumericValue(10, "0")); } else if (macros.containsKey(la.getText())) { // System.out.println("Found macro"); - tok = new Token(INTEGER, + tok = new Token(NUMBER, la.getLine(), la.getColumn(), - "1", Integer.valueOf(1)); + "1", new NumericValue(10, "1")); } else { // System.out.println("Not found macro"); - tok = new Token(INTEGER, + tok = new Token(NUMBER, la.getLine(), la.getColumn(), - "0", Integer.valueOf(0)); + "0", new NumericValue(10, "0")); } if (paren) { @@ -1438,8 +1438,9 @@ public class Preprocessor implements Closeable { case '~': lhs = ~expr(11); break; case '!': lhs = expr(11) == 0 ? 1 : 0; break; case '-': lhs = -expr(11); break; - case INTEGER: - lhs = ((Number)tok.getValue()).longValue(); + case NUMBER: + NumericValue value = (NumericValue)tok.getValue(); + lhs = value.longValue(); break; case CHARACTER: lhs = (long)((Character)tok.getValue()).charValue(); @@ -1665,7 +1666,7 @@ public class Preprocessor implements Closeable { case XOR_EQ: return tok; - case INTEGER: + case NUMBER: return tok; case IDENTIFIER: diff --git a/src/java/org/anarres/cpp/Token.java b/src/java/org/anarres/cpp/Token.java index 0dca7ff..9112c25 100644 --- a/src/java/org/anarres/cpp/Token.java +++ b/src/java/org/anarres/cpp/Token.java @@ -151,7 +151,7 @@ public final class Token { } #set ($i = 257) -#set ($tokens = [ "AND_EQ", "ARROW", "CHARACTER", "CCOMMENT", "CPPCOMMENT", "DEC", "DIV_EQ", "ELLIPSIS", "EOF", "EQ", "GE", "HASH", "HEADER", "IDENTIFIER", "INC", "INTEGER", "LAND", "LAND_EQ", "LE", "LITERAL", "LOR", "LOR_EQ", "LSH", "LSH_EQ", "MOD_EQ", "MULT_EQ", "NE", "NL", "OR_EQ", "PASTE", "PLUS_EQ", "RANGE", "RSH", "RSH_EQ", "STRING", "SUB_EQ", "WHITESPACE", "XOR_EQ", "M_ARG", "M_PASTE", "M_STRING", "P_LINE", "INVALID" ]) +#set ($tokens = [ "AND_EQ", "ARROW", "CHARACTER", "CCOMMENT", "CPPCOMMENT", "DEC", "DIV_EQ", "ELLIPSIS", "EOF", "EQ", "GE", "HASH", "HEADER", "IDENTIFIER", "INC", "NUMBER", "LAND", "LAND_EQ", "LE", "LITERAL", "LOR", "LOR_EQ", "LSH", "LSH_EQ", "MOD_EQ", "MULT_EQ", "NE", "NL", "OR_EQ", "PASTE", "PLUS_EQ", "RANGE", "RSH", "RSH_EQ", "STRING", "SUB_EQ", "WHITESPACE", "XOR_EQ", "M_ARG", "M_PASTE", "M_STRING", "P_LINE", "INVALID" ]) #foreach ($token in $tokens) /** The token type $token. */ public static final int $token = $i; diff --git a/src/tests/org/anarres/cpp/LexerSourceTestCase.java b/src/tests/org/anarres/cpp/LexerSourceTestCase.java index adb1862..205176b 100644 --- a/src/tests/org/anarres/cpp/LexerSourceTestCase.java +++ b/src/tests/org/anarres/cpp/LexerSourceTestCase.java @@ -31,7 +31,7 @@ public class LexerSourceTestCase extends BaseTestCase implements Test { testLexerSource("int a = 5;", IDENTIFIER, WHITESPACE, IDENTIFIER, WHITESPACE, - '=', WHITESPACE, INTEGER, ';', EOF + '=', WHITESPACE, NUMBER, ';', EOF ); // \n is WHITESPACE because ppvalid = false @@ -43,7 +43,7 @@ public class LexerSourceTestCase extends BaseTestCase implements Test { testLexerSource("%:?", '#', '?'); testLexerSource("%:%=", '#', MOD_EQ); testLexerSource("0x1234ffdUL 0765I", - INTEGER, WHITESPACE, INTEGER); + NUMBER, WHITESPACE, NUMBER); testLexerSource("+= -= *= /= %= <= >= >>= <<= &= |= ^= x", PLUS_EQ, WHITESPACE, @@ -73,9 +73,9 @@ public class LexerSourceTestCase extends BaseTestCase implements Test { INVALID); testLexerSource("1i1I1l1L1ui1ul", - INTEGER, INTEGER, - INTEGER, INTEGER, - INTEGER, INTEGER); + NUMBER, NUMBER, + NUMBER, NUMBER, + NUMBER, NUMBER); } diff --git a/src/tests/org/anarres/cpp/PreprocessorTestCase.java b/src/tests/org/anarres/cpp/PreprocessorTestCase.java index 63e4009..217659a 100644 --- a/src/tests/org/anarres/cpp/PreprocessorTestCase.java +++ b/src/tests/org/anarres/cpp/PreprocessorTestCase.java @@ -52,7 +52,7 @@ public class PreprocessorTestCase extends BaseTestCase { public void testPreprocessor() throws Exception { /* Magic macros */ testInput("line = __LINE__\n", - I("line"), WHITESPACE, '=', WHITESPACE, INTEGER + I("line"), WHITESPACE, '=', WHITESPACE, NUMBER /*, NL - all nls deferred so as not to block the reader */ ); testInput("file = __FILE__\n", NL, /* from before, etc */ |