aboutsummaryrefslogtreecommitdiffstats
path: root/src/main/java/org/anarres/cpp/LexerSource.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/org/anarres/cpp/LexerSource.java')
-rw-r--r--src/main/java/org/anarres/cpp/LexerSource.java910
1 files changed, 910 insertions, 0 deletions
diff --git a/src/main/java/org/anarres/cpp/LexerSource.java b/src/main/java/org/anarres/cpp/LexerSource.java
new file mode 100644
index 0000000..ca18314
--- /dev/null
+++ b/src/main/java/org/anarres/cpp/LexerSource.java
@@ -0,0 +1,910 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (c) 2007-2008, Shevek
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+package org.anarres.cpp;
+
+import java.io.IOException;
+import java.io.Reader;
+
+import javax.annotation.Nonnull;
+import static org.anarres.cpp.Token.*;
+
+/** Does not handle digraphs. */
+public class LexerSource extends Source {
+
+ private static final boolean DEBUG = false;
+
+ private JoinReader reader;
+ private final boolean ppvalid;
+ private boolean bol;
+ private boolean include;
+
+ private boolean digraphs;
+
+ /* Unread. */
+ private int u0, u1;
+ private int ucount;
+
+ private int line;
+ private int column;
+ private int lastcolumn;
+ private boolean cr;
+
+ /* ppvalid is:
+ * false in StringLexerSource,
+ * true in FileLexerSource */
+ public LexerSource(Reader r, boolean ppvalid) {
+ this.reader = new JoinReader(r);
+ this.ppvalid = ppvalid;
+ this.bol = true;
+ this.include = false;
+
+ this.digraphs = true;
+
+ this.ucount = 0;
+
+ this.line = 1;
+ this.column = 0;
+ this.lastcolumn = -1;
+ this.cr = false;
+ }
+
+ @Override
+ /* pp */ void init(Preprocessor pp) {
+ super.init(pp);
+ this.digraphs = pp.getFeature(Feature.DIGRAPHS);
+ this.reader.init(pp, this);
+ }
+
+ @Override
+ public int getLine() {
+ return line;
+ }
+
+ @Override
+ public int getColumn() {
+ return column;
+ }
+
+ @Override
+ /* pp */ boolean isNumbered() {
+ return true;
+ }
+
+ /* Error handling. */
+ private void _error(String msg, boolean error)
+ throws LexerException {
+ int _l = line;
+ int _c = column;
+ if (_c == 0) {
+ _c = lastcolumn;
+ _l--;
+ } else {
+ _c--;
+ }
+ if (error)
+ super.error(_l, _c, msg);
+ else
+ super.warning(_l, _c, msg);
+ }
+
+ /* Allow JoinReader to call this. */
+ /* pp */ final void error(String msg)
+ throws LexerException {
+ _error(msg, true);
+ }
+
+ /* Allow JoinReader to call this. */
+ /* pp */ final void warning(String msg)
+ throws LexerException {
+ _error(msg, false);
+ }
+
+ /* A flag for string handling. */
+
+ /* pp */ void setInclude(boolean b) {
+ this.include = b;
+ }
+
+ /*
+ private boolean _isLineSeparator(int c) {
+ return Character.getType(c) == Character.LINE_SEPARATOR
+ || c == -1;
+ }
+ */
+
+ /* XXX Move to JoinReader and canonicalise newlines. */
+ private static boolean isLineSeparator(int c) {
+ switch ((char) c) {
+ case '\r':
+ case '\n':
+ case '\u2028':
+ case '\u2029':
+ case '\u000B':
+ case '\u000C':
+ case '\u0085':
+ return true;
+ default:
+ return (c == -1);
+ }
+ }
+
+ private int read()
+ throws IOException,
+ LexerException {
+ int c;
+ assert ucount <= 2 : "Illegal ucount: " + ucount;
+ switch (ucount) {
+ case 2:
+ ucount = 1;
+ c = u1;
+ break;
+ case 1:
+ ucount = 0;
+ c = u0;
+ break;
+ default:
+ if (reader == null)
+ c = -1;
+ else
+ c = reader.read();
+ break;
+ }
+
+ switch (c) {
+ case '\r':
+ cr = true;
+ line++;
+ lastcolumn = column;
+ column = 0;
+ break;
+ case '\n':
+ if (cr) {
+ cr = false;
+ break;
+ }
+ /* fallthrough */
+ case '\u2028':
+ case '\u2029':
+ case '\u000B':
+ case '\u000C':
+ case '\u0085':
+ cr = false;
+ line++;
+ lastcolumn = column;
+ column = 0;
+ break;
+ case -1:
+ cr = false;
+ break;
+ default:
+ cr = false;
+ column++;
+ break;
+ }
+
+ /*
+ if (isLineSeparator(c)) {
+ line++;
+ lastcolumn = column;
+ column = 0;
+ }
+ else {
+ column++;
+ }
+ */
+ return c;
+ }
+
+ /* You can unget AT MOST one newline. */
+ private void unread(int c)
+ throws IOException {
+ /* XXX Must unread newlines. */
+ if (c != -1) {
+ if (isLineSeparator(c)) {
+ line--;
+ column = lastcolumn;
+ cr = false;
+ } else {
+ column--;
+ }
+ switch (ucount) {
+ case 0:
+ u0 = c;
+ ucount = 1;
+ break;
+ case 1:
+ u1 = c;
+ ucount = 2;
+ break;
+ default:
+ throw new IllegalStateException(
+ "Cannot unget another character!"
+ );
+ }
+ // reader.unread(c);
+ }
+ }
+
+ /* Consumes the rest of the current line into an invalid. */
+ @Nonnull
+ private Token invalid(StringBuilder text, String reason)
+ throws IOException,
+ LexerException {
+ int d = read();
+ while (!isLineSeparator(d)) {
+ text.append((char) d);
+ d = read();
+ }
+ unread(d);
+ return new Token(INVALID, text.toString(), reason);
+ }
+
+ @Nonnull
+ private Token ccomment()
+ throws IOException,
+ LexerException {
+ StringBuilder text = new StringBuilder("/*");
+ int d;
+ do {
+ do {
+ d = read();
+ text.append((char) d);
+ } while (d != '*');
+ do {
+ d = read();
+ text.append((char) d);
+ } while (d == '*');
+ } while (d != '/');
+ return new Token(CCOMMENT, text.toString());
+ }
+
+ @Nonnull
+ private Token cppcomment()
+ throws IOException,
+ LexerException {
+ StringBuilder text = new StringBuilder("//");
+ int d = read();
+ while (!isLineSeparator(d)) {
+ text.append((char) d);
+ d = read();
+ }
+ unread(d);
+ return new Token(CPPCOMMENT, text.toString());
+ }
+
+ private int escape(StringBuilder text)
+ throws IOException,
+ LexerException {
+ int d = read();
+ switch (d) {
+ case 'a':
+ text.append('a');
+ return 0x07;
+ case 'b':
+ text.append('b');
+ return '\b';
+ case 'f':
+ text.append('f');
+ return '\f';
+ case 'n':
+ text.append('n');
+ return '\n';
+ case 'r':
+ text.append('r');
+ return '\r';
+ case 't':
+ text.append('t');
+ return '\t';
+ case 'v':
+ text.append('v');
+ return 0x0b;
+ case '\\':
+ text.append('\\');
+ return '\\';
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ int len = 0;
+ int val = 0;
+ do {
+ val = (val << 3) + Character.digit(d, 8);
+ text.append((char) d);
+ d = read();
+ } while (++len < 3 && Character.digit(d, 8) != -1);
+ unread(d);
+ return val;
+
+ case 'x':
+ text.append((char) d);
+ len = 0;
+ val = 0;
+ while (len++ < 2) {
+ d = read();
+ if (Character.digit(d, 16) == -1) {
+ unread(d);
+ break;
+ }
+ val = (val << 4) + Character.digit(d, 16);
+ text.append((char) d);
+ }
+ return val;
+
+ /* Exclude two cases from the warning. */
+ case '"':
+ text.append('"');
+ return '"';
+ case '\'':
+ text.append('\'');
+ return '\'';
+
+ default:
+ warning("Unnecessary escape character " + (char) d);
+ text.append((char) d);
+ return d;
+ }
+ }
+
+ @Nonnull
+ private Token character()
+ throws IOException,
+ LexerException {
+ StringBuilder text = new StringBuilder("'");
+ int d = read();
+ if (d == '\\') {
+ text.append('\\');
+ d = escape(text);
+ } else if (isLineSeparator(d)) {
+ unread(d);
+ return new Token(INVALID, text.toString(),
+ "Unterminated character literal");
+ } else if (d == '\'') {
+ text.append('\'');
+ return new Token(INVALID, text.toString(),
+ "Empty character literal");
+ } else if (!Character.isDefined(d)) {
+ text.append('?');
+ return invalid(text, "Illegal unicode character literal");
+ } else {
+ text.append((char) d);
+ }
+
+ int e = read();
+ if (e != '\'') {
+ // error("Illegal character constant");
+ /* We consume up to the next ' or the rest of the line. */
+ for (;;) {
+ if (isLineSeparator(e)) {
+ unread(e);
+ break;
+ }
+ text.append((char) e);
+ if (e == '\'')
+ break;
+ e = read();
+ }
+ return new Token(INVALID, text.toString(),
+ "Illegal character constant " + text);
+ }
+ text.append('\'');
+ /* XXX It this a bad cast? */
+ return new Token(CHARACTER,
+ text.toString(), Character.valueOf((char) d));
+ }
+
+ @Nonnull
+ private Token string(char open, char close)
+ throws IOException,
+ LexerException {
+ StringBuilder text = new StringBuilder();
+ text.append(open);
+
+ StringBuilder buf = new StringBuilder();
+
+ for (;;) {
+ int c = read();
+ if (c == close) {
+ break;
+ } else if (c == '\\') {
+ text.append('\\');
+ if (!include) {
+ char d = (char) escape(text);
+ buf.append(d);
+ }
+ } else if (c == -1) {
+ unread(c);
+ // error("End of file in string literal after " + buf);
+ return new Token(INVALID, text.toString(),
+ "End of file in string literal after " + buf);
+ } else if (isLineSeparator(c)) {
+ unread(c);
+ // error("Unterminated string literal after " + buf);
+ return new Token(INVALID, text.toString(),
+ "Unterminated string literal after " + buf);
+ } else {
+ text.append((char) c);
+ buf.append((char) c);
+ }
+ }
+ text.append(close);
+ switch (close) {
+ case '"':
+ return new Token(STRING,
+ text.toString(), buf.toString());
+ case '>':
+ return new Token(HEADER,
+ text.toString(), buf.toString());
+ case '\'':
+ if (buf.length() == 1)
+ return new Token(CHARACTER,
+ text.toString(), buf.toString());
+ return new Token(SQSTRING,
+ text.toString(), buf.toString());
+ default:
+ throw new IllegalStateException(
+ "Unknown closing character " + String.valueOf(close));
+ }
+ }
+
+ @Nonnull
+ private Token _number_suffix(StringBuilder text, NumericValue value, int d)
+ throws IOException,
+ LexerException {
+ int flags = 0; // U, I, L, LL, F, D, MSB
+ for (;;) {
+ if (d == 'U' || d == 'u') {
+ if ((flags & NumericValue.F_UNSIGNED) != 0)
+ warning("Duplicate unsigned suffix " + d);
+ flags |= NumericValue.F_UNSIGNED;
+ text.append((char) d);
+ d = read();
+ } else if (d == 'L' || d == 'l') {
+ if ((flags & NumericValue.FF_SIZE) != 0)
+ warning("Nultiple length suffixes after " + text);
+ text.append((char) d);
+ int e = read();
+ if (e == d) { // Case must match. Ll is Welsh.
+ flags |= NumericValue.F_LONGLONG;
+ text.append((char) e);
+ d = read();
+ } else {
+ flags |= NumericValue.F_LONG;
+ d = e;
+ }
+ } else if (d == 'I' || d == 'i') {
+ if ((flags & NumericValue.FF_SIZE) != 0)
+ warning("Nultiple length suffixes after " + text);
+ flags |= NumericValue.F_INT;
+ text.append((char) d);
+ d = read();
+ } else if (d == 'F' || d == 'f') {
+ if ((flags & NumericValue.FF_SIZE) != 0)
+ warning("Nultiple length suffixes after " + text);
+ flags |= NumericValue.F_FLOAT;
+ text.append((char) d);
+ d = read();
+ } else if (d == 'D' || d == 'd') {
+ if ((flags & NumericValue.FF_SIZE) != 0)
+ warning("Nultiple length suffixes after " + text);
+ flags |= NumericValue.F_DOUBLE;
+ text.append((char) d);
+ d = read();
+ } // This should probably be isPunct() || isWhite().
+ else if (Character.isLetter(d) || d == '_') {
+ unread(d);
+ value.setFlags(flags);
+ return invalid(text,
+ "Invalid suffix \"" + (char) d
+ + "\" on numeric constant");
+ } else {
+ unread(d);
+ value.setFlags(flags);
+ return new Token(NUMBER,
+ text.toString(), value);
+ }
+ }
+ }
+
+ /* Either a decimal part, or a hex exponent. */
+ @Nonnull
+ private String _number_part(StringBuilder text, int base)
+ throws IOException,
+ LexerException {
+ StringBuilder part = new StringBuilder();
+ int d = read();
+ while (Character.digit(d, base) != -1) {
+ text.append((char) d);
+ part.append((char) d);
+ d = read();
+ }
+ unread(d);
+ return part.toString();
+ }
+
+ /* We already chewed a zero, so empty is fine. */
+ @Nonnull
+ private Token number_octal()
+ throws IOException,
+ LexerException {
+ StringBuilder text = new StringBuilder("0");
+ String integer = _number_part(text, 8);
+ int d = read();
+ NumericValue value = new NumericValue(8, integer);
+ return _number_suffix(text, value, d);
+ }
+
+ /* We do not know whether know the first digit is valid. */
+ @Nonnull
+ private Token number_hex(char x)
+ throws IOException,
+ LexerException {
+ StringBuilder text = new StringBuilder("0");
+ text.append(x);
+ String integer = _number_part(text, 16);
+ NumericValue value = new NumericValue(16, integer);
+ int d = read();
+ if (d == '.') {
+ String fraction = _number_part(text, 16);
+ value.setFractionalPart(fraction);
+ d = read();
+ }
+ if (d == 'P' || d == 'p') {
+ String exponent = _number_part(text, 10);
+ value.setExponent(exponent);
+ d = read();
+ }
+ // XXX Make sure it's got enough parts
+ return _number_suffix(text, value, d);
+ }
+
+ /* We know we have at least one valid digit, but empty is not
+ * fine. */
+ @Nonnull
+ private Token number_decimal()
+ throws IOException,
+ LexerException {
+ StringBuilder text = new StringBuilder();
+ String integer = _number_part(text, 10);
+ NumericValue value = new NumericValue(10, integer);
+ int d = read();
+ if (d == '.') {
+ String fraction = _number_part(text, 10);
+ value.setFractionalPart(fraction);
+ d = read();
+ }
+ if (d == 'E' || d == 'e') {
+ String exponent = _number_part(text, 10);
+ value.setExponent(exponent);
+ d = read();
+ }
+ // XXX Make sure it's got enough parts
+ return _number_suffix(text, value, d);
+ }
+
+ @Nonnull
+ private Token identifier(int c)
+ throws IOException,
+ LexerException {
+ StringBuilder text = new StringBuilder();
+ int d;
+ text.append((char) c);
+ for (;;) {
+ d = read();
+ if (Character.isIdentifierIgnorable(d))
+ ; else if (Character.isJavaIdentifierPart(d))
+ text.append((char) d);
+ else
+ break;
+ }
+ unread(d);
+ return new Token(IDENTIFIER, text.toString());
+ }
+
+ @Nonnull
+ private Token whitespace(int c)
+ throws IOException,
+ LexerException {
+ StringBuilder text = new StringBuilder();
+ int d;
+ text.append((char) c);
+ for (;;) {
+ d = read();
+ if (ppvalid && isLineSeparator(d)) /* XXX Ugly. */
+
+ break;
+ if (Character.isWhitespace(d))
+ text.append((char) d);
+ else
+ break;
+ }
+ unread(d);
+ return new Token(WHITESPACE, text.toString());
+ }
+
+ /* No token processed by cond() contains a newline. */
+ @Nonnull
+ private Token cond(char c, int yes, int no)
+ throws IOException,
+ LexerException {
+ int d = read();
+ if (c == d)
+ return new Token(yes);
+ unread(d);
+ return new Token(no);
+ }
+
+ @Override
+ public Token token()
+ throws IOException,
+ LexerException {
+ Token tok = null;
+
+ int _l = line;
+ int _c = column;
+
+ int c = read();
+ int d;
+
+ switch (c) {
+ case '\n':
+ if (ppvalid) {
+ bol = true;
+ if (include) {
+ tok = new Token(NL, _l, _c, "\n");
+ } else {
+ int nls = 0;
+ do {
+ nls++;
+ d = read();
+ } while (d == '\n');
+ unread(d);
+ char[] text = new char[nls];
+ for (int i = 0; i < text.length; i++)
+ text[i] = '\n';
+ // Skip the bol = false below.
+ tok = new Token(NL, _l, _c, new String(text));
+ }
+ if (DEBUG)
+ System.out.println("lx: Returning NL: " + tok);
+ return tok;
+ }
+ /* Let it be handled as whitespace. */
+ break;
+
+ case '!':
+ tok = cond('=', NE, '!');
+ break;
+
+ case '#':
+ if (bol)
+ tok = new Token(HASH);
+ else
+ tok = cond('#', PASTE, '#');
+ break;
+
+ case '+':
+ d = read();
+ if (d == '+')
+ tok = new Token(INC);
+ else if (d == '=')
+ tok = new Token(PLUS_EQ);
+ else
+ unread(d);
+ break;
+ case '-':
+ d = read();
+ if (d == '-')
+ tok = new Token(DEC);
+ else if (d == '=')
+ tok = new Token(SUB_EQ);
+ else if (d == '>')
+ tok = new Token(ARROW);
+ else
+ unread(d);
+ break;
+
+ case '*':
+ tok = cond('=', MULT_EQ, '*');
+ break;
+ case '/':
+ d = read();
+ if (d == '*')
+ tok = ccomment();
+ else if (d == '/')
+ tok = cppcomment();
+ else if (d == '=')
+ tok = new Token(DIV_EQ);
+ else
+ unread(d);
+ break;
+
+ case '%':
+ d = read();
+ if (d == '=')
+ tok = new Token(MOD_EQ);
+ else if (digraphs && d == '>')
+ tok = new Token('}'); // digraph
+ else if (digraphs && d == ':')
+ PASTE:
+ {
+ d = read();
+ if (d != '%') {
+ unread(d);
+ tok = new Token('#'); // digraph
+ break PASTE;
+ }
+ d = read();
+ if (d != ':') {
+ unread(d); // Unread 2 chars here.
+ unread('%');
+ tok = new Token('#'); // digraph
+ break PASTE;
+ }
+ tok = new Token(PASTE); // digraph
+ }
+ else
+ unread(d);
+ break;
+
+ case ':':
+ /* :: */
+ d = read();
+ if (digraphs && d == '>')
+ tok = new Token(']'); // digraph
+ else
+ unread(d);
+ break;
+
+ case '<':
+ if (include) {
+ tok = string('<', '>');
+ } else {
+ d = read();
+ if (d == '=')
+ tok = new Token(LE);
+ else if (d == '<')
+ tok = cond('=', LSH_EQ, LSH);
+ else if (digraphs && d == ':')
+ tok = new Token('['); // digraph
+ else if (digraphs && d == '%')
+ tok = new Token('{'); // digraph
+ else
+ unread(d);
+ }
+ break;
+
+ case '=':
+ tok = cond('=', EQ, '=');
+ break;
+
+ case '>':
+ d = read();
+ if (d == '=')
+ tok = new Token(GE);
+ else if (d == '>')
+ tok = cond('=', RSH_EQ, RSH);
+ else
+ unread(d);
+ break;
+
+ case '^':
+ tok = cond('=', XOR_EQ, '^');
+ break;
+
+ case '|':
+ d = read();
+ if (d == '=')
+ tok = new Token(OR_EQ);
+ else if (d == '|')
+ tok = cond('=', LOR_EQ, LOR);
+ else
+ unread(d);
+ break;
+ case '&':
+ d = read();
+ if (d == '&')
+ tok = cond('=', LAND_EQ, LAND);
+ else if (d == '=')
+ tok = new Token(AND_EQ);
+ else
+ unread(d);
+ break;
+
+ case '.':
+ d = read();
+ if (d == '.')
+ tok = cond('.', ELLIPSIS, RANGE);
+ else
+ unread(d);
+ if (Character.isDigit(d)) {
+ unread('.');
+ tok = number_decimal();
+ }
+ /* XXX decimal fraction */
+ break;
+
+ case '0':
+ /* octal or hex */
+ d = read();
+ if (d == 'x' || d == 'X')
+ tok = number_hex((char) d);
+ else {
+ unread(d);
+ tok = number_octal();
+ }
+ break;
+
+ case '\'':
+ tok = string('\'', '\'');
+ break;
+
+ case '"':
+ tok = string('"', '"');
+ break;
+
+ case -1:
+ close();
+ tok = new Token(EOF, _l, _c, "<eof>");
+ break;
+ }
+
+ if (tok == null) {
+ if (Character.isWhitespace(c)) {
+ tok = whitespace(c);
+ } else if (Character.isDigit(c)) {
+ unread(c);
+ tok = number_decimal();
+ } else if (Character.isJavaIdentifierStart(c)) {
+ tok = identifier(c);
+ } else {
+ tok = new Token(c);
+ }
+ }
+
+ if (bol) {
+ switch (tok.getType()) {
+ case WHITESPACE:
+ case CCOMMENT:
+ break;
+ default:
+ bol = false;
+ break;
+ }
+ }
+
+ tok.setLocation(_l, _c);
+ if (DEBUG)
+ System.out.println("lx: Returning " + tok);
+ // (new Exception("here")).printStackTrace(System.out);
+ return tok;
+ }
+
+ public void close()
+ throws IOException {
+ if (reader != null) {
+ reader.close();
+ reader = null;
+ }
+ super.close();
+ }
+
+}