aboutsummaryrefslogtreecommitdiffstats
path: root/src/java/org/anarres/cpp
diff options
context:
space:
mode:
author Shevek <[email protected]> 2008-03-21 23:05:04 +0000
committer Shevek <[email protected]> 2008-03-21 23:05:04 +0000
commit 5ff55648127c8a8e1b9829775045af986e37647c (patch)
tree b28209b1efe12824fbdcabd4ee9073e93ca30636 /src/java/org/anarres/cpp
parent fca34200881fcaf7b84b4210f7a5f40c8925c4d1 (diff)
move stuff into trunk
Diffstat (limited to 'src/java/org/anarres/cpp')
-rw-r--r-- src/java/org/anarres/cpp/Argument.java 79
-rw-r--r-- src/java/org/anarres/cpp/CppReader.java 147
-rw-r--r-- src/java/org/anarres/cpp/FileLexerSource.java 74
-rw-r--r-- src/java/org/anarres/cpp/FixedTokenSource.java 67
-rw-r--r-- src/java/org/anarres/cpp/InternalException.java 33
-rw-r--r-- src/java/org/anarres/cpp/JoinReader.java 168
-rw-r--r-- src/java/org/anarres/cpp/LexerException.java 35
-rw-r--r-- src/java/org/anarres/cpp/LexerSource.java 677
-rw-r--r-- src/java/org/anarres/cpp/Macro.java 157
-rw-r--r-- src/java/org/anarres/cpp/MacroTokenSource.java 191
-rw-r--r-- src/java/org/anarres/cpp/Main.java 111
-rw-r--r-- src/java/org/anarres/cpp/Preprocessor.java 1511
-rw-r--r-- src/java/org/anarres/cpp/PreprocessorListener.java 83
-rw-r--r-- src/java/org/anarres/cpp/Source.java 226
-rw-r--r-- src/java/org/anarres/cpp/SourceIterator.java 94
-rw-r--r-- src/java/org/anarres/cpp/State.java 69
-rw-r--r-- src/java/org/anarres/cpp/StringLexerSource.java 64
-rw-r--r-- src/java/org/anarres/cpp/Token.java 215
-rw-r--r-- src/java/org/anarres/cpp/TokenSnifferSource.java 56
19 files changed, 4057 insertions, 0 deletions
diff --git a/src/java/org/anarres/cpp/Argument.java b/src/java/org/anarres/cpp/Argument.java
new file mode 100644
index 0000000..da87d70
--- /dev/null
+++ b/src/java/org/anarres/cpp/Argument.java
@@ -0,0 +1,79 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.IOException;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A macro argument.
+ *
+ * The argument itself is the list of raw tokens it was given; the
+ * preprocessed form of those tokens is obtained from the
+ * Preprocessor by {@link #expand(Preprocessor)} and cached.
+ */
+/* pp */ class Argument extends ArrayList<Token> {
+    public static final int NO_ARGS = -1;
+
+    /* Cached result of expand(); null until expand() has stored one. */
+    private List<Token> expansion;
+
+    public Argument() {
+        this.expansion = null;
+    }
+
+    /** Appends a raw token to this argument. */
+    public void addToken(Token tok) {
+        add(tok);
+    }
+
+    /**
+     * Expands this argument with the given preprocessor.
+     *
+     * Once a non-null expansion has been stored, further calls
+     * return without doing anything.
+     */
+    /* pp */ void expand(Preprocessor p)
+                        throws IOException,
+                                LexerException {
+        if (expansion != null)
+            return;
+        this.expansion = p.expand(this);
+    }
+
+    /**
+     * Returns an iterator over the cached expansion.
+     *
+     * {@link #expand(Preprocessor)} must have stored an expansion
+     * first; otherwise this throws a NullPointerException.
+     */
+    public Iterator<Token> expansion() {
+        return expansion.iterator();
+    }
+
+    public String toString() {
+        StringBuilder buf = new StringBuilder("Argument(");
+        buf.append("raw=[ ");
+        for (Token raw : this)
+            buf.append(raw.getText());
+        buf.append(" ];expansion=[ ");
+        if (expansion != null) {
+            for (Token exp : expansion)
+                buf.append(exp.getText());
+        }
+        else {
+            buf.append("null");
+        }
+        buf.append(" ])");
+        return buf.toString();
+    }
+
+}
diff --git a/src/java/org/anarres/cpp/CppReader.java b/src/java/org/anarres/cpp/CppReader.java
new file mode 100644
index 0000000..0aa6788
--- /dev/null
+++ b/src/java/org/anarres/cpp/CppReader.java
@@ -0,0 +1,147 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A Reader wrapper around the Preprocessor.
+ *
+ * This is a utility class to provide a transparent {@link Reader}
+ * which preprocesses the input text. The text of each token produced
+ * by the Preprocessor, comments included, is handed out one
+ * character at a time.
+ *
+ * @see Preprocessor
+ * @see Reader
+ */
+public class CppReader extends Reader {
+
+    private Preprocessor cpp;
+    /* Text of the token currently being handed out; null once the
+     * end of input has been reached or the reader has been closed. */
+    private String token;
+    /* Index of the next unread character in token. */
+    private int idx;
+
+    /**
+     * Creates a CppReader which preprocesses the text read from r.
+     */
+    public CppReader(final Reader r) {
+        cpp = new Preprocessor(new LexerSource(r, true) {
+            @Override
+            public String getName() {
+                return "<CppReader Input@" +
+                        System.identityHashCode(r) + ">";
+            }
+        });
+        token = "";
+        idx = 0;
+    }
+
+    /**
+     * Creates a CppReader which reads the output of the given
+     * Preprocessor.
+     */
+    public CppReader(Preprocessor p) {
+        cpp = p;
+        token = "";
+        idx = 0;
+    }
+
+    /**
+     * Returns the Preprocessor used by this CppReader.
+     */
+    public Preprocessor getPreprocessor() {
+        return cpp;
+    }
+
+    /**
+     * Defines the given name as a macro.
+     *
+     * This is a convenience method.
+     */
+    public void addMacro(String name)
+                        throws LexerException {
+        cpp.addMacro(name);
+    }
+
+    /**
+     * Defines the given name as a macro.
+     *
+     * This is a convenience method.
+     */
+    public void addMacro(String name, String value)
+                        throws LexerException {
+        cpp.addMacro(name, value);
+    }
+
+    /**
+     * Makes sure that at least one unread character is available in
+     * the current token text, fetching further tokens as required.
+     *
+     * @return false if the end of input has been reached (or the
+     *         reader was closed), true otherwise.
+     * @throws IOException if reading fails; a LexerException from
+     *         the Preprocessor is wrapped as the cause.
+     */
+    private boolean refill()
+                        throws IOException {
+        try {
+            assert cpp != null : "cpp is null : was it closed?";
+            if (token == null)
+                return false;
+            /* A loop, because a token may have empty text. */
+            while (idx >= token.length()) {
+                Token tok = cpp.token();
+                if (tok.getType() == EOF) {
+                    token = null;
+                    return false;
+                }
+                /* All other tokens, comments included, are passed
+                 * through verbatim. */
+                token = tok.getText();
+                idx = 0;
+            }
+            return true;
+        }
+        catch (LexerException e) {
+            IOException ie = new IOException(String.valueOf(e));
+            ie.initCause(e);
+            throw ie;
+        }
+    }
+
+    @Override
+    public int read()
+                        throws IOException {
+        if (!refill())
+            return -1;
+        return token.charAt(idx++);
+    }
+
+    /**
+     * Reads up to len characters into cbuf starting at off.
+     *
+     * @return the number of characters stored, or -1 if the end of
+     *         input was reached before any character could be read.
+     */
+    /* XXX Very slow and inefficient. */
+    @Override
+    public int read(char cbuf[], int off, int len)
+                        throws IOException {
+        if (token == null)
+            return -1;
+        for (int i = 0; i < len; i++) {
+            int ch = read();
+            if (ch == -1) {
+                /* The Reader contract requires -1, not 0, when the
+                 * end of the stream is hit before anything was read. */
+                return (i == 0) ? -1 : i;
+            }
+            cbuf[off + i] = (char)ch;
+        }
+        return len;
+    }
+
+    /**
+     * Drops the references to the Preprocessor and the current
+     * token; subsequent reads report end of input.
+     */
+    @Override
+    public void close()
+                        throws IOException {
+        cpp = null;
+        token = null;
+    }
+
+}
diff --git a/src/java/org/anarres/cpp/FileLexerSource.java b/src/java/org/anarres/cpp/FileLexerSource.java
new file mode 100644
index 0000000..9f574a0
--- /dev/null
+++ b/src/java/org/anarres/cpp/FileLexerSource.java
@@ -0,0 +1,74 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A {@link Source} which lexes the contents of a file.
+ *
+ * The file is read through a BufferedReader wrapped around a
+ * FileReader.
+ *
+ * @see Source
+ */
+public class FileLexerSource extends LexerSource {
+    /* The file this source was opened on. */
+    private File file;
+
+    /**
+     * Creates a new Source for lexing the given File.
+     *
+     * Preprocessor directives are honoured within the file.
+     *
+     * @throws IOException if the file cannot be opened for reading.
+     */
+    public FileLexerSource(File file)
+                        throws IOException {
+        super(new BufferedReader(new FileReader(file)), true);
+        this.file = file;
+    }
+
+    /** Returns the file being lexed. */
+    @Override
+    /* pp */ File getFile() {
+        return this.file;
+    }
+
+    /** Returns the string form of the file being lexed. */
+    @Override
+    /* pp */ String getName() {
+        return String.valueOf(this.file);
+    }
+
+    public String toString() {
+        StringBuilder buf = new StringBuilder("file ");
+        buf.append(this.file);
+        return buf.toString();
+    }
+}
diff --git a/src/java/org/anarres/cpp/FixedTokenSource.java b/src/java/org/anarres/cpp/FixedTokenSource.java
new file mode 100644
index 0000000..d123f89
--- /dev/null
+++ b/src/java/org/anarres/cpp/FixedTokenSource.java
@@ -0,0 +1,67 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Iterator;
+
+/**
+ * A Source which returns a fixed list of tokens, one per call,
+ * followed by an endless supply of EOF tokens.
+ */
+/* pp */ class FixedTokenSource extends Source {
+    /* Returned by token() once the list is exhausted. */
+    private static final Token EOF =
+            new Token(Token.EOF, "<ts-eof>");
+
+    private List<Token> tokens;
+    /* Index of the next token to return. */
+    private int idx;
+
+    /* pp */ FixedTokenSource(Token... tokens) {
+        this.tokens = Arrays.asList(tokens);
+        this.idx = 0;
+    }
+
+    /* pp */ FixedTokenSource(List<Token> tokens) {
+        this.tokens = tokens;
+        this.idx = 0;
+    }
+
+    /**
+     * Returns the next token of the list, or the EOF token if all
+     * of them have already been returned.
+     */
+    public Token token()
+                        throws IOException,
+                                LexerException {
+        if (idx < tokens.size()) {
+            Token next = tokens.get(idx);
+            idx++;
+            return next;
+        }
+        return EOF;
+    }
+
+    public String toString() {
+        StringBuilder buf = new StringBuilder();
+        buf.append("constant token stream ").append(tokens);
+        Source parent = getParent();
+        if (parent == null)
+            return buf.toString();
+        buf.append(" in ");
+        buf.append(String.valueOf(parent));
+        return buf.toString();
+    }
+}
diff --git a/src/java/org/anarres/cpp/InternalException.java b/src/java/org/anarres/cpp/InternalException.java
new file mode 100644
index 0000000..d228710
--- /dev/null
+++ b/src/java/org/anarres/cpp/InternalException.java
@@ -0,0 +1,33 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+/**
+ * An internal exception.
+ *
+ * Thrown when the library detects a violation of its own internal
+ * state. This should never happen; if it ever does, please report
+ * it as a bug.
+ */
+public class InternalException extends RuntimeException {
+    /**
+     * Creates an InternalException with the given detail message.
+     */
+    public InternalException(final String msg) {
+        super(msg);
+    }
+}
diff --git a/src/java/org/anarres/cpp/JoinReader.java b/src/java/org/anarres/cpp/JoinReader.java
new file mode 100644
index 0000000..10ec535
--- /dev/null
+++ b/src/java/org/anarres/cpp/JoinReader.java
@@ -0,0 +1,168 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.Reader;
+import java.io.PushbackReader;
+import java.io.IOException;
+
+/**
+ * A {@link Reader} filter which joins lines ending in a backslash
+ * and, optionally, translates trigraphs.
+ *
+ * A backslash immediately followed by a line terminator ("\n", "\r"
+ * or "\r\n") is removed from the stream. Every continuation removed
+ * in this way is counted and emitted again as a '\n' directly after
+ * the next line separator, or at the end of the input.
+ */
+/* pp */ class JoinReader extends Reader {
+    private Reader in;
+
+    /* True if trigraph sequences are to be translated. */
+    private boolean trigraphs;
+
+    /* Number of continuation newlines removed but not yet re-emitted. */
+    private int newlines;
+    /* True while the pending newlines are being emitted. */
+    private boolean flushnl;
+    /* Stack of pushed-back characters, and the number in use. */
+    private int[] unget;
+    private int uptr;
+
+    public JoinReader(Reader in, boolean trigraphs) {
+        this.in = in;
+        this.trigraphs = trigraphs;
+        this.newlines = 0;
+        this.flushnl = false;
+        /* Three slots are required: with trigraphs enabled, lexing
+         * the input \??x pushes back 'x' and '?' inside _read() and
+         * then the '?' peeked after the backslash in read(). */
+        this.unget = new int[3];
+        this.uptr = 0;
+    }
+
+    public JoinReader(Reader in) {
+        this(in, false);
+    }
+
+    /* Reads one raw character, preferring pushed-back characters. */
+    private int __read() throws IOException {
+        if (uptr > 0)
+            return unget[--uptr];
+        return in.read();
+    }
+
+    /* Pushes a character back; the end-of-stream marker is ignored. */
+    private void _unread(int c) {
+        if (c != -1)
+            unget[uptr++] = c;
+    }
+
+    /* Reads one character, translating a trigraph if enabled. */
+    private int _read() throws IOException {
+        int c = __read();
+        if (c == '?' && trigraphs) {
+            int d = __read();
+            if (d == '?') {
+                int e = __read();
+                switch (e) {
+                    case '(': return '[';
+                    case ')': return ']';
+                    case '<': return '{';
+                    case '>': return '}';
+                    case '=': return '#';
+                    case '/': return '\\';
+                    case '\'': return '^';
+                    case '!': return '|';
+                    case '-': return '~';
+                }
+                /* Not a trigraph: only the first '?' is consumed. */
+                _unread(e);
+            }
+            _unread(d);
+        }
+        return c;
+    }
+
+    /**
+     * Returns the next character of the joined stream, or -1 at the
+     * end of the input.
+     */
+    @Override
+    public int read() throws IOException {
+        if (flushnl) {
+            if (newlines > 0) {
+                newlines--;
+                return '\n';
+            }
+            flushnl = false;
+        }
+
+        for (;;) {
+            int c = _read();
+            switch (c) {
+                case '\\': {
+                    int d = _read();
+                    if (d == '\n') {
+                        newlines++;
+                        continue;
+                    }
+                    if (d == '\r') {
+                        newlines++;
+                        /* Swallow the '\n' of a "\r\n" pair too. */
+                        int e = _read();
+                        if (e != '\n')
+                            _unread(e);
+                        continue;
+                    }
+                    /* An ordinary backslash. */
+                    _unread(d);
+                    return c;
+                }
+                case '\r':
+                case '\n':
+                case '\u2028':
+                case '\u2029':
+                case '\u000B':
+                case '\u000C':
+                case '\u0085':
+                    /* Emit the pending newlines after this one. */
+                    flushnl = true;
+                    return c;
+                case -1:
+                    if (newlines > 0) {
+                        newlines--;
+                        return '\n';
+                    }
+                    return c;
+                default:
+                    return c;
+            }
+        }
+    }
+
+    /**
+     * Reads up to len characters into cbuf starting at off.
+     *
+     * @return the number of characters stored, or -1 if the end of
+     *         input was reached before any character could be read.
+     */
+    @Override
+    public int read(char cbuf[], int off, int len)
+                        throws IOException {
+        for (int i = 0; i < len; i++) {
+            int ch = read();
+            if (ch == -1) {
+                /* The Reader contract requires -1, not 0, when the
+                 * end of the stream is hit before anything was read. */
+                return (i == 0) ? -1 : i;
+            }
+            cbuf[off + i] = (char)ch;
+        }
+        return len;
+    }
+
+    /** Closes the underlying reader. */
+    @Override
+    public void close()
+                        throws IOException {
+        in.close();
+    }
+
+    @Override
+    public String toString() {
+        return "JoinReader(nl=" + newlines + ")";
+    }
+
+}
diff --git a/src/java/org/anarres/cpp/LexerException.java b/src/java/org/anarres/cpp/LexerException.java
new file mode 100644
index 0000000..a4b5e2e
--- /dev/null
+++ b/src/java/org/anarres/cpp/LexerException.java
@@ -0,0 +1,35 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+/**
+ * A preprocessor exception.
+ *
+ * Note to users: I don't really like the name of this class. S.
+ */
+public class LexerException extends Exception {
+    /**
+     * Creates a LexerException with the given detail message.
+     */
+    public LexerException(final String msg) {
+        super(msg);
+    }
+
+    /**
+     * Creates a LexerException wrapping the given cause.
+     */
+    public LexerException(final Throwable cause) {
+        super(cause);
+    }
+}
diff --git a/src/java/org/anarres/cpp/LexerSource.java b/src/java/org/anarres/cpp/LexerSource.java
new file mode 100644
index 0000000..a291bff
--- /dev/null
+++ b/src/java/org/anarres/cpp/LexerSource.java
@@ -0,0 +1,677 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.util.Stack;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A {@link Source} which lexes the characters of a {@link Reader}
+ * into tokens, keeping track of the current line and column.
+ *
+ * The input is read through a JoinReader, so backslash-newline
+ * continuations have already been removed when characters arrive
+ * here. Does not handle digraphs.
+ */
+public class LexerSource extends Source {
+    private static final boolean DEBUG = false;
+
+    private PushbackReader reader;
+    /* If true, '\n' is returned as an NL token and whitespace tokens
+     * stop at line separators. */
+    private boolean ppvalid;
+    /* True while only whitespace and comments have been lexed since
+     * the start of the line; a '#' lexed in this state is a HASH. */
+    private boolean bol;
+    /* Set through setInclude(). If true, '<' opens a '<'...'>' string
+     * token and backslashes in strings are not processed as escapes. */
+    private boolean include;
+
+    private int line;
+    private int column;
+    /* Column reached on the previous line, so that one line
+     * separator can be unread. */
+    private int lastcolumn;
+    /* True if the previous character read was '\r'. */
+    private boolean cr;
+
+    /* ppvalid is:
+     * false in StringLexerSource,
+     * true in FileLexerSource */
+    public LexerSource(Reader r, boolean ppvalid) {
+        this.reader = new PushbackReader(new JoinReader(r), 5);
+        this.ppvalid = ppvalid;
+        this.bol = true;
+        this.include = false;
+
+        this.line = 1;
+        this.column = 0;
+        this.lastcolumn = -1;
+        this.cr = false;
+    }
+
+    @Override
+    public int getLine() {
+        return line;
+    }
+
+    public int getColumn() {
+        return column;
+    }
+
+    /* pp */ boolean isNumbered() {
+        return true;
+    }
+
+/* Error handling - this lot is barely worth it. */
+
+    /* Reports msg at the position of the character read last, as an
+     * error if the flag is set and as a warning otherwise. */
+    private final void _error(String msg, boolean error)
+                        throws LexerException {
+        int _l = line;
+        int _c = column;
+        if (_c == 0) {
+            /* The last character was a line separator. */
+            _c = lastcolumn;
+            _l--;
+        }
+        else {
+            _c--;
+        }
+        if (error)
+            super.error(_l, _c, msg);
+        else
+            super.warning(_l, _c, msg);
+    }
+
+    private final void error(String msg)
+                        throws LexerException {
+        _error(msg, true);
+    }
+
+    private final void warning(String msg)
+                        throws LexerException {
+        _error(msg, false);
+    }
+
+/* A flag for string handling. */
+
+    /* pp */ void setInclude(boolean b) {
+        this.include = b;
+    }
+
+    /* XXX Move to JoinReader and canonicalise newlines. */
+    /* Returns true for the line separator characters and for the
+     * end-of-input marker -1. */
+    private static final boolean isLineSeparator(int c) {
+        switch ((char)c) {
+            case '\r':
+            case '\n':
+            case '\u2028':
+            case '\u2029':
+            case '\u000B':
+            case '\u000C':
+            case '\u0085':
+                return true;
+            default:
+                return (c == -1);
+        }
+    }
+
+
+    /* Reads one character and updates line and column. A "\r\n"
+     * pair advances the line number only once. */
+    private int read() throws IOException {
+        int c = reader.read();
+        switch (c) {
+            case '\r':
+                cr = true;
+                line++;
+                lastcolumn = column;
+                column = 0;
+                break;
+            case '\n':
+                if (cr) {
+                    cr = false;
+                    break;
+                }
+                /* fallthrough */
+            case '\u2028':
+            case '\u2029':
+            case '\u000B':
+            case '\u000C':
+            case '\u0085':
+                cr = false;
+                line++;
+                lastcolumn = column;
+                column = 0;
+                break;
+            default:
+                cr = false;
+                column++;
+                break;
+        }
+
+        return c;
+    }
+
+    /* You can unget AT MOST one newline. */
+    /* Pushes c back and rewinds line and column; -1 is ignored. */
+    private void unread(int c)
+                        throws IOException {
+        if (c != -1) {
+            if (isLineSeparator(c)) {
+                line--;
+                column = lastcolumn;
+                cr = false;
+            }
+            else {
+                column--;
+            }
+            reader.unread(c);
+        }
+    }
+
+    /* Lexes the rest of a C comment; the opening has been consumed.
+     * The comment ends at the first star-slash pair. If the input
+     * ends first, an error is reported and an ERROR token returned,
+     * instead of looping forever on the end-of-input marker. */
+    private Token ccomment()
+                        throws IOException,
+                                LexerException {
+        StringBuilder text = new StringBuilder("/*");
+        boolean star = false;
+        for (;;) {
+            int d = read();
+            if (d == -1) {
+                error("Unterminated comment");
+                return new Token(ERROR, text.toString(), null);
+            }
+            text.append((char)d);
+            if (star && d == '/')
+                break;
+            star = (d == '*');
+        }
+        return new Token(COMMENT, text.toString());
+    }
+
+    /* Lexes the rest of a line comment; the two slashes have been
+     * consumed. The line separator is left unread. */
+    private Token cppcomment()
+                        throws IOException {
+        StringBuilder text = new StringBuilder("//");
+        int d = read();
+        while (!isLineSeparator(d)) {
+            text.append((char)d);
+            d = read();
+        }
+        unread(d);
+        return new Token(COMMENT, text.toString());
+    }
+
+    /* Lexes an escape sequence; the backslash has been consumed and
+     * already appended to text. The characters of the sequence are
+     * appended to text and its value is returned. */
+    private int escape(StringBuilder text)
+                        throws IOException,
+                                LexerException {
+        int len = 0;
+        int val = 0;
+        int d = read();
+        switch (d) {
+            /* The alert character is BEL, 0x07; 0x0a is a newline. */
+            case 'a': text.append('a'); return 0x07;
+            case 'b': text.append('b'); return '\b';
+            case 'f': text.append('f'); return '\f';
+            case 'n': text.append('n'); return '\n';
+            case 'r': text.append('r'); return '\r';
+            case 't': text.append('t'); return '\t';
+            case 'v': text.append('v'); return 0x0b;
+            case '\\': text.append('\\'); return '\\';
+
+            case '0': case '1': case '2': case '3':
+            case '4': case '5': case '6': case '7':
+                /* At most three octal digits. */
+                do {
+                    val = (val << 3) + Character.digit(d, 8);
+                    text.append((char)d);
+                    d = read();
+                } while (++len < 3 && Character.digit(d, 8) != -1);
+                unread(d);
+                return val;
+
+            case 'x':
+                /* The 'x' is only the introducer, not a digit; all
+                 * hex digits which follow belong to the escape. */
+                text.append('x');
+                d = read();
+                while (Character.digit(d, 16) != -1) {
+                    val = (val << 4) + Character.digit(d, 16);
+                    text.append((char)d);
+                    len++;
+                    d = read();
+                }
+                unread(d);
+                if (len == 0)
+                    warning("\\x used with no following hex digits");
+                return val;
+
+            /* Exclude two cases from the warning. */
+            case '"': text.append('"'); return '"';
+            case '\'': text.append('\''); return '\'';
+
+            default:
+                warning("Unnecessary escape character " + (char)d);
+                text.append((char)d);
+                return d;
+        }
+    }
+
+    /* Lexes a character literal; the opening quote has been
+     * consumed. Returns an ERROR token for malformed literals. */
+    private Token character()
+                        throws IOException,
+                                LexerException {
+        StringBuilder text = new StringBuilder("'");
+        int d = read();
+        if (d == '\\') {
+            text.append('\\');
+            d = escape(text);
+        }
+        else if (isLineSeparator(d)) {
+            unread(d);
+            error("Unterminated character literal");
+            return new Token(ERROR, text.toString(), null);
+        }
+        else if (d == '\'') {
+            text.append('\'');
+            error("Empty character literal");
+            return new Token(ERROR, text.toString(), null);
+        }
+        else if (!Character.isDefined(d)) {
+            text.append('?');
+            error("Illegal unicode character literal");
+        }
+        else {
+            text.append((char)d);
+        }
+
+        int e = read();
+        if (e != '\'') {
+            unread(e);
+            error("Illegal character constant");
+            /* XXX We could do some patching up here? */
+            return new Token(ERROR, text.toString(), null);
+        }
+        text.append('\'');
+        /* XXX Bad cast. */
+        return new Token(CHARACTER,
+                text.toString(), Character.valueOf((char)d));
+    }
+
+    /* XXX This strips the enclosing quotes from the
+     * returned value. */
+    /* Lexes a string or header name; the opening delimiter has been
+     * consumed. The token text is the literal as written, the token
+     * value is its contents without the delimiters. */
+    private Token string(char open, char close)
+                        throws IOException,
+                                LexerException {
+        StringBuilder text = new StringBuilder();
+        text.append(open);
+
+        StringBuilder buf = new StringBuilder();
+
+        for (;;) {
+            int c = read();
+            if (c == close) {
+                break;
+            }
+            else if (c == '\\') {
+                text.append('\\');
+                if (!include) {
+                    char d = (char)escape(text);
+                    buf.append(d);
+                }
+                else {
+                    /* No escape processing in include mode: the
+                     * backslash is part of the name and must not
+                     * be lost from the value. */
+                    buf.append('\\');
+                }
+            }
+            else if (c == -1) {
+                unread(c);
+                error("End of file in string literal after " + buf);
+                return new Token(ERROR, text.toString(), null);
+            }
+            else if (isLineSeparator(c)) {
+                unread(c);
+                error("Unterminated string literal after " + buf);
+                return new Token(ERROR, text.toString(), null);
+            }
+            else {
+                text.append((char)c);
+                buf.append((char)c);
+            }
+        }
+        text.append(close);
+        return new Token(close == '>' ? HEADER : STRING,
+                text.toString(), buf.toString());
+    }
+
+    /* Consumes an integer suffix, where d is the character after
+     * the last digit. Accepted forms are u or U optionally followed
+     * by l, L, ll or LL; l, L, ll or LL optionally followed by u or
+     * U; and a single I. Anything else is left unread. */
+    private void number_suffix(StringBuilder text, int d)
+                        throws IOException {
+        boolean unsigned = false;
+        if (d == 'U' || d == 'u') {
+            unsigned = true;
+            text.append((char)d);
+            d = read();
+        }
+        if (d == 'L' || d == 'l') {
+            text.append((char)d);
+            int e = read();
+            /* Both letters of a long long suffix have the same case. */
+            if (e == d) {
+                text.append((char)e);
+                e = read();
+            }
+            if (!unsigned && (e == 'U' || e == 'u'))
+                text.append((char)e);
+            else
+                unread(e);
+        }
+        else if (d == 'I') {
+            text.append((char)d);
+        }
+        else {
+            unread(d);
+        }
+    }
+
+    /* We already chewed a zero, so empty is fine. */
+    private Token number_octal()
+                        throws IOException,
+                                LexerException {
+        StringBuilder text = new StringBuilder("0");
+        int d = read();
+        long val = 0;
+        while (Character.digit(d, 8) != -1) {
+            val = (val << 3) + Character.digit(d, 8);
+            text.append((char)d);
+            d = read();
+        }
+        number_suffix(text, d);
+        return new Token(INTEGER,
+                text.toString(), Long.valueOf(val));
+    }
+
+    /* We do not know whether the first digit is valid. */
+    private Token number_hex(char x)
+                        throws IOException,
+                                LexerException {
+        StringBuilder text = new StringBuilder("0");
+        text.append(x);
+        int d = read();
+        if (Character.digit(d, 16) == -1) {
+            unread(d);
+            error("Illegal hexadecimal constant " + (char)d);
+            return new Token(ERROR, text.toString(), null);
+        }
+        long val = 0;
+        do {
+            val = (val << 4) + Character.digit(d, 16);
+            text.append((char)d);
+            d = read();
+        } while (Character.digit(d, 16) != -1);
+        number_suffix(text, d);
+        return new Token(INTEGER,
+                text.toString(), Long.valueOf(val));
+    }
+
+    /* We know we have at least one valid digit, but empty is not
+     * fine. */
+    /* XXX This needs a complete rewrite. */
+    private Token number_decimal(int c)
+                        throws IOException,
+                                LexerException {
+        /* Not new StringBuilder((char)c): that would select the
+         * capacity constructor. The loop appends the first digit. */
+        StringBuilder text = new StringBuilder();
+        int d = c;
+        long val = 0;
+        do {
+            val = val * 10 + Character.digit(d, 10);
+            text.append((char)d);
+            d = read();
+        } while (Character.digit(d, 10) != -1);
+        number_suffix(text, d);
+        return new Token(INTEGER,
+                text.toString(), Long.valueOf(val));
+    }
+
+    /* Lexes an identifier starting with c. Characters for which
+     * Character.isIdentifierIgnorable() holds are skipped. */
+    private Token identifier(int c)
+                        throws IOException,
+                                LexerException {
+        StringBuilder text = new StringBuilder();
+        int d;
+        text.append((char)c);
+        for (;;) {
+            d = read();
+            if (Character.isIdentifierIgnorable(d))
+                ;
+            else if (Character.isJavaIdentifierPart(d))
+                text.append((char)d);
+            else
+                break;
+        }
+        unread(d);
+        return new Token(IDENTIFIER, text.toString());
+    }
+
+    /* Lexes a run of whitespace starting with c. If ppvalid is set,
+     * the run stops before a line separator. */
+    private Token whitespace(int c)
+                        throws IOException,
+                                LexerException {
+        StringBuilder text = new StringBuilder();
+        int d;
+        text.append((char)c);
+        for (;;) {
+            d = read();
+            if (ppvalid && isLineSeparator(d)) /* XXX Ugly. */
+                break;
+            if (Character.isWhitespace(d))
+                text.append((char)d);
+            else
+                break;
+        }
+        unread(d);
+        return new Token(WHITESPACE, text.toString());
+    }
+
+    /* No token processed by cond() contains a newline. */
+    /* Returns a token of type yes if the next character is c, and
+     * otherwise leaves it unread and returns a token of type no. */
+    private Token cond(char c, int yes, int no)
+                        throws IOException {
+        int d = read();
+        if (c == d)
+            return new Token(yes);
+        unread(d);
+        return new Token(no);
+    }
+
+    /**
+     * Returns the next token of the input.
+     *
+     * At the end of the input a token of type EOF is returned.
+     */
+    public Token token()
+                        throws IOException,
+                                LexerException {
+        Token tok = null;
+
+        int _l = line;
+        int _c = column;
+
+        int c = read();
+        int d;
+
+        switch (c) {
+            case '\n':
+                if (ppvalid) {
+                    bol = true;
+                    if (include) {
+                        tok = new Token(NL, _l, _c, "\n");
+                    }
+                    else {
+                        /* Fold a run of newlines into one token. */
+                        int nls = 0;
+                        do {
+                            d = read();
+                            nls++;
+                        } while (d == '\n');
+                        unread(d);
+                        char[] text = new char[nls];
+                        for (int i = 0; i < text.length; i++)
+                            text[i] = '\n';
+                        tok = new Token(NL, _l, _c, new String(text));
+                    }
+                    if (DEBUG)
+                        System.out.println("lx: Returning NL: " + tok);
+                    /* Returning here keeps bol set. */
+                    return tok;
+                }
+                /* Let it be handled as whitespace. */
+                break;
+
+            case '!':
+                tok = cond('=', NE, '!');
+                break;
+
+            case '#':
+                if (bol)
+                    tok = new Token(HASH);
+                else
+                    tok = cond('#', PASTE, '#');
+                break;
+
+            case '+':
+                d = read();
+                if (d == '+')
+                    tok = new Token(INC);
+                else if (d == '=')
+                    tok = new Token(PLUS_EQ);
+                else
+                    unread(d);
+                break;
+            case '-':
+                d = read();
+                if (d == '-')
+                    tok = new Token(DEC);
+                else if (d == '=')
+                    tok = new Token(SUB_EQ);
+                else if (d == '>')
+                    tok = new Token(ARROW);
+                else
+                    unread(d);
+                break;
+
+            case '*':
+                tok = cond('=', MULT_EQ, '*');
+                break;
+            case '/':
+                d = read();
+                if (d == '*')
+                    tok = ccomment();
+                else if (d == '/')
+                    tok = cppcomment();
+                else if (d == '=')
+                    tok = new Token(DIV_EQ);
+                else
+                    unread(d);
+                break;
+
+            case '%':
+                tok = cond('=', MOD_EQ, '%');
+                break;
+
+            case ':':
+                /* :: */
+                break;
+
+            case '<':
+                if (include) {
+                    tok = string('<', '>');
+                }
+                else {
+                    d = read();
+                    if (d == '=')
+                        tok = new Token(LE);
+                    else if (d == '<')
+                        tok = cond('=', LSH_EQ, LSH);
+                    else
+                        unread(d);
+                }
+                break;
+
+            case '=':
+                tok = cond('=', EQ, '=');
+                break;
+
+            case '>':
+                d = read();
+                if (d == '=')
+                    tok = new Token(GE);
+                else if (d == '>')
+                    tok = cond('=', RSH_EQ, RSH);
+                else
+                    unread(d);
+                break;
+
+            case '^':
+                tok = cond('=', XOR_EQ, '^');
+                break;
+
+            case '|':
+                d = read();
+                if (d == '=')
+                    tok = new Token(OR_EQ);
+                else if (d == '|')
+                    tok = cond('=', LOR_EQ, LOR);
+                else
+                    unread(d);
+                break;
+            case '&':
+                d = read();
+                if (d == '&')
+                    tok = cond('=', LAND_EQ, LAND);
+                else if (d == '=')
+                    tok = new Token(AND_EQ);
+                else
+                    unread(d);
+                break;
+
+            case '.':
+                d = read();
+                if (d == '.')
+                    tok = cond('.', ELLIPSIS, RANGE);
+                else
+                    unread(d);
+                /* XXX decimal fraction */
+                break;
+
+            case '0':
+                /* octal or hex */
+                d = read();
+                if (d == 'x' || d == 'X')
+                    tok = number_hex((char)d);
+                else {
+                    unread(d);
+                    tok = number_octal();
+                }
+                break;
+
+            case '\'':
+                tok = character();
+                break;
+
+            case '"':
+                tok = string('"', '"');
+                break;
+
+            case -1:
+                tok = new Token(EOF, _l, _c, "<eof>");
+                break;
+        }
+
+        /* Anything not recognised above is classified by its first
+         * character, or returned as a single-character token. */
+        if (tok == null) {
+            if (Character.isWhitespace(c)) {
+                tok = whitespace(c);
+            }
+            else if (Character.isDigit(c)) {
+                tok = number_decimal(c);
+            }
+            else if (Character.isJavaIdentifierStart(c)) {
+                tok = identifier(c);
+            }
+            else {
+                tok = new Token(c);
+            }
+        }
+
+        /* Whitespace and comments do not end the beginning-of-line
+         * state, so that an indented '#' is still lexed as HASH. */
+        switch (tok.getType()) {
+            case WHITESPACE:
+            case COMMENT:
+                break;
+            default:
+                bol = false;
+                break;
+        }
+
+        tok.setLocation(_l, _c);
+        if (DEBUG)
+            System.out.println("lx: Returning " + tok);
+        return tok;
+    }
+
+}
diff --git a/src/java/org/anarres/cpp/Macro.java b/src/java/org/anarres/cpp/Macro.java
new file mode 100644
index 0000000..0d0ae55
--- /dev/null
+++ b/src/java/org/anarres/cpp/Macro.java
@@ -0,0 +1,157 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * A macro object.
+ *
+ * This encapsulates a name, an optional argument list, and a token
+ * stream for replacement. The replacement token stream may contain
+ * the extra tokens {@link Token#M_ARG}, {@link Token#M_STRING} and
+ * {@link Token#M_PASTE}.
+ */
+public class Macro {
+    private String name;
+    /* It's an explicit decision to keep these around here. We don't
+     * need to; the argument token type is M_ARG and the value
+     * is the index. The strings themselves are only used in
+     * stringification of the macro, for debugging. */
+    private List<String> args;
+    private boolean variadic;
+    private List<Token> tokens;
+
+    /**
+     * Creates an object-like macro with the given name and an
+     * empty expansion.
+     */
+    public Macro(String name) {
+        this.name = name;
+        this.args = null;
+        this.variadic = false;
+        this.tokens = new ArrayList<Token>();
+    }
+
+    /**
+     * Returns the name of this macro.
+     */
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * Sets the arguments to this macro.
+     *
+     * A non-null list, even an empty one, makes this a
+     * function-like macro.
+     */
+    public void setArgs(List<String> args) {
+        this.args = args;
+    }
+
+    /**
+     * Returns true if this is a function-like macro.
+     */
+    public boolean isFunctionLike() {
+        return args != null;
+    }
+
+    /**
+     * Returns the number of arguments to this macro.
+     *
+     * An object-like macro, which has no argument list at all,
+     * reports zero rather than failing.
+     */
+    public int getArgs() {
+        return (args == null) ? 0 : args.size();
+    }
+
+    /**
+     * Sets the variadic flag on this Macro.
+     */
+    public void setVariadic(boolean b) {
+        this.variadic = b;
+    }
+
+    /**
+     * Returns true if this is a variadic function-like macro.
+     */
+    public boolean isVariadic() {
+        return variadic;
+    }
+
+    /**
+     * Adds a token to the expansion of this macro.
+     */
+    public void addToken(Token tok) {
+        this.tokens.add(tok);
+    }
+
+    /**
+     * Adds a "paste" operator to the expansion of this macro.
+     *
+     * A paste operator causes the next token added to be pasted
+     * to the previous token when the macro is expanded.
+     * It is an error for a macro to begin or end with a paste token.
+     *
+     * @throws IllegalStateException if the expansion is still empty,
+     *         so that there is no previous token to paste onto.
+     */
+    public void addPaste(Token tok) {
+        /*
+         * Given: tok0 ## tok1
+         * We generate: M_PASTE, tok0, tok1
+         * This extends as per a stack language:
+         * tok0 ## tok1 ## tok2 ->
+         *   M_PASTE, tok0, M_PASTE, tok1, tok2
+         */
+        if (tokens.isEmpty())
+            throw new IllegalStateException(
+                    "Paste operator at start of expansion of macro " + name);
+        /* Insert the operator in front of its left-hand operand. */
+        this.tokens.add(tokens.size() - 1, tok);
+    }
+
+    /* pp */ List<Token> getTokens() {
+        return tokens;
+    }
+
+    /**
+     * Returns a debugging representation: the name, the argument
+     * list if function-like, and the expansion with paste operators
+     * rendered back in infix position.
+     */
+    public String toString() {
+        StringBuilder buf = new StringBuilder(name);
+        if (args != null) {
+            buf.append('(');
+            Iterator<String> it = args.iterator();
+            while (it.hasNext()) {
+                buf.append(it.next());
+                if (it.hasNext())
+                    buf.append(", ");
+                else if (isVariadic())
+                    buf.append("...");
+            }
+            buf.append(')');
+        }
+        if (!tokens.isEmpty()) {
+            boolean paste = false;
+            buf.append(" => ");
+            for (Token tok : tokens) {
+                if (tok.getType() == Token.M_PASTE) {
+                    /* Stored in prefix position; emitted after
+                     * the operand which follows it. */
+                    paste = true;
+                    continue;
+                }
+                buf.append(tok.getText());
+                if (paste) {
+                    buf.append(" #" + "# ");
+                    paste = false;
+                }
+            }
+        }
+        return buf.toString();
+    }
+
+}
diff --git a/src/java/org/anarres/cpp/MacroTokenSource.java b/src/java/org/anarres/cpp/MacroTokenSource.java
new file mode 100644
index 0000000..249afdf
--- /dev/null
+++ b/src/java/org/anarres/cpp/MacroTokenSource.java
@@ -0,0 +1,191 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A Source which yields the expansion of one macro invocation.
+ *
+ * It walks the macro's replacement list, substituting arguments
+ * for M_ARG, stringified arguments for M_STRING, and re-lexed
+ * concatenations for M_PASTE.
+ */
+/* pp */ class MacroTokenSource extends Source {
+    private Macro macro;
+    private Iterator<Token> tokens; /* Pointer into the macro. */
+    private List<Argument> args; /* { unexpanded, expanded } */
+    private Iterator<Token> arg; /* "current expansion" */
+
+    /* pp */ MacroTokenSource(Macro m, List<Argument> args) {
+        this.macro = m;
+        this.tokens = m.getTokens().iterator();
+        this.args = args;
+        this.arg = null;
+    }
+
+    @Override
+    /* pp */ boolean isExpanding(Macro m) {
+        /* When we are expanding an arg, 'this' macro is not
+         * being expanded, and thus we may re-expand it. */
+        if (/* XXX this.arg == null && */ this.macro == m)
+            return true;
+        return super.isExpanding(m);
+    }
+
+    /* Appends cs to buf, escaped for use inside a string literal. */
+    private static void escape(StringBuilder buf, CharSequence cs) {
+        for (int i = 0; i < cs.length(); i++) {
+            char c = cs.charAt(i);
+            switch (c) {
+                case '\\':
+                    buf.append("\\\\");
+                    break;
+                case '"':
+                    buf.append("\\\"");
+                    break;
+                case '\n':
+                    buf.append("\\n");
+                    break;
+                case '\r':
+                    buf.append("\\r");
+                    break;
+                default:
+                    buf.append(c);
+            }
+        }
+    }
+
+    /* Appends the text of every token of arg to buf. */
+    private void concat(StringBuilder buf, Argument arg) {
+        Iterator<Token> it = arg.iterator();
+        while (it.hasNext()) {
+            Token tok = it.next();
+            buf.append(tok.getText());
+        }
+    }
+
+    /* Builds a STRING token from the text of arg, located at pos. */
+    private Token stringify(Token pos, Argument arg) {
+        StringBuilder buf = new StringBuilder();
+        concat(buf, arg);
+        StringBuilder str = new StringBuilder("\"");
+        escape(str, buf);
+        str.append('\"');
+        return new Token(STRING,
+                pos.getLine(), pos.getColumn(),
+                str.toString(), buf.toString());
+    }
+
+
+    /* At this point, we have consumed the first M_PASTE.
+     * @see Macro#addPaste(Token) */
+    private void paste(Token ptok)
+                        throws IOException,
+                                LexerException {
+        StringBuilder buf = new StringBuilder();
+        /* We know here that arg is null or expired,
+         * since we cannot paste an expanded arg. */
+
+        int count = 2;
+        for (int i = 0; i < count; i++) {
+            if (!tokens.hasNext()) {
+                error(ptok.getLine(), ptok.getColumn(),
+                        "Paste at end of expansion");
+                /* The listener may choose not to throw; there is
+                 * nothing left to read, so stop collecting. */
+                break;
+            }
+            Token tok = tokens.next();
+            switch (tok.getType()) {
+                case M_PASTE:
+                    /* One extra to paste, plus one because the
+                     * paste token didn't count. */
+                    count += 2;
+                    ptok = tok;
+                    break;
+                case M_ARG: {
+                    int idx = ((Integer)tok.getValue()).intValue();
+                    concat(buf, args.get(idx));
+                    break;
+                }
+                /* XXX Test this. */
+                case COMMENT:
+                    break;
+                default:
+                    buf.append(tok.getText());
+                    break;
+            }
+        }
+
+        /* XXX Somewhere here, need to check that concatenation
+         * produces a valid token. */
+
+        /* Push and re-lex. The text is lexed exactly as pasted:
+         * escaping it (as stringify does) would turn the quotes and
+         * backslashes of a pasted literal into stray tokens. */
+        StringLexerSource sl = new StringLexerSource(buf.toString());
+
+        arg = new SourceIterator(sl);
+    }
+
+    /**
+     * Returns the next token of the expansion, or an EOF token once
+     * the replacement list is exhausted.
+     */
+    public Token token()
+                        throws IOException,
+                                LexerException {
+        for (;;) {
+            /* Deal with lexed tokens first. */
+
+            if (arg != null) {
+                if (arg.hasNext())
+                    return arg.next();
+                arg = null;
+            }
+
+            if (!tokens.hasNext())
+                return new Token(EOF, -1, -1, ""); /* End of macro. */
+            Token tok = tokens.next();
+            int idx;
+            switch (tok.getType()) {
+                case M_STRING:
+                    /* Use the nonexpanded arg. */
+                    idx = ((Integer)tok.getValue()).intValue();
+                    return stringify(tok, args.get(idx));
+                case M_ARG:
+                    /* Expand the arg. */
+                    idx = ((Integer)tok.getValue()).intValue();
+                    // System.out.println("Pushing arg " + args.get(idx));
+                    arg = args.get(idx).expansion();
+                    break;
+                case M_PASTE:
+                    paste(tok);
+                    break;
+                default:
+                    return tok;
+            }
+        } /* for */
+    }
+
+    public String toString() {
+        StringBuilder buf = new StringBuilder();
+        buf.append("expansion of ").append(macro.getName());
+        Source parent = getParent();
+        if (parent != null)
+            buf.append(" in ").append(String.valueOf(parent));
+        return buf.toString();
+    }
+}
diff --git a/src/java/org/anarres/cpp/Main.java b/src/java/org/anarres/cpp/Main.java
new file mode 100644
index 0000000..cec7a37
--- /dev/null
+++ b/src/java/org/anarres/cpp/Main.java
@@ -0,0 +1,111 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.File;
+import java.io.IOException;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * (Currently a simple test class).
+ *
+ * Preprocesses the file named by the first argument, printing the
+ * text of every token, then dumps all macros known at the end of
+ * the run. Any further arguments are pushed as additional sources.
+ */
+public class Main {
+
+    /* Token output format: 0 prints the Token itself, 1 prints the
+     * text in brackets, 2 prints the bare text. */
+    private static final int OUTPUT_MODE = 2;
+
+    public static void main(String[] args) throws Exception {
+        if (args.length == 0) {
+            /* Without this, args[0] below fails with a bare
+             * ArrayIndexOutOfBoundsException. */
+            System.err.println(
+                    "Usage: java org.anarres.cpp.Main <file> [<file>...]");
+            return;
+        }
+
+        List<String> path = new ArrayList<String>();
+        path.add("/usr/include");
+        path.add("/usr/local/include");
+        path.add("/usr/lib/gcc/i686-pc-linux-gnu/4.1.2/include");
+
+        Source source = new FileLexerSource(new File(args[0]));
+        Preprocessor pp = new Preprocessor(source);
+        pp.setIncludePath(path);
+
+        for (int i = 1; i < args.length; i++) {
+            pp.push_source(new FileLexerSource(new File(args[i])),true);
+        }
+
+        Macro m = new Macro("__WORDSIZE");
+        m.addToken(new Token(INTEGER, -1, -1, "32", Integer.valueOf(32)));
+        pp.addMacro(m);
+
+        m = new Macro("__STDC__");
+        m.addToken(new Token(INTEGER, -1, -1, "1", Integer.valueOf(1)));
+        pp.addMacro(m);
+
+        try {
+            for (;;) {
+                Token tok = pp.token();
+                /* A null token is treated as end of input instead
+                 * of being dereferenced below. */
+                if (tok == null || tok.getType() == Token.EOF)
+                    break;
+                switch (OUTPUT_MODE) {
+                    case 0:
+                        System.out.print(tok);
+                        break;
+                    case 1:
+                        System.out.print("[" + tok.getText() + "]");
+                        break;
+                    case 2:
+                        System.out.print(tok.getText());
+                        break;
+                }
+            }
+        }
+        catch (Exception e) {
+            /* Report the failure together with the stack of
+             * sources which were being read at the time. */
+            e.printStackTrace();
+            Source s = pp.getSource();
+            while (s != null) {
+                System.out.println(" -> " + s);
+                s = s.getParent();
+            }
+        }
+
+        /* Dump every macro, sorted by name. */
+        Map<String,Macro> macros = pp.getMacros();
+        List<String> keys = new ArrayList<String>(
+                macros.keySet()
+                        );
+        Collections.sort(keys);
+        Iterator<String> mt = keys.iterator();
+        while (mt.hasNext()) {
+            String key = mt.next();
+            Macro macro = macros.get(key);
+            System.out.println("#" + "macro " + macro);
+        }
+
+    }
+
+}
diff --git a/src/java/org/anarres/cpp/Preprocessor.java b/src/java/org/anarres/cpp/Preprocessor.java
new file mode 100644
index 0000000..c1b87d7
--- /dev/null
+++ b/src/java/org/anarres/cpp/Preprocessor.java
@@ -0,0 +1,1511 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.File;
+import java.io.IOException;
+
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A C Preprocessor.
+ * The Preprocessor outputs a token stream which does not need
+ * re-lexing for C or C++. Alternatively, the output text may be
+ * reconstructed by concatenating the {@link Token#getText() text}
+ * values of the returned {@link Token Tokens}. (See
+ * {@link CppReader}, which does this.)
+ */
+public class Preprocessor {
+ private static final boolean DEBUG = false;
+
+ public static final int FL_LINEMARKER = 1;
+
+ private static final Macro __LINE__ = new Macro("__LINE__");
+ private static final Macro __FILE__ = new Macro("__FILE__");
+
+ private Map<String,Macro> macros;
+ private Stack<State> states;
+ private Source source;
+
+ private List<String> path;
+ private PreprocessorListener listener;
+
+ private int flags;
+
+ /**
+  * Creates a Preprocessor which reads from the given Source.
+  *
+  * The macro table starts out holding __LINE__ and __FILE__, and
+  * a default PreprocessorListener is installed.
+  *
+  * @param initial the first Source on the input stack.
+  * @param flags a bit set of FL_* values.
+  */
+ public Preprocessor(Source initial, int flags) {
+     Map<String,Macro> builtins = new HashMap<String,Macro>();
+     builtins.put(__LINE__.getName(), __LINE__);
+     builtins.put(__FILE__.getName(), __FILE__);
+     this.macros = builtins;
+
+     Stack<State> stack = new Stack<State>();
+     stack.push(new State());
+     this.states = stack;
+
+     this.source = null;
+     this.path = null;
+     setListener(new PreprocessorListener());
+     setFlags(flags);
+
+     push_source(initial, false);
+
+     /* We need to get a \n onto the end of this somehow. */
+     boolean linemarkers = (flags & FL_LINEMARKER) != 0;
+     if (linemarkers) {
+         Token marker = line_token(1, source.getName(), "\n");
+         source_untoken(marker);
+     }
+ }
+
+ /**
+  * Creates a Preprocessor which reads from the given Source,
+  * with no flags set.
+  */
+ public Preprocessor(Source initial) {
+ this(initial, 0);
+ }
+
+ /**
+  * Creates a Preprocessor which reads from the given file, with
+  * no flags set. Equivalent to
+  * 'new Preprocessor(new {@link FileLexerSource}(file))'
+  *
+  * @throws IOException declared because constructing the
+  *         FileLexerSource may fail.
+  */
+ public Preprocessor(File file)
+ throws IOException {
+ this(new FileLexerSource(file), 0);
+ }
+
+ /**
+  * Installs the listener which receives diagnostics and
+  * source-change events.
+  *
+  * The listener is also handed to every Source currently on the
+  * input stack.
+  */
+ public void setListener(PreprocessorListener listener) {
+     this.listener = listener;
+     for (Source s = source; s != null; s = s.getParent())
+         s.setListener(listener);
+ }
+
+ /**
+  * Sets the flags of this Preprocessor.
+  *
+  * @param flags a bit set of FL_* values, such as
+  *        {@link #FL_LINEMARKER}.
+  */
+ public void setFlags(int flags) {
+ this.flags = flags;
+ }
+
+ /**
+  * Reports an error at the position of the given token.
+  *
+  * The error goes to the installed PreprocessorListener; when
+  * none is installed, it is silently dropped.
+  */
+ protected void error(Token tok, String msg)
+                     throws LexerException {
+     if (listener == null)
+         return;
+     int line = tok.getLine();
+     int column = tok.getColumn();
+     listener.handleError(source, line, column, msg);
+ }
+
+ /**
+  * Handles a warning.
+  *
+  * If a PreprocessorListener is installed, it receives the
+  * warning. Otherwise, it is ignored.
+  */
+ protected void warning(Token tok, String msg)
+                     throws LexerException {
+     /* Warnings go through the warning callback, not the error
+      * callback, so that the listener can tell them apart. */
+     if (listener != null)
+         listener.handleWarning(source,
+                 tok.getLine(), tok.getColumn(),
+                 msg);
+ }
+
+/*
+ public void setSource(Source source) {
+ this.source = source;
+ }
+*/
+
+ public void addMacro(Macro m) throws LexerException {
+ String name = m.getName();
+ /* Already handled as a source error in macro(). */
+ if ("defined".equals(name))
+ throw new LexerException("Cannot redefine name 'defined'");
+ macros.put(m.getName(), m);
+ }
+
+ /**
+  * Defines the given name as an object-like macro whose
+  * expansion is the token sequence lexed from the given value.
+  *
+  * This is a convenience method.
+  */
+ public void addMacro(String name, String value)
+                     throws LexerException {
+     Macro m = new Macro(name);
+     try {
+         StringLexerSource s = new StringLexerSource(value);
+         Token tok = s.token();
+         while (tok.getType() != EOF) {
+             m.addToken(tok);
+             tok = s.token();
+         }
+     }
+     catch (IOException e) {
+         throw new LexerException(e);
+     }
+     addMacro(m);
+ }
+
+ /**
+  * Defines the given name as a macro with the value "1".
+  *
+  * This is a convenience method.
+  */
+ public void addMacro(String name)
+ throws LexerException {
+ addMacro(name, "1");
+ }
+
+ /**
+  * Sets the include path used by this Preprocessor.
+  *
+  * The list is stored as given, not copied.
+  *
+  * @param path the directories to search, in order.
+  */
+ /* Note for future: Create an IncludeHandler? */
+ public void setIncludePath(List<String> path) {
+ this.path = path;
+ }
+
+ /**
+  * Returns the Map of Macros parsed during the run of this
+  * Preprocessor.
+  *
+  * This is the live internal table, keyed by macro name, not
+  * a copy.
+  */
+ protected Map<String,Macro> getMacros() {
+ return macros;
+ }
+
+
+/* States */
+
+ /* Pushes a new conditional State derived from the current
+  * top of the state stack. */
+ private void push_state() {
+     states.push(new State(states.peek()));
+ }
+
+ /* Pops the top conditional State. The bottom State is never
+  * really removed: popping it reports an error and puts it
+  * straight back. */
+ private void pop_state()
+                     throws LexerException {
+     State popped = states.pop();
+     if (!states.isEmpty())
+         return;
+     if (listener != null)
+         listener.handleError(getSource(), 0, 0,
+                 "#" + "endif without #" + "if");
+     states.push(popped);
+ }
+
+ /* True when both the top State and its parent are active. */
+ private boolean isActive() {
+     State top = states.peek();
+     if (!top.isParentActive())
+         return false;
+     return top.isActive();
+ }
+
+
+/* Sources */
+
+ /**
+  * Returns the top Source on the input stack.
+  *
+  * This may be null, for example after the last Source has
+  * been popped.
+  *
+  * @see Source
+  * @see #push_source(Source,boolean)
+  * @see #pop_source()
+  */
+ protected Source getSource() {
+ return source;
+ }
+
+ /**
+  * Pushes a Source onto the input stack.
+  *
+  * The previous top becomes the parent of the new Source, the
+  * new Source is given the current listener, and the listener
+  * (if any) is told about the "push".
+  *
+  * @param source the Source to read from next.
+  * @param autopop passed through to {@link Source#setParent};
+  *        see source_token() for how it is used at EOF.
+  * @see #getSource()
+  * @see #pop_source()
+  */
+ protected void push_source(Source source, boolean autopop) {
+     Source parent = this.source;
+     source.setParent(parent, autopop);
+     source.setListener(listener);
+     this.source = source;
+     if (listener == null)
+         return;
+     listener.handleSourceChange(source, "push");
+ }
+
+ /**
+  * Pops a Source from the input stack.
+  *
+  * The parent of the current Source becomes the top, and the
+  * listener (if any) is told about the "pop".
+  *
+  * @see #getSource()
+  * @see #push_source(Source,boolean)
+  */
+ protected void pop_source() {
+     Source parent = this.source.getParent();
+     this.source = parent;
+     if (listener != null)
+         listener.handleSourceChange(parent, "pop");
+ }
+
+
+/* Source tokens */
+
+ private Token source_token;
+
+ /* Builds a P_LINE token whose text is a line marker for the
+  * given line and file name, followed by 'extra'. */
+ private Token line_token(int line, String name, String extra) {
+     StringBuilder text = new StringBuilder("#line ");
+     text.append(line);
+     text.append(" \"").append(name).append("\"");
+     text.append(extra);
+     return new Token(P_LINE, line, 0, text.toString(), null);
+ }
+
+ /* Returns the next raw token: the pushed-back token if there
+  * is one, otherwise the next token of the current Source.
+  * A Source marked autopop is popped when it reaches EOF, and
+  * reading carries on in its parent. */
+ private Token source_token()
+                     throws IOException,
+                             LexerException {
+     Token pending = source_token;
+     if (pending != null) {
+         source_token = null;
+         return pending;
+     }
+
+     Token tok = source.token();
+     while (tok.getType() == EOF && source.isAutopop()) {
+         // System.out.println("Autopop " + source);
+         Source finished = source;
+         pop_source();
+         boolean linemarkers = (flags & FL_LINEMARKER) != 0;
+         if (linemarkers && finished.isNumbered()) {
+             /* Not perfect, but ... */
+             source_untoken(new Token(NL, source.getLine(), 0, "\n"));
+             return line_token(source.getLine(), source.getName(), "");
+         }
+         tok = source.token();
+     }
+     return tok;
+ }
+
+ /* Pushes back one token, to be returned by the next call to
+  * source_token(). Only one token may be pending at a time. */
+ private void source_untoken(Token tok) {
+     if (this.source_token == null) {
+         this.source_token = tok;
+         return;
+     }
+     throw new IllegalStateException("Cannot return two tokens");
+ }
+
+ /* True for WHITESPACE and COMMENT tokens. */
+ private boolean isWhite(Token tok) {
+     switch (tok.getType()) {
+         case WHITESPACE:
+         case COMMENT:
+             return true;
+         default:
+             return false;
+     }
+ }
+
+ /* Returns the next raw token which is neither whitespace nor
+  * a comment. */
+ private Token source_token_nonwhite()
+                     throws IOException,
+                             LexerException {
+     for (;;) {
+         Token tok = source_token();
+         if (!isWhite(tok))
+             return tok;
+     }
+ }
+
+ /**
+  * Skips the rest of the current line by delegating to the
+  * current Source.
+  *
+  * Returns an NL or an EOF token.
+  *
+  * The metadata on the token will be correct, which is better
+  * than generating a new one.
+  *
+  * @param white passed straight to {@link Source#skipline};
+  *        NOTE(review): its meaning is defined there, not in
+  *        this method.
+  */
+ private Token source_skipline(boolean white)
+ throws IOException,
+ LexerException {
+ // (new Exception("skipping line")).printStackTrace(System.out);
+ return source.skipline(white);
+ }
+
+ /* Processes and expands a macro.
+  *
+  * For a function-like macro, this reads the argument list from
+  * the raw token stream, splits it on top-level commas, expands
+  * every argument, and then pushes a Source yielding the
+  * expansion. An object-like macro has its Source pushed at once.
+  * __LINE__ and __FILE__ are special-cased and yield one token
+  * built from the position of 'orig' and from the current file.
+  *
+  * m is the macro to expand; orig is the identifier token which
+  * named it.
+  *
+  * Returns true if a Source for the expansion was pushed. Returns
+  * false if a function-like macro name was not followed by '('
+  * (the offending token is pushed back), on EOF inside the
+  * arguments, or when the argument count is wrong; in the last
+  * two cases the tokens read so far are lost. */
+ private boolean macro(Macro m, Token orig)
+ throws IOException,
+ LexerException {
+ Token tok;
+ List<Argument> args;
+
+ // System.out.println("pp: expanding " + m);
+
+ if (m.isFunctionLike()) {
+ /* Look for the opening parenthesis, skipping whitespace,
+  * comments and newlines. The skipped tokens are dropped even
+  * when no parenthesis follows. */
+ OPEN: for (;;) {
+ tok = source_token();
+ // System.out.println("pp: open: token is " + tok);
+ switch (tok.getType()) {
+ case WHITESPACE: /* XXX Really? */
+ case COMMENT:
+ case NL:
+ break; /* continue */
+ case '(':
+ break OPEN;
+ default:
+ source_untoken(tok);
+ return false;
+ }
+ }
+
+ // tok = expanded_token_nonwhite();
+ tok = source_token_nonwhite();
+
+ /* We either have, or we should have args.
+ * This deals elegantly with the case that we have
+ * one empty arg. */
+ if (tok.getType() != ')' || m.getArgs() > 0) {
+ args = new ArrayList<Argument>();
+
+ Argument arg = new Argument();
+ /* Nesting depth of parentheses inside the current argument. */
+ int depth = 0;
+ /* True when whitespace was seen since the last real token. */
+ boolean space = false;
+
+ ARGS: for (;;) {
+ // System.out.println("pp: arg: token is " + tok);
+ switch (tok.getType()) {
+ case EOF:
+ error(tok, "EOF in macro args");
+ return false;
+
+ case ',':
+ if (depth == 0) {
+ if (m.isVariadic() &&
+ /* We are building the last arg. */
+ args.size() == m.getArgs() - 1) {
+ /* Just add the comma. */
+ arg.addToken(tok);
+ }
+ else {
+ args.add(arg);
+ arg = new Argument();
+ }
+ }
+ else {
+ arg.addToken(tok);
+ }
+ space = false;
+ break;
+ case ')':
+ if (depth == 0) {
+ args.add(arg);
+ break ARGS;
+ }
+ else {
+ depth--;
+ arg.addToken(tok);
+ }
+ space = false;
+ break;
+ case '(':
+ depth++;
+ arg.addToken(tok);
+ space = false;
+ break;
+
+ case WHITESPACE:
+ case COMMENT:
+ /* Avoid duplicating spaces. */
+ space = true;
+ break;
+
+ default:
+ /* Do not put space on the beginning of
+ * an argument token. */
+ if (space && ! arg.isEmpty())
+ arg.addToken(Token.space);
+ arg.addToken(tok);
+ space = false;
+ break;
+
+ }
+ // tok = expanded_token();
+ tok = source_token();
+ }
+ /* space may still be true here, thus trailing space
+ * is stripped from arguments. */
+
+ if (args.size() != m.getArgs()) {
+ error(tok,
+ "macro " + m.getName() +
+ " has " + m.getArgs() + " parameters " +
+ "but given " + args.size() + " args");
+ /* We could replay the arg tokens, but I
+ * note that GNU cpp does exactly what we do,
+ * i.e. output the macro name and chew the args.
+ */
+ return false;
+ }
+
+ /* Arguments are expanded eagerly, before the macro body is
+  * read. */
+ for (int i = 0; i < args.size(); i++) {
+ args.get(i).expand(this);
+ }
+
+ // System.out.println("Macro " + m + " args " + args);
+ }
+ else {
+ /* nargs == 0 and we (correctly) got () */
+ args = null;
+ }
+
+ }
+ else {
+ /* Macro without args. */
+ args = null;
+ }
+
+ /* The two built-in macros are recognised by identity. */
+ if (m == __LINE__) {
+ push_source(new FixedTokenSource(
+ new Token[] { new Token(INTEGER,
+ orig.getLine(), orig.getColumn(),
+ String.valueOf(orig.getLine()),
+ Integer.valueOf(orig.getLine())) }
+ ), true);
+ }
+ else if (m == __FILE__) {
+ File file = source.getFile();
+ push_source(new FixedTokenSource(
+ new Token[] { new Token(STRING,
+ orig.getLine(), orig.getColumn(),
+ '"'+ String.valueOf(file) +'"',
+ file) }
+ ), true);
+ }
+ else {
+ push_source(new MacroTokenSource(m, args), true);
+ }
+
+ return true;
+ }
+
+ /**
+  * Expands an argument.
+  *
+  * The tokens are replayed through the macro expander. Runs of
+  * whitespace and comments collapse to a single space token,
+  * and leading and trailing space is dropped.
+  */
+ /* I'd rather this were done lazily. */
+ /* pp */ List<Token> expand(List<Token> arg)
+                     throws IOException,
+                             LexerException {
+     List<Token> expansion = new ArrayList<Token>();
+     boolean space = false;
+
+     push_source(new FixedTokenSource(arg), false);
+     for (Token tok = expanded_token();
+             tok.getType() != EOF;
+             tok = expanded_token()) {
+         if (isWhite(tok)) {
+             space = true;
+             continue;
+         }
+         if (space && !expansion.isEmpty())
+             expansion.add(Token.space);
+         expansion.add(tok);
+         space = false;
+     }
+     pop_source();
+
+     return expansion;
+ }
+
+ /* Ends a rejected #define line. If 'tok' already is the NL or
+  * EOF which terminates the line, it is returned as is, because
+  * skipping again would swallow the following line. Otherwise
+  * the rest of the line is skipped. */
+ private Token define_skipline(Token tok)
+                     throws IOException,
+                             LexerException {
+     switch (tok.getType()) {
+         case NL:
+         case EOF:
+             return tok;
+         default:
+             return source_skipline(false);
+     }
+ }
+
+ /**
+  * Processes a #define directive.
+  *
+  * Reads the macro name, the optional parameter list and the
+  * replacement list up to the end of the line, then registers
+  * the macro. Parameters in the replacement list are stored as
+  * M_ARG tokens, stringified parameters as M_STRING tokens, and
+  * paste operators as M_PASTE tokens.
+  *
+  * A malformed directive is reported and discarded as a whole:
+  * no macro is defined, and nothing beyond the end of the
+  * directive's own line is consumed.
+  *
+  * @return the NL or EOF token which ended the directive.
+  */
+ private Token define()
+                     throws IOException,
+                             LexerException {
+     Token tok = source_token_nonwhite();
+     if (tok.getType() != IDENTIFIER) {
+         error(tok, "Expected identifier");
+         return define_skipline(tok);
+     }
+     /* if predefined */
+
+     String name = tok.getText();
+     if ("defined".equals(name)) {
+         error(tok, "Cannot redefine name 'defined'");
+         return source_skipline(false);
+     }
+
+     Macro m = new Macro(name);
+     List<String> args;
+
+     /* Only a '(' directly after the name starts a parameter
+      * list, hence the raw read here. */
+     tok = source_token();
+     if (tok.getType() == '(') {
+         tok = source_token_nonwhite();
+         if (tok.getType() != ')') {
+             args = new ArrayList<String>();
+             ARGS: for (;;) {
+                 /* Expect a parameter name. */
+                 switch (tok.getType()) {
+                     case IDENTIFIER:
+                         args.add(tok.getText());
+                         break;
+                     // case ELLIPSIS:
+                     case NL:
+                     case EOF:
+                         error(tok,
+                             "Unterminated macro parameter list");
+                         return tok;
+                     default:
+                         error(tok,
+                             "error in macro parameters: " +
+                             tok.getText());
+                         return source_skipline(false);
+                 }
+                 /* Expect a separator or the end of the list. */
+                 tok = source_token_nonwhite();
+                 switch (tok.getType()) {
+                     case ',':
+                         break;
+                     case ')':
+                         tok = source_token_nonwhite();
+                         break ARGS;
+                     case ELLIPSIS:
+                         tok = source_token_nonwhite();
+                         if (tok.getType() != ')') {
+                             error(tok,
+                                 "ellipsis must be on last argument");
+                             return define_skipline(tok);
+                         }
+                         m.setVariadic(true);
+                         tok = source_token_nonwhite();
+                         break ARGS;
+
+                     case NL:
+                     case EOF:
+                         /* Do not skip line. */
+                         error(tok,
+                             "Unterminated macro definition");
+                         return tok;
+                     default:
+                         error(tok,
+                             "bad token in macro parameters: " +
+                             tok.getText());
+                         return source_skipline(false);
+                 }
+                 tok = source_token_nonwhite();
+             }
+         }
+         else {
+             tok = source_token_nonwhite();  /* Lose the ')' */
+             args = Collections.emptyList();
+         }
+
+         m.setArgs(args);
+     }
+     else {
+         /* For searching. */
+         args = Collections.emptyList();
+         if (tok.getType() == COMMENT ||
+                 tok.getType() == WHITESPACE) {
+             tok = source_token_nonwhite();
+         }
+     }
+
+     /* Get an expansion for the macro, using indexOf. */
+     boolean space = false;
+     boolean paste = false;
+     /* XXX UGLY: Ensure no space at start.
+      * Careful not to break EOF/LF from above. */
+     if (isWhite(tok))   /* XXX Not sure this can ever happen now. */
+         tok = source_token_nonwhite();
+     int idx;
+
+     EXPANSION: for (;;) {
+         switch (tok.getType()) {
+             case EOF:
+                 break EXPANSION;
+             case NL:
+                 break EXPANSION;
+
+             case COMMENT:
+                 // break;
+             case WHITESPACE:
+                 if (!paste)
+                     space = true;
+                 break;
+
+             case PASTE:
+                 space = false;
+                 if (m.getTokens().isEmpty()) {
+                     /* There is nothing to paste onto, and
+                      * Macro.addPaste() cannot insert before a
+                      * token which does not exist. Report it
+                      * and drop the operator. */
+                     error(tok,
+                         "Paste operator at start of macro expansion");
+                     break;
+                 }
+                 paste = true;
+                 m.addPaste(new Token(M_PASTE,
+                         tok.getLine(), tok.getColumn(),
+                         "#" + "#", null));
+                 break;
+
+             case '#':
+                 if (space)
+                     m.addToken(Token.space);
+                 space = false;
+                 Token la = source_token_nonwhite();
+                 if (la.getType() == IDENTIFIER &&
+                     ((idx = args.indexOf(la.getText())) != -1)) {
+                     m.addToken(new Token(M_STRING,
+                             la.getLine(), la.getColumn(),
+                             "#" + la.getText(),
+                             Integer.valueOf(idx)));
+                 }
+                 else {
+                     m.addToken(tok);
+                     /* Allow for special processing. */
+                     source_untoken(la);
+                 }
+                 break;
+
+             case IDENTIFIER:
+                 if (space)
+                     m.addToken(Token.space);
+                 space = false;
+                 paste = false;
+                 idx = args.indexOf(tok.getText());
+                 if (idx == -1)
+                     m.addToken(tok);
+                 else
+                     m.addToken(new Token(M_ARG,
+                             tok.getLine(), tok.getColumn(),
+                             tok.getText(),
+                             Integer.valueOf(idx)));
+                 break;
+
+             default:
+                 if (space)
+                     m.addToken(Token.space);
+                 space = false;
+                 paste = false;
+                 m.addToken(tok);
+                 break;
+         }
+         tok = source_token();
+     }
+
+     // if (DEBUG)
+     //     System.out.println("Defined macro " + m);
+     addMacro(m);
+
+     return tok; /* NL or EOF. */
+ }
+
+ /* Processes a #undef directive: removes the named macro, if
+  * it is defined, and skips the rest of the line. Returns the
+  * NL or EOF token which ended the directive. */
+ private Token undef()
+                     throws IOException,
+                             LexerException {
+     Token tok = source_token_nonwhite();
+     int type = tok.getType();
+     if (type == IDENTIFIER) {
+         Macro m = macros.get(tok.getText());
+         if (m != null) {
+             /* XXX error if predefined */
+             macros.remove(m.getName());
+         }
+     }
+     else {
+         error(tok,
+             "Expected identifier, not " + tok.getText());
+         /* The line is already over; there is nothing to skip. */
+         if (type == NL || type == EOF)
+             return tok;
+     }
+     return source_skipline(true);
+ }
+
+ /**
+  * Handles a include directive.
+  *
+  * The user may override this to provide alternate semantics
+  * for the include directive, for example, creating a Source
+  * based on a virtual file system.
+  *
+  * A quoted name is first looked up beside the including file;
+  * if that file has no directory component, or there is no
+  * including file, the name is resolved like any relative path.
+  * After that, the include path is searched in order. The first
+  * file found is pushed as an autopop Source. If nothing is
+  * found, an error is reported to the listener.
+  *
+  * @param parent the file containing the directive; may be null.
+  * @param line the line to report on failure.
+  * @param name the header name as written in the directive.
+  * @param quoted true for the "name" form, false for &lt;name&gt;.
+  */
+ protected void include(File parent, int line,
+                     String name, boolean quoted)
+                     throws IOException,
+                             LexerException {
+     if (quoted) {
+         /* A null directory makes File(File,String) behave like
+          * File(String), i.e. relative to the working directory,
+          * which is where a parent such as "foo.c" lives. */
+         File dir = (parent == null) ? null : parent.getParentFile();
+         File file = new File(dir, name);
+         // System.err.println("Include: " + file);
+         if (file.exists()) {
+             push_source(new FileLexerSource(file), true);
+             return;
+         }
+     }
+
+     if (path != null) {
+         for (String dir : path) {
+             File file = new File(dir + File.separator + name);
+             if (file.exists()) {
+                 // System.err.println("Include: " + file);
+                 push_source(new FileLexerSource(file), true);
+                 return;
+             }
+         }
+     }
+
+     if (listener != null)
+         listener.handleError(getSource(),
+                 line, 0,
+                 "Header not found: " + name + " in " + path
+                     );
+ }
+
+ /* Processes a #include directive.
+  *
+  * The header name is read with the lexer in include mode, then
+  * include(File,int,String,boolean) performs the inclusion.
+  * Returns the NL or EOF which ended the directive, or, when
+  * FL_LINEMARKER is set, a line marker for the included file
+  * with the NL pushed back to follow it.
+  *
+  * Assumes that the current source is a LexerSource; the cast
+  * below fails otherwise. */
+ private Token include()
+ throws IOException,
+ LexerException {
+ LexerSource lexer = (LexerSource)source;
+ try {
+ /* In include mode the lexer reads <...> as one token. */
+ lexer.setInclude(true);
+ Token tok = token_nonwhite();
+
+ String name;
+ boolean quoted;
+
+ if (tok.getType() == STRING) {
+ /* XXX Use the original text, not the value.
+ * Backslashes must not be treated as escapes here. */
+ StringBuilder buf = new StringBuilder((String)tok.getValue());
+ /* Adjacent string tokens on the line are joined into one
+  * header name. */
+ HEADER: for (;;) {
+ tok = _token(); /* Do macros but nothing else. */
+ switch (tok.getType()) {
+ case WHITESPACE:
+ case COMMENT:
+ continue;
+ case STRING:
+ buf.append((String)tok.getValue());
+ break;
+ case NL:
+ case EOF:
+ break HEADER;
+ default:
+ /* The directive is abandoned; nothing is included. */
+ warning(tok,
+ "Unexpected token on #"+"include line");
+ return source_skipline(false);
+ }
+ }
+ name = buf.toString();
+ quoted = true;
+ }
+ else if (tok.getType() == HEADER) {
+ name = (String)tok.getValue();
+ quoted = false;
+ tok = source_skipline(true);
+ }
+ else {
+ error(tok,
+ "Expected string or header, not " + tok.getText());
+ switch (tok.getType()) {
+ case NL:
+ case EOF:
+ return tok;
+ default:
+ /* Only if not a NL or EOF already. */
+ return source_skipline(false);
+ }
+ }
+
+ /* Do the inclusion. */
+ include(source.getFile(), tok.getLine(), name, quoted);
+
+ /* 'tok' is the 'nl' after the include. We use it after the
+ * #line directive. */
+ if ((flags & FL_LINEMARKER) != 0) {
+ source_untoken(tok);
+ return line_token(1, name, "");
+ }
+ return tok;
+ }
+ finally {
+ /* 'lexer' is the including source, even if the inclusion has
+  * pushed a new source since. */
+ lexer.setInclude(false);
+ }
+ }
+
+ /* For #error and #warning. */
+ private void error(Token pptok, boolean is_error)
+ throws IOException,
+ LexerException {
+ StringBuilder buf = new StringBuilder();
+ buf.append('#').append(pptok.getText()).append(' ');
+ /* Peculiar construction to ditch first whitespace. */
+ Token tok = source_token_nonwhite();
+ ERROR: for (;;) {
+ switch (tok.getType()) {
+ case NL:
+ case EOF:
+ break ERROR;
+ default:
+ buf.append(tok.getText());
+ break;
+ }
+ tok = source_token();
+ }
+ if (is_error)
+ error(pptok, buf.toString());
+ else
+ warning(pptok, buf.toString());
+ }
+
+
+
+
+ /* Returns the next token after macro expansion.
+  *
+  * This bypasses token() for #elif expressions.
+  * If we don't do this, then isActive() == false
+  * causes token() to simply chew the entire input line. */
+ private Token expanded_token()
+                     throws IOException,
+                             LexerException {
+     for (;;) {
+         Token tok = source_token();
+         // System.out.println("Source token is " + tok);
+         if (tok.getType() != IDENTIFIER)
+             return tok;
+         Macro m = macros.get(tok.getText());
+         /* Not a macro, or one which is already being expanded. */
+         if (m == null || source.isExpanding(m))
+             return tok;
+         /* If the expansion was not pushed, the identifier
+          * stands for itself. */
+         if (!macro(m, tok))
+             return tok;
+     }
+ }
+
+ /* Returns the next macro-expanded token which is neither
+  * whitespace nor a comment. */
+ private Token expanded_token_nonwhite()
+                     throws IOException,
+                             LexerException {
+     for (;;) {
+         Token tok = expanded_token();
+         // System.out.println("expanded token is " + tok);
+         if (!isWhite(tok))
+             return tok;
+     }
+ }
+
+
+ private Token expr_token = null;
+
+ /* Returns the next token of a conditional expression.
+  *
+  * A pushed-back expression token is returned first. Otherwise
+  * the next expanded non-white token is read, and the operator
+  * 'defined', with or without parentheses, is folded into an
+  * INTEGER token of value 1 or 0. The operand of 'defined' is
+  * read without macro expansion. */
+ private Token expr_token()
+                     throws IOException,
+                             LexerException {
+     if (expr_token != null) {
+         // System.out.println("ungetting");
+         Token pushed = expr_token;
+         expr_token = null;
+         return pushed;
+     }
+
+     Token tok = expanded_token_nonwhite();
+     // System.out.println("expt is " + tok);
+     if (tok.getType() != IDENTIFIER ||
+             !tok.getText().equals("defined"))
+         return tok;
+
+     Token la = source_token_nonwhite();
+     boolean paren = (la.getType() == '(');
+     if (paren)
+         la = source_token_nonwhite();
+
+     // System.out.println("Core token is " + la);
+
+     boolean defined;
+     if (la.getType() != IDENTIFIER) {
+         error(la,
+             "defined() needs identifier, not " +
+             la.getText());
+         defined = false;
+     }
+     else {
+         defined = macros.containsKey(la.getText());
+     }
+     tok = new Token(INTEGER,
+             la.getLine(), la.getColumn(),
+             defined ? "1" : "0",
+             Integer.valueOf(defined ? 1 : 0));
+
+     if (paren) {
+         la = source_token_nonwhite();
+         if (la.getType() != ')') {
+             /* Keep the token for the expression parser. */
+             expr_untoken(la);
+             error(la, "Missing ) in defined()");
+         }
+     }
+
+     // System.out.println("expr_token returns " + tok);
+
+     return tok;
+ }
+
+ /* Saves a single token so the next expr_token() call returns it.
+  * The buffer holds exactly one token; a second unget is an
+  * internal error. */
+ private void expr_untoken(Token tok)
+         throws LexerException {
+     if (expr_token == null) {
+         expr_token = tok;
+         return;
+     }
+     throw new InternalException(
+         "Cannot unget two expression tokens."
+     );
+ }
+
+ /* Binding strength of a binary operator token as used by expr():
+  * a larger number binds tighter, and 0 means the token is not a
+  * binary operator. */
+ private int expr_priority(Token op) {
+     switch (op.getType()) {
+         case '*':
+         case '/':
+         case '%':
+             return 11;
+         case '+':
+         case '-':
+             return 10;
+         case LSH:
+         case RSH:
+             return 9;
+         case '<':
+         case '>':
+         case LE:
+         case GE:
+             return 8;
+         case EQ:
+         case NE:
+             return 7;
+         case '&':
+             return 6;
+         case '^':
+             return 5;
+         case '|':
+             return 4;
+         case LAND:
+             return 3;
+         case LOR:
+             return 2;
+         case '?':
+             return 1;
+     }
+     return 0;
+ }
+
+ private long expr(int priority)
+ throws IOException,
+ LexerException {
+ /*
+ System.out.flush();
+ (new Exception("expr(" + priority + ") called")).printStackTrace();
+ System.err.flush();
+ */
+
+ Token tok = expr_token();
+ long lhs, rhs;
+
+ // System.out.println("Expr lhs token is " + tok);
+
+ switch (tok.getType()) {
+ case '(':
+ lhs = expr(0);
+ tok = expr_token();
+ if (tok.getType() != ')') {
+ expr_untoken(tok);
+ error(tok, "missing ) in expression");
+ return 0;
+ }
+ break;
+
+ case '~': lhs = ~expr(11); break;
+ case '!': lhs = expr(11) == 0 ? 1 : 0; break;
+ case '-': lhs = -expr(11); break;
+ case INTEGER:
+ lhs = ((Number)tok.getValue()).longValue();
+ break;
+ case CHARACTER:
+ lhs = (long)((Character)tok.getValue()).charValue();
+ break;
+ case IDENTIFIER:
+ /* XXX warn */
+ lhs = 0;
+ break;
+
+ default:
+ expr_untoken(tok);
+ error(tok,
+ "Bad token in expression: " + tok.getText());
+ return 0;
+ }
+
+ EXPR: for (;;) {
+ // System.out.println("expr: lhs is " + lhs + ", pri = " + priority);
+ Token op = expr_token();
+ int pri = expr_priority(op); /* 0 if not a binop. */
+ if (pri == 0 || priority >= pri) {
+ expr_untoken(op);
+ break EXPR;
+ }
+ rhs = expr(pri);
+ // System.out.println("rhs token is " + rhs);
+ switch (op.getType()) {
+ case '/':
+ if (rhs == 0) {
+ error(op, "Division by zero");
+ lhs = 0;
+ }
+ else {
+ lhs = lhs / rhs;
+ }
+ break;
+ case '%':
+ if (rhs == 0) {
+ error(op, "Modulus by zero");
+ lhs = 0;
+ }
+ else {
+ lhs = lhs % rhs;
+ }
+ break;
+ case '*': lhs = lhs * rhs; break;
+ case '+': lhs = lhs + rhs; break;
+ case '-': lhs = lhs - rhs; break;
+ case '<': lhs = lhs < rhs ? 1 : 0; break;
+ case '>': lhs = lhs > rhs ? 1 : 0; break;
+ case '&': lhs = lhs & rhs; break;
+ case '^': lhs = lhs ^ rhs; break;
+ case '|': lhs = lhs | rhs; break;
+
+ case LSH: lhs = lhs << rhs; break;
+ case RSH: lhs = lhs >> rhs; break;
+ case LE: lhs = lhs <= rhs ? 1 : 0; break;
+ case GE: lhs = lhs >= rhs ? 1 : 0; break;
+ case EQ: lhs = lhs == rhs ? 1 : 0; break;
+ case NE: lhs = lhs != rhs ? 1 : 0; break;
+ case LAND: lhs = (lhs != 0) && (rhs != 0) ? 1 : 0; break;
+ case LOR: lhs = (lhs != 0) || (rhs != 0) ? 1 : 0; break;
+
+ case '?':
+ /* XXX Handle this? */
+
+ default:
+ error(op,
+ "Unexpected operator " + op.getText());
+ return 0;
+
+ }
+ }
+
+ /*
+ System.out.flush();
+ (new Exception("expr returning " + lhs)).printStackTrace();
+ System.err.flush();
+ */
+ // System.out.println("expr returning " + lhs);
+
+ return lhs;
+ }
+
+ /**
+  * Builds a WHITESPACE token that carries only the line breaks of
+  * the given token.
+  *
+  * Used for whitespace and comments inside skipped regions so that
+  * the number of output lines is preserved. A carriage return
+  * immediately followed by a line feed counts as one line break;
+  * a line feed anywhere else counts on its own.
+  *
+  * @param tok the token whose text is scanned for line breaks.
+  * @return a WHITESPACE token at the same line and column whose text
+  * is one '\n' per line break found.
+  */
+ private Token toWhitespace(Token tok) {
+     String text = tok.getText();
+     int len = text.length();
+     boolean cr = false;    /* Previous character was '\r'. */
+     int nls = 0;
+
+     for (int i = 0; i < len; i++) {
+         char c = text.charAt(i);
+
+         switch (c) {
+             case '\r':
+                 cr = true;
+                 nls++;
+                 break;
+             case '\n':
+                 if (cr) {
+                     /* Second half of a CR LF pair, already counted. */
+                     cr = false;
+                     break;
+                 }
+                 /* fallthrough */
+             case '\u2028':
+             case '\u2029':
+             case '\u000B':
+             case '\u000C':
+             case '\u0085':
+                 cr = false;
+                 nls++;
+                 break;
+             default:
+                 /* Any other character separates a CR from a later
+                  * LF, so that LF is a line break of its own. */
+                 cr = false;
+                 break;
+         }
+     }
+
+     char[] cbuf = new char[nls];
+     Arrays.fill(cbuf, '\n');
+     return new Token(WHITESPACE,
+             tok.getLine(), tok.getColumn(),
+             new String(cbuf));
+ }
+
+ private final Token _token()
+ throws IOException,
+ LexerException {
+ /* Core loop of the preprocessor: reads raw tokens from the source, handles macros and directives, and loops until there is a token to return. */
+ Token tok;
+ for (;;) {
+ if (!isActive()) {
+ /* Tell lexer to ignore warnings. */
+ tok = source_token();
+ /* Tell lexer to stop ignoring warnings. */
+ switch (tok.getType()) {
+ case HASH:
+ case NL:
+ case EOF:
+ /* The preprocessor has to take action here. */
+ break;
+ case WHITESPACE:
+ case COMMENT:
+ // Patch up to preserve whitespace.
+ /* XXX We might want to return tok here in C */
+ return toWhitespace(tok);
+ default:
+ // Return NL to preserve whitespace.
+ return source_skipline(false);
+ }
+ }
+ else {
+ tok = source_token();
+ }
+ /* From here on tok is either any token of an active region, or a HASH, NL or EOF of a skipped region. */
+ LEX: switch (tok.getType()) {
+ case EOF:
+ /* Pop the stacks. */
+ return tok;
+
+ case WHITESPACE:
+ case NL:
+ return tok;
+
+ case COMMENT:
+ return tok;
+
+ case '!': case '%': case '&':
+ case '(': case ')': case '*':
+ case '+': case ',': case '-':
+ case '/': case ':': case ';':
+ case '<': case '=': case '>':
+ case '?': case '[': case ']':
+ case '^': case '{': case '|':
+ case '}': case '~': case '.':
+ /* Single-character and multi-character punctuation, characters and strings are passed through unchanged. */
+ // case '#':
+
+ case AND_EQ:
+ case ARROW:
+ case CHARACTER:
+ case DEC:
+ case DIV_EQ:
+ case ELLIPSIS:
+ case EQ:
+ case GE:
+ case HEADER: /* Should only arise from include() */
+ case INC:
+ case LAND:
+ case LE:
+ case LOR:
+ case LSH:
+ case LSH_EQ:
+ case SUB_EQ:
+ case MOD_EQ:
+ case MULT_EQ:
+ case NE:
+ case OR_EQ:
+ case PLUS_EQ:
+ case RANGE:
+ case RSH:
+ case RSH_EQ:
+ case STRING:
+ case XOR_EQ:
+ return tok;
+
+ case INTEGER:
+ return tok;
+
+ case IDENTIFIER:
+ Macro m = macros.get(tok.getText());
+ if (m == null)
+ return tok;
+ if (source.isExpanding(m))
+ return tok;
+ if (macro(m, tok))
+ break; /* macro() reported success: loop and read again (presumably from the pushed expansion; macro() is not visible here) */
+ return tok;
+
+ case P_LINE:
+ if ((flags & FL_LINEMARKER) != 0)
+ return tok;
+ break; /* line markers are dropped unless FL_LINEMARKER is set */
+
+ case ERROR:
+ return tok;
+
+ default:
+ throw new InternalException("Bad token " + tok);
+ // break;
+
+ case HASH:
+ tok = source_token_nonwhite();
+ // (new Exception("here")).printStackTrace();
+ switch (tok.getType()) {
+ case NL:
+ break LEX; /* Some code has #\n */
+ case IDENTIFIER:
+ break;
+ default:
+ error(tok,
+ "Preprocessor directive not a word " +
+ tok.getText());
+ return source_skipline(false);
+ }
+ Integer _ppcmd = ppcmds.get(tok.getText());
+ if (_ppcmd == null) {
+ error(tok,
+ "Unknown preprocessor directive " +
+ tok.getText());
+ return source_skipline(false);
+ }
+ int ppcmd = _ppcmd.intValue();
+ /* Every directive branch below either returns the token that ends the directive line, or breaks to continue the outer loop. */
+ switch (ppcmd) {
+
+ case PP_DEFINE:
+ if (!isActive())
+ return source_skipline(false);
+ else
+ return define();
+ // break;
+
+ case PP_UNDEF:
+ if (!isActive())
+ return source_skipline(false);
+ else
+ return undef();
+ // break;
+
+ case PP_INCLUDE:
+ if (!isActive())
+ return source_skipline(false);
+ else
+ return include();
+ // break;
+
+ case PP_WARNING:
+ case PP_ERROR:
+ if (!isActive())
+ return source_skipline(false);
+ else
+ error(tok, ppcmd == PP_ERROR);
+ break; /* message reported; continue with the next token */
+
+ case PP_IF:
+ push_state();
+ if (!isActive()) {
+ return source_skipline(false);
+ }
+ expr_token = null;
+ states.peek().setActive(expr(0) != 0);
+ tok = expr_token(); /* unget */
+ if (tok.getType() == NL)
+ return tok;
+ return source_skipline(true);
+ // break;
+
+ case PP_ELIF:
+ State state = states.peek();
+ if (false) {
+ /* Check for 'if' */ ;
+ }
+ else if (state.sawElse()) {
+ error(tok,
+ "#elif after #" + "else");
+ return source_skipline(false);
+ }
+ else if (!state.isParentActive()) {
+ /* Nested in skipped 'if' */
+ return source_skipline(false);
+ }
+ else if (state.isActive()) {
+ /* The 'if' part got executed. */
+ state.setParentActive(false);
+ /* This is like # else # if but with
+ * only one # end. */
+ state.setActive(false);
+ return source_skipline(false);
+ }
+ else {
+ expr_token = null;
+ state.setActive(expr(0) != 0);
+ tok = expr_token(); /* unget */
+ if (tok.getType() == NL)
+ return tok;
+ return source_skipline(true);
+ }
+ // break;
+
+ case PP_ELSE:
+ state = states.peek();
+ if (false)
+ /* Check for 'if' */ ;
+ else if (state.sawElse()) {
+ error(tok,
+ "#" + "else after #" + "else");
+ return source_skipline(false);
+ }
+ else {
+ state.setSawElse();
+ state.setActive(! state.isActive());
+ return source_skipline(true);
+ }
+ // break;
+
+ case PP_IFDEF:
+ push_state();
+ if (!isActive()) {
+ return source_skipline(false);
+ }
+ else {
+ tok = source_token_nonwhite();
+ // System.out.println("ifdef " + tok);
+ if (tok.getType() != IDENTIFIER) {
+ error(tok,
+ "Expected identifier, not " +
+ tok.getText());
+ return source_skipline(false);
+ }
+ else {
+ String text = tok.getText();
+ boolean exists =
+ macros.containsKey(text);
+ states.peek().setActive(exists);
+ return source_skipline(true);
+ }
+ }
+ // break;
+
+ case PP_IFNDEF:
+ push_state();
+ if (!isActive()) {
+ return source_skipline(false);
+ }
+ else {
+ tok = source_token_nonwhite();
+ if (tok.getType() != IDENTIFIER) {
+ error(tok,
+ "Expected identifier, not " +
+ tok.getText());
+ return source_skipline(false);
+ }
+ else {
+ String text = tok.getText();
+ boolean exists =
+ macros.containsKey(text);
+ states.peek().setActive(!exists);
+ return source_skipline(true);
+ }
+ }
+ // break;
+
+ case PP_ENDIF:
+ pop_state();
+ return source_skipline(true);
+ // break;
+
+ case PP_LINE:
+ return source_skipline(false); /* the directive line is skipped without further processing here */
+ // break;
+
+ case PP_PRAGMA:
+ return source_skipline(false); /* the directive line is skipped without further processing here */
+ // break;
+
+ default:
+ /* Actual unknown directives are
+ * processed above. If we get here,
+ * we succeeded the map lookup but
+ * failed to handle it. Therefore,
+ * this is (unconditionally?) fatal. */
+ // if (isActive()) /* XXX Could be warning. */
+ throw new InternalException(
+ "Internal error: Unknown directive "
+ + tok);
+ // return source_skipline(false);
+ }
+
+ /* Reached only through a break above: nothing to return yet, so go round the outer loop again. */
+ }
+ }
+ }
+
+ /* Returns the next preprocessed token for which isWhite() is
+  * false. */
+ private Token token_nonwhite()
+         throws IOException,
+                 LexerException {
+     for (;;) {
+         Token candidate = _token();
+         if (!isWhite(candidate))
+             return candidate;
+     }
+ }
+
+ /**
+  * Produces the next token of the preprocessed output.
+  *
+  * When DEBUG is set, each token is also printed to standard output
+  * before it is returned.
+  *
+  * @return the next token.
+  * @see Token
+  * @throws IOException if an IOException is raised while reading
+  * the input.
+  * @throws LexerException if a preprocessing error occurs.
+  * @throws InternalException if an unexpected error condition arises.
+  */
+ public Token token()
+         throws IOException,
+                 LexerException {
+     Token result = _token();
+     if (DEBUG) {
+         System.out.println("pp: Returning " + result);
+     }
+     return result;
+ }
+
+#set ($i = 1) /* First ppcmd is 1, not 0. */
+#set ($ppcmds = [ "define", "elif", "else", "endif", "error", "if", "ifdef", "ifndef", "include", "line", "pragma", "undef", "warning" ])
+#foreach ($ppcmd in $ppcmds)
+ private static final int PP_$ppcmd.toUpperCase() = $i;
+#set ($i = $i + 1)
+#end
+ /* Lookup table from a directive name to its PP_ code; filled below from the same template list that generates the constants above. */
+ private static final Map<String,Integer> ppcmds =
+ new HashMap<String,Integer>();
+ /* The template loop emits one put() call per directive name. */
+ static {
+#foreach ($ppcmd in $ppcmds)
+ ppcmds.put("$ppcmd", Integer.valueOf(PP_$ppcmd.toUpperCase()));
+#end
+ }
+
+
+ /* Debugging dump: one line per Source from the current one up
+  * through its parents, followed by one line per macro, sorted by
+  * macro name. */
+ public String toString() {
+     StringBuilder out = new StringBuilder();
+
+     for (Source src = getSource(); src != null; src = src.getParent()) {
+         out.append(" -> ");
+         out.append(String.valueOf(src));
+         out.append("\n");
+     }
+
+     Map<String,Macro> defined = getMacros();
+     List<String> names = new ArrayList<String>(
+         defined.keySet()
+     );
+     Collections.sort(names);
+     for (String name : names) {
+         Macro definition = defined.get(name);
+         /* The prefix stays split in two literals, as in the
+          * rest of this template. */
+         out.append("#").append("macro ").append(definition).append("\n");
+     }
+
+     return out.toString();
+ }
+
+}
diff --git a/src/java/org/anarres/cpp/PreprocessorListener.java b/src/java/org/anarres/cpp/PreprocessorListener.java
new file mode 100644
index 0000000..84a105d
--- /dev/null
+++ b/src/java/org/anarres/cpp/PreprocessorListener.java
@@ -0,0 +1,83 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.File;
+
+public class PreprocessorListener {
+
+ private int errors;
+ private int warnings;
+
+ public PreprocessorListener() {
+ clear();
+ }
+
+ public void clear() {
+ errors = 0;
+ warnings = 0;
+ }
+
+ public int getErrors() {
+ return errors;
+ }
+
+ public int getWarnings() {
+ return warnings;
+ }
+
+ protected void print(String msg) {
+ System.err.println(msg);
+ }
+
+ /**
+ * Handles a warning.
+ *
+ * The behaviour of this method is defined by the
+ * implementation. It may simply record the error message, or
+ * it may throw an exception.
+ */
+ public void handleWarning(Source source, int line, int column,
+ String msg)
+ throws LexerException {
+ warnings++;
+ print(source.getName() + ":" + line + ":" + column +
+ ": warning: " + msg);
+ }
+
+ /**
+ * Handles an error.
+ *
+ * The behaviour of this method is defined by the
+ * implementation. It may simply record the error message, or
+ * it may throw an exception.
+ */
+ public void handleError(Source source, int line, int column,
+ String msg)
+ throws LexerException {
+ errors++;
+ print(source.getName() + ":" + line + ":" + column +
+ ": error: " + msg);
+ }
+
+ public void handleSourceChange(Source source, String event) {
+ }
+
+}
diff --git a/src/java/org/anarres/cpp/Source.java b/src/java/org/anarres/cpp/Source.java
new file mode 100644
index 0000000..2999418
--- /dev/null
+++ b/src/java/org/anarres/cpp/Source.java
@@ -0,0 +1,226 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * An input to the Preprocessor.
+ *
+ * Inputs may come from Files, Strings or other sources. The
+ * preprocessor maintains a stack of Sources. Operations such as
+ * file inclusion or token pasting will push a new source onto
+ * the Preprocessor stack. Sources pop from the stack when they
+ * are exhausted; this may be transparent or explicit.
+ *
+ * BUG: Error messages are not handled properly.
+ */
+public abstract class Source implements Iterable<Token> {
+ private Source parent;
+ private boolean autopop;
+ private PreprocessorListener listener;
+
+ /* LineNumberReader */
+
+/*
+ // We can't do this, since we would lose the LexerException
+ private class Itr implements Iterator {
+ private Token next = null;
+ private void advance() {
+ try {
+ if (next != null)
+ next = token();
+ }
+ catch (IOException e) {
+ throw new UnsupportedOperationException(
+ "Failed to advance token iterator: " +
+ e.getMessage()
+ );
+ }
+ }
+ public boolean hasNext() {
+ return next.getType() != EOF;
+ }
+ public Token next() {
+ advance();
+ Token t = next;
+ next = null;
+ return t;
+ }
+ public void remove() {
+ throw new UnsupportedOperationException(
+ "Cannot remove tokens from a Source."
+ );
+ }
+ }
+*/
+
+ public Source() {
+ this.parent = null;
+ this.autopop = false;
+ }
+
+ /* pp */ void setParent(Source parent, boolean autopop) {
+ this.parent = parent;
+ this.autopop = autopop;
+ }
+
+ /* pp */ final Source getParent() {
+ return parent;
+ }
+
+ /* pp */ void setListener(PreprocessorListener listener) {
+ this.listener = listener;
+ }
+
+ /**
+ * Returns the File currently being lexed.
+ *
+ * If this Source is not a {@link FileLexerSource}, then
+ * it will ask the parent Source, and so forth recursively.
+ * If no Source on the stack is a FileLexerSource, returns null.
+ */
+ /* pp */ File getFile() {
+ Source parent = getParent();
+ while (parent != null) {
+ File file = parent.getFile();
+ if (file != null)
+ return file;
+ parent = parent.getParent();
+ }
+ return null;
+ }
+
+ /* pp */ String getName() {
+ Source parent = getParent();
+ while (parent != null) {
+ String name = parent.getName();
+ if (name != null)
+ return name;
+ parent = parent.getParent();
+ }
+ return null;
+ }
+
+ public int getLine() {
+ Source parent = getParent();
+ if (parent == null)
+ return 0;
+ return parent.getLine();
+ }
+
+ /* pp */ boolean isExpanding(Macro m) {
+ Source parent = getParent();
+ if (parent != null)
+ return parent.isExpanding(m);
+ return false;
+ }
+
+ /**
+ * Returns true if this Source should be transparently popped
+ * from the input stack.
+ *
+ * Examples of such sources are macro expansions.
+ */
+ /* pp */ boolean isAutopop() {
+ return autopop;
+ }
+
+ /* pp */ boolean isNumbered() {
+ return false;
+ }
+
+ /**
+ * Returns the next Token parsed from this input stream.
+ *
+ * @see Token
+ */
+ public abstract Token token()
+ throws IOException,
+ LexerException;
+
+ public Iterator<Token> iterator() {
+ return new SourceIterator(this);
+ }
+
+ /**
+ * Skips tokens until the end of line.
+ *
+ * @param white true if only whitespace is permitted on the
+ * remainder of the line.
+ * @return the NL token.
+ */
+ public Token skipline(boolean white)
+ throws IOException,
+ LexerException {
+ for (;;) {
+ Token tok = token();
+ switch (tok.getType()) {
+ case EOF:
+ /* There ought to be a newline before EOF.
+ * At least, in any skipline context. */
+ /* XXX Are we sure about this? */
+ warning(tok.getLine(), tok.getColumn(),
+ "No newline before end of file");
+ return tok;
+ case NL:
+ /* This may contain one or more newlines. */
+ return tok;
+ case COMMENT:
+ case WHITESPACE:
+ break;
+ default:
+ /* XXX Check white, if required. */
+ if (white)
+ warning(tok.getLine(), tok.getColumn(),
+ "Unexpected nonwhite token");
+ break;
+ }
+ }
+ }
+
+ protected void error(int line, int column, String msg)
+ throws LexerException {
+ if (listener != null)
+ listener.handleError(this, line, column, msg);
+ else
+ throw new LexerException("No handler for error at " + line + ":" + column + ": " + msg);
+ }
+
+ protected void warning(int line, int column, String msg)
+ throws LexerException {
+ if (listener != null)
+ listener.handleWarning(this, line, column, msg);
+ else
+ throw new LexerException("No handler for warning at " + line + ":" + column + ": " + msg);
+ }
+
+}
diff --git a/src/java/org/anarres/cpp/SourceIterator.java b/src/java/org/anarres/cpp/SourceIterator.java
new file mode 100644
index 0000000..ac2bc24
--- /dev/null
+++ b/src/java/org/anarres/cpp/SourceIterator.java
@@ -0,0 +1,94 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.IOException;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * An Iterator for {@link Source Sources},
+ * returning {@link Token Tokens}.
+ */
+public class SourceIterator implements Iterator<Token> {
+ private Source source;
+ private Token tok;
+
+ public SourceIterator(Source s) {
+ this.source = s;
+ this.tok = null;
+ }
+
+ /**
+ * Rethrows IOException inside IllegalStateException.
+ */
+ private void advance() {
+ try {
+ if (tok == null)
+ tok = source.token();
+ }
+ catch (LexerException e) {
+ throw new IllegalStateException(e);
+ }
+ catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ /**
+ * Returns true if the enclosed Source has more tokens.
+ *
+ * The EOF token is never returned by the iterator.
+ * @throws IllegalStateException if the Source
+ * throws a LexerException or IOException
+ */
+ public boolean hasNext() {
+ advance();
+ return tok.getType() != EOF;
+ }
+
+ /**
+ * Returns the next token from the enclosed Source.
+ *
+ * The EOF token is never returned by the iterator.
+ * @throws IllegalStateException if the Source
+ * throws a LexerException or IOException
+ */
+ public Token next() {
+ if (!hasNext())
+ throw new NoSuchElementException();
+ Token t = this.tok;
+ this.tok = null;
+ return t;
+ }
+
+ /**
+ * Not supported.
+ *
+ * @throws UnsupportedOperationException.
+ */
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+}
+
diff --git a/src/java/org/anarres/cpp/State.java b/src/java/org/anarres/cpp/State.java
new file mode 100644
index 0000000..441e71e
--- /dev/null
+++ b/src/java/org/anarres/cpp/State.java
@@ -0,0 +1,69 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+/* pp */ class State {
+ boolean parent;
+ boolean active;
+ boolean sawElse;
+
+ /* pp */ State() {
+ this.parent = true;
+ this.active = true;
+ this.sawElse = false;
+ }
+
+ /* pp */ State(State parent) {
+ this.parent = parent.isParentActive() && parent.isActive();
+ this.active = true;
+ this.sawElse = false;
+ }
+
+ /* Required for #elif */
+ /* pp */ void setParentActive(boolean b) {
+ this.parent = b;
+ }
+
+ /* pp */ boolean isParentActive() {
+ return parent;
+ }
+
+ /* pp */ void setActive(boolean b) {
+ this.active = b;
+ }
+
+ /* pp */ boolean isActive() {
+ return active;
+ }
+
+ /* pp */ void setSawElse() {
+ sawElse = true;
+ }
+
+ /* pp */ boolean sawElse() {
+ return sawElse;
+ }
+
+ public String toString() {
+ return "parent=" + parent +
+ ", active=" + active +
+ ", sawelse=" + sawElse;
+ }
+}
diff --git a/src/java/org/anarres/cpp/StringLexerSource.java b/src/java/org/anarres/cpp/StringLexerSource.java
new file mode 100644
index 0000000..7e7df75
--- /dev/null
+++ b/src/java/org/anarres/cpp/StringLexerSource.java
@@ -0,0 +1,64 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A Source for lexing a String.
+ *
+ * This class is used by token pasting, but can be used by user
+ * code.
+ */
+public class StringLexerSource extends LexerSource {
+
+    /**
+     * Creates a Source that lexes the given String, with
+     * preprocessor directives not honoured within it.
+     *
+     * Equivalent to calling the two-argument constructor with
+     * ppvalid set to false.
+     */
+    public StringLexerSource(String string)
+                        throws IOException {
+        this(string, false);
+    }
+
+    /**
+     * Creates a Source that lexes the given String through a
+     * StringReader.
+     *
+     * @param string the text to lex.
+     * @param ppvalid true if preprocessor directives are to be
+     * honoured within the string.
+     */
+    public StringLexerSource(String string, boolean ppvalid)
+                        throws IOException {
+        super(new StringReader(string), ppvalid);
+    }
+
+    /**
+     * Returns a fixed description; the lexed text is not included.
+     */
+    @Override
+    public String toString() {
+        return "string literal";
+    }
+}
diff --git a/src/java/org/anarres/cpp/Token.java b/src/java/org/anarres/cpp/Token.java
new file mode 100644
index 0000000..e5c1319
--- /dev/null
+++ b/src/java/org/anarres/cpp/Token.java
@@ -0,0 +1,215 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+/**
+ * A Preprocessor token.
+ *
+ * @see Preprocessor
+ */
+public final class Token {
+
+    // public static final int EOF = -1;
+
+    private int type;
+    private int line;
+    private int column;
+    private Object value;
+    private String text;
+
+    /**
+     * Creates a token with every attribute given explicitly.
+     */
+    public Token(int type, int line, int column,
+                    String text, Object value) {
+        this.type = type;
+        this.text = text;
+        this.value = value;
+        this.line = line;
+        this.column = column;
+    }
+
+    /**
+     * Creates a located token without a semantic value.
+     */
+    public Token(int type, int line, int column, String text) {
+        this(type, line, column, text, null);
+    }
+
+    /* Position-less token: line and column are both -1. */
+    /* pp */ Token(int type, String text, Object value) {
+        this(type, -1, -1, text, value);
+    }
+
+    /* Position-less token without a semantic value. */
+    /* pp */ Token(int type, String text) {
+        this(type, text, null);
+    }
+
+    /* Position-less token whose text comes from the texts table. */
+    /* pp */ Token(int type) {
+        this(type, texts[type]);
+    }
+
+    /**
+     * Returns the semantic type of this token.
+     */
+    public int getType() {
+        return type;
+    }
+
+    /* pp */ void setLocation(int line, int column) {
+        this.column = column;
+        this.line = line;
+    }
+
+    /**
+     * Returns the line at which this token started.
+     *
+     * Lines are numbered from zero.
+     */
+    public int getLine() {
+        return line;
+    }
+
+    /**
+     * Returns the column at which this token started.
+     *
+     * Columns are numbered from zero.
+     */
+    public int getColumn() {
+        return column;
+    }
+
+    /**
+     * Returns the original or generated text of this token.
+     *
+     * This is distinct from the semantic value of the token.
+     *
+     * @see #getValue()
+     */
+    public String getText() {
+        return text;
+    }
+
+    /**
+     * Returns the semantic value of this token.
+     *
+     * For strings, this is the parsed String.
+     * For integers, this is an Integer object.
+     * For other token types, as appropriate.
+     *
+     * @see #getText()
+     */
+    public Object getValue() {
+        return value;
+    }
+
+    /**
+     * Returns a description of this token, for debugging purposes.
+     *
+     * The form is [NAME@line,column]:"text"=value, where the location
+     * parts are left out when they are -1 and the value part is left
+     * out when it is null.
+     */
+    public String toString() {
+        StringBuilder out = new StringBuilder();
+
+        out.append('[');
+        out.append(getTokenName(type));
+        if (line != -1) {
+            out.append('@');
+            out.append(line);
+            if (column != -1) {
+                out.append(',');
+                out.append(column);
+            }
+        }
+        out.append("]:");
+
+        if (text != null) {
+            out.append('"').append(text).append('"');
+        }
+        else if (type > 3 && type < 256) {
+            /* No text: show the type as a character. */
+            out.append( (char)type );
+        }
+        else {
+            out.append('<').append(type).append('>');
+        }
+
+        if (value != null) {
+            out.append('=');
+            out.append(value);
+        }
+        return out.toString();
+    }
+
+    /**
+     * Returns the descriptive name of the given token type.
+     *
+     * Types outside the name table give "Invalid" followed by the
+     * number; types without a name give "Unknown" followed by the
+     * number.
+     * This is mostly used for stringification and debugging.
+     */
+    public static final String getTokenName(int type) {
+        if (type < 0 || type >= names.length)
+            return "Invalid" + type;
+        String name = names[type];
+        if (name != null)
+            return name;
+        return "Unknown" + type;
+    }
+
+#set ($i = 257)
+#set ($tokens = [ "AND_EQ", "ARROW", "CHARACTER", "COMMENT", "DEC", "DIV_EQ", "ELLIPSIS", "EOF", "EQ", "GE", "HASH", "HEADER", "IDENTIFIER", "INC", "INTEGER", "LAND", "LAND_EQ", "LE", "LITERAL", "LOR", "LOR_EQ", "LSH", "LSH_EQ", "MOD_EQ", "MULT_EQ", "NE", "NL", "OR_EQ", "PASTE", "PLUS_EQ", "RANGE", "RSH", "RSH_EQ", "STRING", "SUB_EQ", "WHITESPACE", "XOR_EQ", "M_ARG", "M_PASTE", "M_STRING", "P_LINE", "ERROR" ])
+#foreach ($token in $tokens)
+    /** The token type $token. */
+    public static final int $token = $i;
+#set ($i = $i + 1)
+#end
+    /**
+     * The number of possible semantic token types.
+     *
+     * Please note that not all token types below 255 are used.
+     */
+    public static final int _TOKENS = $i;
+
+    /** The position-less space token. */
+    /* pp */ static final Token space = new Token(WHITESPACE, -1, -1, " ");
+
+    /* Per-type tables, indexed by token type. */
+    private static final String[] names = new String[_TOKENS];
+    private static final String[] texts = new String[_TOKENS];
+    static {
+        /* Types 0 to 254 use their own character as text and name. */
+        for (int c = 0; c < 255; c++) {
+            String self = String.valueOf(new char[] { (char)c });
+            texts[c] = self;
+            names[c] = self;
+        }
+
+        texts[AND_EQ] = "&=";
+        texts[ARROW] = "->";
+        texts[DEC] = "--";
+        texts[DIV_EQ] = "/=";
+        texts[ELLIPSIS] = "...";
+        texts[EQ] = "==";
+        texts[GE] = ">=";
+        texts[HASH] = "#";
+        texts[INC] = "++";
+        texts[LAND] = "&&";
+        texts[LAND_EQ] = "&&=";
+        texts[LE] = "<=";
+        texts[LOR] = "||";
+        texts[LOR_EQ] = "||=";
+        texts[LSH] = "<<";
+        texts[LSH_EQ] = "<<=";
+        texts[MOD_EQ] = "%=";
+        texts[MULT_EQ] = "*=";
+        texts[NE] = "!=";
+        texts[NL] = "\n";
+        texts[OR_EQ] = "|=";
+        /* We have to split the two hashes or Velocity eats them. */
+        texts[PASTE] = "#" + "#";
+        texts[PLUS_EQ] = "+=";
+        texts[RANGE] = "..";
+        texts[RSH] = ">>";
+        texts[RSH_EQ] = ">>=";
+        texts[SUB_EQ] = "-=";
+        texts[XOR_EQ] = "^=";
+
+#foreach ($token in $tokens)
+        names[$token] = "$token";
+#end
+    }
+
+}
diff --git a/src/java/org/anarres/cpp/TokenSnifferSource.java b/src/java/org/anarres/cpp/TokenSnifferSource.java
new file mode 100644
index 0000000..55b53d7
--- /dev/null
+++ b/src/java/org/anarres/cpp/TokenSnifferSource.java
@@ -0,0 +1,56 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+@Deprecated
+/* pp */ class TokenSnifferSource extends Source {
+    /* Receives a copy of every non-EOF token that passes through. */
+    private List<Token> target;
+
+    /* pp */ TokenSnifferSource(List<Token> target) {
+        this.target = target;
+    }
+
+    /* Reads one token from the parent Source, records it in the
+     * target list unless it is EOF, and passes it on unchanged. */
+    public Token token()
+                        throws IOException,
+                                LexerException {
+        Token sniffed = getParent().token();
+        if (sniffed.getType() == EOF)
+            return sniffed;
+        target.add(sniffed);
+        return sniffed;
+    }
+
+    /* Describes itself as its parent Source does. */
+    public String toString() {
+        Source up = getParent();
+        return up.toString();
+    }
+}