aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/input/test.c61
-rw-r--r--src/input/test0.h7
-rw-r--r--src/input/test1.h7
-rw-r--r--src/java/org/anarres/cpp/Argument.java79
-rw-r--r--src/java/org/anarres/cpp/CppReader.java147
-rw-r--r--src/java/org/anarres/cpp/FileLexerSource.java74
-rw-r--r--src/java/org/anarres/cpp/FixedTokenSource.java67
-rw-r--r--src/java/org/anarres/cpp/InternalException.java33
-rw-r--r--src/java/org/anarres/cpp/JoinReader.java168
-rw-r--r--src/java/org/anarres/cpp/LexerException.java35
-rw-r--r--src/java/org/anarres/cpp/LexerSource.java677
-rw-r--r--src/java/org/anarres/cpp/Macro.java157
-rw-r--r--src/java/org/anarres/cpp/MacroTokenSource.java191
-rw-r--r--src/java/org/anarres/cpp/Main.java111
-rw-r--r--src/java/org/anarres/cpp/Preprocessor.java1511
-rw-r--r--src/java/org/anarres/cpp/PreprocessorListener.java83
-rw-r--r--src/java/org/anarres/cpp/Source.java226
-rw-r--r--src/java/org/anarres/cpp/SourceIterator.java94
-rw-r--r--src/java/org/anarres/cpp/State.java69
-rw-r--r--src/java/org/anarres/cpp/StringLexerSource.java64
-rw-r--r--src/java/org/anarres/cpp/Token.java215
-rw-r--r--src/java/org/anarres/cpp/TokenSnifferSource.java56
-rw-r--r--src/resources/log4j.properties22
-rw-r--r--src/scripts/cpp.sh32
-rw-r--r--src/scripts/release.sh4
-rw-r--r--src/tests/AutoTestSuite.java121
-rw-r--r--src/tests/org/anarres/cpp/BaseTestCase.java6
-rw-r--r--src/tests/org/anarres/cpp/CppReaderTestCase.java34
-rw-r--r--src/tests/org/anarres/cpp/ErrorTestCase.java50
-rw-r--r--src/tests/org/anarres/cpp/JoinReaderTestCase.java40
-rw-r--r--src/tests/org/anarres/cpp/LexerSourceTestCase.java43
-rw-r--r--src/tests/org/anarres/cpp/PreprocessorTestCase.java154
32 files changed, 4638 insertions, 0 deletions
diff --git a/src/input/test.c b/src/input/test.c
new file mode 100644
index 0000000..150b759
--- /dev/null
+++ b/src/input/test.c
@@ -0,0 +1,61 @@
+line = __LINE__
+file = __FILE__
+
+#define A a /* a defined */
+#define B b /* b defined */
+#define C c /* c defined */
+
+#define EXPAND(x) x
+EXPAND(a) -> a
+EXPAND(A) -> a
+
+#define _STRINGIFY(x) #x
+_STRINGIFY(A) -> "A"
+
+#define STRINGIFY(x) _STRINGIFY(x)
+STRINGIFY(b) -> "b"
+STRINGIFY(A) -> "a"
+
+#define _CONCAT(x, y) x ## y
+_CONCAT(A, B) -> AB
+
+#define A_CONCAT done_a_concat
+_CONCAT(A, _CONCAT(B, C)) -> done_a_concat(b, c)
+
+#define CONCAT(x, y) _CONCAT(x, y)
+CONCAT(A, CONCAT(B, C)) -> abc
+
+#define _CONCAT3(x, y, z) x ## y ## z
+_CONCAT3(a, b, c) -> abc
+_CONCAT3(A, B, C) -> ABC
+_CONCAT3(A, EXPAND(B), C) -> AEXPAND(b)C
+
+Line is __LINE__
+File is __FILE__
+
+#define two three
+one /* one */
+#define one two
+one /* three */
+#undef two
+#define two five
+one /* five */
+#undef two
+one /* two */
+#undef one
+#define one four
+one /* four */
+#undef one
+#define one one
+one /* one */
+
+/* warning line 57 column 0 */
+#warning arse
+
+#define foo(x) foo(x, b)
+foo(1) -> _foo(1, b) without the _
+foo(foo(2)) -> _foo(_foo(2, b), b) without the _
+foo(y, z)
+
+#define var(x...) a x b
+var(e, f, g) -> a e, f, g b
diff --git a/src/input/test0.h b/src/input/test0.h
new file mode 100644
index 0000000..72db7b7
--- /dev/null
+++ b/src/input/test0.h
@@ -0,0 +1,7 @@
+
+test0start_2
+
+#include <test1.h>
+
+test0end___6
+
diff --git a/src/input/test1.h b/src/input/test1.h
new file mode 100644
index 0000000..0b690f7
--- /dev/null
+++ b/src/input/test1.h
@@ -0,0 +1,7 @@
+
+test1start_2
+
+test1mid___4
+
+test1end___6
+
diff --git a/src/java/org/anarres/cpp/Argument.java b/src/java/org/anarres/cpp/Argument.java
new file mode 100644
index 0000000..da87d70
--- /dev/null
+++ b/src/java/org/anarres/cpp/Argument.java
@@ -0,0 +1,79 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.IOException;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A macro argument.
+ *
+ * This encapsulates a raw and preprocessed token stream.
+ */
+/* pp */ class Argument extends ArrayList<Token> {
+    /* Not referenced inside this class; presumably an argument-count
+     * marker for macro handling elsewhere - TODO confirm. */
+    public static final int NO_ARGS = -1;
+
+    /* The preprocessed form of this argument; null until expand() runs. */
+    private List<Token> expansion;
+
+    public Argument() {
+        expansion = null;
+    }
+
+    /** Appends a raw (unexpanded) token to this argument. */
+    public void addToken(Token tok) {
+        add(tok);
+    }
+
+    /**
+     * Computes the expansion of this argument using the given
+     * preprocessor. The result is cached, so only the first call
+     * does any work.
+     */
+    /* pp */ void expand(Preprocessor p)
+                        throws IOException,
+                                LexerException {
+        if (expansion != null)
+            return;
+        expansion = p.expand(this);
+    }
+
+    /**
+     * Returns an iterator over the cached expansion.
+     * expand() must have been called first; otherwise the expansion
+     * is still null and this throws a NullPointerException.
+     */
+    public Iterator<Token> expansion() {
+        return expansion.iterator();
+    }
+
+    /** Renders the raw and expanded token texts, for debugging. */
+    public String toString() {
+        StringBuilder out = new StringBuilder();
+        out.append("Argument(");
+        out.append("raw=[ ");
+        for (Token raw : this)
+            out.append(raw.getText());
+        out.append(" ];expansion=[ ");
+        if (expansion == null) {
+            out.append("null");
+        }
+        else {
+            for (Token cooked : expansion)
+                out.append(cooked.getText());
+        }
+        out.append(" ])");
+        return out.toString();
+    }
+
+}
diff --git a/src/java/org/anarres/cpp/CppReader.java b/src/java/org/anarres/cpp/CppReader.java
new file mode 100644
index 0000000..0aa6788
--- /dev/null
+++ b/src/java/org/anarres/cpp/CppReader.java
@@ -0,0 +1,147 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A Reader wrapper around the Preprocessor.
+ *
+ * This is a utility class to provide a transparent {@link Reader}
+ * which preprocesses the input text.
+ *
+ * @see Preprocessor
+ * @see Reader
+ */
+public class CppReader extends Reader {
+
+ private Preprocessor cpp;
+ private String token;
+ private int idx;
+
+ public CppReader(final Reader r) {
+ cpp = new Preprocessor(new LexerSource(r, true) {
+ @Override
+ public String getName() {
+ return "<CppReader Input@" +
+ System.identityHashCode(r) + ">";
+ }
+ });
+ token = "";
+ idx = 0;
+ }
+
+ public CppReader(Preprocessor p) {
+ cpp = p;
+ token = "";
+ idx = 0;
+ }
+
+ /**
+ * Returns the Preprocessor used by this CppReader.
+ */
+ public Preprocessor getPreprocessor() {
+ return cpp;
+ }
+
+ /**
+ * Defines the given name as a macro.
+ *
+ * This is a convnience method.
+ */
+ public void addMacro(String name)
+ throws LexerException {
+ cpp.addMacro(name);
+ }
+
+ /**
+ * Defines the given name as a macro.
+ *
+ * This is a convnience method.
+ */
+ public void addMacro(String name, String value)
+ throws LexerException {
+ cpp.addMacro(name, value);
+ }
+
+ private boolean refill()
+ throws IOException {
+ try {
+ assert cpp != null : "cpp is null : was it closed?";
+ if (token == null)
+ return false;
+ while (idx >= token.length()) {
+ Token tok = cpp.token();
+ switch (tok.getType()) {
+ case EOF:
+ token = null;
+ return false;
+ case COMMENT:
+ if (false) {
+ token = " ";
+ break;
+ }
+ default:
+ token = tok.getText();
+ break;
+ }
+ idx = 0;
+ }
+ return true;
+ }
+ catch (LexerException e) {
+ IOException ie = new IOException(String.valueOf(e));
+ ie.initCause(e);
+ throw ie;
+ }
+ }
+
+ public int read()
+ throws IOException {
+ if (!refill())
+ return -1;
+ return token.charAt(idx++);
+ }
+
+ /* XXX Very slow and inefficient. */
+ public int read(char cbuf[], int off, int len)
+ throws IOException {
+ if (token == null)
+ return -1;
+ for (int i = 0; i < len; i++) {
+ int ch = read();
+ if (ch == -1)
+ return i;
+ cbuf[off + i] = (char)ch;
+ }
+ return len;
+ }
+
+ public void close()
+ throws IOException {
+ cpp = null;
+ token = null;
+ }
+
+}
diff --git a/src/java/org/anarres/cpp/FileLexerSource.java b/src/java/org/anarres/cpp/FileLexerSource.java
new file mode 100644
index 0000000..9f574a0
--- /dev/null
+++ b/src/java/org/anarres/cpp/FileLexerSource.java
@@ -0,0 +1,74 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A {@link Source} which lexes a file.
+ *
+ * The input is buffered.
+ *
+ * @see Source
+ */
+public class FileLexerSource extends LexerSource {
+    /* The file being lexed. */
+    private File file;
+
+    /**
+     * Creates a new Source for lexing the given File.
+     *
+     * The file is read through a BufferedReader over a FileReader.
+     * Preprocessor directives are honoured within the file.
+     *
+     * @throws IOException if the file cannot be opened.
+     */
+    public FileLexerSource(File file)
+                        throws IOException {
+        super(new BufferedReader(new FileReader(file)), true);
+        this.file = file;
+    }
+
+    /** Returns the file this source was created for. */
+    @Override
+    /* pp */ File getFile() {
+        return this.file;
+    }
+
+    /** Returns the string form of the file as the name of this source. */
+    @Override
+    /* pp */ String getName() {
+        return String.valueOf(this.file);
+    }
+
+    public String toString() {
+        return "file " + this.file;
+    }
+}
diff --git a/src/java/org/anarres/cpp/FixedTokenSource.java b/src/java/org/anarres/cpp/FixedTokenSource.java
new file mode 100644
index 0000000..d123f89
--- /dev/null
+++ b/src/java/org/anarres/cpp/FixedTokenSource.java
@@ -0,0 +1,67 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Iterator;
+
+/* pp */ class FixedTokenSource extends Source {
+    /* Returned by token() once the list is exhausted. */
+    private static final Token EOF =
+            new Token(Token.EOF, "<ts-eof>");
+
+    private List<Token> tokens;
+    /* Index of the next token to hand out. */
+    private int idx;
+
+    /** Creates a source which yields the given tokens in order. */
+    /* pp */ FixedTokenSource(Token... tokens) {
+        this(Arrays.asList(tokens));
+    }
+
+    /**
+     * Creates a source which yields the tokens of the given list in
+     * order. The list is used directly, not copied.
+     */
+    /* pp */ FixedTokenSource(List<Token> tokens) {
+        this.tokens = tokens;
+        this.idx = 0;
+    }
+
+    /**
+     * Returns the next token of the list, or the shared EOF token
+     * once every token has been returned.
+     */
+    public Token token()
+                        throws IOException,
+                                LexerException {
+        if (idx < tokens.size()) {
+            Token next = tokens.get(idx);
+            idx++;
+            return next;
+        }
+        return EOF;
+    }
+
+    public String toString() {
+        StringBuilder out = new StringBuilder();
+        out.append("constant token stream ").append(tokens);
+        Source parent = getParent();
+        if (parent != null)
+            out.append(" in ").append(String.valueOf(parent));
+        return out.toString();
+    }
+}
diff --git a/src/java/org/anarres/cpp/InternalException.java b/src/java/org/anarres/cpp/InternalException.java
new file mode 100644
index 0000000..d228710
--- /dev/null
+++ b/src/java/org/anarres/cpp/InternalException.java
@@ -0,0 +1,33 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+/**
+ * An internal exception.
+ *
+ * This exception is thrown when an internal state violation is
+ * encountered. This should never happen. If it ever happens, please
+ * report it as a bug.
+ */
+public class InternalException extends RuntimeException {
+ /** Creates an InternalException with the given detail message. */
+ public InternalException(String msg) {
+ super(msg);
+ }
+}
diff --git a/src/java/org/anarres/cpp/JoinReader.java b/src/java/org/anarres/cpp/JoinReader.java
new file mode 100644
index 0000000..10ec535
--- /dev/null
+++ b/src/java/org/anarres/cpp/JoinReader.java
@@ -0,0 +1,168 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.Reader;
+import java.io.PushbackReader;
+import java.io.IOException;
+
+/**
+ * A Reader which removes backslash-newline line splices and,
+ * optionally, translates trigraphs.
+ *
+ * Every removed splice is remembered, and the same number of '\n'
+ * characters is emitted after the next line separator (or at end of
+ * input), so the total number of line breaks is unchanged.
+ */
+/* pp */ class JoinReader extends Reader {
+    private Reader in;
+
+    private boolean trigraphs;
+
+    /* Number of spliced newlines not yet re-emitted. */
+    private int newlines;
+    /* True once a line separator has been returned and the pending
+     * newlines should be flushed. */
+    private boolean flushnl;
+    /* Pushback stack and its fill level. */
+    private int[] unget;
+    private int uptr;
+
+    public JoinReader(Reader in, boolean trigraphs) {
+        this.in = in;
+        this.trigraphs = trigraphs;
+        this.newlines = 0;
+        this.flushnl = false;
+        /* Three slots are needed: a failed trigraph match pushes back
+         * two characters, and read() may then push the '?' it was
+         * given back on top of them (input such as "\??x"). */
+        this.unget = new int[3];
+        this.uptr = 0;
+    }
+
+    /** Creates a JoinReader with trigraph translation disabled. */
+    public JoinReader(Reader in) {
+        this(in, false);
+    }
+
+    /* Reads one raw character, preferring pushed-back ones. */
+    private int __read() throws IOException {
+        if (uptr > 0)
+            return unget[--uptr];
+        return in.read();
+    }
+
+    /* Pushes a character back; end-of-input (-1) is never stored. */
+    private void _unread(int c) {
+        if (c != -1)
+            unget[uptr++] = c;
+    }
+
+    /* Reads one character, translating a trigraph if enabled. */
+    private int _read() throws IOException {
+        int c = __read();
+        if (c == '?' && trigraphs) {
+            int d = __read();
+            if (d == '?') {
+                int e = __read();
+                switch (e) {
+                    case '(': return '[';
+                    case ')': return ']';
+                    case '<': return '{';
+                    case '>': return '}';
+                    case '=': return '#';
+                    case '/': return '\\';
+                    case '\'': return '^';
+                    case '!': return '|';
+                    case '-': return '~';
+                }
+                _unread(e);
+            }
+            _unread(d);
+        }
+        return c;
+    }
+
+    /**
+     * Reads one character with line splices removed.
+     *
+     * @return the character, or -1 at end of input once all pending
+     *         newlines have been emitted.
+     */
+    @Override
+    public int read() throws IOException {
+        if (flushnl) {
+            if (newlines > 0) {
+                newlines--;
+                return '\n';
+            }
+            flushnl = false;
+        }
+
+        for (;;) {
+            int c = _read();
+            switch (c) {
+                case '\\':
+                    int d = _read();
+                    switch (d) {
+                        case '\n':
+                            newlines++;
+                            continue;
+                        case '\r':
+                            newlines++;
+                            /* Swallow the '\n' of a "\r\n" pair. */
+                            int e = _read();
+                            if (e != '\n')
+                                _unread(e);
+                            continue;
+                        default:
+                            _unread(d);
+                            return c;
+                    }
+                case '\r':
+                case '\n':
+                case '\u2028':
+                case '\u2029':
+                case '\u000B':
+                case '\u000C':
+                case '\u0085':
+                    flushnl = true;
+                    return c;
+                case -1:
+                    if (newlines > 0) {
+                        newlines--;
+                        return '\n';
+                    }
+                    return c;
+                default:
+                    return c;
+            }
+        }
+    }
+
+    /**
+     * Reads up to len characters into cbuf, starting at off.
+     *
+     * @return the number of characters stored, or -1 if the end of
+     *         the input was reached before any character was read.
+     */
+    @Override
+    public int read(char cbuf[], int off, int len)
+                        throws IOException {
+        for (int i = 0; i < len; i++) {
+            int ch = read();
+            if (ch == -1) {
+                /* Returning 0 here would make callers which loop
+                 * until -1 spin forever. */
+                return (i == 0) ? -1 : i;
+            }
+            cbuf[off + i] = (char)ch;
+        }
+        return len;
+    }
+
+    /** Closes the underlying reader. */
+    @Override
+    public void close()
+                        throws IOException {
+        in.close();
+    }
+
+    @Override
+    public String toString() {
+        return "JoinReader(nl=" + newlines + ")";
+    }
+
+/*
+    public static void main(String[] args) throws IOException {
+        FileReader f = new FileReader(new File(args[0]));
+        BufferedReader b = new BufferedReader(f);
+        JoinReader r = new JoinReader(b);
+        BufferedWriter w = new BufferedWriter(
+                new java.io.OutputStreamWriter(System.out)
+                    );
+        int c;
+        while ((c = r.read()) != -1) {
+            w.write((char)c);
+        }
+        w.close();
+    }
+*/
+
+}
diff --git a/src/java/org/anarres/cpp/LexerException.java b/src/java/org/anarres/cpp/LexerException.java
new file mode 100644
index 0000000..a4b5e2e
--- /dev/null
+++ b/src/java/org/anarres/cpp/LexerException.java
@@ -0,0 +1,35 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+/**
+ * A preprocessor exception.
+ *
+ * Note to users: I don't really like the name of this class. S.
+ */
+public class LexerException extends Exception {
+ /** Creates a LexerException with the given detail message. */
+ public LexerException(String msg) {
+ super(msg);
+ }
+
+ /** Creates a LexerException which wraps the given cause. */
+ public LexerException(Throwable cause) {
+ super(cause);
+ }
+}
diff --git a/src/java/org/anarres/cpp/LexerSource.java b/src/java/org/anarres/cpp/LexerSource.java
new file mode 100644
index 0000000..a291bff
--- /dev/null
+++ b/src/java/org/anarres/cpp/LexerSource.java
@@ -0,0 +1,677 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.util.Stack;
+
+import static org.anarres.cpp.Token.*;
+
+/** Does not handle digraphs. */
+public class LexerSource extends Source {
+ private static final boolean DEBUG = false;
+
+ private PushbackReader reader;
+ private boolean ppvalid;
+ private boolean bol;
+ private boolean include;
+
+ private int line;
+ private int column;
+ private int lastcolumn;
+ private boolean cr;
+
+ /* ppvalid is:
+ * false in StringLexerSource,
+ * true in FileLexerSource */
+ /**
+ * Creates a lexer over the given Reader.
+ *
+ * The reader is wrapped in a JoinReader (created without trigraph
+ * support) and then in a PushbackReader with room for five
+ * characters. Lexing starts at line 1, column 0, at the beginning
+ * of a line and outside of include mode.
+ */
+ public LexerSource(Reader r, boolean ppvalid) {
+ this.reader = new PushbackReader(new JoinReader(r), 5);
+ this.ppvalid = ppvalid;
+ this.bol = true;
+ this.include = false;
+
+ this.line = 1;
+ this.column = 0;
+ this.lastcolumn = -1;
+ this.cr = false;
+ }
+
+ /** Returns the current line number; counting starts at 1. */
+ @Override
+ public int getLine() {
+ return line;
+ }
+
+ /** Returns the current column; it is reset to 0 at each line break. */
+ public int getColumn() {
+ return column;
+ }
+
+ /* Always true: this source keeps track of line and column numbers. */
+ /* pp */ boolean isNumbered() {
+ return true;
+ }
+
+/* Error handling - this lot is barely worth it. */
+
+ private final void _error(String msg, boolean error)
+ throws LexerException {
+ int _l = line;
+ int _c = column;
+ if (_c == 0) {
+ _c = lastcolumn;
+ _l--;
+ }
+ else {
+ _c--;
+ }
+ if (error)
+ super.error(_l, _c, msg);
+ else
+ super.warning(_l, _c, msg);
+ }
+
+ /* Reports msg as an error at the position of the last character read. */
+ private final void error(String msg)
+ throws LexerException {
+ _error(msg, true);
+ }
+
+ /* Reports msg as a warning at the position of the last character read. */
+ private final void warning(String msg)
+ throws LexerException {
+ _error(msg, false);
+ }
+
+/* A flag for string handling. */
+
+ /* Switches include mode on or off. While it is on, token() lexes
+ * '<' as the start of a header name, string() does not interpret
+ * escapes, and each newline is returned as a separate NL token. */
+ /* pp */ void setInclude(boolean b) {
+ this.include = b;
+ }
+
+/*
+ private boolean _isLineSeparator(int c) {
+ return Character.getType(c) == Character.LINE_SEPARATOR
+ || c == -1;
+ }
+*/
+
+ /* XXX Move to JoinReader and canonicalise newlines. */
+ /* Tells whether c ends a line. End of input (-1) counts as a line
+  * end too. */
+ private static final boolean isLineSeparator(int c) {
+     if (c == -1)
+         return true;
+     switch ((char)c) {
+         case '\n':
+         case '\r':
+         case '\u000B':
+         case '\u000C':
+         case '\u0085':
+         case '\u2028':
+         case '\u2029':
+             return true;
+     }
+     return false;
+ }
+
+
+ /* Reads one character from the pushback reader and keeps line,
+ * column and lastcolumn up to date. The cr flag remembers that the
+ * previous character was a '\r', so that the '\n' of a "\r\n" pair
+ * is not counted as a second line break. */
+ private int read() throws IOException {
+ int c = reader.read();
+ switch (c) {
+ case '\r':
+ cr = true;
+ line++;
+ lastcolumn = column;
+ column = 0;
+ break;
+ case '\n':
+ /* Second half of "\r\n": the line was already counted. */
+ if (cr) {
+ cr = false;
+ break;
+ }
+ /* fallthrough */
+ case '\u2028':
+ case '\u2029':
+ case '\u000B':
+ case '\u000C':
+ case '\u0085':
+ cr = false;
+ line++;
+ lastcolumn = column;
+ column = 0;
+ break;
+ default:
+ /* Note that end of input (-1) also advances the column. */
+ cr = false;
+ column++;
+ break;
+ }
+
+/*
+ if (isLineSeparator(c)) {
+ line++;
+ lastcolumn = column;
+ column = 0;
+ }
+ else {
+ column++;
+ }
+*/
+
+ return c;
+ }
+
+ /* Pushes c back onto the input and rewinds the position counters.
+  * Only one column (lastcolumn) is remembered per line break, so
+  * AT MOST one newline can be ungot. End of input (-1) is ignored. */
+ private void unread(int c)
+                     throws IOException {
+     if (c == -1)
+         return;
+
+     if (!isLineSeparator(c)) {
+         column--;
+     }
+     else {
+         line--;
+         column = lastcolumn;
+         cr = false;
+     }
+     reader.unread(c);
+ }
+
+ private Token ccomment()
+ throws IOException {
+ StringBuilder text = new StringBuilder("/*");
+ int d;
+ do {
+ do {
+ d = read();
+ text.append((char)d);
+ } while (d != '*');
+ do {
+ d = read();
+ text.append((char)d);
+ } while (d == '*');
+ } while (d != '/');
+ return new Token(COMMENT, text.toString());
+ }
+
+ /* Lexes the rest of a C++ comment; the two leading slashes have
+  * already been consumed. The line separator (or end of input)
+  * which ends the comment is pushed back and is not part of the
+  * token text. */
+ private Token cppcomment()
+                     throws IOException {
+     StringBuilder body = new StringBuilder("//");
+     int ch;
+     for (ch = read(); !isLineSeparator(ch); ch = read())
+         body.append((char)ch);
+     unread(ch);
+     return new Token(COMMENT, body.toString());
+ }
+
+ /* Lexes an escape sequence; the backslash has already been consumed
+  * (and appended to text by the caller). The characters of the
+  * sequence are appended to text, and the value of the escaped
+  * character is returned.
+  *
+  * Octal escapes take up to three digits, hex escapes up to two.
+  * An unknown escape produces a warning and yields the character
+  * itself. */
+ private int escape(StringBuilder text)
+                     throws IOException,
+                             LexerException {
+     int d = read();
+     switch (d) {
+         case 'a': text.append('a'); return 0x07;    /* BEL */
+         case 'b': text.append('b'); return '\b';
+         case 'f': text.append('f'); return '\f';
+         case 'n': text.append('n'); return '\n';
+         case 'r': text.append('r'); return '\r';
+         case 't': text.append('t'); return '\t';
+         case 'v': text.append('v'); return 0x0b;
+         case '\\': text.append('\\'); return '\\';
+
+         case '0': case '1': case '2': case '3':
+         case '4': case '5': case '6': case '7': {
+             int len = 0;
+             int val = 0;
+             do {
+                 val = (val << 3) + Character.digit(d, 8);
+                 text.append((char)d);
+                 d = read();
+             } while (++len < 3 && Character.digit(d, 8) != -1);
+             unread(d);
+             return val;
+         }
+
+         case 'x': {
+             /* The 'x' is part of the text but not of the value;
+              * only the digits which follow it are. */
+             text.append((char)d);
+             int len = 0;
+             int val = 0;
+             d = read();
+             while (len < 2 && Character.digit(d, 16) != -1) {
+                 val = (val << 4) + Character.digit(d, 16);
+                 text.append((char)d);
+                 d = read();
+                 len++;
+             }
+             unread(d);
+             if (len == 0)
+                 warning("\\x used with no following hex digits");
+             return val;
+         }
+
+         /* Exclude two cases from the warning. */
+         case '"': text.append('"'); return '"';
+         case '\'': text.append('\''); return '\'';
+
+         default:
+             warning("Unnecessary escape character " + (char)d);
+             text.append((char)d);
+             return d;
+     }
+ }
+
+ /* Lexes a character literal; the opening quote has already been
+ * consumed. Returns a CHARACTER token whose value is the character
+ * as a Character, or an ERROR token (after reporting an error) if
+ * the literal is empty, unterminated, or not closed after a single
+ * character or escape. */
+ private Token character()
+ throws IOException,
+ LexerException {
+ StringBuilder text = new StringBuilder("'");
+ int d = read();
+ if (d == '\\') {
+ text.append('\\');
+ d = escape(text);
+ }
+ else if (isLineSeparator(d)) {
+ /* This branch also covers end of input. */
+ unread(d);
+ error("Unterminated character literal");
+ return new Token(ERROR, text.toString(), null);
+ }
+ else if (d == '\'') {
+ text.append('\'');
+ error("Empty character literal");
+ return new Token(ERROR, text.toString(), null);
+ }
+ else if (!Character.isDefined(d)) {
+ /* Reported, but lexing of the literal carries on. */
+ text.append('?');
+ error("Illegal unicode character literal");
+ }
+ else {
+ text.append((char)d);
+ }
+
+ /* Exactly one character or escape is allowed before the closing
+ * quote. */
+ int e = read();
+ if (e != '\'') {
+ unread(e);
+ error("Illegal character constant");
+ /* XXX We could do some patching up here? */
+ return new Token(ERROR, text.toString(), null);
+ }
+ text.append('\'');
+ /* XXX Bad cast. */
+ return new Token(CHARACTER,
+ text.toString(), Character.valueOf((char)d));
+ }
+
+ /* Lexes a string literal or header name; the opening delimiter has
+  * already been consumed. The token text keeps the delimiters and
+  * escapes as written; the token value has the delimiters stripped
+  * and, outside of include mode, the escapes decoded.
+  *
+  * In include mode escapes are not interpreted: a backslash is an
+  * ordinary character and is kept in the value, so that a name such
+  * as "dir\file.h" survives intact.
+  *
+  * Returns a HEADER token if close is '>', otherwise a STRING token,
+  * or an ERROR token (after reporting an error) if a line end or the
+  * end of the input is reached first. */
+ private Token string(char open, char close)
+                     throws IOException,
+                             LexerException {
+     StringBuilder text = new StringBuilder();
+     text.append(open);
+
+     StringBuilder buf = new StringBuilder();
+
+     for (;;) {
+         int c = read();
+         if (c == close) {
+             break;
+         }
+         else if (c == '\\') {
+             text.append('\\');
+             if (!include) {
+                 char d = (char)escape(text);
+                 buf.append(d);
+             }
+             else {
+                 /* Literal backslash; the character after it is
+                  * handled by the next iteration. */
+                 buf.append('\\');
+             }
+         }
+         else if (c == -1) {
+             unread(c);
+             error("End of file in string literal after " + buf);
+             return new Token(ERROR, text.toString(), null);
+         }
+         else if (isLineSeparator(c)) {
+             unread(c);
+             error("Unterminated string literal after " + buf);
+             return new Token(ERROR, text.toString(), null);
+         }
+         else {
+             text.append((char)c);
+             buf.append((char)c);
+         }
+     }
+     text.append(close);
+     return new Token(close == '>' ? HEADER : STRING,
+                     text.toString(), buf.toString());
+ }
+
+ /* Consumes an integer suffix. d is the character which follows the
+  * digits. An optional 'U' may be followed by an optional 'L' or
+  * 'I'; whatever is accepted is appended to text. A final character
+  * which is not part of the suffix is pushed back. Only upper-case
+  * letters are recognised. */
+ private void number_suffix(StringBuilder text, int d)
+                     throws IOException {
+     int ch = d;
+     if (ch == 'U') {
+         text.append((char)ch);
+         ch = read();
+     }
+
+     if (ch == 'L' || ch == 'I')
+         text.append((char)ch);
+     else
+         unread(ch);
+ }
+
+ /* Lexes an octal constant. The leading zero has already been
+  * consumed, so it is fine for no further digit to follow. */
+ private Token number_octal()
+                     throws IOException,
+                             LexerException {
+     StringBuilder image = new StringBuilder("0");
+     long value = 0;
+     int ch = read();
+     for (int digit = Character.digit(ch, 8);
+                     digit != -1;
+                     digit = Character.digit(ch, 8)) {
+         value = (value << 3) + digit;
+         image.append((char)ch);
+         ch = read();
+     }
+     number_suffix(image, ch);
+     return new Token(INTEGER,
+                     image.toString(), Long.valueOf(value));
+ }
+
+ /* We do not know whether know the first digit is valid. */
+ private Token number_hex(char x)
+ throws IOException,
+ LexerException {
+ StringBuilder text = new StringBuilder("0");
+ text.append(x);
+ int d = read();
+ if (Character.digit(d, 16) == -1) {
+ unread(d);
+ error("Illegal hexadecimal constant " + (char)d);
+ return new Token(ERROR, text.toString(), null);
+ }
+ long val = 0;
+ do {
+ val = (val << 4) + Character.digit(d, 16);
+ text.append((char)d);
+ d = read();
+ } while (Character.digit(d, 16) != -1);
+ number_suffix(text, d);
+ return new Token(INTEGER,
+ text.toString(), Long.valueOf(val));
+ }
+
+ /* Lexes a decimal integer constant starting with the digit c, which
+  * has already been consumed. We know we have at least one valid
+  * digit, but empty is not fine. */
+ /* XXX This needs a complete rewrite. */
+ private Token number_decimal(int c)
+                     throws IOException,
+                             LexerException {
+     /* Start empty: the loop below appends c itself. (Passing a char
+      * to the constructor would select StringBuilder(int capacity),
+      * not seed the text.) */
+     StringBuilder text = new StringBuilder();
+     int d = c;
+     long val = 0;
+     do {
+         val = val * 10 + Character.digit(d, 10);
+         text.append((char)d);
+         d = read();
+     } while (Character.digit(d, 10) != -1);
+     number_suffix(text, d);
+     return new Token(INTEGER,
+                     text.toString(), Long.valueOf(val));
+ }
+
+ /* Lexes an identifier which starts with c (already consumed).
+  * Characters which are valid Java identifier parts are collected;
+  * identifier-ignorable characters are consumed but dropped. The
+  * first character which is neither is pushed back. */
+ private Token identifier(int c)
+                     throws IOException,
+                             LexerException {
+     StringBuilder name = new StringBuilder();
+     name.append((char)c);
+
+     int ch = read();
+     while (Character.isIdentifierIgnorable(ch)
+                     || Character.isJavaIdentifierPart(ch)) {
+         if (!Character.isIdentifierIgnorable(ch))
+             name.append((char)ch);
+         ch = read();
+     }
+     unread(ch);
+     return new Token(IDENTIFIER, name.toString());
+ }
+
+ /* Lexes a run of whitespace which starts with c (already consumed).
+  * When preprocessor directives are valid (ppvalid), the run stops
+  * in front of a line separator, which is left for token() to
+  * handle. */
+ private Token whitespace(int c)
+                     throws IOException,
+                             LexerException {
+     StringBuilder blank = new StringBuilder();
+     blank.append((char)c);
+
+     int ch = read();
+     while (!(ppvalid && isLineSeparator(ch)) /* XXX Ugly. */
+                     && Character.isWhitespace(ch)) {
+         blank.append((char)ch);
+         ch = read();
+     }
+     unread(ch);
+     return new Token(WHITESPACE, blank.toString());
+ }
+
+ /* Chooses between a two-character and a one-character operator:
+  * if the next character is c it is consumed and a token of type
+  * yes is returned; otherwise it is pushed back and a token of type
+  * no is returned. No token processed by cond() contains a
+  * newline. */
+ private Token cond(char c, int yes, int no)
+                     throws IOException {
+     int next = read();
+     if (next != c) {
+         unread(next);
+         return new Token(no);
+     }
+     return new Token(yes);
+ }
+
+ /**
+ * Lexes and returns the next token.
+ *
+ * The line and column at which the token starts are recorded before
+ * its first character is read and attached to the token.
+ * Operators are recognised in the switch below; anything it leaves
+ * unhandled becomes a whitespace, decimal number or identifier
+ * token, or else a token whose type is the character itself.
+ * At the end of the input an EOF token is returned.
+ */
+ public Token token()
+ throws IOException,
+ LexerException {
+ Token tok = null;
+
+ int _l = line;
+ int _c = column;
+
+ int c = read();
+ int d, e;
+
+ switch (c) {
+ case '\n':
+ if (ppvalid) {
+ bol = true;
+ if (include) {
+ tok = new Token(NL, _l, _c, new String("\n"));
+ }
+ else {
+ /* Fold a run of consecutive newlines into one NL
+ * token. */
+ int nls = 0;
+ do {
+ d = read();
+ nls++;
+ } while (d == '\n');
+ unread(d);
+ char[] text = new char[nls];
+ for (int i = 0; i < text.length; i++)
+ text[i] = '\n';
+ // Skip the bol = false below.
+ tok = new Token(NL, _l, _c, new String(text));
+ }
+ if (DEBUG)
+ System.out.println("lx: Returning NL: " + tok);
+ return tok;
+ }
+ /* Let it be handled as whitespace. */
+ break;
+
+ case '!':
+ tok = cond('=', NE, '!');
+ break;
+
+ case '#':
+ /* A '#' is a directive marker (HASH) only if nothing else has
+ * been lexed on this line. */
+ if (bol)
+ tok = new Token(HASH);
+ else
+ tok = cond('#', PASTE, '#');
+ break;
+
+ case '+':
+ d = read();
+ if (d == '+')
+ tok = new Token(INC);
+ else if (d == '=')
+ tok = new Token(PLUS_EQ);
+ else
+ unread(d);
+ break;
+ case '-':
+ d = read();
+ if (d == '-')
+ tok = new Token(DEC);
+ else if (d == '=')
+ tok = new Token(SUB_EQ);
+ else if (d == '>')
+ tok = new Token(ARROW);
+ else
+ unread(d);
+ break;
+
+ case '*':
+ tok = cond('=', MULT_EQ, '*');
+ break;
+ case '/':
+ d = read();
+ if (d == '*')
+ tok = ccomment();
+ else if (d == '/')
+ tok = cppcomment();
+ else if (d == '=')
+ tok = new Token(DIV_EQ);
+ else
+ unread(d);
+ break;
+
+ case '%':
+ tok = cond('=', MOD_EQ, '%');
+ break;
+
+ case ':':
+ /* :: */
+ break;
+
+ case '<':
+ /* In include mode a '<' opens a header name. */
+ if (include) {
+ tok = string('<', '>');
+ }
+ else {
+ d = read();
+ if (d == '=')
+ tok = new Token(LE);
+ else if (d == '<')
+ tok = cond('=', LSH_EQ, LSH);
+ else
+ unread(d);
+ }
+ break;
+
+ case '=':
+ tok = cond('=', EQ, '=');
+ break;
+
+ case '>':
+ d = read();
+ if (d == '=')
+ tok = new Token(GE);
+ else if (d == '>')
+ tok = cond('=', RSH_EQ, RSH);
+ else
+ unread(d);
+ break;
+
+ case '^':
+ tok = cond('=', XOR_EQ, '^');
+ break;
+
+ case '|':
+ d = read();
+ if (d == '=')
+ tok = new Token(OR_EQ);
+ else if (d == '|')
+ tok = cond('=', LOR_EQ, LOR);
+ else
+ unread(d);
+ break;
+ case '&':
+ d = read();
+ if (d == '&')
+ tok = cond('=', LAND_EQ, LAND);
+ else if (d == '=')
+ tok = new Token(AND_EQ);
+ else
+ unread(d);
+ break;
+
+ case '.':
+ d = read();
+ if (d == '.')
+ tok = cond('.', ELLIPSIS, RANGE);
+ else
+ unread(d);
+ /* XXX decimal fraction */
+ break;
+
+ case '0':
+ /* octal or hex */
+ d = read();
+ if (d == 'x' || d == 'X')
+ tok = number_hex((char)d);
+ else {
+ unread(d);
+ tok = number_octal();
+ }
+ break;
+
+ case '\'':
+ tok = character();
+ break;
+
+ case '"':
+ tok = string('"', '"');
+ break;
+
+ case -1:
+ tok = new Token(EOF, _l, _c, "<eof>");
+ break;
+ }
+
+ /* Not an operator handled above (or a lone operator character
+ * whose second character did not match): classify by character
+ * class. */
+ if (tok == null) {
+ if (Character.isWhitespace(c)) {
+ tok = whitespace(c);
+ }
+ else if (Character.isDigit(c)) {
+ tok = number_decimal(c);
+ }
+ else if (Character.isJavaIdentifierStart(c)) {
+ tok = identifier(c);
+ }
+ else {
+ tok = new Token(c);
+ }
+ }
+
+ /* Any token other than NL, whitespace included, ends the
+ * beginning-of-line state. */
+ bol = false;
+
+ tok.setLocation(_l, _c);
+ if (DEBUG)
+ System.out.println("lx: Returning " + tok);
+ // (new Exception("here")).printStackTrace(System.out);
+ return tok;
+ }
+
+}
diff --git a/src/java/org/anarres/cpp/Macro.java b/src/java/org/anarres/cpp/Macro.java
new file mode 100644
index 0000000..0d0ae55
--- /dev/null
+++ b/src/java/org/anarres/cpp/Macro.java
@@ -0,0 +1,157 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * A macro object.
+ *
+ * This encapsulates a name, an argument count, and a token stream
+ * for replacement. The replacement token stream may contain the
+ * extra tokens {@link Token#M_ARG} and {@link Token#M_STRING}.
+ */
+public class Macro {
+ private String name;
+ /* It's an explicit decision to keep these around here. We don't
+ * need to; the argument token type is M_ARG and the value
+ * is the index. The strings themselves are only used in
+ * stringification of the macro, for debugging. */
+ private List<String> args;
+ private boolean variadic;
+ private List<Token> tokens;
+
+ public Macro(String name) {
+ this.name = name;
+ this.args = null;
+ this.variadic = false;
+ this.tokens = new ArrayList<Token>();
+ }
+
+ /**
+ * Returns the name of this macro.
+ */
+ public String getName() {
+ return name;
+ }
+
+ /**
+ * Sets the arguments to this macro.
+ */
+ public void setArgs(List<String> args) {
+ this.args = args;
+ }
+
+ /**
+ * Returns true if this is a function-like macro.
+ */
+ public boolean isFunctionLike() {
+ return args != null;
+ }
+
+ /**
+ * Returns the number of arguments to this macro.
+ */
+ public int getArgs() {
+ return args.size();
+ }
+
+ /**
+ * Sets the variadic flag on this Macro.
+ */
+ public void setVariadic(boolean b) {
+ this.variadic = b;
+ }
+
+ /**
+ * Returns true if this is a variadic function-like macro.
+ */
+ public boolean isVariadic() {
+ return variadic;
+ }
+
+ /**
+ * Adds a token to the expansion of this macro.
+ */
+ public void addToken(Token tok) {
+ this.tokens.add(tok);
+ }
+
+ /**
+ * Adds a "paste" operator to the expansion of this macro.
+ *
+ * A paste operator causes the next token added to be pasted
+ * to the previous token when the macro is expanded.
+ * It is an error for a macro to end with a paste token.
+ */
+ public void addPaste(Token tok) {
+ /*
+ * Given: tok0 ## tok1
+ * We generate: M_PASTE, tok0, tok1
+ * This extends as per a stack language:
+ * tok0 ## tok1 ## tok2 ->
+ * M_PASTE, tok0, M_PASTE, tok1, tok2
+ */
+ this.tokens.add(tokens.size() - 1, tok);
+ }
+
+ /* pp */ List<Token> getTokens() {
+ return tokens;
+ }
+
+ public String toString() {
+ StringBuilder buf = new StringBuilder(name);
+ if (args != null) {
+ buf.append('(');
+ Iterator<String> it = args.iterator();
+ while (it.hasNext()) {
+ buf.append(it.next());
+ if (it.hasNext())
+ buf.append(", ");
+ else if (isVariadic())
+ buf.append("...");
+ }
+ buf.append(')');
+ }
+ if (!tokens.isEmpty()) {
+ boolean paste = false;
+ buf.append(" => ");
+ for (int i = 0; i < tokens.size(); i++) {
+ Token tok = tokens.get(i);
+ if (tok.getType() == Token.M_PASTE) {
+ paste = true;
+ continue;
+ }
+ else {
+ buf.append(tok.getText());
+ }
+ if (paste) {
+ buf.append(" #" + "# ");
+ paste = false;
+ }
+ // buf.append(tokens.get(i));
+ }
+ }
+ return buf.toString();
+ }
+
+}
diff --git a/src/java/org/anarres/cpp/MacroTokenSource.java b/src/java/org/anarres/cpp/MacroTokenSource.java
new file mode 100644
index 0000000..249afdf
--- /dev/null
+++ b/src/java/org/anarres/cpp/MacroTokenSource.java
@@ -0,0 +1,191 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+import static org.anarres.cpp.Token.*;
+
+/* pp */ class MacroTokenSource extends Source {
+ private Macro macro;
+ private Iterator<Token> tokens; /* Pointer into the macro. */
+ private List<Argument> args; /* { unexpanded, expanded } */
+ private Iterator<Token> arg; /* "current expansion" */
+
+ /* pp */ MacroTokenSource(Macro m, List<Argument> args) {
+ this.macro = m;
+ this.tokens = m.getTokens().iterator();
+ this.args = args;
+ this.arg = null;
+ }
+
+ @Override
+ /* pp */ boolean isExpanding(Macro m) {
+ /* When we are expanding an arg, 'this' macro is not
+ * being expanded, and thus we may re-expand it. */
+ if (/* XXX this.arg == null && */ this.macro == m)
+ return true;
+ return super.isExpanding(m);
+ }
+
+ private static void escape(StringBuilder buf, CharSequence cs) {
+ for (int i = 0; i < cs.length(); i++) {
+ char c = cs.charAt(i);
+ switch (c) {
+ case '\\':
+ buf.append("\\\\");
+ break;
+ case '"':
+ buf.append("\\\"");
+ break;
+ case '\n':
+ buf.append("\\n");
+ break;
+ case '\r':
+ buf.append("\\r");
+ break;
+ default:
+ buf.append(c);
+ }
+ }
+ }
+
+ private void concat(StringBuilder buf, Argument arg) {
+ Iterator<Token> it = arg.iterator();
+ while (it.hasNext()) {
+ Token tok = it.next();
+ buf.append(tok.getText());
+ }
+ }
+
+ private Token stringify(Token pos, Argument arg) {
+ StringBuilder buf = new StringBuilder();
+ concat(buf, arg);
+ StringBuilder str = new StringBuilder("\"");
+ escape(str, buf);
+ str.append('\"');
+ return new Token(STRING,
+ pos.getLine(), pos.getColumn(),
+ str.toString(), buf.toString());
+ }
+
+
+ /* At this point, we have consumed the first M_PASTE.
+ * @see Macro#addPaste(Token) */
+ private void paste(Token ptok)
+ throws IOException,
+ LexerException {
+ StringBuilder buf = new StringBuilder();
+ /* We know here that arg is null or expired,
+ * since we cannot paste an expanded arg. */
+
+ int count = 2;
+ for (int i = 0; i < count; i++) {
+ if (!tokens.hasNext())
+ error(ptok.getLine(), ptok.getColumn(),
+ "Paste at end of expansion");
+ Token tok = tokens.next();
+ switch (tok.getType()) {
+ case M_PASTE:
+ /* One extra to paste, plus one because the
+ * paste token didn't count. */
+ count += 2;
+ ptok = tok;
+ break;
+ case M_ARG:
+ int idx = ((Integer)tok.getValue()).intValue();
+ concat(buf, args.get(idx));
+ break;
+ /* XXX Test this. */
+ case COMMENT:
+ break;
+ default:
+ buf.append(tok.getText());
+ break;
+ }
+ }
+
+ /* XXX Somewhere here, need to check that concatenation
+ * produces a valid token. */
+
+ /* Push and re-lex. */
+ StringBuilder src = new StringBuilder();
+ escape(src, buf);
+ StringLexerSource sl = new StringLexerSource(src.toString());
+
+ arg = new SourceIterator(sl);
+ }
+
+ public Token token()
+ throws IOException,
+ LexerException {
+ for (;;) {
+ /* Deal with lexed tokens first. */
+
+ if (arg != null) {
+ if (arg.hasNext())
+ return arg.next();
+ arg = null;
+ }
+
+ if (!tokens.hasNext())
+ return new Token(EOF, -1, -1, ""); /* End of macro. */
+ Token tok = tokens.next();
+ int idx;
+ switch (tok.getType()) {
+ case M_STRING:
+ /* Use the nonexpanded arg. */
+ idx = ((Integer)tok.getValue()).intValue();
+ return stringify(tok, args.get(idx));
+ case M_ARG:
+ /* Expand the arg. */
+ idx = ((Integer)tok.getValue()).intValue();
+ // System.out.println("Pushing arg " + args.get(idx));
+ arg = args.get(idx).expansion();
+ break;
+ case M_PASTE:
+ paste(tok);
+ break;
+ default:
+ return tok;
+ }
+ } /* for */
+ }
+
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append("expansion of ").append(macro.getName());
+ Source parent = getParent();
+ if (parent != null)
+ buf.append(" in ").append(String.valueOf(parent));
+ return buf.toString();
+ }
+}
diff --git a/src/java/org/anarres/cpp/Main.java b/src/java/org/anarres/cpp/Main.java
new file mode 100644
index 0000000..cec7a37
--- /dev/null
+++ b/src/java/org/anarres/cpp/Main.java
@@ -0,0 +1,111 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.File;
+import java.io.IOException;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * (Currently a simple test class).
+ */
+public class Main {
+
+ public static void main(String[] args) throws Exception {
+ List<String> path = new ArrayList<String>();
+ path.add("/usr/include");
+ path.add("/usr/local/include");
+ path.add("/usr/lib/gcc/i686-pc-linux-gnu/4.1.2/include");
+
+ Source source = new FileLexerSource(new File(args[0]));
+ Preprocessor pp = new Preprocessor(source);
+ pp.setIncludePath(path);
+
+ for (int i = 1; i < args.length; i++) {
+ pp.push_source(new FileLexerSource(new File(args[i])),true);
+ }
+
+ Macro m = new Macro("__WORDSIZE");
+ m.addToken(new Token(INTEGER, -1, -1, "32", Integer.valueOf(32)));
+ pp.addMacro(m);
+
+ m = new Macro("__STDC__");
+ m.addToken(new Token(INTEGER, -1, -1, "1", Integer.valueOf(1)));
+ pp.addMacro(m);
+
+ try {
+ for (;;) {
+ Token tok = pp.token();
+ if (tok != null && tok.getType() == Token.EOF)
+ break;
+ switch (2) {
+ case 0:
+ System.out.print(tok);
+ break;
+ case 1:
+ System.out.print("[" + tok.getText() + "]");
+ break;
+ case 2:
+ System.out.print(tok.getText());
+ break;
+ }
+ }
+ }
+ catch (Exception e) {
+ e.printStackTrace();
+ Source s = pp.getSource();
+ while (s != null) {
+ System.out.println(" -> " + s);
+ s = s.getParent();
+ }
+
+ /*
+ Iterator<State> it = pp.states.iterator();
+ while (it.hasNext()) {
+ System.out.println(" -? " + it.next());
+ }
+ */
+
+ }
+
+ Map<String,Macro> macros = pp.getMacros();
+ List<String> keys = new ArrayList<String>(
+ macros.keySet()
+ );
+ Collections.sort(keys);
+ Iterator<String> mt = keys.iterator();
+ while (mt.hasNext()) {
+ String key = mt.next();
+ Macro macro = macros.get(key);
+ System.out.println("#" + "macro " + macro);
+ }
+
+ }
+
+}
diff --git a/src/java/org/anarres/cpp/Preprocessor.java b/src/java/org/anarres/cpp/Preprocessor.java
new file mode 100644
index 0000000..c1b87d7
--- /dev/null
+++ b/src/java/org/anarres/cpp/Preprocessor.java
@@ -0,0 +1,1511 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.File;
+import java.io.IOException;
+
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A C Preprocessor.
+ * The Preprocessor outputs a token stream which does not need
+ * re-lexing for C or C++. Alternatively, the output text may be
+ * reconstructed by concatenating the {@link Token#getText() text}
+ * values of the returned {@link Token Tokens}. (See
+ * {@link CppReader}, which does this.)
+ */
+public class Preprocessor {
+ private static final boolean DEBUG = false;
+
+ public static final int FL_LINEMARKER = 1;
+
+ private static final Macro __LINE__ = new Macro("__LINE__");
+ private static final Macro __FILE__ = new Macro("__FILE__");
+
+ private Map<String,Macro> macros;
+ private Stack<State> states;
+ private Source source;
+
+ private List<String> path;
+ private PreprocessorListener listener;
+
+ private int flags;
+
+ /**
+  * Creates a Preprocessor reading from the given initial Source.
+  *
+  * The __LINE__ and __FILE__ macros are registered, a fresh
+  * {@link PreprocessorListener} is installed, and the initial
+  * Source is pushed without autopop.
+  *
+  * @param initial the first Source on the input stack.
+  * @param flags a bitmask of flags such as {@link #FL_LINEMARKER}.
+  */
+ public Preprocessor(Source initial, int flags) {
+     this.source = null;
+     this.path = null;
+
+     this.macros = new HashMap<String,Macro>();
+     this.macros.put(__LINE__.getName(), __LINE__);
+     this.macros.put(__FILE__.getName(), __FILE__);
+
+     this.states = new Stack<State>();
+     this.states.push(new State());
+
+     setListener(new PreprocessorListener());
+     setFlags(flags);
+     push_source(initial, false);
+
+     /* When line markers are requested, queue an initial marker
+      * (terminated by its own newline) ahead of the first token. */
+     boolean markers = (flags & FL_LINEMARKER) != 0;
+     if (markers) {
+         Token marker = line_token(1, source.getName(), "\n");
+         source_untoken(marker);
+     }
+ }
+
+ /**
+  * Creates a Preprocessor reading from the given Source, with no
+  * flags set.
+  */
+ public Preprocessor(Source initial) {
+     this(initial, 0);
+ }
+
+ /**
+  * Creates a Preprocessor reading from the given file, with no
+  * flags set. Equivalent to
+  * 'new Preprocessor(new {@link FileLexerSource}(file))'.
+  */
+ public Preprocessor(File file)
+         throws IOException {
+     this(new FileLexerSource(file), 0);
+ }
+
+ /**
+  * Installs the listener on this Preprocessor and on every Source
+  * currently on the input stack.
+  */
+ public void setListener(PreprocessorListener listener) {
+     this.listener = listener;
+     for (Source s = source; s != null; s = s.getParent())
+         s.setListener(listener);
+ }
+
+ /**
+  * Replaces the flag bitmask of this Preprocessor.
+  *
+  * @see #FL_LINEMARKER
+  */
+ public void setFlags(int flags) {
+     this.flags = flags;
+ }
+
+ /**
+  * Reports an error at the location of the given token.
+  *
+  * The error is passed to the installed PreprocessorListener
+  * together with the current Source; with no listener installed
+  * it is silently dropped.
+  */
+ protected void error(Token tok, String msg)
+         throws LexerException {
+     if (listener == null)
+         return;
+     int line = tok.getLine();
+     int column = tok.getColumn();
+     listener.handleError(source, line, column, msg);
+ }
+
+ /**
+  * Reports a warning at the location of the given token.
+  *
+  * The warning is passed to the installed PreprocessorListener
+  * together with the current Source; with no listener installed
+  * it is silently dropped.
+  */
+ protected void warning(Token tok, String msg)
+         throws LexerException {
+     if (listener == null)
+         return;
+     int line = tok.getLine();
+     int column = tok.getColumn();
+     /* NOTE(review): warnings are delivered through handleError(),
+      * exactly like errors -- confirm whether the listener has a
+      * dedicated warning callback that should be used instead. */
+     listener.handleError(source, line, column, msg);
+ }
+
+/*
+ public void setSource(Source source) {
+ this.source = source;
+ }
+*/
+
+ public void addMacro(Macro m) throws LexerException {
+ String name = m.getName();
+ /* Already handled as a source error in macro(). */
+ if ("defined".equals(name))
+ throw new LexerException("Cannot redefine name 'defined'");
+ macros.put(m.getName(), m);
+ }
+
+ /**
+  * Defines the given name as an object-like macro whose expansion
+  * is the token sequence lexed from the given value.
+  *
+  * This is a convenience method.
+  *
+  * @throws LexerException if lexing fails, if the underlying reader
+  *         raises an IOException (which is wrapped), or if the name
+  *         may not be defined.
+  */
+ public void addMacro(String name, String value)
+         throws LexerException {
+     try {
+         Macro m = new Macro(name);
+         StringLexerSource lexer = new StringLexerSource(value);
+         Token tok = lexer.token();
+         while (tok.getType() != EOF) {
+             m.addToken(tok);
+             tok = lexer.token();
+         }
+         addMacro(m);
+     }
+     catch (IOException e) {
+         throw new LexerException(e);
+     }
+ }
+
+ /**
+  * Defines the given name as a macro expanding to 1.
+  *
+  * This is a convenience method.
+  */
+ public void addMacro(String name)
+         throws LexerException {
+     final String value = "1";
+     addMacro(name, value);
+ }
+
+ /**
+  * Sets the list of directories searched for included headers.
+  *
+  * The list is stored by reference, not copied.
+  */
+ /* Note for future: Create an IncludeHandler? */
+ public void setIncludePath(List<String> path) {
+     this.path = path;
+ }
+
+ /**
+  * Returns the live Map of macro names to Macros known to this
+  * Preprocessor, including those parsed during the run.
+  */
+ protected Map<String,Macro> getMacros() {
+     return this.macros;
+ }
+
+
+/* States */
+
+ /* Pushes a new conditional State derived from the current top. */
+ private void push_state() {
+     states.push(new State(states.peek()));
+ }
+
+ /* Pops the top conditional State. If that would empty the stack,
+  * the directive was unbalanced: report it and restore the State. */
+ private void pop_state()
+         throws LexerException {
+     State popped = states.pop();
+     if (!states.isEmpty())
+         return;
+     if (listener != null) {
+         listener.handleError(getSource(), 0, 0,
+             "#" + "endif without #" + "if");
+     }
+     states.push(popped);
+ }
+
+ /* True when both the current State and its parent are active. */
+ private boolean isActive() {
+     State top = states.peek();
+     if (!top.isParentActive())
+         return false;
+     return top.isActive();
+ }
+
+
+/* Sources */
+
+ /**
+  * Returns the Source currently on top of the input stack.
+  *
+  * @see Source
+  * @see #push_source(Source,boolean)
+  * @see #pop_source()
+  */
+ protected Source getSource() {
+     return this.source;
+ }
+
+ /**
+  * Pushes a Source onto the input stack.
+  *
+  * The previous top becomes the parent of the new Source, the
+  * current listener is installed on it, and the listener (if any)
+  * is notified with the event "push".
+  *
+  * @see #getSource()
+  * @see #pop_source()
+  */
+ protected void push_source(Source source, boolean autopop) {
+     Source previous = this.source;
+     source.setParent(previous, autopop);
+     source.setListener(listener);
+     this.source = source;
+     if (listener == null)
+         return;
+     listener.handleSourceChange(this.source, "push");
+ }
+
+ /**
+  * Pops a Source from the input stack.
+  *
+  * The parent of the current Source becomes the new top, and the
+  * listener (if any) is notified with the new top and the event
+  * "pop".
+  *
+  * @see #getSource()
+  * @see #push_source(Source,boolean)
+  */
+ protected void pop_source() {
+     Source parent = this.source.getParent();
+     this.source = parent;
+     if (listener == null)
+         return;
+     listener.handleSourceChange(this.source, "pop");
+ }
+
+
+/* Source tokens */
+
+ private Token source_token;
+
+ /* Builds a P_LINE token at column 0 of the given line whose text
+  * is a #line directive naming the source, followed by 'extra'. */
+ private Token line_token(int line, String name, String extra) {
+     StringBuilder text = new StringBuilder("#line ");
+     text.append(line);
+     text.append(" \"").append(name).append("\"");
+     text.append(extra);
+     return new Token(P_LINE, line, 0, text.toString(), null);
+ }
+
+ /* Returns the next raw token: the pushed-back token if there is
+  * one, otherwise the next token of the current Source. An EOF
+  * from an autopop Source pops it and reading continues from its
+  * parent; with line markers enabled and a numbered Source, a
+  * line marker for the parent is returned first. */
+ private Token source_token()
+         throws IOException,
+             LexerException {
+     Token pending = source_token;
+     if (pending != null) {
+         source_token = null;
+         return pending;
+     }
+
+     while (true) {
+         Token tok = source.token();
+         if (tok.getType() != EOF || !source.isAutopop())
+             return tok;
+
+         // System.out.println("Autopop " + source);
+         Source finished = source;
+         pop_source();
+         boolean markers = (flags & FL_LINEMARKER) != 0;
+         if (markers && finished.isNumbered()) {
+             /* Not perfect, but ... */
+             Token newline = new Token(NL, source.getLine(), 0, "\n");
+             source_untoken(newline);
+             return line_token(source.getLine(), source.getName(), "");
+         }
+         /* Otherwise carry on reading from the parent. */
+     }
+ }
+
+ /* Pushes one token back so the next source_token() returns it.
+  * Only a single token of pushback is supported. */
+ private void source_untoken(Token tok) {
+     boolean occupied = (this.source_token != null);
+     if (occupied)
+         throw new IllegalStateException("Cannot return two tokens");
+     this.source_token = tok;
+ }
+
+ /* True for WHITESPACE and COMMENT tokens. */
+ private boolean isWhite(Token tok) {
+     switch (tok.getType()) {
+         case WHITESPACE:
+         case COMMENT:
+             return true;
+         default:
+             return false;
+     }
+ }
+
+ /* Returns the next raw token that is neither whitespace nor a
+  * comment. */
+ private Token source_token_nonwhite()
+         throws IOException,
+             LexerException {
+     Token tok = source_token();
+     while (isWhite(tok))
+         tok = source_token();
+     return tok;
+ }
+
+ /**
+  * Skips the rest of the current line by delegating to
+  * {@link Source#skipline(boolean)} on the current Source, and
+  * returns the NL or EOF token that ended it.
+  *
+  * Using the Source's own token keeps its metadata correct, which
+  * is better than generating a new one.
+  */
+ private Token source_skipline(boolean white)
+         throws IOException,
+             LexerException {
+     Token end = source.skipline(white);
+     return end;
+ }
+
+ /* processes and expands a macro.
+ *
+ * m is the macro to expand and orig is the identifier token that
+ * named it (used for the location of __LINE__ and __FILE__).
+ *
+ * For a function-like macro, whitespace, comments and newlines
+ * are skipped looking for '('; any other token is pushed back and
+ * false is returned, leaving the name unexpanded. The arguments
+ * are then collected up to the matching ')', splitting on commas
+ * at parenthesis depth zero (for a variadic macro, commas in the
+ * last argument are kept), and each argument is expanded.
+ *
+ * Returns true if a Source producing the expansion was pushed;
+ * false if the macro was not invoked, EOF was hit in the
+ * arguments, or the argument count did not match (the latter two
+ * are also reported through error()).
+ */
+ private boolean macro(Macro m, Token orig)
+ throws IOException,
+ LexerException {
+ Token tok;
+ List<Argument> args;
+
+ // System.out.println("pp: expanding " + m);
+
+ if (m.isFunctionLike()) {
+ OPEN: for (;;) {
+ tok = source_token();
+ // System.out.println("pp: open: token is " + tok);
+ switch (tok.getType()) {
+ case WHITESPACE: /* XXX Really? */
+ case COMMENT:
+ case NL:
+ break; /* continue */
+ case '(':
+ break OPEN;
+ default:
+ source_untoken(tok);
+ return false;
+ }
+ }
+
+ // tok = expanded_token_nonwhite();
+ tok = source_token_nonwhite();
+
+ /* We either have, or we should have args.
+ * This deals elegantly with the case that we have
+ * one empty arg. */
+ if (tok.getType() != ')' || m.getArgs() > 0) {
+ args = new ArrayList<Argument>();
+
+ Argument arg = new Argument();
+ int depth = 0;
+ boolean space = false;
+
+ ARGS: for (;;) {
+ // System.out.println("pp: arg: token is " + tok);
+ switch (tok.getType()) {
+ case EOF:
+ error(tok, "EOF in macro args");
+ return false;
+
+ case ',':
+ if (depth == 0) {
+ if (m.isVariadic() &&
+ /* We are building the last arg. */
+ args.size() == m.getArgs() - 1) {
+ /* Just add the comma. */
+ arg.addToken(tok);
+ }
+ else {
+ args.add(arg);
+ arg = new Argument();
+ }
+ }
+ else {
+ arg.addToken(tok);
+ }
+ space = false;
+ break;
+ case ')':
+ if (depth == 0) {
+ args.add(arg);
+ break ARGS;
+ }
+ else {
+ depth--;
+ arg.addToken(tok);
+ }
+ space = false;
+ break;
+ case '(':
+ depth++;
+ arg.addToken(tok);
+ space = false;
+ break;
+
+ case WHITESPACE:
+ case COMMENT:
+ /* Avoid duplicating spaces. */
+ space = true;
+ break;
+
+ default:
+ /* Do not put space on the beginning of
+ * an argument token. */
+ if (space && ! arg.isEmpty())
+ arg.addToken(Token.space);
+ arg.addToken(tok);
+ space = false;
+ break;
+
+ }
+ // tok = expanded_token();
+ tok = source_token();
+ }
+ /* space may still be true here, thus trailing space
+ * is stripped from arguments. */
+
+ if (args.size() != m.getArgs()) {
+ error(tok,
+ "macro " + m.getName() +
+ " has " + m.getArgs() + " parameters " +
+ "but given " + args.size() + " args");
+ /* We could replay the arg tokens, but I
+ * note that GNU cpp does exactly what we do,
+ * i.e. output the macro name and chew the args.
+ */
+ return false;
+ }
+
+ for (int i = 0; i < args.size(); i++) {
+ args.get(i).expand(this);
+ }
+
+ // System.out.println("Macro " + m + " args " + args);
+ }
+ else {
+ /* nargs == 0 and we (correctly) got () */
+ args = null;
+ }
+
+ }
+ else {
+ /* Macro without args. */
+ args = null;
+ }
+
+ if (m == __LINE__) {
+ push_source(new FixedTokenSource(
+ new Token[] { new Token(INTEGER,
+ orig.getLine(), orig.getColumn(),
+ String.valueOf(orig.getLine()),
+ Integer.valueOf(orig.getLine())) }
+ ), true);
+ }
+ else if (m == __FILE__) {
+ File file = source.getFile();
+ push_source(new FixedTokenSource(
+ new Token[] { new Token(STRING,
+ orig.getLine(), orig.getColumn(),
+ '"'+ String.valueOf(file) +'"',
+ file) }
+ ), true);
+ }
+ else {
+ push_source(new MacroTokenSource(m, args), true);
+ }
+
+ return true;
+ }
+
+ /**
+  * Macro-expands the tokens of an argument.
+  *
+  * The tokens are pushed as a temporary Source and read back with
+  * macros expanded. Runs of whitespace and comments collapse to a
+  * single space token, and leading and trailing space is dropped.
+  *
+  * @param arg the unexpanded argument tokens.
+  * @return a new list holding the expanded tokens.
+  */
+ /* I'd rather this were done lazily. */
+ /* pp */ List<Token> expand(List<Token> arg)
+         throws IOException,
+             LexerException {
+     List<Token> result = new ArrayList<Token>();
+     boolean pendingSpace = false;
+
+     push_source(new FixedTokenSource(arg), false);
+     while (true) {
+         Token tok = expanded_token();
+         int type = tok.getType();
+         if (type == EOF)
+             break;
+         if (type == WHITESPACE || type == COMMENT) {
+             pendingSpace = true;
+             continue;
+         }
+         /* Emit at most one space, and never as the first token. */
+         if (pendingSpace && !result.isEmpty())
+             result.add(Token.space);
+         result.add(tok);
+         pendingSpace = false;
+     }
+     pop_source();
+
+     return result;
+ }
+
+ /* processes a #define directive
+ *
+ * Reads the macro name, an optional parameter list (only when '('
+ * immediately follows the name, with no whitespace in between)
+ * and the replacement tokens up to the end of the line, then
+ * registers the macro with addMacro().
+ *
+ * In the replacement list, runs of whitespace and comments become
+ * a single space token; an identifier naming a parameter becomes
+ * an M_ARG token and '#' followed by a parameter name becomes an
+ * M_STRING token, each carrying the parameter index as its value;
+ * a paste operator is recorded through Macro.addPaste().
+ *
+ * Returns the NL or EOF token that ended the directive, or the
+ * result of source_skipline() if the name was invalid.
+ */
+ private Token define()
+ throws IOException,
+ LexerException {
+ Token tok = source_token_nonwhite();
+ if (tok.getType() != IDENTIFIER) {
+ error(tok, "Expected identifier");
+ return source_skipline(false);
+ }
+ /* if predefined */
+
+ String name = tok.getText();
+ if ("defined".equals(name)) {
+ error(tok, "Cannot redefine name 'defined'");
+ return source_skipline(false);
+ }
+
+ Macro m = new Macro(name);
+ List<String> args;
+
+ tok = source_token();
+ if (tok.getType() == '(') {
+ tok = source_token_nonwhite();
+ if (tok.getType() != ')') {
+ args = new ArrayList<String>();
+ ARGS: for (;;) {
+ switch (tok.getType()) {
+ case IDENTIFIER:
+ args.add(tok.getText());
+ break;
+ // case ELLIPSIS:
+ case NL:
+ case EOF:
+ error(tok,
+ "Unterminated macro parameter list");
+ break ARGS;
+ default:
+ source_skipline(false);
+ error(tok,
+ "error in macro parameters: " +
+ tok.getText());
+ /* XXX return? */
+ break ARGS;
+ }
+ tok = source_token_nonwhite();
+ switch (tok.getType()) {
+ case ',':
+ break;
+ case ')':
+ tok = source_token_nonwhite();
+ break ARGS;
+ case ELLIPSIS:
+ tok = source_token_nonwhite();
+ if (tok.getType() != ')')
+ error(tok,
+ "ellipsis must be on last argument");
+ m.setVariadic(true);
+ tok = source_token_nonwhite();
+ break ARGS;
+
+ case NL:
+ case EOF:
+ /* Do not skip line. */
+ error(tok,
+ "Unterminated macro definition");
+ break ARGS;
+ default:
+ source_skipline(false);
+ error(tok,
+ "bad token in macro parameters: " +
+ tok.getText());
+ /* XXX return? */
+ break ARGS;
+ }
+ tok = source_token_nonwhite();
+ }
+ }
+ else {
+ tok = source_token_nonwhite(); /* Lose the ')' */
+ args = Collections.emptyList();
+ }
+
+ m.setArgs(args);
+ }
+ else {
+ /* For searching. */
+ args = Collections.emptyList();
+ if (tok.getType() == COMMENT ||
+ tok.getType() == WHITESPACE) {
+ tok = source_token_nonwhite();
+ }
+ }
+
+ /* Get an expansion for the macro, using indexOf. */
+ boolean space = false;
+ boolean paste = false;
+ /* XXX UGLY: Ensure no space at start.
+ * Careful not to break EOF/LF from above. */
+ if (isWhite(tok)) /* XXX Not sure this can ever happen now. */
+ tok = source_token_nonwhite();
+ int idx;
+
+ EXPANSION: for (;;) {
+ switch (tok.getType()) {
+ case EOF:
+ break EXPANSION;
+ case NL:
+ break EXPANSION;
+
+ case COMMENT:
+ // break;
+ case WHITESPACE:
+ if (!paste)
+ space = true;
+ break;
+
+ case PASTE:
+ space = false;
+ paste = true;
+ m.addPaste(new Token(M_PASTE,
+ tok.getLine(), tok.getColumn(),
+ "#" + "#", null));
+ break;
+
+ case '#':
+ if (space)
+ m.addToken(Token.space);
+ space = false;
+ Token la = source_token_nonwhite();
+ if (la.getType() == IDENTIFIER &&
+ ((idx = args.indexOf(la.getText())) != -1)) {
+ m.addToken(new Token(M_STRING,
+ la.getLine(), la.getColumn(),
+ "#" + la.getText(),
+ Integer.valueOf(idx)));
+ }
+ else {
+ m.addToken(tok);
+ /* Allow for special processing. */
+ source_untoken(la);
+ }
+ break;
+
+ case IDENTIFIER:
+ if (space)
+ m.addToken(Token.space);
+ space = false;
+ paste = false;
+ idx = args.indexOf(tok.getText());
+ if (idx == -1)
+ m.addToken(tok);
+ else
+ m.addToken(new Token(M_ARG,
+ tok.getLine(), tok.getColumn(),
+ tok.getText(),
+ Integer.valueOf(idx)));
+ break;
+
+ default:
+ if (space)
+ m.addToken(Token.space);
+ space = false;
+ paste = false;
+ m.addToken(tok);
+ break;
+ }
+ tok = source_token();
+ }
+
+ // if (DEBUG)
+ // System.out.println("Defined macro " + m);
+ addMacro(m);
+
+ return tok; /* NL or EOF. */
+ }
+
+ private Token undef()
+ throws IOException,
+ LexerException {
+ Token tok = source_token_nonwhite();
+ if (tok.getType() != IDENTIFIER) {
+ error(tok,
+ "Expected identifier, not " + tok.getText());
+ if (tok.getType() == NL || tok.getType() == EOF)
+ return tok;
+ }
+ else {
+ Macro m = macros.get(tok.getText());
+ if (m != null) {
+ /* XXX error if predefined */
+ macros.remove(m.getName());
+ }
+ }
+ return source_skipline(true);
+ }
+
+ /**
+  * Handles a include directive.
+  *
+  * A quoted name is first looked up in the directory of the
+  * including file; failing that, or for an unquoted name, each
+  * entry of the include path is tried in order. The first file
+  * that exists is pushed as an autopop Source. If nothing is
+  * found, an error is reported to the listener, if any.
+  *
+  * The user may override this to provide alternate semantics
+  * for the include directive, for example, creating a Source
+  * based on a virtual file system.
+  *
+  * @param parent the file containing the directive; may be null
+  *        if the current Source is not backed by a file, in which
+  *        case only the include path is searched.
+  * @param line the line number used when reporting an error.
+  * @param name the header name as written in the directive.
+  * @param quoted true for the "name" form, false for the
+  *        &lt;name&gt; form.
+  */
+ protected void include(File parent, int line,
+         String name, boolean quoted)
+         throws IOException,
+             LexerException {
+     if (quoted && parent != null) {
+         /* Resolve against the absolute form: a relative path with
+          * no directory part (e.g. "test.c") has a null parent,
+          * which used to send the lookup to the filesystem root
+          * instead of the working directory. */
+         File dir = parent.getAbsoluteFile().getParentFile();
+         if (dir == null)
+             dir = new File("/");
+         File file = new File(dir, name);
+         // System.err.println("Include: " + file);
+         if (file.exists()) {
+             push_source(new FileLexerSource(file), true);
+             return;
+         }
+     }
+
+     if (path != null) {
+         for (int i = 0; i < path.size(); i++) {
+             File file = new File(
+                 path.get(i) + File.separator + name
+             );
+             if (file.exists()) {
+                 // System.err.println("Include: " + file);
+                 push_source(new FileLexerSource(file), true);
+                 return;
+             }
+         }
+     }
+
+     if (listener != null)
+         listener.handleError(getSource(),
+             line, 0,
+             "Header not found: " + name + " in " + path
+         );
+ }
+
+ /* Processes an #include directive read from the current Source,
+  * which must be a LexerSource. The lexer is switched into include
+  * mode for the duration of the directive and always switched back.
+  * Returns the NL/EOF token ending the line or, with line markers
+  * enabled, a line marker for the included file (the NL/EOF token
+  * is then pushed back to follow it). */
+ private Token include()
+         throws IOException,
+             LexerException {
+     LexerSource lexer = (LexerSource)source;
+     try {
+         lexer.setInclude(true);
+         Token tok = token_nonwhite();
+         int type = tok.getType();
+
+         String name;
+         boolean quoted;
+
+         if (type == STRING) {
+             /* XXX Use the original text, not the value.
+              * Backslashes must not be treated as escapes here. */
+             StringBuilder buf = new StringBuilder((String)tok.getValue());
+             for (;;) {
+                 tok = _token(); /* Do macros but nothing else. */
+                 int t = tok.getType();
+                 if (t == WHITESPACE || t == COMMENT)
+                     continue;
+                 if (t == NL || t == EOF)
+                     break;
+                 if (t != STRING) {
+                     warning(tok,
+                         "Unexpected token on #"+"include line");
+                     return source_skipline(false);
+                 }
+                 /* Adjacent strings are concatenated. */
+                 buf.append((String)tok.getValue());
+             }
+             name = buf.toString();
+             quoted = true;
+         }
+         else if (type == HEADER) {
+             name = (String)tok.getValue();
+             quoted = false;
+             tok = source_skipline(true);
+         }
+         else {
+             error(tok,
+                 "Expected string or header, not " + tok.getText());
+             /* Only skip if not a NL or EOF already. */
+             if (type == NL || type == EOF)
+                 return tok;
+             return source_skipline(false);
+         }
+
+         /* Do the inclusion. */
+         include(source.getFile(), tok.getLine(), name, quoted);
+
+         if ((flags & FL_LINEMARKER) == 0)
+             return tok;
+
+         /* 'tok' is the 'nl' after the include. We use it after the
+          * #line directive. */
+         source_untoken(tok);
+         return line_token(1, name, "");
+     }
+     finally {
+         lexer.setInclude(false);
+     }
+ }
+
+ /* For #error and #warning. */
+ private void error(Token pptok, boolean is_error)
+ throws IOException,
+ LexerException {
+ StringBuilder buf = new StringBuilder();
+ buf.append('#').append(pptok.getText()).append(' ');
+ /* Peculiar construction to ditch first whitespace. */
+ Token tok = source_token_nonwhite();
+ ERROR: for (;;) {
+ switch (tok.getType()) {
+ case NL:
+ case EOF:
+ break ERROR;
+ default:
+ buf.append(tok.getText());
+ break;
+ }
+ tok = source_token();
+ }
+ if (is_error)
+ error(pptok, buf.toString());
+ else
+ warning(pptok, buf.toString());
+ }
+
+
+
+
+ /* Returns the next token with macros expanded.
+  * This bypasses token() for #elif expressions.
+  * If we don't do this, then isActive() == false
+  * causes token() to simply chew the entire input line. */
+ private Token expanded_token()
+         throws IOException,
+             LexerException {
+     while (true) {
+         Token tok = source_token();
+         // System.out.println("Source token is " + tok);
+         if (tok.getType() != IDENTIFIER)
+             return tok;
+         Macro m = macros.get(tok.getText());
+         /* Return the identifier itself unless it names a macro that
+          * is not already being expanded and macro() pushed an
+          * expansion; in that case read on from the expansion. */
+         if (m == null || source.isExpanding(m) || !macro(m, tok))
+             return tok;
+     }
+ }
+
+ /* Returns the next macro-expanded token that is neither
+  * whitespace nor a comment. */
+ private Token expanded_token_nonwhite()
+         throws IOException,
+             LexerException {
+     Token tok = expanded_token();
+     while (isWhite(tok)) {
+         // System.out.println("expanded token is " + tok);
+         tok = expanded_token();
+     }
+     return tok;
+ }
+
+
+ private Token expr_token = null;
+
+ /* Returns the next token of a conditional expression: the
+  * pushed-back expression token if there is one, otherwise the
+  * next macro-expanded non-white token. The operator 'defined',
+  * with or without parentheses, is evaluated here and replaced by
+  * an INTEGER token of value 1 or 0. */
+ private Token expr_token()
+         throws IOException,
+             LexerException {
+     Token pending = expr_token;
+     if (pending != null) {
+         expr_token = null;
+         return pending;
+     }
+
+     Token tok = expanded_token_nonwhite();
+     boolean isDefined = tok.getType() == IDENTIFIER &&
+         tok.getText().equals("defined");
+     if (!isDefined)
+         return tok;
+
+     /* The operand is read unexpanded. */
+     Token la = source_token_nonwhite();
+     boolean paren = (la.getType() == '(');
+     if (paren)
+         la = source_token_nonwhite();
+
+     boolean found;
+     if (la.getType() != IDENTIFIER) {
+         error(la,
+             "defined() needs identifier, not " +
+             la.getText());
+         found = false;
+     }
+     else {
+         found = macros.containsKey(la.getText());
+     }
+
+     if (found) {
+         tok = new Token(INTEGER,
+             la.getLine(), la.getColumn(),
+             "1", Integer.valueOf(1));
+     }
+     else {
+         tok = new Token(INTEGER,
+             la.getLine(), la.getColumn(),
+             "0", Integer.valueOf(0));
+     }
+
+     if (paren) {
+         la = source_token_nonwhite();
+         if (la.getType() != ')') {
+             /* Keep the stray token for the expression parser. */
+             expr_untoken(la);
+             error(la, "Missing ) in defined()");
+         }
+     }
+
+     return tok;
+ }
+
+    /**
+     * Pushes a token back so that the next call to expr_token()
+     * returns it.
+     *
+     * @throws InternalException if a token is already pushed back.
+     */
+    private void expr_untoken(Token tok)
+                        throws LexerException {
+        if (expr_token == null) {
+            expr_token = tok;
+            return;
+        }
+        throw new InternalException(
+                "Cannot unget two expression tokens."
+                    );
+    }
+
+    /**
+     * Returns the binding priority of a binary operator token.
+     *
+     * Larger values bind tighter. Any token that is not a recognised
+     * binary operator yields 0.
+     */
+    private int expr_priority(Token op) {
+        switch (op.getType()) {
+            case '/':
+            case '%':
+            case '*':
+                return 11;
+            case '+':
+            case '-':
+                return 10;
+            case LSH:
+            case RSH:
+                return 9;
+            case '<':
+            case '>':
+            case LE:
+            case GE:
+                return 8;
+            case EQ:
+            case NE:
+                return 7;
+            case '&':
+                return 6;
+            case '^':
+                return 5;
+            case '|':
+                return 4;
+            case LAND:
+                return 3;
+            case LOR:
+                return 2;
+            case '?':
+                return 1;
+            default:
+                return 0;
+        }
+    }
+
+    /**
+     * Evaluates a conditional-directive expression by precedence
+     * climbing.
+     *
+     * Parses one primary (a parenthesised expression, a unary
+     * operator applied to an operand, an INTEGER, a CHARACTER or an
+     * IDENTIFIER) and then folds in binary operators for as long as
+     * they bind tighter than the given priority. Identifiers that
+     * survive macro expansion evaluate to 0. The first token that
+     * cannot continue the expression is pushed back with
+     * expr_untoken() so that the caller can inspect it.
+     *
+     * The conditional operator is supported and is right
+     * associative. Both of its branches are always parsed and
+     * evaluated, so an error in the branch that is not selected (for
+     * example a division by zero) is still reported; the logical
+     * operators likewise evaluate both operands.
+     *
+     * @param priority the priority of the operator to the left of
+     * this sub-expression; 0 requests a full expression.
+     * @return the value of the expression, or 0 once an error has
+     * been reported.
+     */
+    private long expr(int priority)
+                        throws IOException,
+                                LexerException {
+        Token tok = expr_token();
+        long lhs, rhs;
+
+        switch (tok.getType()) {
+            case '(':
+                lhs = expr(0);
+                tok = expr_token();
+                if (tok.getType() != ')') {
+                    expr_untoken(tok);
+                    error(tok, "missing ) in expression");
+                    return 0;
+                }
+                break;
+
+            /* Unary operators take an operand at the highest
+             * priority, so they bind tighter than any binary one. */
+            case '~': lhs = ~expr(11); break;
+            case '!': lhs = expr(11) == 0 ? 1 : 0; break;
+            case '-': lhs = -expr(11); break;
+            case INTEGER:
+                lhs = ((Number)tok.getValue()).longValue();
+                break;
+            case CHARACTER:
+                lhs = (long)((Character)tok.getValue()).charValue();
+                break;
+            case IDENTIFIER:
+                /* XXX warn */
+                lhs = 0;
+                break;
+
+            default:
+                expr_untoken(tok);
+                error(tok,
+                    "Bad token in expression: " + tok.getText());
+                return 0;
+        }
+
+        EXPR: for (;;) {
+            Token op = expr_token();
+            int pri = expr_priority(op);    /* 0 if not a binop. */
+            if (pri == 0 || priority >= pri) {
+                expr_untoken(op);
+                break EXPR;
+            }
+            /* The middle operand of the conditional operator is a
+             * full expression delimited by the colon; every other
+             * operator takes a right operand that binds tighter
+             * than the operator itself. */
+            rhs = expr(op.getType() == '?' ? 0 : pri);
+            switch (op.getType()) {
+                case '/':
+                    if (rhs == 0) {
+                        error(op, "Division by zero");
+                        lhs = 0;
+                    }
+                    else {
+                        lhs = lhs / rhs;
+                    }
+                    break;
+                case '%':
+                    if (rhs == 0) {
+                        error(op, "Modulus by zero");
+                        lhs = 0;
+                    }
+                    else {
+                        lhs = lhs % rhs;
+                    }
+                    break;
+                case '*': lhs = lhs * rhs; break;
+                case '+': lhs = lhs + rhs; break;
+                case '-': lhs = lhs - rhs; break;
+                case '<': lhs = lhs < rhs ? 1 : 0; break;
+                case '>': lhs = lhs > rhs ? 1 : 0; break;
+                case '&': lhs = lhs & rhs; break;
+                case '^': lhs = lhs ^ rhs; break;
+                case '|': lhs = lhs | rhs; break;
+
+                case LSH: lhs = lhs << rhs; break;
+                case RSH: lhs = lhs >> rhs; break;
+                case LE: lhs = lhs <= rhs ? 1 : 0; break;
+                case GE: lhs = lhs >= rhs ? 1 : 0; break;
+                case EQ: lhs = lhs == rhs ? 1 : 0; break;
+                case NE: lhs = lhs != rhs ? 1 : 0; break;
+                case LAND: lhs = (lhs != 0) && (rhs != 0) ? 1 : 0; break;
+                case LOR: lhs = (lhs != 0) || (rhs != 0) ? 1 : 0; break;
+
+                case '?': {
+                    /* rhs holds the value of the true branch. */
+                    tok = expr_token();
+                    if (tok.getType() != ':') {
+                        expr_untoken(tok);
+                        error(tok,
+                            "Missing : in conditional expression");
+                        return 0;
+                    }
+                    /* Priority 0 lets the false branch contain a
+                     * further conditional: right associativity. */
+                    long falseResult = expr(0);
+                    lhs = (lhs != 0) ? rhs : falseResult;
+                    break;
+                }
+
+                default:
+                    error(op,
+                        "Unexpected operator " + op.getText());
+                    return 0;
+
+            }
+        }
+
+        return lhs;
+    }
+
+    /**
+     * Reduces a token to the line breaks it contains.
+     *
+     * Counts the line terminators in the text of the given token and
+     * returns a WHITESPACE token, at the same line and column, whose
+     * text is that many newline characters. A carriage return
+     * immediately followed by a line feed counts as one terminator.
+     *
+     * @param tok the token whose text is scanned.
+     * @return a new WHITESPACE token holding only newlines.
+     */
+    private Token toWhitespace(Token tok) {
+        String text = tok.getText();
+        int len = text.length();
+        /* True only while the previous character was a CR. */
+        boolean cr = false;
+        int nls = 0;
+
+        for (int i = 0; i < len; i++) {
+            char c = text.charAt(i);
+
+            switch (c) {
+                case '\r':
+                    cr = true;
+                    nls++;
+                    break;
+                case '\n':
+                    if (cr) {
+                        /* Second half of a CR LF pair, already
+                         * counted when the CR was seen. */
+                        cr = false;
+                        break;
+                    }
+                    /* fallthrough */
+                case '\u2028':
+                case '\u2029':
+                case '\u000B':
+                case '\u000C':
+                case '\u0085':
+                    cr = false;
+                    nls++;
+                    break;
+                default:
+                    /* Any other character ends a pending CR, so that
+                     * a later LF counts as a terminator of its own
+                     * instead of being mistaken for part of a pair. */
+                    cr = false;
+                    break;
+            }
+        }
+
+        char[] cbuf = new char[nls];
+        Arrays.fill(cbuf, '\n');
+        return new Token(WHITESPACE,
+                tok.getLine(), tok.getColumn(),
+                new String(cbuf));
+    }
+
+ /*
+ * Core token loop of the preprocessor.
+ *
+ * Reads tokens from the current source and returns the next token to
+ * hand to the caller. An identifier that names a macro which is not
+ * already being expanded is passed to macro(). A HASH token starts
+ * directive processing: the directive name is looked up in ppcmds and
+ * dispatched in the inner switch.
+ *
+ * While isActive() is false, whitespace and comments are reduced to
+ * their line breaks by toWhitespace(), any other ordinary token causes
+ * the rest of the line to be skipped, and only HASH, NL and EOF reach
+ * the main switch.
+ *
+ * Throws InternalException for a token type the main switch does not
+ * list, and for a directive that is present in ppcmds but has no case
+ * in the inner switch.
+ */
+ private final Token _token()
+ throws IOException,
+ LexerException {
+
+ Token tok;
+ for (;;) {
+ if (!isActive()) {
+ /* NOTE(review): no call is made at either of the two markers
+ * below; they appear to mark a hook that was never wired up. */
+ /* Tell lexer to ignore warnings. */
+ tok = source_token();
+ /* Tell lexer to stop ignoring warnings. */
+ switch (tok.getType()) {
+ case HASH:
+ case NL:
+ case EOF:
+ /* The preprocessor has to take action here. */
+ break;
+ case WHITESPACE:
+ case COMMENT:
+ // Patch up to preserve whitespace.
+ /* XXX We might want to return tok here in C */
+ return toWhitespace(tok);
+ default:
+ // Return NL to preserve whitespace.
+ return source_skipline(false);
+ }
+ }
+ else {
+ tok = source_token();
+ }
+
+ LEX: switch (tok.getType()) {
+ case EOF:
+ /* Pop the stacks. */
+ return tok;
+
+ case WHITESPACE:
+ case NL:
+ return tok;
+
+ case COMMENT:
+ return tok;
+
+ /* Punctuation and literal tokens pass through unchanged. */
+ case '!': case '%': case '&':
+ case '(': case ')': case '*':
+ case '+': case ',': case '-':
+ case '/': case ':': case ';':
+ case '<': case '=': case '>':
+ case '?': case '[': case ']':
+ case '^': case '{': case '|':
+ case '}': case '~': case '.':
+
+ // case '#':
+
+ case AND_EQ:
+ case ARROW:
+ case CHARACTER:
+ case DEC:
+ case DIV_EQ:
+ case ELLIPSIS:
+ case EQ:
+ case GE:
+ case HEADER: /* Should only arise from include() */
+ case INC:
+ case LAND:
+ case LE:
+ case LOR:
+ case LSH:
+ case LSH_EQ:
+ case SUB_EQ:
+ case MOD_EQ:
+ case MULT_EQ:
+ case NE:
+ case OR_EQ:
+ case PLUS_EQ:
+ case RANGE:
+ case RSH:
+ case RSH_EQ:
+ case STRING:
+ case XOR_EQ:
+ return tok;
+
+ case INTEGER:
+ return tok;
+
+ case IDENTIFIER:
+ Macro m = macros.get(tok.getText());
+ if (m == null)
+ return tok;
+ if (source.isExpanding(m))
+ return tok;
+ /* When macro() returns true the identifier is not returned;
+ * the loop goes round for the next token instead. */
+ if (macro(m, tok))
+ break;
+ return tok;
+
+ case P_LINE:
+ /* Line markers are only passed on when FL_LINEMARKER is set;
+ * otherwise they are dropped and the loop continues. */
+ if ((flags & FL_LINEMARKER) != 0)
+ return tok;
+ break;
+
+ case ERROR:
+ return tok;
+
+ default:
+ throw new InternalException("Bad token " + tok);
+ // break;
+
+ case HASH:
+ tok = source_token_nonwhite();
+ // (new Exception("here")).printStackTrace();
+ switch (tok.getType()) {
+ case NL:
+ break LEX; /* Some code has #\n */
+ case IDENTIFIER:
+ break;
+ default:
+ error(tok,
+ "Preprocessor directive not a word " +
+ tok.getText());
+ return source_skipline(false);
+ }
+ Integer _ppcmd = ppcmds.get(tok.getText());
+ if (_ppcmd == null) {
+ error(tok,
+ "Unknown preprocessor directive " +
+ tok.getText());
+ return source_skipline(false);
+ }
+ int ppcmd = _ppcmd.intValue();
+
+ switch (ppcmd) {
+
+ case PP_DEFINE:
+ if (!isActive())
+ return source_skipline(false);
+ else
+ return define();
+ // break;
+
+ case PP_UNDEF:
+ if (!isActive())
+ return source_skipline(false);
+ else
+ return undef();
+ // break;
+
+ case PP_INCLUDE:
+ if (!isActive())
+ return source_skipline(false);
+ else
+ return include();
+ // break;
+
+ case PP_WARNING:
+ case PP_ERROR:
+ if (!isActive())
+ return source_skipline(false);
+ else
+ error(tok, ppcmd == PP_ERROR);
+ /* error(Token, boolean) has read up to the end of the line;
+ * nothing is returned here and the loop continues. */
+ break;
+
+ case PP_IF:
+ push_state();
+ if (!isActive()) {
+ return source_skipline(false);
+ }
+ expr_token = null;
+ states.peek().setActive(expr(0) != 0);
+ /* Fetch the token that expr() pushed back when it stopped. */
+ tok = expr_token(); /* unget */
+ if (tok.getType() == NL)
+ return tok;
+ return source_skipline(true);
+ // break;
+
+ case PP_ELIF:
+ State state = states.peek();
+ if (false) {
+ /* Check for 'if' */ ;
+ }
+ else if (state.sawElse()) {
+ error(tok,
+ "#elif after #" + "else");
+ return source_skipline(false);
+ }
+ else if (!state.isParentActive()) {
+ /* Nested in skipped 'if' */
+ return source_skipline(false);
+ }
+ else if (state.isActive()) {
+ /* The 'if' part got executed. */
+ state.setParentActive(false);
+ /* This is like # else # if but with
+ * only one # end. */
+ state.setActive(false);
+ return source_skipline(false);
+ }
+ else {
+ expr_token = null;
+ state.setActive(expr(0) != 0);
+ tok = expr_token(); /* unget */
+ if (tok.getType() == NL)
+ return tok;
+ return source_skipline(true);
+ }
+ // break;
+
+ case PP_ELSE:
+ /* Reuses the local declared in the PP_ELIF case above. */
+ state = states.peek();
+ if (false)
+ /* Check for 'if' */ ;
+ else if (state.sawElse()) {
+ error(tok,
+ "#" + "else after #" + "else");
+ return source_skipline(false);
+ }
+ else {
+ state.setSawElse();
+ state.setActive(! state.isActive());
+ return source_skipline(true);
+ }
+ // break;
+
+ case PP_IFDEF:
+ push_state();
+ if (!isActive()) {
+ return source_skipline(false);
+ }
+ else {
+ tok = source_token_nonwhite();
+ // System.out.println("ifdef " + tok);
+ if (tok.getType() != IDENTIFIER) {
+ error(tok,
+ "Expected identifier, not " +
+ tok.getText());
+ return source_skipline(false);
+ }
+ else {
+ String text = tok.getText();
+ boolean exists =
+ macros.containsKey(text);
+ states.peek().setActive(exists);
+ return source_skipline(true);
+ }
+ }
+ // break;
+
+ case PP_IFNDEF:
+ push_state();
+ if (!isActive()) {
+ return source_skipline(false);
+ }
+ else {
+ tok = source_token_nonwhite();
+ if (tok.getType() != IDENTIFIER) {
+ error(tok,
+ "Expected identifier, not " +
+ tok.getText());
+ return source_skipline(false);
+ }
+ else {
+ String text = tok.getText();
+ boolean exists =
+ macros.containsKey(text);
+ states.peek().setActive(!exists);
+ return source_skipline(true);
+ }
+ }
+ // break;
+
+ case PP_ENDIF:
+ pop_state();
+ return source_skipline(true);
+ // break;
+
+ /* The line and pragma directives are accepted and their
+ * lines skipped without further action. */
+ case PP_LINE:
+ return source_skipline(false);
+ // break;
+
+ case PP_PRAGMA:
+ return source_skipline(false);
+ // break;
+
+ default:
+ /* Actual unknown directives are
+ * processed above. If we get here,
+ * we succeeded the map lookup but
+ * failed to handle it. Therefore,
+ * this is (unconditionally?) fatal. */
+ // if (isActive()) /* XXX Could be warning. */
+ throw new InternalException(
+ "Internal error: Unknown directive "
+ + tok);
+ // return source_skipline(false);
+ }
+
+
+ }
+ }
+ }
+
+    /**
+     * Returns the next token from _token() for which isWhite()
+     * is false.
+     */
+    private Token token_nonwhite()
+                        throws IOException,
+                                LexerException {
+        Token tok = _token();
+        while (isWhite(tok))
+            tok = _token();
+        return tok;
+    }
+
+    /**
+     * Returns the next preprocessor token.
+     *
+     * When DEBUG is set, the token is also printed to standard output
+     * before it is returned.
+     *
+     * @see Token
+     * @throws LexerException if a preprocessing error occurs.
+     * @throws InternalException if an unexpected error condition arises.
+     */
+    public Token token()
+                        throws IOException,
+                                LexerException {
+        final Token result = _token();
+        if (DEBUG) {
+            System.out.println("pp: Returning " + result);
+        }
+        return result;
+    }
+
+ /* Directive codes. The template directives below emit one PP_
+ * constant per directive name, numbered upwards from 1 in list order.
+ * NOTE(review): the directive syntax looks like Apache Velocity;
+ * confirm against the build before editing these lines. */
+#set ($i = 1) /* First ppcmd is 1, not 0. */
+#set ($ppcmds = [ "define", "elif", "else", "endif", "error", "if", "ifdef", "ifndef", "include", "line", "pragma", "undef", "warning" ])
+#foreach ($ppcmd in $ppcmds)
+ private static final int PP_$ppcmd.toUpperCase() = $i;
+#set ($i = $i + 1)
+#end
+
+ /* Maps a directive name, as written after the hash, to its PP_ code.
+ * The static block registers every name from the list above. */
+ private static final Map<String,Integer> ppcmds =
+ new HashMap<String,Integer>();
+
+ static {
+#foreach ($ppcmd in $ppcmds)
+ ppcmds.put("$ppcmd", Integer.valueOf(PP_$ppcmd.toUpperCase()));
+#end
+ }
+
+
+    /**
+     * Returns a description of this preprocessor, for debugging.
+     *
+     * Lists the source stack, innermost first, followed by every
+     * defined macro in order of name.
+     */
+    public String toString() {
+        StringBuilder out = new StringBuilder();
+
+        for (Source s = getSource(); s != null; s = s.getParent())
+            out.append(" -> ").append(String.valueOf(s)).append("\n");
+
+        Map<String,Macro> table = getMacros();
+        List<String> names = new ArrayList<String>(table.keySet());
+        Collections.sort(names);
+        for (String name : names) {
+            /* The literal stays split in two so that the template
+             * engine does not take it for a directive. */
+            out.append("#").append("macro ")
+                .append(table.get(name)).append("\n");
+        }
+
+        return out.toString();
+    }
+
+}
diff --git a/src/java/org/anarres/cpp/PreprocessorListener.java b/src/java/org/anarres/cpp/PreprocessorListener.java
new file mode 100644
index 0000000..84a105d
--- /dev/null
+++ b/src/java/org/anarres/cpp/PreprocessorListener.java
@@ -0,0 +1,83 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.File;
+
+public class PreprocessorListener {
+
+ private int errors;
+ private int warnings;
+
+ public PreprocessorListener() {
+ clear();
+ }
+
+ public void clear() {
+ errors = 0;
+ warnings = 0;
+ }
+
+ public int getErrors() {
+ return errors;
+ }
+
+ public int getWarnings() {
+ return warnings;
+ }
+
+ protected void print(String msg) {
+ System.err.println(msg);
+ }
+
+ /**
+ * Handles a warning.
+ *
+ * The behaviour of this method is defined by the
+ * implementation. It may simply record the error message, or
+ * it may throw an exception.
+ */
+ public void handleWarning(Source source, int line, int column,
+ String msg)
+ throws LexerException {
+ warnings++;
+ print(source.getName() + ":" + line + ":" + column +
+ ": warning: " + msg);
+ }
+
+ /**
+ * Handles an error.
+ *
+ * The behaviour of this method is defined by the
+ * implementation. It may simply record the error message, or
+ * it may throw an exception.
+ */
+ public void handleError(Source source, int line, int column,
+ String msg)
+ throws LexerException {
+ errors++;
+ print(source.getName() + ":" + line + ":" + column +
+ ": error: " + msg);
+ }
+
+ public void handleSourceChange(Source source, String event) {
+ }
+
+}
diff --git a/src/java/org/anarres/cpp/Source.java b/src/java/org/anarres/cpp/Source.java
new file mode 100644
index 0000000..2999418
--- /dev/null
+++ b/src/java/org/anarres/cpp/Source.java
@@ -0,0 +1,226 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * An input to the Preprocessor.
+ *
+ * Inputs may come from Files, Strings or other sources. The
+ * preprocessor maintains a stack of Sources. Operations such as
+ * file inclusion or token pasting will push a new source onto
+ * the Preprocessor stack. Sources pop from the stack when they
+ * are exhausted; this may be transparent or explicit.
+ *
+ * BUG: Error messages are not handled properly.
+ */
+public abstract class Source implements Iterable<Token> {
+ private Source parent;
+ private boolean autopop;
+ private PreprocessorListener listener;
+
+ /* LineNumberReader */
+
+/*
+ // We can't do this, since we would lose the LexerException
+ private class Itr implements Iterator {
+ private Token next = null;
+ private void advance() {
+ try {
+ if (next != null)
+ next = token();
+ }
+ catch (IOException e) {
+ throw new UnsupportedOperationException(
+ "Failed to advance token iterator: " +
+ e.getMessage()
+ );
+ }
+ }
+ public boolean hasNext() {
+ return next.getType() != EOF;
+ }
+ public Token next() {
+ advance();
+ Token t = next;
+ next = null;
+ return t;
+ }
+ public void remove() {
+ throw new UnsupportedOperationException(
+ "Cannot remove tokens from a Source."
+ );
+ }
+ }
+*/
+
+ public Source() {
+ this.parent = null;
+ this.autopop = false;
+ }
+
+ /* pp */ void setParent(Source parent, boolean autopop) {
+ this.parent = parent;
+ this.autopop = autopop;
+ }
+
+ /* pp */ final Source getParent() {
+ return parent;
+ }
+
+ /* pp */ void setListener(PreprocessorListener listener) {
+ this.listener = listener;
+ }
+
+ /**
+ * Returns the File currently being lexed.
+ *
+ * If this Source is not a {@link FileLexerSource}, then
+ * it will ask the parent Source, and so forth recursively.
+ * If no Source on the stack is a FileLexerSource, returns null.
+ */
+ /* pp */ File getFile() {
+ Source parent = getParent();
+ while (parent != null) {
+ File file = parent.getFile();
+ if (file != null)
+ return file;
+ parent = parent.getParent();
+ }
+ return null;
+ }
+
+ /* pp */ String getName() {
+ Source parent = getParent();
+ while (parent != null) {
+ String name = parent.getName();
+ if (name != null)
+ return name;
+ parent = parent.getParent();
+ }
+ return null;
+ }
+
+ public int getLine() {
+ Source parent = getParent();
+ if (parent == null)
+ return 0;
+ return parent.getLine();
+ }
+
+ /* pp */ boolean isExpanding(Macro m) {
+ Source parent = getParent();
+ if (parent != null)
+ return parent.isExpanding(m);
+ return false;
+ }
+
+ /**
+ * Returns true if this Source should be transparently popped
+ * from the input stack.
+ *
+ * Examples of such sources are macro expansions.
+ */
+ /* pp */ boolean isAutopop() {
+ return autopop;
+ }
+
+ /* pp */ boolean isNumbered() {
+ return false;
+ }
+
+ /**
+ * Returns the next Token parsed from this input stream.
+ *
+ * @see Token
+ */
+ public abstract Token token()
+ throws IOException,
+ LexerException;
+
+ public Iterator<Token> iterator() {
+ return new SourceIterator(this);
+ }
+
+ /**
+ * Skips tokens until the end of line.
+ *
+ * @param white true if only whitespace is permitted on the
+ * remainder of the line.
+ * @return the NL token.
+ */
+ public Token skipline(boolean white)
+ throws IOException,
+ LexerException {
+ for (;;) {
+ Token tok = token();
+ switch (tok.getType()) {
+ case EOF:
+ /* There ought to be a newline before EOF.
+ * At least, in any skipline context. */
+ /* XXX Are we sure about this? */
+ warning(tok.getLine(), tok.getColumn(),
+ "No newline before end of file");
+ return tok;
+ case NL:
+ /* This may contain one or more newlines. */
+ return tok;
+ case COMMENT:
+ case WHITESPACE:
+ break;
+ default:
+ /* XXX Check white, if required. */
+ if (white)
+ warning(tok.getLine(), tok.getColumn(),
+ "Unexpected nonwhite token");
+ break;
+ }
+ }
+ }
+
+ protected void error(int line, int column, String msg)
+ throws LexerException {
+ if (listener != null)
+ listener.handleError(this, line, column, msg);
+ else
+ throw new LexerException("No handler for error at " + line + ":" + column + ": " + msg);
+ }
+
+ protected void warning(int line, int column, String msg)
+ throws LexerException {
+ if (listener != null)
+ listener.handleWarning(this, line, column, msg);
+ else
+ throw new LexerException("No handler for warning at " + line + ":" + column + ": " + msg);
+ }
+
+}
diff --git a/src/java/org/anarres/cpp/SourceIterator.java b/src/java/org/anarres/cpp/SourceIterator.java
new file mode 100644
index 0000000..ac2bc24
--- /dev/null
+++ b/src/java/org/anarres/cpp/SourceIterator.java
@@ -0,0 +1,94 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.IOException;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * An Iterator over the {@link Token Tokens} of a
+ * {@link Source}.
+ *
+ * One token is read ahead and held until next() hands it out.
+ */
+public class SourceIterator implements Iterator<Token> {
+    private final Source source;
+    /* The read-ahead token; null when none is held. */
+    private Token tok;
+
+    public SourceIterator(Source s) {
+        this.source = s;
+        this.tok = null;
+    }
+
+    /**
+     * Reads a token ahead unless one is already held.
+     *
+     * Rethrows LexerException and IOException inside
+     * IllegalStateException.
+     */
+    private void advance() {
+        if (tok != null)
+            return;
+        try {
+            tok = source.token();
+        }
+        catch (LexerException e) {
+            throw new IllegalStateException(e);
+        }
+        catch (IOException e) {
+            throw new IllegalStateException(e);
+        }
+    }
+
+    /**
+     * Returns true if the enclosed Source has more tokens.
+     *
+     * The EOF token is never returned by the iterator.
+     * @throws IllegalStateException if the Source
+     * throws a LexerException or IOException
+     */
+    public boolean hasNext() {
+        advance();
+        return tok.getType() != EOF;
+    }
+
+    /**
+     * Returns the next token from the enclosed Source.
+     *
+     * The EOF token is never returned by the iterator.
+     * @throws NoSuchElementException if the Source is exhausted.
+     * @throws IllegalStateException if the Source
+     * throws a LexerException or IOException
+     */
+    public Token next() {
+        if (!hasNext())
+            throw new NoSuchElementException();
+        Token held = this.tok;
+        this.tok = null;
+        return held;
+    }
+
+    /**
+     * Not supported.
+     *
+     * @throws UnsupportedOperationException always.
+     */
+    public void remove() {
+        throw new UnsupportedOperationException();
+    }
+}
+
diff --git a/src/java/org/anarres/cpp/State.java b/src/java/org/anarres/cpp/State.java
new file mode 100644
index 0000000..441e71e
--- /dev/null
+++ b/src/java/org/anarres/cpp/State.java
@@ -0,0 +1,69 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+/* pp */ class State {
+ boolean parent;
+ boolean active;
+ boolean sawElse;
+
+ /* pp */ State() {
+ this.parent = true;
+ this.active = true;
+ this.sawElse = false;
+ }
+
+ /* pp */ State(State parent) {
+ this.parent = parent.isParentActive() && parent.isActive();
+ this.active = true;
+ this.sawElse = false;
+ }
+
+ /* Required for #elif */
+ /* pp */ void setParentActive(boolean b) {
+ this.parent = b;
+ }
+
+ /* pp */ boolean isParentActive() {
+ return parent;
+ }
+
+ /* pp */ void setActive(boolean b) {
+ this.active = b;
+ }
+
+ /* pp */ boolean isActive() {
+ return active;
+ }
+
+ /* pp */ void setSawElse() {
+ sawElse = true;
+ }
+
+ /* pp */ boolean sawElse() {
+ return sawElse;
+ }
+
+ public String toString() {
+ return "parent=" + parent +
+ ", active=" + active +
+ ", sawelse=" + sawElse;
+ }
+}
diff --git a/src/java/org/anarres/cpp/StringLexerSource.java b/src/java/org/anarres/cpp/StringLexerSource.java
new file mode 100644
index 0000000..7e7df75
--- /dev/null
+++ b/src/java/org/anarres/cpp/StringLexerSource.java
@@ -0,0 +1,64 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A Source which lexes the contents of a String.
+ *
+ * This class is used by token pasting, but can be used by user
+ * code.
+ */
+public class StringLexerSource extends LexerSource {
+
+    /**
+     * Creates a new Source for lexing the given String.
+     *
+     * @param string the text to lex.
+     * @param ppvalid true if preprocessor directives are to be
+     * honoured within the string.
+     */
+    public StringLexerSource(String string, boolean ppvalid)
+                        throws IOException {
+        super(new StringReader(string), ppvalid);
+    }
+
+    /**
+     * Creates a new Source for lexing the given String.
+     *
+     * Preprocessor directives are not honoured within the string;
+     * use the two-argument constructor to enable them.
+     *
+     * @param string the text to lex.
+     */
+    public StringLexerSource(String string)
+                        throws IOException {
+        super(new StringReader(string), false);
+    }
+
+    /** Returns the fixed description "string literal". */
+    public String toString() {
+        return "string literal";
+    }
+}
diff --git a/src/java/org/anarres/cpp/Token.java b/src/java/org/anarres/cpp/Token.java
new file mode 100644
index 0000000..e5c1319
--- /dev/null
+++ b/src/java/org/anarres/cpp/Token.java
@@ -0,0 +1,215 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+/**
+ * A Preprocessor token.
+ *
+ * A token carries a type, a source position, its text and an
+ * optional semantic value.
+ *
+ * @see Preprocessor
+ */
+public final class Token {
+
+    // public static final int EOF = -1;
+
+    private int type;
+    private int line;
+    private int column;
+    private Object value;
+    private String text;
+
+    public Token(int type, int line, int column,
+                    String text, Object value) {
+        this.type = type;
+        this.line = line;
+        this.column = column;
+        this.text = text;
+        this.value = value;
+    }
+
+    public Token(int type, int line, int column, String text) {
+        this(type, line, column, text, null);
+    }
+
+    /* The position-less constructors use -1 for line and column. */
+
+    /* pp */ Token(int type, String text, Object value) {
+        this(type, -1, -1, text, value);
+    }
+
+    /* pp */ Token(int type, String text) {
+        this(type, -1, -1, text, null);
+    }
+
+    /* Takes the text from the table of fixed token texts. */
+    /* pp */ Token(int type) {
+        this(type, -1, -1, texts[type], null);
+    }
+
+    /**
+     * Returns the semantic type of this token.
+     */
+    public int getType() {
+        return type;
+    }
+
+    /* pp */ void setLocation(int line, int column) {
+        this.line = line;
+        this.column = column;
+    }
+
+    /**
+     * Returns the line at which this token started.
+     *
+     * Lines are numbered from zero.
+     */
+    public int getLine() {
+        return line;
+    }
+
+    /**
+     * Returns the column at which this token started.
+     *
+     * Columns are numbered from zero.
+     */
+    public int getColumn() {
+        return column;
+    }
+
+    /**
+     * Returns the original or generated text of this token.
+     *
+     * This is distinct from the semantic value of the token.
+     *
+     * @see #getValue()
+     */
+    public String getText() {
+        return text;
+    }
+
+    /**
+     * Returns the semantic value of this token.
+     *
+     * For strings, this is the parsed String.
+     * For integers, this is an Integer object.
+     * For other token types, as appropriate.
+     *
+     * @see #getText()
+     */
+    public Object getValue() {
+        return value;
+    }
+
+    /**
+     * Returns a description of this token, for debugging purposes.
+     *
+     * The form is the bracketed type name, with line and column when
+     * they are not -1, then the quoted text (or a rendering of the
+     * type when there is no text), then the value if there is one.
+     */
+    public String toString() {
+        StringBuilder out = new StringBuilder("[");
+
+        out.append(getTokenName(type));
+        if (line != -1) {
+            out.append('@').append(line);
+            if (column != -1)
+                out.append(',').append(column);
+        }
+        out.append("]:");
+        if (text == null) {
+            if (type > 3 && type < 256)
+                out.append( (char)type );
+            else
+                out.append('<').append(type).append('>');
+        }
+        else {
+            out.append('"').append(text).append('"');
+        }
+        if (value != null)
+            out.append('=').append(value);
+        return out.toString();
+    }
+
+    /**
+     * Returns the descriptive name of the given token type.
+     *
+     * This is mostly used for stringification and debugging.
+     * Types outside the name table are reported as Invalid, and
+     * types without a name as Unknown, each followed by the number.
+     */
+    public static final String getTokenName(int type) {
+        if (type < 0 || type >= names.length)
+            return "Invalid" + type;
+        String name = names[type];
+        return (name == null) ? "Unknown" + type : name;
+    }
+
+#set ($i = 257)
+#set ($tokens = [ "AND_EQ", "ARROW", "CHARACTER", "COMMENT", "DEC", "DIV_EQ", "ELLIPSIS", "EOF", "EQ", "GE", "HASH", "HEADER", "IDENTIFIER", "INC", "INTEGER", "LAND", "LAND_EQ", "LE", "LITERAL", "LOR", "LOR_EQ", "LSH", "LSH_EQ", "MOD_EQ", "MULT_EQ", "NE", "NL", "OR_EQ", "PASTE", "PLUS_EQ", "RANGE", "RSH", "RSH_EQ", "STRING", "SUB_EQ", "WHITESPACE", "XOR_EQ", "M_ARG", "M_PASTE", "M_STRING", "P_LINE", "ERROR" ])
+#foreach ($token in $tokens)
+    /** The token type $token. */
+    public static final int $token = $i;
+#set ($i = $i + 1)
+#end
+    /**
+     * The number of possible semantic token types.
+     *
+     * Please note that not all token types below 255 are used.
+     */
+    public static final int _TOKENS = $i;
+
+    /** The position-less space token. */
+    /* pp */ static final Token space = new Token(WHITESPACE, -1, -1, " ");
+
+    private static final String[] names = new String[_TOKENS];
+    private static final String[] texts = new String[_TOKENS];
+    static {
+        /* Types below 255 name themselves: one-character strings. */
+        for (int i = 0; i < 255; i++) {
+            String ch = String.valueOf((char)i);
+            texts[i] = ch;
+            names[i] = ch;
+        }
+
+        texts[AND_EQ] = "&=";
+        texts[ARROW] = "->";
+        texts[DEC] = "--";
+        texts[DIV_EQ] = "/=";
+        texts[ELLIPSIS] = "...";
+        texts[EQ] = "==";
+        texts[GE] = ">=";
+        texts[HASH] = "#";
+        texts[INC] = "++";
+        texts[LAND] = "&&";
+        texts[LAND_EQ] = "&&=";
+        texts[LE] = "<=";
+        texts[LOR] = "||";
+        texts[LOR_EQ] = "||=";
+        texts[LSH] = "<<";
+        texts[LSH_EQ] = "<<=";
+        texts[MOD_EQ] = "%=";
+        texts[MULT_EQ] = "*=";
+        texts[NE] = "!=";
+        texts[NL] = "\n";
+        texts[OR_EQ] = "|=";
+        /* We have to split the two hashes or Velocity eats them. */
+        texts[PASTE] = "#" + "#";
+        texts[PLUS_EQ] = "+=";
+        texts[RANGE] = "..";
+        texts[RSH] = ">>";
+        texts[RSH_EQ] = ">>=";
+        texts[SUB_EQ] = "-=";
+        texts[XOR_EQ] = "^=";
+
+#foreach ($token in $tokens)
+        names[$token] = "$token";
+#end
+    }
+
+}
diff --git a/src/java/org/anarres/cpp/TokenSnifferSource.java b/src/java/org/anarres/cpp/TokenSnifferSource.java
new file mode 100644
index 0000000..55b53d7
--- /dev/null
+++ b/src/java/org/anarres/cpp/TokenSnifferSource.java
@@ -0,0 +1,56 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A pass-through Source which records the tokens it hands out.
+ *
+ * Every token read from the parent source is returned unchanged; each
+ * one other than the EOF token is also appended to the list supplied
+ * at construction time.
+ *
+ * @deprecated Marked deprecated by its author; no replacement is named
+ *             in this file.
+ */
+@Deprecated
+/* pp */ class TokenSnifferSource extends Source {
+    /** Receives every non-EOF token returned by {@link #token()}. */
+    private final List<Token> target;
+
+    /**
+     * Creates a sniffer which appends to the given list.
+     *
+     * @param target the list to which sniffed tokens are added.
+     */
+    /* pp */ TokenSnifferSource(List<Token> target) {
+        this.target = target;
+    }
+
+    /**
+     * Returns the next token from the parent source, recording it in
+     * the target list unless it is the EOF token.
+     *
+     * @return the token exactly as produced by the parent source.
+     * @throws IOException if the parent source throws it.
+     * @throws LexerException if the parent source throws it.
+     */
+    @Override
+    public Token token()
+                        throws IOException,
+                                LexerException {
+        Token tok = getParent().token();
+        if (tok.getType() != EOF) {
+            target.add(tok);
+        }
+        return tok;
+    }
+
+    /** Returns the string form of the parent source. */
+    @Override
+    public String toString() {
+        return getParent().toString();
+    }
+}
diff --git a/src/resources/log4j.properties b/src/resources/log4j.properties
new file mode 100644
index 0000000..901854c
--- /dev/null
+++ b/src/resources/log4j.properties
@@ -0,0 +1,22 @@
+# Set root logger level to INFO and its only appender to stdout.
+log4j.rootLogger=INFO, stdout
+
+# stdout is set to be a ConsoleAppender.
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+
+# stdout uses PatternLayout.
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+# log4j.appender.stdout.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+# log4j.appender.stdout.layout.ConversionPattern=%-4r [%t] %p %c{1} - %m%n
+log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} [%t %x] %p %c - %m%n
+# Make log4j honour other programs changing System.out
+log4j.appender.stdout.follow=true
+
+# Turn on Axis exception logging
+log4j.logger.org.apache.axis.EXCEPTIONS=DEBUG
+
+# log4j.logger.org.apache.axis=WARN
+# log4j.logger.org.mortbay=WARN
+# log4j.logger.com.mchange=WARN
+
+# log4j.logger.org.anarres.iengine=DEBUG
diff --git a/src/scripts/cpp.sh b/src/scripts/cpp.sh
new file mode 100644
index 0000000..ed167c5
--- /dev/null
+++ b/src/scripts/cpp.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+#
+# Launcher for the Anarres C Preprocessor.
+#
+# Environment overrides (each is used as-is when already set):
+#   CPP_ROOT       installation root containing lib/anarres-cpp.jar
+#   CPP_LIB        directory of jars (default: $CPP_ROOT/lib)
+#   CPP_CLASSPATH  classpath given to java (default: every jar in $CPP_LIB)
+#   CPP_MAINCLASS  class to run (default: org.anarres.cpp.Main)
+
+CPP_JAR=anarres-cpp.jar
+CPP_SCRIPT_DIR=$(dirname "$0")
+
+# Locate the installation root: an explicit setting wins, then the current
+# directory, then its parent, and finally the directory holding this script.
+if [ -n "$CPP_ROOT" ] ; then
+    : # Keep the caller's CPP_ROOT.
+elif [ -f "lib/$CPP_JAR" ] ; then
+    CPP_ROOT="."
+elif [ -f "../lib/$CPP_JAR" ] ; then
+    CPP_ROOT=".."
+elif [ -f "$CPP_SCRIPT_DIR/lib/$CPP_JAR" ] ; then
+    CPP_ROOT="$CPP_SCRIPT_DIR"
+else
+    echo "Could not find $CPP_JAR. Please set CPP_ROOT." >&2
+    exit 1
+fi
+
+if [ -z "$CPP_LIB" ] ; then
+    CPP_LIB="$CPP_ROOT/lib"
+fi
+
+# Join the jars with ':' directly instead of parsing ls output, so that
+# paths containing spaces survive and no trailing ':' (an empty classpath
+# entry) is appended.
+if [ -z "$CPP_CLASSPATH" ] ; then
+    for cpp_jar_file in "$CPP_LIB"/*.jar ; do
+        # An unmatched glob stays literal; skip anything that is not a file.
+        [ -f "$cpp_jar_file" ] || continue
+        CPP_CLASSPATH="${CPP_CLASSPATH:+$CPP_CLASSPATH:}$cpp_jar_file"
+    done
+fi
+
+if [ -z "$CPP_MAINCLASS" ] ; then
+    CPP_MAINCLASS=org.anarres.cpp.Main
+fi
+
+CPP_JFLAGS="-Xmx128M"
+
+# CPP_JFLAGS is deliberately unquoted so that multiple flags split into
+# separate arguments.
+exec java $CPP_JFLAGS -cp "$CPP_CLASSPATH" "$CPP_MAINCLASS" "$@"
diff --git a/src/scripts/release.sh b/src/scripts/release.sh
new file mode 100644
index 0000000..6393a95
--- /dev/null
+++ b/src/scripts/release.sh
@@ -0,0 +1,4 @@
+# Release helper: copies build artifacts to their publication locations.
+# All hosts and destination paths below are hard-coded.
+
+# Upload the distribution tarball(s) and the generated javadoc tree.
+scp build/dist/anarres-cpp-*.tar.gz [email protected]:public_html/projects/jcpp
+scp -r build/javadoc/ [email protected]:public_html/projects/jcpp
+# Copy the built jar into the lib directories of two local projects.
+cp build/tar/lib/anarres-cpp.jar /home/shevek/java/iengine/lib/jcpp
+cp build/tar/lib/anarres-cpp.jar /home/shevek/java/karma/trunk/lib/dp
diff --git a/src/tests/AutoTestSuite.java b/src/tests/AutoTestSuite.java
new file mode 100644
index 0000000..894a365
--- /dev/null
+++ b/src/tests/AutoTestSuite.java
@@ -0,0 +1,121 @@
+import java.lang.reflect.Modifier;
+
+import java.io.File;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
+import junit.framework.TestSuite;
+import junit.framework.TestCase;
+import junit.framework.Test;
+
+/**
+ * A JUnit suite which discovers its test classes by scanning a directory
+ * tree of compiled class files.
+ *
+ * System properties read by the constructor:
+ * <ul>
+ * <li><code>test.root</code> - the directory to scan (required).</li>
+ * <li><code>test.package</code> - if set, only classes whose package name
+ *     starts with this prefix are added.</li>
+ * <li><code>test.case</code> - if set and non-empty, a comma or colon
+ *     separated list of simple class names; only those are added.</li>
+ * <li><code>test.all</code> - read and stored, but not currently
+ *     consulted by any filter.</li>
+ * </ul>
+ */
+public class AutoTestSuite extends TestSuite {
+    private String testPackage;
+    private Set<String> testCases;
+    /* Only referenced by a filter that is currently disabled. */
+    private boolean testAll;
+    private File root;
+
+    /**
+     * Reads the filter properties, scans the test root and adds every
+     * matching test class to this suite.
+     *
+     * @throws IllegalStateException if the test.root property is not set.
+     */
+    public AutoTestSuite() {
+        this.testPackage = System.getProperty("test.package");
+        String tcase = System.getProperty("test.case");
+        if (tcase != null && tcase.length() > 0) {
+            this.testCases = new HashSet<String>(
+                Arrays.asList(tcase.split("[,:]"))
+            );
+        }
+        else {
+            this.testCases = null;
+        }
+        this.testAll = System.getProperty("test.all") != null;
+
+        /* Fail with a readable message instead of a bare
+         * NullPointerException from the File constructor. */
+        String rootPath = System.getProperty("test.root");
+        if (rootPath == null) {
+            throw new IllegalStateException(
+                "System property 'test.root' must name the directory " +
+                "containing the compiled test classes");
+        }
+        this.root = new File(rootPath);
+
+        Set<Class> tests = new HashSet<Class>();
+        findClasses("", root, tests);
+        for (Class test : tests) {
+            addTestSuite(test);
+        }
+    }
+
+    /**
+     * Adds the given class to the suite unless it is excluded by the
+     * test.package or test.case filters.
+     *
+     * The parameter is deliberately a raw Class so that this method
+     * overrides the superclass method whether or not the JUnit version
+     * in use declares a generic parameter.
+     *
+     * @param clazz the candidate test class.
+     */
+    public void addTestSuite(Class clazz) {
+        if (testPackage != null) {
+            /* getPackage() may return null, e.g. for the default package. */
+            Package pkg = clazz.getPackage();
+            String packageName = (pkg == null) ? "" : pkg.getName();
+            if (!packageName.startsWith(testPackage)) {
+                return;
+            }
+        }
+        if (testCases != null) {
+            String qualified = clazz.getName();
+            String simple = qualified.substring(qualified.lastIndexOf('.') + 1);
+            if (!testCases.contains(simple)) {
+                return;
+            }
+        }
+        System.out.println("Adding test class '" + clazz + "'");
+        super.addTestSuite(clazz);
+    }
+
+    /**
+     * Factory method in the conventional JUnit 3 form.
+     *
+     * @return a new AutoTestSuite.
+     */
+    public static Test suite() {
+        return new AutoTestSuite();
+    }
+
+    /**
+     * Recursively collects concrete TestCase classes found under a
+     * directory.
+     *
+     * @param pkg the package prefix for classes in this directory, either
+     *        empty or ending in a dot.
+     * @param dir the directory to scan.
+     * @param result the set which receives the classes found.
+     */
+    private void findClasses(String pkg, File dir, Set<Class> result) {
+        File[] children = dir.listFiles();
+        if (children == null) {
+            /* Not a directory, or it could not be read: nothing to scan. */
+            return;
+        }
+        for (File child : children) {
+            if (child.isDirectory()) {
+                findClasses(pkg + child.getName() + ".", child, result);
+                continue;
+            }
+            if (!child.isFile()) {
+                continue;
+            }
+            String name = child.getName();
+            /* Skip non-class files and nested or anonymous classes. */
+            if (!name.endsWith(".class") || name.indexOf('$') != -1) {
+                continue;
+            }
+            String className = pkg +
+                name.substring(0, name.length() - ".class".length());
+            try {
+                Class test = Class.forName(className);
+                int modifiers = test.getModifiers();
+                if (Modifier.isAbstract(modifiers) ||
+                        Modifier.isInterface(modifiers) ||
+                        !TestCase.class.isAssignableFrom(test) ||
+                        TestSuite.class.isAssignableFrom(test)) {
+                    continue;
+                }
+                result.add(test);
+            }
+            catch (ClassNotFoundException cnfe) {
+                /* Best effort: report the failure and keep scanning. */
+                cnfe.printStackTrace();
+            }
+        }
+    }
+}
diff --git a/src/tests/org/anarres/cpp/BaseTestCase.java b/src/tests/org/anarres/cpp/BaseTestCase.java
new file mode 100644
index 0000000..ad6ae6a
--- /dev/null
+++ b/src/tests/org/anarres/cpp/BaseTestCase.java
@@ -0,0 +1,6 @@
+package org.anarres.cpp;
+
+import junit.framework.TestCase;
+
+/**
+ * Common abstract superclass for the test cases in this package.
+ *
+ * It currently adds nothing to junit.framework.TestCase.
+ */
+public abstract class BaseTestCase extends TestCase {
+}
diff --git a/src/tests/org/anarres/cpp/CppReaderTestCase.java b/src/tests/org/anarres/cpp/CppReaderTestCase.java
new file mode 100644
index 0000000..5aeee06
--- /dev/null
+++ b/src/tests/org/anarres/cpp/CppReaderTestCase.java
@@ -0,0 +1,34 @@
+package org.anarres.cpp;
+
+import java.util.Collections;
+
+import java.io.StringReader;
+import java.io.BufferedReader;
+
+import junit.framework.Test;
+
+/**
+ * Smoke test which runs a CppReader over a small input and prints the
+ * preprocessed lines.
+ */
+public class CppReaderTestCase extends BaseTestCase implements Test {
+
+    /**
+     * Preprocesses the given text with src/input on the include path and
+     * the line marker flag set, echoing every output line to stdout.
+     *
+     * The expected text only appears in the banner; the produced output
+     * is not compared against it.
+     *
+     * @param in the text to preprocess.
+     * @param out the nominal expected output, used for logging only.
+     */
+    private void testCppReader(String in, String out)
+                        throws Exception {
+        System.out.println("Testing " + in + " => " + out);
+
+        CppReader reader = new CppReader(new StringReader(in));
+        reader.getPreprocessor().setIncludePath(
+            Collections.singletonList("src/input")
+        );
+        reader.getPreprocessor().setFlags(Preprocessor.FL_LINEMARKER);
+
+        BufferedReader lines = new BufferedReader(reader);
+        for (String line = lines.readLine();
+                line != null;
+                line = lines.readLine()) {
+            System.out.println(" >> " + line);
+        }
+    }
+
+    /** Preprocesses a single include directive. */
+    public void testJoinReader()
+                        throws Exception {
+        testCppReader("#include <test0.h>\n", "ab");
+    }
+
+}
diff --git a/src/tests/org/anarres/cpp/ErrorTestCase.java b/src/tests/org/anarres/cpp/ErrorTestCase.java
new file mode 100644
index 0000000..d5d44a3
--- /dev/null
+++ b/src/tests/org/anarres/cpp/ErrorTestCase.java
@@ -0,0 +1,50 @@
+package org.anarres.cpp;
+
+import java.io.*;
+
+import junit.framework.Test;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * Checks how a StringLexerSource reports malformed input, with and
+ * without a PreprocessorListener attached.
+ */
+public class ErrorTestCase extends BaseTestCase {
+
+    /** Reads and discards tokens from the source until EOF is seen. */
+    private void testError(Source source)
+                        throws LexerException,
+                                IOException {
+        Token tok;
+        do {
+            tok = source.token();
+        } while (tok.getType() != EOF);
+    }
+
+    /**
+     * Lexes the input twice: once with no listener, expecting a
+     * LexerException, and once with a listener, expecting at least one
+     * recorded error.
+     */
+    private void testError(String input) throws Exception {
+        /* Without a PreprocessorListener, throws an exception. */
+        StringLexerSource unlistened = new StringLexerSource(input, true);
+        try {
+            testError(unlistened);
+            fail("Lexing succeeded");
+        }
+        catch (LexerException e) {
+            /* This is the expected outcome. */
+        }
+
+        /* With a PreprocessorListener, records the error. */
+        StringLexerSource listened = new StringLexerSource(input, true);
+        PreprocessorListener listener = new PreprocessorListener();
+        listened.setListener(listener);
+        testError(listened);
+        assertTrue(listener.getErrors() > 0);
+    }
+
+    /** Runs the error check over each malformed input in turn. */
+    public void testErrors() throws Exception {
+        String[] inputs = { "\"", "'", "''" };
+        for (String input : inputs) {
+            testError(input);
+        }
+    }
+
+}
diff --git a/src/tests/org/anarres/cpp/JoinReaderTestCase.java b/src/tests/org/anarres/cpp/JoinReaderTestCase.java
new file mode 100644
index 0000000..2b99c2f
--- /dev/null
+++ b/src/tests/org/anarres/cpp/JoinReaderTestCase.java
@@ -0,0 +1,40 @@
+package org.anarres.cpp;
+
+import java.io.StringReader;
+
+import junit.framework.Test;
+
+/**
+ * Checks the character stream produced by JoinReader for a handful of
+ * inputs.
+ */
+public class JoinReaderTestCase extends BaseTestCase implements Test {
+
+    /**
+     * Reads the input through a JoinReader one character at a time and
+     * compares against the expected text, then checks that end of
+     * stream is reported twice in a row.
+     *
+     * @param in the raw input text.
+     * @param out the characters expected from the reader.
+     * @param tg the flag passed as the second JoinReader constructor
+     *        argument.
+     */
+    private void testJoinReader(String in, String out, boolean tg)
+                        throws Exception {
+        System.out.println("Testing " + in + " => " + out);
+        JoinReader joined = new JoinReader(new StringReader(in), tg);
+
+        for (char expected : out.toCharArray()) {
+            int actual = joined.read();
+            assertEquals(expected, actual);
+        }
+        assertEquals(-1, joined.read());
+        assertEquals(-1, joined.read());
+    }
+
+    /** Runs the same expectation with the flag set and then unset. */
+    private void testJoinReader(String in, String out)
+                        throws Exception {
+        for (boolean tg : new boolean[] { true, false }) {
+            testJoinReader(in, out, tg);
+        }
+    }
+
+    public void testJoinReader()
+                        throws Exception {
+        testJoinReader("ab", "ab");
+        testJoinReader("a\\b", "a\\b");
+        testJoinReader("a\nb", "a\nb");
+        testJoinReader("a\\\nb", "ab\n");
+        /* These two cases are only run with the flag set. */
+        testJoinReader("foo??(bar", "foo[bar", true);
+        testJoinReader("foo??/\nbar", "foobar\n", true);
+    }
+
+}
diff --git a/src/tests/org/anarres/cpp/LexerSourceTestCase.java b/src/tests/org/anarres/cpp/LexerSourceTestCase.java
new file mode 100644
index 0000000..e8fb410
--- /dev/null
+++ b/src/tests/org/anarres/cpp/LexerSourceTestCase.java
@@ -0,0 +1,43 @@
+package org.anarres.cpp;
+
+import java.io.StringReader;
+
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import junit.framework.Test;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * Checks the sequence of token types produced by StringLexerSource.
+ */
+public class LexerSourceTestCase extends BaseTestCase implements Test {
+
+    /**
+     * Lexes the input and compares the type of each token against the
+     * expected list, then requires that the next token is EOF.
+     *
+     * @param in the text to lex.
+     * @param out the expected token types, in order.
+     */
+    private void testLexerSource(String in, int[] out)
+                        throws Exception {
+        System.out.println("Testing '" + in + "' => " +
+                Arrays.toString(out));
+        StringLexerSource lexer = new StringLexerSource(in);
+
+        for (int expectedType : out) {
+            Token tok = lexer.token();
+            System.out.println("Token is " + tok);
+            assertEquals(expectedType, tok.getType());
+        }
+        assertEquals(EOF, lexer.token().getType());
+    }
+
+    public void testJoinReader()
+                        throws Exception {
+
+        int[] declaration = {
+            IDENTIFIER, WHITESPACE, IDENTIFIER, WHITESPACE,
+            '=', WHITESPACE, INTEGER, ';', EOF
+        };
+        testLexerSource("int a = 5;", declaration);
+
+        int[] hashes = {
+            HASH, WHITESPACE, '#', WHITESPACE, IDENTIFIER
+        };
+        testLexerSource("# # foo", hashes);
+
+    }
+
+}
diff --git a/src/tests/org/anarres/cpp/PreprocessorTestCase.java b/src/tests/org/anarres/cpp/PreprocessorTestCase.java
new file mode 100644
index 0000000..ea6aab7
--- /dev/null
+++ b/src/tests/org/anarres/cpp/PreprocessorTestCase.java
@@ -0,0 +1,154 @@
+package org.anarres.cpp;
+
+import java.io.*;
+
+import junit.framework.Test;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * Feeds source text to a Preprocessor through a pipe, one snippet at a
+ * time, and checks the tokens it produces after each snippet.
+ */
+public class PreprocessorTestCase extends BaseTestCase {
+ /* Write end of the pipe from which the preprocessor reads its input. */
+ private OutputStreamWriter writer;
+ /* The preprocessor under test. */
+ private Preprocessor p;
+
+ /**
+ * Connects a writer to a Preprocessor via piped streams. The lexer
+ * source reports its file as "test-input", and include() is
+ * overridden to do nothing.
+ */
+ public void setUp() throws Exception {
+ final PipedOutputStream po = new PipedOutputStream();
+ writer = new OutputStreamWriter(po);
+
+ p = new Preprocessor(
+ new LexerSource(
+ new InputStreamReader(
+ new PipedInputStream(po)
+ ),
+ true
+ ) {
+ public File getFile() {
+ return new File("test-input");
+ }
+ }
+ ) {
+ @Override
+ protected void include(File parent, int line,
+ String name, boolean quoted) {
+ /* XXX Perform a useful assertion. */
+ }
+ };
+ }
+
+ /**
+ * Marks an expected IDENTIFIER token with the given text. The wrapper
+ * lets testInput tell it apart from a plain String, which stands for
+ * an expected STRING token.
+ */
+ private static class I {
+ private String t;
+ public I(String t) {
+ this.t = t;
+ }
+ public String getText() {
+ return t;
+ }
+ }
+
+ /* Shorthand factory so that expectations read as I("name"). */
+ private static I I(String t) {
+ return new I(t);
+ }
+
+ /**
+ * Writes a series of snippets and checks the tokens produced for
+ * each. The newline ending a snippet is expected at the start of the
+ * next call's token list rather than at the end of its own.
+ */
+ public void testPreprocessor() throws Exception {
+ /* Magic macros */
+ testInput("line = __LINE__\n",
+ I("line"), WHITESPACE, '=', WHITESPACE, INTEGER
+ /*, NL - all nls deferred so as not to block the reader */
+ );
+ testInput("file = __FILE__\n", NL, /* from before, etc */
+ I("file"), WHITESPACE, '=', WHITESPACE, STRING
+ );
+
+ /* Simple definitions */
+ testInput("#define A a /* a defined */\n", NL);
+ testInput("#define B b /* b defined */\n", NL);
+ testInput("#define C c /* c defined */\n", NL);
+
+ /* Expansion of arguments */
+ testInput("#define EXPAND(x) x\n", NL);
+ testInput("EXPAND(a)\n", NL, I("a"));
+ testInput("EXPAND(A)\n", NL, I("a"));
+
+ /* Stringification */
+ testInput("#define _STRINGIFY(x) #x\n", NL);
+ testInput("_STRINGIFY(A)\n", NL, "A");
+ testInput("#define STRINGIFY(x) _STRINGIFY(x)\n", NL);
+ testInput("STRINGIFY(b)\n", NL, "b");
+ testInput("STRINGIFY(A)\n", NL, "a");
+
+ /* Concatenation */
+ testInput("#define _CONCAT(x, y) x ## y\n", NL);
+ testInput("_CONCAT(A, B)\n", NL, I("AB"));
+ testInput("#define A_CONCAT done_a_concat\n", NL);
+ testInput("_CONCAT(A, _CONCAT(B, C))\n", NL,
+ I("done_a_concat"), '(', I("b"), ',', WHITESPACE, I("c"), ')'
+ );
+ testInput("#define CONCAT(x, y) _CONCAT(x, y)\n", NL);
+ testInput("CONCAT(A, CONCAT(B, C))\n", NL, I("abc"));
+ testInput("#define _CONCAT3(x, y, z) x ## y ## z\n", NL);
+ testInput("_CONCAT3(a, b, c)\n", NL, I("abc"));
+ testInput("_CONCAT3(A, B, C)\n", NL, I("ABC"));
+
+/* Redefinitions, undefinitions. */
+testInput("#define two three\n", NL);
+testInput("one /* one */\n", NL, I("one"), WHITESPACE, COMMENT);
+testInput("#define one two\n", NL);
+testInput("one /* three */\n", NL, I("three"), WHITESPACE, COMMENT);
+testInput("#undef two\n", NL);
+testInput("#define two five\n", NL);
+testInput("one /* five */\n", NL, I("five"), WHITESPACE, COMMENT);
+testInput("#undef two\n", NL);
+testInput("one /* two */\n", NL, I("two"), WHITESPACE, COMMENT);
+testInput("#undef one\n", NL);
+testInput("#define one four\n", NL);
+testInput("one /* four */\n", NL, I("four"), WHITESPACE, COMMENT);
+testInput("#undef one\n", NL);
+testInput("#define one one\n", NL);
+testInput("one /* one */\n", NL, I("one"), WHITESPACE, COMMENT);
+
+ /* Variadic macros. */
+ testInput("#define var(x...) a x b\n", NL);
+ testInput("var(e, f, g)", NL,
+ I("a"), WHITESPACE,
+ I("e"), ',', WHITESPACE,
+ I("f"), ',', WHITESPACE,
+ I("g"), WHITESPACE,
+ I("b")
+ );
+
+ /* Close the write end, then drain and print whatever tokens are
+ * left until EOF. */
+ writer.close();
+
+ Token t;
+ do {
+ t = p.token();
+ System.out.println("Remaining token " + t);
+ } while(t.getType() != EOF);
+ }
+
+ /**
+ * Writes the snippet to the pipe, flushes it, then reads exactly one
+ * token per expectation and checks it.
+ *
+ * Each expectation is one of: a String (a STRING token with that
+ * value), an I (an IDENTIFIER token with that text), or a Character
+ * or Integer (a token of that type). Anything else fails the test.
+ */
+ private void testInput(String in, Object... out)
+ throws Exception {
+ System.out.print("Input: " + in);
+ writer.write(in);
+ writer.flush();
+ for (int i = 0; i < out.length; i++) {
+ Token t = p.token();
+ System.out.println(t);
+ Object v = out[i];
+ if (v instanceof String) {
+ if (t.getType() != STRING)
+ fail("Expected STRING, but got " + t);
+ assertEquals((String)v, (String)t.getValue());
+ }
+ else if (v instanceof I) {
+ if (t.getType() != IDENTIFIER)
+ fail("Expected IDENTIFIER, but got " + t);
+ assertEquals( ((I)v).getText(), (String)t.getText());
+ }
+ else if (v instanceof Character)
+ assertEquals( (int)((Character)v).charValue(), t.getType());
+ else if (v instanceof Integer)
+ assertEquals( ((Integer)v).intValue(), t.getType());
+ else
+ fail("Bad object " + v.getClass());
+ }
+ }
+}