From 5ff55648127c8a8e1b9829775045af986e37647c Mon Sep 17 00:00:00 2001
From: Shevek <shevek@anarres.org>
Date: Fri, 21 Mar 2008 23:05:04 +0000
Subject: move stuff into trunk

---
 src/java/org/anarres/cpp/Argument.java             |   79 +
 src/java/org/anarres/cpp/CppReader.java            |  147 ++
 src/java/org/anarres/cpp/FileLexerSource.java      |   74 +
 src/java/org/anarres/cpp/FixedTokenSource.java     |   67 +
 src/java/org/anarres/cpp/InternalException.java    |   33 +
 src/java/org/anarres/cpp/JoinReader.java           |  168 +++
 src/java/org/anarres/cpp/LexerException.java       |   35 +
 src/java/org/anarres/cpp/LexerSource.java          |  677 +++++++++
 src/java/org/anarres/cpp/Macro.java                |  157 ++
 src/java/org/anarres/cpp/MacroTokenSource.java     |  191 +++
 src/java/org/anarres/cpp/Main.java                 |  111 ++
 src/java/org/anarres/cpp/Preprocessor.java         | 1511 ++++++++++++++++++++
 src/java/org/anarres/cpp/PreprocessorListener.java |   83 ++
 src/java/org/anarres/cpp/Source.java               |  226 +++
 src/java/org/anarres/cpp/SourceIterator.java       |   94 ++
 src/java/org/anarres/cpp/State.java                |   69 +
 src/java/org/anarres/cpp/StringLexerSource.java    |   64 +
 src/java/org/anarres/cpp/Token.java                |  215 +++
 src/java/org/anarres/cpp/TokenSnifferSource.java   |   56 +
 19 files changed, 4057 insertions(+)
 create mode 100644 src/java/org/anarres/cpp/Argument.java
 create mode 100644 src/java/org/anarres/cpp/CppReader.java
 create mode 100644 src/java/org/anarres/cpp/FileLexerSource.java
 create mode 100644 src/java/org/anarres/cpp/FixedTokenSource.java
 create mode 100644 src/java/org/anarres/cpp/InternalException.java
 create mode 100644 src/java/org/anarres/cpp/JoinReader.java
 create mode 100644 src/java/org/anarres/cpp/LexerException.java
 create mode 100644 src/java/org/anarres/cpp/LexerSource.java
 create mode 100644 src/java/org/anarres/cpp/Macro.java
 create mode 100644 src/java/org/anarres/cpp/MacroTokenSource.java
 create mode 100644 src/java/org/anarres/cpp/Main.java
 create mode 100644 src/java/org/anarres/cpp/Preprocessor.java
 create mode 100644 src/java/org/anarres/cpp/PreprocessorListener.java
 create mode 100644 src/java/org/anarres/cpp/Source.java
 create mode 100644 src/java/org/anarres/cpp/SourceIterator.java
 create mode 100644 src/java/org/anarres/cpp/State.java
 create mode 100644 src/java/org/anarres/cpp/StringLexerSource.java
 create mode 100644 src/java/org/anarres/cpp/Token.java
 create mode 100644 src/java/org/anarres/cpp/TokenSnifferSource.java

(limited to 'src/java/org/anarres/cpp')
diff --git a/src/java/org/anarres/cpp/Argument.java b/src/java/org/anarres/cpp/Argument.java
new file mode 100644
index 0000000..da87d70
--- /dev/null
+++ b/src/java/org/anarres/cpp/Argument.java
@@ -0,0 +1,79 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.IOException;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A macro argument.
+ *
+ * This encapsulates a raw and preprocessed token stream.
+ */
+/* pp */ class Argument extends ArrayList<Token> {
+	public static final int	NO_ARGS = -1;
+
+	private List<Token>	expansion;
+
+	public Argument() {
+		this.expansion = null;
+	}
+
+	public void addToken(Token tok) {
+		add(tok);
+	}
+
+	/* pp */ void expand(Preprocessor p)
+						throws IOException,
+								LexerException {
+		/* Cache expansion. */
+		if (expansion == null) {
+			this.expansion = p.expand(this);
+			// System.out.println("Expanded arg " + this);
+		}
+	}
+
+	public Iterator<Token> expansion() {
+		return expansion.iterator();
+	}
+
+	public String toString() {
+		StringBuilder	buf = new StringBuilder();
+		buf.append("Argument(");
+		// buf.append(super.toString());
+		buf.append("raw=[ ");
+		for (int i = 0; i < size(); i++)
+			buf.append(get(i).getText());
+		buf.append(" ];expansion=[ ");
+		if (expansion == null)
+			buf.append("null");
+		else
+			for (int i = 0; i < expansion.size(); i++)
+				buf.append(expansion.get(i).getText());
+		buf.append(" ])");
+		return buf.toString();
+	}
+
+}
diff --git a/src/java/org/anarres/cpp/CppReader.java b/src/java/org/anarres/cpp/CppReader.java
new file mode 100644
index 0000000..0aa6788
--- /dev/null
+++ b/src/java/org/anarres/cpp/CppReader.java
@@ -0,0 +1,147 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A Reader wrapper around the Preprocessor.
+ *
+ * This is a utility class to provide a transparent {@link Reader}
+ * which preprocesses the input text.
+ *
+ * @see Preprocessor
+ * @see Reader
+ */
+public class CppReader extends Reader {
+
+	private Preprocessor	cpp;
+	private String			token;
+	private int				idx;
+
+	public CppReader(final Reader r) {
+		cpp = new Preprocessor(new LexerSource(r, true) {
+			@Override
+			public String getName() {
+				return "<CppReader Input@" +
+						System.identityHashCode(r) + ">";
+			}
+		});
+		token = "";
+		idx = 0;
+	}
+
+	public CppReader(Preprocessor p) {
+		cpp = p;
+		token = "";
+		idx = 0;
+	}
+
+	/**
+	 * Returns the Preprocessor used by this CppReader.
+	 */
+	public Preprocessor getPreprocessor() {
+		return cpp;
+	}
+
+	/**
+	 * Defines the given name as a macro.
+	 *
+	 * This is a convenience method which forwards to the Preprocessor.
+	 */
+	public void addMacro(String name)
+						throws LexerException {
+		cpp.addMacro(name);
+	}
+
+	/**
+	 * Defines the given name as a macro with the given value.
+	 *
+	 * This is a convenience method which forwards to the Preprocessor.
+	 */
+	public void addMacro(String name, String value)
+						throws LexerException {
+		cpp.addMacro(name, value);
+	}
+
+	private boolean refill()
+						throws IOException {
+		try {
+			assert cpp != null : "cpp is null : was it closed?";
+			if (token == null)
+				return false;
+			while (idx >= token.length()) {
+				Token	tok = cpp.token();
+				switch (tok.getType()) {
+					case EOF:
+						token = null;
+						return false;
+					case COMMENT:
+						if (false) {
+							token = " ";
+							break;
+						}
+					default:
+						token = tok.getText();
+						break;
+				}
+				idx = 0;
+			}
+			return true;
+		}
+		catch (LexerException e) {
+			IOException	ie = new IOException(String.valueOf(e));
+			ie.initCause(e);
+			throw ie;
+		}
+	}
+
+	public int read()
+						throws IOException {
+		if (!refill())
+			return -1;
+		return token.charAt(idx++);
+	}
+
+	/* XXX Very slow and inefficient: delegates to read() per character.
+	 * Returns -1, not 0, if the input ends before any character is read. */
+	public int read(char cbuf[], int off, int len) throws IOException {
+		if (token == null)
+			return -1;
+		for (int i = 0; i < len; i++) {
+			int	ch = read();
+			if (ch == -1)
+				return (i == 0) ? -1 : i;
+			cbuf[off + i] = (char)ch;
+		}
+		return len;
+	}
+
+	public void close()
+						throws IOException {
+		cpp = null;
+		token = null;
+	}
+
+}
diff --git a/src/java/org/anarres/cpp/FileLexerSource.java b/src/java/org/anarres/cpp/FileLexerSource.java
new file mode 100644
index 0000000..9f574a0
--- /dev/null
+++ b/src/java/org/anarres/cpp/FileLexerSource.java
@@ -0,0 +1,74 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A {@link Source} which lexes a file.
+ *
+ * The input is buffered.
+ *
+ * @see Source
+ */
+public class FileLexerSource extends LexerSource {
+	private File	file;
+
+	/**
+	 * Creates a new Source for lexing the given File.
+	 *
+	 * Preprocessor directives are honoured within the file.
+	 */
+	public FileLexerSource(File file)
+						throws IOException {
+		super(
+			new BufferedReader(
+				new FileReader(
+					file
+				)
+			),
+			true
+		);
+
+		this.file = file;
+	}
+
+	@Override
+	/* pp */ File getFile() {
+		return file;
+	}
+
+	@Override
+	/* pp */ String getName() {
+		return String.valueOf(file);
+	}
+
+	public String toString() {
+		return "file " + file;
+	}
+}
diff --git a/src/java/org/anarres/cpp/FixedTokenSource.java b/src/java/org/anarres/cpp/FixedTokenSource.java
new file mode 100644
index 0000000..d123f89
--- /dev/null
+++ b/src/java/org/anarres/cpp/FixedTokenSource.java
@@ -0,0 +1,67 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Iterator;
+
+/* pp */ class FixedTokenSource extends Source {
+	private static final Token	EOF =
+			new Token(Token.EOF, "<ts-eof>");
+
+	private List<Token>	tokens;
+	private int			idx;
+
+	/* pp */ FixedTokenSource(Token... tokens) {
+		this.tokens = Arrays.asList(tokens);
+		this.idx = 0;
+	}
+
+	/* pp */ FixedTokenSource(List<Token> tokens) {
+		this.tokens = tokens;
+		this.idx = 0;
+	}
+
+	public Token token()
+						throws IOException,
+								LexerException {
+		if (idx >= tokens.size())
+			return EOF;
+		return tokens.get(idx++);
+	}
+
+	public String toString() {
+		StringBuilder	buf = new StringBuilder();
+		buf.append("constant token stream " + tokens);
+		Source	parent = getParent();
+		if (parent != null)
+			buf.append(" in ").append(String.valueOf(parent));
+		return buf.toString();
+	}
+}
diff --git a/src/java/org/anarres/cpp/InternalException.java b/src/java/org/anarres/cpp/InternalException.java
new file mode 100644
index 0000000..d228710
--- /dev/null
+++ b/src/java/org/anarres/cpp/InternalException.java
@@ -0,0 +1,33 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+/**
+ * An internal exception.
+ *
+ * This exception is thrown when an internal state violation is
+ * encountered. This should never happen. If it ever happens, please
+ * report it as a bug.
+ */
+public class InternalException extends RuntimeException {
+	public InternalException(String msg) {
+		super(msg);
+	}
+}
diff --git a/src/java/org/anarres/cpp/JoinReader.java b/src/java/org/anarres/cpp/JoinReader.java
new file mode 100644
index 0000000..10ec535
--- /dev/null
+++ b/src/java/org/anarres/cpp/JoinReader.java
@@ -0,0 +1,168 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.Reader;
+import java.io.PushbackReader;
+import java.io.IOException;
+
+/* A Reader which joins backslash-newline continued lines, optionally
+ * translates trigraphs, and re-emits the swallowed newlines afterwards. */
+/* pp */ class JoinReader extends Reader {
+	private Reader	in;
+
+	private boolean	trigraphs;
+
+	private int		newlines;
+	private boolean	flushnl;
+	private int[]	unget;
+	private int		uptr;
+
+	public JoinReader(Reader in, boolean trigraphs) {
+		this.in = in;
+		this.trigraphs = trigraphs;
+		this.newlines = 0;
+		this.flushnl = false;
+		this.unget = new int[3];	/* _read() leaves <= 2, read() adds 1. */
+		this.uptr = 0;
+	}
+
+	public JoinReader(Reader in) {
+		this(in, false);
+	}
+
+	private int __read() throws IOException {
+		if (uptr > 0)
+			return unget[--uptr];
+		return in.read();
+	}
+
+	private void _unread(int c) {
+		if (c != -1)
+			unget[uptr++] = c;
+	}
+
+	private int _read() throws IOException {
+		int	c = __read();
+		if (c == '?' && trigraphs) {
+			int d = __read();
+			if (d == '?') {
+				int	e = __read();
+				switch (e) {
+					case '(': return '[';
+					case ')': return ']';
+					case '<': return '{';
+					case '>': return '}';
+					case '=': return '#';
+					case '/': return '\\';
+					case '\'': return '^';
+					case '!': return '|';
+					case '-': return '~';
+				}
+				_unread(e);
+			}
+			_unread(d);
+		}
+		return c;
+	}
+
+	public int read() throws IOException {
+		if (flushnl) {
+			if (newlines > 0) {
+				newlines--;
+				return '\n';
+			}
+			flushnl = false;
+		}
+
+		for (;;) {
+			int	c = _read();
+			switch (c) {
+				case '\\':
+					int	d = _read();
+					switch (d) {
+						case '\n':
+							newlines++;
+							continue;
+						case '\r':
+							newlines++;
+							int	e = _read();
+							if (e != '\n')
+								_unread(e);
+							continue;
+						default:
+							_unread(d);
+							return c;
+					}
+				case '\r':
+				case '\n':
+				case '\u2028':
+				case '\u2029':
+				case '\u000B':
+				case '\u000C':
+				case '\u0085':
+					flushnl = true;
+					return c;
+				case -1:
+					if (newlines > 0) {
+						newlines--;
+						return '\n';
+					}
+				default:
+					return c;
+			}
+		}
+	}
+
+	public int read(char cbuf[], int off, int len) throws IOException {
+		for (int i = 0; i < len; i++) {
+			int	ch = read();
+			if (ch == -1)
+				return (i == 0) ? -1 : i;	/* EOF is -1, never 0. */
+			cbuf[off + i] = (char)ch;
+		}
+		return len;
+	}
+
+	public void close() throws IOException {
+		in.close();
+	}
+
+	public String toString() {
+		return "JoinReader(nl=" + newlines + ")";
+	}
+
+/*
+	public static void main(String[] args) throws IOException {
+		FileReader		f = new FileReader(new File(args[0]));
+		BufferedReader	b = new BufferedReader(f);
+		JoinReader		r = new JoinReader(b);
+		BufferedWriter	w = new BufferedWriter(
+				new java.io.OutputStreamWriter(System.out)
+					);
+		int				c;
+		while ((c = r.read()) != -1) {
+			w.write((char)c);
+		}
+		w.close();
+	}
+*/
+
+}
diff --git a/src/java/org/anarres/cpp/LexerException.java b/src/java/org/anarres/cpp/LexerException.java
new file mode 100644
index 0000000..a4b5e2e
--- /dev/null
+++ b/src/java/org/anarres/cpp/LexerException.java
@@ -0,0 +1,35 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+/**
+ * A preprocessor exception.
+ *
+ * Note to users: I don't really like the name of this class. S.
+ */
+public class LexerException extends Exception {
+	public LexerException(String msg) {
+		super(msg);
+	}
+
+	public LexerException(Throwable cause) {
+		super(cause);
+	}
+}
diff --git a/src/java/org/anarres/cpp/LexerSource.java b/src/java/org/anarres/cpp/LexerSource.java
new file mode 100644
index 0000000..a291bff
--- /dev/null
+++ b/src/java/org/anarres/cpp/LexerSource.java
@@ -0,0 +1,677 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.util.Stack;
+
+import static org.anarres.cpp.Token.*;
+
+/** Does not handle digraphs. */
+public class LexerSource extends Source {
+	private static final boolean	DEBUG = false;
+
+	private PushbackReader	reader;
+	private boolean			ppvalid;
+	private boolean			bol;
+	private boolean			include;
+
+	private int				line;
+	private int				column;
+	private int				lastcolumn;
+	private boolean			cr;
+
+	/* ppvalid is:
+	 * false in StringLexerSource,
+	 * true in FileLexerSource */
+	public LexerSource(Reader r, boolean ppvalid) {
+		this.reader = new PushbackReader(new JoinReader(r), 5);
+		this.ppvalid = ppvalid;
+		this.bol = true;
+		this.include = false;
+
+		this.line = 1;
+		this.column = 0;
+		this.lastcolumn = -1;
+		this.cr = false;
+	}
+
+	@Override
+	public int getLine() {
+		return line;
+	}
+
+	public int getColumn() {
+		return column;
+	}
+
+	/* pp */ boolean isNumbered() {
+		return true;
+	}
+
+/* Error handling - this lot is barely worth it. */
+
+	private final void _error(String msg, boolean error)
+						throws LexerException {
+		int	_l = line;
+		int	_c = column;
+		if (_c == 0) {
+			_c = lastcolumn;
+			_l--;
+		}
+		else {
+			_c--;
+		}
+		if (error)
+			super.error(_l, _c, msg);
+		else
+			super.warning(_l, _c, msg);
+	}
+
+	private final void error(String msg)
+						throws LexerException {
+		_error(msg, true);
+	}
+
+	private final void warning(String msg)
+						throws LexerException {
+		_error(msg, false);
+	}
+
+/* A flag for string handling. */
+
+	/* pp */ void setInclude(boolean b) {
+		this.include = b;
+	}
+
+/*
+	private boolean _isLineSeparator(int c) {
+		return Character.getType(c) == Character.LINE_SEPARATOR
+				|| c == -1;
+	}
+*/
+
+	/* XXX Move to JoinReader and canonicalise newlines. */
+	private static final boolean isLineSeparator(int c) {
+		switch ((char)c) {
+			case '\r':
+			case '\n':
+			case '\u2028':
+			case '\u2029':
+			case '\u000B':
+			case '\u000C':
+			case '\u0085':
+				return true;
+			default:
+				return (c == -1);
+		}
+	}
+
+
+	private int read() throws IOException {
+		int	c = reader.read();
+		switch (c) {
+			case '\r':
+				cr = true;
+				line++;
+				lastcolumn = column;
+				column = 0;
+				break;
+			case '\n':
+				if (cr) {
+					cr = false;
+					break;
+				}
+				/* fallthrough */
+			case '\u2028':
+			case '\u2029':
+			case '\u000B':
+			case '\u000C':
+			case '\u0085':
+				cr = false;
+				line++;
+				lastcolumn = column;
+				column = 0;
+				break;
+			default:
+				cr = false;
+				column++;
+				break;
+		}
+
+/*
+		if (isLineSeparator(c)) {
+			line++;
+			lastcolumn = column;
+			column = 0;
+		}
+		else {
+			column++;
+		}
+*/
+
+		return c;
+	}
+
+	/* You can unget AT MOST one newline. */
+	private void unread(int c)
+						throws IOException {
+		if (c != -1) {
+			if (isLineSeparator(c)) {
+				line--;
+				column = lastcolumn;
+				cr = false;
+			}
+			else {
+				column--;
+			}
+			reader.unread(c);
+		}
+	}
+
+	/* Lexes a C comment whose opening delimiter is already consumed.
+	 * End of input inside the comment is reported as an error. */
+	private Token ccomment() throws IOException, LexerException {
+		StringBuilder	text = new StringBuilder("/*");
+		for (int prev = 0, d = read(); ; prev = d, d = read()) {
+			if (d == -1) {
+				/* Unterminated: report it instead of looping forever. */
+				error("Unterminated comment");
+				return new Token(ERROR, text.toString(), null);
+			}
+			text.append((char)d);
+			/* The closing delimiter is a '*' immediately followed by '/'. */
+			if (prev == '*' && d == '/')
+				return new Token(COMMENT, text.toString());
+		}
+	}
+
+	private Token cppcomment()
+						throws IOException {
+		StringBuilder	text = new StringBuilder("//");
+		int				d = read();
+		while (!isLineSeparator(d)) {
+			text.append((char)d);
+			d = read();
+		}
+		unread(d);
+		return new Token(COMMENT, text.toString());
+	}
+
+	/* Lexes the rest of an escape sequence (the backslash is already
+	 * consumed), appends its spelling to text and returns its value. */
+	private int escape(StringBuilder text) throws IOException, LexerException {
+		int		d = read();
+		switch (d) {
+			case 'a': text.append('a'); return 0x07;	/* BEL */
+			case 'b': text.append('b'); return '\b';
+			case 'f': text.append('f'); return '\f';
+			case 'n': text.append('n'); return '\n';
+			case 'r': text.append('r'); return '\r';
+			case 't': text.append('t'); return '\t';
+			case 'v': text.append('v'); return 0x0b;
+			case '\\': text.append('\\'); return '\\';
+
+			case '0': case '1': case '2': case '3':
+			case '4': case '5': case '6': case '7':
+				int	len = 0;
+				int	val = 0;
+				do {
+					val = (val << 3) + Character.digit(d, 8);
+					text.append((char)d);
+					d = read();
+				} while (++len < 3 && Character.digit(d, 8) != -1);
+				unread(d);
+				return val;
+
+			case 'x':
+				text.append((char)d);	/* 'x' is not a digit; up to two hex digits follow. */
+				len = 0;
+				val = 0;
+				for (d = read(); len++ < 2 && Character.digit(d, 16) != -1; d = read()) {
+					val = (val << 4) + Character.digit(d, 16);
+					text.append((char)d);
+				}
+				unread(d);
+				return val;
+
+			/* Exclude two cases from the warning. */
+			case '"': text.append('"'); return '"';
+			case '\'': text.append('\''); return '\'';
+
+			default:
+				warning("Unnecessary escape character " + (char)d);
+				text.append((char)d);
+				return d;
+		}
+	}
+
+	private Token character()
+						throws IOException,
+								LexerException {
+		StringBuilder	text = new StringBuilder("'");
+		int				d = read();
+		if (d == '\\') {
+			text.append('\\');
+			d = escape(text);
+		}
+		else if (isLineSeparator(d)) {
+			unread(d);
+			error("Unterminated character literal");
+			return new Token(ERROR, text.toString(), null);
+		}
+		else if (d == '\'') {
+			text.append('\'');
+			error("Empty character literal");
+			return new Token(ERROR, text.toString(), null);
+		}
+		else if (!Character.isDefined(d)) {
+			text.append('?');
+			error("Illegal unicode character literal");
+		}
+		else {
+			text.append((char)d);
+		}
+
+		int		e = read();
+		if (e != '\'') {
+			unread(e);
+			error("Illegal character constant");
+			/* XXX We could do some patching up here? */
+			return new Token(ERROR, text.toString(), null);
+		}
+		text.append('\'');
+		/* XXX Bad cast. */
+		return new Token(CHARACTER,
+				text.toString(), Character.valueOf((char)d));
+	}
+
+	/* XXX This strips the enclosing quotes from the
+	 * returned value. */
+	private Token string(char open, char close)
+						throws IOException,
+								LexerException {
+		StringBuilder	text = new StringBuilder();
+		text.append(open);
+
+		StringBuilder	buf = new StringBuilder();
+
+		for (;;) {
+			int	c = read();
+			if (c == close) {
+				break;
+			}
+			else if (c == '\\') {
+				text.append('\\');
+				if (!include) {
+					char	d = (char)escape(text);
+					buf.append(d);
+				}
+			}
+			else if (c == -1) {
+				unread(c);
+				error("End of file in string literal after " + buf);
+				return new Token(ERROR, text.toString(), null);
+			}
+			else if (isLineSeparator(c)) {
+				unread(c);
+				error("Unterminated string literal after " + buf);
+				return new Token(ERROR, text.toString(), null);
+			}
+			else {
+				text.append((char)c);
+				buf.append((char)c);
+			}
+		}
+		text.append(close);
+		return new Token(close == '>' ? HEADER : STRING,
+						text.toString(), buf.toString());
+	}
+
+	private void number_suffix(StringBuilder text, int d)
+						throws IOException {
+		if (d == 'U') {
+			text.append((char)d);
+			d = read();
+		}
+		if (d == 'L') {
+			text.append((char)d);
+		}
+		else if (d == 'I') {
+			text.append((char)d);
+		}
+		else {
+			unread(d);
+		}
+	}
+
+	/* We already chewed a zero, so empty is fine. */
+	private Token number_octal()
+						throws IOException,
+								LexerException {
+		StringBuilder	text = new StringBuilder("0");
+		int				d = read();
+		long			val = 0;
+		while (Character.digit(d, 8) != -1) {
+			val = (val << 3) + Character.digit(d, 8);
+			text.append((char)d);
+			d = read();
+		}
+		number_suffix(text, d);
+		return new Token(INTEGER,
+				text.toString(), Long.valueOf(val));
+	}
+
+	/* We do not know whether the first digit after the prefix is valid. */
+	private Token number_hex(char x)
+						throws IOException,
+								LexerException {
+		StringBuilder	text = new StringBuilder("0");
+		text.append(x);
+		int				d = read();
+		if (Character.digit(d, 16) == -1) {
+			unread(d);
+			error("Illegal hexadecimal constant " + (char)d);
+			return new Token(ERROR, text.toString(), null);
+		}
+		long	val = 0;
+		do {
+			val = (val << 4) + Character.digit(d, 16);
+			text.append((char)d);
+			d = read();
+		} while (Character.digit(d, 16) != -1);
+		number_suffix(text, d);
+		return new Token(INTEGER,
+				text.toString(), Long.valueOf(val));
+	}
+
+	/* Lexes a decimal integer whose first digit c was already read.
+	 * We know we have at least one valid digit, but empty is not fine. */
+	/* XXX This needs a complete rewrite. */
+	private Token number_decimal(int c)
+						throws IOException, LexerException {
+		/* Start empty; a char argument would be taken as an int capacity. */
+		StringBuilder	text = new StringBuilder();
+		int				d = c;
+		long			val = 0;
+		do {
+			val = val * 10 + Character.digit(d, 10);
+			text.append((char)d);
+			d = read();
+		} while (Character.digit(d, 10) != -1);
+		number_suffix(text, d);
+		return new Token(INTEGER,
+				text.toString(), Long.valueOf(val));
+	}
+
+	private Token identifier(int c)
+						throws IOException,
+								LexerException {
+		StringBuilder	text = new StringBuilder();
+		int				d;
+		text.append((char)c);
+		for (;;) {
+			d = read();
+			if (Character.isIdentifierIgnorable(d))
+				;
+			else if (Character.isJavaIdentifierPart(d))
+				text.append((char)d);
+			else
+				break;
+		}
+		unread(d);
+		return new Token(IDENTIFIER, text.toString());
+	}
+
+	private Token whitespace(int c)
+						throws IOException,
+								LexerException {
+		StringBuilder	text = new StringBuilder();
+		int				d;
+		text.append((char)c);
+		for (;;) {
+			d = read();
+			if (ppvalid && isLineSeparator(d))	/* XXX Ugly. */
+				break;
+			if (Character.isWhitespace(d))
+				text.append((char)d);
+			else
+				break;
+		}
+		unread(d);
+		return new Token(WHITESPACE, text.toString());
+	}
+
+	/* No token processed by cond() contains a newline. */
+	private Token cond(char c, int yes, int no)
+						throws IOException {
+		int	d = read();
+		if (c == d)
+			return new Token(yes);
+		unread(d);
+		return new Token(no);
+	}
+
+	public Token token()
+						throws IOException,
+								LexerException {
+		Token	tok = null;
+
+		int		_l = line;
+		int		_c = column;
+
+		int		c = read();
+		int		d, e;
+
+		switch (c) {
+			case '\n':
+				if (ppvalid) {
+					bol = true;
+					if (include) {
+						tok = new Token(NL, _l, _c, new String("\n"));
+					}
+					else {
+						int	nls = 0;
+						do {
+							d = read();
+							nls++;
+						} while (d == '\n');
+						unread(d);
+						char[]	text = new char[nls];
+						for (int i = 0; i < text.length; i++)
+							text[i] = '\n';
+						// Skip the bol = false below.
+						tok = new Token(NL, _l, _c, new String(text));
+					}
+					if (DEBUG)
+						System.out.println("lx: Returning NL: " + tok);
+					return tok;
+				}
+				/* Let it be handled as whitespace. */
+				break;
+
+			case '!':
+				tok = cond('=', NE, '!');
+				break;
+
+			case '#':
+				if (bol)
+					tok = new Token(HASH);
+				else
+					tok = cond('#', PASTE, '#');
+				break;
+
+			case '+':
+				d = read();
+				if (d == '+')
+					tok = new Token(INC);
+				else if (d == '=')
+					tok = new Token(PLUS_EQ);
+				else
+					unread(d);
+				break;
+			case '-':
+				d = read();
+				if (d == '-')
+					tok = new Token(DEC);
+				else if (d == '=')
+					tok = new Token(SUB_EQ);
+				else if (d == '>')
+					tok = new Token(ARROW);
+				else
+					unread(d);
+				break;
+
+			case '*':
+				tok = cond('=', MULT_EQ, '*');
+				break;
+			case '/':
+				d = read();
+				if (d == '*')
+					tok = ccomment();
+				else if (d == '/')
+					tok = cppcomment();
+				else if (d == '=')
+					tok = new Token(DIV_EQ);
+				else
+					unread(d);
+				break;
+
+			case '%':
+				tok = cond('=', MOD_EQ, '%');
+				break;
+
+			case ':':
+				/* :: */
+				break;
+
+			case '<':
+				if (include) {
+					tok = string('<', '>');
+				}
+				else {
+					d = read();
+					if (d == '=')
+						tok = new Token(LE);
+					else if (d == '<')
+						tok = cond('=', LSH_EQ, LSH);
+					else
+						unread(d);
+				}
+				break;
+
+			case '=':
+				tok = cond('=', EQ, '=');
+				break;
+
+			case '>':
+				d = read();
+				if (d == '=')
+					tok = new Token(GE);
+				else if (d == '>')
+					tok = cond('=', RSH_EQ, RSH);
+				else
+					unread(d);
+				break;
+
+			case '^':
+				tok = cond('=', XOR_EQ, '^');
+				break;
+
+			case '|':
+				d = read();
+				if (d == '=')
+					tok = new Token(OR_EQ);
+				else if (d == '|')
+					tok = cond('=', LOR_EQ, LOR);
+				else
+					unread(d);
+				break;
+			case '&':
+				d = read();
+				if (d == '&')
+					tok = cond('=', LAND_EQ, LAND);
+				else if (d == '=')
+					tok = new Token(AND_EQ);
+				else
+					unread(d);
+				break;
+
+			case '.':
+				d = read();
+				if (d == '.')
+					tok = cond('.', ELLIPSIS, RANGE);
+				else
+					unread(d);
+				/* XXX decimal fraction */
+				break;
+
+			case '0':
+				/* octal or hex */
+				d = read();
+				if (d == 'x' || d == 'X')
+					tok = number_hex((char)d);
+				else {
+					unread(d);
+					tok = number_octal();
+				}
+				break;
+
+			case '\'':
+				tok = character();
+				break;
+
+			case '"':
+				tok = string('"', '"');
+				break;
+
+			case -1:
+				tok = new Token(EOF, _l, _c, "<eof>");
+				break;
+		}
+
+		if (tok == null) {
+			if (Character.isWhitespace(c)) {
+				tok = whitespace(c);
+			}
+			else if (Character.isDigit(c)) {
+				tok = number_decimal(c);
+			}
+			else if (Character.isJavaIdentifierStart(c)) {
+				tok = identifier(c);
+			}
+			else {
+				tok = new Token(c);
+			}
+		}
+
+		bol = false;
+
+		tok.setLocation(_l, _c);
+		if (DEBUG)
+			System.out.println("lx: Returning " + tok);
+		// (new Exception("here")).printStackTrace(System.out);
+		return tok;
+	}
+
+}
diff --git a/src/java/org/anarres/cpp/Macro.java b/src/java/org/anarres/cpp/Macro.java
new file mode 100644
index 0000000..0d0ae55
--- /dev/null
+++ b/src/java/org/anarres/cpp/Macro.java
@@ -0,0 +1,157 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * A macro object.
+ *
+ * This encapsulates a name, an argument count, and a token stream
+ * for replacement. The replacement token stream may contain the
+ * extra tokens {@link Token#M_ARG} and {@link Token#M_STRING}.
+ */
+public class Macro {
+	private String			name;
+	/* It's an explicit decision to keep these around here. We don't
+	 * need to; the argument token type is M_ARG and the value
+	 * is the index. The strings themselves are only used in
+	 * stringification of the macro, for debugging. */
+	private List<String>	args;
+	private boolean			variadic;
+	private List<Token>		tokens;
+
+	public Macro(String name) {
+		this.name = name;
+		this.args = null;
+		this.variadic = false;
+		this.tokens = new ArrayList<Token>();
+	}
+
+	/**
+	 * Returns the name of this macro.
+	 */
+	public String getName() {
+		return name;
+	}
+
+	/**
+	 * Sets the arguments to this macro.
+	 */
+	public void setArgs(List<String> args) {
+		this.args = args;
+	}
+
+	/**
+	 * Returns true if this is a function-like macro.
+	 */
+	public boolean isFunctionLike() {
+		return args != null;
+	}
+
+	/**
+	 * Returns the number of arguments to this macro (0 if object-like).
+	 */
+	public int getArgs() {
+		return (args == null) ? 0 : args.size();
+	}
+
+	/**
+	 * Sets the variadic flag on this Macro.
+	 */
+	public void setVariadic(boolean b) {
+		this.variadic = b;
+	}
+
+	/**
+	 * Returns true if this is a variadic function-like macro.
+	 */
+	public boolean isVariadic() {
+		return variadic;
+	}
+
+	/**
+	 * Adds a token to the expansion of this macro.
+	 */
+	public void addToken(Token tok) {
+		this.tokens.add(tok);
+	}
+
+	/**
+	 * Adds a "paste" operator to the expansion of this macro.
+	 *
+	 * A paste operator causes the next token added to be pasted
+	 * to the previous token when the macro is expanded.
+	 * It is an error for a macro to start or end with a paste token.
+	 * @throws IllegalStateException if no token precedes the paste.
+	 */
+	public void addPaste(Token tok) {
+		/*
+		 * Given: tok0 ## tok1
+		 * We generate: M_PASTE, tok0, tok1
+		 * This extends as per a stack language:
+		 * tok0 ## tok1 ## tok2 ->
+		 *   M_PASTE, tok0, M_PASTE, tok1, tok2
+		 */
+		if (tokens.isEmpty())
+			throw new IllegalStateException("Paste at start of " + name);
+		this.tokens.add(tokens.size() - 1, tok);
+	}
+
+	/* pp */ List<Token> getTokens() {
+		return tokens;
+	}
+
+	@Override
+	public String toString() {
+		StringBuilder	buf = new StringBuilder(name);
+		if (args != null) {
+			buf.append('(');
+			Iterator<String>	it = args.iterator();
+			while (it.hasNext()) {
+				buf.append(it.next());
+				if (it.hasNext())
+					buf.append(", ");
+				else if (isVariadic())
+					buf.append("...");
+			}
+			buf.append(')');
+		}
+		if (!tokens.isEmpty()) {
+			boolean	paste = false;
+			buf.append(" => ");
+			/* M_PASTE is stored before its left operand; print the
+			 * operator after that operand instead. */
+			for (Token tok : tokens) {
+				if (tok.getType() == Token.M_PASTE) {
+					paste = true;
+					continue;
+				}
+				buf.append(tok.getText());
+				if (paste) {
+					buf.append(" #" + "# ");
+					paste = false;
+				}
+			}
+		}
+		return buf.toString();
+	}
+}
diff --git a/src/java/org/anarres/cpp/MacroTokenSource.java b/src/java/org/anarres/cpp/MacroTokenSource.java
new file mode 100644
index 0000000..249afdf
--- /dev/null
+++ b/src/java/org/anarres/cpp/MacroTokenSource.java
@@ -0,0 +1,191 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+import static org.anarres.cpp.Token.*;
+
+/* pp */ class MacroTokenSource extends Source {
+	private Macro				macro;
+	private Iterator<Token>		tokens;	/* Pointer into the macro.  */
+	private List<Argument>		args;	/* { unexpanded, expanded } */
+	private Iterator<Token>		arg;	/* "current expansion" */
+
+	/* pp */ MacroTokenSource(Macro m, List<Argument> args) {
+		this.macro = m;
+		this.tokens = m.getTokens().iterator();
+		this.args = args;
+		this.arg = null;
+	}
+
+	@Override
+	/* pp */ boolean isExpanding(Macro m) {
+		/* When we are expanding an arg, 'this' macro is not
+		 * being expanded, and thus we may re-expand it. */
+		if (/* XXX this.arg == null && */ this.macro == m)
+			return true;
+		return super.isExpanding(m);
+	}
+
+	private static void escape(StringBuilder buf, CharSequence cs) {
+		for (int i = 0; i < cs.length(); i++) {
+			char	c = cs.charAt(i);
+			switch (c) {
+				case '\\':
+					buf.append("\\\\");
+					break;
+				case '"':
+					buf.append("\\\"");
+					break;
+				case '\n':
+					buf.append("\\n");
+					break;
+				case '\r':
+					buf.append("\\r");
+					break;
+				default:
+					buf.append(c);
+			}
+		}
+	}
+
+	private void concat(StringBuilder buf, Argument arg) {
+		Iterator<Token>	it = arg.iterator();
+		while (it.hasNext()) {
+			Token	tok = it.next();
+			buf.append(tok.getText());
+		}
+	}
+
+	private Token stringify(Token pos, Argument arg) {
+		StringBuilder	buf = new StringBuilder();
+		concat(buf, arg);
+		StringBuilder	str = new StringBuilder("\"");
+		escape(str, buf);
+		str.append('\"');
+		return new Token(STRING,
+				pos.getLine(), pos.getColumn(),
+				str.toString(), buf.toString());
+	}
+
+
+	/* At this point, we have consumed the first M_PASTE.
+	 * @see Macro#addPaste(Token) */
+	private void paste(Token ptok)
+						throws IOException,
+								LexerException {
+		StringBuilder	buf = new StringBuilder();
+		/* We know here that arg is null or expired,
+		 * since we cannot paste an expanded arg. */
+
+		int	count = 2;
+		for (int i = 0; i < count; i++) {
+			if (!tokens.hasNext())
+				error(ptok.getLine(), ptok.getColumn(),
+						"Paste at end of expansion");
+			Token	tok = tokens.next();
+			switch (tok.getType()) {
+				case M_PASTE:
+					/* One extra to paste, plus one because the
+					 * paste token didn't count. */
+					count += 2;
+					ptok = tok;
+					break;
+				case M_ARG:
+					int idx = ((Integer)tok.getValue()).intValue();
+					concat(buf, args.get(idx));
+					break;
+				/* XXX Test this. */
+				case COMMENT:
+					break;
+				default:
+					buf.append(tok.getText());
+					break;
+			}
+		}
+
+		/* XXX Somewhere here, need to check that concatenation
+		 * produces a valid token. */
+
+		/* Push and re-lex. */
+		StringBuilder		src = new StringBuilder();
+		escape(src, buf);
+		StringLexerSource	sl = new StringLexerSource(src.toString());
+
+		arg = new SourceIterator(sl);
+	}
+
+	public Token token()
+						throws IOException,
+								LexerException {
+		for (;;) {
+			/* Deal with lexed tokens first. */
+
+			if (arg != null) {
+				if (arg.hasNext())
+					return arg.next();
+				arg = null;
+			}
+
+			if (!tokens.hasNext())
+				return new Token(EOF, -1, -1, "");	/* End of macro. */
+			Token	tok = tokens.next();
+			int		idx;
+			switch (tok.getType()) {
+				case M_STRING:
+					/* Use the nonexpanded arg. */
+					idx = ((Integer)tok.getValue()).intValue();
+					return stringify(tok, args.get(idx));
+				case M_ARG:
+					/* Expand the arg. */
+					idx = ((Integer)tok.getValue()).intValue();
+					// System.out.println("Pushing arg " + args.get(idx));
+					arg = args.get(idx).expansion();
+					break;
+				case M_PASTE:
+					paste(tok);
+					break;
+				default:
+					return tok;
+			}
+		} /* for */
+	}
+
+	public String toString() {
+		StringBuilder	buf = new StringBuilder();
+		buf.append("expansion of ").append(macro.getName());
+		Source	parent = getParent();
+		if (parent != null)
+			buf.append(" in ").append(String.valueOf(parent));
+		return buf.toString();
+	}
+}
diff --git a/src/java/org/anarres/cpp/Main.java b/src/java/org/anarres/cpp/Main.java
new file mode 100644
index 0000000..cec7a37
--- /dev/null
+++ b/src/java/org/anarres/cpp/Main.java
@@ -0,0 +1,111 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.File;
+import java.io.IOException;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * (Currently a simple test class).
+ *
+ * Preprocesses the files named on the command line, writes the
+ * resulting token text to standard output, and then lists every
+ * macro known to the Preprocessor.
+ */
+public class Main {
+
+	public static void main(String[] args) throws Exception {
+		/* Without this check, args[0] below throws
+		 * ArrayIndexOutOfBoundsException when no file is given. */
+		if (args.length == 0) {
+			System.err.println("Usage: Main <file> [<file>...]");
+			return;
+		}
+
+		List<String>	path = new ArrayList<String>();
+		path.add("/usr/include");
+		path.add("/usr/local/include");
+		path.add("/usr/lib/gcc/i686-pc-linux-gnu/4.1.2/include");
+
+		Source			source = new FileLexerSource(new File(args[0]));
+		Preprocessor	pp = new Preprocessor(source);
+		pp.setIncludePath(path);
+
+		for (int i = 1; i < args.length; i++) {
+			pp.push_source(new FileLexerSource(new File(args[i])),true);
+		}
+
+		Macro			m = new Macro("__WORDSIZE");
+		m.addToken(new Token(INTEGER, -1, -1, "32", Integer.valueOf(32)));
+		pp.addMacro(m);
+
+		m = new Macro("__STDC__");
+		m.addToken(new Token(INTEGER, -1, -1, "1", Integer.valueOf(1)));
+		pp.addMacro(m);
+
+		try {
+			for (;;) {
+				Token	tok = pp.token();
+				/* Treat a null token like EOF rather than
+				 * dereferencing it below. */
+				if (tok == null || tok.getType() == Token.EOF)
+					break;
+				/* Alternatives for debugging: print(tok), or
+				 * print("[" + tok.getText() + "]"). */
+				System.out.print(tok.getText());
+			}
+		}
+		catch (Exception e) {
+			/* Report the failure with the stack of active
+			 * sources, then fall through to the macro dump. */
+			e.printStackTrace();
+			Source	s = pp.getSource();
+			while (s != null) {
+				System.out.println(" -> " + s);
+				s = s.getParent();
+			}
+		}
+
+		/* Dump all macros, sorted by name. */
+		Map<String,Macro>	macros = pp.getMacros();
+		List<String>		keys = new ArrayList<String>(
+				macros.keySet()
+					);
+		Collections.sort(keys);
+		Iterator<String>	mt = keys.iterator();
+		while (mt.hasNext()) {
+			String	key = mt.next();
+			Macro	macro = macros.get(key);
+			System.out.println("#" + "macro " + macro);
+		}
+
+	}
+
+}
diff --git a/src/java/org/anarres/cpp/Preprocessor.java b/src/java/org/anarres/cpp/Preprocessor.java
new file mode 100644
index 0000000..c1b87d7
--- /dev/null
+++ b/src/java/org/anarres/cpp/Preprocessor.java
@@ -0,0 +1,1511 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.File;
+import java.io.IOException;
+
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A C Preprocessor.
+ * The Preprocessor outputs a token stream which does not need
+ * re-lexing for C or C++. Alternatively, the output text may be
+ * reconstructed by concatenating the {@link Token#getText() text}
+ * values of the returned {@link Token Tokens}. (See
+ * {@link CppReader}, which does this.)
+ */
+public class Preprocessor {
+	private static final boolean	DEBUG = false;
+
+	public static final int	FL_LINEMARKER = 1;
+
+	private static final Macro		__LINE__ = new Macro("__LINE__");
+	private static final Macro		__FILE__ = new Macro("__FILE__");
+
+	private Map<String,Macro>		macros;
+	private Stack<State>			states;
+	private Source					source;
+
+	private List<String>			path;
+	private PreprocessorListener	listener;
+
+	private int						flags;
+
+	/** Creates a Preprocessor reading from initial, with the given flags. */
+	public Preprocessor(Source initial, int flags) {
+		this.source = null;
+		this.path = null;
+		this.states = new Stack<State>();
+		this.states.push(new State());
+		this.macros = new HashMap<String,Macro>();
+		this.macros.put(__LINE__.getName(), __LINE__);
+		this.macros.put(__FILE__.getName(), __FILE__);
+		setListener(new PreprocessorListener());
+		setFlags(flags);
+		push_source(initial, false);
+		/* With line markers on, queue one for line 1 of the initial source. */
+		if ((flags & FL_LINEMARKER) != 0)
+			source_untoken(line_token(1, source.getName(), "\n"));
+	}
+	/** Equivalent to 'new Preprocessor(initial, 0)', i.e. no flags set. */
+	public Preprocessor(Source initial) {
+		this(initial, 0);
+	}
+
+	/** Equivalent to
+	 * 'new Preprocessor(new {@link FileLexerSource}(file))'.
+	 * The IOException, if any, comes from the FileLexerSource constructor. */
+	public Preprocessor(File file)
+						throws IOException {
+		this(new FileLexerSource(file), 0);
+	}
+
+	/**
+	 * Installs listener here and on every Source on the input stack.
+	 */
+	public void setListener(PreprocessorListener listener) {
+		this.listener = listener;
+		for (Source s = source; s != null; s = s.getParent())
+			s.setListener(listener);
+	}
+	/** Sets the flag bits of this Preprocessor, e.g. {@link #FL_LINEMARKER}. */
+	public void setFlags(int flags) {
+		this.flags = flags;
+	}
+
+	/**
+	 * Reports an error located at the given token.
+	 *
+	 * The installed PreprocessorListener, if any, receives the
+	 * error; without a listener the error is silently dropped.
+	 */
+	protected void error(Token tok, String msg)
+						throws LexerException {
+		if (listener == null)
+			return;
+		int	line = tok.getLine();
+		listener.handleError(source, line, tok.getColumn(), msg);
+	}
+
+	/**
+	 * Handles a warning.
+	 *
+	 * If a PreprocessorListener is installed, it receives the
+	 * warning. Otherwise, it is ignored.
+	 */
+	protected void warning(Token tok, String msg)
+						throws LexerException {
+		/* NOTE(review): relies on PreprocessorListener.handleWarning()
+		 * taking the same arguments as handleError() -- confirm. */
+		if (listener != null)
+			listener.handleWarning(source, tok.getLine(), tok.getColumn(), msg);
+	}
+
+/*
+	public void setSource(Source source) {
+		this.source = source;
+	}
+*/
+
+	/** Registers m by name. 'defined' is rejected; define() pre-checks it. */
+	public void addMacro(Macro m) throws LexerException {
+		String	key = m.getName();
+		if ("defined".equals(key))
+			throw new LexerException("Cannot redefine name 'defined'");
+		macros.put(key, m);
+	}
+
+	/**
+	 * Defines name as an object-like macro whose expansion is the
+	 * token sequence lexed from value. This is a convenience method.
+	 *
+	 * @throws LexerException on lexing failure (IOExceptions are wrapped).
+	 */
+	public void addMacro(String name, String value)
+						throws LexerException {
+		try {
+			Macro				macro = new Macro(name);
+			StringLexerSource	lexer = new StringLexerSource(value);
+			Token				tok = lexer.token();
+			while (tok.getType() != EOF) {
+				macro.addToken(tok);
+				tok = lexer.token();
+			}
+			addMacro(macro);
+		}
+		catch (IOException e) {
+			throw new LexerException(e);
+		}
+	}
+
+	/**
+	 * Defines the given name as a macro with the value "1".
+	 *
+	 * This is a convenience method.
+	 */
+	public void addMacro(String name)
+						throws LexerException {
+		addMacro(name, "1");
+	}
+
+	/**
+	 * Sets the include path: the directories include() searches, in order.
+	 */
+	/* Note for future: Create an IncludeHandler? */
+	public void setIncludePath(List<String> path) {
+		this.path = path;
+	}
+
+	/**
+	 * Returns the live Map of Macros known to this Preprocessor, keyed
+	 * by name. The constructor seeds it with __LINE__ and __FILE__.
+	 */
+	protected Map<String,Macro> getMacros() {
+		return macros;
+	}
+
+
+/* States */
+
+	/* Pushes a new State constructed from the current top of the stack. */
+	private void push_state() {
+		states.push(new State(states.peek()));
+	}
+
+	private void pop_state()
+						throws LexerException {
+		State	s = states.pop();
+		if (states.isEmpty()) {
+			if (listener != null)
+				listener.handleError(getSource(), 0, 0,
+							"#" + "endif without #" + "if");
+			states.push(s);
+		}
+	}
+
+	/* True if the top State reports both its parent and itself active. */
+	private boolean isActive() {
+		return states.peek().isParentActive() && states.peek().isActive();
+	}
+
+
+/* Sources */
+
+	/**
+	 * Returns the top Source on the input stack, or null if it is empty.
+	 *
+	 * @see Source
+	 * @see #push_source(Source,boolean)
+	 * @see #pop_source()
+	 */
+	protected Source getSource() {
+		return source;
+	}
+
+	/**
+	 * Pushes a Source onto the input stack and notifies the listener, if any.
+	 * The previous top and autopop are handed to Source.setParent().
+	 * @see #getSource()
+	 * @see #pop_source()
+	 */
+	protected void push_source(Source source, boolean autopop) {
+		source.setParent(this.source, autopop);
+		source.setListener(listener);
+		this.source = source;
+		if (listener != null)
+			listener.handleSourceChange(this.source, "push");
+	}
+
+	/**
+	 * Pops a Source from the input stack and notifies the listener, if any.
+	 * The parent of the popped Source becomes the new top.
+	 * @see #getSource()
+	 * @see #push_source(Source,boolean)
+	 */
+	protected void pop_source() {
+		this.source = this.source.getParent();
+		if (listener != null)
+			listener.handleSourceChange(this.source, "pop");
+	}
+
+
+/* Source tokens */
+
+	private Token	source_token;
+
+	/* Builds a P_LINE token at the given line, column 0, whose text
+	 * is: #line <line> "<name>"<extra> */
+	private Token line_token(int line, String name, String extra) {
+		String	text = "#line " + line + " \"" + name + "\"" + extra;
+		return new Token(P_LINE, line, 0, text, null);
+	}
+	/* Returns the pushed-back token if there is one, else the next from the Source stack. */
+	private Token source_token()
+						throws IOException,
+								LexerException {
+		if (source_token != null) {
+			Token	tok = source_token;
+			source_token = null;
+			return tok;
+		}
+
+		for (;;) {
+			Token	tok = source.token();
+			if (tok.getType() == EOF && source.isAutopop()) {
+				/* EOF on an autopop Source: drop it and resume its parent. */
+				Source	s = source;
+				pop_source();
+				if ((flags & FL_LINEMARKER) != 0 && s.isNumbered()) {
+					/* Not perfect, but ... */
+					source_untoken(new Token(NL, source.getLine(), 0, "\n"));
+					return line_token(source.getLine(), source.getName(), "");
+				}
+				else {
+					continue;
+				}
+			}
+			return tok;
+		}
+	}
+	/* Pushes back one token for source_token() to return next; only one may be pending. */
+	private void source_untoken(Token tok) {
+		if (this.source_token != null)
+			throw new IllegalStateException("Cannot return two tokens");
+		this.source_token = tok;
+	}
+
+	/* "White" here means a WHITESPACE or a COMMENT token. */
+	private boolean isWhite(Token tok) {
+		return tok.getType() == WHITESPACE || tok.getType() == COMMENT;
+	}
+
+	/* Returns the next source token that is not whitespace or a comment. */
+	private Token source_token_nonwhite()
+						throws IOException,
+								LexerException {
+		Token	next = source_token();
+		while (isWhite(next))
+			next = source_token();
+		return next;
+	}
+
+	/**
+	 * Returns an NL or an EOF token.
+	 *
+	 * The metadata on the token will be correct, which is better
+	 * than generating a new one.
+	 */
+	private Token source_skipline(boolean white)
+						throws IOException,
+								LexerException {
+		/* NOTE(review): a token pushed back via source_untoken() is not consulted here. */
+		return source.skipline(white);
+	}
+
+	/* Processes and expands a macro; returns false if no expansion was pushed. */
+	private boolean macro(Macro m, Token orig)
+						throws IOException,
+								LexerException {
+		Token			tok;
+		List<Argument>	args;
+
+		/* orig supplies the line and column for __LINE__ and __FILE__. */
+
+		if (m.isFunctionLike()) {
+			OPEN: for (;;) {
+				tok = source_token();
+				/* NOTE: white space and newlines skipped here are not replayed. */
+				switch (tok.getType()) {
+					case WHITESPACE:	/* XXX Really? */
+					case COMMENT:
+					case NL:
+						break;	/* continue */
+					case '(':
+						break OPEN;
+					default:
+						source_untoken(tok);
+						return false;
+				}
+			}
+
+			/* Arguments are collected raw; Argument.expand() is called below. */
+			tok = source_token_nonwhite();
+
+			/* We either have, or we should have args.
+			 * This deals elegantly with the case that we have
+			 * one empty arg. */
+			if (tok.getType() != ')' || m.getArgs() > 0) {
+				args = new ArrayList<Argument>();
+
+				Argument		arg = new Argument();
+				int				depth = 0;
+				boolean			space = false;
+
+				ARGS: for (;;) {
+					/* depth counts parentheses opened inside the current arg. */
+					switch (tok.getType()) {
+						case EOF:
+							error(tok, "EOF in macro args");
+							return false;
+
+						case ',':
+							if (depth == 0) {
+								if (m.isVariadic() &&
+									/* We are building the last arg. */
+									args.size() == m.getArgs() - 1) {
+									/* Just add the comma. */
+									arg.addToken(tok);
+								}
+								else {
+									args.add(arg);
+									arg = new Argument();
+								}
+							}
+							else {
+								arg.addToken(tok);
+							}
+							space = false;
+							break;
+						case ')':
+							if (depth == 0) {
+								args.add(arg);
+								break ARGS;
+							}
+							else {
+								depth--;
+								arg.addToken(tok);
+							}
+							space = false;
+							break;
+						case '(':
+							depth++;
+							arg.addToken(tok);
+							space = false;
+							break;
+
+						case WHITESPACE:
+						case COMMENT:
+							/* Avoid duplicating spaces. */
+							space = true;
+							break;
+
+						default:
+							/* Do not put space on the beginning of
+							 * an argument token. */
+							if (space && ! arg.isEmpty())
+								arg.addToken(Token.space);
+							arg.addToken(tok);
+							space = false;
+							break;
+
+					}
+					/* Read the next raw token; nothing is expanded at this stage. */
+					tok = source_token();
+				}
+				/* space may still be true here, thus trailing space
+				 * is stripped from arguments. */
+
+				if (args.size() != m.getArgs()) {
+					error(tok,
+							"macro " + m.getName() +
+							" has " + m.getArgs() + " parameters " +
+							"but given " + args.size() + " args");
+					/* We could replay the arg tokens, but I
+					 * note that GNU cpp does exactly what we do,
+					 * i.e. output the macro name and chew the args.
+					 */
+					return false;
+				}
+
+				for (int i = 0; i < args.size(); i++) {
+					args.get(i).expand(this);
+				}
+
+				/* Each Argument has now been asked to expand itself. */
+			}
+			else {
+				/* nargs == 0 and we (correctly) got () */
+				args = null;
+			}
+
+		}
+		else {
+			/* Macro without args. */
+				args = null;
+		}
+
+		if (m == __LINE__) {
+			push_source(new FixedTokenSource(
+					new Token[] { new Token(INTEGER,
+							orig.getLine(), orig.getColumn(),
+							String.valueOf(orig.getLine()),
+							Integer.valueOf(orig.getLine())) }
+						), true);
+		}
+		else if (m == __FILE__) {
+			File	file = source.getFile();
+			push_source(new FixedTokenSource(
+					new Token[] { new Token(STRING,
+							orig.getLine(), orig.getColumn(),
+							'"'+ String.valueOf(file) +'"',
+							file) }
+						), true);
+		}
+		else {
+			push_source(new MacroTokenSource(m, args), true);
+		}
+
+		return true;
+	}
+
+	/**
+	 * Macro-expands the tokens of one argument.
+	 *
+	 * The tokens are read back through a temporary FixedTokenSource.
+	 * Each run of whitespace and comments between two tokens becomes
+	 * a single Token.space; such runs at either end are dropped.
+	 */
+	/* I'd rather this were done lazily. */
+	/* pp */ List<Token> expand(List<Token> arg)
+						throws IOException,
+								LexerException {
+		List<Token>	result = new ArrayList<Token>();
+		boolean		pendingSpace = false;
+
+		push_source(new FixedTokenSource(arg), false);
+		for (;;) {
+			Token	tok = expanded_token();
+			int		type = tok.getType();
+			if (type == EOF)
+				break;
+			if (type == WHITESPACE || type == COMMENT) {
+				pendingSpace = true;
+				continue;
+			}
+			/* A real token: emit the deferred space first. */
+			if (pendingSpace && !result.isEmpty())
+				result.add(Token.space);
+			result.add(tok);
+			pendingSpace = false;
+		}
+
+		pop_source();
+
+		return result;
+	}
+
+	/* Processes a #define directive; returns the NL or EOF which ends it. */
+	private Token define()
+						throws IOException,
+								LexerException {
+		Token	tok = source_token_nonwhite();
+		if (tok.getType() != IDENTIFIER) {
+			error(tok, "Expected identifier");
+			return source_skipline(false);
+		}
+		/* if predefined */
+
+		String			name = tok.getText();
+		if ("defined".equals(name)) {
+			error(tok, "Cannot redefine name 'defined'");
+			return source_skipline(false);
+		}
+
+		Macro			m = new Macro(name);
+		List<String>	args;
+
+		tok = source_token();
+		if (tok.getType() == '(') {
+			tok = source_token_nonwhite();
+			if (tok.getType() != ')') {
+				args = new ArrayList<String>();
+				ARGS: for (;;) {
+					switch (tok.getType()) {
+						case IDENTIFIER:
+							args.add(tok.getText());
+							break;
+						case NL:
+						case EOF:
+							error(tok,
+								"Unterminated macro parameter list");
+							break ARGS;
+						default:
+							/* Give up on this definition: skip the
+							 * line so its tail is not read as a body. */
+							error(tok,
+								"error in macro parameters: " +
+								tok.getText());
+							return source_skipline(false);
+					}
+					tok = source_token_nonwhite();
+					switch (tok.getType()) {
+						case ',':
+							break;
+						case ')':
+							tok = source_token_nonwhite();
+							break ARGS;
+						case ELLIPSIS:
+							tok = source_token_nonwhite();
+							if (tok.getType() != ')')
+								error(tok,
+									"ellipsis must be on last argument");
+							m.setVariadic(true);
+							tok = source_token_nonwhite();
+							break ARGS;
+
+						case NL:
+						case EOF:
+							/* Do not skip line. */
+							error(tok,
+								"Unterminated macro definition");
+							break ARGS;
+						default:
+							/* As above: abandon the definition and
+							 * consume the remainder of the line. */
+							error(tok,
+								"bad token in macro parameters: " +
+								tok.getText());
+							return source_skipline(false);
+					}
+					tok = source_token_nonwhite();
+				}
+			}
+			else {
+				tok = source_token_nonwhite();	/* Lose the ')' */
+				args = Collections.emptyList();
+			}
+
+			m.setArgs(args);
+		}
+		else {
+			/* For searching. */
+			args = Collections.emptyList();
+			if (tok.getType() == COMMENT ||
+				tok.getType() == WHITESPACE) {
+				tok = source_token_nonwhite();
+			}
+		}
+
+		/* Get an expansion for the macro, using indexOf. */
+		boolean	space = false;
+		boolean	paste = false;
+		/* XXX UGLY: Ensure no space at start.
+		 * Careful not to break EOF/LF from above. */
+		if (isWhite(tok))	/* XXX Not sure this can ever happen now. */
+			tok = source_token_nonwhite();
+		int		idx;
+
+		EXPANSION: for (;;) {
+			switch (tok.getType()) {
+				case EOF:
+				case NL:
+					break EXPANSION;
+
+				case COMMENT:
+				case WHITESPACE:
+					if (!paste)
+						space = true;
+					break;
+
+				case PASTE:
+					space = false;
+					if (m.getTokens().isEmpty()) {
+						/* Macro.addPaste() needs a left operand. */
+						error(tok, "Paste at start of expansion");
+						break;
+					}
+					paste = true;
+					m.addPaste(new Token(M_PASTE,
+							tok.getLine(), tok.getColumn(),
+							"#" + "#", null));
+					break;
+
+				case '#':
+					if (space)
+						m.addToken(Token.space);
+					space = false;
+					Token	la = source_token_nonwhite();
+					if (la.getType() == IDENTIFIER &&
+						((idx = args.indexOf(la.getText())) != -1)) {
+						m.addToken(new Token(M_STRING,
+								la.getLine(), la.getColumn(),
+								"#" + la.getText(),
+								Integer.valueOf(idx)));
+					}
+					else {
+						m.addToken(tok);
+						/* Allow for special processing. */
+						source_untoken(la);
+					}
+					break;
+
+				case IDENTIFIER:
+					if (space)
+						m.addToken(Token.space);
+					space = false;
+					paste = false;
+					idx = args.indexOf(tok.getText());
+					if (idx == -1)
+						m.addToken(tok);
+					else
+						m.addToken(new Token(M_ARG,
+								tok.getLine(), tok.getColumn(),
+								tok.getText(),
+								Integer.valueOf(idx)));
+					break;
+
+				default:
+					if (space)
+						m.addToken(Token.space);
+					space = false;
+					paste = false;
+					m.addToken(tok);
+					break;
+			}
+			tok = source_token();
+		}
+
+		addMacro(m);
+
+		return tok;	/* NL or EOF. */
+	}
+
+	private Token undef()
+						throws IOException,
+								LexerException {
+		Token	tok = source_token_nonwhite();
+		if (tok.getType() != IDENTIFIER) {
+			error(tok,
+				"Expected identifier, not " + tok.getText());
+			if (tok.getType() == NL || tok.getType() == EOF)
+				return tok;
+		}
+		else {
+			Macro	m = macros.get(tok.getText());
+			if (m != null) {
+				/* XXX error if predefined */
+				macros.remove(m.getName());
+			}
+		}
+		return source_skipline(true);
+	}
+
+	/**
+	 * Handles a include directive.
+	 *
+	 * A quoted name is first looked up in the directory of the
+	 * including file; then each include path entry is tried in order.
+	 * The user may override this to provide alternate semantics,
+	 * for example, creating a Source based on a virtual file system.
+	 *
+	 * @param parent the including file, or null if there is none.
+	 */
+	protected void include(File parent, int line,
+					String name, boolean quoted)
+						throws IOException,
+								LexerException {
+		if (quoted && parent != null) {
+			/* A relative parent like "a.c" has no parent directory;
+			 * go via the absolute path so we search the current
+			 * directory rather than the filesystem root. */
+			File	dir = parent.getAbsoluteFile().getParentFile();
+			File	file = new File(dir, name);
+			if (file.exists()) {
+				push_source(new FileLexerSource(file), true);
+				return;
+			}
+		}
+
+		if (path != null) {
+			for (int i = 0; i < path.size(); i++) {
+				File	file = new File(path.get(i), name);
+				if (file.exists()) {
+					push_source(new FileLexerSource(file), true);
+					return;
+				}
+			}
+		}
+
+		/* Not found anywhere: report it to the listener, if any. */
+		if (listener != null)
+			listener.handleError(getSource(),
+					line, 0,
+					"Header not found: " + name + " in " + path);
+	}
+	/* Processes a #include directive; returns the token ending it, or a line marker. */
+	private Token include()
+						throws IOException,
+								LexerException {
+		LexerSource	lexer = (LexerSource)source;
+		try {
+			lexer.setInclude(true);
+			Token	tok = token_nonwhite();
+
+			String	name;
+			boolean	quoted;
+
+			if (tok.getType() == STRING) {
+				/* XXX Use the original text, not the value.
+				 * Backslashes must not be treated as escapes here. */
+				StringBuilder	buf = new StringBuilder((String)tok.getValue());
+				HEADER: for (;;) {
+					tok = _token();	/* Do macros but nothing else. */
+					switch (tok.getType()) {
+						case WHITESPACE:
+						case COMMENT:
+							continue;
+						case STRING:
+							buf.append((String)tok.getValue());
+							break;
+						case NL:
+						case EOF:
+							break HEADER;
+						default:
+							warning(tok,
+								"Unexpected token on #"+"include line");
+							return source_skipline(false);
+					}
+				}
+				name = buf.toString();
+				quoted = true;
+			}
+			else if (tok.getType() == HEADER) {
+				name = (String)tok.getValue();
+				quoted = false;
+				tok = source_skipline(true);
+			}
+			else {
+				error(tok,
+					"Expected string or header, not " + tok.getText());
+				switch (tok.getType()) {
+					case NL:
+					case EOF:
+						return tok;
+					default:
+						/* Only if not a NL or EOF already. */
+						return source_skipline(false);
+				}
+			}
+
+			/* Do the inclusion: include() pushes the new Source or reports an error. */
+			include(source.getFile(), tok.getLine(), name, quoted);
+
+			/* 'tok' is the 'nl' after the include. We use it after the
+			 * #line directive. */
+			if ((flags & FL_LINEMARKER) != 0) {
+				source_untoken(tok);
+				return line_token(1, name, "");
+			}
+			return tok;
+		}
+		finally {
+			lexer.setInclude(false);
+		}
+	}
+
+	/*
+	 * Shared handler for the error and warning directives. Reports the
+	 * directive name (pptok) followed by the text of the rest of the
+	 * line, as an error if is_error is set and as a warning otherwise.
+	 */
+	private void error(Token pptok, boolean is_error)
+						throws IOException,
+								LexerException {
+		StringBuilder	text = new StringBuilder();
+		text.append('#');
+		text.append(pptok.getText());
+		text.append(' ');
+		/* Only the first fetch skips whitespace; later ones keep it. */
+		Token	cur = source_token_nonwhite();
+		while (cur.getType() != NL && cur.getType() != EOF) {
+			text.append(cur.getText());
+			cur = source_token();
+		}
+		String	msg = text.toString();
+		if (!is_error)
+			warning(pptok, msg);
+		else
+			error(pptok, msg);
+	}
+
+
+
+
+	/*
+	 * Reads the next source token, expanding macros as it goes.
+	 *
+	 * This bypasses token() for #elif expressions: while
+	 * isActive() == false, token() would simply chew the entire
+	 * input line instead of handing the tokens back.
+	 */
+	private Token expanded_token()
+						throws IOException,
+								LexerException {
+		for (;;) {
+			Token	cur = source_token();
+			if (cur.getType() != IDENTIFIER)
+				return cur;
+			Macro	def = macros.get(cur.getText());
+			/* Unknown, already expanding, or refused by macro(): as-is. */
+			if (def == null || source.isExpanding(def) || !macro(def, cur))
+				return cur;
+			/* macro() accepted the token, so fetch the next one. */
+		}
+	}
+
+	/* As expanded_token(), but discards every token for which
+	 * isWhite() is true and returns the first other one. */
+	private Token expanded_token_nonwhite()
+						throws IOException,
+								LexerException {
+		Token	cur = expanded_token();
+		while (isWhite(cur))
+			cur = expanded_token();
+		return cur;
+	}
+
+
+	/* One-token pushback buffer, filled by expr_untoken(). */
+	private Token	expr_token = null;
+
+	/*
+	 * Returns the next token of a conditional expression: the token
+	 * pushed back by expr_untoken() if any, else the next nonwhite
+	 * expanded token. "defined X" and "defined(X)" are folded into an
+	 * INTEGER token of value 1 or 0; X itself is not macro-expanded.
+	 */
+	private Token expr_token()
+						throws IOException,
+								LexerException {
+		Token	tok = expr_token;
+
+		if (tok != null) {
+			expr_token = null;
+			return tok;
+		}
+
+		tok = expanded_token_nonwhite();
+		if (tok.getType() != IDENTIFIER ||
+			!tok.getText().equals("defined"))
+			return tok;
+
+		Token	la = source_token_nonwhite();
+		boolean	paren = false;
+		if (la.getType() == '(') {
+			paren = true;
+			la = source_token_nonwhite();
+		}
+
+		if (la.getType() != IDENTIFIER) {
+			error(la,
+				"defined() needs identifier, not " + la.getText());
+			tok = new Token(INTEGER, la.getLine(), la.getColumn(),
+					"0", Integer.valueOf(0));
+			if (la.getType() == NL || la.getType() == EOF) {
+				/* The line ended early. Push the terminator back,
+				 * or the caller would go on to parse the next line
+				 * as part of this expression. */
+				expr_untoken(la);
+				return tok;
+			}
+		}
+		else {
+			boolean	exists = macros.containsKey(la.getText());
+			tok = new Token(INTEGER, la.getLine(), la.getColumn(),
+					exists ? "1" : "0",
+					Integer.valueOf(exists ? 1 : 0));
+		}
+
+		if (paren) {
+			la = source_token_nonwhite();
+			if (la.getType() != ')') {
+				expr_untoken(la);
+				error(la, "Missing ) in defined()");
+			}
+		}
+
+		return tok;
+	}
+
+	/* Pushes tok back so that the next expr_token() returns it. */
+	private void expr_untoken(Token tok)
+						throws LexerException {
+		boolean	occupied = (expr_token != null);
+		if (occupied)
+			throw new InternalException("Cannot unget two expression tokens.");
+		expr_token = tok;
+	}
+
+	/*
+	 * Returns the binding strength of a binary operator token.
+	 * Larger values bind more tightly; a token which is not a
+	 * recognised binary operator yields 0.
+	 */
+	private int expr_priority(Token op) {
+		switch (op.getType()) {
+			case '/': case '%': case '*':
+				return 11;
+			case '+': case '-':
+				return 10;
+			case LSH: case RSH:
+				return 9;
+			case '<': case '>': case LE: case GE:
+				return 8;
+			case EQ: case NE:
+				return 7;
+			case '&':  return 6;
+			case '^':  return 5;
+			case '|':  return 4;
+			case LAND: return 3;
+			case LOR:  return 2;
+			case '?':  return 1;
+			default:   return 0;
+		}
+	}
+
+	private long expr(int priority)
+						throws IOException,
+								LexerException {
+		/*
+		 * Evaluates a conditional expression read through expr_token().
+		 * Binary operators are folded in only while their
+		 * expr_priority() is greater than 'priority'.
+		 */
+
+		Token	tok = expr_token();
+		long	lhs, rhs;
+
+		// First the left operand: parenthesised, unary or a literal.
+
+		switch (tok.getType()) {
+			case '(':
+				lhs = expr(0);
+				tok = expr_token();
+				if (tok.getType() != ')') {
+					expr_untoken(tok);
+					error(tok, "missing ) in expression");
+					return 0;
+				}
+				break;
+
+			case '~': lhs = ~expr(11);              break;
+			case '!': lhs =  expr(11) == 0 ? 1 : 0; break;
+			case '-': lhs = -expr(11);              break;
+			case INTEGER:
+				lhs = ((Number)tok.getValue()).longValue();
+				break;
+			case CHARACTER:
+				lhs = (long)((Character)tok.getValue()).charValue();
+				break;
+			case IDENTIFIER:
+				/* XXX warn. For now an identifier evaluates to 0. */
+				lhs = 0;
+				break;
+
+			default:
+				expr_untoken(tok);
+				error(tok,
+					"Bad token in expression: " + tok.getText());
+				return 0;
+		}
+
+		EXPR: for (;;) {
+			// Stop at anything which binds no tighter than 'priority'.
+			Token	op = expr_token();
+			int		pri = expr_priority(op);	/* 0 if not a binop. */
+			if (pri == 0 || priority >= pri) {
+				expr_untoken(op);
+				break EXPR;
+			}
+			rhs = expr(pri);
+			// rhs has absorbed every operator binding tighter than op.
+			switch (op.getType()) {
+				case '/': 
+					if (rhs == 0) {
+						error(op, "Division by zero");
+						lhs = 0;
+					}
+					else {
+						lhs = lhs / rhs;
+					}
+					break;
+				case '%': 
+					if (rhs == 0) {
+						error(op, "Modulus by zero");
+						lhs = 0;
+					}
+					else {
+						lhs = lhs % rhs;
+					}
+					break;
+				case '*':  lhs = lhs * rhs; break;
+				case '+':  lhs = lhs + rhs; break;
+				case '-':  lhs = lhs - rhs; break;
+				case '<':  lhs = lhs < rhs ? 1 : 0; break;
+				case '>':  lhs = lhs > rhs ? 1 : 0; break;
+				case '&':  lhs = lhs & rhs; break;
+				case '^':  lhs = lhs ^ rhs; break;
+				case '|':  lhs = lhs | rhs; break;
+
+				case LSH:  lhs = lhs << rhs; break;
+				case RSH:  lhs = lhs >> rhs; break;
+				case LE:   lhs = lhs <= rhs ? 1 : 0; break;
+				case GE:   lhs = lhs >= rhs ? 1 : 0; break;
+				case EQ:   lhs = lhs == rhs ? 1 : 0; break;
+				case NE:   lhs = lhs != rhs ? 1 : 0; break;
+				case LAND: lhs = (lhs != 0) && (rhs != 0) ? 1 : 0; break;
+				case LOR:  lhs = (lhs != 0) || (rhs != 0) ? 1 : 0; break;
+
+				case '?':
+					/* XXX Handle this? Until then, fall into the error. */
+
+				default:
+					error(op,
+						"Unexpected operator " + op.getText());
+					return 0;
+
+			}
+		}
+
+		/*
+		 * On the 'break EXPR' exit, the token which stopped the loop
+		 * has been handed to expr_untoken(), so the caller reads it
+		 * next: for example the ')' checked for above, or the token
+		 * which the directive handlers fetch after calling expr(0).
+		 */
+
+		return lhs;
+	}
+
+	/*
+	 * Returns a WHITESPACE token at the position of tok whose text is
+	 * one '\n' for each line terminator in the text of tok.
+	 */
+	private Token toWhitespace(Token tok) {
+		String	text = tok.getText();
+		int		len = text.length();
+		boolean	cr = false;	/* Was the previous character a '\r'? */
+		int		nls = 0;
+		for (int i = 0; i < len; i++) {
+			switch (text.charAt(i)) {
+				case '\r':
+					cr = true;
+					nls++;
+					break;
+				case '\n':
+					if (cr) {
+						cr = false;	/* Tail of "\r\n": counted already. */
+						break;
+					}
+					/* fallthrough */
+				case '\u2028': case '\u2029':
+				case '\u000B': case '\u000C': case '\u0085':
+					cr = false;
+					nls++;
+					break;
+				default:
+					/* A '\n' pairs only with the '\r' just before it. */
+					cr = false;
+			}
+		}
+
+		char[]	cbuf = new char[nls];
+		Arrays.fill(cbuf, '\n');
+		return new Token(WHITESPACE, tok.getLine(), tok.getColumn(),
+				new String(cbuf));
+	}
+
+	private final Token _token()
+						throws IOException,
+								LexerException {
+		/* Returns the next output token, acting on directives as met. */
+		Token	tok;
+		for (;;) {
+			if (!isActive()) {
+				/* Tell lexer to ignore warnings. */
+				tok = source_token();
+				/* Tell lexer to stop ignoring warnings. */
+				switch (tok.getType()) {
+					case HASH:
+					case NL:
+					case EOF:
+						/* The preprocessor has to take action here. */
+						break;
+					case WHITESPACE:
+					case COMMENT:
+						// Patch up to preserve whitespace.
+						/* XXX We might want to return tok here in C */
+						return toWhitespace(tok);
+					default:
+						// Return NL to preserve whitespace.
+						return source_skipline(false);
+				}
+			}
+			else {
+				tok = source_token();
+			}
+
+			LEX: switch (tok.getType()) {
+				case EOF:
+					/* Pop the stacks. */
+					return tok;
+
+				case WHITESPACE:
+				case NL:
+					return tok;
+
+				case COMMENT:
+					return tok;
+
+				case '!': case '%': case '&':
+				case '(': case ')': case '*':
+				case '+': case ',': case '-':
+				case '/': case ':': case ';':
+				case '<': case '=': case '>':
+				case '?': case '[': case ']':
+				case '^': case '{': case '|':
+				case '}': case '~': case '.':
+
+				// case '#':
+
+				case AND_EQ:
+				case ARROW:
+				case CHARACTER:
+				case DEC:
+				case DIV_EQ:
+				case ELLIPSIS:
+				case EQ:
+				case GE:
+				case HEADER:	/* Should only arise from include() */
+				case INC:
+				case LAND:
+				case LE:
+				case LOR:
+				case LSH:
+				case LSH_EQ:
+				case SUB_EQ:
+				case MOD_EQ:
+				case MULT_EQ:
+				case NE:
+				case OR_EQ:
+				case PLUS_EQ:
+				case RANGE:
+				case RSH:
+				case RSH_EQ:
+				case STRING:
+				case XOR_EQ:
+					return tok;
+
+				case INTEGER:
+					return tok;
+
+				case IDENTIFIER:
+					Macro	m = macros.get(tok.getText());
+					if (m == null)
+						return tok;
+					if (source.isExpanding(m))
+						return tok;
+					if (macro(m, tok))
+						break;	/* macro() took it: read again. */
+					return tok;
+
+				case P_LINE:
+					if ((flags & FL_LINEMARKER) != 0)
+						return tok;
+					break;	/* Dropped unless line markers are enabled. */
+
+				case ERROR:
+					return tok;
+
+				default:
+					throw new InternalException("Bad token " + tok);
+					// break;
+
+				case HASH:
+					tok = source_token_nonwhite();
+					// tok should now be the word naming the directive.
+					switch (tok.getType()) {
+						case NL:
+							break LEX;	/* Some code has #\n */
+						case IDENTIFIER:
+							break;
+						default:
+							error(tok,
+								"Preprocessor directive not a word " +
+								tok.getText());
+							return source_skipline(false);
+					}
+					Integer	_ppcmd = ppcmds.get(tok.getText());
+					if (_ppcmd == null) {
+						error(tok,
+							"Unknown preprocessor directive " +
+							tok.getText());
+						return source_skipline(false);
+					}
+					int	ppcmd = _ppcmd.intValue();
+
+					switch (ppcmd) {
+
+						case PP_DEFINE:
+							if (!isActive())
+								return source_skipline(false);
+							else
+								return define();
+							// Not reached: both branches return.
+
+						case PP_UNDEF:
+							if (!isActive())
+								return source_skipline(false);
+							else
+								return undef();
+							// Not reached: both branches return.
+
+						case PP_INCLUDE:
+							if (!isActive())
+								return source_skipline(false);
+							else
+								return include();
+							// Not reached: both branches return.
+
+						case PP_WARNING:
+						case PP_ERROR:
+							if (!isActive())
+								return source_skipline(false);
+							else
+								error(tok, ppcmd == PP_ERROR);
+							break;	/* The line was consumed: loop on. */
+
+						case PP_IF:
+							push_state();
+							if (!isActive()) {
+								return source_skipline(false);
+							}
+							expr_token = null;	/* Drop any stale pushback. */
+							states.peek().setActive(expr(0) != 0);
+							tok = expr_token();	/* unget */
+							if (tok.getType() == NL)
+								return tok;
+							return source_skipline(true);
+							// Not reached.
+
+						case PP_ELIF:
+							State	state = states.peek();
+							if (false) {
+								/* Check for 'if' */ ;
+							}
+							else if (state.sawElse()) {
+								error(tok,
+									"#elif after #" + "else");
+								return source_skipline(false);
+							}
+							else if (!state.isParentActive()) {
+								/* Nested in skipped 'if' */
+								return source_skipline(false);
+							}
+							else if (state.isActive()) {
+								/* The 'if' part got executed. */
+								state.setParentActive(false);
+								/* This is like # else # if but with
+								 * only one # end. */
+								state.setActive(false);
+								return source_skipline(false);
+							}
+							else {
+								expr_token = null;
+								state.setActive(expr(0) != 0);
+								tok = expr_token();	/* unget */
+								if (tok.getType() == NL)
+									return tok;
+								return source_skipline(true);
+							}
+							// Not reached: every branch returns.
+
+						case PP_ELSE:
+							state = states.peek();
+							if (false)
+								/* Check for 'if' */ ;
+							else if (state.sawElse()) {
+								error(tok,
+									"#" + "else after #" + "else");
+								return source_skipline(false);
+							}
+							else {
+								state.setSawElse();
+								state.setActive(! state.isActive());
+								return source_skipline(true);
+							}
+							// Not reached: every branch returns.
+
+						case PP_IFDEF:
+							push_state();
+							if (!isActive()) {
+								return source_skipline(false);
+							}
+							else {
+								tok = source_token_nonwhite();
+								// The name to test is read unexpanded.
+								if (tok.getType() != IDENTIFIER) {
+									error(tok,
+										"Expected identifier, not " +
+										tok.getText());
+									return source_skipline(false);
+								}
+								else {
+									String	text = tok.getText();
+									boolean	exists =
+										macros.containsKey(text);
+									states.peek().setActive(exists);
+									return source_skipline(true);
+								}
+							}
+							// Not reached: every branch returns.
+
+						case PP_IFNDEF:
+							push_state();
+							if (!isActive()) {
+								return source_skipline(false);
+							}
+							else {
+								tok = source_token_nonwhite();
+								if (tok.getType() != IDENTIFIER) {
+									error(tok,
+										"Expected identifier, not " +
+										tok.getText());
+									return source_skipline(false);
+								}
+								else {
+									String	text = tok.getText();
+									boolean	exists =
+										macros.containsKey(text);
+									states.peek().setActive(!exists);
+									return source_skipline(true);
+								}
+							}
+							// Not reached: every branch returns.
+
+						case PP_ENDIF:
+							pop_state();
+							return source_skipline(true);
+							// Not reached.
+
+						case PP_LINE:
+							return source_skipline(false);
+							// The directive is skipped, not interpreted.
+
+						case PP_PRAGMA:
+							return source_skipline(false);
+							// The directive is skipped, not interpreted.
+
+						default:
+							/* Actual unknown directives are
+							 * processed above. If we get here,
+							 * we succeeded the map lookup but
+							 * failed to handle it. Therefore,
+							 * this is (unconditionally?) fatal. */
+							// if (isActive()) /* XXX Could be warning. */
+								throw new InternalException(
+									"Internal error: Unknown directive "
+									+ tok);
+							// return source_skipline(false);
+					}
+
+
+			}
+		}
+	}
+
+	/* Returns the next token from _token() for which isWhite() fails. */
+	private Token token_nonwhite()
+						throws IOException,
+								LexerException {
+		Token	cur = _token();
+		while (isWhite(cur))
+			cur = _token();
+		return cur;
+	}
+
+	/**
+	 * Returns the next preprocessor token.
+	 * @return the next token; it is also printed if DEBUG is set.
+	 * @throws LexerException if a preprocessing error occurs.
+	 * @throws InternalException if an unexpected error condition arises.
+	 * @see Token
+	 */
+	public Token token()
+						throws IOException,
+								LexerException {
+		final Token	result = _token();
+		if (DEBUG)
+			System.out.println("pp: Returning " + result);
+		return result;
+	}
+
+#set ($i = 1)	/* First ppcmd is 1, not 0. */
+#set ($ppcmds = [ "define", "elif", "else", "endif", "error", "if", "ifdef", "ifndef", "include", "line", "pragma", "undef", "warning" ])
+#foreach ($ppcmd in $ppcmds)
+	private static final int PP_$ppcmd.toUpperCase() = $i;
+#set ($i = $i + 1)
+#end
+	/* Maps the name of each directive to its PP_ constant. */
+	private static final Map<String,Integer>	ppcmds =
+			new HashMap<String,Integer>();
+
+	static {	/* One entry per name in the list above. */
+#foreach ($ppcmd in $ppcmds)
+		ppcmds.put("$ppcmd", Integer.valueOf(PP_$ppcmd.toUpperCase()));
+#end
+	}
+
+
+	/*
+	 * Describes this preprocessor for debugging: one line for each
+	 * Source from getSource() up through its parents, then one line
+	 * for each macro in sorted name order.
+	 */
+	public String toString() {
+		StringBuilder	out = new StringBuilder();
+
+		for (Source s = getSource(); s != null; s = s.getParent())
+			out.append(" -> ").append(String.valueOf(s)).append("\n");
+
+		Map<String,Macro>	defs = getMacros();
+		List<String>		names = new ArrayList<String>(defs.keySet());
+		Collections.sort(names);
+		for (String name : names) {
+			/* NOTE(review): the marker is presumably kept in two
+			 * pieces deliberately; do not merge the literals. */
+			Macro	def = defs.get(name);
+			out.append("#").append("macro ").append(def).append("\n");
+		}
+
+		return out.toString();
+	}
+
+}
diff --git a/src/java/org/anarres/cpp/PreprocessorListener.java b/src/java/org/anarres/cpp/PreprocessorListener.java
new file mode 100644
index 0000000..84a105d
--- /dev/null
+++ b/src/java/org/anarres/cpp/PreprocessorListener.java
@@ -0,0 +1,83 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.File;
+
+public class PreprocessorListener {
+
+	private int	errors;
+	private int	warnings;
+
+	public PreprocessorListener() {
+		clear();
+	}
+
+	public void clear() {
+		errors = 0;
+		warnings = 0;
+	}
+
+	public int getErrors() {
+		return errors;
+	}
+
+	public int getWarnings() {
+		return warnings;
+	}
+
+	protected void print(String msg) {
+		System.err.println(msg);
+	}
+
+	/**
+	 * Handles a warning.
+	 *
+	 * The behaviour of this method is defined by the
+	 * implementation. It may simply record the error message, or
+	 * it may throw an exception.
+	 */
+	public void handleWarning(Source source, int line, int column,
+					String msg)
+						throws LexerException {
+		warnings++;
+		print(source.getName() + ":" + line + ":" + column +
+				": warning: " + msg); 
+	}
+
+	/**
+	 * Handles an error.
+	 *
+	 * The behaviour of this method is defined by the
+	 * implementation. It may simply record the error message, or
+	 * it may throw an exception.
+	 */
+	public void handleError(Source source, int line, int column,
+					String msg)
+						throws LexerException {
+		errors++;
+		print(source.getName() + ":" + line + ":" + column +
+				": error: " + msg); 
+	}
+
+	public void handleSourceChange(Source source, String event) {
+	}
+
+}
diff --git a/src/java/org/anarres/cpp/Source.java b/src/java/org/anarres/cpp/Source.java
new file mode 100644
index 0000000..2999418
--- /dev/null
+++ b/src/java/org/anarres/cpp/Source.java
@@ -0,0 +1,226 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * An input to the Preprocessor.
+ *
+ * Inputs may come from Files, Strings or other sources. The
+ * preprocessor maintains a stack of Sources. Operations such as
+ * file inclusion or token pasting will push a new source onto
+ * the Preprocessor stack. Sources pop from the stack when they
+ * are exhausted; this may be transparent or explicit.
+ *
+ * BUG: Error messages are not handled properly.
+ */
+public abstract class Source implements Iterable<Token> {
+	private Source					parent;
+	private boolean					autopop;
+	private PreprocessorListener	listener;
+
+	/* LineNumberReader */
+
+/*
+	// We can't do this, since we would lose the LexerException
+	private class Itr implements Iterator {
+		private Token	next = null;
+		private void advance() {
+			try {
+				if (next != null)
+					next = token();
+			}
+			catch (IOException e) {
+				throw new UnsupportedOperationException(
+						"Failed to advance token iterator: " +
+								e.getMessage()
+							);
+			}
+		}
+		public boolean hasNext() {
+			return next.getType() != EOF;
+		}
+		public Token next() {
+			advance();
+			Token	t = next;
+			next = null;
+			return t;
+		}
+		public void remove() {
+			throw new UnsupportedOperationException(
+					"Cannot remove tokens from a Source."
+						);
+		}
+	}
+*/
+
+	public Source() {
+		this.parent = null;
+		this.autopop = false;
+	}
+
+	/* pp */ void setParent(Source parent, boolean autopop) {
+		this.parent = parent;
+		this.autopop = autopop;
+	}
+
+	/* pp */ final Source getParent() {
+		return parent;
+	}
+
+	/* pp */ void setListener(PreprocessorListener listener) {
+		this.listener = listener;
+	}
+
+	/**
+	 * Returns the File currently being lexed.
+	 *
+	 * If this Source is not a {@link FileLexerSource}, then
+	 * it will ask the parent Source, and so forth recursively.
+	 * If no Source on the stack is a FileLexerSource, returns null.
+	 */
+	/* pp */ File getFile() {
+		Source	parent = getParent();
+		while (parent != null) {
+			File	file = parent.getFile();
+			if (file != null)
+				return file;
+			parent = parent.getParent();
+		}
+		return null;
+	}
+
+	/* pp */ String getName() {
+		Source	parent = getParent();
+		while (parent != null) {
+			String	name = parent.getName();
+			if (name != null)
+				return name;
+			parent = parent.getParent();
+		}
+		return null;
+	}
+
+	public int getLine() {
+		Source	parent = getParent();
+		if (parent == null)
+			return 0;
+		return parent.getLine();
+	}
+
+	/* pp */ boolean isExpanding(Macro m) {
+		Source	parent = getParent();
+		if (parent != null)
+			return parent.isExpanding(m);
+		return false;
+	}
+
+	/**
+	 * Returns true if this Source should be transparently popped
+	 * from the input stack.
+	 *
+	 * Examples of such sources are macro expansions.
+	 */
+	/* pp */ boolean isAutopop() {
+		return autopop;
+	}
+
+	/* pp */ boolean isNumbered() {
+		return false;
+	}
+
+	/**
+	 * Returns the next Token parsed from this input stream.
+	 *
+	 * @see Token
+	 */
+	public abstract Token token()
+						throws IOException,
+								LexerException;
+
+	public Iterator<Token> iterator() {
+		return new SourceIterator(this);
+	}
+
+	/**
+	 * Skips tokens until the end of line.
+	 *
+	 * @param white true if only whitespace is permitted on the
+	 *	remainder of the line.
+	 * @return the NL token.
+	 */
+	public Token skipline(boolean white)
+						throws IOException,
+								LexerException {
+		for (;;) {
+			Token	tok = token();
+			switch (tok.getType()) {
+				case EOF:
+					/* There ought to be a newline before EOF.
+					 * At least, in any skipline context. */
+					/* XXX Are we sure about this? */
+					warning(tok.getLine(), tok.getColumn(),
+									"No newline before end of file");
+					return tok;
+				case NL:
+					/* This may contain one or more newlines. */
+					return tok;
+				case COMMENT:
+				case WHITESPACE:
+					break;
+				default:
+					/* XXX Check white, if required. */
+					if (white)
+						warning(tok.getLine(), tok.getColumn(),
+										"Unexpected nonwhite token");
+					break;
+			}
+		}
+	}
+
+	protected void error(int line, int column, String msg)
+						throws LexerException {
+		if (listener != null)
+			listener.handleError(this, line, column, msg);
+		else
+			throw new LexerException("No handler for error at " + line + ":" + column + ": " + msg);
+	}
+
+	protected void warning(int line, int column, String msg)
+						throws LexerException {
+		if (listener != null)
+			listener.handleWarning(this, line, column, msg);
+		else
+			throw new LexerException("No handler for warning at " + line + ":" + column + ": " + msg);
+	}
+
+}
diff --git a/src/java/org/anarres/cpp/SourceIterator.java b/src/java/org/anarres/cpp/SourceIterator.java
new file mode 100644
index 0000000..ac2bc24
--- /dev/null
+++ b/src/java/org/anarres/cpp/SourceIterator.java
@@ -0,0 +1,94 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.IOException;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * An Iterator for {@link Source Sources},
+ * returning {@link Token Tokens}.
+ */
+public class SourceIterator implements Iterator<Token> {
+	private Source	source;
+	private Token	tok;
+
+	/** Creates an iterator drawing its tokens from the given Source. */
+	public SourceIterator(Source s) {
+		this.tok = null;
+		this.source = s;
+	}
+
+	/**
+	 * Buffers a token from the Source if none is buffered, wrapping
+	 * any LexerException or IOException in IllegalStateException.
+	 */
+	private void advance() {
+		if (tok != null)
+			return;
+		try {
+			tok = source.token();
+		}
+		catch (LexerException e) {
+			throw new IllegalStateException(e);
+		}
+		catch (IOException e) {
+			throw new IllegalStateException(e);
+		}
+	}
+
+	/**
+	 * Returns true if the enclosed Source has more tokens.
+	 *
+	 * The EOF token is never returned by the iterator.
+	 * @throws IllegalStateException if the Source throws while reading.
+	 */
+	public boolean hasNext() {
+		advance();
+		return !(tok.getType() == EOF);
+	}
+
+	/**
+	 * Returns the next token from the enclosed Source.
+	 *
+	 * The EOF token is never returned by the iterator.
+	 * @throws NoSuchElementException if only the EOF token remains.
+	 * @throws IllegalStateException if the Source
+	 *		throws a LexerException or IOException
+	 */
+	public Token next() {
+		if (hasNext()) {
+			Token	result = this.tok;
+			this.tok = null;
+			return result;
+		}
+		throw new NoSuchElementException();
+	}
+
+	/** Not supported: always throws UnsupportedOperationException. */
+	public void remove() {
+		throw new UnsupportedOperationException();
+	}
+}
+
diff --git a/src/java/org/anarres/cpp/State.java b/src/java/org/anarres/cpp/State.java
new file mode 100644
index 0000000..441e71e
--- /dev/null
+++ b/src/java/org/anarres/cpp/State.java
@@ -0,0 +1,69 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+/* pp */ class State {
+	boolean	parent;		/* Is the enclosing conditional active? */
+	boolean	active;		/* Is this conditional branch active? */
+	boolean	sawElse;	/* Set once by setSawElse(), never cleared. */
+
+	/* pp */ State() {	/* Outermost state: everything active. */
+		this.sawElse = false;
+		this.parent = this.active = true;
+	}
+
+	/* Nested state: starts active; parent reflects the outer state. */
+	/* pp */ State(State parent) {
+		this.sawElse = false;
+		this.active = true;
+		this.parent = parent.isParentActive() && parent.isActive();
+	}
+
+	/* Required for #elif */
+	/* pp */ void setParentActive(boolean b) {
+		parent = b;
+	}
+
+	/* pp */ boolean isParentActive() {
+		return this.parent;
+	}
+
+	/* pp */ void setActive(boolean b) {
+		active = b;
+	}
+
+	/* pp */ boolean isActive() {
+		return this.active;
+	}
+
+	/* pp */ void setSawElse() {
+		this.sawElse = true;
+	}
+
+	/* pp */ boolean sawElse() {
+		return this.sawElse;
+	}
+
+	public String toString() {
+		return new StringBuilder("parent=").append(parent)
+			.append(", active=").append(active)
+			.append(", sawelse=").append(sawElse).toString();
+	}
+}
diff --git a/src/java/org/anarres/cpp/StringLexerSource.java b/src/java/org/anarres/cpp/StringLexerSource.java
new file mode 100644
index 0000000..7e7df75
--- /dev/null
+++ b/src/java/org/anarres/cpp/StringLexerSource.java
@@ -0,0 +1,64 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+/**
+ * A Source for lexing a String.
+ *
+ * This class is used by token pasting, but can be used by user
+ * code.
+ */
+public class StringLexerSource extends LexerSource {
+
+	/**
+	 * Creates a new Source for lexing the given String, within
+	 * which preprocessor directives are not honoured.
+	 *
+	 * @param string the text to lex.
+	 */
+	public StringLexerSource(String string) throws IOException {
+		this(string, false);
+	}
+
+	/**
+	 * Creates a new Source for lexing the given String.
+	 *
+	 * @param string the text to lex.
+	 * @param ppvalid true if preprocessor directives are to be
+	 *	honoured within the string.
+	 */
+	public StringLexerSource(String string, boolean ppvalid) throws IOException {
+		super(new StringReader(string), ppvalid);
+	}
+
+	/** Returns the fixed description "string literal". */
+	public String toString() {
+		return "string literal";
+	}
+}
diff --git a/src/java/org/anarres/cpp/Token.java b/src/java/org/anarres/cpp/Token.java
new file mode 100644
index 0000000..e5c1319
--- /dev/null
+++ b/src/java/org/anarres/cpp/Token.java
@@ -0,0 +1,215 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+/**
+ * A Preprocessor token.
+ *
+ * @see Preprocessor
+ */
+public final class Token {
+
+	// public static final int	EOF        = -1;
+
+	private int		type;
+	private int		line;
+	private int		column;
+	private Object	value;
+	private String	text;
+
+	public Token(int type, int line, int column,
+					String text, Object value) {
+		this.type = type;
+		this.line = line;
+		this.column = column;
+		this.text = text;
+		this.value = value;
+	}
+
+	public Token(int type, int line, int column, String text) {
+		this(type, line, column, text, null);
+	}
+
+	/* pp */ Token(int type, String text, Object value) {
+		this(type, -1, -1, text, value);
+	}
+
+	/* pp */ Token(int type, String text) {
+		this(type, text, null);
+	}
+
+	/* pp */ Token(int type) {
+		this(type, texts[type]);
+	}
+
+	/**
+	 * Returns the semantic type of this token.
+	 */
+	public int getType() {
+		return type;
+	}
+
+	/* pp */ void setLocation(int line, int column) {
+		this.line = line;
+		this.column = column;
+	}
+
+	/**
+	 * Returns the line at which this token started.
+	 *
+	 * Lines are numbered from zero.
+	 */
+	public int getLine() {
+		return line;
+	}
+
+	/**
+	 * Returns the column at which this token started.
+	 *
+	 * Columns are numbered from zero.
+	 */
+	public int getColumn() {
+		return column;
+	}
+
+	/**
+	 * Returns the original or generated text of this token.
+	 *
+	 * This is distinct from the semantic value of the token.
+	 *
+	 * @see #getValue()
+	 */
+	public String getText() {
+		return text;
+	}
+
+	/**
+	 * Returns the semantic value of this token.
+	 *
+	 * For strings, this is the parsed String.
+	 * For integers, this is an Integer object.
+	 * For other token types, as appropriate.
+	 *
+	 * @see #getText()
+	 */
+	public Object getValue() {
+		return value;
+	}
+
+	/**
+	 * Returns a description of this token, for debugging purposes.
+	 */
+	public String toString() {
+		StringBuilder	buf = new StringBuilder();
+
+		buf.append('[').append(getTokenName(type));
+		if (line != -1) {
+			buf.append('@').append(line);
+			if (column != -1)
+				buf.append(',').append(column);
+		}
+		buf.append("]:");
+		if (text != null)
+			buf.append('"').append(text).append('"');
+		else if (type > 3 && type < 256)
+			buf.append( (char)type );
+		else
+			buf.append('<').append(type).append('>');
+		if (value != null)
+			buf.append('=').append(value);
+		return buf.toString();
+	}
+
+	/**
+	 * Returns the descriptive name of the given token type.
+	 *
+	 * This is mostly used for stringification and debugging.
+	 */
+	public static final String getTokenName(int type) {
+		if (type < 0)
+			return "Invalid" + type;
+		if (type >= names.length)
+			return "Invalid" + type;
+		if (names[type] == null)
+			return "Unknown" + type;
+		return names[type];
+	}
+
+#set ($i = 257)
+#set ($tokens = [ "AND_EQ", "ARROW", "CHARACTER", "COMMENT", "DEC", "DIV_EQ", "ELLIPSIS", "EOF", "EQ", "GE", "HASH", "HEADER", "IDENTIFIER", "INC", "INTEGER", "LAND", "LAND_EQ", "LE", "LITERAL", "LOR", "LOR_EQ", "LSH", "LSH_EQ", "MOD_EQ", "MULT_EQ", "NE", "NL", "OR_EQ", "PASTE", "PLUS_EQ", "RANGE", "RSH", "RSH_EQ", "STRING", "SUB_EQ", "WHITESPACE", "XOR_EQ", "M_ARG", "M_PASTE", "M_STRING", "P_LINE", "ERROR" ])
+#foreach ($token in $tokens)
+	/** The token type $token. */
+	public static final int $token = $i;
+#set ($i = $i + 1)
+#end
+	/**
+	 * The number of possible semantic token types.
+	 *
+	 * Please note that not all token types below 255 are used.
+	 */
+	public static final int _TOKENS = $i;
+
+	/** The position-less space token. */
+	/* pp */ static final Token	 space = new Token(WHITESPACE, -1, -1, " ");
+
+	private static final String[] names = new String[_TOKENS];
+	private static final String[] texts = new String[_TOKENS];
+	static {
+		for (int i = 0; i < 255; i++) {
+			texts[i] = String.valueOf(new char[] { (char)i });
+			names[i] = texts[i];
+		}
+
+		texts[AND_EQ]      = "&=";
+		texts[ARROW]       = "->";
+		texts[DEC]         = "--";
+		texts[DIV_EQ]      = "/=";
+		texts[ELLIPSIS]    = "...";
+		texts[EQ]          = "==";
+		texts[GE]          = ">=";
+		texts[HASH]        = "#";
+		texts[INC]         = "++";
+		texts[LAND]        = "&&";
+		texts[LAND_EQ]     = "&&=";
+		texts[LE]          = "<=";
+		texts[LOR]         = "||";
+		texts[LOR_EQ]      = "||=";
+		texts[LSH]         = "<<";
+		texts[LSH_EQ]      = "<<=";
+		texts[MOD_EQ]      = "%=";
+		texts[MULT_EQ]     = "*=";
+		texts[NE]          = "!=";
+		texts[NL]          = "\n";
+		texts[OR_EQ]       = "|=";
+		/* We have to split the two hashes or Velocity eats them. */
+		texts[PASTE]       = "#" + "#";
+		texts[PLUS_EQ]     = "+=";
+		texts[RANGE]       = "..";
+		texts[RSH]         = ">>";
+		texts[RSH_EQ]      = ">>=";
+		texts[SUB_EQ]      = "-=";
+		texts[XOR_EQ]      = "^=";
+
+#foreach ($token in $tokens)
+		names[$token] = "$token";
+#end
+	}
+
+}
diff --git a/src/java/org/anarres/cpp/TokenSnifferSource.java b/src/java/org/anarres/cpp/TokenSnifferSource.java
new file mode 100644
index 0000000..55b53d7
--- /dev/null
+++ b/src/java/org/anarres/cpp/TokenSnifferSource.java
@@ -0,0 +1,56 @@
+/*
+ * Anarres C Preprocessor
+ * Copyright (C) 2007 Shevek
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+package org.anarres.cpp;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Iterator;
+
+import static org.anarres.cpp.Token.*;
+
+/** A pass-through Source which records each non-EOF token of its parent. */
+@Deprecated
+/* pp */ class TokenSnifferSource extends Source {
+	private final List<Token>	target;
+
+	/* pp */ TokenSnifferSource(List<Token> target) {
+		this.target = target;
+	}
+
+	/** Returns the parent's next token, appending it to the list unless EOF. */
+	public Token token() throws IOException, LexerException {
+		final Token	sniffed = getParent().token();
+		if (sniffed.getType() != EOF)
+			target.add(sniffed);
+		return sniffed;
+	}
+
+	public String toString() {
+		return getParent().toString();
+	}
+}
-- 
cgit v1.2.3