From 5ff55648127c8a8e1b9829775045af986e37647c Mon Sep 17 00:00:00 2001 From: Shevek Date: Fri, 21 Mar 2008 23:05:04 +0000 Subject: move stuff into trunk --- src/java/org/anarres/cpp/Argument.java | 79 + src/java/org/anarres/cpp/CppReader.java | 147 ++ src/java/org/anarres/cpp/FileLexerSource.java | 74 + src/java/org/anarres/cpp/FixedTokenSource.java | 67 + src/java/org/anarres/cpp/InternalException.java | 33 + src/java/org/anarres/cpp/JoinReader.java | 168 +++ src/java/org/anarres/cpp/LexerException.java | 35 + src/java/org/anarres/cpp/LexerSource.java | 677 +++++++++ src/java/org/anarres/cpp/Macro.java | 157 ++ src/java/org/anarres/cpp/MacroTokenSource.java | 191 +++ src/java/org/anarres/cpp/Main.java | 111 ++ src/java/org/anarres/cpp/Preprocessor.java | 1511 ++++++++++++++++++++ src/java/org/anarres/cpp/PreprocessorListener.java | 83 ++ src/java/org/anarres/cpp/Source.java | 226 +++ src/java/org/anarres/cpp/SourceIterator.java | 94 ++ src/java/org/anarres/cpp/State.java | 69 + src/java/org/anarres/cpp/StringLexerSource.java | 64 + src/java/org/anarres/cpp/Token.java | 215 +++ src/java/org/anarres/cpp/TokenSnifferSource.java | 56 + 19 files changed, 4057 insertions(+) create mode 100644 src/java/org/anarres/cpp/Argument.java create mode 100644 src/java/org/anarres/cpp/CppReader.java create mode 100644 src/java/org/anarres/cpp/FileLexerSource.java create mode 100644 src/java/org/anarres/cpp/FixedTokenSource.java create mode 100644 src/java/org/anarres/cpp/InternalException.java create mode 100644 src/java/org/anarres/cpp/JoinReader.java create mode 100644 src/java/org/anarres/cpp/LexerException.java create mode 100644 src/java/org/anarres/cpp/LexerSource.java create mode 100644 src/java/org/anarres/cpp/Macro.java create mode 100644 src/java/org/anarres/cpp/MacroTokenSource.java create mode 100644 src/java/org/anarres/cpp/Main.java create mode 100644 src/java/org/anarres/cpp/Preprocessor.java create mode 100644 src/java/org/anarres/cpp/PreprocessorListener.java 
create mode 100644 src/java/org/anarres/cpp/Source.java create mode 100644 src/java/org/anarres/cpp/SourceIterator.java create mode 100644 src/java/org/anarres/cpp/State.java create mode 100644 src/java/org/anarres/cpp/StringLexerSource.java create mode 100644 src/java/org/anarres/cpp/Token.java create mode 100644 src/java/org/anarres/cpp/TokenSnifferSource.java (limited to 'src/java/org/anarres/cpp') diff --git a/src/java/org/anarres/cpp/Argument.java b/src/java/org/anarres/cpp/Argument.java new file mode 100644 index 0000000..da87d70 --- /dev/null +++ b/src/java/org/anarres/cpp/Argument.java @@ -0,0 +1,79 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +import java.io.IOException; + +import java.util.ArrayList; +import java.util.List; +import java.util.Iterator; + +import static org.anarres.cpp.Token.*; + +/** + * A macro argument. + * + * This encapsulates a raw and preprocessed token stream. 
+ */ +/* pp */ class Argument extends ArrayList { + public static final int NO_ARGS = -1; + + private List expansion; + + public Argument() { + this.expansion = null; + } + + public void addToken(Token tok) { + add(tok); + } + + /* pp */ void expand(Preprocessor p) + throws IOException, + LexerException { + /* Cache expansion. */ + if (expansion == null) { + this.expansion = p.expand(this); + // System.out.println("Expanded arg " + this); + } + } + + public Iterator expansion() { + return expansion.iterator(); + } + + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append("Argument("); + // buf.append(super.toString()); + buf.append("raw=[ "); + for (int i = 0; i < size(); i++) + buf.append(get(i).getText()); + buf.append(" ];expansion=[ "); + if (expansion == null) + buf.append("null"); + else + for (int i = 0; i < expansion.size(); i++) + buf.append(expansion.get(i).getText()); + buf.append(" ])"); + return buf.toString(); + } + +} diff --git a/src/java/org/anarres/cpp/CppReader.java b/src/java/org/anarres/cpp/CppReader.java new file mode 100644 index 0000000..0aa6788 --- /dev/null +++ b/src/java/org/anarres/cpp/CppReader.java @@ -0,0 +1,147 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +import java.io.File; +import java.io.IOException; +import java.io.Reader; + +import static org.anarres.cpp.Token.*; + +/** + * A Reader wrapper around the Preprocessor. + * + * This is a utility class to provide a transparent {@link Reader} + * which preprocesses the input text. + * + * @see Preprocessor + * @see Reader + */ +public class CppReader extends Reader { + + private Preprocessor cpp; + private String token; /* text of the token currently being handed out; null once EOF was seen or after close() */ + private int idx; /* index in token of the next character to return */ + + /** + * Creates a CppReader which preprocesses the text read from r. + */ + public CppReader(final Reader r) { + cpp = new Preprocessor(new LexerSource(r, true) { + @Override + public String getName() { + return ""; + } + }); + token = ""; + idx = 0; + } + + /** + * Creates a CppReader which returns the output of the given Preprocessor. + */ + public CppReader(Preprocessor p) { + cpp = p; + token = ""; + idx = 0; + } + + /** + * Returns the Preprocessor used by this CppReader. + */ + public Preprocessor getPreprocessor() { + return cpp; + } + + /** + * Defines the given name as a macro. + * + * This is a convenience method. + */ + public void addMacro(String name) + throws LexerException { + cpp.addMacro(name); + } + + /** + * Defines the given name as a macro. + * + * This is a convenience method.
+ */ + public void addMacro(String name, String value) + throws LexerException { + cpp.addMacro(name, value); + } + + /* Makes sure token has an unread character at idx, pulling further tokens from the preprocessor while the current text is used up. Returns false once EOF has been seen, after which token stays null. A LexerException is rethrown as an IOException with the original attached as its cause. */ + private boolean refill() + throws IOException { + try { + assert cpp != null : "cpp is null : was it closed?"; + if (token == null) + return false; + while (idx >= token.length()) { + Token tok = cpp.token(); + switch (tok.getType()) { + case EOF: + token = null; + return false; + case COMMENT: + if (false) { + token = " "; + break; + } + default: + token = tok.getText(); + break; + } + idx = 0; + } + return true; + } + catch (LexerException e) { + IOException ie = new IOException(String.valueOf(e)); + ie.initCause(e); + throw ie; + } + } + + /* Returns the next preprocessed character, or -1 at end of input. */ + public int read() + throws IOException { + if (!refill()) + return -1; + return token.charAt(idx++); + } + + /* XXX Very slow and inefficient. */ + /* Fills cbuf[off..] one character at a time. If the input ends before any character has been stored this returns -1 rather than 0, because for a java.io.Reader only -1 signals end of stream. */ + public int read(char cbuf[], int off, int len) + throws IOException { + if (token == null) + return -1; + for (int i = 0; i < len; i++) { + int ch = read(); + if (ch == -1) + return (i == 0) ? -1 : i; + cbuf[off + i] = (char)ch; + } + return len; + } + + public void close() + throws IOException { + cpp = null; + token = null; + } + +} diff --git a/src/java/org/anarres/cpp/FileLexerSource.java b/src/java/org/anarres/cpp/FileLexerSource.java new file mode 100644 index 0000000..9f574a0 --- /dev/null +++ b/src/java/org/anarres/cpp/FileLexerSource.java @@ -0,0 +1,74 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; + +import java.util.List; +import java.util.Iterator; + +import static org.anarres.cpp.Token.*; + +/** + * A {@link Source} which lexes a file. + * + * The input is buffered. + * + * @see Source + */ +public class FileLexerSource extends LexerSource { + private File file; + + /** + * Creates a new Source for lexing the given File. + * + * Preprocessor directives are honoured within the file. + */ + public FileLexerSource(File file) + throws IOException { + super( + new BufferedReader( + new FileReader( + file + ) + ), + true + ); + + this.file = file; + } + + @Override + /* pp */ File getFile() { + return file; + } + + @Override + /* pp */ String getName() { + return String.valueOf(file); + } + + public String toString() { + return "file " + file; + } +} diff --git a/src/java/org/anarres/cpp/FixedTokenSource.java b/src/java/org/anarres/cpp/FixedTokenSource.java new file mode 100644 index 0000000..d123f89 --- /dev/null +++ b/src/java/org/anarres/cpp/FixedTokenSource.java @@ -0,0 +1,67 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.io.PushbackReader; +import java.io.Reader; +import java.io.StringReader; + +import java.util.Arrays; +import java.util.List; +import java.util.Iterator; + +/* pp */ class FixedTokenSource extends Source { + private static final Token EOF = + new Token(Token.EOF, ""); + + private List tokens; + private int idx; + + /* pp */ FixedTokenSource(Token... tokens) { + this.tokens = Arrays.asList(tokens); + this.idx = 0; + } + + /* pp */ FixedTokenSource(List tokens) { + this.tokens = tokens; + this.idx = 0; + } + + public Token token() + throws IOException, + LexerException { + if (idx >= tokens.size()) + return EOF; + return tokens.get(idx++); + } + + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append("constant token stream " + tokens); + Source parent = getParent(); + if (parent != null) + buf.append(" in ").append(String.valueOf(parent)); + return buf.toString(); + } +} diff --git a/src/java/org/anarres/cpp/InternalException.java b/src/java/org/anarres/cpp/InternalException.java new file mode 100644 index 0000000..d228710 --- /dev/null +++ b/src/java/org/anarres/cpp/InternalException.java @@ -0,0 +1,33 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +/** + * An internal exception. + * + * This exception is thrown when an internal state violation is + * encountered. This should never happen. If it ever happens, please + * report it as a bug. + */ +public class InternalException extends RuntimeException { + public InternalException(String msg) { + super(msg); + } +} diff --git a/src/java/org/anarres/cpp/JoinReader.java b/src/java/org/anarres/cpp/JoinReader.java new file mode 100644 index 0000000..10ec535 --- /dev/null +++ b/src/java/org/anarres/cpp/JoinReader.java @@ -0,0 +1,168 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +import java.io.Reader; +import java.io.PushbackReader; +import java.io.IOException; + +/* A Reader which deletes backslash-newline pairs from the wrapped Reader. Each deleted line end is counted and given back as a plain newline right after the next real line separator (or at end of input), so the number of newlines is preserved. Trigraphs are replaced too when that option is on. */ +/* pp */ class JoinReader extends Reader { + private Reader in; + + private boolean trigraphs; + + private int newlines; /* line ends swallowed by joins and still owed to the caller */ + private boolean flushnl; /* true while the owed newlines are being handed out */ + private int[] unget; /* pushback stack, last pushed is read first */ + private int uptr; + + public JoinReader(Reader in, boolean trigraphs) { + this.in = in; + this.trigraphs = trigraphs; + this.newlines = 0; + this.flushnl = false; + /* Three slots: _read() may push back two lookahead characters after a failed trigraph, and read() then pushes back the character _read() returned. With only two slots the input \??x overflowed the array. */ + this.unget = new int[3]; + this.uptr = 0; + } + + public JoinReader(Reader in) { + this(in, false); + } + + /* Raw read: pushed-back characters first, then the wrapped Reader. */ + private int __read() throws IOException { + if (uptr > 0) + return unget[--uptr]; + return in.read(); + } + + /* Pushes c back; end-of-input (-1) is never stored. */ + private void _unread(int c) { + if (c != -1) + unget[uptr++] = c; + } + + /* Reads one character, replacing a trigraph by the character it stands for when trigraphs are enabled. */ + private int _read() throws IOException { + int c = __read(); + if (c == '?'
&& trigraphs) { + int d = __read(); + if (d == '?') { + int e = __read(); + switch (e) { + case '(': return '['; + case ')': return ']'; + case '<': return '{'; + case '>': return '}'; + case '=': return '#'; + case '/': return '\\'; + case '\'': return '^'; + case '!': return '|'; + case '-': return '~'; + } + _unread(e); + } + _unread(d); + } + return c; + } + + /* Returns the next character of the joined text, or -1 at end of input. */ + public int read() throws IOException { + if (flushnl) { + if (newlines > 0) { + newlines--; + return '\n'; + } + flushnl = false; + } + + for (;;) { + int c = _read(); + switch (c) { + case '\\': + int d = _read(); + switch (d) { + case '\n': + newlines++; + continue; + case '\r': + newlines++; + int e = _read(); + if (e != '\n') + _unread(e); + continue; + default: + _unread(d); + return c; + } + case '\r': + case '\n': + case '\u2028': + case '\u2029': + case '\u000B': + case '\u000C': + case '\u0085': + flushnl = true; + return c; + case -1: + if (newlines > 0) { + newlines--; + return '\n'; + } + default: + return c; + } + } + } + + /* Fills cbuf[off..] one character at a time. If the input ends before any character has been stored this returns -1 rather than 0: for a java.io.Reader only -1 signals end of stream, and the old zero return was repeated on every later call. */ + public int read(char cbuf[], int off, int len) + throws IOException { + for (int i = 0; i < len; i++) { + int ch = read(); + if (ch == -1) + return (i == 0) ? -1 : i; + cbuf[off + i] = (char)ch; + } + return len; + } + + public void close() + throws IOException { + in.close(); + } + + public String toString() { + return "JoinReader(nl=" + newlines + ")"; + } + +/* + public static void main(String[] args) throws IOException { + FileReader f = new FileReader(new File(args[0])); + BufferedReader b = new BufferedReader(f); + JoinReader r = new JoinReader(b); + BufferedWriter w = new BufferedWriter( + new java.io.OutputStreamWriter(System.out) + ); + int c; + while ((c = r.read()) != -1) { + w.write((char)c); + } + w.close(); + } +*/ + +} diff --git a/src/java/org/anarres/cpp/LexerException.java b/src/java/org/anarres/cpp/LexerException.java new file mode 100644 index 0000000..a4b5e2e --- /dev/null +++ b/src/java/org/anarres/cpp/LexerException.java @@ -0,0 +1,35 @@ +/* + * Anarres C
Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +/** + * A preprocessor exception. + * + * Note to users: I don't really like the name of this class. S. + */ +public class LexerException extends Exception { + public LexerException(String msg) { + super(msg); + } + + public LexerException(Throwable cause) { + super(cause); + } +} diff --git a/src/java/org/anarres/cpp/LexerSource.java b/src/java/org/anarres/cpp/LexerSource.java new file mode 100644 index 0000000..a291bff --- /dev/null +++ b/src/java/org/anarres/cpp/LexerSource.java @@ -0,0 +1,677 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +import java.io.File; +import java.io.IOException; +import java.io.PushbackReader; +import java.io.Reader; +import java.util.Stack; + +import static org.anarres.cpp.Token.*; + +/** Does not handle digraphs. */ +public class LexerSource extends Source { + private static final boolean DEBUG = false; + + private PushbackReader reader; + private boolean ppvalid; + private boolean bol; + private boolean include; + + private int line; + private int column; + private int lastcolumn; + private boolean cr; + + /* ppvalid is: + * false in StringLexerSource, + * true in FileLexerSource */ + public LexerSource(Reader r, boolean ppvalid) { + this.reader = new PushbackReader(new JoinReader(r), 5); + this.ppvalid = ppvalid; + this.bol = true; + this.include = false; + + this.line = 1; + this.column = 0; + this.lastcolumn = -1; + this.cr = false; + } + + @Override + public int getLine() { + return line; + } + + public int getColumn() { + return column; + } + + /* pp */ boolean isNumbered() { + return true; + } + +/* Error handling - this lot is barely worth it. */ + + private final void _error(String msg, boolean error) + throws LexerException { + int _l = line; + int _c = column; + if (_c == 0) { + _c = lastcolumn; + _l--; + } + else { + _c--; + } + if (error) + super.error(_l, _c, msg); + else + super.warning(_l, _c, msg); + } + + private final void error(String msg) + throws LexerException { + _error(msg, true); + } + + private final void warning(String msg) + throws LexerException { + _error(msg, false); + } + +/* A flag for string handling. 
*/ + + /* pp */ void setInclude(boolean b) { + this.include = b; + } + +/* + private boolean _isLineSeparator(int c) { + return Character.getType(c) == Character.LINE_SEPARATOR + || c == -1; + } +*/ + + /* XXX Move to JoinReader and canonicalise newlines. */ + private static final boolean isLineSeparator(int c) { + switch ((char)c) { + case '\r': + case '\n': + case '\u2028': + case '\u2029': + case '\u000B': + case '\u000C': + case '\u0085': + return true; + default: + return (c == -1); + } + } + + + private int read() throws IOException { + int c = reader.read(); + switch (c) { + case '\r': + cr = true; + line++; + lastcolumn = column; + column = 0; + break; + case '\n': + if (cr) { + cr = false; + break; + } + /* fallthrough */ + case '\u2028': + case '\u2029': + case '\u000B': + case '\u000C': + case '\u0085': + cr = false; + line++; + lastcolumn = column; + column = 0; + break; + default: + cr = false; + column++; + break; + } + +/* + if (isLineSeparator(c)) { + line++; + lastcolumn = column; + column = 0; + } + else { + column++; + } +*/ + + return c; + } + + /* You can unget AT MOST one newline. 
*/ + private void unread(int c) + throws IOException { + if (c != -1) { + if (isLineSeparator(c)) { + line--; + column = lastcolumn; + cr = false; + } + else { + column--; + } + reader.unread(c); + } + } + + /* Lexes the remainder of a block comment; the opening slash-star has already been consumed. Running into end of input first is reported as an error and yields an ERROR token, where the loops used to spin forever appending (char)-1. */ + private Token ccomment() + throws IOException, + LexerException { + StringBuilder text = new StringBuilder("/*"); + int d; + do { + do { + d = read(); + if (d == -1) { + error("Unterminated comment"); + return new Token(ERROR, text.toString(), null); + } + text.append((char)d); + } while (d != '*'); + do { + d = read(); + if (d == -1) { + error("Unterminated comment"); + return new Token(ERROR, text.toString(), null); + } + text.append((char)d); + } while (d == '*'); + } while (d != '/'); + return new Token(COMMENT, text.toString()); + } + + /* Lexes the remainder of a line comment; the line separator (or EOF) that ends it is pushed back, not consumed. */ + private Token cppcomment() + throws IOException { + StringBuilder text = new StringBuilder("//"); + int d = read(); + while (!isLineSeparator(d)) { + text.append((char)d); + d = read(); + } + unread(d); + return new Token(COMMENT, text.toString()); + } + + /* Reads what follows a backslash, appends its source spelling to text and returns the character value the escape stands for. */ + private int escape(StringBuilder text) + throws IOException, + LexerException { + int d = read(); + switch (d) { + case 'a': text.append('a'); return 0x07; /* alert (BEL); 0x0a would be newline */ + case 'b': text.append('b'); return '\b'; + case 'f': text.append('f'); return '\f'; + case 'n': text.append('n'); return '\n'; + case 'r': text.append('r'); return '\r'; + case 't': text.append('t'); return '\t'; + case 'v': text.append('v'); return 0x0b; + case '\\': text.append('\\'); return '\\'; + + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + int len = 0; + int val = 0; + do { + val = (val << 3) + Character.digit(d, 8); + text.append((char)d); + d = read(); + } while (++len < 3 && Character.digit(d, 8) != -1); + unread(d); + return val; + + case 'x': + /* The 'x' is only a prefix, not a digit: record it, then take at most two hex digits. */ + text.append((char)d); + len = 0; + val = 0; + d = read(); + while (len++ < 2 && Character.digit(d, 16) != -1) { + val = (val << 4) + Character.digit(d, 16); + text.append((char)d); + d = read(); + } + unread(d); + return val; + + /* Exclude two cases from the warning.
*/ + case '"': text.append('"'); return '"'; + case '\'': text.append('\''); return '\''; + + default: + warning("Unnecessary escape character " + (char)d); + text.append((char)d); + return d; + } + } + + private Token character() + throws IOException, + LexerException { + StringBuilder text = new StringBuilder("'"); + int d = read(); + if (d == '\\') { + text.append('\\'); + d = escape(text); + } + else if (isLineSeparator(d)) { + unread(d); + error("Unterminated character literal"); + return new Token(ERROR, text.toString(), null); + } + else if (d == '\'') { + text.append('\''); + error("Empty character literal"); + return new Token(ERROR, text.toString(), null); + } + else if (!Character.isDefined(d)) { + text.append('?'); + error("Illegal unicode character literal"); + } + else { + text.append((char)d); + } + + int e = read(); + if (e != '\'') { + unread(e); + error("Illegal character constant"); + /* XXX We could do some patching up here? */ + return new Token(ERROR, text.toString(), null); + } + text.append('\''); + /* XXX Bad cast. */ + return new Token(CHARACTER, + text.toString(), Character.valueOf((char)d)); + } + + /* XXX This strips the enclosing quotes from the + * returned value. 
*/ + private Token string(char open, char close) + throws IOException, + LexerException { + StringBuilder text = new StringBuilder(); + text.append(open); + + StringBuilder buf = new StringBuilder(); + + for (;;) { + int c = read(); + if (c == close) { + break; + } + else if (c == '\\') { + text.append('\\'); + if (!include) { + char d = (char)escape(text); + buf.append(d); + } + } + else if (c == -1) { + unread(c); + error("End of file in string literal after " + buf); + return new Token(ERROR, text.toString(), null); + } + else if (isLineSeparator(c)) { + unread(c); + error("Unterminated string literal after " + buf); + return new Token(ERROR, text.toString(), null); + } + else { + text.append((char)c); + buf.append((char)c); + } + } + text.append(close); + return new Token(close == '>' ? HEADER : STRING, + text.toString(), buf.toString()); + } + + private void number_suffix(StringBuilder text, int d) + throws IOException { + if (d == 'U') { + text.append((char)d); + d = read(); + } + if (d == 'L') { + text.append((char)d); + } + else if (d == 'I') { + text.append((char)d); + } + else { + unread(d); + } + } + + /* We already chewed a zero, so empty is fine. */ + private Token number_octal() + throws IOException, + LexerException { + StringBuilder text = new StringBuilder("0"); + int d = read(); + long val = 0; + while (Character.digit(d, 8) != -1) { + val = (val << 3) + Character.digit(d, 8); + text.append((char)d); + d = read(); + } + number_suffix(text, d); + return new Token(INTEGER, + text.toString(), Long.valueOf(val)); + } + + /* We do not know whether the first digit is valid.
*/ + private Token number_hex(char x) + throws IOException, + LexerException { + StringBuilder text = new StringBuilder("0"); + text.append(x); + int d = read(); + if (Character.digit(d, 16) == -1) { + unread(d); + error("Illegal hexadecimal constant " + (char)d); + return new Token(ERROR, text.toString(), null); + } + long val = 0; + do { + val = (val << 4) + Character.digit(d, 16); + text.append((char)d); + d = read(); + } while (Character.digit(d, 16) != -1); + number_suffix(text, d); + return new Token(INTEGER, + text.toString(), Long.valueOf(val)); + } + + /* We know we have at least one valid digit, but empty is not + * fine. */ + /* XXX This needs a complete rewrite. */ + private Token number_decimal(int c) + throws IOException, + LexerException { + StringBuilder text = new StringBuilder((char)c); + int d = c; + long val = 0; + do { + val = val * 10 + Character.digit(d, 10); + text.append((char)d); + d = read(); + } while (Character.digit(d, 10) != -1); + number_suffix(text, d); + return new Token(INTEGER, + text.toString(), Long.valueOf(val)); + } + + private Token identifier(int c) + throws IOException, + LexerException { + StringBuilder text = new StringBuilder(); + int d; + text.append((char)c); + for (;;) { + d = read(); + if (Character.isIdentifierIgnorable(d)) + ; + else if (Character.isJavaIdentifierPart(d)) + text.append((char)d); + else + break; + } + unread(d); + return new Token(IDENTIFIER, text.toString()); + } + + private Token whitespace(int c) + throws IOException, + LexerException { + StringBuilder text = new StringBuilder(); + int d; + text.append((char)c); + for (;;) { + d = read(); + if (ppvalid && isLineSeparator(d)) /* XXX Ugly. */ + break; + if (Character.isWhitespace(d)) + text.append((char)d); + else + break; + } + unread(d); + return new Token(WHITESPACE, text.toString()); + } + + /* No token processed by cond() contains a newline. 
*/ + private Token cond(char c, int yes, int no) + throws IOException { + int d = read(); + if (c == d) + return new Token(yes); + unread(d); + return new Token(no); + } + + public Token token() + throws IOException, + LexerException { + Token tok = null; + + int _l = line; + int _c = column; + + int c = read(); + int d, e; + + switch (c) { + case '\n': + if (ppvalid) { + bol = true; + if (include) { + tok = new Token(NL, _l, _c, new String("\n")); + } + else { + int nls = 0; + do { + d = read(); + nls++; + } while (d == '\n'); + unread(d); + char[] text = new char[nls]; + for (int i = 0; i < text.length; i++) + text[i] = '\n'; + // Skip the bol = false below. + tok = new Token(NL, _l, _c, new String(text)); + } + if (DEBUG) + System.out.println("lx: Returning NL: " + tok); + return tok; + } + /* Let it be handled as whitespace. */ + break; + + case '!': + tok = cond('=', NE, '!'); + break; + + case '#': + if (bol) + tok = new Token(HASH); + else + tok = cond('#', PASTE, '#'); + break; + + case '+': + d = read(); + if (d == '+') + tok = new Token(INC); + else if (d == '=') + tok = new Token(PLUS_EQ); + else + unread(d); + break; + case '-': + d = read(); + if (d == '-') + tok = new Token(DEC); + else if (d == '=') + tok = new Token(SUB_EQ); + else if (d == '>') + tok = new Token(ARROW); + else + unread(d); + break; + + case '*': + tok = cond('=', MULT_EQ, '*'); + break; + case '/': + d = read(); + if (d == '*') + tok = ccomment(); + else if (d == '/') + tok = cppcomment(); + else if (d == '=') + tok = new Token(DIV_EQ); + else + unread(d); + break; + + case '%': + tok = cond('=', MOD_EQ, '%'); + break; + + case ':': + /* :: */ + break; + + case '<': + if (include) { + tok = string('<', '>'); + } + else { + d = read(); + if (d == '=') + tok = new Token(LE); + else if (d == '<') + tok = cond('=', LSH_EQ, LSH); + else + unread(d); + } + break; + + case '=': + tok = cond('=', EQ, '='); + break; + + case '>': + d = read(); + if (d == '=') + tok = new Token(GE); + else 
if (d == '>') + tok = cond('=', RSH_EQ, RSH); + else + unread(d); + break; + + case '^': + tok = cond('=', XOR_EQ, '^'); + break; + + case '|': + d = read(); + if (d == '=') + tok = new Token(OR_EQ); + else if (d == '|') + tok = cond('=', LOR_EQ, LOR); + else + unread(d); + break; + case '&': + d = read(); + if (d == '&') + tok = cond('=', LAND_EQ, LAND); + else if (d == '=') + tok = new Token(AND_EQ); + else + unread(d); + break; + + case '.': + d = read(); + if (d == '.') + tok = cond('.', ELLIPSIS, RANGE); + else + unread(d); + /* XXX decimal fraction */ + break; + + case '0': + /* octal or hex */ + d = read(); + if (d == 'x' || d == 'X') + tok = number_hex((char)d); + else { + unread(d); + tok = number_octal(); + } + break; + + case '\'': + tok = character(); + break; + + case '"': + tok = string('"', '"'); + break; + + case -1: + tok = new Token(EOF, _l, _c, ""); + break; + } + + if (tok == null) { + if (Character.isWhitespace(c)) { + tok = whitespace(c); + } + else if (Character.isDigit(c)) { + tok = number_decimal(c); + } + else if (Character.isJavaIdentifierStart(c)) { + tok = identifier(c); + } + else { + tok = new Token(c); + } + } + + bol = false; + + tok.setLocation(_l, _c); + if (DEBUG) + System.out.println("lx: Returning " + tok); + // (new Exception("here")).printStackTrace(System.out); + return tok; + } + +} diff --git a/src/java/org/anarres/cpp/Macro.java b/src/java/org/anarres/cpp/Macro.java new file mode 100644 index 0000000..0d0ae55 --- /dev/null +++ b/src/java/org/anarres/cpp/Macro.java @@ -0,0 +1,157 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +/** + * A macro object. + * + * This encapsulates a name, an argument count, and a token stream + * for replacement. The replacement token stream may contain the + * extra tokens {@link Token#M_ARG} and {@link Token#M_STRING}. + */ +public class Macro { + private String name; + /* It's an explicit decision to keep these around here. We don't + * need to; the argument token type is M_ARG and the value + * is the index. The strings themselves are only used in + * stringification of the macro, for debugging. */ + private List args; + private boolean variadic; + private List tokens; + + public Macro(String name) { + this.name = name; + this.args = null; + this.variadic = false; + this.tokens = new ArrayList(); + } + + /** + * Returns the name of this macro. + */ + public String getName() { + return name; + } + + /** + * Sets the arguments to this macro. + */ + public void setArgs(List args) { + this.args = args; + } + + /** + * Returns true if this is a function-like macro. + */ + public boolean isFunctionLike() { + return args != null; + } + + /** + * Returns the number of arguments to this macro. + */ + public int getArgs() { + return args.size(); + } + + /** + * Sets the variadic flag on this Macro. + */ + public void setVariadic(boolean b) { + this.variadic = b; + } + + /** + * Returns true if this is a variadic function-like macro. 
+ */ + public boolean isVariadic() { + return variadic; + } + + /** + * Adds a token to the expansion of this macro. + */ + public void addToken(Token tok) { + this.tokens.add(tok); + } + + /** + * Adds a "paste" operator to the expansion of this macro. + * + * A paste operator causes the next token added to be pasted + * to the previous token when the macro is expanded. + * It is an error for a macro to end with a paste token. + */ + public void addPaste(Token tok) { + /* + * Given: tok0 ## tok1 + * We generate: M_PASTE, tok0, tok1 + * This extends as per a stack language: + * tok0 ## tok1 ## tok2 -> + * M_PASTE, tok0, M_PASTE, tok1, tok2 + */ + this.tokens.add(tokens.size() - 1, tok); + } + + /* pp */ List getTokens() { + return tokens; + } + + public String toString() { + StringBuilder buf = new StringBuilder(name); + if (args != null) { + buf.append('('); + Iterator it = args.iterator(); + while (it.hasNext()) { + buf.append(it.next()); + if (it.hasNext()) + buf.append(", "); + else if (isVariadic()) + buf.append("..."); + } + buf.append(')'); + } + if (!tokens.isEmpty()) { + boolean paste = false; + buf.append(" => "); + for (int i = 0; i < tokens.size(); i++) { + Token tok = tokens.get(i); + if (tok.getType() == Token.M_PASTE) { + paste = true; + continue; + } + else { + buf.append(tok.getText()); + } + if (paste) { + buf.append(" #" + "# "); + paste = false; + } + // buf.append(tokens.get(i)); + } + } + return buf.toString(); + } + +} diff --git a/src/java/org/anarres/cpp/MacroTokenSource.java b/src/java/org/anarres/cpp/MacroTokenSource.java new file mode 100644 index 0000000..249afdf --- /dev/null +++ b/src/java/org/anarres/cpp/MacroTokenSource.java @@ -0,0 +1,191 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your 
option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.io.PushbackReader; +import java.io.Reader; +import java.io.StringReader; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +import static org.anarres.cpp.Token.*; + +/* pp */ class MacroTokenSource extends Source { + private Macro macro; + private Iterator tokens; /* Pointer into the macro. */ + private List args; /* { unexpanded, expanded } */ + private Iterator arg; /* "current expansion" */ + + /* pp */ MacroTokenSource(Macro m, List args) { + this.macro = m; + this.tokens = m.getTokens().iterator(); + this.args = args; + this.arg = null; + } + + @Override + /* pp */ boolean isExpanding(Macro m) { + /* When we are expanding an arg, 'this' macro is not + * being expanded, and thus we may re-expand it. 
*/ + if (/* XXX this.arg == null && */ this.macro == m) + return true; + return super.isExpanding(m); + } + + private static void escape(StringBuilder buf, CharSequence cs) { + for (int i = 0; i < cs.length(); i++) { + char c = cs.charAt(i); + switch (c) { + case '\\': + buf.append("\\\\"); + break; + case '"': + buf.append("\\\""); + break; + case '\n': + buf.append("\\n"); + break; + case '\r': + buf.append("\\r"); + break; + default: + buf.append(c); + } + } + } + + private void concat(StringBuilder buf, Argument arg) { + Iterator it = arg.iterator(); + while (it.hasNext()) { + Token tok = it.next(); + buf.append(tok.getText()); + } + } + + private Token stringify(Token pos, Argument arg) { + StringBuilder buf = new StringBuilder(); + concat(buf, arg); + StringBuilder str = new StringBuilder("\""); + escape(str, buf); + str.append('\"'); + return new Token(STRING, + pos.getLine(), pos.getColumn(), + str.toString(), buf.toString()); + } + + + /* At this point, we have consumed the first M_PASTE. + * @see Macro#addPaste(Token) */ + private void paste(Token ptok) + throws IOException, + LexerException { + StringBuilder buf = new StringBuilder(); + /* We know here that arg is null or expired, + * since we cannot paste an expanded arg. */ + + int count = 2; + for (int i = 0; i < count; i++) { + if (!tokens.hasNext()) + error(ptok.getLine(), ptok.getColumn(), + "Paste at end of expansion"); + Token tok = tokens.next(); + switch (tok.getType()) { + case M_PASTE: + /* One extra to paste, plus one because the + * paste token didn't count. */ + count += 2; + ptok = tok; + break; + case M_ARG: + int idx = ((Integer)tok.getValue()).intValue(); + concat(buf, args.get(idx)); + break; + /* XXX Test this. */ + case COMMENT: + break; + default: + buf.append(tok.getText()); + break; + } + } + + /* XXX Somewhere here, need to check that concatenation + * produces a valid token. */ + + /* Push and re-lex. 
*/ + StringBuilder src = new StringBuilder(); + escape(src, buf); + StringLexerSource sl = new StringLexerSource(src.toString()); + + arg = new SourceIterator(sl); + } + + public Token token() + throws IOException, + LexerException { + for (;;) { + /* Deal with lexed tokens first. */ + + if (arg != null) { + if (arg.hasNext()) + return arg.next(); + arg = null; + } + + if (!tokens.hasNext()) + return new Token(EOF, -1, -1, ""); /* End of macro. */ + Token tok = tokens.next(); + int idx; + switch (tok.getType()) { + case M_STRING: + /* Use the nonexpanded arg. */ + idx = ((Integer)tok.getValue()).intValue(); + return stringify(tok, args.get(idx)); + case M_ARG: + /* Expand the arg. */ + idx = ((Integer)tok.getValue()).intValue(); + // System.out.println("Pushing arg " + args.get(idx)); + arg = args.get(idx).expansion(); + break; + case M_PASTE: + paste(tok); + break; + default: + return tok; + } + } /* for */ + } + + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append("expansion of ").append(macro.getName()); + Source parent = getParent(); + if (parent != null) + buf.append(" in ").append(String.valueOf(parent)); + return buf.toString(); + } +} diff --git a/src/java/org/anarres/cpp/Main.java b/src/java/org/anarres/cpp/Main.java new file mode 100644 index 0000000..cec7a37 --- /dev/null +++ b/src/java/org/anarres/cpp/Main.java @@ -0,0 +1,111 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +import java.io.File; +import java.io.IOException; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import static org.anarres.cpp.Token.*; + +/** + * (Currently a simple test class). + */ +public class Main { + + public static void main(String[] args) throws Exception { + List path = new ArrayList(); + path.add("/usr/include"); + path.add("/usr/local/include"); + path.add("/usr/lib/gcc/i686-pc-linux-gnu/4.1.2/include"); + + Source source = new FileLexerSource(new File(args[0])); + Preprocessor pp = new Preprocessor(source); + pp.setIncludePath(path); + + for (int i = 1; i < args.length; i++) { + pp.push_source(new FileLexerSource(new File(args[i])),true); + } + + Macro m = new Macro("__WORDSIZE"); + m.addToken(new Token(INTEGER, -1, -1, "32", Integer.valueOf(32))); + pp.addMacro(m); + + m = new Macro("__STDC__"); + m.addToken(new Token(INTEGER, -1, -1, "1", Integer.valueOf(1))); + pp.addMacro(m); + + try { + for (;;) { + Token tok = pp.token(); + if (tok != null && tok.getType() == Token.EOF) + break; + switch (2) { + case 0: + System.out.print(tok); + break; + case 1: + System.out.print("[" + tok.getText() + "]"); + break; + case 2: + System.out.print(tok.getText()); + break; + } + } + } + catch (Exception e) { + e.printStackTrace(); + Source s = pp.getSource(); + while (s != null) { + System.out.println(" -> " + s); + s = s.getParent(); + } + + /* + Iterator it = pp.states.iterator(); + while (it.hasNext()) { + System.out.println(" -? 
" + it.next()); + } + */ + + } + + Map macros = pp.getMacros(); + List keys = new ArrayList( + macros.keySet() + ); + Collections.sort(keys); + Iterator mt = keys.iterator(); + while (mt.hasNext()) { + String key = mt.next(); + Macro macro = macros.get(key); + System.out.println("#" + "macro " + macro); + } + + } + +} diff --git a/src/java/org/anarres/cpp/Preprocessor.java b/src/java/org/anarres/cpp/Preprocessor.java new file mode 100644 index 0000000..c1b87d7 --- /dev/null +++ b/src/java/org/anarres/cpp/Preprocessor.java @@ -0,0 +1,1511 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +import java.io.File; +import java.io.IOException; + +import java.util.Arrays; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import static org.anarres.cpp.Token.*; + +/** + * A C Preprocessor. + * The Preprocessor outputs a token stream which does not need + * re-lexing for C or C++. Alternatively, the output text may be + * reconstructed by concatenating the {@link Token#getText() text} + * values of the returned {@link Token Tokens}. 
(See + * {@link CppReader}, which does this.) + */ +public class Preprocessor { + private static final boolean DEBUG = false; + + public static final int FL_LINEMARKER = 1; + + private static final Macro __LINE__ = new Macro("__LINE__"); + private static final Macro __FILE__ = new Macro("__FILE__"); + + private Map macros; + private Stack states; + private Source source; + + private List path; + private PreprocessorListener listener; + + private int flags; + + public Preprocessor(Source initial, int flags) { + this.macros = new HashMap(); + macros.put(__LINE__.getName(), __LINE__); + macros.put(__FILE__.getName(), __FILE__); + this.states = new Stack(); + states.push(new State()); + this.source = null; + this.path = null; + setListener(new PreprocessorListener()); + setFlags(flags); + + push_source(initial, false); + /* We need to get a \n onto the end of this somehow. */ + if ((flags & FL_LINEMARKER) != 0) + source_untoken(line_token(1, source.getName(), "\n")); + } + + public Preprocessor(Source initial) { + this(initial, 0); + } + + /** Equivalent to + * 'new Preprocessor(new {@link FileLexerSource}(file))' + */ + public Preprocessor(File file) + throws IOException { + this(new FileLexerSource(file), 0); + } + + public void setListener(PreprocessorListener listener) { + this.listener = listener; + Source s = source; + while (s != null) { + s.setListener(listener); + s = s.getParent(); + } + } + + public void setFlags(int flags) { + this.flags = flags; + } + + /** + * Handles an error. + * + * If a PreprocessorListener is installed, it receives the + * error. Otherwise, it is ignored. + */ + protected void error(Token tok, String msg) + throws LexerException { + if (listener != null) + listener.handleError(source, + tok.getLine(), tok.getColumn(), + msg); + } + + /** + * Handles a warning. + * + * If a PreprocessorListener is installed, it receives the + * warning. Otherwise, it is ignored. 
+ */ + protected void warning(Token tok, String msg) + throws LexerException { + if (listener != null) + listener.handleError(source, + tok.getLine(), tok.getColumn(), + msg); + } + +/* + public void setSource(Source source) { + this.source = source; + } +*/ + + public void addMacro(Macro m) throws LexerException { + String name = m.getName(); + /* Already handled as a source error in macro(). */ + if ("defined".equals(name)) + throw new LexerException("Cannot redefine name 'defined'"); + macros.put(m.getName(), m); + } + + /** + * Defines the given name as a macro. + * + * This is a convnience method. + */ + public void addMacro(String name, String value) + throws LexerException { + try { + Macro m = new Macro(name); + StringLexerSource s = new StringLexerSource(value); + for (;;) { + Token tok = s.token(); + if (tok.getType() == EOF) + break; + m.addToken(tok); + } + addMacro(m); + } + catch (IOException e) { + throw new LexerException(e); + } + } + + /** + * Defines the given name as a macro. + * + * This is a convnience method. + */ + public void addMacro(String name) + throws LexerException { + addMacro(name, "1"); + } + + /** + * Sets the include path used by this Preprocessor. + */ + /* Note for future: Create an IncludeHandler? */ + public void setIncludePath(List path) { + this.path = path; + } + + /** + * Returns the Map of Macros parsed during the run of this + * Preprocessor. 
+ */ + protected Map getMacros() { + return macros; + } + + +/* States */ + + private void push_state() { + State top = states.peek(); + states.push(new State(top)); + } + + private void pop_state() + throws LexerException { + State s = states.pop(); + if (states.isEmpty()) { + if (listener != null) + listener.handleError(getSource(), 0, 0, + "#" + "endif without #" + "if"); + states.push(s); + } + } + + private boolean isActive() { + State state = states.peek(); + return state.isParentActive() && state.isActive(); + } + + +/* Sources */ + + /** + * Returns the top Source on the input stack. + * + * @see Source + * @see #push_source(Source,boolean) + * @see #pop_source() + */ + protected Source getSource() { + return source; + } + + /** + * Pushes a Source onto the input stack. + * + * @see #getSource() + * @see #pop_source() + */ + protected void push_source(Source source, boolean autopop) { + source.setParent(this.source, autopop); + source.setListener(listener); + this.source = source; + if (listener != null) + listener.handleSourceChange(this.source, "push"); + } + + /** + * Pops a Source from the input stack. 
+ * + * @see #getSource() + * @see #push_source(Source,boolean) + */ + protected void pop_source() { + this.source = this.source.getParent(); + if (listener != null) + listener.handleSourceChange(this.source, "pop"); + } + + +/* Source tokens */ + + private Token source_token; + + private Token line_token(int line, String name, String extra) { + return new Token(P_LINE, line, 0, + "#line " + line + " \"" + name + "\"" + extra, + null + ); + } + + private Token source_token() + throws IOException, + LexerException { + if (source_token != null) { + Token tok = source_token; + source_token = null; + return tok; + } + + for (;;) { + Token tok = source.token(); + if (tok.getType() == EOF && source.isAutopop()) { + // System.out.println("Autopop " + source); + Source s = source; + pop_source(); + if ((flags & FL_LINEMARKER) != 0 && s.isNumbered()) { + /* Not perfect, but ... */ + source_untoken(new Token(NL, source.getLine(), 0, "\n")); + return line_token(source.getLine(), source.getName(), ""); + } + else { + continue; + } + } + return tok; + } + } + + private void source_untoken(Token tok) { + if (this.source_token != null) + throw new IllegalStateException("Cannot return two tokens"); + this.source_token = tok; + } + + private boolean isWhite(Token tok) { + int type = tok.getType(); + return (type == WHITESPACE) || (type == COMMENT); + } + + private Token source_token_nonwhite() + throws IOException, + LexerException { + Token tok; + do { + tok = source_token(); + } while (isWhite(tok)); + return tok; + } + + /** + * Returns an NL or an EOF token. + * + * The metadata on the token will be correct, which is better + * than generating a new one. + */ + private Token source_skipline(boolean white) + throws IOException, + LexerException { + // (new Exception("skipping line")).printStackTrace(System.out); + return source.skipline(white); + } + + /* processes and expands a macro. 
*/ + private boolean macro(Macro m, Token orig) + throws IOException, + LexerException { + Token tok; + List args; + + // System.out.println("pp: expanding " + m); + + if (m.isFunctionLike()) { + OPEN: for (;;) { + tok = source_token(); + // System.out.println("pp: open: token is " + tok); + switch (tok.getType()) { + case WHITESPACE: /* XXX Really? */ + case COMMENT: + case NL: + break; /* continue */ + case '(': + break OPEN; + default: + source_untoken(tok); + return false; + } + } + + // tok = expanded_token_nonwhite(); + tok = source_token_nonwhite(); + + /* We either have, or we should have args. + * This deals elegantly with the case that we have + * one empty arg. */ + if (tok.getType() != ')' || m.getArgs() > 0) { + args = new ArrayList(); + + Argument arg = new Argument(); + int depth = 0; + boolean space = false; + + ARGS: for (;;) { + // System.out.println("pp: arg: token is " + tok); + switch (tok.getType()) { + case EOF: + error(tok, "EOF in macro args"); + return false; + + case ',': + if (depth == 0) { + if (m.isVariadic() && + /* We are building the last arg. */ + args.size() == m.getArgs() - 1) { + /* Just add the comma. */ + arg.addToken(tok); + } + else { + args.add(arg); + arg = new Argument(); + } + } + else { + arg.addToken(tok); + } + space = false; + break; + case ')': + if (depth == 0) { + args.add(arg); + break ARGS; + } + else { + depth--; + arg.addToken(tok); + } + space = false; + break; + case '(': + depth++; + arg.addToken(tok); + space = false; + break; + + case WHITESPACE: + case COMMENT: + /* Avoid duplicating spaces. */ + space = true; + break; + + default: + /* Do not put space on the beginning of + * an argument token. */ + if (space && ! arg.isEmpty()) + arg.addToken(Token.space); + arg.addToken(tok); + space = false; + break; + + } + // tok = expanded_token(); + tok = source_token(); + } + /* space may still be true here, thus trailing space + * is stripped from arguments. 
*/ + + if (args.size() != m.getArgs()) { + error(tok, + "macro " + m.getName() + + " has " + m.getArgs() + " parameters " + + "but given " + args.size() + " args"); + /* We could replay the arg tokens, but I + * note that GNU cpp does exactly what we do, + * i.e. output the macro name and chew the args. + */ + return false; + } + + for (int i = 0; i < args.size(); i++) { + args.get(i).expand(this); + } + + // System.out.println("Macro " + m + " args " + args); + } + else { + /* nargs == 0 and we (correctly) got () */ + args = null; + } + + } + else { + /* Macro without args. */ + args = null; + } + + if (m == __LINE__) { + push_source(new FixedTokenSource( + new Token[] { new Token(INTEGER, + orig.getLine(), orig.getColumn(), + String.valueOf(orig.getLine()), + Integer.valueOf(orig.getLine())) } + ), true); + } + else if (m == __FILE__) { + File file = source.getFile(); + push_source(new FixedTokenSource( + new Token[] { new Token(STRING, + orig.getLine(), orig.getColumn(), + '"'+ String.valueOf(file) +'"', + file) } + ), true); + } + else { + push_source(new MacroTokenSource(m, args), true); + } + + return true; + } + + /** + * Expands an argument. + */ + /* I'd rather this were done lazily. */ + /* pp */ List expand(List arg) + throws IOException, + LexerException { + List expansion = new ArrayList(); + boolean space = false; + + push_source(new FixedTokenSource(arg), false); + EXPANSION: for (;;) { + Token tok = expanded_token(); + switch (tok.getType()) { + case EOF: + break EXPANSION; + + case WHITESPACE: + case COMMENT: + space = true; + break; + + default: + if (space && ! 
expansion.isEmpty()) + expansion.add(Token.space); + expansion.add(tok); + space = false; + break; + } + } + + pop_source(); + + return expansion; + } + + /* processes a #define directive */ + private Token define() + throws IOException, + LexerException { + Token tok = source_token_nonwhite(); + if (tok.getType() != IDENTIFIER) { + error(tok, "Expected identifier"); + return source_skipline(false); + } + /* if predefined */ + + String name = tok.getText(); + if ("defined".equals(name)) { + error(tok, "Cannot redefine name 'defined'"); + return source_skipline(false); + } + + Macro m = new Macro(name); + List args; + + tok = source_token(); + if (tok.getType() == '(') { + tok = source_token_nonwhite(); + if (tok.getType() != ')') { + args = new ArrayList(); + ARGS: for (;;) { + switch (tok.getType()) { + case IDENTIFIER: + args.add(tok.getText()); + break; + // case ELLIPSIS: + case NL: + case EOF: + error(tok, + "Unterminated macro parameter list"); + break ARGS; + default: + source_skipline(false); + error(tok, + "error in macro parameters: " + + tok.getText()); + /* XXX return? */ + break ARGS; + } + tok = source_token_nonwhite(); + switch (tok.getType()) { + case ',': + break; + case ')': + tok = source_token_nonwhite(); + break ARGS; + case ELLIPSIS: + tok = source_token_nonwhite(); + if (tok.getType() != ')') + error(tok, + "ellipsis must be on last argument"); + m.setVariadic(true); + tok = source_token_nonwhite(); + break ARGS; + + case NL: + case EOF: + /* Do not skip line. */ + error(tok, + "Unterminated macro definition"); + break ARGS; + default: + source_skipline(false); + error(tok, + "bad token in macro parameters: " + + tok.getText()); + /* XXX return? */ + break ARGS; + } + tok = source_token_nonwhite(); + } + } + else { + tok = source_token_nonwhite(); /* Lose the ')' */ + args = Collections.emptyList(); + } + + m.setArgs(args); + } + else { + /* For searching. 
*/ + args = Collections.emptyList(); + if (tok.getType() == COMMENT || + tok.getType() == WHITESPACE) { + tok = source_token_nonwhite(); + } + } + + /* Get an expansion for the macro, using indexOf. */ + boolean space = false; + boolean paste = false; + /* XXX UGLY: Ensure no space at start. + * Careful not to break EOF/LF from above. */ + if (isWhite(tok)) /* XXX Not sure this can ever happen now. */ + tok = source_token_nonwhite(); + int idx; + + EXPANSION: for (;;) { + switch (tok.getType()) { + case EOF: + break EXPANSION; + case NL: + break EXPANSION; + + case COMMENT: + // break; + case WHITESPACE: + if (!paste) + space = true; + break; + + case PASTE: + space = false; + paste = true; + m.addPaste(new Token(M_PASTE, + tok.getLine(), tok.getColumn(), + "#" + "#", null)); + break; + + case '#': + if (space) + m.addToken(Token.space); + space = false; + Token la = source_token_nonwhite(); + if (la.getType() == IDENTIFIER && + ((idx = args.indexOf(la.getText())) != -1)) { + m.addToken(new Token(M_STRING, + la.getLine(), la.getColumn(), + "#" + la.getText(), + Integer.valueOf(idx))); + } + else { + m.addToken(tok); + /* Allow for special processing. */ + source_untoken(la); + } + break; + + case IDENTIFIER: + if (space) + m.addToken(Token.space); + space = false; + paste = false; + idx = args.indexOf(tok.getText()); + if (idx == -1) + m.addToken(tok); + else + m.addToken(new Token(M_ARG, + tok.getLine(), tok.getColumn(), + tok.getText(), + Integer.valueOf(idx))); + break; + + default: + if (space) + m.addToken(Token.space); + space = false; + paste = false; + m.addToken(tok); + break; + } + tok = source_token(); + } + + // if (DEBUG) + // System.out.println("Defined macro " + m); + addMacro(m); + + return tok; /* NL or EOF. 
*/ + } + + private Token undef() + throws IOException, + LexerException { + Token tok = source_token_nonwhite(); + if (tok.getType() != IDENTIFIER) { + error(tok, + "Expected identifier, not " + tok.getText()); + if (tok.getType() == NL || tok.getType() == EOF) + return tok; + } + else { + Macro m = macros.get(tok.getText()); + if (m != null) { + /* XXX error if predefined */ + macros.remove(m.getName()); + } + } + return source_skipline(true); + } + + /** + * Handles a include directive. + * + * The user may override this to provide alternate semantics + * for the include directive, for example, creating a Source + * based on a virtual file system. + */ + protected void include(File parent, int line, + String name, boolean quoted) + throws IOException, + LexerException { + if (quoted) { + File dir = parent.getParentFile(); + if (dir == null) + dir = new File("/"); + File file = new File(dir, name); + // System.err.println("Include: " + file); + if (file.exists()) { + push_source(new FileLexerSource(file), true); + return; + } + } + + if (path != null) { + for (int i = 0; i < path.size(); i++) { + File file = new File( + path.get(i) + File.separator + name + ); + if (file.exists()) { + // System.err.println("Include: " + file); + push_source(new FileLexerSource(file), true); + return; + } + } + } + + if (listener != null) + listener.handleError(getSource(), + line, 0, + "Header not found: " + name + " in " + path + ); + } + + private Token include() + throws IOException, + LexerException { + LexerSource lexer = (LexerSource)source; + try { + lexer.setInclude(true); + Token tok = token_nonwhite(); + + String name; + boolean quoted; + + if (tok.getType() == STRING) { + /* XXX Use the original text, not the value. + * Backslashes must not be treated as escapes here. */ + StringBuilder buf = new StringBuilder((String)tok.getValue()); + HEADER: for (;;) { + tok = _token(); /* Do macros but nothing else. 
*/ + switch (tok.getType()) { + case WHITESPACE: + case COMMENT: + continue; + case STRING: + buf.append((String)tok.getValue()); + break; + case NL: + case EOF: + break HEADER; + default: + warning(tok, + "Unexpected token on #"+"include line"); + return source_skipline(false); + } + } + name = buf.toString(); + quoted = true; + } + else if (tok.getType() == HEADER) { + name = (String)tok.getValue(); + quoted = false; + tok = source_skipline(true); + } + else { + error(tok, + "Expected string or header, not " + tok.getText()); + switch (tok.getType()) { + case NL: + case EOF: + return tok; + default: + /* Only if not a NL or EOF already. */ + return source_skipline(false); + } + } + + /* Do the inclusion. */ + include(source.getFile(), tok.getLine(), name, quoted); + + /* 'tok' is the 'nl' after the include. We use it after the + * #line directive. */ + if ((flags & FL_LINEMARKER) != 0) { + source_untoken(tok); + return line_token(1, name, ""); + } + return tok; + } + finally { + lexer.setInclude(false); + } + } + + /* For #error and #warning. */ + private void error(Token pptok, boolean is_error) + throws IOException, + LexerException { + StringBuilder buf = new StringBuilder(); + buf.append('#').append(pptok.getText()).append(' '); + /* Peculiar construction to ditch first whitespace. */ + Token tok = source_token_nonwhite(); + ERROR: for (;;) { + switch (tok.getType()) { + case NL: + case EOF: + break ERROR; + default: + buf.append(tok.getText()); + break; + } + tok = source_token(); + } + if (is_error) + error(pptok, buf.toString()); + else + warning(pptok, buf.toString()); + } + + + + + /* This bypasses token() for #elif expressions. + * If we don't do this, then isActive() == false + * causes token() to simply chew the entire input line. 
*/ + private Token expanded_token() + throws IOException, + LexerException { + for (;;) { + Token tok = source_token(); + // System.out.println("Source token is " + tok); + if (tok.getType() == IDENTIFIER) { + Macro m = macros.get(tok.getText()); + if (m == null) + return tok; + if (source.isExpanding(m)) + return tok; + if (macro(m, tok)) + continue; + } + return tok; + } + } + + private Token expanded_token_nonwhite() + throws IOException, + LexerException { + Token tok; + do { + tok = expanded_token(); + // System.out.println("expanded token is " + tok); + } while (isWhite(tok)); + return tok; + } + + + private Token expr_token = null; + + private Token expr_token() + throws IOException, + LexerException { + Token tok = expr_token; + + if (tok != null) { + // System.out.println("ungetting"); + expr_token = null; + } + else { + tok = expanded_token_nonwhite(); + // System.out.println("expt is " + tok); + + if (tok.getType() == IDENTIFIER && + tok.getText().equals("defined")) { + Token la = source_token_nonwhite(); + boolean paren = false; + if (la.getType() == '(') { + paren = true; + la = source_token_nonwhite(); + } + + // System.out.println("Core token is " + la); + + if (la.getType() != IDENTIFIER) { + error(la, + "defined() needs identifier, not " + + la.getText()); + tok = new Token(INTEGER, + la.getLine(), la.getColumn(), + "0", Integer.valueOf(0)); + } + else if (macros.containsKey(la.getText())) { + // System.out.println("Found macro"); + tok = new Token(INTEGER, + la.getLine(), la.getColumn(), + "1", Integer.valueOf(1)); + } + else { + // System.out.println("Not found macro"); + tok = new Token(INTEGER, + la.getLine(), la.getColumn(), + "0", Integer.valueOf(0)); + } + + if (paren) { + la = source_token_nonwhite(); + if (la.getType() != ')') { + expr_untoken(la); + error(la, "Missing ) in defined()"); + } + } + } + } + + // System.out.println("expr_token returns " + tok); + + return tok; + } + + private void expr_untoken(Token tok) + throws 
LexerException { + if (expr_token != null) + throw new InternalException( + "Cannot unget two expression tokens." + ); + expr_token = tok; + } + + private int expr_priority(Token op) { + switch (op.getType()) { + case '/': return 11; + case '%': return 11; + case '*': return 11; + case '+': return 10; + case '-': return 10; + case LSH: return 9; + case RSH: return 9; + case '<': return 8; + case '>': return 8; + case LE: return 8; + case GE: return 8; + case EQ: return 7; + case NE: return 7; + case '&': return 6; + case '^': return 5; + case '|': return 4; + case LAND: return 3; + case LOR: return 2; + case '?': return 1; + default: + // System.out.println("Unrecognised operator " + op); + return 0; + } + } + + private long expr(int priority) + throws IOException, + LexerException { + /* + System.out.flush(); + (new Exception("expr(" + priority + ") called")).printStackTrace(); + System.err.flush(); + */ + + Token tok = expr_token(); + long lhs, rhs; + + // System.out.println("Expr lhs token is " + tok); + + switch (tok.getType()) { + case '(': + lhs = expr(0); + tok = expr_token(); + if (tok.getType() != ')') { + expr_untoken(tok); + error(tok, "missing ) in expression"); + return 0; + } + break; + + case '~': lhs = ~expr(11); break; + case '!': lhs = expr(11) == 0 ? 1 : 0; break; + case '-': lhs = -expr(11); break; + case INTEGER: + lhs = ((Number)tok.getValue()).longValue(); + break; + case CHARACTER: + lhs = (long)((Character)tok.getValue()).charValue(); + break; + case IDENTIFIER: + /* XXX warn */ + lhs = 0; + break; + + default: + expr_untoken(tok); + error(tok, + "Bad token in expression: " + tok.getText()); + return 0; + } + + EXPR: for (;;) { + // System.out.println("expr: lhs is " + lhs + ", pri = " + priority); + Token op = expr_token(); + int pri = expr_priority(op); /* 0 if not a binop. 
*/ + if (pri == 0 || priority >= pri) { + expr_untoken(op); + break EXPR; + } + rhs = expr(pri); + // System.out.println("rhs token is " + rhs); + switch (op.getType()) { + case '/': + if (rhs == 0) { + error(op, "Division by zero"); + lhs = 0; + } + else { + lhs = lhs / rhs; + } + break; + case '%': + if (rhs == 0) { + error(op, "Modulus by zero"); + lhs = 0; + } + else { + lhs = lhs % rhs; + } + break; + case '*': lhs = lhs * rhs; break; + case '+': lhs = lhs + rhs; break; + case '-': lhs = lhs - rhs; break; + case '<': lhs = lhs < rhs ? 1 : 0; break; + case '>': lhs = lhs > rhs ? 1 : 0; break; + case '&': lhs = lhs & rhs; break; + case '^': lhs = lhs ^ rhs; break; + case '|': lhs = lhs | rhs; break; + + case LSH: lhs = lhs << rhs; break; + case RSH: lhs = lhs >> rhs; break; + case LE: lhs = lhs <= rhs ? 1 : 0; break; + case GE: lhs = lhs >= rhs ? 1 : 0; break; + case EQ: lhs = lhs == rhs ? 1 : 0; break; + case NE: lhs = lhs != rhs ? 1 : 0; break; + case LAND: lhs = (lhs != 0) && (rhs != 0) ? 1 : 0; break; + case LOR: lhs = (lhs != 0) || (rhs != 0) ? 1 : 0; break; + + case '?': + /* XXX Handle this? 
*/ + + default: + error(op, + "Unexpected operator " + op.getText()); + return 0; + + } + } + + /* + System.out.flush(); + (new Exception("expr returning " + lhs)).printStackTrace(); + System.err.flush(); + */ + // System.out.println("expr returning " + lhs); + + return lhs; + } + + private Token toWhitespace(Token tok) { + String text = tok.getText(); + int len = text.length(); + boolean cr = false; + int nls = 0; + + for (int i = 0; i < len; i++) { + char c = text.charAt(i); + + switch (c) { + case '\r': + cr = true; + nls++; + break; + case '\n': + if (cr) { + cr = false; + break; + } + /* fallthrough */ + case '\u2028': + case '\u2029': + case '\u000B': + case '\u000C': + case '\u0085': + cr = false; + nls++; + break; + } + } + + char[] cbuf = new char[nls]; + Arrays.fill(cbuf, '\n'); + return new Token(WHITESPACE, + tok.getLine(), tok.getColumn(), + new String(cbuf)); + } + + private final Token _token() + throws IOException, + LexerException { + + Token tok; + for (;;) { + if (!isActive()) { + /* Tell lexer to ignore warnings. */ + tok = source_token(); + /* Tell lexer to stop ignoring warnings. */ + switch (tok.getType()) { + case HASH: + case NL: + case EOF: + /* The preprocessor has to take action here. */ + break; + case WHITESPACE: + case COMMENT: + // Patch up to preserve whitespace. + /* XXX We might want to return tok here in C */ + return toWhitespace(tok); + default: + // Return NL to preserve whitespace. + return source_skipline(false); + } + } + else { + tok = source_token(); + } + + LEX: switch (tok.getType()) { + case EOF: + /* Pop the stacks. 
*/ + return tok; + + case WHITESPACE: + case NL: + return tok; + + case COMMENT: + return tok; + + case '!': case '%': case '&': + case '(': case ')': case '*': + case '+': case ',': case '-': + case '/': case ':': case ';': + case '<': case '=': case '>': + case '?': case '[': case ']': + case '^': case '{': case '|': + case '}': case '~': case '.': + + // case '#': + + case AND_EQ: + case ARROW: + case CHARACTER: + case DEC: + case DIV_EQ: + case ELLIPSIS: + case EQ: + case GE: + case HEADER: /* Should only arise from include() */ + case INC: + case LAND: + case LE: + case LOR: + case LSH: + case LSH_EQ: + case SUB_EQ: + case MOD_EQ: + case MULT_EQ: + case NE: + case OR_EQ: + case PLUS_EQ: + case RANGE: + case RSH: + case RSH_EQ: + case STRING: + case XOR_EQ: + return tok; + + case INTEGER: + return tok; + + case IDENTIFIER: + Macro m = macros.get(tok.getText()); + if (m == null) + return tok; + if (source.isExpanding(m)) + return tok; + if (macro(m, tok)) + break; + return tok; + + case P_LINE: + if ((flags & FL_LINEMARKER) != 0) + return tok; + break; + + case ERROR: + return tok; + + default: + throw new InternalException("Bad token " + tok); + // break; + + case HASH: + tok = source_token_nonwhite(); + // (new Exception("here")).printStackTrace(); + switch (tok.getType()) { + case NL: + break LEX; /* Some code has #\n */ + case IDENTIFIER: + break; + default: + error(tok, + "Preprocessor directive not a word " + + tok.getText()); + return source_skipline(false); + } + Integer _ppcmd = ppcmds.get(tok.getText()); + if (_ppcmd == null) { + error(tok, + "Unknown preprocessor directive " + + tok.getText()); + return source_skipline(false); + } + int ppcmd = _ppcmd.intValue(); + + switch (ppcmd) { + + case PP_DEFINE: + if (!isActive()) + return source_skipline(false); + else + return define(); + // break; + + case PP_UNDEF: + if (!isActive()) + return source_skipline(false); + else + return undef(); + // break; + + case PP_INCLUDE: + if (!isActive()) + return 
source_skipline(false); + else + return include(); + // break; + + case PP_WARNING: + case PP_ERROR: + if (!isActive()) + return source_skipline(false); + else + error(tok, ppcmd == PP_ERROR); + break; + + case PP_IF: + push_state(); + if (!isActive()) { + return source_skipline(false); + } + expr_token = null; + states.peek().setActive(expr(0) != 0); + tok = expr_token(); /* unget */ + if (tok.getType() == NL) + return tok; + return source_skipline(true); + // break; + + case PP_ELIF: + State state = states.peek(); + if (false) { + /* Check for 'if' */ ; + } + else if (state.sawElse()) { + error(tok, + "#elif after #" + "else"); + return source_skipline(false); + } + else if (!state.isParentActive()) { + /* Nested in skipped 'if' */ + return source_skipline(false); + } + else if (state.isActive()) { + /* The 'if' part got executed. */ + state.setParentActive(false); + /* This is like # else # if but with + * only one # end. */ + state.setActive(false); + return source_skipline(false); + } + else { + expr_token = null; + state.setActive(expr(0) != 0); + tok = expr_token(); /* unget */ + if (tok.getType() == NL) + return tok; + return source_skipline(true); + } + // break; + + case PP_ELSE: + state = states.peek(); + if (false) + /* Check for 'if' */ ; + else if (state.sawElse()) { + error(tok, + "#" + "else after #" + "else"); + return source_skipline(false); + } + else { + state.setSawElse(); + state.setActive(! 
state.isActive()); + return source_skipline(true); + } + // break; + + case PP_IFDEF: + push_state(); + if (!isActive()) { + return source_skipline(false); + } + else { + tok = source_token_nonwhite(); + // System.out.println("ifdef " + tok); + if (tok.getType() != IDENTIFIER) { + error(tok, + "Expected identifier, not " + + tok.getText()); + return source_skipline(false); + } + else { + String text = tok.getText(); + boolean exists = + macros.containsKey(text); + states.peek().setActive(exists); + return source_skipline(true); + } + } + // break; + + case PP_IFNDEF: + push_state(); + if (!isActive()) { + return source_skipline(false); + } + else { + tok = source_token_nonwhite(); + if (tok.getType() != IDENTIFIER) { + error(tok, + "Expected identifier, not " + + tok.getText()); + return source_skipline(false); + } + else { + String text = tok.getText(); + boolean exists = + macros.containsKey(text); + states.peek().setActive(!exists); + return source_skipline(true); + } + } + // break; + + case PP_ENDIF: + pop_state(); + return source_skipline(true); + // break; + + case PP_LINE: + return source_skipline(false); + // break; + + case PP_PRAGMA: + return source_skipline(false); + // break; + + default: + /* Actual unknown directives are + * processed above. If we get here, + * we succeeded the map lookup but + * failed to handle it. Therefore, + * this is (unconditionally?) fatal. */ + // if (isActive()) /* XXX Could be warning. */ + throw new InternalException( + "Internal error: Unknown directive " + + tok); + // return source_skipline(false); + } + + + } + } + } + + private Token token_nonwhite() + throws IOException, + LexerException { + Token tok; + do { + tok = _token(); + } while (isWhite(tok)); + return tok; + } + + /** + * Returns the next preprocessor token. + * + * @see Token + * @throws LexerException if a preprocessing error occurs. + * @throws InternalException if an unexpected error condition arises. 
+ */ + public Token token() + throws IOException, + LexerException { + Token tok = _token(); + if (DEBUG) + System.out.println("pp: Returning " + tok); + return tok; + } + +#set ($i = 1) /* First ppcmd is 1, not 0. */ +#set ($ppcmds = [ "define", "elif", "else", "endif", "error", "if", "ifdef", "ifndef", "include", "line", "pragma", "undef", "warning" ]) +#foreach ($ppcmd in $ppcmds) + private static final int PP_$ppcmd.toUpperCase() = $i; +#set ($i = $i + 1) +#end + + private static final Map ppcmds = + new HashMap(); + + static { +#foreach ($ppcmd in $ppcmds) + ppcmds.put("$ppcmd", Integer.valueOf(PP_$ppcmd.toUpperCase())); +#end + } + + + public String toString() { + StringBuilder buf = new StringBuilder(); + + Source s = getSource(); + while (s != null) { + buf.append(" -> ").append(String.valueOf(s)).append("\n"); + s = s.getParent(); + } + + Map macros = getMacros(); + List keys = new ArrayList( + macros.keySet() + ); + Collections.sort(keys); + Iterator mt = keys.iterator(); + while (mt.hasNext()) { + String key = mt.next(); + Macro macro = macros.get(key); + buf.append("#").append("macro ").append(macro).append("\n"); + } + + return buf.toString(); + } + +} diff --git a/src/java/org/anarres/cpp/PreprocessorListener.java b/src/java/org/anarres/cpp/PreprocessorListener.java new file mode 100644 index 0000000..84a105d --- /dev/null +++ b/src/java/org/anarres/cpp/PreprocessorListener.java @@ -0,0 +1,83 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +import java.io.File; + +public class PreprocessorListener { + + private int errors; + private int warnings; + + public PreprocessorListener() { + clear(); + } + + public void clear() { + errors = 0; + warnings = 0; + } + + public int getErrors() { + return errors; + } + + public int getWarnings() { + return warnings; + } + + protected void print(String msg) { + System.err.println(msg); + } + + /** + * Handles a warning. + * + * The behaviour of this method is defined by the + * implementation. It may simply record the error message, or + * it may throw an exception. + */ + public void handleWarning(Source source, int line, int column, + String msg) + throws LexerException { + warnings++; + print(source.getName() + ":" + line + ":" + column + + ": warning: " + msg); + } + + /** + * Handles an error. + * + * The behaviour of this method is defined by the + * implementation. It may simply record the error message, or + * it may throw an exception. 
+ */ + public void handleError(Source source, int line, int column, + String msg) + throws LexerException { + errors++; + print(source.getName() + ":" + line + ":" + column + + ": error: " + msg); + } + + public void handleSourceChange(Source source, String event) { + } + +} diff --git a/src/java/org/anarres/cpp/Source.java b/src/java/org/anarres/cpp/Source.java new file mode 100644 index 0000000..2999418 --- /dev/null +++ b/src/java/org/anarres/cpp/Source.java @@ -0,0 +1,226 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.io.PushbackReader; +import java.io.Reader; +import java.io.StringReader; + +import java.util.List; +import java.util.Iterator; + +import static org.anarres.cpp.Token.*; + +/** + * An input to the Preprocessor. + * + * Inputs may come from Files, Strings or other sources. The + * preprocessor maintains a stack of Sources. Operations such as + * file inclusion or token pasting will push a new source onto + * the Preprocessor stack. Sources pop from the stack when they + * are exhausted; this may be transparent or explicit. 
+ * + * BUG: Error messages are not handled properly. + */ +public abstract class Source implements Iterable { + private Source parent; + private boolean autopop; + private PreprocessorListener listener; + + /* LineNumberReader */ + +/* + // We can't do this, since we would lose the LexerException + private class Itr implements Iterator { + private Token next = null; + private void advance() { + try { + if (next != null) + next = token(); + } + catch (IOException e) { + throw new UnsupportedOperationException( + "Failed to advance token iterator: " + + e.getMessage() + ); + } + } + public boolean hasNext() { + return next.getType() != EOF; + } + public Token next() { + advance(); + Token t = next; + next = null; + return t; + } + public void remove() { + throw new UnsupportedOperationException( + "Cannot remove tokens from a Source." + ); + } + } +*/ + + public Source() { + this.parent = null; + this.autopop = false; + } + + /* pp */ void setParent(Source parent, boolean autopop) { + this.parent = parent; + this.autopop = autopop; + } + + /* pp */ final Source getParent() { + return parent; + } + + /* pp */ void setListener(PreprocessorListener listener) { + this.listener = listener; + } + + /** + * Returns the File currently being lexed. + * + * If this Source is not a {@link FileLexerSource}, then + * it will ask the parent Source, and so forth recursively. + * If no Source on the stack is a FileLexerSource, returns null. 
+ */ + /* pp */ File getFile() { + Source parent = getParent(); + while (parent != null) { + File file = parent.getFile(); + if (file != null) + return file; + parent = parent.getParent(); + } + return null; + } + + /* pp */ String getName() { + Source parent = getParent(); + while (parent != null) { + String name = parent.getName(); + if (name != null) + return name; + parent = parent.getParent(); + } + return null; + } + + public int getLine() { + Source parent = getParent(); + if (parent == null) + return 0; + return parent.getLine(); + } + + /* pp */ boolean isExpanding(Macro m) { + Source parent = getParent(); + if (parent != null) + return parent.isExpanding(m); + return false; + } + + /** + * Returns true if this Source should be transparently popped + * from the input stack. + * + * Examples of such sources are macro expansions. + */ + /* pp */ boolean isAutopop() { + return autopop; + } + + /* pp */ boolean isNumbered() { + return false; + } + + /** + * Returns the next Token parsed from this input stream. + * + * @see Token + */ + public abstract Token token() + throws IOException, + LexerException; + + public Iterator iterator() { + return new SourceIterator(this); + } + + /** + * Skips tokens until the end of line. + * + * @param white true if only whitespace is permitted on the + * remainder of the line. + * @return the NL token. + */ + public Token skipline(boolean white) + throws IOException, + LexerException { + for (;;) { + Token tok = token(); + switch (tok.getType()) { + case EOF: + /* There ought to be a newline before EOF. + * At least, in any skipline context. */ + /* XXX Are we sure about this? */ + warning(tok.getLine(), tok.getColumn(), + "No newline before end of file"); + return tok; + case NL: + /* This may contain one or more newlines. */ + return tok; + case COMMENT: + case WHITESPACE: + break; + default: + /* XXX Check white, if required. 
*/ + if (white) + warning(tok.getLine(), tok.getColumn(), + "Unexpected nonwhite token"); + break; + } + } + } + + protected void error(int line, int column, String msg) + throws LexerException { + if (listener != null) + listener.handleError(this, line, column, msg); + else + throw new LexerException("No handler for error at " + line + ":" + column + ": " + msg); + } + + protected void warning(int line, int column, String msg) + throws LexerException { + if (listener != null) + listener.handleWarning(this, line, column, msg); + else + throw new LexerException("No handler for warning at " + line + ":" + column + ": " + msg); + } + +} diff --git a/src/java/org/anarres/cpp/SourceIterator.java b/src/java/org/anarres/cpp/SourceIterator.java new file mode 100644 index 0000000..ac2bc24 --- /dev/null +++ b/src/java/org/anarres/cpp/SourceIterator.java @@ -0,0 +1,94 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +import java.io.IOException; + +import java.util.Iterator; +import java.util.NoSuchElementException; + +import static org.anarres.cpp.Token.*; + +/** + * An Iterator for {@link Source Sources}, + * returning {@link Token Tokens}. 
+ */ +public class SourceIterator implements Iterator { + private Source source; + private Token tok; + + public SourceIterator(Source s) { + this.source = s; + this.tok = null; + } + + /** + * Rethrows IOException inside IllegalStateException. + */ + private void advance() { + try { + if (tok == null) + tok = source.token(); + } + catch (LexerException e) { + throw new IllegalStateException(e); + } + catch (IOException e) { + throw new IllegalStateException(e); + } + } + + /** + * Returns true if the enclosed Source has more tokens. + * + * The EOF token is never returned by the iterator. + * @throws IllegalStateException if the Source + * throws a LexerException or IOException + */ + public boolean hasNext() { + advance(); + return tok.getType() != EOF; + } + + /** + * Returns the next token from the enclosed Source. + * + * The EOF token is never returned by the iterator. + * @throws IllegalStateException if the Source + * throws a LexerException or IOException + */ + public Token next() { + if (!hasNext()) + throw new NoSuchElementException(); + Token t = this.tok; + this.tok = null; + return t; + } + + /** + * Not supported. + * + * @throws UnsupportedOperationException. + */ + public void remove() { + throw new UnsupportedOperationException(); + } +} + diff --git a/src/java/org/anarres/cpp/State.java b/src/java/org/anarres/cpp/State.java new file mode 100644 index 0000000..441e71e --- /dev/null +++ b/src/java/org/anarres/cpp/State.java @@ -0,0 +1,69 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +/* pp */ class State { + boolean parent; + boolean active; + boolean sawElse; + + /* pp */ State() { + this.parent = true; + this.active = true; + this.sawElse = false; + } + + /* pp */ State(State parent) { + this.parent = parent.isParentActive() && parent.isActive(); + this.active = true; + this.sawElse = false; + } + + /* Required for #elif */ + /* pp */ void setParentActive(boolean b) { + this.parent = b; + } + + /* pp */ boolean isParentActive() { + return parent; + } + + /* pp */ void setActive(boolean b) { + this.active = b; + } + + /* pp */ boolean isActive() { + return active; + } + + /* pp */ void setSawElse() { + sawElse = true; + } + + /* pp */ boolean sawElse() { + return sawElse; + } + + public String toString() { + return "parent=" + parent + + ", active=" + active + + ", sawelse=" + sawElse; + } +} diff --git a/src/java/org/anarres/cpp/StringLexerSource.java b/src/java/org/anarres/cpp/StringLexerSource.java new file mode 100644 index 0000000..7e7df75 --- /dev/null +++ b/src/java/org/anarres/cpp/StringLexerSource.java @@ -0,0 +1,64 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; + +import java.util.List; +import java.util.Iterator; + +import static org.anarres.cpp.Token.*; + +/** + * A Source for lexing a String. + * + * This class is used by token pasting, but can be used by user + * code. + */ +public class StringLexerSource extends LexerSource { + + /** + * Creates a new Source for lexing the given String. + * + * @param ppvalid true if preprocessor directives are to be + * honoured within the string. + */ + public StringLexerSource(String string, boolean ppvalid) + throws IOException { + super(new StringReader(string), ppvalid); + } + + /** + * Creates a new Source for lexing the given String. + * + * By default, preprocessor directives are not honoured within + * the string. + */ + public StringLexerSource(String string) + throws IOException { + this(string, false); + } + + public String toString() { + return "string literal"; + } +} diff --git a/src/java/org/anarres/cpp/Token.java b/src/java/org/anarres/cpp/Token.java new file mode 100644 index 0000000..e5c1319 --- /dev/null +++ b/src/java/org/anarres/cpp/Token.java @@ -0,0 +1,215 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +/** + * A Preprocessor token. + * + * @see Preprocessor + */ +public final class Token { + + // public static final int EOF = -1; + + private int type; + private int line; + private int column; + private Object value; + private String text; + + public Token(int type, int line, int column, + String text, Object value) { + this.type = type; + this.line = line; + this.column = column; + this.text = text; + this.value = value; + } + + public Token(int type, int line, int column, String text) { + this(type, line, column, text, null); + } + + /* pp */ Token(int type, String text, Object value) { + this(type, -1, -1, text, value); + } + + /* pp */ Token(int type, String text) { + this(type, text, null); + } + + /* pp */ Token(int type) { + this(type, texts[type]); + } + + /** + * Returns the semantic type of this token. + */ + public int getType() { + return type; + } + + /* pp */ void setLocation(int line, int column) { + this.line = line; + this.column = column; + } + + /** + * Returns the line at which this token started. + * + * Lines are numbered from zero. + */ + public int getLine() { + return line; + } + + /** + * Returns the column at which this token started. + * + * Columns are numbered from zero. + */ + public int getColumn() { + return column; + } + + /** + * Returns the original or generated text of this token. + * + * This is distinct from the semantic value of the token. + * + * @see #getValue() + */ + public String getText() { + return text; + } + + /** + * Returns the semantic value of this token. + * + * For strings, this is the parsed String. + * For integers, this is an Integer object. + * For other token types, as appropriate. 
+ * + * @see #getText() + */ + public Object getValue() { + return value; + } + + /** + * Returns a description of this token, for debugging purposes. + */ + public String toString() { + StringBuilder buf = new StringBuilder(); + + buf.append('[').append(getTokenName(type)); + if (line != -1) { + buf.append('@').append(line); + if (column != -1) + buf.append(',').append(column); + } + buf.append("]:"); + if (text != null) + buf.append('"').append(text).append('"'); + else if (type > 3 && type < 256) + buf.append( (char)type ); + else + buf.append('<').append(type).append('>'); + if (value != null) + buf.append('=').append(value); + return buf.toString(); + } + + /** + * Returns the descriptive name of the given token type. + * + * This is mostly used for stringification and debugging. + */ + public static final String getTokenName(int type) { + if (type < 0) + return "Invalid" + type; + if (type >= names.length) + return "Invalid" + type; + if (names[type] == null) + return "Unknown" + type; + return names[type]; + } + +#set ($i = 257) +#set ($tokens = [ "AND_EQ", "ARROW", "CHARACTER", "COMMENT", "DEC", "DIV_EQ", "ELLIPSIS", "EOF", "EQ", "GE", "HASH", "HEADER", "IDENTIFIER", "INC", "INTEGER", "LAND", "LAND_EQ", "LE", "LITERAL", "LOR", "LOR_EQ", "LSH", "LSH_EQ", "MOD_EQ", "MULT_EQ", "NE", "NL", "OR_EQ", "PASTE", "PLUS_EQ", "RANGE", "RSH", "RSH_EQ", "STRING", "SUB_EQ", "WHITESPACE", "XOR_EQ", "M_ARG", "M_PASTE", "M_STRING", "P_LINE", "ERROR" ]) +#foreach ($token in $tokens) + /** The token type $token. */ + public static final int $token = $i; +#set ($i = $i + 1) +#end + /** + * The number of possible semantic token types. + * + * Please note that not all token types below 255 are used. + */ + public static final int _TOKENS = $i; + + /** The position-less space token. 
*/ + /* pp */ static final Token space = new Token(WHITESPACE, -1, -1, " "); + + private static final String[] names = new String[_TOKENS]; + private static final String[] texts = new String[_TOKENS]; + static { + for (int i = 0; i < 255; i++) { + texts[i] = String.valueOf(new char[] { (char)i }); + names[i] = texts[i]; + } + + texts[AND_EQ] = "&="; + texts[ARROW] = "->"; + texts[DEC] = "--"; + texts[DIV_EQ] = "/="; + texts[ELLIPSIS] = "..."; + texts[EQ] = "=="; + texts[GE] = ">="; + texts[HASH] = "#"; + texts[INC] = "++"; + texts[LAND] = "&&"; + texts[LAND_EQ] = "&&="; + texts[LE] = "<="; + texts[LOR] = "||"; + texts[LOR_EQ] = "||="; + texts[LSH] = "<<"; + texts[LSH_EQ] = "<<="; + texts[MOD_EQ] = "%="; + texts[MULT_EQ] = "*="; + texts[NE] = "!="; + texts[NL] = "\n"; + texts[OR_EQ] = "|="; + /* We have to split the two hashes or Velocity eats them. */ + texts[PASTE] = "#" + "#"; + texts[PLUS_EQ] = "+="; + texts[RANGE] = ".."; + texts[RSH] = ">>"; + texts[RSH_EQ] = ">>="; + texts[SUB_EQ] = "-="; + texts[XOR_EQ] = "^="; + +#foreach ($token in $tokens) + names[$token] = "$token"; +#end + } + +} diff --git a/src/java/org/anarres/cpp/TokenSnifferSource.java b/src/java/org/anarres/cpp/TokenSnifferSource.java new file mode 100644 index 0000000..55b53d7 --- /dev/null +++ b/src/java/org/anarres/cpp/TokenSnifferSource.java @@ -0,0 +1,56 @@ +/* + * Anarres C Preprocessor + * Copyright (C) 2007 Shevek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package org.anarres.cpp; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.io.PushbackReader; +import java.io.Reader; +import java.io.StringReader; + +import java.util.ArrayList; +import java.util.List; +import java.util.Iterator; + +import static org.anarres.cpp.Token.*; + +@Deprecated +/* pp */ class TokenSnifferSource extends Source { + private List target; + + /* pp */ TokenSnifferSource(List target) { + this.target = target; + } + + public Token token() + throws IOException, + LexerException { + Token tok = getParent().token(); + if (tok.getType() != EOF) + target.add(tok); + return tok; + } + + public String toString() { + return getParent().toString(); + } +} -- cgit v1.2.3