From 4528d72b75208f21f8d5e7d72991b7d34b4e46cf Mon Sep 17 00:00:00 2001 From: Shevek Date: Wed, 11 Jun 2008 02:18:54 +0000 Subject: new error propagation mechanism; add include list --- src/java/org/anarres/cpp/ChrootFileSystem.java | 6 + src/java/org/anarres/cpp/LexerSource.java | 92 +++++++----- src/java/org/anarres/cpp/MacroTokenSource.java | 27 ++-- src/java/org/anarres/cpp/Preprocessor.java | 189 +++++++++++++------------ src/java/org/anarres/cpp/Source.java | 14 ++ 5 files changed, 198 insertions(+), 130 deletions(-) (limited to 'src/java/org/anarres') diff --git a/src/java/org/anarres/cpp/ChrootFileSystem.java b/src/java/org/anarres/cpp/ChrootFileSystem.java index 4fedb71..f917c9e 100644 --- a/src/java/org/anarres/cpp/ChrootFileSystem.java +++ b/src/java/org/anarres/cpp/ChrootFileSystem.java @@ -63,6 +63,12 @@ public class ChrootFileSystem implements VirtualFileSystem { return new ChrootFile(this, name); } + @Override + public boolean isFile() { + File real = new File(root, getPath()); + return real.isFile(); + } + public Source getSource() throws IOException { return new FileLexerSource(new File(root, getPath()), getPath()); diff --git a/src/java/org/anarres/cpp/LexerSource.java b/src/java/org/anarres/cpp/LexerSource.java index 3728bd6..0735e28 100644 --- a/src/java/org/anarres/cpp/LexerSource.java +++ b/src/java/org/anarres/cpp/LexerSource.java @@ -87,7 +87,7 @@ public class LexerSource extends Source { return true; } -/* Error handling - this lot is barely worth it. */ +/* Error handling. */ private final void _error(String msg, boolean error) throws LexerException { @@ -325,13 +325,15 @@ public class LexerSource extends Source { } else if (isLineSeparator(d)) { unread(d); - error("Unterminated character literal"); - return new Token(ERROR, text.toString(), null); + // error("Unterminated character literal"); + return new Token(ERROR, text.toString(), + "Unterminated character literal"); } else if (d == '\'') { text.append('\''); - error("Empty character literal"); - return new Token(ERROR, text.toString(), null); + // error("Empty character literal"); + return new Token(ERROR, text.toString(), + "Empty character literal"); } else if (!Character.isDefined(d)) { text.append('?'); @@ -343,7 +345,7 @@ public class LexerSource extends Source { int e = read(); if (e != '\'') { - error("Illegal character constant"); + // error("Illegal character constant"); /* We consume up to the next ' or the rest of the line. */ for (;;) { if (e == '\'') @@ -355,7 +357,8 @@ public class LexerSource extends Source { text.append((char)e); e = read(); } - return new Token(ERROR, text.toString(), null); + return new Token(ERROR, text.toString(), + "Illegal character constant"); } text.append('\''); /* XXX It this a bad cast? */ @@ -385,13 +388,15 @@ public class LexerSource extends Source { } else if (c == -1) { unread(c); - error("End of file in string literal after " + buf); - return new Token(ERROR, text.toString(), null); + // error("End of file in string literal after " + buf); + return new Token(ERROR, text.toString(), + "End of file in string literal after " + buf); } else if (isLineSeparator(c)) { unread(c); - error("Unterminated string literal after " + buf); - return new Token(ERROR, text.toString(), null); + // error("Unterminated string literal after " + buf); + return new Token(ERROR, text.toString(), + "Unterminated string literal after " + buf); } else { text.append((char)c); @@ -403,21 +408,42 @@ public class LexerSource extends Source { text.toString(), buf.toString()); } - private void number_suffix(StringBuilder text, int d) + private Token _number(StringBuilder text, long val, int d) throws IOException, LexerException { - if (d == 'U') { - text.append((char)d); - d = read(); - } - if (d == 'L') { - text.append((char)d); - } - else if (d == 'I') { - text.append((char)d); - } - else { - unread(d); + int bits = 0; + for (;;) { + /* XXX Error check duplicate bits. */ + if (d == 'U' || d == 'u') { + bits |= 1; + text.append((char)d); + d = read(); + } + else if (d == 'L' || d == 'l') { + if ((bits & 4) != 0) + /* XXX warn */ ; + bits |= 2; + text.append((char)d); + d = read(); + } + else if (d == 'I' || d == 'i') { + if ((bits & 2) != 0) + /* XXX warn */ ; + bits |= 4; + text.append((char)d); + d = read(); + } + else if (Character.isLetter(d)) { + unread(d); + return new Token(ERROR, text.toString(), + "Invalid suffix \"" + (char)d + + "\" on numeric constant"); + } + else { + unread(d); + return new Token(INTEGER, + text.toString(), Long.valueOf(val)); + } } } @@ -433,9 +459,7 @@ public class LexerSource extends Source { text.append((char)d); d = read(); } - number_suffix(text, d); - return new Token(INTEGER, - text.toString(), Long.valueOf(val)); + return _number(text, val, d); } /* We do not know whether know the first digit is valid. */ @@ -447,8 +471,10 @@ public class LexerSource extends Source { int d = read(); if (Character.digit(d, 16) == -1) { unread(d); - error("Illegal hexadecimal constant " + (char)d); - return new Token(ERROR, text.toString(), null); + // error("Illegal hexadecimal constant " + (char)d); + return new Token(ERROR, text.toString(), + "Illegal hexadecimal digit " + (char)d + + " after "+ text); } long val = 0; do { @@ -456,9 +482,7 @@ public class LexerSource extends Source { text.append((char)d); d = read(); } while (Character.digit(d, 16) != -1); - number_suffix(text, d); - return new Token(INTEGER, - text.toString(), Long.valueOf(val)); + return _number(text, val, d); } /* We know we have at least one valid digit, but empty is not @@ -475,9 +499,7 @@ public class LexerSource extends Source { text.append((char)d); d = read(); } while (Character.digit(d, 10) != -1); - number_suffix(text, d); - return new Token(INTEGER, - text.toString(), Long.valueOf(val)); + return _number(text, val, d); } private Token identifier(int c) diff --git a/src/java/org/anarres/cpp/MacroTokenSource.java b/src/java/org/anarres/cpp/MacroTokenSource.java index 28fc8cb..b6500ff 100644 --- a/src/java/org/anarres/cpp/MacroTokenSource.java +++ b/src/java/org/anarres/cpp/MacroTokenSource.java @@ -32,6 +32,8 @@ import java.util.NoSuchElementException; import static org.anarres.cpp.Token.*; +/* This source should always be active, since we don't expand macros + * in any inactive context. */ /* pp */ class MacroTokenSource extends Source { private Macro macro; private Iterator tokens; /* Pointer into the macro. */ @@ -102,14 +104,18 @@ import static org.anarres.cpp.Token.*; throws IOException, LexerException { StringBuilder buf = new StringBuilder(); + Token err = null; /* We know here that arg is null or expired, * since we cannot paste an expanded arg. */ int count = 2; for (int i = 0; i < count; i++) { - if (!tokens.hasNext()) - error(ptok.getLine(), ptok.getColumn(), - "Paste at end of expansion"); + if (!tokens.hasNext()) { + err = new Token(ERROR, + ptok.getLine(), ptok.getColumn(), + ptok.getText(), "Paste at end of expansion"); + break; + } Token tok = tokens.next(); switch (tok.getType()) { case M_PASTE: @@ -131,14 +137,13 @@ import static org.anarres.cpp.Token.*; } } - /* XXX Somewhere here, need to check that concatenation - * produces a valid token. */ - /* Push and re-lex. */ StringBuilder src = new StringBuilder(); escape(src, buf); StringLexerSource sl = new StringLexerSource(src.toString()); + /* XXX Check that concatenation produces a valid token. */ + arg = new SourceIterator(sl); } @@ -149,8 +154,14 @@ import static org.anarres.cpp.Token.*; /* Deal with lexed tokens first. */ if (arg != null) { - if (arg.hasNext()) - return arg.next(); + if (arg.hasNext()) { + Token tok = arg.next(); + if (tok.getType() == M_PASTE) + tok = new Token(ERROR, + tok.getLine(), tok.getColumn(), + tok.getText(), "Unexpected paste token"); + return tok; + } arg = null; } diff --git a/src/java/org/anarres/cpp/Preprocessor.java b/src/java/org/anarres/cpp/Preprocessor.java index 158e5b8..4547966 100644 --- a/src/java/org/anarres/cpp/Preprocessor.java +++ b/src/java/org/anarres/cpp/Preprocessor.java @@ -42,16 +42,50 @@ import static org.anarres.cpp.Token.*; * values of the returned {@link Token Tokens}. (See * {@link CppReader}, which does this.) */ + + +/* +Source file name and line number information is conveyed by lines of the form + + # linenum filename flags + +These are called linemarkers. They are inserted as needed into +the output (but never within a string or character constant). They +mean that the following line originated in file filename at line +linenum. filename will never contain any non-printing characters; +they are replaced with octal escape sequences. + +After the file name comes zero or more flags, which are `1', `2', +`3', or `4'. If there are multiple flags, spaces separate them. Here +is what the flags mean: + +`1' + This indicates the start of a new file. +`2' + This indicates returning to a file (after having included another + file). +`3' + This indicates that the following text comes from a system header + file, so certain warnings should be suppressed. +`4' + This indicates that the following text should be treated as being + wrapped in an implicit extern "C" block. +*/ + public class Preprocessor { private static final boolean DEBUG = false; private static final Macro __LINE__ = new Macro("__LINE__"); private static final Macro __FILE__ = new Macro("__FILE__"); + private List inputs; + + /* The fundamental engine. */ private Map macros; private Stack states; private Source source; + /* Support junk to make it work like cpp */ private List quoteincludepath; /* -iquote */ private List sysincludepath; /* -I */ private Set features; @@ -60,6 +94,7 @@ public class Preprocessor { private PreprocessorListener listener; public Preprocessor() { + this.inputs = new ArrayList(); this.macros = new HashMap(); macros.put(__LINE__.getName(), __LINE__); macros.put(__FILE__.getName(), __FILE__); @@ -198,21 +233,7 @@ public class Preprocessor { */ public void addInput(Source source) { source.init(this); - if (this.source == null) { - this.source = source; - /* We need to get a \n onto the end of this somehow. */ - if (features.contains(Feature.LINEMARKERS)) - source_untoken(line_token(1, source.getName(), "\n")); - } - else { - Source s = this.source; - Source p = source.getParent(); - while (p != null) { - s = p; - p = s.getParent(); - } - s.setParent(source, true); - } + inputs.add(source); } /** @@ -282,12 +303,6 @@ public class Preprocessor { warning(tok.getLine(), tok.getColumn(), msg); } -/* - public void setSource(Source source) { - this.source = source; - } -*/ - /** * Adds a Macro to this Preprocessor. * @@ -455,29 +470,16 @@ public class Preprocessor { listener.handleSourceChange(this.source, "resume"); } - /** - * Pushes a source onto the input stack. - * - * The top source on the input stack is the one which is - * currently being processed. - * - * It is unlikely that you will want to call this method. It is more - * likely that you want {@link #addInput(Source)}. - * - * @see #addInput(Source) - */ - public void addSource(Source source) { - push_source(source, true); - } - /* Source tokens */ private Token source_token; + /* XXX Make this include the NL, and make all cpp directives eat + * their own NL. */ private Token line_token(int line, String name, String extra) { return new Token(P_LINE, line, 0, - "#line " + line + " \"" + name + "\"" + extra, + "#line " + line + " \"" + name + "\"" + extra + "\n", null ); } @@ -492,22 +494,29 @@ public class Preprocessor { } for (;;) { - if (source == null) - return new Token(EOF); - Token tok = source.token(); - if (tok.getType() == EOF && source.isAutopop()) { - // System.out.println("Autopop " + source); - Source s = source; + Source s = getSource(); + if (s == null) { + if (inputs.isEmpty()) + return new Token(EOF); + Source t = inputs.remove(0); + push_source(t, true); + if (features.contains(Feature.LINEMARKERS)) + return line_token(t.getLine(), t.getName(), " 1"); + continue; + } + Token tok = s.token(); + if (tok.getType() == EOF && s.isAutopop()) { + // System.out.println("Autopop " + s); pop_source(); + Source t = getSource(); if (features.contains(Feature.LINEMARKERS) - && s.isNumbered()) { + && s.isNumbered() + && t != null) { + /* XXX Don't we mean t.isNumbered() as well? */ /* Not perfect, but ... */ - source_untoken(new Token(NL, source.getLine(), 0, "\n")); - return line_token(source.getLine(), source.getName(), ""); - } - else { - continue; + return line_token(t.getLine(), t.getName(), " 2"); } + continue; } return tok; } @@ -787,55 +796,48 @@ public class Preprocessor { case IDENTIFIER: args.add(tok.getText()); break; - // case ELLIPSIS: case NL: case EOF: error(tok, "Unterminated macro parameter list"); - break ARGS; + return tok; default: - source_skipline(false); error(tok, "error in macro parameters: " + tok.getText()); - /* XXX return? */ - break ARGS; + return source_skipline(false); } tok = source_token_nonwhite(); switch (tok.getType()) { case ',': break; - case ')': - tok = source_token_nonwhite(); - break ARGS; case ELLIPSIS: tok = source_token_nonwhite(); if (tok.getType() != ')') error(tok, "ellipsis must be on last argument"); m.setVariadic(true); - tok = source_token_nonwhite(); + break ARGS; + case ')': break ARGS; case NL: case EOF: /* Do not skip line. */ error(tok, - "Unterminated macro definition"); - break ARGS; + "Unterminated macro parameters"); + return tok; default: - source_skipline(false); error(tok, - "bad token in macro parameters: " + + "Bad token in macro parameters: " + tok.getText()); - /* XXX return? */ - break ARGS; + return source_skipline(false); } tok = source_token_nonwhite(); } } else { - tok = source_token_nonwhite(); /* Lose the ')' */ + assert tok.getType() == ')' : "Expected ')'"; args = Collections.emptyList(); } @@ -844,21 +846,16 @@ public class Preprocessor { else { /* For searching. */ args = Collections.emptyList(); - if (tok.getType() == COMMENT || - tok.getType() == WHITESPACE) { - tok = source_token_nonwhite(); - } + source_untoken(tok); } /* Get an expansion for the macro, using indexOf. */ boolean space = false; boolean paste = false; - /* XXX UGLY: Ensure no space at start. - * Careful not to break EOF/LF from above. */ - if (isWhite(tok)) /* XXX Not sure this can ever happen now. */ - tok = source_token_nonwhite(); int idx; + /* Ensure no space at start. */ + tok = source_token_nonwhite(); EXPANSION: for (;;) { switch (tok.getType()) { case EOF: @@ -873,6 +870,7 @@ public class Preprocessor { space = true; break; + /* Paste. */ case PASTE: space = false; paste = true; @@ -881,6 +879,7 @@ public class Preprocessor { "#" + "#", null)); break; + /* Stringify. */ case '#': if (space) m.addToken(Token.space); @@ -926,8 +925,10 @@ public class Preprocessor { tok = source_token(); } - // if (DEBUG) - // System.out.println("Defined macro " + m); + /* + if (DEBUG) + System.out.println("Defined macro " + m); + */ addMacro(m); return tok; /* NL or EOF. */ @@ -990,10 +991,11 @@ public class Preprocessor { String name, boolean quoted) throws IOException, LexerException { + VirtualFile pdir = null; if (quoted) { VirtualFile pfile = filesystem.getFile(parent); - VirtualFile dir = pfile.getParentFile(); - VirtualFile ifile = dir.getChildFile(name); + pdir = pfile.getParentFile(); + VirtualFile ifile = pdir.getChildFile(name); if (include(ifile)) return; if (include(quoteincludepath, name)) @@ -1004,8 +1006,10 @@ public class Preprocessor { return; StringBuilder buf = new StringBuilder(); + buf.append("File not found: ").append(name); + buf.append(" in"); if (quoted) { - buf.append(" ."); + buf.append(" .").append('(').append(pdir).append(')'); for (String dir : quoteincludepath) buf.append(" ").append(dir); } @@ -1070,10 +1074,8 @@ public class Preprocessor { /* 'tok' is the 'nl' after the include. We use it after the * #line directive. */ - if (features.contains(Feature.LINEMARKERS)) { - source_untoken(tok); - return line_token(1, name, ""); - } + if (features.contains(Feature.LINEMARKERS)) + return line_token(1, name, " 1"); return tok; } finally { @@ -1467,12 +1469,18 @@ public class Preprocessor { throws IOException, LexerException { - Token tok; for (;;) { + Token tok; if (!isActive()) { - /* Tell lexer to ignore warnings. */ - tok = source_token(); - /* Tell lexer to stop ignoring warnings. */ + try { + /* XXX Tell lexer to ignore warnings. */ + source.setActive(false); + tok = source_token(); + } + finally { + /* XXX Tell lexer to stop ignoring warnings. */ + source.setActive(true); + } switch (tok.getType()) { case HASH: case NL: @@ -1563,6 +1571,13 @@ public class Preprocessor { break; case ERROR: + PreprocessorListener l = getListener(); + if (l != null) { + l.handleError(getSource(), + tok.getLine(), tok.getColumn(), + String.valueOf(tok.getValue())); + break; + } return tok; default: @@ -1592,7 +1607,7 @@ public class Preprocessor { } int ppcmd = _ppcmd.intValue(); - switch (ppcmd) { + PP: switch (ppcmd) { case PP_DEFINE: if (!isActive()) diff --git a/src/java/org/anarres/cpp/Source.java b/src/java/org/anarres/cpp/Source.java index 74d1bef..1ce9f47 100644 --- a/src/java/org/anarres/cpp/Source.java +++ b/src/java/org/anarres/cpp/Source.java @@ -46,6 +46,7 @@ public abstract class Source implements Iterable { private Source parent; private boolean autopop; private PreprocessorListener listener; + private boolean active; private boolean werror; /* LineNumberReader */ @@ -86,6 +87,9 @@ public abstract class Source implements Iterable { public Source() { this.parent = null; this.autopop = false; + this.listener = null; + this.active = true; + this.werror = false; } /** @@ -202,6 +206,16 @@ public abstract class Source implements Iterable { return false; } + /* This is an incredibly lazy way of disabling warnings when + * the source is not active. */ + /* pp */ void setActive(boolean b) { + this.active = b; + } + + /* pp */ boolean isActive() { + return active; + } + /** * Returns the next Token parsed from this input stream. * -- cgit v1.2.3