new error propagation mechanism; add include list

author: Shevek <[email protected]> 2008-06-11 02:18:54 +0000
committer: Shevek <[email protected]> 2008-06-11 02:18:54 +0000
commit: 4528d72b75208f21f8d5e7d72991b7d34b4e46cf (patch)
tree: 87ce2b4bb993922762491268a26654211a981638 /src
parent: 38d3f08b4a7302c4a1578c867bdfa3d8a57cd8f9 (diff)
6 files changed, 213 insertions, 137 deletions
diff --git a/src/java/org/anarres/cpp/ChrootFileSystem.java b/src/java/org/anarres/cpp/ChrootFileSystem.java
index 4fedb71..f917c9e 100644
--- a/src/java/org/anarres/cpp/ChrootFileSystem.java
+++ b/src/java/org/anarres/cpp/ChrootFileSystem.java
@@ -63,6 +63,12 @@ public class ChrootFileSystem implements VirtualFileSystem {
 			return new ChrootFile(this, name);
 		}
 
+		@Override
+		public boolean isFile() {
+			File	real = new File(root, getPath());
+			return real.isFile();
+		}
+
 		public Source getSource() throws IOException {
 			return new FileLexerSource(new File(root, getPath()),
 							getPath());
diff --git a/src/java/org/anarres/cpp/LexerSource.java b/src/java/org/anarres/cpp/LexerSource.java
index 3728bd6..0735e28 100644
--- a/src/java/org/anarres/cpp/LexerSource.java
+++ b/src/java/org/anarres/cpp/LexerSource.java
@@ -87,7 +87,7 @@ public class LexerSource extends Source {
 		return true;
 	}
 
-/* Error handling - this lot is barely worth it. */
+/* Error handling. */
 
 	private final void _error(String msg, boolean error)
 						throws LexerException {
@@ -325,13 +325,15 @@ public class LexerSource extends Source {
 		}
 		else if (isLineSeparator(d)) {
 			unread(d);
-			error("Unterminated character literal");
-			return new Token(ERROR, text.toString(), null);
+			// error("Unterminated character literal");
+			return new Token(ERROR, text.toString(),
+							"Unterminated character literal");
 		}
 		else if (d == '\'') {
 			text.append('\'');
-			error("Empty character literal");
-			return new Token(ERROR, text.toString(), null);
+			// error("Empty character literal");
+			return new Token(ERROR, text.toString(),
+							"Empty character literal");
 		}
 		else if (!Character.isDefined(d)) {
 			text.append('?');
@@ -343,7 +345,7 @@ public class LexerSource extends Source {
 
 		int		e = read();
 		if (e != '\'') {
-			error("Illegal character constant");
+			// error("Illegal character constant");
 			/* We consume up to the next ' or the rest of the line. */
 			for (;;) {
 				if (e == '\'')
@@ -355,7 +357,8 @@ public class LexerSource extends Source {
 				text.append((char)e);
 				e = read();
 			}
-			return new Token(ERROR, text.toString(), null);
+			return new Token(ERROR, text.toString(),
+							"Illegal character constant");
 		}
 		text.append('\'');
 		/* XXX It this a bad cast? */
@@ -385,13 +388,15 @@ public class LexerSource extends Source {
 			}
 			else if (c == -1) {
 				unread(c);
-				error("End of file in string literal after " + buf);
-				return new Token(ERROR, text.toString(), null);
+				// error("End of file in string literal after " + buf);
+				return new Token(ERROR, text.toString(),
+						"End of file in string literal after " + buf);
 			}
 			else if (isLineSeparator(c)) {
 				unread(c);
-				error("Unterminated string literal after " + buf);
-				return new Token(ERROR, text.toString(), null);
+				// error("Unterminated string literal after " + buf);
+				return new Token(ERROR, text.toString(),
+						"Unterminated string literal after " + buf);
 			}
 			else {
 				text.append((char)c);
@@ -403,21 +408,42 @@ public class LexerSource extends Source {
 						text.toString(), buf.toString());
 	}
 
-	private void number_suffix(StringBuilder text, int d)
+	private Token _number(StringBuilder text, long val, int d)
 						throws IOException,
 								LexerException {
-		if (d == 'U') {
-			text.append((char)d);
-			d = read();
-		}
-		if (d == 'L') {
-			text.append((char)d);
-		}
-		else if (d == 'I') {
-			text.append((char)d);
-		}
-		else {
-			unread(d);
+		int	bits = 0;
+		for (;;) {
+			/* XXX Error check duplicate bits. */
+			if (d == 'U' || d == 'u') {
+				bits |= 1;
+				text.append((char)d);
+				d = read();
+			}
+			else if (d == 'L' || d == 'l') {
+				if ((bits & 4) != 0)
+					/* XXX warn */ ;
+				bits |= 2;
+				text.append((char)d);
+				d = read();
+			}
+			else if (d == 'I' || d == 'i') {
+				if ((bits & 2) != 0)
+					/* XXX warn */ ;
+				bits |= 4;
+				text.append((char)d);
+				d = read();
+			}
+			else if (Character.isLetter(d)) {
+				unread(d);
+				return new Token(ERROR, text.toString(),
+						"Invalid suffix \"" + (char)d +
+						"\" on numeric constant");
+			}
+			else {
+				unread(d);
+				return new Token(INTEGER,
+					text.toString(), Long.valueOf(val));
+			}
 		}
 	}
 
@@ -433,9 +459,7 @@ public class LexerSource extends Source {
 			text.append((char)d);
 			d = read();
 		}
-		number_suffix(text, d);
-		return new Token(INTEGER,
-				text.toString(), Long.valueOf(val));
+		return _number(text, val, d);
 	}
 
 	/* We do not know whether know the first digit is valid. */
@@ -447,8 +471,10 @@ public class LexerSource extends Source {
 		int				d = read();
 		if (Character.digit(d, 16) == -1) {
 			unread(d);
-			error("Illegal hexadecimal constant " + (char)d);
-			return new Token(ERROR, text.toString(), null);
+			// error("Illegal hexadecimal constant " + (char)d);
+			return new Token(ERROR, text.toString(),
+					"Illegal hexadecimal digit " + (char)d +
+					" after "+ text);
 		}
 		long	val = 0;
 		do {
@@ -456,9 +482,7 @@ public class LexerSource extends Source {
 			text.append((char)d);
 			d = read();
 		} while (Character.digit(d, 16) != -1);
-		number_suffix(text, d);
-		return new Token(INTEGER,
-				text.toString(), Long.valueOf(val));
+		return _number(text, val, d);
 	}
 
 	/* We know we have at least one valid digit, but empty is not
@@ -475,9 +499,7 @@ public class LexerSource extends Source {
 			text.append((char)d);
 			d = read();
 		} while (Character.digit(d, 10) != -1);
-		number_suffix(text, d);
-		return new Token(INTEGER,
-				text.toString(), Long.valueOf(val));
+		return _number(text, val, d);
 	}
 
 	private Token identifier(int c)
diff --git a/src/java/org/anarres/cpp/MacroTokenSource.java b/src/java/org/anarres/cpp/MacroTokenSource.java
index 28fc8cb..b6500ff 100644
--- a/src/java/org/anarres/cpp/MacroTokenSource.java
+++ b/src/java/org/anarres/cpp/MacroTokenSource.java
@@ -32,6 +32,8 @@ import java.util.NoSuchElementException;
 
 import static org.anarres.cpp.Token.*;
 
+/* This source should always be active, since we don't expand macros
+ * in any inactive context. */
 /* pp */ class MacroTokenSource extends Source {
 	private Macro				macro;
 	private Iterator<Token>		tokens;	/* Pointer into the macro.  */
@@ -102,14 +104,18 @@ import static org.anarres.cpp.Token.*;
 						throws IOException,
 								LexerException {
 		StringBuilder	buf = new StringBuilder();
+		Token			err = null;
 		/* We know here that arg is null or expired,
 		 * since we cannot paste an expanded arg. */
 
 		int	count = 2;
 		for (int i = 0; i < count; i++) {
-			if (!tokens.hasNext())
-				error(ptok.getLine(), ptok.getColumn(),
-						"Paste at end of expansion");
+			if (!tokens.hasNext()) {
+				err = new Token(ERROR,
+						ptok.getLine(), ptok.getColumn(),
+						ptok.getText(), "Paste at end of expansion");
+				break;
+			}
 			Token	tok = tokens.next();
 			switch (tok.getType()) {
 				case M_PASTE:
@@ -131,14 +137,13 @@ import static org.anarres.cpp.Token.*;
 			}
 		}
 
-		/* XXX Somewhere here, need to check that concatenation
-		 * produces a valid token. */
-
 		/* Push and re-lex. */
 		StringBuilder		src = new StringBuilder();
 		escape(src, buf);
 		StringLexerSource	sl = new StringLexerSource(src.toString());
 
+		/* XXX Check that concatenation produces a valid token. */
+
 		arg = new SourceIterator(sl);
 	}
 
@@ -149,8 +154,14 @@ import static org.anarres.cpp.Token.*;
 			/* Deal with lexed tokens first. */
 
 			if (arg != null) {
-				if (arg.hasNext())
-					return arg.next();
+				if (arg.hasNext()) {
+					Token	tok = arg.next();
+					if (tok.getType() == M_PASTE)
+						tok = new Token(ERROR,
+								tok.getLine(), tok.getColumn(),
+								tok.getText(), "Unexpected paste token");
+					return tok;
+				}
 				arg = null;
 			}
 
diff --git a/src/java/org/anarres/cpp/Preprocessor.java b/src/java/org/anarres/cpp/Preprocessor.java
index 158e5b8..4547966 100644
--- a/src/java/org/anarres/cpp/Preprocessor.java
+++ b/src/java/org/anarres/cpp/Preprocessor.java
@@ -42,16 +42,50 @@ import static org.anarres.cpp.Token.*;
  * values of the returned {@link Token Tokens}. (See
  * {@link CppReader}, which does this.)
  */
+
+
+/*
+Source file name and line number information is conveyed by lines of the form
+
+     # linenum filename flags
+
+These are called linemarkers. They are inserted as needed into
+the output (but never within a string or character constant). They
+mean that the following line originated in file filename at line
+linenum. filename will never contain any non-printing characters;
+they are replaced with octal escape sequences.
+
+After the file name comes zero or more flags, which are `1', `2',
+`3', or `4'. If there are multiple flags, spaces separate them. Here
+is what the flags mean:
+
+`1'
+    This indicates the start of a new file.
+`2'
+    This indicates returning to a file (after having included another
+    file).
+`3'
+    This indicates that the following text comes from a system header
+    file, so certain warnings should be suppressed.
+`4'
+    This indicates that the following text should be treated as being
+    wrapped in an implicit extern "C" block.
+*/
+
 public class Preprocessor {
 	private static final boolean	DEBUG = false;
 
 	private static final Macro		__LINE__ = new Macro("__LINE__");
 	private static final Macro		__FILE__ = new Macro("__FILE__");
 
+	private List<Source>			inputs;
+
+	/* The fundamental engine. */
 	private Map<String,Macro>		macros;
 	private Stack<State>			states;
 	private Source					source;
 
+	/* Support junk to make it work like cpp */
 	private List<String>			quoteincludepath;	/* -iquote */
 	private List<String>			sysincludepath;		/* -I */
 	private Set<Feature>			features;
@@ -60,6 +94,7 @@ public class Preprocessor {
 	private PreprocessorListener	listener;
 
 	public Preprocessor() {
+		this.inputs = new ArrayList<Source>();
 		this.macros = new HashMap<String,Macro>();
 		macros.put(__LINE__.getName(), __LINE__);
 		macros.put(__FILE__.getName(), __FILE__);
@@ -198,21 +233,7 @@ public class Preprocessor {
 	 */
 	public void addInput(Source source) {
 		source.init(this);
-		if (this.source == null) {
-			this.source = source;
-			/* We need to get a \n onto the end of this somehow. */
-			if (features.contains(Feature.LINEMARKERS))
-				source_untoken(line_token(1, source.getName(), "\n"));
-		}
-		else {
-			Source	s = this.source;
-			Source	p = source.getParent();
-			while (p != null) {
-				s = p;
-				p = s.getParent();
-			}
-			s.setParent(source, true);
-		}
+		inputs.add(source);
 	}
 
 	/**
@@ -282,12 +303,6 @@ public class Preprocessor {
 		warning(tok.getLine(), tok.getColumn(), msg);
 	}
 
-/*
-	public void setSource(Source source) {
-		this.source = source;
-	}
-*/
-
 	/**
 	 * Adds a Macro to this Preprocessor.
 	 *
@@ -455,29 +470,16 @@ public class Preprocessor {
 			listener.handleSourceChange(this.source, "resume");
 	}
 
-	/**
-	 * Pushes a source onto the input stack.
-	 *
-	 * The top source on the input stack is the one which is
-	 * currently being processed.
-	 *
-	 * It is unlikely that you will want to call this method. It is more
-	 * likely that you want {@link #addInput(Source)}.
-	 *
-	 * @see #addInput(Source)
-	 */
-	public void addSource(Source source) {
-		push_source(source, true);
-	}
-
 
 /* Source tokens */
 
 	private Token	source_token;
 
+	/* XXX Make this include the NL, and make all cpp directives eat
+	 * their own NL. */
 	private Token line_token(int line, String name, String extra) {
 		return new Token(P_LINE, line, 0,
-			"#line " + line + " \"" + name + "\"" + extra,
+			"#line " + line + " \"" + name + "\"" + extra + "\n",
 			null
 				);
 	}
@@ -492,22 +494,29 @@ public class Preprocessor {
 		}
 
 		for (;;) {
-			if (source == null)
-				return new Token(EOF);
-			Token	tok = source.token();
-			if (tok.getType() == EOF && source.isAutopop()) {
-				// System.out.println("Autopop " + source);
-				Source	s = source;
+			Source	s = getSource();
+			if (s == null) {
+				if (inputs.isEmpty())
+					return new Token(EOF);
+				Source	t = inputs.remove(0);
+				push_source(t, true);
+				if (features.contains(Feature.LINEMARKERS))
+					return line_token(t.getLine(), t.getName(), " 1");
+				continue;
+			}
+			Token	tok = s.token();
+			if (tok.getType() == EOF && s.isAutopop()) {
+				// System.out.println("Autopop " + s);
 				pop_source();
+				Source	t = getSource();
 				if (features.contains(Feature.LINEMARKERS)
-						&& s.isNumbered()) {
+						&& s.isNumbered()
+						&& t != null) {
+					/* XXX Don't we mean t.isNumbered() as well? */
 					/* Not perfect, but ... */
-					source_untoken(new Token(NL, source.getLine(), 0, "\n"));
-					return line_token(source.getLine(), source.getName(), "");
-				}
-				else {
-					continue;
+					return line_token(t.getLine(), t.getName(), " 2");
 				}
+				continue;
 			}
 			return tok;
 		}
@@ -787,55 +796,48 @@ public class Preprocessor {
 						case IDENTIFIER:
 							args.add(tok.getText());
 							break;
-						// case ELLIPSIS:
 						case NL:
 						case EOF:
 							error(tok,
 								"Unterminated macro parameter list");
-							break ARGS;
+							return tok;
 						default:
-							source_skipline(false);
 							error(tok,
 								"error in macro parameters: " +
 								tok.getText());
-							/* XXX return? */
-							break ARGS;
+							return source_skipline(false);
 					}
 					tok = source_token_nonwhite();
 					switch (tok.getType()) {
 						case ',':
 							break;
-						case ')':
-							tok = source_token_nonwhite();
-							break ARGS;
 						case ELLIPSIS:
 							tok = source_token_nonwhite();
 							if (tok.getType() != ')')
 								error(tok,
 									"ellipsis must be on last argument");
 							m.setVariadic(true);
-							tok = source_token_nonwhite();
+							break ARGS;
+						case ')':
 							break ARGS;
 
 						case NL:
 						case EOF:
 							/* Do not skip line. */
 							error(tok,
-								"Unterminated macro definition");
-							break ARGS;
+								"Unterminated macro parameters");
+							return tok;
 						default:
-							source_skipline(false);
 							error(tok,
-								"bad token in macro parameters: " +
+								"Bad token in macro parameters: " +
 								tok.getText());
-							/* XXX return? */
-							break ARGS;
+							return source_skipline(false);
 					}
 					tok = source_token_nonwhite();
 				}
 			}
 			else {
-				tok = source_token_nonwhite();	/* Lose the ')' */
+				assert tok.getType() == ')' : "Expected ')'";
 				args = Collections.emptyList();
 			}
 
@@ -844,21 +846,16 @@ public class Preprocessor {
 		else {
 			/* For searching. */
 			args = Collections.emptyList();
-			if (tok.getType() == COMMENT ||
-				tok.getType() == WHITESPACE) {
-				tok = source_token_nonwhite();
-			}
+			source_untoken(tok);
 		}
 
 		/* Get an expansion for the macro, using indexOf. */
 		boolean	space = false;
 		boolean	paste = false;
-		/* XXX UGLY: Ensure no space at start.
-		 * Careful not to break EOF/LF from above. */
-		if (isWhite(tok))	/* XXX Not sure this can ever happen now. */
-			tok = source_token_nonwhite();
 		int		idx;
 
+		/* Ensure no space at start. */
+		tok = source_token_nonwhite();
 		EXPANSION: for (;;) {
 			switch (tok.getType()) {
 				case EOF:
@@ -873,6 +870,7 @@ public class Preprocessor {
 						space = true;
 					break;
 
+				/* Paste. */
 				case PASTE:
 					space = false;
 					paste = true;
@@ -881,6 +879,7 @@ public class Preprocessor {
 							"#" + "#", null));
 					break;
 
+				/* Stringify. */
 				case '#':
 					if (space)
 						m.addToken(Token.space);
@@ -926,8 +925,10 @@ public class Preprocessor {
 			tok = source_token();
 		}
 
-		// if (DEBUG)
-			// System.out.println("Defined macro " + m);
+		/*
+		if (DEBUG)
+			System.out.println("Defined macro " + m);
+		*/
 		addMacro(m);
 
 		return tok;	/* NL or EOF. */
@@ -990,10 +991,11 @@ public class Preprocessor {
 					String name, boolean quoted)
 						throws IOException,
 								LexerException {
+		VirtualFile	pdir = null;
 		if (quoted) {
 			VirtualFile	pfile = filesystem.getFile(parent);
-			VirtualFile	dir = pfile.getParentFile();
-			VirtualFile	ifile = dir.getChildFile(name);
+			pdir = pfile.getParentFile();
+			VirtualFile	ifile = pdir.getChildFile(name);
 			if (include(ifile))
 				return;
 			if (include(quoteincludepath, name))
@@ -1004,8 +1006,10 @@ public class Preprocessor {
 			return;
 
 		StringBuilder	buf = new StringBuilder();
+		buf.append("File not found: ").append(name);
+		buf.append(" in");
 		if (quoted) {
-			buf.append(" .");
+			buf.append(" .").append('(').append(pdir).append(')');
 			for (String dir : quoteincludepath)
 				buf.append(" ").append(dir);
 		}
@@ -1070,10 +1074,8 @@ public class Preprocessor {
 
 			/* 'tok' is the 'nl' after the include. We use it after the
 			 * #line directive. */
-			if (features.contains(Feature.LINEMARKERS)) {
-				source_untoken(tok);
-				return line_token(1, name, "");
-			}
+			if (features.contains(Feature.LINEMARKERS))
+				return line_token(1, name, " 1");
 			return tok;
 		}
 		finally {
@@ -1467,12 +1469,18 @@ public class Preprocessor {
 						throws IOException,
 								LexerException {
 
-		Token	tok;
 		for (;;) {
+			Token	tok;
 			if (!isActive()) {
-				/* Tell lexer to ignore warnings. */
-				tok = source_token();
-				/* Tell lexer to stop ignoring warnings. */
+				try {
+					/* XXX Tell lexer to ignore warnings. */
+					source.setActive(false);
+					tok = source_token();
+				}
+				finally {
+					/* XXX Tell lexer to stop ignoring warnings. */
+					source.setActive(true);
+				}
 				switch (tok.getType()) {
 					case HASH:
 					case NL:
@@ -1563,6 +1571,13 @@ public class Preprocessor {
 					break;
 
 				case ERROR:
+					PreprocessorListener	l = getListener();
+					if (l != null) {
+						l.handleError(getSource(),
+								tok.getLine(), tok.getColumn(),
+								String.valueOf(tok.getValue()));
+						break;
+					}
 					return tok;
 
 				default:
@@ -1592,7 +1607,7 @@ public class Preprocessor {
 					}
 					int	ppcmd = _ppcmd.intValue();
 
-					switch (ppcmd) {
+					PP: switch (ppcmd) {
 
 						case PP_DEFINE:
 							if (!isActive())
diff --git a/src/java/org/anarres/cpp/Source.java b/src/java/org/anarres/cpp/Source.java
index 74d1bef..1ce9f47 100644
--- a/src/java/org/anarres/cpp/Source.java
+++ b/src/java/org/anarres/cpp/Source.java
@@ -46,6 +46,7 @@ public abstract class Source implements Iterable<Token> {
 	private Source					parent;
 	private boolean					autopop;
 	private PreprocessorListener	listener;
+	private boolean					active;
 	private boolean					werror;
 
 	/* LineNumberReader */
@@ -86,6 +87,9 @@ public abstract class Source implements Iterable<Token> {
 	public Source() {
 		this.parent = null;
 		this.autopop = false;
+		this.listener = null;
+		this.active = true;
+		this.werror = false;
 	}
 
 	/**
@@ -202,6 +206,16 @@ public abstract class Source implements Iterable<Token> {
 		return false;
 	}
 
+	/* This is an incredibly lazy way of disabling warnings when
+	 * the source is not active. */
+	/* pp */ void setActive(boolean b) {
+		this.active = b;
+	}
+
+	/* pp */ boolean isActive() {
+		return active;
+	}
+
 	/**
 	 * Returns the next Token parsed from this input stream.
 	 *
diff --git a/src/tests/org/anarres/cpp/ErrorTestCase.java b/src/tests/org/anarres/cpp/ErrorTestCase.java
index d5d44a3..d7d01e8 100644
--- a/src/tests/org/anarres/cpp/ErrorTestCase.java
+++ b/src/tests/org/anarres/cpp/ErrorTestCase.java
@@ -8,13 +8,15 @@ import static org.anarres.cpp.Token.*;
 
 public class ErrorTestCase extends BaseTestCase {
 
-	private void testError(Source source)
+	private void testError(Preprocessor p)
 						throws LexerException,
 								IOException {
 		for (;;) {
-			Token	tok = source.token();
+			Token	tok = p.token();
 			if (tok.getType() == EOF)
 				break;
+			else if (tok.getType() == ERROR)
+				throw new LexerException("Error token: " + tok);
 		}
 
 	}
@@ -22,12 +24,15 @@ public class ErrorTestCase extends BaseTestCase {
 	private void testError(String input) throws Exception {
 		StringLexerSource		sl;
 		PreprocessorListener	pl;
+		Preprocessor			p;
 
 		/* Without a PreprocessorListener, throws an exception. */
 		sl = new StringLexerSource(input, true);
+		p = new Preprocessor();
+		p.addInput(sl);
 		try {
-			testError(sl);
-			fail("Lexing succeeded");
+			testError(p);
+			fail("Lexing succeeded unexpectedly on " + input);
 		}
 		catch (LexerException e) {
 			/* ignored */
@@ -35,10 +40,13 @@ public class ErrorTestCase extends BaseTestCase {
 
 		/* With a PreprocessorListener, records the error. */
 		sl = new StringLexerSource(input, true);
+		p = new Preprocessor();
+		p.addInput(sl);
 		pl = new PreprocessorListener();
-		sl.setListener(pl);
-		testError(sl);
-		assertTrue(pl.getErrors() > 0);
+		p.setListener(pl);
+		assertNotNull("CPP has listener", p.getListener());
+		testError(p);
+		assertTrue("Listener has errors", pl.getErrors() > 0);
 	}
 
 	public void testErrors() throws Exception {
author	Shevek <[email protected]>	2008-06-11 02:18:54 +0000
committer	Shevek <[email protected]>	2008-06-11 02:18:54 +0000
commit	4528d72b75208f21f8d5e7d72991b7d34b4e46cf (patch)
tree	87ce2b4bb993922762491268a26654211a981638 /src
parent	38d3f08b4a7302c4a1578c867bdfa3d8a57cd8f9 (diff)