From 93808fc91f990dbc17a2bc2b350552d9dde89692 Mon Sep 17 00:00:00 2001 From: Shevek Date: Fri, 13 Jun 2008 21:33:42 +0000 Subject: implement Feature.CSYNTAX, Feature.KEEPCOMMENTS, etc --- src/java/org/anarres/cpp/CppReader.java | 5 +-- src/java/org/anarres/cpp/Feature.java | 2 ++ src/java/org/anarres/cpp/LexerSource.java | 18 +++++----- src/java/org/anarres/cpp/MacroTokenSource.java | 17 +++++----- src/java/org/anarres/cpp/Main.java | 8 ++--- src/java/org/anarres/cpp/Preprocessor.java | 47 +++++++++++++++----------- src/java/org/anarres/cpp/Source.java | 3 +- src/java/org/anarres/cpp/Token.java | 2 +- src/tests/org/anarres/cpp/ErrorTestCase.java | 2 +- 9 files changed, 56 insertions(+), 48 deletions(-) (limited to 'src') diff --git a/src/java/org/anarres/cpp/CppReader.java b/src/java/org/anarres/cpp/CppReader.java index 92778e0..a429fd4 100644 --- a/src/java/org/anarres/cpp/CppReader.java +++ b/src/java/org/anarres/cpp/CppReader.java @@ -95,8 +95,9 @@ public class CppReader extends Reader { case EOF: token = null; return false; - case COMMENT: - if (false) { + case CCOMMENT: + case CPPCOMMENT: + if (!cpp.getFeature(Feature.KEEPCOMMENTS)) { token = " "; break; } diff --git a/src/java/org/anarres/cpp/Feature.java b/src/java/org/anarres/cpp/Feature.java index 629b0ec..2fc7a61 100644 --- a/src/java/org/anarres/cpp/Feature.java +++ b/src/java/org/anarres/cpp/Feature.java @@ -24,4 +24,6 @@ public enum Feature { DIGRAPHS, TRIGRAPHS, LINEMARKERS, + CSYNTAX, + KEEPCOMMENTS, } diff --git a/src/java/org/anarres/cpp/LexerSource.java b/src/java/org/anarres/cpp/LexerSource.java index 0735e28..44c6224 100644 --- a/src/java/org/anarres/cpp/LexerSource.java +++ b/src/java/org/anarres/cpp/LexerSource.java @@ -250,7 +250,7 @@ public class LexerSource extends Source { text.append((char)d); } while (d == '*'); } while (d != '/'); - return new Token(COMMENT, text.toString()); + return new Token(CCOMMENT, text.toString()); } private Token cppcomment() @@ -263,7 +263,7 @@ public class LexerSource extends Source { d = read(); } unread(d); - return new Token(COMMENT, text.toString()); + return new Token(CPPCOMMENT, text.toString()); } private int escape(StringBuilder text) @@ -326,13 +326,13 @@ public class LexerSource extends Source { else if (isLineSeparator(d)) { unread(d); // error("Unterminated character literal"); - return new Token(ERROR, text.toString(), + return new Token(INVALID, text.toString(), "Unterminated character literal"); } else if (d == '\'') { text.append('\''); // error("Empty character literal"); - return new Token(ERROR, text.toString(), + return new Token(INVALID, text.toString(), "Empty character literal"); } else if (!Character.isDefined(d)) { @@ -357,7 +357,7 @@ public class LexerSource extends Source { text.append((char)e); e = read(); } - return new Token(ERROR, text.toString(), + return new Token(INVALID, text.toString(), "Illegal character constant"); } text.append('\''); @@ -389,13 +389,13 @@ public class LexerSource extends Source { else if (c == -1) { unread(c); // error("End of file in string literal after " + buf); - return new Token(ERROR, text.toString(), + return new Token(INVALID, text.toString(), "End of file in string literal after " + buf); } else if (isLineSeparator(c)) { unread(c); // error("Unterminated string literal after " + buf); - return new Token(ERROR, text.toString(), + return new Token(INVALID, text.toString(), "Unterminated string literal after " + buf); } else { @@ -435,7 +435,7 @@ public class LexerSource extends Source { } else if (Character.isLetter(d)) { unread(d); - return new Token(ERROR, text.toString(), + return new Token(INVALID, text.toString(), "Invalid suffix \"" + (char)d + "\" on numeric constant"); } @@ -472,7 +472,7 @@ public class LexerSource extends Source { if (Character.digit(d, 16) == -1) { unread(d); // error("Illegal hexadecimal constant " + (char)d); - return new Token(ERROR, text.toString(), + return new Token(INVALID, text.toString(), "Illegal hexadecimal digit " + (char)d + " after "+ text); } diff --git a/src/java/org/anarres/cpp/MacroTokenSource.java b/src/java/org/anarres/cpp/MacroTokenSource.java index b6500ff..264a1a6 100644 --- a/src/java/org/anarres/cpp/MacroTokenSource.java +++ b/src/java/org/anarres/cpp/MacroTokenSource.java @@ -111,9 +111,10 @@ import static org.anarres.cpp.Token.*; int count = 2; for (int i = 0; i < count; i++) { if (!tokens.hasNext()) { - err = new Token(ERROR, - ptok.getLine(), ptok.getColumn(), - ptok.getText(), "Paste at end of expansion"); + /* XXX This one really should throw. */ + error(ptok.getLine(), ptok.getColumn(), + "Paste at end of expansion"); + buf.append(' ').append(ptok.getText()); break; } Token tok = tokens.next(); @@ -129,7 +130,8 @@ import static org.anarres.cpp.Token.*; concat(buf, args.get(idx)); break; /* XXX Test this. */ - case COMMENT: + case CCOMMENT: + case CPPCOMMENT: break; default: buf.append(tok.getText()); @@ -156,10 +158,9 @@ import static org.anarres.cpp.Token.*; if (arg != null) { if (arg.hasNext()) { Token tok = arg.next(); - if (tok.getType() == M_PASTE) - tok = new Token(ERROR, - tok.getLine(), tok.getColumn(), - tok.getText(), "Unexpected paste token"); + /* XXX PASTE -> INVALID. */ + assert tok.getType() != M_PASTE : + "Unexpected paste token"; return tok; } arg = null; diff --git a/src/java/org/anarres/cpp/Main.java b/src/java/org/anarres/cpp/Main.java index b7e11df..0b97f4e 100644 --- a/src/java/org/anarres/cpp/Main.java +++ b/src/java/org/anarres/cpp/Main.java @@ -101,8 +101,9 @@ public class Main { pp.addFeature(Feature.LINEMARKERS); pp.addWarning(Warning.IMPORT); pp.setListener(new PreprocessorListener()); - pp.addMacro("__JCPP__"); + pp.getSystemIncludePath().add("/usr/local/include"); + pp.getSystemIncludePath().add("/usr/include"); GETOPT: while ((c = g.getopt()) != -1) { switch (c) { @@ -157,11 +158,6 @@ public class Main { } } - List path = pp.getSystemIncludePath(); - path.add("/usr/local/include"); - path.add("/usr/include"); - // path.add("/usr/lib/gcc/i686-pc-linux-gnu/4.1.2/include"); - for (int i = g.getOptind(); i < args.length; i++) pp.addInput(new FileLexerSource(new File(args[i]))); diff --git a/src/java/org/anarres/cpp/Preprocessor.java b/src/java/org/anarres/cpp/Preprocessor.java index 4547966..a1a72d9 100644 --- a/src/java/org/anarres/cpp/Preprocessor.java +++ b/src/java/org/anarres/cpp/Preprocessor.java @@ -226,9 +226,6 @@ public class Preprocessor { /** * Adds input for the Preprocessor. * - * XXX Inputs should be maintained off the source stack. - *

- * * Inputs are processed in the order in which they are added. */ public void addInput(Source source) { @@ -530,7 +527,9 @@ public class Preprocessor { private boolean isWhite(Token tok) { int type = tok.getType(); - return (type == WHITESPACE) || (type == COMMENT); + return (type == WHITESPACE) + || (type == CCOMMENT) + || (type == CPPCOMMENT); } private Token source_token_nonwhite() @@ -571,7 +570,8 @@ public class Preprocessor { // System.out.println("pp: open: token is " + tok); switch (tok.getType()) { case WHITESPACE: /* XXX Really? */ - case COMMENT: + case CCOMMENT: + case CPPCOMMENT: case NL: break; /* continue */ case '(': @@ -638,7 +638,8 @@ public class Preprocessor { break; case WHITESPACE: - case COMMENT: + case CCOMMENT: + case CPPCOMMENT: /* Avoid duplicating spaces. */ space = true; break; @@ -671,6 +672,11 @@ public class Preprocessor { return false; } + /* + for (Argument a : args) + a.expand(this); + */ + for (int i = 0; i < args.size(); i++) { args.get(i).expand(this); } @@ -748,7 +754,8 @@ public class Preprocessor { break EXPANSION; case WHITESPACE: - case COMMENT: + case CCOMMENT: + case CPPCOMMENT: space = true; break; @@ -863,7 +870,8 @@ public class Preprocessor { case NL: break EXPANSION; - case COMMENT: + case CCOMMENT: + case CPPCOMMENT: // break; case WHITESPACE: if (!paste) @@ -1109,7 +1117,8 @@ public class Preprocessor { warning(tok, "Empty #" + "pragma"); return tok; - case COMMENT: + case CCOMMENT: + case CPPCOMMENT: case WHITESPACE: continue NAME; case IDENTIFIER: @@ -1135,7 +1144,8 @@ public class Preprocessor { case NL: /* This may contain one or more newlines. */ break VALUE; - case COMMENT: + case CCOMMENT: + case CPPCOMMENT: break; case WHITESPACE: value.add(tok); @@ -1488,7 +1498,8 @@ public class Preprocessor { /* The preprocessor has to take action here. */ break; case WHITESPACE: - case COMMENT: + case CCOMMENT: + case CPPCOMMENT: // Patch up to preserve whitespace. /* XXX We might want to return tok here in C */ return toWhitespace(tok); @@ -1510,7 +1521,8 @@ public class Preprocessor { case NL: return tok; - case COMMENT: + case CCOMMENT: + case CPPCOMMENT: return tok; case '!': case '%': case '&': @@ -1570,14 +1582,9 @@ public class Preprocessor { return tok; break; - case ERROR: - PreprocessorListener l = getListener(); - if (l != null) { - l.handleError(getSource(), - tok.getLine(), tok.getColumn(), - String.valueOf(tok.getValue())); - break; - } + case INVALID: + if (features.contains(Feature.CSYNTAX)) + error(tok, String.valueOf(tok.getValue())); return tok; default: diff --git a/src/java/org/anarres/cpp/Source.java b/src/java/org/anarres/cpp/Source.java index 1ce9f47..f6cd3b8 100644 --- a/src/java/org/anarres/cpp/Source.java +++ b/src/java/org/anarres/cpp/Source.java @@ -255,7 +255,8 @@ public abstract class Source implements Iterable { case NL: /* This may contain one or more newlines. */ return tok; - case COMMENT: + case CCOMMENT: + case CPPCOMMENT: case WHITESPACE: break; default: diff --git a/src/java/org/anarres/cpp/Token.java b/src/java/org/anarres/cpp/Token.java index 537704b..bb56531 100644 --- a/src/java/org/anarres/cpp/Token.java +++ b/src/java/org/anarres/cpp/Token.java @@ -151,7 +151,7 @@ public final class Token { } #set ($i = 257) -#set ($tokens = [ "AND_EQ", "ARROW", "CHARACTER", "COMMENT", "DEC", "DIV_EQ", "ELLIPSIS", "EOF", "EQ", "GE", "HASH", "HEADER", "IDENTIFIER", "INC", "INTEGER", "LAND", "LAND_EQ", "LE", "LITERAL", "LOR", "LOR_EQ", "LSH", "LSH_EQ", "MOD_EQ", "MULT_EQ", "NE", "NL", "OR_EQ", "PASTE", "PLUS_EQ", "RANGE", "RSH", "RSH_EQ", "STRING", "SUB_EQ", "WHITESPACE", "XOR_EQ", "M_ARG", "M_PASTE", "M_STRING", "P_LINE", "ERROR" ]) +#set ($tokens = [ "AND_EQ", "ARROW", "CHARACTER", "CCOMMENT", "CPPCOMMENT", "DEC", "DIV_EQ", "ELLIPSIS", "EOF", "EQ", "GE", "HASH", "HEADER", "IDENTIFIER", "INC", "INTEGER", "LAND", "LAND_EQ", "LE", "LITERAL", "LOR", "LOR_EQ", "LSH", "LSH_EQ", "MOD_EQ", "MULT_EQ", "NE", "NL", "OR_EQ", "PASTE", "PLUS_EQ", "RANGE", "RSH", "RSH_EQ", "STRING", "SUB_EQ", "WHITESPACE", "XOR_EQ", "M_ARG", "M_PASTE", "M_STRING", "P_LINE", "INVALID" ]) #foreach ($token in $tokens) /** The token type $token. */ public static final int $token = $i; diff --git a/src/tests/org/anarres/cpp/ErrorTestCase.java b/src/tests/org/anarres/cpp/ErrorTestCase.java index d7d01e8..6adc912 100644 --- a/src/tests/org/anarres/cpp/ErrorTestCase.java +++ b/src/tests/org/anarres/cpp/ErrorTestCase.java @@ -15,7 +15,7 @@ public class ErrorTestCase extends BaseTestCase { Token tok = p.token(); if (tok.getType() == EOF) break; - else if (tok.getType() == ERROR) + else if (tok.getType() == INVALID) throw new LexerException("Error token: " + tok); } -- cgit v1.2.3