summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/java/org/anarres/cpp/Feature.java 5
-rw-r--r-- src/java/org/anarres/cpp/JoinReader.java 2
-rw-r--r-- src/java/org/anarres/cpp/LexerSource.java 23
-rw-r--r-- src/java/org/anarres/cpp/Preprocessor.java 15
-rw-r--r-- src/tests/org/anarres/cpp/ErrorTestCase.java 18
-rw-r--r-- src/tests/org/anarres/cpp/PreprocessorTestCase.java 12
6 files changed, 49 insertions, 26 deletions
diff --git a/src/java/org/anarres/cpp/Feature.java b/src/java/org/anarres/cpp/Feature.java
index 2fc7a61..2f80245 100644
--- a/src/java/org/anarres/cpp/Feature.java
+++ b/src/java/org/anarres/cpp/Feature.java
@@ -21,9 +21,14 @@ package org.anarres.cpp;
* Features of the Preprocessor, which may be enabled or disabled.
*/
public enum Feature {
+ /** Supports ANSI digraphs. */
DIGRAPHS,
+ /** Supports ANSI trigraphs. */
TRIGRAPHS,
+ /** Outputs linemarker tokens. */
LINEMARKERS,
+ /** Reports tokens of type INVALID as errors. */
CSYNTAX,
+ /** Preserves comments in the lexed output. */
KEEPCOMMENTS,
}
diff --git a/src/java/org/anarres/cpp/JoinReader.java b/src/java/org/anarres/cpp/JoinReader.java
index 4e1a376..91908a7 100644
--- a/src/java/org/anarres/cpp/JoinReader.java
+++ b/src/java/org/anarres/cpp/JoinReader.java
@@ -68,6 +68,8 @@ import java.io.IOException;
private void _unread(int c) {
if (c != -1)
unget[uptr++] = c;
+ assert uptr <= unget.length :
+ "JoinReader ungets too many characters";
}
protected void warning(String msg)
diff --git a/src/java/org/anarres/cpp/LexerSource.java b/src/java/org/anarres/cpp/LexerSource.java
index 44c6224..0eea1c3 100644
--- a/src/java/org/anarres/cpp/LexerSource.java
+++ b/src/java/org/anarres/cpp/LexerSource.java
@@ -235,6 +235,19 @@ public class LexerSource extends Source {
}
}
+ /* Consumes the rest of the current line into an invalid. */
+ private Token invalid(StringBuilder text, String reason)
+ throws IOException,
+ LexerException {
+ int d = read();
+ while (!isLineSeparator(d)) {
+ text.append((char)d);
+ d = read();
+ }
+ unread(d);
+ return new Token(INVALID, text.toString(), reason);
+ }
+
private Token ccomment()
throws IOException,
LexerException {
@@ -325,19 +338,17 @@ public class LexerSource extends Source {
}
else if (isLineSeparator(d)) {
unread(d);
- // error("Unterminated character literal");
return new Token(INVALID, text.toString(),
"Unterminated character literal");
}
else if (d == '\'') {
text.append('\'');
- // error("Empty character literal");
return new Token(INVALID, text.toString(),
"Empty character literal");
}
else if (!Character.isDefined(d)) {
text.append('?');
- error("Illegal unicode character literal");
+ return invalid(text, "Illegal unicode character literal");
}
else {
text.append((char)d);
@@ -348,17 +359,17 @@ public class LexerSource extends Source {
// error("Illegal character constant");
/* We consume up to the next ' or the rest of the line. */
for (;;) {
- if (e == '\'')
- break;
if (isLineSeparator(e)) {
unread(e);
break;
}
text.append((char)e);
+ if (e == '\'')
+ break;
e = read();
}
return new Token(INVALID, text.toString(),
- "Illegal character constant");
+ "Illegal character constant " + text);
}
text.append('\'');
/* XXX It this a bad cast? */
diff --git a/src/java/org/anarres/cpp/Preprocessor.java b/src/java/org/anarres/cpp/Preprocessor.java
index a1a72d9..82e636b 100644
--- a/src/java/org/anarres/cpp/Preprocessor.java
+++ b/src/java/org/anarres/cpp/Preprocessor.java
@@ -497,7 +497,7 @@ public class Preprocessor {
return new Token(EOF);
Source t = inputs.remove(0);
push_source(t, true);
- if (features.contains(Feature.LINEMARKERS))
+ if (getFeature(Feature.LINEMARKERS))
return line_token(t.getLine(), t.getName(), " 1");
continue;
}
@@ -506,7 +506,7 @@ public class Preprocessor {
// System.out.println("Autopop " + s);
pop_source();
Source t = getSource();
- if (features.contains(Feature.LINEMARKERS)
+ if (getFeature(Feature.LINEMARKERS)
&& s.isNumbered()
&& t != null) {
/* XXX Don't we mean t.isNumbered() as well? */
@@ -1082,7 +1082,7 @@ public class Preprocessor {
/* 'tok' is the 'nl' after the include. We use it after the
* #line directive. */
- if (features.contains(Feature.LINEMARKERS))
+ if (getFeature(Feature.LINEMARKERS))
return line_token(1, name, " 1");
return tok;
}
@@ -1498,13 +1498,16 @@ public class Preprocessor {
/* The preprocessor has to take action here. */
break;
case WHITESPACE:
+ return tok;
case CCOMMENT:
case CPPCOMMENT:
// Patch up to preserve whitespace.
- /* XXX We might want to return tok here in C */
+ if (getFeature(Feature.KEEPCOMMENTS))
+ return tok;
return toWhitespace(tok);
default:
// Return NL to preserve whitespace.
+ /* XXX This might lose a comment. */
return source_skipline(false);
}
}
@@ -1578,12 +1581,12 @@ public class Preprocessor {
return tok;
case P_LINE:
- if (features.contains(Feature.LINEMARKERS))
+ if (getFeature(Feature.LINEMARKERS))
return tok;
break;
case INVALID:
- if (features.contains(Feature.CSYNTAX))
+ if (getFeature(Feature.CSYNTAX))
error(tok, String.valueOf(tok.getValue()));
return tok;
diff --git a/src/tests/org/anarres/cpp/ErrorTestCase.java b/src/tests/org/anarres/cpp/ErrorTestCase.java
index 6adc912..ad042be 100644
--- a/src/tests/org/anarres/cpp/ErrorTestCase.java
+++ b/src/tests/org/anarres/cpp/ErrorTestCase.java
@@ -8,17 +8,17 @@ import static org.anarres.cpp.Token.*;
public class ErrorTestCase extends BaseTestCase {
- private void testError(Preprocessor p)
+ private boolean testError(Preprocessor p)
throws LexerException,
IOException {
for (;;) {
Token tok = p.token();
if (tok.getType() == EOF)
break;
- else if (tok.getType() == INVALID)
- throw new LexerException("Error token: " + tok);
+ if (tok.getType() == INVALID)
+ return true;
}
-
+ return false;
}
private void testError(String input) throws Exception {
@@ -29,23 +29,25 @@ public class ErrorTestCase extends BaseTestCase {
/* Without a PreprocessorListener, throws an exception. */
sl = new StringLexerSource(input, true);
p = new Preprocessor();
+ p.addFeature(Feature.CSYNTAX);
p.addInput(sl);
try {
- testError(p);
- fail("Lexing succeeded unexpectedly on " + input);
+ assertTrue(testError(p));
+ fail("Lexing unexpectedly succeeded without listener.");
}
catch (LexerException e) {
- /* ignored */
+ /* required */
}
/* With a PreprocessorListener, records the error. */
sl = new StringLexerSource(input, true);
p = new Preprocessor();
+ p.addFeature(Feature.CSYNTAX);
p.addInput(sl);
pl = new PreprocessorListener();
p.setListener(pl);
assertNotNull("CPP has listener", p.getListener());
- testError(p);
+ assertTrue(testError(p));
assertTrue("Listener has errors", pl.getErrors() > 0);
}
diff --git a/src/tests/org/anarres/cpp/PreprocessorTestCase.java b/src/tests/org/anarres/cpp/PreprocessorTestCase.java
index ff2bc10..e07b085 100644
--- a/src/tests/org/anarres/cpp/PreprocessorTestCase.java
+++ b/src/tests/org/anarres/cpp/PreprocessorTestCase.java
@@ -81,20 +81,20 @@ public class PreprocessorTestCase extends BaseTestCase {
/* Redefinitions, undefinitions. */
testInput("#define two three\n", NL);
-testInput("one /* one */\n", NL, I("one"), WHITESPACE, COMMENT);
+testInput("one /* one */\n", NL, I("one"), WHITESPACE, CCOMMENT);
testInput("#define one two\n", NL);
-testInput("one /* three */\n", NL, I("three"), WHITESPACE, COMMENT);
+testInput("one /* three */\n", NL, I("three"), WHITESPACE, CCOMMENT);
testInput("#undef two\n", NL);
testInput("#define two five\n", NL);
-testInput("one /* five */\n", NL, I("five"), WHITESPACE, COMMENT);
+testInput("one /* five */\n", NL, I("five"), WHITESPACE, CCOMMENT);
testInput("#undef two\n", NL);
-testInput("one /* two */\n", NL, I("two"), WHITESPACE, COMMENT);
+testInput("one /* two */\n", NL, I("two"), WHITESPACE, CCOMMENT);
testInput("#undef one\n", NL);
testInput("#define one four\n", NL);
-testInput("one /* four */\n", NL, I("four"), WHITESPACE, COMMENT);
+testInput("one /* four */\n", NL, I("four"), WHITESPACE, CCOMMENT);
testInput("#undef one\n", NL);
testInput("#define one one\n", NL);
-testInput("one /* one */\n", NL, I("one"), WHITESPACE, COMMENT);
+testInput("one /* one */\n", NL, I("one"), WHITESPACE, CCOMMENT);
/* Variadic macros. */
testInput("#define var(x...) a x b\n", NL);