diff options
Diffstat (limited to 'src/main/java')
31 files changed, 6150 insertions, 0 deletions
diff --git a/src/main/java/org/anarres/cpp/Argument.java b/src/main/java/org/anarres/cpp/Argument.java new file mode 100644 index 0000000..a868db5 --- /dev/null +++ b/src/main/java/org/anarres/cpp/Argument.java @@ -0,0 +1,77 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.anarres.cpp; + +import java.io.IOException; + +import java.util.ArrayList; +import java.util.List; +import java.util.Iterator; + +import static org.anarres.cpp.Token.*; + +/** + * A macro argument. + * + * This encapsulates a raw and preprocessed token stream. + */ +/* pp */ class Argument extends ArrayList<Token> { + public static final int NO_ARGS = -1; + + private List<Token> expansion; + + public Argument() { + this.expansion = null; + } + + public void addToken(Token tok) { + add(tok); + } + + /* pp */ void expand(Preprocessor p) + throws IOException, + LexerException { + /* Cache expansion. */ + if (expansion == null) { + this.expansion = p.expand(this); + // System.out.println("Expanded arg " + this); + } + } + + public Iterator<Token> expansion() { + return expansion.iterator(); + } + + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append("Argument("); + // buf.append(super.toString()); + buf.append("raw=[ "); + for (int i = 0; i < size(); i++) + buf.append(get(i).getText()); + buf.append(" ];expansion=[ "); + if (expansion == null) + buf.append("null"); + else + for (int i = 0; i < expansion.size(); i++) + buf.append(expansion.get(i).getText()); + buf.append(" ])"); + return buf.toString(); + } + +} diff --git a/src/main/java/org/anarres/cpp/ChrootFileSystem.java b/src/main/java/org/anarres/cpp/ChrootFileSystem.java new file mode 100644 index 0000000..1cec184 --- /dev/null +++ b/src/main/java/org/anarres/cpp/ChrootFileSystem.java @@ -0,0 +1,84 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +import java.io.File; +import java.io.IOException; + +/** + * A virtual filesystem implementation using java.io in a virtual + * chroot. + */ +public class ChrootFileSystem implements VirtualFileSystem { + + private File root; + + public ChrootFileSystem(File root) { + this.root = root; + } + + @Override + public VirtualFile getFile(String path) { + return new ChrootFile(path); + } + + @Override + public VirtualFile getFile(String dir, String name) { + return new ChrootFile(dir, name); + } + + private class ChrootFile extends File implements VirtualFile { + + private File rfile; + + public ChrootFile(String path) { + super(path); + } + + public ChrootFile(String dir, String name) { + super(dir, name); + } + + /* private */ + public ChrootFile(File dir, String name) { + super(dir, name); + } + + @Override + public ChrootFile getParentFile() { + return new ChrootFile(getParent()); + } + + @Override + public ChrootFile getChildFile(String name) { + return new ChrootFile(this, name); + } + + @Override + public boolean isFile() { + File real = new File(root, getPath()); + return real.isFile(); + } + + @Override + public Source getSource() throws IOException { + return new FileLexerSource(new File(root, getPath()), + getPath()); + } + } + +} diff --git a/src/main/java/org/anarres/cpp/CppReader.java b/src/main/java/org/anarres/cpp/CppReader.java new file mode 100644 index 0000000..7307f56 --- /dev/null +++ b/src/main/java/org/anarres/cpp/CppReader.java @@ -0,0 +1,153 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +import java.io.Closeable; +import java.io.IOException; +import java.io.Reader; + +import static org.anarres.cpp.Token.*; + +/** + * A Reader wrapper around the Preprocessor. + * + * This is a utility class to provide a transparent {@link Reader} + * which preprocesses the input text. + * + * @see Preprocessor + * @see Reader + */ +public class CppReader extends Reader implements Closeable { + + private Preprocessor cpp; + private String token; + private int idx; + + public CppReader(final Reader r) { + cpp = new Preprocessor(new LexerSource(r, true) { + @Override + public String getName() { + return "<CppReader Input@" + + System.identityHashCode(r) + ">"; + } + }); + token = ""; + idx = 0; + } + + public CppReader(Preprocessor p) { + cpp = p; + token = ""; + idx = 0; + } + + /** + * Returns the Preprocessor used by this CppReader. + */ + public Preprocessor getPreprocessor() { + return cpp; + } + + /** + * Defines the given name as a macro. + * + * This is a convnience method. + */ + public void addMacro(String name) + throws LexerException { + cpp.addMacro(name); + } + + /** + * Defines the given name as a macro. + * + * This is a convnience method. + */ + public void addMacro(String name, String value) + throws LexerException { + cpp.addMacro(name, value); + } + + private boolean refill() + throws IOException { + try { + assert cpp != null : "cpp is null : was it closed?"; + if (token == null) + return false; + while (idx >= token.length()) { + Token tok = cpp.token(); + switch (tok.getType()) { + case EOF: + token = null; + return false; + case CCOMMENT: + case CPPCOMMENT: + if (!cpp.getFeature(Feature.KEEPCOMMENTS)) { + token = " "; + break; + } + default: + token = tok.getText(); + break; + } + idx = 0; + } + return true; + } catch (LexerException e) { + /* Never happens. + if (e.getCause() instanceof IOException) + throw (IOException)e.getCause(); + */ + IOException ie = new IOException(String.valueOf(e)); + ie.initCause(e); + throw ie; + } + } + + @Override + public int read() + throws IOException { + if (!refill()) + return -1; + return token.charAt(idx++); + } + + /* XXX Very slow and inefficient. */ + public int read(char cbuf[], int off, int len) + throws IOException { + if (token == null) + return -1; + for (int i = 0; i < len; i++) { + int ch = read(); + if (ch == -1) + return i; + cbuf[off + i] = (char) ch; + } + return len; + } + + @Override + public void close() + throws IOException { + if (cpp != null) { + cpp.close(); + cpp = null; + } + token = null; + } + +} diff --git a/src/main/java/org/anarres/cpp/CppTask.java b/src/main/java/org/anarres/cpp/CppTask.java new file mode 100644 index 0000000..5e17786 --- /dev/null +++ b/src/main/java/org/anarres/cpp/CppTask.java @@ -0,0 +1,215 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Arrays; +import java.util.ArrayList; +import java.util.Enumeration; +import java.util.List; +import org.apache.tools.ant.BuildException; +import org.apache.tools.ant.taskdefs.Copy; +import org.apache.tools.ant.types.FilterSet; +import org.apache.tools.ant.types.FilterSetCollection; +import org.apache.tools.ant.types.Path; + +/** + * An ant task for jcpp. + */ +public class CppTask extends Copy { + + private class Listener extends PreprocessorListener { + + @Override + protected void print(String msg) { + log(msg); + } + } + + public static class Macro { + + private String name; + private String value; + + public void setName(String name) { + this.name = name; + } + + public String getName() { + return name; + } + + public void setValue(String value) { + this.value = value; + } + + public String getValue() { + return value; + } + } + + private final Listener listener = new Listener(); + private final List<Macro> macros = new ArrayList<Macro>(); + private Path systemincludepath; + private Path localincludepath; + + public void addMacro(Macro macro) { + macros.add(macro); + } + + public void addSystemincludepath(Path path) { + if (systemincludepath == null) + systemincludepath = new Path(getProject()); + systemincludepath.add(path); + } + + public void addLocalincludepath(Path path) { + if (localincludepath == null) + localincludepath = new Path(getProject()); + localincludepath.add(path); + } + + /* + public void execute() { + FileWriter writer = null; + try { + if (input == null) + throw new BuildException("Input not specified"); + if (output == null) + throw new BuildException("Output not specified"); + cpp.addInput(this.input); + writer = new FileWriter(this.output); + for (;;) { + Token tok = cpp.token(); + if (tok != null && tok.getType() == Token.EOF) + break; + writer.write(tok.getText()); + } + } + catch (Exception e) { + throw new BuildException(e); + } + finally { + if (writer != null) { + try { + writer.close(); + } + catch (IOException e) { + } + } + } + } + */ + private void preprocess(File input, File output) throws Exception { + Preprocessor cpp = new Preprocessor(); + cpp.setListener(listener); + for (Macro macro : macros) + cpp.addMacro(macro.getName(), macro.getValue()); + if (systemincludepath != null) + cpp.setSystemIncludePath(Arrays.asList(systemincludepath.list())); + if (localincludepath != null) + cpp.setQuoteIncludePath(Arrays.asList(localincludepath.list())); + + File dir = output.getParentFile(); + if (!dir.exists()) { + if (!dir.mkdirs()) + throw new BuildException("Failed to make parent directory " + dir); + } else if (!dir.isDirectory()) { + throw new BuildException("Parent directory of output file " + output + " exists, but is not a directory."); + } + FileWriter writer = null; + try { + if (input == null) + throw new BuildException("Input not specified"); + if (output == null) + throw new BuildException("Output not specified"); + cpp.addInput(input); + writer = new FileWriter(output); + for (;;) { + Token tok = cpp.token(); + if (tok == null) + break; + if (tok.getType() == Token.EOF) + break; + writer.write(tok.getText()); + } + } finally { + if (writer != null) { + try { + writer.close(); + } catch (IOException e) { + } + } + } + } + + @Override + protected void doFileOperations() { + if (fileCopyMap.size() > 0) { + log("Copying " + fileCopyMap.size() + + " file" + (fileCopyMap.size() == 1 ? "" : "s") + + " to " + destDir.getAbsolutePath()); + + Enumeration<String> e = fileCopyMap.keys(); + + while (e.hasMoreElements()) { + String fromFile = e.nextElement(); + String[] toFiles = (String[]) fileCopyMap.get(fromFile); + + for (String toFile : toFiles) { + if (fromFile.equals(toFile)) { + log("Skipping self-copy of " + fromFile, verbosity); + continue; + } + + try { + log("Copying " + fromFile + " to " + toFile, verbosity); + + FilterSetCollection executionFilters + = new FilterSetCollection(); + if (filtering) { + executionFilters + .addFilterSet(getProject().getGlobalFilterSet()); + } + for (Enumeration filterEnum = getFilterSets().elements(); + filterEnum.hasMoreElements();) { + executionFilters + .addFilterSet((FilterSet) filterEnum.nextElement()); + } + + File srcFile = new File(fromFile); + File dstFile = new File(toFile); + preprocess(srcFile, dstFile); + } catch (Exception ioe) { + // ioe.printStackTrace(); + String msg = "Failed to copy " + fromFile + " to " + toFile + + " due to " + ioe.getMessage(); + File targetFile = new File(toFile); + if (targetFile.exists() && !targetFile.delete()) { + msg += " and I couldn't delete the corrupt " + toFile; + } + throw new BuildException(msg, ioe, getLocation()); + } + } + } + } + + } + +} diff --git a/src/main/java/org/anarres/cpp/Feature.java b/src/main/java/org/anarres/cpp/Feature.java new file mode 100644 index 0000000..04a68b7 --- /dev/null +++ b/src/main/java/org/anarres/cpp/Feature.java @@ -0,0 +1,42 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.anarres.cpp; + +/** + * Features of the Preprocessor, which may be enabled or disabled. + */ +public enum Feature { + /** Supports ANSI digraphs. */ + DIGRAPHS, + /** Supports ANSI trigraphs. */ + TRIGRAPHS, + /** Outputs linemarker tokens. */ + LINEMARKERS, + /** Reports tokens of type INVALID as errors. */ + CSYNTAX, + /** Preserves comments in the lexed output. */ + KEEPCOMMENTS, + /** Preserves comments in the lexed output, even when inactive. */ + KEEPALLCOMMENTS, + VERBOSE, + DEBUG, + + /** Supports lexing of objective-C. */ + OBJCSYNTAX, + INCLUDENEXT +} diff --git a/src/main/java/org/anarres/cpp/FileLexerSource.java b/src/main/java/org/anarres/cpp/FileLexerSource.java new file mode 100644 index 0000000..db5f9e0 --- /dev/null +++ b/src/main/java/org/anarres/cpp/FileLexerSource.java @@ -0,0 +1,84 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.anarres.cpp; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; + +import java.util.List; +import java.util.Iterator; + +import static org.anarres.cpp.Token.*; + +/** + * A {@link Source} which lexes a file. + * + * The input is buffered. + * + * @see Source + */ +public class FileLexerSource extends LexerSource { + // private File file; + private String path; + + /** + * Creates a new Source for lexing the given File. + * + * Preprocessor directives are honoured within the file. + */ + public FileLexerSource(File file, String path) + throws IOException { + super( + new BufferedReader( + new FileReader( + file + ) + ), + true + ); + + // this.file = file; + this.path = path; + } + + public FileLexerSource(File file) + throws IOException { + this(file, file.getPath()); + } + + public FileLexerSource(String path) + throws IOException { + this(new File(path)); + } + + @Override + /* pp */ String getPath() { + return path; + } + + @Override + /* pp */ String getName() { + return getPath(); + } + + public String toString() { + return "file " + path; + } +} diff --git a/src/main/java/org/anarres/cpp/FixedTokenSource.java b/src/main/java/org/anarres/cpp/FixedTokenSource.java new file mode 100644 index 0000000..a1c7500 --- /dev/null +++ b/src/main/java/org/anarres/cpp/FixedTokenSource.java @@ -0,0 +1,60 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +import java.io.IOException; + +import java.util.Arrays; +import java.util.List; + +/* pp */ class FixedTokenSource extends Source { + + private static final Token EOF + = new Token(Token.EOF, "<ts-eof>"); + + private final List<Token> tokens; + private int idx; + + /* pp */ FixedTokenSource(Token... tokens) { + this.tokens = Arrays.asList(tokens); + this.idx = 0; + } + + /* pp */ FixedTokenSource(List<Token> tokens) { + this.tokens = tokens; + this.idx = 0; + } + + @Override + public Token token() + throws IOException, + LexerException { + if (idx >= tokens.size()) + return EOF; + return tokens.get(idx++); + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append("constant token stream ").append(tokens); + Source parent = getParent(); + if (parent != null) + buf.append(" in ").append(String.valueOf(parent)); + return buf.toString(); + } +} diff --git a/src/main/java/org/anarres/cpp/InputLexerSource.java b/src/main/java/org/anarres/cpp/InputLexerSource.java new file mode 100644 index 0000000..3e1dcb3 --- /dev/null +++ b/src/main/java/org/anarres/cpp/InputLexerSource.java @@ -0,0 +1,64 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.IOException; + +/** + * A {@link Source} which lexes a file. + * + * The input is buffered. + * + * @see Source + */ +public class InputLexerSource extends LexerSource { + + /** + * Creates a new Source for lexing the given Reader. + * + * Preprocessor directives are honoured within the file. + */ + public InputLexerSource(InputStream input) + throws IOException { + super( + new BufferedReader( + new InputStreamReader( + input + ) + ), + true + ); + } + + @Override + /* pp */ String getPath() { + return "<standard-input>"; + } + + @Override + /* pp */ String getName() { + return "standard input"; + } + + @Override + public String toString() { + return getPath(); + } +} diff --git a/src/main/java/org/anarres/cpp/InternalException.java b/src/main/java/org/anarres/cpp/InternalException.java new file mode 100644 index 0000000..ac53017 --- /dev/null +++ b/src/main/java/org/anarres/cpp/InternalException.java @@ -0,0 +1,31 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.anarres.cpp; + +/** + * An internal exception. + * + * This exception is thrown when an internal state violation is + * encountered. This should never happen. If it ever happens, please + * report it as a bug. + */ +public class InternalException extends RuntimeException { + public InternalException(String msg) { + super(msg); + } +} diff --git a/src/main/java/org/anarres/cpp/JavaFileSystem.java b/src/main/java/org/anarres/cpp/JavaFileSystem.java new file mode 100644 index 0000000..a60271d --- /dev/null +++ b/src/main/java/org/anarres/cpp/JavaFileSystem.java @@ -0,0 +1,84 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +import java.io.File; +import java.io.IOException; + +/** + * A virtual filesystem implementation using java.io. + */ +public class JavaFileSystem implements VirtualFileSystem { + + @Override + public VirtualFile getFile(String path) { + return new JavaFile(path); + } + + @Override + public VirtualFile getFile(String dir, String name) { + return new JavaFile(dir, name); + } + + private class JavaFile extends File implements VirtualFile { + + public JavaFile(String path) { + super(path); + } + + public JavaFile(String dir, String name) { + super(dir, name); + } + + /* private */ + public JavaFile(File dir, String name) { + super(dir, name); + } + + /* + @Override + public String getPath() { + return getCanonicalPath(); + } + */ + @Override + public JavaFile getParentFile() { + String parent = getParent(); + if (parent != null) + return new JavaFile(parent); + File absolute = getAbsoluteFile(); + parent = absolute.getParent(); + /* + if (parent == null) + return null; + */ + return new JavaFile(parent); + } + + @Override + public JavaFile getChildFile(String name) { + return new JavaFile(this, name); + } + + @Override + public Source getSource() throws IOException { + return new FileLexerSource(this); + } + + } + +} diff --git a/src/main/java/org/anarres/cpp/JoinReader.java b/src/main/java/org/anarres/cpp/JoinReader.java new file mode 100644 index 0000000..0286577 --- /dev/null +++ b/src/main/java/org/anarres/cpp/JoinReader.java @@ -0,0 +1,218 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +import java.io.Closeable; +import java.io.Reader; +import java.io.IOException; + +/* pp */ class JoinReader /* extends Reader */ implements Closeable { + + private final Reader in; + + private PreprocessorListener listener; + private LexerSource source; + private boolean trigraphs; + private boolean warnings; + + private int newlines; + private boolean flushnl; + private int[] unget; + private int uptr; + + public JoinReader(Reader in, boolean trigraphs) { + this.in = in; + this.trigraphs = trigraphs; + this.newlines = 0; + this.flushnl = false; + this.unget = new int[2]; + this.uptr = 0; + } + + public JoinReader(Reader in) { + this(in, false); + } + + public void setTrigraphs(boolean enable, boolean warnings) { + this.trigraphs = enable; + this.warnings = warnings; + } + + /* pp */ void init(Preprocessor pp, LexerSource s) { + this.listener = pp.getListener(); + this.source = s; + setTrigraphs(pp.getFeature(Feature.TRIGRAPHS), + pp.getWarning(Warning.TRIGRAPHS)); + } + + private int __read() throws IOException { + if (uptr > 0) + return unget[--uptr]; + return in.read(); + } + + private void _unread(int c) { + if (c != -1) + unget[uptr++] = c; + assert uptr <= unget.length : + "JoinReader ungets too many characters"; + } + + protected void warning(String msg) + throws LexerException { + if (source != null) + source.warning(msg); + else + throw new LexerException(msg); + } + + private char trigraph(char raw, char repl) + throws IOException, LexerException { + if (trigraphs) { + if (warnings) + warning("trigraph ??" + raw + " converted to " + repl); + return repl; + } else { + if (warnings) + warning("trigraph ??" + raw + " ignored"); + _unread(raw); + _unread('?'); + return '?'; + } + } + + private int _read() + throws IOException, LexerException { + int c = __read(); + if (c == '?' && (trigraphs || warnings)) { + int d = __read(); + if (d == '?') { + int e = __read(); + switch (e) { + case '(': + return trigraph('(', '['); + case ')': + return trigraph(')', ']'); + case '<': + return trigraph('<', '{'); + case '>': + return trigraph('>', '}'); + case '=': + return trigraph('=', '#'); + case '/': + return trigraph('/', '\\'); + case '\'': + return trigraph('\'', '^'); + case '!': + return trigraph('!', '|'); + case '-': + return trigraph('-', '~'); + } + _unread(e); + } + _unread(d); + } + return c; + } + + public int read() + throws IOException, LexerException { + if (flushnl) { + if (newlines > 0) { + newlines--; + return '\n'; + } + flushnl = false; + } + + for (;;) { + int c = _read(); + switch (c) { + case '\\': + int d = _read(); + switch (d) { + case '\n': + newlines++; + continue; + case '\r': + newlines++; + int e = _read(); + if (e != '\n') + _unread(e); + continue; + default: + _unread(d); + return c; + } + case '\r': + case '\n': + case '\u2028': + case '\u2029': + case '\u000B': + case '\u000C': + case '\u0085': + flushnl = true; + return c; + case -1: + if (newlines > 0) { + newlines--; + return '\n'; + } + default: + return c; + } + } + } + + public int read(char cbuf[], int off, int len) + throws IOException, LexerException { + for (int i = 0; i < len; i++) { + int ch = read(); + if (ch == -1) + return i; + cbuf[off + i] = (char) ch; + } + return len; + } + + @Override + public void close() + throws IOException { + in.close(); + } + + @Override + public String toString() { + return "JoinReader(nl=" + newlines + ")"; + } + + /* + public static void main(String[] args) throws IOException { + FileReader f = new FileReader(new File(args[0])); + BufferedReader b = new BufferedReader(f); + JoinReader r = new JoinReader(b); + BufferedWriter w = new BufferedWriter( + new java.io.OutputStreamWriter(System.out) + ); + int c; + while ((c = r.read()) != -1) { + w.write((char)c); + } + w.close(); + } + */ +} diff --git a/src/main/java/org/anarres/cpp/LexerException.java b/src/main/java/org/anarres/cpp/LexerException.java new file mode 100644 index 0000000..41c6275 --- /dev/null +++ b/src/main/java/org/anarres/cpp/LexerException.java @@ -0,0 +1,33 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.anarres.cpp; + +/** + * A preprocessor exception. + * + * Note to users: I don't really like the name of this class. S. + */ +public class LexerException extends Exception { + public LexerException(String msg) { + super(msg); + } + + public LexerException(Throwable cause) { + super(cause); + } +} diff --git a/src/main/java/org/anarres/cpp/LexerSource.java b/src/main/java/org/anarres/cpp/LexerSource.java new file mode 100644 index 0000000..ca18314 --- /dev/null +++ b/src/main/java/org/anarres/cpp/LexerSource.java @@ -0,0 +1,910 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +import java.io.IOException; +import java.io.Reader; + +import javax.annotation.Nonnull; +import static org.anarres.cpp.Token.*; + +/** Does not handle digraphs. */ +public class LexerSource extends Source { + + private static final boolean DEBUG = false; + + private JoinReader reader; + private final boolean ppvalid; + private boolean bol; + private boolean include; + + private boolean digraphs; + + /* Unread. */ + private int u0, u1; + private int ucount; + + private int line; + private int column; + private int lastcolumn; + private boolean cr; + + /* ppvalid is: + * false in StringLexerSource, + * true in FileLexerSource */ + public LexerSource(Reader r, boolean ppvalid) { + this.reader = new JoinReader(r); + this.ppvalid = ppvalid; + this.bol = true; + this.include = false; + + this.digraphs = true; + + this.ucount = 0; + + this.line = 1; + this.column = 0; + this.lastcolumn = -1; + this.cr = false; + } + + @Override + /* pp */ void init(Preprocessor pp) { + super.init(pp); + this.digraphs = pp.getFeature(Feature.DIGRAPHS); + this.reader.init(pp, this); + } + + @Override + public int getLine() { + return line; + } + + @Override + public int getColumn() { + return column; + } + + @Override + /* pp */ boolean isNumbered() { + return true; + } + + /* Error handling. */ + private void _error(String msg, boolean error) + throws LexerException { + int _l = line; + int _c = column; + if (_c == 0) { + _c = lastcolumn; + _l--; + } else { + _c--; + } + if (error) + super.error(_l, _c, msg); + else + super.warning(_l, _c, msg); + } + + /* Allow JoinReader to call this. */ + /* pp */ final void error(String msg) + throws LexerException { + _error(msg, true); + } + + /* Allow JoinReader to call this. */ + /* pp */ final void warning(String msg) + throws LexerException { + _error(msg, false); + } + + /* A flag for string handling. */ + + /* pp */ void setInclude(boolean b) { + this.include = b; + } + + /* + private boolean _isLineSeparator(int c) { + return Character.getType(c) == Character.LINE_SEPARATOR + || c == -1; + } + */ + + /* XXX Move to JoinReader and canonicalise newlines. */ + private static boolean isLineSeparator(int c) { + switch ((char) c) { + case '\r': + case '\n': + case '\u2028': + case '\u2029': + case '\u000B': + case '\u000C': + case '\u0085': + return true; + default: + return (c == -1); + } + } + + private int read() + throws IOException, + LexerException { + int c; + assert ucount <= 2 : "Illegal ucount: " + ucount; + switch (ucount) { + case 2: + ucount = 1; + c = u1; + break; + case 1: + ucount = 0; + c = u0; + break; + default: + if (reader == null) + c = -1; + else + c = reader.read(); + break; + } + + switch (c) { + case '\r': + cr = true; + line++; + lastcolumn = column; + column = 0; + break; + case '\n': + if (cr) { + cr = false; + break; + } + /* fallthrough */ + case '\u2028': + case '\u2029': + case '\u000B': + case '\u000C': + case '\u0085': + cr = false; + line++; + lastcolumn = column; + column = 0; + break; + case -1: + cr = false; + break; + default: + cr = false; + column++; + break; + } + + /* + if (isLineSeparator(c)) { + line++; + lastcolumn = column; + column = 0; + } + else { + column++; + } + */ + return c; + } + + /* You can unget AT MOST one newline. */ + private void unread(int c) + throws IOException { + /* XXX Must unread newlines. */ + if (c != -1) { + if (isLineSeparator(c)) { + line--; + column = lastcolumn; + cr = false; + } else { + column--; + } + switch (ucount) { + case 0: + u0 = c; + ucount = 1; + break; + case 1: + u1 = c; + ucount = 2; + break; + default: + throw new IllegalStateException( + "Cannot unget another character!" + ); + } + // reader.unread(c); + } + } + + /* Consumes the rest of the current line into an invalid. */ + @Nonnull + private Token invalid(StringBuilder text, String reason) + throws IOException, + LexerException { + int d = read(); + while (!isLineSeparator(d)) { + text.append((char) d); + d = read(); + } + unread(d); + return new Token(INVALID, text.toString(), reason); + } + + @Nonnull + private Token ccomment() + throws IOException, + LexerException { + StringBuilder text = new StringBuilder("/*"); + int d; + do { + do { + d = read(); + text.append((char) d); + } while (d != '*'); + do { + d = read(); + text.append((char) d); + } while (d == '*'); + } while (d != '/'); + return new Token(CCOMMENT, text.toString()); + } + + @Nonnull + private Token cppcomment() + throws IOException, + LexerException { + StringBuilder text = new StringBuilder("//"); + int d = read(); + while (!isLineSeparator(d)) { + text.append((char) d); + d = read(); + } + unread(d); + return new Token(CPPCOMMENT, text.toString()); + } + + private int escape(StringBuilder text) + throws IOException, + LexerException { + int d = read(); + switch (d) { + case 'a': + text.append('a'); + return 0x07; + case 'b': + text.append('b'); + return '\b'; + case 'f': + text.append('f'); + return '\f'; + case 'n': + text.append('n'); + return '\n'; + case 'r': + text.append('r'); + return '\r'; + case 't': + text.append('t'); + return '\t'; + case 'v': + text.append('v'); + return 0x0b; + case '\\': + text.append('\\'); + return '\\'; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + int len = 0; + int val = 0; + do { + val = (val << 3) + Character.digit(d, 8); + text.append((char) d); + d = read(); + } while (++len < 3 && Character.digit(d, 8) != -1); + unread(d); + return val; + + case 'x': + text.append((char) d); + len = 0; + val = 0; + while (len++ < 2) { + d = read(); + if (Character.digit(d, 16) == -1) { + unread(d); + break; + } + val = (val << 4) + Character.digit(d, 16); + text.append((char) d); + } + return val; + + /* Exclude two cases from the warning. */ + case '"': + text.append('"'); + return '"'; + case '\'': + text.append('\''); + return '\''; + + default: + warning("Unnecessary escape character " + (char) d); + text.append((char) d); + return d; + } + } + + @Nonnull + private Token character() + throws IOException, + LexerException { + StringBuilder text = new StringBuilder("'"); + int d = read(); + if (d == '\\') { + text.append('\\'); + d = escape(text); + } else if (isLineSeparator(d)) { + unread(d); + return new Token(INVALID, text.toString(), + "Unterminated character literal"); + } else if (d == '\'') { + text.append('\''); + return new Token(INVALID, text.toString(), + "Empty character literal"); + } else if (!Character.isDefined(d)) { + text.append('?'); + return invalid(text, "Illegal unicode character literal"); + } else { + text.append((char) d); + } + + int e = read(); + if (e != '\'') { + // error("Illegal character constant"); + /* We consume up to the next ' or the rest of the line. */ + for (;;) { + if (isLineSeparator(e)) { + unread(e); + break; + } + text.append((char) e); + if (e == '\'') + break; + e = read(); + } + return new Token(INVALID, text.toString(), + "Illegal character constant " + text); + } + text.append('\''); + /* XXX It this a bad cast? */ + return new Token(CHARACTER, + text.toString(), Character.valueOf((char) d)); + } + + @Nonnull + private Token string(char open, char close) + throws IOException, + LexerException { + StringBuilder text = new StringBuilder(); + text.append(open); + + StringBuilder buf = new StringBuilder(); + + for (;;) { + int c = read(); + if (c == close) { + break; + } else if (c == '\\') { + text.append('\\'); + if (!include) { + char d = (char) escape(text); + buf.append(d); + } + } else if (c == -1) { + unread(c); + // error("End of file in string literal after " + buf); + return new Token(INVALID, text.toString(), + "End of file in string literal after " + buf); + } else if (isLineSeparator(c)) { + unread(c); + // error("Unterminated string literal after " + buf); + return new Token(INVALID, text.toString(), + "Unterminated string literal after " + buf); + } else { + text.append((char) c); + buf.append((char) c); + } + } + text.append(close); + switch (close) { + case '"': + return new Token(STRING, + text.toString(), buf.toString()); + case '>': + return new Token(HEADER, + text.toString(), buf.toString()); + case '\'': + if (buf.length() == 1) + return new Token(CHARACTER, + text.toString(), buf.toString()); + return new Token(SQSTRING, + text.toString(), buf.toString()); + default: + throw new IllegalStateException( + "Unknown closing character " + String.valueOf(close)); + } + } + + @Nonnull + private Token _number_suffix(StringBuilder text, NumericValue value, int d) + throws IOException, + LexerException { + int flags = 0; // U, I, L, LL, F, D, MSB + for (;;) { + if (d == 'U' || d == 'u') { + if ((flags & NumericValue.F_UNSIGNED) != 0) + warning("Duplicate unsigned suffix " + d); + flags |= NumericValue.F_UNSIGNED; + text.append((char) d); + d = read(); + } else if (d == 'L' || d == 'l') { + if ((flags & NumericValue.FF_SIZE) != 0) + warning("Nultiple length suffixes after " + text); + text.append((char) d); + int e = read(); + if (e == d) { // Case must match. Ll is Welsh. + flags |= NumericValue.F_LONGLONG; + text.append((char) e); + d = read(); + } else { + flags |= NumericValue.F_LONG; + d = e; + } + } else if (d == 'I' || d == 'i') { + if ((flags & NumericValue.FF_SIZE) != 0) + warning("Nultiple length suffixes after " + text); + flags |= NumericValue.F_INT; + text.append((char) d); + d = read(); + } else if (d == 'F' || d == 'f') { + if ((flags & NumericValue.FF_SIZE) != 0) + warning("Nultiple length suffixes after " + text); + flags |= NumericValue.F_FLOAT; + text.append((char) d); + d = read(); + } else if (d == 'D' || d == 'd') { + if ((flags & NumericValue.FF_SIZE) != 0) + warning("Nultiple length suffixes after " + text); + flags |= NumericValue.F_DOUBLE; + text.append((char) d); + d = read(); + } // This should probably be isPunct() || isWhite(). + else if (Character.isLetter(d) || d == '_') { + unread(d); + value.setFlags(flags); + return invalid(text, + "Invalid suffix \"" + (char) d + + "\" on numeric constant"); + } else { + unread(d); + value.setFlags(flags); + return new Token(NUMBER, + text.toString(), value); + } + } + } + + /* Either a decimal part, or a hex exponent. */ + @Nonnull + private String _number_part(StringBuilder text, int base) + throws IOException, + LexerException { + StringBuilder part = new StringBuilder(); + int d = read(); + while (Character.digit(d, base) != -1) { + text.append((char) d); + part.append((char) d); + d = read(); + } + unread(d); + return part.toString(); + } + + /* We already chewed a zero, so empty is fine. */ + @Nonnull + private Token number_octal() + throws IOException, + LexerException { + StringBuilder text = new StringBuilder("0"); + String integer = _number_part(text, 8); + int d = read(); + NumericValue value = new NumericValue(8, integer); + return _number_suffix(text, value, d); + } + + /* We do not know whether know the first digit is valid. */ + @Nonnull + private Token number_hex(char x) + throws IOException, + LexerException { + StringBuilder text = new StringBuilder("0"); + text.append(x); + String integer = _number_part(text, 16); + NumericValue value = new NumericValue(16, integer); + int d = read(); + if (d == '.') { + String fraction = _number_part(text, 16); + value.setFractionalPart(fraction); + d = read(); + } + if (d == 'P' || d == 'p') { + String exponent = _number_part(text, 10); + value.setExponent(exponent); + d = read(); + } + // XXX Make sure it's got enough parts + return _number_suffix(text, value, d); + } + + /* We know we have at least one valid digit, but empty is not + * fine. */ + @Nonnull + private Token number_decimal() + throws IOException, + LexerException { + StringBuilder text = new StringBuilder(); + String integer = _number_part(text, 10); + NumericValue value = new NumericValue(10, integer); + int d = read(); + if (d == '.') { + String fraction = _number_part(text, 10); + value.setFractionalPart(fraction); + d = read(); + } + if (d == 'E' || d == 'e') { + String exponent = _number_part(text, 10); + value.setExponent(exponent); + d = read(); + } + // XXX Make sure it's got enough parts + return _number_suffix(text, value, d); + } + + @Nonnull + private Token identifier(int c) + throws IOException, + LexerException { + StringBuilder text = new StringBuilder(); + int d; + text.append((char) c); + for (;;) { + d = read(); + if (Character.isIdentifierIgnorable(d)) + ; else if (Character.isJavaIdentifierPart(d)) + text.append((char) d); + else + break; + } + unread(d); + return new Token(IDENTIFIER, text.toString()); + } + + @Nonnull + private Token whitespace(int c) + throws IOException, + LexerException { + StringBuilder text = new StringBuilder(); + int d; + text.append((char) c); + for (;;) { + d = read(); + if (ppvalid && isLineSeparator(d)) /* XXX Ugly. */ + + break; + if (Character.isWhitespace(d)) + text.append((char) d); + else + break; + } + unread(d); + return new Token(WHITESPACE, text.toString()); + } + + /* No token processed by cond() contains a newline. */ + @Nonnull + private Token cond(char c, int yes, int no) + throws IOException, + LexerException { + int d = read(); + if (c == d) + return new Token(yes); + unread(d); + return new Token(no); + } + + @Override + public Token token() + throws IOException, + LexerException { + Token tok = null; + + int _l = line; + int _c = column; + + int c = read(); + int d; + + switch (c) { + case '\n': + if (ppvalid) { + bol = true; + if (include) { + tok = new Token(NL, _l, _c, "\n"); + } else { + int nls = 0; + do { + nls++; + d = read(); + } while (d == '\n'); + unread(d); + char[] text = new char[nls]; + for (int i = 0; i < text.length; i++) + text[i] = '\n'; + // Skip the bol = false below. + tok = new Token(NL, _l, _c, new String(text)); + } + if (DEBUG) + System.out.println("lx: Returning NL: " + tok); + return tok; + } + /* Let it be handled as whitespace. */ + break; + + case '!': + tok = cond('=', NE, '!'); + break; + + case '#': + if (bol) + tok = new Token(HASH); + else + tok = cond('#', PASTE, '#'); + break; + + case '+': + d = read(); + if (d == '+') + tok = new Token(INC); + else if (d == '=') + tok = new Token(PLUS_EQ); + else + unread(d); + break; + case '-': + d = read(); + if (d == '-') + tok = new Token(DEC); + else if (d == '=') + tok = new Token(SUB_EQ); + else if (d == '>') + tok = new Token(ARROW); + else + unread(d); + break; + + case '*': + tok = cond('=', MULT_EQ, '*'); + break; + case '/': + d = read(); + if (d == '*') + tok = ccomment(); + else if (d == '/') + tok = cppcomment(); + else if (d == '=') + tok = new Token(DIV_EQ); + else + unread(d); + break; + + case '%': + d = read(); + if (d == '=') + tok = new Token(MOD_EQ); + else if (digraphs && d == '>') + tok = new Token('}'); // digraph + else if (digraphs && d == ':') + PASTE: + { + d = read(); + if (d != '%') { + unread(d); + tok = new Token('#'); // digraph + break PASTE; + } + d = read(); + if (d != ':') { + unread(d); // Unread 2 chars here. + unread('%'); + tok = new Token('#'); // digraph + break PASTE; + } + tok = new Token(PASTE); // digraph + } + else + unread(d); + break; + + case ':': + /* :: */ + d = read(); + if (digraphs && d == '>') + tok = new Token(']'); // digraph + else + unread(d); + break; + + case '<': + if (include) { + tok = string('<', '>'); + } else { + d = read(); + if (d == '=') + tok = new Token(LE); + else if (d == '<') + tok = cond('=', LSH_EQ, LSH); + else if (digraphs && d == ':') + tok = new Token('['); // digraph + else if (digraphs && d == '%') + tok = new Token('{'); // digraph + else + unread(d); + } + break; + + case '=': + tok = cond('=', EQ, '='); + break; + + case '>': + d = read(); + if (d == '=') + tok = new Token(GE); + else if (d == '>') + tok = cond('=', RSH_EQ, RSH); + else + unread(d); + break; + + case '^': + tok = cond('=', XOR_EQ, '^'); + break; + + case '|': + d = read(); + if (d == '=') + tok = new Token(OR_EQ); + else if (d == '|') + tok = cond('=', LOR_EQ, LOR); + else + unread(d); + break; + case '&': + d = read(); + if (d == '&') + tok = cond('=', LAND_EQ, LAND); + else if (d == '=') + tok = new Token(AND_EQ); + else + unread(d); + break; + + case '.': + d = read(); + if (d == '.') + tok = cond('.', ELLIPSIS, RANGE); + else + unread(d); + if (Character.isDigit(d)) { + unread('.'); + tok = number_decimal(); + } + /* XXX decimal fraction */ + break; + + case '0': + /* octal or hex */ + d = read(); + if (d == 'x' || d == 'X') + tok = number_hex((char) d); + else { + unread(d); + tok = number_octal(); + } + break; + + case '\'': + tok = string('\'', '\''); + break; + + case '"': + tok = string('"', '"'); + break; + + case -1: + close(); + tok = new Token(EOF, _l, _c, "<eof>"); + break; + } + + if (tok == null) { + if (Character.isWhitespace(c)) { + tok = whitespace(c); + } else if (Character.isDigit(c)) { + unread(c); + tok = number_decimal(); + } else if (Character.isJavaIdentifierStart(c)) { + tok = identifier(c); + } else { + tok = new Token(c); + } + } + + if (bol) { + switch (tok.getType()) { + case WHITESPACE: + case CCOMMENT: + break; + default: + bol = false; + break; + } + } + + tok.setLocation(_l, _c); + if (DEBUG) + System.out.println("lx: Returning " + tok); + // (new Exception("here")).printStackTrace(System.out); + return tok; + } + + public void close() + throws IOException { + if (reader != null) { + reader.close(); + reader = null; + } + super.close(); + } + +} diff --git a/src/main/java/org/anarres/cpp/Macro.java b/src/main/java/org/anarres/cpp/Macro.java new file mode 100644 index 0000000..534cb2b --- /dev/null +++ b/src/main/java/org/anarres/cpp/Macro.java @@ -0,0 +1,189 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.anarres.cpp; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +/** + * A macro object. + * + * This encapsulates a name, an argument count, and a token stream + * for replacement. The replacement token stream may contain the + * extra tokens {@link Token#M_ARG} and {@link Token#M_STRING}. + */ +public class Macro { + private Source source; + private String name; + /* It's an explicit decision to keep these around here. We don't + * need to; the argument token type is M_ARG and the value + * is the index. The strings themselves are only used in + * stringification of the macro, for debugging. */ + private List<String> args; + private boolean variadic; + private List<Token> tokens; + + public Macro(Source source, String name) { + this.source = source; + this.name = name; + this.args = null; + this.variadic = false; + this.tokens = new ArrayList<Token>(); + } + + public Macro(String name) { + this(null, name); + } + + /** + * Sets the Source from which this macro was parsed. + */ + public void setSource(Source s) { + this.source = s; + } + + /** + * Returns the Source from which this macro was parsed. + * + * This method may return null if the macro was not parsed + * from a regular file. + */ + public Source getSource() { + return source; + } + + /** + * Returns the name of this macro. + */ + public String getName() { + return name; + } + + /** + * Sets the arguments to this macro. + */ + public void setArgs(List<String> args) { + this.args = args; + } + + /** + * Returns true if this is a function-like macro. + */ + public boolean isFunctionLike() { + return args != null; + } + + /** + * Returns the number of arguments to this macro. + */ + public int getArgs() { + return args.size(); + } + + /** + * Sets the variadic flag on this Macro. + */ + public void setVariadic(boolean b) { + this.variadic = b; + } + + /** + * Returns true if this is a variadic function-like macro. + */ + public boolean isVariadic() { + return variadic; + } + + /** + * Adds a token to the expansion of this macro. + */ + public void addToken(Token tok) { + this.tokens.add(tok); + } + + /** + * Adds a "paste" operator to the expansion of this macro. + * + * A paste operator causes the next token added to be pasted + * to the previous token when the macro is expanded. + * It is an error for a macro to end with a paste token. + */ + public void addPaste(Token tok) { + /* + * Given: tok0 ## tok1 + * We generate: M_PASTE, tok0, tok1 + * This extends as per a stack language: + * tok0 ## tok1 ## tok2 -> + * M_PASTE, tok0, M_PASTE, tok1, tok2 + */ + this.tokens.add(tokens.size() - 1, tok); + } + + /* pp */ List<Token> getTokens() { + return tokens; + } + + /* Paste tokens are inserted before the first of the two pasted + * tokens, so it's a kind of bytecode notation. This method + * swaps them around again. We know that there will never be two + * sequential paste tokens, so a boolean is sufficient. */ + public String getText() { + StringBuilder buf = new StringBuilder(); + boolean paste = false; + for (int i = 0; i < tokens.size(); i++) { + Token tok = tokens.get(i); + if (tok.getType() == Token.M_PASTE) { + assert paste == false : "Two sequential pastes."; + paste = true; + continue; + } + else { + buf.append(tok.getText()); + } + if (paste) { + buf.append(" #" + "# "); + paste = false; + } + // buf.append(tokens.get(i)); + } + return buf.toString(); + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(name); + if (args != null) { + buf.append('('); + Iterator<String> it = args.iterator(); + while (it.hasNext()) { + buf.append(it.next()); + if (it.hasNext()) + buf.append(", "); + else if (isVariadic()) + buf.append("..."); + } + buf.append(')'); + } + if (!tokens.isEmpty()) { + buf.append(" => ").append(getText()); + } + return buf.toString(); + } + +} diff --git a/src/main/java/org/anarres/cpp/MacroTokenSource.java b/src/main/java/org/anarres/cpp/MacroTokenSource.java new file mode 100644 index 0000000..516e4f2 --- /dev/null +++ b/src/main/java/org/anarres/cpp/MacroTokenSource.java @@ -0,0 +1,203 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +import java.io.IOException; +import java.util.Iterator; +import java.util.List; + +import static org.anarres.cpp.Token.*; + +/* This source should always be active, since we don't expand macros + * in any inactive context. */ +/* pp */ class MacroTokenSource extends Source { + + private final Macro macro; + private final Iterator<Token> tokens; /* Pointer into the macro. */ + + private final List<Argument> args; /* { unexpanded, expanded } */ + + private Iterator<Token> arg; /* "current expansion" */ + + /* pp */ MacroTokenSource(Macro m, List<Argument> args) { + this.macro = m; + this.tokens = m.getTokens().iterator(); + this.args = args; + this.arg = null; + } + + @Override + /* pp */ boolean isExpanding(Macro m) { + /* When we are expanding an arg, 'this' macro is not + * being expanded, and thus we may re-expand it. */ + if (/* XXX this.arg == null && */this.macro == m) + return true; + return super.isExpanding(m); + } + + /* XXX Called from Preprocessor [ugly]. */ + /* pp */ static void escape(StringBuilder buf, CharSequence cs) { + for (int i = 0; i < cs.length(); i++) { + char c = cs.charAt(i); + switch (c) { + case '\\': + buf.append("\\\\"); + break; + case '"': + buf.append("\\\""); + break; + case '\n': + buf.append("\\n"); + break; + case '\r': + buf.append("\\r"); + break; + default: + buf.append(c); + } + } + } + + private void concat(StringBuilder buf, Argument arg) { + Iterator<Token> it = arg.iterator(); + while (it.hasNext()) { + Token tok = it.next(); + buf.append(tok.getText()); + } + } + + private Token stringify(Token pos, Argument arg) { + StringBuilder buf = new StringBuilder(); + concat(buf, arg); + // System.out.println("Concat: " + arg + " -> " + buf); + StringBuilder str = new StringBuilder("\""); + escape(str, buf); + str.append("\""); + // System.out.println("Escape: " + buf + " -> " + str); + return new Token(STRING, + pos.getLine(), pos.getColumn(), + str.toString(), buf.toString()); + } + + + /* At this point, we have consumed the first M_PASTE. + * @see Macro#addPaste(Token) */ + private void paste(Token ptok) + throws IOException, + LexerException { + StringBuilder buf = new StringBuilder(); + Token err = null; + /* We know here that arg is null or expired, + * since we cannot paste an expanded arg. */ + + int count = 2; + for (int i = 0; i < count; i++) { + if (!tokens.hasNext()) { + /* XXX This one really should throw. */ + error(ptok.getLine(), ptok.getColumn(), + "Paste at end of expansion"); + buf.append(' ').append(ptok.getText()); + break; + } + Token tok = tokens.next(); + // System.out.println("Paste " + tok); + switch (tok.getType()) { + case M_PASTE: + /* One extra to paste, plus one because the + * paste token didn't count. */ + count += 2; + ptok = tok; + break; + case M_ARG: + int idx = ((Integer) tok.getValue()).intValue(); + concat(buf, args.get(idx)); + break; + /* XXX Test this. */ + case CCOMMENT: + case CPPCOMMENT: + break; + default: + buf.append(tok.getText()); + break; + } + } + + /* Push and re-lex. */ + /* + StringBuilder src = new StringBuilder(); + escape(src, buf); + StringLexerSource sl = new StringLexerSource(src.toString()); + */ + StringLexerSource sl = new StringLexerSource(buf.toString()); + + /* XXX Check that concatenation produces a valid token. */ + arg = new SourceIterator(sl); + } + + public Token token() + throws IOException, + LexerException { + for (;;) { + /* Deal with lexed tokens first. */ + + if (arg != null) { + if (arg.hasNext()) { + Token tok = arg.next(); + /* XXX PASTE -> INVALID. */ + assert tok.getType() != M_PASTE : + "Unexpected paste token"; + return tok; + } + arg = null; + } + + if (!tokens.hasNext()) + return new Token(EOF, -1, -1, ""); /* End of macro. */ + + Token tok = tokens.next(); + int idx; + switch (tok.getType()) { + case M_STRING: + /* Use the nonexpanded arg. */ + idx = ((Integer) tok.getValue()).intValue(); + return stringify(tok, args.get(idx)); + case M_ARG: + /* Expand the arg. */ + idx = ((Integer) tok.getValue()).intValue(); + // System.out.println("Pushing arg " + args.get(idx)); + arg = args.get(idx).expansion(); + break; + case M_PASTE: + paste(tok); + break; + default: + return tok; + } + } /* for */ + + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append("expansion of ").append(macro.getName()); + Source parent = getParent(); + if (parent != null) + buf.append(" in ").append(String.valueOf(parent)); + return buf.toString(); + } +} diff --git a/src/main/java/org/anarres/cpp/Main.java b/src/main/java/org/anarres/cpp/Main.java new file mode 100644 index 0000000..1902798 --- /dev/null +++ b/src/main/java/org/anarres/cpp/Main.java @@ -0,0 +1,320 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +import java.io.File; +import java.io.PrintStream; +import java.util.EnumSet; +import gnu.getopt.Getopt; +import gnu.getopt.LongOpt; + +/** + * (Currently a simple test class). + */ +public class Main { + + private static class Option extends LongOpt { + + private String eg; + private String help; + + public Option(String word, int arg, int ch, + String eg, String help) { + super(word, arg, null, ch); + this.eg = eg; + this.help = help; + } + } + + private static final Option[] OPTS = new Option[]{ + new Option("help", LongOpt.NO_ARGUMENT, 'h', null, + "Displays help and usage information."), + new Option("define", LongOpt.REQUIRED_ARGUMENT, 'D', "name=definition", + "Defines the given macro."), + new Option("undefine", LongOpt.REQUIRED_ARGUMENT, 'U', "name", + "Undefines the given macro, previously either builtin or defined using -D."), + new Option("include", LongOpt.REQUIRED_ARGUMENT, 1, "file", + "Process file as if \"#" + "include \"file\"\" appeared as the first line of the primary source file."), + new Option("incdir", LongOpt.REQUIRED_ARGUMENT, 'I', "dir", + "Adds the directory dir to the list of directories to be searched for header files."), + new Option("iquote", LongOpt.REQUIRED_ARGUMENT, 0, "dir", + "Adds the directory dir to the list of directories to be searched for header files included using \"\"."), + new Option("warning", LongOpt.REQUIRED_ARGUMENT, 'W', "type", + "Enables the named warning class (" + getWarnings() + ")."), + new Option("no-warnings", LongOpt.NO_ARGUMENT, 'w', null, + "Disables ALL warnings."), + new Option("verbose", LongOpt.NO_ARGUMENT, 'v', null, + "Operates incredibly verbosely."), + new Option("debug", LongOpt.NO_ARGUMENT, 3, null, + "Operates incredibly verbosely."), + new Option("version", LongOpt.NO_ARGUMENT, 2, null, + "Prints jcpp's version number (" + Version.getVersion() + ")"),}; + + private static CharSequence getWarnings() { + StringBuilder buf = new StringBuilder(); + for (Warning w : Warning.values()) { + if (buf.length() > 0) + buf.append(", "); + String name = w.name().toLowerCase(); + buf.append(name.replace('_', '-')); + } + return buf; + } + + public static void main(String[] args) throws Exception { + (new Main()).run(args); + } + + public void run(String[] args) throws Exception { + Option[] opts = OPTS; + String sopts = getShortOpts(opts); + Getopt g = new Getopt("jcpp", args, sopts, opts); + int c; + String arg; + int idx; + + Preprocessor pp = new Preprocessor(); + pp.addFeature(Feature.DIGRAPHS); + pp.addFeature(Feature.TRIGRAPHS); + pp.addFeature(Feature.LINEMARKERS); + pp.addWarning(Warning.IMPORT); + pp.setListener(new PreprocessorListener()); + pp.addMacro("__JCPP__"); + pp.getSystemIncludePath().add("/usr/local/include"); + pp.getSystemIncludePath().add("/usr/include"); + pp.getFrameworksPath().add("/System/Library/Frameworks"); + pp.getFrameworksPath().add("/Library/Frameworks"); + pp.getFrameworksPath().add("/Local/Library/Frameworks"); + + GETOPT: + while ((c = g.getopt()) != -1) { + switch (c) { + case 'D': + arg = g.getOptarg(); + idx = arg.indexOf('='); + if (idx == -1) + pp.addMacro(arg); + else + pp.addMacro(arg.substring(0, idx), + arg.substring(idx + 1)); + break; + case 'U': + pp.getMacros().remove(g.getOptarg()); + break; + case 'I': + pp.getSystemIncludePath().add(g.getOptarg()); + break; + case 0: // --iquote= + pp.getQuoteIncludePath().add(g.getOptarg()); + break; + case 'W': + arg = g.getOptarg().toUpperCase(); + arg = arg.replace('-', '_'); + if (arg.equals("ALL")) + pp.addWarnings(EnumSet.allOf(Warning.class)); + else + pp.addWarning(Enum.valueOf(Warning.class, arg)); + break; + case 'w': + pp.getWarnings().clear(); + break; + case 1: // --include= + // pp.addInput(new File(g.getOptarg())); + // Comply exactly with spec. + pp.addInput(new StringLexerSource( + "#" + "include \"" + g.getOptarg() + "\"\n" + )); + break; + case 2: // --version + version(System.out); + return; + case 'v': + pp.addFeature(Feature.VERBOSE); + break; + case 3: + pp.addFeature(Feature.DEBUG); + break; + case 'h': + usage(getClass().getName(), opts); + return; + default: + throw new Exception("Illegal option " + (char) c); + case '?': + continue; /* Make failure-proof. */ + + } + } + + for (int i = g.getOptind(); i < args.length; i++) + pp.addInput(new FileLexerSource(new File(args[i]))); + if (g.getOptind() == args.length) + pp.addInput(new InputLexerSource(System.in)); + + if (pp.getFeature(Feature.VERBOSE)) { + System.err.println("#" + "include \"...\" search starts here:"); + for (String dir : pp.getQuoteIncludePath()) + System.err.println(" " + dir); + System.err.println("#" + "include <...> search starts here:"); + for (String dir : pp.getSystemIncludePath()) + System.err.println(" " + dir); + System.err.println("End of search list."); + } + + try { + for (;;) { + Token tok = pp.token(); + if (tok == null) + break; + if (tok.getType() == Token.EOF) + break; + System.out.print(tok.getText()); + } + } catch (Exception e) { + e.printStackTrace(System.err); + Source s = pp.getSource(); + while (s != null) { + System.err.println(" -> " + s); + s = s.getParent(); + } + } + + } + + private void version(PrintStream out) { + out.println("Anarres Java C Preprocessor version " + Version.getVersion()); + out.println("Copyright (C) 2008-2014 Shevek (http://www.anarres.org/)."); + out.println("This is free software; see the source for copying conditions. There is NO"); + out.println("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."); + } + + private static String getShortOpts(Option[] opts) + throws Exception { + StringBuilder buf = new StringBuilder(); + for (int i = 0; i < opts.length; i++) { + char c = (char) opts[i].getVal(); + if (!Character.isLetterOrDigit(c)) + continue; + for (int j = 0; j < buf.length(); j++) + if (buf.charAt(j) == c) + throw new Exception( + "Duplicate short option " + c + ); + buf.append(c); + switch (opts[i].getHasArg()) { + case LongOpt.NO_ARGUMENT: + break; + case LongOpt.OPTIONAL_ARGUMENT: + buf.append("::"); + break; + case LongOpt.REQUIRED_ARGUMENT: + buf.append(":"); + break; + } + } + return buf.toString(); + } + + /* This is incomplete but nearly there. */ + /** + * Wraps a string. + * + * The output form is: + * <pre> + * prefix in[0] + * <--indent-> in[1] + * <--indent-> in[2] + * <-----width----> + * </pre> + */ + /* XXX There's some of this in commons. */ + private static String wrap(String in, String prefix, + int indent, int width) { + StringBuilder buf = new StringBuilder(prefix); + + while (buf.length() < indent) + buf.append(' '); + + int start = 0; + + while (start < in.length()) { + while (start < in.length() + && Character.isWhitespace(in.charAt(start))) + start++; + + int end = start + width - indent; + + if (end > in.length()) { + buf.append(in.substring(start)); + break; + } + + int idx = end; + while (!Character.isWhitespace(in.charAt(idx))) + idx--; + + if (idx == start) { + idx = end - 1; + buf.append(in.substring(start, idx)); + buf.append('-'); + } else { + buf.append(in.substring(start, idx)); + start = idx; + } + + start = idx; + } + + return buf.toString(); + } + + private static void usage(String command, Option[] options) { + StringBuilder text = new StringBuilder("Usage: "); + text.append(command).append('\n'); + for (int i = 0; i < options.length; i++) { + StringBuilder line = new StringBuilder(); + Option opt = options[i]; + line.append(" --").append(opt.getName()); + switch (opt.getHasArg()) { + case LongOpt.NO_ARGUMENT: + break; + case LongOpt.OPTIONAL_ARGUMENT: + line.append("[=").append(opt.eg).append(']'); + break; + case LongOpt.REQUIRED_ARGUMENT: + line.append('=').append(opt.eg); + break; + } + if (Character.isLetterOrDigit(opt.getVal())) + line.append(" (-").append((char) opt.getVal()).append(")"); + if (line.length() < 30) { + while (line.length() < 30) + line.append(' '); + } else { + line.append('\n'); + for (int j = 0; j < 30; j++) + line.append(' '); + } + /* This should use wrap. */ + line.append(opt.help); + line.append('\n'); + text.append(line); + } + + System.out.println(text); + } + +} diff --git a/src/main/java/org/anarres/cpp/NumericValue.java b/src/main/java/org/anarres/cpp/NumericValue.java new file mode 100644 index 0000000..8d961c4 --- /dev/null +++ b/src/main/java/org/anarres/cpp/NumericValue.java @@ -0,0 +1,179 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +import java.math.BigDecimal; +import java.math.BigInteger; + +public class NumericValue extends Number { + + public static final int F_UNSIGNED = 1; + public static final int F_INT = 2; + public static final int F_LONG = 4; + public static final int F_LONGLONG = 8; + public static final int F_FLOAT = 16; + public static final int F_DOUBLE = 32; + + public static final int FF_SIZE = F_INT | F_LONG | F_LONGLONG | F_FLOAT | F_DOUBLE; + + private final int base; + private final String integer; + private String fraction; + private String exponent; + private int flags; + + public NumericValue(int base, String integer) { + this.base = base; + this.integer = integer; + } + + public int getBase() { + return base; + } + + public String getIntegerPart() { + return integer; + } + + public String getFractionalPart() { + return fraction; + } + + /* pp */ void setFractionalPart(String fraction) { + this.fraction = fraction; + } + + public String getExponent() { + return exponent; + } + + /* pp */ void setExponent(String exponent) { + this.exponent = exponent; + } + + public int getFlags() { + return flags; + } + + /* pp */ void setFlags(int flags) { + this.flags = flags; + } + + /** + * So, it turns out that parsing arbitrary bases into arbitrary + * precision numbers is nontrivial, and this routine gets it wrong + * in many important cases. + */ + public BigDecimal toBigDecimal() { + int scale = 0; + String text = getIntegerPart(); + String t_fraction = getFractionalPart(); + if (t_fraction != null) { + text += getFractionalPart(); + // XXX Wrong for anything but base 10. + scale += getFractionalPart().length(); + } + if (getExponent() != null) + scale -= Integer.parseInt(getExponent()); + BigInteger unscaled = new BigInteger(text, getBase()); + return new BigDecimal(unscaled, scale); + } + + public Number toJavaLangNumber() { + int flags = getFlags(); + if ((flags & F_DOUBLE) != 0) + return doubleValue(); + else if ((flags & F_FLOAT) != 0) + return floatValue(); + else if ((flags & (F_LONG | F_LONGLONG)) != 0) + return longValue(); + else if ((flags & F_INT) != 0) + return intValue(); + else if (getFractionalPart() != null) + return doubleValue(); // .1 is a double in Java. + else if (getExponent() != null) + return doubleValue(); + else + return intValue(); + } + + @Override + public int intValue() { + return Integer.parseInt(toString()); + } + + @Override + public long longValue() { + return Long.parseLong(toString()); + } + + @Override + public float floatValue() { + return Float.parseFloat(toString()); + } + + @Override + public double doubleValue() { + return Double.parseDouble(toString()); + } + + private boolean appendFlags(StringBuilder buf, String suffix, int flag) { + if ((getFlags() & flag) != flag) + return false; + buf.append(suffix); + return true; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + switch (base) { + case 8: + buf.append('0'); + break; + case 10: + break; + case 16: + buf.append("0x"); + break; + case 2: + buf.append('b'); + break; + default: + buf.append("[base-").append(base).append("]"); + break; + } + buf.append(getIntegerPart()); + if (getFractionalPart() != null) + buf.append('.').append(getFractionalPart()); + if (getExponent() != null) { + buf.append(base > 10 ? 'p' : 'e'); + buf.append(getExponent()); + } + /* + if (appendFlags(buf, "ui", F_UNSIGNED | F_INT)); + else if (appendFlags(buf, "ul", F_UNSIGNED | F_LONG)); + else if (appendFlags(buf, "ull", F_UNSIGNED | F_LONGLONG)); + else if (appendFlags(buf, "i", F_INT)); + else if (appendFlags(buf, "l", F_LONG)); + else if (appendFlags(buf, "ll", F_LONGLONG)); + else if (appendFlags(buf, "f", F_FLOAT)); + else if (appendFlags(buf, "d", F_DOUBLE)); + */ + return buf.toString(); + } +} diff --git a/src/main/java/org/anarres/cpp/Preprocessor.java b/src/main/java/org/anarres/cpp/Preprocessor.java new file mode 100644 index 0000000..35480b2 --- /dev/null +++ b/src/main/java/org/anarres/cpp/Preprocessor.java @@ -0,0 +1,2016 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Stack; + +import javax.annotation.Nonnull; +import static org.anarres.cpp.Token.*; +import static org.anarres.cpp.PreprocessorCommand.*; + +/** + * A C Preprocessor. + * The Preprocessor outputs a token stream which does not need + * re-lexing for C or C++. Alternatively, the output text may be + * reconstructed by concatenating the {@link Token#getText() text} + * values of the returned {@link Token Tokens}. (See + * {@link CppReader}, which does this.) + */ +/* + Source file name and line number information is conveyed by lines of the form + + # linenum filename flags + + These are called linemarkers. They are inserted as needed into + the output (but never within a string or character constant). They + mean that the following line originated in file filename at line + linenum. filename will never contain any non-printing characters; + they are replaced with octal escape sequences. + + After the file name comes zero or more flags, which are `1', `2', + `3', or `4'. If there are multiple flags, spaces separate them. Here + is what the flags mean: + + `1' + This indicates the start of a new file. + `2' + This indicates returning to a file (after having included another + file). + `3' + This indicates that the following text comes from a system header + file, so certain warnings should be suppressed. + `4' + This indicates that the following text should be treated as being + wrapped in an implicit extern "C" block. + */ +public class Preprocessor implements Closeable { + + private static final Source INTERNAL = new Source() { + @Override + public Token token() + throws IOException, + LexerException { + throw new LexerException("Cannot read from " + getName()); + } + + @Override + public String getPath() { + return "<internal-data>"; + } + + @Override + public String getName() { + return "internal data"; + } + }; + private static final Macro __LINE__ = new Macro(INTERNAL, "__LINE__"); + private static final Macro __FILE__ = new Macro(INTERNAL, "__FILE__"); + private static final Macro __COUNTER__ = new Macro(INTERNAL, "__COUNTER__"); + + private List<Source> inputs; + + /* The fundamental engine. */ + private Map<String, Macro> macros; + private Stack<State> states; + private Source source; + + /* Miscellaneous support. */ + private int counter; + + /* Support junk to make it work like cpp */ + private List<String> quoteincludepath; /* -iquote */ + + private List<String> sysincludepath; /* -I */ + + private List<String> frameworkspath; + private Set<Feature> features; + private Set<Warning> warnings; + private VirtualFileSystem filesystem; + private PreprocessorListener listener; + + public Preprocessor() { + this.inputs = new ArrayList<Source>(); + + this.macros = new HashMap<String, Macro>(); + macros.put(__LINE__.getName(), __LINE__); + macros.put(__FILE__.getName(), __FILE__); + macros.put(__COUNTER__.getName(), __COUNTER__); + this.states = new Stack<State>(); + states.push(new State()); + this.source = null; + + this.counter = 0; + + this.quoteincludepath = new ArrayList<String>(); + this.sysincludepath = new ArrayList<String>(); + this.frameworkspath = new ArrayList<String>(); + this.features = EnumSet.noneOf(Feature.class); + this.warnings = EnumSet.noneOf(Warning.class); + this.filesystem = new JavaFileSystem(); + this.listener = null; + } + + public Preprocessor(Source initial) { + this(); + addInput(initial); + } + + /** Equivalent to + * 'new Preprocessor(new {@link FileLexerSource}(file))' + */ + public Preprocessor(File file) + throws IOException { + this(new FileLexerSource(file)); + } + + /** + * Sets the VirtualFileSystem used by this Preprocessor. + */ + public void setFileSystem(VirtualFileSystem filesystem) { + this.filesystem = filesystem; + } + + /** + * Returns the VirtualFileSystem used by this Preprocessor. + */ + public VirtualFileSystem getFileSystem() { + return filesystem; + } + + /** + * Sets the PreprocessorListener which handles events for + * this Preprocessor. + * + * The listener is notified of warnings, errors and source + * changes, amongst other things. + */ + public void setListener(PreprocessorListener listener) { + this.listener = listener; + Source s = source; + while (s != null) { + // s.setListener(listener); + s.init(this); + s = s.getParent(); + } + } + + /** + * Returns the PreprocessorListener which handles events for + * this Preprocessor. + */ + public PreprocessorListener getListener() { + return listener; + } + + /** + * Returns the feature-set for this Preprocessor. + * + * This set may be freely modified by user code. + */ + public Set<Feature> getFeatures() { + return features; + } + + /** + * Adds a feature to the feature-set of this Preprocessor. + */ + public void addFeature(Feature f) { + features.add(f); + } + + /** + * Adds features to the feature-set of this Preprocessor. + */ + public void addFeatures(Collection<Feature> f) { + features.addAll(f); + } + + /** + * Returns true if the given feature is in + * the feature-set of this Preprocessor. + */ + public boolean getFeature(Feature f) { + return features.contains(f); + } + + /** + * Returns the warning-set for this Preprocessor. + * + * This set may be freely modified by user code. + */ + public Set<Warning> getWarnings() { + return warnings; + } + + /** + * Adds a warning to the warning-set of this Preprocessor. + */ + public void addWarning(Warning w) { + warnings.add(w); + } + + /** + * Adds warnings to the warning-set of this Preprocessor. + */ + public void addWarnings(Collection<Warning> w) { + warnings.addAll(w); + } + + /** + * Returns true if the given warning is in + * the warning-set of this Preprocessor. + */ + public boolean getWarning(Warning w) { + return warnings.contains(w); + } + + /** + * Adds input for the Preprocessor. + * + * Inputs are processed in the order in which they are added. + */ + public void addInput(Source source) { + source.init(this); + inputs.add(source); + } + + /** + * Adds input for the Preprocessor. + * + * @see #addInput(Source) + */ + public void addInput(File file) + throws IOException { + addInput(new FileLexerSource(file)); + } + + /** + * Handles an error. + * + * If a PreprocessorListener is installed, it receives the + * error. Otherwise, an exception is thrown. + */ + protected void error(int line, int column, String msg) + throws LexerException { + if (listener != null) + listener.handleError(source, line, column, msg); + else + throw new LexerException("Error at " + line + ":" + column + ": " + msg); + } + + /** + * Handles an error. + * + * If a PreprocessorListener is installed, it receives the + * error. Otherwise, an exception is thrown. + * + * @see #error(int, int, String) + */ + protected void error(Token tok, String msg) + throws LexerException { + error(tok.getLine(), tok.getColumn(), msg); + } + + /** + * Handles a warning. + * + * If a PreprocessorListener is installed, it receives the + * warning. Otherwise, an exception is thrown. + */ + protected void warning(int line, int column, String msg) + throws LexerException { + if (warnings.contains(Warning.ERROR)) + error(line, column, msg); + else if (listener != null) + listener.handleWarning(source, line, column, msg); + else + throw new LexerException("Warning at " + line + ":" + column + ": " + msg); + } + + /** + * Handles a warning. + * + * If a PreprocessorListener is installed, it receives the + * warning. Otherwise, an exception is thrown. + * + * @see #warning(int, int, String) + */ + protected void warning(Token tok, String msg) + throws LexerException { + warning(tok.getLine(), tok.getColumn(), msg); + } + + /** + * Adds a Macro to this Preprocessor. + * + * The given {@link Macro} object encapsulates both the name + * and the expansion. + */ + public void addMacro(Macro m) throws LexerException { + // System.out.println("Macro " + m); + String name = m.getName(); + /* Already handled as a source error in macro(). */ + if ("defined".equals(name)) + throw new LexerException("Cannot redefine name 'defined'"); + macros.put(m.getName(), m); + } + + /** + * Defines the given name as a macro. + * + * The String value is lexed into a token stream, which is + * used as the macro expansion. + */ + public void addMacro(String name, String value) + throws LexerException { + try { + Macro m = new Macro(name); + StringLexerSource s = new StringLexerSource(value); + for (;;) { + Token tok = s.token(); + if (tok.getType() == EOF) + break; + m.addToken(tok); + } + addMacro(m); + } catch (IOException e) { + throw new LexerException(e); + } + } + + /** + * Defines the given name as a macro, with the value <code>1</code>. + * + * This is a convnience method, and is equivalent to + * <code>addMacro(name, "1")</code>. + */ + public void addMacro(String name) + throws LexerException { + addMacro(name, "1"); + } + + /** + * Sets the user include path used by this Preprocessor. + */ + /* Note for future: Create an IncludeHandler? */ + public void setQuoteIncludePath(List<String> path) { + this.quoteincludepath = path; + } + + /** + * Returns the user include-path of this Preprocessor. + * + * This list may be freely modified by user code. + */ + public List<String> getQuoteIncludePath() { + return quoteincludepath; + } + + /** + * Sets the system include path used by this Preprocessor. + */ + /* Note for future: Create an IncludeHandler? */ + public void setSystemIncludePath(List<String> path) { + this.sysincludepath = path; + } + + /** + * Returns the system include-path of this Preprocessor. + * + * This list may be freely modified by user code. + */ + public List<String> getSystemIncludePath() { + return sysincludepath; + } + + /** + * Sets the Objective-C frameworks path used by this Preprocessor. + */ + /* Note for future: Create an IncludeHandler? */ + public void setFrameworksPath(List<String> path) { + this.frameworkspath = path; + } + + /** + * Returns the Objective-C frameworks path used by this + * Preprocessor. + * + * This list may be freely modified by user code. + */ + public List<String> getFrameworksPath() { + return frameworkspath; + } + + /** + * Returns the Map of Macros parsed during the run of this + * Preprocessor. + */ + public Map<String, Macro> getMacros() { + return macros; + } + + /** + * Returns the named macro. + * + * While you can modify the returned object, unexpected things + * might happen if you do. + */ + public Macro getMacro(String name) { + return macros.get(name); + } + + /* States */ + private void push_state() { + State top = states.peek(); + states.push(new State(top)); + } + + private void pop_state() + throws LexerException { + State s = states.pop(); + if (states.isEmpty()) { + error(0, 0, "#" + "endif without #" + "if"); + states.push(s); + } + } + + private boolean isActive() { + State state = states.peek(); + return state.isParentActive() && state.isActive(); + } + + + /* Sources */ + /** + * Returns the top Source on the input stack. + * + * @see Source + * @see #push_source(Source,boolean) + * @see #pop_source() + */ + protected Source getSource() { + return source; + } + + /** + * Pushes a Source onto the input stack. + * + * @see #getSource() + * @see #pop_source() + */ + protected void push_source(Source source, boolean autopop) { + source.init(this); + source.setParent(this.source, autopop); + // source.setListener(listener); + if (listener != null) + listener.handleSourceChange(this.source, "suspend"); + this.source = source; + if (listener != null) + listener.handleSourceChange(this.source, "push"); + } + + /** + * Pops a Source from the input stack. + * + * @see #getSource() + * @see #push_source(Source,boolean) + */ + protected void pop_source() + throws IOException { + if (listener != null) + listener.handleSourceChange(this.source, "pop"); + Source s = this.source; + this.source = s.getParent(); + /* Always a noop unless called externally. */ + s.close(); + if (listener != null && this.source != null) + listener.handleSourceChange(this.source, "resume"); + } + + + /* Source tokens */ + private Token source_token; + + /* XXX Make this include the NL, and make all cpp directives eat + * their own NL. */ + private Token line_token(int line, String name, String extra) { + StringBuilder buf = new StringBuilder(); + buf.append("#line ").append(line) + .append(" \""); + /* XXX This call to escape(name) is correct but ugly. */ + MacroTokenSource.escape(buf, name); + buf.append("\"").append(extra).append("\n"); + return new Token(P_LINE, line, 0, buf.toString(), null); + } + + private Token source_token() + throws IOException, + LexerException { + if (source_token != null) { + Token tok = source_token; + source_token = null; + if (getFeature(Feature.DEBUG)) + System.err.println("Returning unget token " + tok); + return tok; + } + + for (;;) { + Source s = getSource(); + if (s == null) { + if (inputs.isEmpty()) + return new Token(EOF); + Source t = inputs.remove(0); + push_source(t, true); + if (getFeature(Feature.LINEMARKERS)) + return line_token(t.getLine(), t.getName(), " 1"); + continue; + } + Token tok = s.token(); + /* XXX Refactor with skipline() */ + if (tok.getType() == EOF && s.isAutopop()) { + // System.out.println("Autopop " + s); + pop_source(); + Source t = getSource(); + if (getFeature(Feature.LINEMARKERS) + && s.isNumbered() + && t != null) { + /* We actually want 'did the nested source + * contain a newline token', which isNumbered() + * approximates. This is not perfect, but works. */ + return line_token(t.getLine() + 1, t.getName(), " 2"); + } + continue; + } + if (getFeature(Feature.DEBUG)) + System.err.println("Returning fresh token " + tok); + return tok; + } + } + + private void source_untoken(Token tok) { + if (this.source_token != null) + throw new IllegalStateException("Cannot return two tokens"); + this.source_token = tok; + } + + private boolean isWhite(Token tok) { + int type = tok.getType(); + return (type == WHITESPACE) + || (type == CCOMMENT) + || (type == CPPCOMMENT); + } + + private Token source_token_nonwhite() + throws IOException, + LexerException { + Token tok; + do { + tok = source_token(); + } while (isWhite(tok)); + return tok; + } + + /** + * Returns an NL or an EOF token. + * + * The metadata on the token will be correct, which is better + * than generating a new one. + * + * This method can, as of recent patches, return a P_LINE token. + */ + private Token source_skipline(boolean white) + throws IOException, + LexerException { + // (new Exception("skipping line")).printStackTrace(System.out); + Source s = getSource(); + Token tok = s.skipline(white); + /* XXX Refactor with source_token() */ + if (tok.getType() == EOF && s.isAutopop()) { + // System.out.println("Autopop " + s); + pop_source(); + Source t = getSource(); + if (getFeature(Feature.LINEMARKERS) + && s.isNumbered() + && t != null) { + /* We actually want 'did the nested source + * contain a newline token', which isNumbered() + * approximates. This is not perfect, but works. */ + return line_token(t.getLine() + 1, t.getName(), " 2"); + } + } + return tok; + } + + /* processes and expands a macro. */ + private boolean macro(Macro m, Token orig) + throws IOException, + LexerException { + Token tok; + List<Argument> args; + + // System.out.println("pp: expanding " + m); + if (m.isFunctionLike()) { + OPEN: + for (;;) { + tok = source_token(); + // System.out.println("pp: open: token is " + tok); + switch (tok.getType()) { + case WHITESPACE: /* XXX Really? */ + + case CCOMMENT: + case CPPCOMMENT: + case NL: + break; /* continue */ + + case '(': + break OPEN; + default: + source_untoken(tok); + return false; + } + } + + // tok = expanded_token_nonwhite(); + tok = source_token_nonwhite(); + + /* We either have, or we should have args. + * This deals elegantly with the case that we have + * one empty arg. */ + if (tok.getType() != ')' || m.getArgs() > 0) { + args = new ArrayList<Argument>(); + + Argument arg = new Argument(); + int depth = 0; + boolean space = false; + + ARGS: + for (;;) { + // System.out.println("pp: arg: token is " + tok); + switch (tok.getType()) { + case EOF: + error(tok, "EOF in macro args"); + return false; + + case ',': + if (depth == 0) { + if (m.isVariadic() + && /* We are building the last arg. */ args.size() == m.getArgs() - 1) { + /* Just add the comma. */ + arg.addToken(tok); + } else { + args.add(arg); + arg = new Argument(); + } + } else { + arg.addToken(tok); + } + space = false; + break; + case ')': + if (depth == 0) { + args.add(arg); + break ARGS; + } else { + depth--; + arg.addToken(tok); + } + space = false; + break; + case '(': + depth++; + arg.addToken(tok); + space = false; + break; + + case WHITESPACE: + case CCOMMENT: + case CPPCOMMENT: + /* Avoid duplicating spaces. */ + space = true; + break; + + default: + /* Do not put space on the beginning of + * an argument token. */ + if (space && !arg.isEmpty()) + arg.addToken(Token.space); + arg.addToken(tok); + space = false; + break; + + } + // tok = expanded_token(); + tok = source_token(); + } + /* space may still be true here, thus trailing space + * is stripped from arguments. */ + + if (args.size() != m.getArgs()) { + error(tok, + "macro " + m.getName() + + " has " + m.getArgs() + " parameters " + + "but given " + args.size() + " args"); + /* We could replay the arg tokens, but I + * note that GNU cpp does exactly what we do, + * i.e. output the macro name and chew the args. + */ + return false; + } + + /* + for (Argument a : args) + a.expand(this); + */ + for (int i = 0; i < args.size(); i++) { + args.get(i).expand(this); + } + + // System.out.println("Macro " + m + " args " + args); + } else { + /* nargs == 0 and we (correctly) got () */ + args = null; + } + + } else { + /* Macro without args. */ + args = null; + } + + if (m == __LINE__) { + push_source(new FixedTokenSource( + new Token[]{new Token(NUMBER, + orig.getLine(), orig.getColumn(), + String.valueOf(orig.getLine()), + new NumericValue(10, "" + orig.getLine()))} + ), true); + } else if (m == __FILE__) { + StringBuilder buf = new StringBuilder("\""); + String name = getSource().getName(); + if (name == null) + name = "<no file>"; + for (int i = 0; i < name.length(); i++) { + char c = name.charAt(i); + switch (c) { + case '\\': + buf.append("\\\\"); + break; + case '"': + buf.append("\\\""); + break; + default: + buf.append(c); + break; + } + } + buf.append("\""); + String text = buf.toString(); + push_source(new FixedTokenSource( + new Token[]{new Token(STRING, + orig.getLine(), orig.getColumn(), + text, text)} + ), true); + } else if (m == __COUNTER__) { + /* This could equivalently have been done by adding + * a special Macro subclass which overrides getTokens(). */ + int value = this.counter++; + push_source(new FixedTokenSource( + new Token[]{new Token(NUMBER, + orig.getLine(), orig.getColumn(), + String.valueOf(value), + new NumericValue(10, "" + value))} + ), true); + } else { + push_source(new MacroTokenSource(m, args), true); + } + + return true; + } + + /** + * Expands an argument. + */ + /* I'd rather this were done lazily, but doing so breaks spec. */ + /* pp */ List<Token> expand(List<Token> arg) + throws IOException, + LexerException { + List<Token> expansion = new ArrayList<Token>(); + boolean space = false; + + push_source(new FixedTokenSource(arg), false); + + EXPANSION: + for (;;) { + Token tok = expanded_token(); + switch (tok.getType()) { + case EOF: + break EXPANSION; + + case WHITESPACE: + case CCOMMENT: + case CPPCOMMENT: + space = true; + break; + + default: + if (space && !expansion.isEmpty()) + expansion.add(Token.space); + expansion.add(tok); + space = false; + break; + } + } + + pop_source(); + + return expansion; + } + + /* processes a #define directive */ + private Token define() + throws IOException, + LexerException { + Token tok = source_token_nonwhite(); + if (tok.getType() != IDENTIFIER) { + error(tok, "Expected identifier"); + return source_skipline(false); + } + /* if predefined */ + + String name = tok.getText(); + if ("defined".equals(name)) { + error(tok, "Cannot redefine name 'defined'"); + return source_skipline(false); + } + + Macro m = new Macro(getSource(), name); + List<String> args; + + tok = source_token(); + if (tok.getType() == '(') { + tok = source_token_nonwhite(); + if (tok.getType() != ')') { + args = new ArrayList<String>(); + ARGS: + for (;;) { + switch (tok.getType()) { + case IDENTIFIER: + args.add(tok.getText()); + break; + case NL: + case EOF: + error(tok, + "Unterminated macro parameter list"); + return tok; + default: + error(tok, + "error in macro parameters: " + + tok.getText()); + return source_skipline(false); + } + tok = source_token_nonwhite(); + switch (tok.getType()) { + case ',': + break; + case ELLIPSIS: + tok = source_token_nonwhite(); + if (tok.getType() != ')') + error(tok, + "ellipsis must be on last argument"); + m.setVariadic(true); + break ARGS; + case ')': + break ARGS; + + case NL: + case EOF: + /* Do not skip line. */ + error(tok, + "Unterminated macro parameters"); + return tok; + default: + error(tok, + "Bad token in macro parameters: " + + tok.getText()); + return source_skipline(false); + } + tok = source_token_nonwhite(); + } + } else { + assert tok.getType() == ')' : "Expected ')'"; + args = Collections.emptyList(); + } + + m.setArgs(args); + } else { + /* For searching. */ + args = Collections.emptyList(); + source_untoken(tok); + } + + /* Get an expansion for the macro, using indexOf. */ + boolean space = false; + boolean paste = false; + int idx; + + /* Ensure no space at start. */ + tok = source_token_nonwhite(); + EXPANSION: + for (;;) { + switch (tok.getType()) { + case EOF: + break EXPANSION; + case NL: + break EXPANSION; + + case CCOMMENT: + case CPPCOMMENT: + /* XXX This is where we implement GNU's cpp -CC. */ + // break; + case WHITESPACE: + if (!paste) + space = true; + break; + + /* Paste. */ + case PASTE: + space = false; + paste = true; + m.addPaste(new Token(M_PASTE, + tok.getLine(), tok.getColumn(), + "#" + "#", null)); + break; + + /* Stringify. */ + case '#': + if (space) + m.addToken(Token.space); + space = false; + Token la = source_token_nonwhite(); + if (la.getType() == IDENTIFIER + && ((idx = args.indexOf(la.getText())) != -1)) { + m.addToken(new Token(M_STRING, + la.getLine(), la.getColumn(), + "#" + la.getText(), + Integer.valueOf(idx))); + } else { + m.addToken(tok); + /* Allow for special processing. */ + source_untoken(la); + } + break; + + case IDENTIFIER: + if (space) + m.addToken(Token.space); + space = false; + paste = false; + idx = args.indexOf(tok.getText()); + if (idx == -1) + m.addToken(tok); + else + m.addToken(new Token(M_ARG, + tok.getLine(), tok.getColumn(), + tok.getText(), + Integer.valueOf(idx))); + break; + + default: + if (space) + m.addToken(Token.space); + space = false; + paste = false; + m.addToken(tok); + break; + } + tok = source_token(); + } + + if (getFeature(Feature.DEBUG)) + System.err.println("Defined macro " + m); + addMacro(m); + + return tok; /* NL or EOF. */ + + } + + private Token undef() + throws IOException, + LexerException { + Token tok = source_token_nonwhite(); + if (tok.getType() != IDENTIFIER) { + error(tok, + "Expected identifier, not " + tok.getText()); + if (tok.getType() == NL || tok.getType() == EOF) + return tok; + } else { + Macro m = macros.get(tok.getText()); + if (m != null) { + /* XXX error if predefined */ + macros.remove(m.getName()); + } + } + return source_skipline(true); + } + + /** + * Attempts to include the given file. + * + * User code may override this method to implement a virtual + * file system. + */ + private boolean include(VirtualFile file) + throws IOException, + LexerException { + // System.out.println("Try to include " + ((File)file).getAbsolutePath()); + if (!file.isFile()) + return false; + if (getFeature(Feature.DEBUG)) + System.err.println("pp: including " + file); + push_source(file.getSource(), true); + return true; + } + + /** + * Includes a file from an include path, by name. + */ + private boolean include(Iterable<String> path, String name) + throws IOException, + LexerException { + for (String dir : path) { + VirtualFile file = filesystem.getFile(dir, name); + if (include(file)) + return true; + } + return false; + } + + /** + * Handles an include directive. + */ + private void include(String parent, int line, + String name, boolean quoted) + throws IOException, + LexerException { + VirtualFile pdir = null; + if (quoted) { + VirtualFile pfile = filesystem.getFile(parent); + pdir = pfile.getParentFile(); + VirtualFile ifile = pdir.getChildFile(name); + if (include(ifile)) + return; + if (include(quoteincludepath, name)) + return; + } + + if (include(sysincludepath, name)) + return; + + StringBuilder buf = new StringBuilder(); + buf.append("File not found: ").append(name); + buf.append(" in"); + if (quoted) { + buf.append(" .").append('(').append(pdir).append(')'); + for (String dir : quoteincludepath) + buf.append(" ").append(dir); + } + for (String dir : sysincludepath) + buf.append(" ").append(dir); + error(line, 0, buf.toString()); + } + + private Token include(boolean next) + throws IOException, + LexerException { + LexerSource lexer = (LexerSource) source; + try { + lexer.setInclude(true); + Token tok = token_nonwhite(); + + String name; + boolean quoted; + + if (tok.getType() == STRING) { + /* XXX Use the original text, not the value. + * Backslashes must not be treated as escapes here. */ + StringBuilder buf = new StringBuilder((String) tok.getValue()); + HEADER: + for (;;) { + tok = token_nonwhite(); + switch (tok.getType()) { + case STRING: + buf.append((String) tok.getValue()); + break; + case NL: + case EOF: + break HEADER; + default: + warning(tok, + "Unexpected token on #" + "include line"); + return source_skipline(false); + } + } + name = buf.toString(); + quoted = true; + } else if (tok.getType() == HEADER) { + name = (String) tok.getValue(); + quoted = false; + tok = source_skipline(true); + } else { + error(tok, + "Expected string or header, not " + tok.getText()); + switch (tok.getType()) { + case NL: + case EOF: + return tok; + default: + /* Only if not a NL or EOF already. */ + return source_skipline(false); + } + } + + /* Do the inclusion. */ + include(source.getPath(), tok.getLine(), name, quoted); + + /* 'tok' is the 'nl' after the include. We use it after the + * #line directive. */ + if (getFeature(Feature.LINEMARKERS)) + return line_token(1, source.getName(), " 1"); + return tok; + } finally { + lexer.setInclude(false); + } + } + + protected void pragma(Token name, List<Token> value) + throws IOException, + LexerException { + warning(name, "Unknown #" + "pragma: " + name.getText()); + } + + private Token pragma() + throws IOException, + LexerException { + Token name; + + NAME: + for (;;) { + Token tok = token(); + switch (tok.getType()) { + case EOF: + /* There ought to be a newline before EOF. + * At least, in any skipline context. */ + /* XXX Are we sure about this? */ + warning(tok, + "End of file in #" + "pragma"); + return tok; + case NL: + /* This may contain one or more newlines. */ + warning(tok, + "Empty #" + "pragma"); + return tok; + case CCOMMENT: + case CPPCOMMENT: + case WHITESPACE: + continue NAME; + case IDENTIFIER: + name = tok; + break NAME; + default: + return source_skipline(false); + } + } + + Token tok; + List<Token> value = new ArrayList<Token>(); + VALUE: + for (;;) { + tok = token(); + switch (tok.getType()) { + case EOF: + /* There ought to be a newline before EOF. + * At least, in any skipline context. */ + /* XXX Are we sure about this? */ + warning(tok, + "End of file in #" + "pragma"); + break VALUE; + case NL: + /* This may contain one or more newlines. */ + break VALUE; + case CCOMMENT: + case CPPCOMMENT: + break; + case WHITESPACE: + value.add(tok); + break; + default: + value.add(tok); + break; + } + } + + pragma(name, value); + + return tok; /* The NL. */ + + } + + /* For #error and #warning. */ + private void error(Token pptok, boolean is_error) + throws IOException, + LexerException { + StringBuilder buf = new StringBuilder(); + buf.append('#').append(pptok.getText()).append(' '); + /* Peculiar construction to ditch first whitespace. */ + Token tok = source_token_nonwhite(); + ERROR: + for (;;) { + switch (tok.getType()) { + case NL: + case EOF: + break ERROR; + default: + buf.append(tok.getText()); + break; + } + tok = source_token(); + } + if (is_error) + error(pptok, buf.toString()); + else + warning(pptok, buf.toString()); + } + + /* This bypasses token() for #elif expressions. + * If we don't do this, then isActive() == false + * causes token() to simply chew the entire input line. */ + private Token expanded_token() + throws IOException, + LexerException { + for (;;) { + Token tok = source_token(); + // System.out.println("Source token is " + tok); + if (tok.getType() == IDENTIFIER) { + Macro m = macros.get(tok.getText()); + if (m == null) + return tok; + if (source.isExpanding(m)) + return tok; + if (macro(m, tok)) + continue; + } + return tok; + } + } + + private Token expanded_token_nonwhite() + throws IOException, + LexerException { + Token tok; + do { + tok = expanded_token(); + // System.out.println("expanded token is " + tok); + } while (isWhite(tok)); + return tok; + } + + private Token expr_token = null; + + private Token expr_token() + throws IOException, + LexerException { + Token tok = expr_token; + + if (tok != null) { + // System.out.println("ungetting"); + expr_token = null; + } else { + tok = expanded_token_nonwhite(); + // System.out.println("expt is " + tok); + + if (tok.getType() == IDENTIFIER + && tok.getText().equals("defined")) { + Token la = source_token_nonwhite(); + boolean paren = false; + if (la.getType() == '(') { + paren = true; + la = source_token_nonwhite(); + } + + // System.out.println("Core token is " + la); + if (la.getType() != IDENTIFIER) { + error(la, + "defined() needs identifier, not " + + la.getText()); + tok = new Token(NUMBER, + la.getLine(), la.getColumn(), + "0", new NumericValue(10, "0")); + } else if (macros.containsKey(la.getText())) { + // System.out.println("Found macro"); + tok = new Token(NUMBER, + la.getLine(), la.getColumn(), + "1", new NumericValue(10, "1")); + } else { + // System.out.println("Not found macro"); + tok = new Token(NUMBER, + la.getLine(), la.getColumn(), + "0", new NumericValue(10, "0")); + } + + if (paren) { + la = source_token_nonwhite(); + if (la.getType() != ')') { + expr_untoken(la); + error(la, "Missing ) in defined()"); + } + } + } + } + + // System.out.println("expr_token returns " + tok); + return tok; + } + + private void expr_untoken(Token tok) + throws LexerException { + if (expr_token != null) + throw new InternalException( + "Cannot unget two expression tokens." + ); + expr_token = tok; + } + + private int expr_priority(Token op) { + switch (op.getType()) { + case '/': + return 11; + case '%': + return 11; + case '*': + return 11; + case '+': + return 10; + case '-': + return 10; + case LSH: + return 9; + case RSH: + return 9; + case '<': + return 8; + case '>': + return 8; + case LE: + return 8; + case GE: + return 8; + case EQ: + return 7; + case NE: + return 7; + case '&': + return 6; + case '^': + return 5; + case '|': + return 4; + case LAND: + return 3; + case LOR: + return 2; + case '?': + return 1; + default: + // System.out.println("Unrecognised operator " + op); + return 0; + } + } + + private long expr(int priority) + throws IOException, + LexerException { + /* + System.out.flush(); + (new Exception("expr(" + priority + ") called")).printStackTrace(); + System.err.flush(); + */ + + Token tok = expr_token(); + long lhs, rhs; + + // System.out.println("Expr lhs token is " + tok); + switch (tok.getType()) { + case '(': + lhs = expr(0); + tok = expr_token(); + if (tok.getType() != ')') { + expr_untoken(tok); + error(tok, "missing ) in expression"); + return 0; + } + break; + + case '~': + lhs = ~expr(11); + break; + case '!': + lhs = expr(11) == 0 ? 1 : 0; + break; + case '-': + lhs = -expr(11); + break; + case NUMBER: + NumericValue value = (NumericValue) tok.getValue(); + lhs = value.longValue(); + break; + case CHARACTER: + lhs = (long) ((Character) tok.getValue()).charValue(); + break; + case IDENTIFIER: + if (warnings.contains(Warning.UNDEF)) + warning(tok, "Undefined token '" + tok.getText() + + "' encountered in conditional."); + lhs = 0; + break; + + default: + expr_untoken(tok); + error(tok, + "Bad token in expression: " + tok.getText()); + return 0; + } + + EXPR: + for (;;) { + // System.out.println("expr: lhs is " + lhs + ", pri = " + priority); + Token op = expr_token(); + int pri = expr_priority(op); /* 0 if not a binop. */ + + if (pri == 0 || priority >= pri) { + expr_untoken(op); + break EXPR; + } + rhs = expr(pri); + // System.out.println("rhs token is " + rhs); + switch (op.getType()) { + case '/': + if (rhs == 0) { + error(op, "Division by zero"); + lhs = 0; + } else { + lhs = lhs / rhs; + } + break; + case '%': + if (rhs == 0) { + error(op, "Modulus by zero"); + lhs = 0; + } else { + lhs = lhs % rhs; + } + break; + case '*': + lhs = lhs * rhs; + break; + case '+': + lhs = lhs + rhs; + break; + case '-': + lhs = lhs - rhs; + break; + case '<': + lhs = lhs < rhs ? 1 : 0; + break; + case '>': + lhs = lhs > rhs ? 1 : 0; + break; + case '&': + lhs = lhs & rhs; + break; + case '^': + lhs = lhs ^ rhs; + break; + case '|': + lhs = lhs | rhs; + break; + + case LSH: + lhs = lhs << rhs; + break; + case RSH: + lhs = lhs >> rhs; + break; + case LE: + lhs = lhs <= rhs ? 1 : 0; + break; + case GE: + lhs = lhs >= rhs ? 1 : 0; + break; + case EQ: + lhs = lhs == rhs ? 1 : 0; + break; + case NE: + lhs = lhs != rhs ? 1 : 0; + break; + case LAND: + lhs = (lhs != 0) && (rhs != 0) ? 1 : 0; + break; + case LOR: + lhs = (lhs != 0) || (rhs != 0) ? 1 : 0; + break; + + case '?': + /* XXX Handle this? */ + + default: + error(op, + "Unexpected operator " + op.getText()); + return 0; + + } + } + + /* + System.out.flush(); + (new Exception("expr returning " + lhs)).printStackTrace(); + System.err.flush(); + */ + // System.out.println("expr returning " + lhs); + return lhs; + } + + private Token toWhitespace(Token tok) { + String text = tok.getText(); + int len = text.length(); + boolean cr = false; + int nls = 0; + + for (int i = 0; i < len; i++) { + char c = text.charAt(i); + + switch (c) { + case '\r': + cr = true; + nls++; + break; + case '\n': + if (cr) { + cr = false; + break; + } + /* fallthrough */ + case '\u2028': + case '\u2029': + case '\u000B': + case '\u000C': + case '\u0085': + cr = false; + nls++; + break; + } + } + + char[] cbuf = new char[nls]; + Arrays.fill(cbuf, '\n'); + return new Token(WHITESPACE, + tok.getLine(), tok.getColumn(), + new String(cbuf)); + } + + @Nonnull + private Token _token() + throws IOException, + LexerException { + + for (;;) { + Token tok; + if (!isActive()) { + try { + /* XXX Tell lexer to ignore warnings. */ + source.setActive(false); + tok = source_token(); + } finally { + /* XXX Tell lexer to stop ignoring warnings. */ + source.setActive(true); + } + switch (tok.getType()) { + case HASH: + case NL: + case EOF: + /* The preprocessor has to take action here. */ + break; + case WHITESPACE: + return tok; + case CCOMMENT: + case CPPCOMMENT: + // Patch up to preserve whitespace. + if (getFeature(Feature.KEEPALLCOMMENTS)) + return tok; + if (!isActive()) + return toWhitespace(tok); + if (getFeature(Feature.KEEPCOMMENTS)) + return tok; + return toWhitespace(tok); + default: + // Return NL to preserve whitespace. + /* XXX This might lose a comment. */ + return source_skipline(false); + } + } else { + tok = source_token(); + } + + LEX: + switch (tok.getType()) { + case EOF: + /* Pop the stacks. */ + return tok; + + case WHITESPACE: + case NL: + return tok; + + case CCOMMENT: + case CPPCOMMENT: + return tok; + + case '!': + case '%': + case '&': + case '(': + case ')': + case '*': + case '+': + case ',': + case '-': + case '/': + case ':': + case ';': + case '<': + case '=': + case '>': + case '?': + case '[': + case ']': + case '^': + case '{': + case '|': + case '}': + case '~': + case '.': + + /* From Olivier Chafik for Objective C? */ + case '@': + /* The one remaining ASCII, might as well. */ + case '`': + + // case '#': + case AND_EQ: + case ARROW: + case CHARACTER: + case DEC: + case DIV_EQ: + case ELLIPSIS: + case EQ: + case GE: + case HEADER: /* Should only arise from include() */ + + case INC: + case LAND: + case LE: + case LOR: + case LSH: + case LSH_EQ: + case SUB_EQ: + case MOD_EQ: + case MULT_EQ: + case NE: + case OR_EQ: + case PLUS_EQ: + case RANGE: + case RSH: + case RSH_EQ: + case STRING: + case XOR_EQ: + return tok; + + case NUMBER: + return tok; + + case IDENTIFIER: + Macro m = macros.get(tok.getText()); + if (m == null) + return tok; + if (source.isExpanding(m)) + return tok; + if (macro(m, tok)) + break; + return tok; + + case P_LINE: + if (getFeature(Feature.LINEMARKERS)) + return tok; + break; + + case INVALID: + if (getFeature(Feature.CSYNTAX)) + error(tok, String.valueOf(tok.getValue())); + return tok; + + default: + throw new InternalException("Bad token " + tok); + // break; + + case HASH: + tok = source_token_nonwhite(); + // (new Exception("here")).printStackTrace(); + switch (tok.getType()) { + case NL: + break LEX; /* Some code has #\n */ + + case IDENTIFIER: + break; + default: + error(tok, + "Preprocessor directive not a word " + + tok.getText()); + return source_skipline(false); + } + PreprocessorCommand ppcmd = PreprocessorCommand.forText(tok.getText()); + if (ppcmd == null) { + error(tok, + "Unknown preprocessor directive " + + tok.getText()); + return source_skipline(false); + } + + PP: + switch (ppcmd) { + + case PP_DEFINE: + if (!isActive()) + return source_skipline(false); + else + return define(); + // break; + + case PP_UNDEF: + if (!isActive()) + return source_skipline(false); + else + return undef(); + // break; + + case PP_INCLUDE: + if (!isActive()) + return source_skipline(false); + else + return include(false); + // break; + case PP_INCLUDE_NEXT: + if (!isActive()) + return source_skipline(false); + if (!getFeature(Feature.INCLUDENEXT)) { + error(tok, + "Directive include_next not enabled" + ); + return source_skipline(false); + } + return include(true); + // break; + + case PP_WARNING: + case PP_ERROR: + if (!isActive()) + return source_skipline(false); + else + error(tok, ppcmd == PP_ERROR); + break; + + case PP_IF: + push_state(); + if (!isActive()) { + return source_skipline(false); + } + expr_token = null; + states.peek().setActive(expr(0) != 0); + tok = expr_token(); /* unget */ + + if (tok.getType() == NL) + return tok; + return source_skipline(true); + // break; + + case PP_ELIF: + State state = states.peek(); + if (false) { + /* Check for 'if' */; + } else if (state.sawElse()) { + error(tok, + "#elif after #" + "else"); + return source_skipline(false); + } else if (!state.isParentActive()) { + /* Nested in skipped 'if' */ + return source_skipline(false); + } else if (state.isActive()) { + /* The 'if' part got executed. */ + state.setParentActive(false); + /* This is like # else # if but with + * only one # end. */ + state.setActive(false); + return source_skipline(false); + } else { + expr_token = null; + state.setActive(expr(0) != 0); + tok = expr_token(); /* unget */ + + if (tok.getType() == NL) + return tok; + return source_skipline(true); + } + // break; + + case PP_ELSE: + state = states.peek(); + if (false) + /* Check for 'if' */ ; else if (state.sawElse()) { + error(tok, + "#" + "else after #" + "else"); + return source_skipline(false); + } else { + state.setSawElse(); + state.setActive(!state.isActive()); + return source_skipline(warnings.contains(Warning.ENDIF_LABELS)); + } + // break; + + case PP_IFDEF: + push_state(); + if (!isActive()) { + return source_skipline(false); + } else { + tok = source_token_nonwhite(); + // System.out.println("ifdef " + tok); + if (tok.getType() != IDENTIFIER) { + error(tok, + "Expected identifier, not " + + tok.getText()); + return source_skipline(false); + } else { + String text = tok.getText(); + boolean exists + = macros.containsKey(text); + states.peek().setActive(exists); + return source_skipline(true); + } + } + // break; + + case PP_IFNDEF: + push_state(); + if (!isActive()) { + return source_skipline(false); + } else { + tok = source_token_nonwhite(); + if (tok.getType() != IDENTIFIER) { + error(tok, + "Expected identifier, not " + + tok.getText()); + return source_skipline(false); + } else { + String text = tok.getText(); + boolean exists + = macros.containsKey(text); + states.peek().setActive(!exists); + return source_skipline(true); + } + } + // break; + + case PP_ENDIF: + pop_state(); + return source_skipline(warnings.contains(Warning.ENDIF_LABELS)); + // break; + + case PP_LINE: + return source_skipline(false); + // break; + + case PP_PRAGMA: + if (!isActive()) + return source_skipline(false); + return pragma(); + // break; + + default: + /* Actual unknown directives are + * processed above. If we get here, + * we succeeded the map lookup but + * failed to handle it. Therefore, + * this is (unconditionally?) fatal. */ + // if (isActive()) /* XXX Could be warning. */ + throw new InternalException( + "Internal error: Unknown directive " + + tok); + // return source_skipline(false); + } + + } + } + } + + private Token token_nonwhite() + throws IOException, + LexerException { + Token tok; + do { + tok = _token(); + } while (isWhite(tok)); + return tok; + } + + /** + * Returns the next preprocessor token. + * + * @see Token + * @throws LexerException if a preprocessing error occurs. + * @throws InternalException if an unexpected error condition arises. + */ + @Nonnull + public Token token() + throws IOException, + LexerException { + Token tok = _token(); + if (getFeature(Feature.DEBUG)) + System.err.println("pp: Returning " + tok); + return tok; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + + Source s = getSource(); + while (s != null) { + buf.append(" -> ").append(String.valueOf(s)).append("\n"); + s = s.getParent(); + } + + Map<String, Macro> macros = getMacros(); + List<String> keys = new ArrayList<String>( + macros.keySet() + ); + Collections.sort(keys); + Iterator<String> mt = keys.iterator(); + while (mt.hasNext()) { + String key = mt.next(); + Macro macro = macros.get(key); + buf.append("#").append("macro ").append(macro).append("\n"); + } + + return buf.toString(); + } + + @Override + public void close() + throws IOException { + { + Source s = source; + while (s != null) { + s.close(); + s = s.getParent(); + } + } + for (Source s : inputs) { + s.close(); + } + } + +} diff --git a/src/main/java/org/anarres/cpp/PreprocessorCommand.java b/src/main/java/org/anarres/cpp/PreprocessorCommand.java new file mode 100644 index 0000000..3938360 --- /dev/null +++ b/src/main/java/org/anarres/cpp/PreprocessorCommand.java @@ -0,0 +1,44 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package org.anarres.cpp; + +import javax.annotation.CheckForNull; +import javax.annotation.Nonnull; + +/** + * + * @author shevek + */ +public enum PreprocessorCommand { + + PP_DEFINE("define"), + PP_ELIF("elif"), + PP_ELSE("else"), + PP_ENDIF("endif"), + PP_ERROR("error"), + PP_IF("if"), + PP_IFDEF("ifdef"), + PP_IFNDEF("ifndef"), + PP_INCLUDE("include"), + PP_LINE("line"), + PP_PRAGMA("pragma"), + PP_UNDEF("undef"), + PP_WARNING("warning"), + PP_INCLUDE_NEXT("include_next"), + PP_IMPORT("import"); + private final String text; + /* pp */ PreprocessorCommand(String text) { + this.text = text; + } + + @CheckForNull + public static PreprocessorCommand forText(@Nonnull String text) { + for (PreprocessorCommand ppcmd : PreprocessorCommand.values()) + if (ppcmd.text.equals(text)) + return ppcmd; + return null; + } +} diff --git a/src/main/java/org/anarres/cpp/PreprocessorListener.java b/src/main/java/org/anarres/cpp/PreprocessorListener.java new file mode 100644 index 0000000..a5b4339 --- /dev/null +++ b/src/main/java/org/anarres/cpp/PreprocessorListener.java @@ -0,0 +1,85 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +/** + * A handler for preprocessor events, primarily errors and warnings. + * + * If no PreprocessorListener is installed in a Preprocessor, all + * error and warning events will throw an exception. Installing a + * listener allows more intelligent handling of these events. + */ +public class PreprocessorListener { + + private int errors; + private int warnings; + + public PreprocessorListener() { + clear(); + } + + public void clear() { + errors = 0; + warnings = 0; + } + + public int getErrors() { + return errors; + } + + public int getWarnings() { + return warnings; + } + + protected void print(String msg) { + System.err.println(msg); + } + + /** + * Handles a warning. + * + * The behaviour of this method is defined by the + * implementation. It may simply record the error message, or + * it may throw an exception. + */ + public void handleWarning(Source source, int line, int column, + String msg) + throws LexerException { + warnings++; + print(source.getName() + ":" + line + ":" + column + + ": warning: " + msg); + } + + /** + * Handles an error. + * + * The behaviour of this method is defined by the + * implementation. It may simply record the error message, or + * it may throw an exception. + */ + public void handleError(Source source, int line, int column, + String msg) + throws LexerException { + errors++; + print(source.getName() + ":" + line + ":" + column + + ": error: " + msg); + } + + public void handleSourceChange(Source source, String event) { + } + +} diff --git a/src/main/java/org/anarres/cpp/ResourceFileSystem.java b/src/main/java/org/anarres/cpp/ResourceFileSystem.java new file mode 100644 index 0000000..7efd664 --- /dev/null +++ b/src/main/java/org/anarres/cpp/ResourceFileSystem.java @@ -0,0 +1,81 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package org.anarres.cpp; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import javax.annotation.Nonnull; + +/** + * + * @author shevek + */ +public class ResourceFileSystem implements VirtualFileSystem { + + private final ClassLoader loader; + + public ResourceFileSystem(@Nonnull ClassLoader loader) { + this.loader = loader; + } + + @Override + public VirtualFile getFile(String path) { + return new ResourceFile(loader, path); + } + + @Override + public VirtualFile getFile(String dir, String name) { + return getFile(dir + "/" + name); + } + + private class ResourceFile implements VirtualFile { + + private final ClassLoader loader; + private final String path; + + public ResourceFile(ClassLoader loader, String path) { + this.loader = loader; + this.path = path; + } + + @Override + public boolean isFile() { + throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. + } + + @Override + public String getPath() { + return path; + } + + @Override + public String getName() { + return path.substring(path.lastIndexOf('/') + 1); + } + + @Override + public ResourceFile getParentFile() { + int idx = path.lastIndexOf('/'); + if (idx < 1) + return null; + return new ResourceFile(loader, path.substring(0, idx)); + } + + @Override + public ResourceFile getChildFile(String name) { + return new ResourceFile(loader, path + "/" + name); + } + + @Override + public Source getSource() throws IOException { + InputStream stream = loader.getResourceAsStream(path); + BufferedReader reader = new BufferedReader(new InputStreamReader(stream)); + return new LexerSource(reader, true); + } + } +} diff --git a/src/main/java/org/anarres/cpp/Source.java b/src/main/java/org/anarres/cpp/Source.java new file mode 100644 index 0000000..532f5fc --- /dev/null +++ b/src/main/java/org/anarres/cpp/Source.java @@ -0,0 +1,287 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Iterator; +import javax.annotation.Nonnull; +import static org.anarres.cpp.Token.*; + +/** + * An input to the Preprocessor. + * + * Inputs may come from Files, Strings or other sources. The + * preprocessor maintains a stack of Sources. Operations such as + * file inclusion or token pasting will push a new source onto + * the Preprocessor stack. Sources pop from the stack when they + * are exhausted; this may be transparent or explicit. + * + * BUG: Error messages are not handled properly. + */ +public abstract class Source implements Iterable<Token>, Closeable { + + private Source parent; + private boolean autopop; + private PreprocessorListener listener; + private boolean active; + private boolean werror; + + /* LineNumberReader */ + + /* + // We can't do this, since we would lose the LexerException + private class Itr implements Iterator { + private Token next = null; + private void advance() { + try { + if (next != null) + next = token(); + } + catch (IOException e) { + throw new UnsupportedOperationException( + "Failed to advance token iterator: " + + e.getMessage() + ); + } + } + public boolean hasNext() { + return next.getType() != EOF; + } + public Token next() { + advance(); + Token t = next; + next = null; + return t; + } + public void remove() { + throw new UnsupportedOperationException( + "Cannot remove tokens from a Source." + ); + } + } + */ + public Source() { + this.parent = null; + this.autopop = false; + this.listener = null; + this.active = true; + this.werror = false; + } + + /** + * Sets the parent source of this source. + * + * Sources form a singly linked list. + */ + /* pp */ void setParent(Source parent, boolean autopop) { + this.parent = parent; + this.autopop = autopop; + } + + /** + * Returns the parent source of this source. + * + * Sources form a singly linked list. + */ + /* pp */ final Source getParent() { + return parent; + } + + + // @OverrideMustInvoke + /* pp */ void init(Preprocessor pp) { + setListener(pp.getListener()); + this.werror = pp.getWarnings().contains(Warning.ERROR); + } + + /** + * Sets the listener for this Source. + * + * Normally this is set by the Preprocessor when a Source is + * used, but if you are using a Source as a standalone object, + * you may wish to call this. + */ + public void setListener(PreprocessorListener pl) { + this.listener = pl; + } + + /** + * Returns the File currently being lexed. + * + * If this Source is not a {@link FileLexerSource}, then + * it will ask the parent Source, and so forth recursively. + * If no Source on the stack is a FileLexerSource, returns null. + */ + /* pp */ String getPath() { + Source parent = getParent(); + if (parent != null) + return parent.getPath(); + return null; + } + + /** + * Returns the human-readable name of the current Source. + */ + /* pp */ String getName() { + Source parent = getParent(); + if (parent != null) + return parent.getName(); + return null; + } + + /** + * Returns the current line number within this Source. + */ + public int getLine() { + Source parent = getParent(); + if (parent == null) + return 0; + return parent.getLine(); + } + + /** + * Returns the current column number within this Source. + */ + public int getColumn() { + Source parent = getParent(); + if (parent == null) + return 0; + return parent.getColumn(); + } + + /** + * Returns true if this Source is expanding the given macro. + * + * This is used to prevent macro recursion. + */ + /* pp */ boolean isExpanding(Macro m) { + Source parent = getParent(); + if (parent != null) + return parent.isExpanding(m); + return false; + } + + /** + * Returns true if this Source should be transparently popped + * from the input stack. + * + * Examples of such sources are macro expansions. + */ + /* pp */ boolean isAutopop() { + return autopop; + } + + /** + * Returns true if this source has line numbers. + */ + /* pp */ boolean isNumbered() { + return false; + } + + /* This is an incredibly lazy way of disabling warnings when + * the source is not active. */ + /* pp */ void setActive(boolean b) { + this.active = b; + } + + /* pp */ boolean isActive() { + return active; + } + + /** + * Returns the next Token parsed from this input stream. + * + * @see Token + */ + @Nonnull + public abstract Token token() + throws IOException, + LexerException; + + /** + * Returns a token iterator for this Source. + */ + @Override + public Iterator<Token> iterator() { + return new SourceIterator(this); + } + + /** + * Skips tokens until the end of line. + * + * @param white true if only whitespace is permitted on the + * remainder of the line. + * @return the NL token. + */ + @Nonnull + public Token skipline(boolean white) + throws IOException, + LexerException { + for (;;) { + Token tok = token(); + switch (tok.getType()) { + case EOF: + /* There ought to be a newline before EOF. + * At least, in any skipline context. */ + /* XXX Are we sure about this? */ + warning(tok.getLine(), tok.getColumn(), + "No newline before end of file"); + return new Token(NL, + tok.getLine(), tok.getColumn(), + "\n"); + // return tok; + case NL: + /* This may contain one or more newlines. */ + return tok; + case CCOMMENT: + case CPPCOMMENT: + case WHITESPACE: + break; + default: + /* XXX Check white, if required. */ + if (white) + warning(tok.getLine(), tok.getColumn(), + "Unexpected nonwhite token"); + break; + } + } + } + + protected void error(int line, int column, String msg) + throws LexerException { + if (listener != null) + listener.handleError(this, line, column, msg); + else + throw new LexerException("Error at " + line + ":" + column + ": " + msg); + } + + protected void warning(int line, int column, String msg) + throws LexerException { + if (werror) + error(line, column, msg); + else if (listener != null) + listener.handleWarning(this, line, column, msg); + else + throw new LexerException("Warning at " + line + ":" + column + ": " + msg); + } + + public void close() + throws IOException { + } + +} diff --git a/src/main/java/org/anarres/cpp/SourceIterator.java b/src/main/java/org/anarres/cpp/SourceIterator.java new file mode 100644 index 0000000..d5c63c7 --- /dev/null +++ b/src/main/java/org/anarres/cpp/SourceIterator.java @@ -0,0 +1,92 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.anarres.cpp; + +import java.io.IOException; + +import java.util.Iterator; +import java.util.NoSuchElementException; + +import static org.anarres.cpp.Token.*; + +/** + * An Iterator for {@link Source Sources}, + * returning {@link Token Tokens}. + */ +public class SourceIterator implements Iterator<Token> { + private Source source; + private Token tok; + + public SourceIterator(Source s) { + this.source = s; + this.tok = null; + } + + /** + * Rethrows IOException inside IllegalStateException. + */ + private void advance() { + try { + if (tok == null) + tok = source.token(); + } + catch (LexerException e) { + throw new IllegalStateException(e); + } + catch (IOException e) { + throw new IllegalStateException(e); + } + } + + /** + * Returns true if the enclosed Source has more tokens. + * + * The EOF token is never returned by the iterator. + * @throws IllegalStateException if the Source + * throws a LexerException or IOException + */ + public boolean hasNext() { + advance(); + return tok.getType() != EOF; + } + + /** + * Returns the next token from the enclosed Source. + * + * The EOF token is never returned by the iterator. + * @throws IllegalStateException if the Source + * throws a LexerException or IOException + */ + public Token next() { + if (!hasNext()) + throw new NoSuchElementException(); + Token t = this.tok; + this.tok = null; + return t; + } + + /** + * Not supported. + * + * @throws UnsupportedOperationException. + */ + public void remove() { + throw new UnsupportedOperationException(); + } +} + diff --git a/src/main/java/org/anarres/cpp/State.java b/src/main/java/org/anarres/cpp/State.java new file mode 100644 index 0000000..ed5c736 --- /dev/null +++ b/src/main/java/org/anarres/cpp/State.java @@ -0,0 +1,67 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.anarres.cpp; + +/* pp */ class State { + boolean parent; + boolean active; + boolean sawElse; + + /* pp */ State() { + this.parent = true; + this.active = true; + this.sawElse = false; + } + + /* pp */ State(State parent) { + this.parent = parent.isParentActive() && parent.isActive(); + this.active = true; + this.sawElse = false; + } + + /* Required for #elif */ + /* pp */ void setParentActive(boolean b) { + this.parent = b; + } + + /* pp */ boolean isParentActive() { + return parent; + } + + /* pp */ void setActive(boolean b) { + this.active = b; + } + + /* pp */ boolean isActive() { + return active; + } + + /* pp */ void setSawElse() { + sawElse = true; + } + + /* pp */ boolean sawElse() { + return sawElse; + } + + public String toString() { + return "parent=" + parent + + ", active=" + active + + ", sawelse=" + sawElse; + } +} diff --git a/src/main/java/org/anarres/cpp/StringLexerSource.java b/src/main/java/org/anarres/cpp/StringLexerSource.java new file mode 100644 index 0000000..8640bc8 --- /dev/null +++ b/src/main/java/org/anarres/cpp/StringLexerSource.java @@ -0,0 +1,53 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +import java.io.StringReader; + +/** + * A Source for lexing a String. + * + * This class is used by token pasting, but can be used by user + * code. + */ +public class StringLexerSource extends LexerSource { + + /** + * Creates a new Source for lexing the given String. + * + * @param ppvalid true if preprocessor directives are to be + * honoured within the string. + */ + public StringLexerSource(String string, boolean ppvalid) { + super(new StringReader(string), ppvalid); + } + + /** + * Creates a new Source for lexing the given String. + * + * By default, preprocessor directives are not honoured within + * the string. + */ + public StringLexerSource(String string) { + this(string, false); + } + + @Override + public String toString() { + return "string literal"; + } +} diff --git a/src/main/java/org/anarres/cpp/Token.java b/src/main/java/org/anarres/cpp/Token.java new file mode 100644 index 0000000..3e6eb3e --- /dev/null +++ b/src/main/java/org/anarres/cpp/Token.java @@ -0,0 +1,193 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +/** + * A Preprocessor token. + * + * @see Preprocessor + */ +public final class Token { + + // public static final int EOF = -1; + private final int type; + private int line; + private int column; + private final Object value; + private final String text; + + public Token(int type, int line, int column, + String text, Object value) { + this.type = type; + this.line = line; + this.column = column; + this.text = text; + this.value = value; + } + + public Token(int type, int line, int column, String text) { + this(type, line, column, text, null); + } + + /* pp */ Token(int type, String text, Object value) { + this(type, -1, -1, text, value); + } + + /* pp */ Token(int type, String text) { + this(type, text, null); + } + + /* pp */ Token(int type) { + this(type, TokenType.getTokenText(type)); + } + + /** + * Returns the semantic type of this token. + */ + public int getType() { + return type; + } + + /* pp */ void setLocation(int line, int column) { + this.line = line; + this.column = column; + } + + /** + * Returns the line at which this token started. + * + * Lines are numbered from zero. + */ + public int getLine() { + return line; + } + + /** + * Returns the column at which this token started. + * + * Columns are numbered from zero. + */ + public int getColumn() { + return column; + } + + /** + * Returns the original or generated text of this token. + * + * This is distinct from the semantic value of the token. + * + * @see #getValue() + */ + public String getText() { + return text; + } + + /** + * Returns the semantic value of this token. + * + * For strings, this is the parsed String. + * For integers, this is an Integer object. + * For other token types, as appropriate. + * + * @see #getText() + */ + public Object getValue() { + return value; + } + + /** + * Returns a description of this token, for debugging purposes. + */ + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + + buf.append('[').append(getTokenName(type)); + if (line != -1) { + buf.append('@').append(line); + if (column != -1) + buf.append(',').append(column); + } + buf.append("]:"); + if (text != null) + buf.append('"').append(text).append('"'); + else if (type > 3 && type < 256) + buf.append((char) type); + else + buf.append('<').append(type).append('>'); + if (value != null) + buf.append('=').append(value); + return buf.toString(); + } + + /** + * Returns the descriptive name of the given token type. + * + * This is mostly used for stringification and debugging. + */ + public static String getTokenName(int type) { + return TokenType.getTokenName(type); + } + + public static final int AND_EQ = 257; + public static final int ARROW = 258; + public static final int CHARACTER = 259; + public static final int CCOMMENT = 260; + public static final int CPPCOMMENT = 261; + public static final int DEC = 262; + public static final int DIV_EQ = 263; + public static final int ELLIPSIS = 264; + public static final int EOF = 265; + public static final int EQ = 266; + public static final int GE = 267; + public static final int HASH = 268; + public static final int HEADER = 269; + public static final int IDENTIFIER = 270; + public static final int INC = 271; + public static final int NUMBER = 272; + public static final int LAND = 273; + public static final int LAND_EQ = 274; + public static final int LE = 275; + public static final int LITERAL = 276; + public static final int LOR = 277; + public static final int LOR_EQ = 278; + public static final int LSH = 279; + public static final int LSH_EQ = 280; + public static final int MOD_EQ = 281; + public static final int MULT_EQ = 282; + public static final int NE = 283; + public static final int NL = 284; + public static final int OR_EQ = 285; + public static final int PASTE = 286; + public static final int PLUS_EQ = 287; + public static final int RANGE = 288; + public static final int RSH = 289; + public static final int RSH_EQ = 290; + public static final int SQSTRING = 291; + public static final int STRING = 292; + public static final int SUB_EQ = 293; + public static final int WHITESPACE = 294; + public static final int XOR_EQ = 295; + public static final int M_ARG = 296; + public static final int M_PASTE = 297; + public static final int M_STRING = 298; + public static final int P_LINE = 299; + public static final int INVALID = 300; + + /** The position-less space token. */ + /* pp */ static final Token space = new Token(WHITESPACE, -1, -1, " "); +} diff --git a/src/main/java/org/anarres/cpp/TokenSnifferSource.java b/src/main/java/org/anarres/cpp/TokenSnifferSource.java new file mode 100644 index 0000000..1512b2e --- /dev/null +++ b/src/main/java/org/anarres/cpp/TokenSnifferSource.java @@ -0,0 +1,54 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.anarres.cpp; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.io.PushbackReader; +import java.io.Reader; +import java.io.StringReader; + +import java.util.ArrayList; +import java.util.List; +import java.util.Iterator; + +import static org.anarres.cpp.Token.*; + +@Deprecated +/* pp */ class TokenSnifferSource extends Source { + private List<Token> target; + + /* pp */ TokenSnifferSource(List<Token> target) { + this.target = target; + } + + public Token token() + throws IOException, + LexerException { + Token tok = getParent().token(); + if (tok.getType() != EOF) + target.add(tok); + return tok; + } + + public String toString() { + return getParent().toString(); + } +} diff --git a/src/main/java/org/anarres/cpp/TokenType.java b/src/main/java/org/anarres/cpp/TokenType.java new file mode 100644 index 0000000..86df097 --- /dev/null +++ b/src/main/java/org/anarres/cpp/TokenType.java @@ -0,0 +1,128 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package org.anarres.cpp; + +import java.util.ArrayList; +import java.util.List; +import javax.annotation.CheckForNull; +import javax.annotation.Nonnegative; +import javax.annotation.Nonnull; +import static org.anarres.cpp.Token.*; + +/** + * + * @author shevek + */ +/* pp */ class TokenType { + + private static final List<TokenType> TYPES = new ArrayList<TokenType>(); + + private static void addTokenType(@Nonnegative int type, @Nonnull String name, @CheckForNull String text) { + while (TYPES.size() <= type) + TYPES.add(null); + TYPES.set(type, new TokenType(name, text)); + } + + private static void addTokenType(@Nonnegative int type, @Nonnull String name) { + addTokenType(type, name, null); + } + + @CheckForNull + public static TokenType getTokenType(@Nonnegative int type) { + try { + return TYPES.get(type); + } catch (IndexOutOfBoundsException e) { + return null; + } + } + + @Nonnull + public static String getTokenName(@Nonnegative int type) { + if (type < 0) + return "Invalid" + type; + TokenType tokenType = getTokenType(type); + if (tokenType == null) + return "Unknown" + type; + return tokenType.getName(); + } + + @CheckForNull + public static String getTokenText(@Nonnegative int type) { + TokenType tokenType = getTokenType(type); + if (tokenType == null) + return null; + return tokenType.getText(); + } + + static { + for (int i = 0; i < 255; i++) { + String text = String.valueOf((char) i); + addTokenType(i, text, text); + } + addTokenType(AND_EQ, "AND_EQ", "&="); + addTokenType(ARROW, "ARROW", "->"); + addTokenType(CHARACTER, "CHARACTER"); + addTokenType(CCOMMENT, "CCOMMENT"); + addTokenType(CPPCOMMENT, "CPPCOMMENT"); + addTokenType(DEC, "DEC", "--"); + addTokenType(DIV_EQ, "DIV_EQ", "/="); + addTokenType(ELLIPSIS, "ELLIPSIS", "..."); + addTokenType(EOF, "EOF"); + addTokenType(EQ, "EQ", "=="); + addTokenType(GE, "GE", ">="); + addTokenType(HASH, "HASH", "#"); + addTokenType(HEADER, "HEADER"); + addTokenType(IDENTIFIER, "IDENTIFIER"); + addTokenType(INC, "INC", "++"); + addTokenType(NUMBER, "NUMBER"); + addTokenType(LAND, "LAND", "&&"); + addTokenType(LAND_EQ, "LAND_EQ", "&&="); + addTokenType(LE, "LE", "<="); + addTokenType(LITERAL, "LITERAL"); + addTokenType(LOR, "LOR", "||"); + addTokenType(LOR_EQ, "LOR_EQ", "||="); + addTokenType(LSH, "LSH", "<<"); + addTokenType(LSH_EQ, "LSH_EQ", "<<="); + addTokenType(MOD_EQ, "MOD_EQ", "%="); + addTokenType(MULT_EQ, "MULT_EQ", "*="); + addTokenType(NE, "NE", "!="); + addTokenType(NL, "NL"); + addTokenType(OR_EQ, "OR_EQ", "|="); + addTokenType(PASTE, "PASTE", "##"); + addTokenType(PLUS_EQ, "PLUS_EQ", "+="); + addTokenType(RANGE, "RANGE", ".."); + addTokenType(RSH, "RSH", ">>"); + addTokenType(RSH_EQ, "RSH_EQ", ">>="); + addTokenType(SQSTRING, "SQSTRING"); + addTokenType(STRING, "STRING"); + addTokenType(SUB_EQ, "SUB_EQ", "-="); + addTokenType(WHITESPACE, "WHITESPACE"); + addTokenType(XOR_EQ, "XOR_EQ", "^="); + addTokenType(M_ARG, "M_ARG"); + addTokenType(M_PASTE, "M_PASTE"); + addTokenType(M_STRING, "M_STRING"); + addTokenType(P_LINE, "P_LINE"); + addTokenType(INVALID, "INVALID"); + } + + private final String name; + private final String text; + + /* pp */ TokenType(@Nonnull String name, @CheckForNull String text) { + this.name = name; + this.text = text; + } + + @Nonnull + public String getName() { + return name; + } + + @CheckForNull + public String getText() { + return text; + } +} diff --git a/src/main/java/org/anarres/cpp/VirtualFile.java b/src/main/java/org/anarres/cpp/VirtualFile.java new file mode 100644 index 0000000..aee1cad --- /dev/null +++ b/src/main/java/org/anarres/cpp/VirtualFile.java @@ -0,0 +1,45 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +import java.io.IOException; +import javax.annotation.CheckForNull; +import javax.annotation.Nonnull; + +/** + * An extremely lightweight virtual file interface. + */ +public interface VirtualFile { + + // public String getParent(); + public boolean isFile(); + + @Nonnull + public String getPath(); + + @Nonnull + public String getName(); + + @CheckForNull + public VirtualFile getParentFile(); + + @Nonnull + public VirtualFile getChildFile(String name); + + @Nonnull + public Source getSource() throws IOException; +} diff --git a/src/main/java/org/anarres/cpp/VirtualFileSystem.java b/src/main/java/org/anarres/cpp/VirtualFileSystem.java new file mode 100644 index 0000000..56e53ac --- /dev/null +++ b/src/main/java/org/anarres/cpp/VirtualFileSystem.java @@ -0,0 +1,27 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +/** + * An extremely lightweight virtual file system interface. + */ +public interface VirtualFileSystem { + + public VirtualFile getFile(String path); + + public VirtualFile getFile(String dir, String name); +} diff --git a/src/main/java/org/anarres/cpp/Warning.java b/src/main/java/org/anarres/cpp/Warning.java new file mode 100644 index 0000000..da2b2c6 --- /dev/null +++ b/src/main/java/org/anarres/cpp/Warning.java @@ -0,0 +1,32 @@ +/* + * Anarres C Preprocessor + * Copyright (c) 2007-2008, Shevek + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ +package org.anarres.cpp; + +/** + * Warning classes which may optionally be emitted by the Preprocessor. + */ +public enum Warning { + + TRIGRAPHS, + // TRADITIONAL, + IMPORT, + UNDEF, + UNUSED_MACROS, + ENDIF_LABELS, + ERROR, + // SYSTEM_HEADERS +} |