diff options
Diffstat (limited to 'src/classes/com/sun/gluegen/cgram/StdCParser.g')
-rw-r--r-- | src/classes/com/sun/gluegen/cgram/StdCParser.g | 1375 |
1 files changed, 1375 insertions, 0 deletions
diff --git a/src/classes/com/sun/gluegen/cgram/StdCParser.g b/src/classes/com/sun/gluegen/cgram/StdCParser.g new file mode 100644 index 000000000..e84fbfd6e --- /dev/null +++ b/src/classes/com/sun/gluegen/cgram/StdCParser.g @@ -0,0 +1,1375 @@ +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + Copyright (c) Non, Inc. 1997 -- All Rights Reserved + +PROJECT: C Compiler +MODULE: Parser +FILE: stdc.g + +AUTHOR: John D. Mitchell ([email protected]), Jul 12, 1997 + +REVISION HISTORY: + + Name Date Description + ---- ---- ----------- + JDM 97.07.12 Initial version. + JTC 97.11.18 Declaration vs declarator & misc. hacking. + JDM 97.11.20 Fixed: declaration vs funcDef, + parenthesized expressions, + declarator iteration, + varargs recognition, + empty source file recognition, + and some typos. + + +DESCRIPTION: + + This grammar supports the Standard C language. + + Note clearly that this grammar does *NOT* deal with + preprocessor functionality (including things like trigraphs) + Nor does this grammar deal with multi-byte characters nor strings + containing multi-byte characters [these constructs are "exercises + for the reader" as it were :-)]. + + Please refer to the ISO/ANSI C Language Standard if you believe + this grammar to be in error. Please cite chapter and verse in any + correspondence to the author to back up your claim. + +TODO: + + - typedefName is commented out, needs a symbol table to resolve + ambiguity. + + - trees + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ + + +header { + package com.sun.gluegen.cgram; + + import java.io.*; + + import antlr.CommonAST; + import antlr.DumpASTVisitor; +} + + +class StdCParser extends Parser; + +options + { + k = 2; + exportVocab = STDC; + buildAST = true; + ASTLabelType = "TNode"; + + // Copied following options from java grammar. + codeGenMakeSwitchThreshold = 2; + codeGenBitsetTestThreshold = 3; + } + + +{ + // Suppport C++-style single-line comments? + public static boolean CPPComments = true; + + // access to symbol table + public CSymbolTable symbolTable = new CSymbolTable(); + + // source for names to unnamed scopes + protected int unnamedScopeCounter = 0; + + public boolean isTypedefName(String name) { + boolean returnValue = false; + TNode node = symbolTable.lookupNameInCurrentScope(name); + for (; node != null; node = (TNode) node.getNextSibling() ) { + if(node.getType() == LITERAL_typedef) { + returnValue = true; + break; + } + } + return returnValue; + } + + + public String getAScopeName() { + return "" + (unnamedScopeCounter++); + } + + public void pushScope(String scopeName) { + symbolTable.pushScope(scopeName); + } + + public void popScope() { + symbolTable.popScope(); + } + + int traceDepth = 0; + public void reportError(RecognitionException ex) { + try { + System.err.println("ANTLR Parsing Error: "+ex + " token name:" + tokenNames[LA(1)]); + ex.printStackTrace(System.err); + } + catch (TokenStreamException e) { + System.err.println("ANTLR Parsing Error: "+ex); + ex.printStackTrace(System.err); + } + } + public void reportError(String s) { + System.err.println("ANTLR Parsing Error from String: " + s); + } + public void reportWarning(String s) { + System.err.println("ANTLR Parsing Warning from String: " + s); + } + public void match(int t) throws MismatchedTokenException { + boolean debugging = false; + + if ( debugging ) { + for (int x=0; x<traceDepth; x++) System.out.print(" "); + try { + System.out.println("Match("+tokenNames[t]+") with LA(1)="+ + tokenNames[LA(1)] + ((inputState.guessing>0)?" [inputState.guessing "+ inputState.guessing + "]":"")); + } + catch (TokenStreamException e) { + System.out.println("Match("+tokenNames[t]+") " + ((inputState.guessing>0)?" [inputState.guessing "+ inputState.guessing + "]":"")); + + } + + } + try { + if ( LA(1)!=t ) { + if ( debugging ){ + for (int x=0; x<traceDepth; x++) System.out.print(" "); + System.out.println("token mismatch: "+tokenNames[LA(1)] + + "!="+tokenNames[t]); + } + throw new MismatchedTokenException(tokenNames, LT(1), t, false, getFilename()); + + } else { + // mark token as consumed -- fetch next token deferred until LA/LT + consume(); + } + } + catch (TokenStreamException e) { + } + + } + public void traceIn(String rname) { + traceDepth += 1; + for (int x=0; x<traceDepth; x++) System.out.print(" "); + try { + System.out.println("> "+rname+"; LA(1)==("+ tokenNames[LT(1).getType()] + + ") " + LT(1).getText() + " [inputState.guessing "+ inputState.guessing + "]"); + } + catch (TokenStreamException e) { + } + } + public void traceOut(String rname) { + for (int x=0; x<traceDepth; x++) System.out.print(" "); + try { + System.out.println("< "+rname+"; LA(1)==("+ tokenNames[LT(1).getType()] + + ") "+LT(1).getText() + " [inputState.guessing "+ inputState.guessing + "]"); + } + catch (TokenStreamException e) { + } + traceDepth -= 1; + } + +} + + + +translationUnit + : externalList + + | /* Empty source files are *not* allowed. */ + { + System.err.println ( "Empty source file!" ); + } + ; + + +externalList + : ( externalDef )+ + ; + + +externalDef + : ( "typedef" | declaration )=> declaration + | functionDef + | asm_expr + ; + + +asm_expr + : "asm"^ + ("volatile")? LCURLY! expr RCURLY! SEMI! + ; + + +declaration + { AST ds1 = null; } + : ds:declSpecifiers { ds1 = astFactory.dupList(#ds); } + ( + initDeclList[ds1] + )? + SEMI! + { ## = #( #[NDeclaration], ##); } + + ; + + +declSpecifiers + { int specCount=0; } + : ( options { // this loop properly aborts when + // it finds a non-typedefName ID MBZ + warnWhenFollowAmbig = false; + } : + s:storageClassSpecifier + | typeQualifier + | ( "struct" | "union" | "enum" | typeSpecifier[specCount] )=> + specCount = typeSpecifier[specCount] + )+ + ; + +storageClassSpecifier + : "auto" + | "register" + | "typedef" + | functionStorageClassSpecifier + ; + + +functionStorageClassSpecifier + : "extern" + | "static" + ; + + +typeQualifier + : "const" + | "volatile" + ; + +typeSpecifier [int specCount] returns [int retSpecCount] + { retSpecCount = specCount + 1; } + : + ( "void" + | "char" + | "short" + | "int" + | "long" + | "float" + | "double" + | "signed" + | "unsigned" + | structOrUnionSpecifier + | enumSpecifier + | { specCount == 0 }? typedefName + ) + ; + + +typedefName + : { isTypedefName ( LT(1).getText() ) }? + i:ID { ## = #(#[NTypedefName], #i); } + ; + +structOrUnionSpecifier + { String scopeName; } + : sou:structOrUnion! + ( ( ID LCURLY )=> i:ID l:LCURLY + { + scopeName = #sou.getText() + " " + #i.getText(); + #l.setText(scopeName); + pushScope(scopeName); + } + structDeclarationList + { popScope();} + RCURLY! + | l1:LCURLY + { + scopeName = getAScopeName(); + #l1.setText(scopeName); + pushScope(scopeName); + } + structDeclarationList + { popScope(); } + RCURLY! + | ID + ) + { + ## = #( #sou, ## ); + } + ; + + +structOrUnion + : "struct" + | "union" + ; + + +structDeclarationList + : ( structDeclaration )+ + ; + + +structDeclaration + : specifierQualifierList structDeclaratorList ( SEMI! )+ + ; + + +specifierQualifierList + { int specCount = 0; } + : ( options { // this loop properly aborts when + // it finds a non-typedefName ID MBZ + warnWhenFollowAmbig = false; + } : + ( "struct" | "union" | "enum" | typeSpecifier[specCount] )=> + specCount = typeSpecifier[specCount] + | typeQualifier + )+ + ; + + +structDeclaratorList + : structDeclarator ( COMMA! structDeclarator )* + ; + + +structDeclarator + : + ( COLON constExpr + | declarator[false] ( COLON constExpr )? + ) + { ## = #( #[NStructDeclarator], ##); } + ; + + +enumSpecifier + : "enum"^ + ( ( ID LCURLY )=> i:ID LCURLY enumList[i.getText()] RCURLY! + | LCURLY enumList["anonymous"] RCURLY! + | ID + ) + ; + + +enumList[String enumName] + : enumerator[enumName] ( COMMA! enumerator[enumName] )* + ; + +enumerator[String enumName] + : i:ID { symbolTable.add( i.getText(), + #( null, + #[LITERAL_enum, "enum"], + #[ ID, enumName] + ) + ); + } + (ASSIGN constExpr)? + ; + + +initDeclList[AST declarationSpecifiers] + : initDecl[declarationSpecifiers] + ( COMMA! initDecl[declarationSpecifiers] )* + ; + + +initDecl[AST declarationSpecifiers] + { String declName = ""; } + : declName = d:declarator[false] + { AST ds1, d1; + ds1 = astFactory.dupList(declarationSpecifiers); + d1 = astFactory.dupList(#d); + symbolTable.add(declName, #(null, ds1, d1) ); + } + ( ASSIGN initializer + | COLON expr + )? + { ## = #( #[NInitDecl], ## ); } + + ; + +pointerGroup + : ( STAR ( typeQualifier )* )+ { ## = #( #[NPointerGroup], ##); } + ; + + + +idList + : ID ( COMMA! ID )* + ; + + +initializer + : ( assignExpr + | LCURLY initializerList ( COMMA! )? RCURLY! + ) + { ## = #( #[NInitializer], ## ); } + ; + + +initializerList + : initializer ( COMMA! initializer )* + ; + + +declarator[boolean isFunctionDefinition] returns [String declName] + { declName = ""; } + : + ( pointerGroup )? + + ( id:ID { declName = id.getText(); } + | LPAREN declName = declarator[false] RPAREN + ) + + ( ! LPAREN + { + if (isFunctionDefinition) { + pushScope(declName); + } + else { + pushScope("!"+declName); + } + } + ( + (declSpecifiers)=> p:parameterTypeList + { + ## = #( null, ##, #( #[NParameterTypeList], #p ) ); + } + + | (i:idList)? + { + ## = #( null, ##, #( #[NParameterTypeList], #i ) ); + } + ) + { + popScope(); + } + RPAREN + | LBRACKET ( constExpr )? RBRACKET + )* + { ## = #( #[NDeclarator], ## ); } + ; + +parameterTypeList + : parameterDeclaration + ( options { + warnWhenFollowAmbig = false; + } : + COMMA! + parameterDeclaration + )* + ( COMMA! + VARARGS + )? + ; + + +parameterDeclaration + { String declName; } + : ds:declSpecifiers + ( ( declarator[false] )=> declName = d:declarator[false] + { + AST d2, ds2; + d2 = astFactory.dupList(#d); + ds2 = astFactory.dupList(#ds); + symbolTable.add(declName, #(null, ds2, d2)); + } + | nonemptyAbstractDeclarator + )? + { + ## = #( #[NParameterDeclaration], ## ); + } + ; + +/* JTC: + * This handles both new and old style functions. + * see declarator rule to see differences in parameters + * and here (declaration SEMI)* is the param type decls for the + * old style. may want to do some checking to check for illegal + * combinations (but I assume all parsed code will be legal?) + */ + +functionDef + { String declName; } + : ( (functionDeclSpecifiers)=> ds:functionDeclSpecifiers + | //epsilon + ) + declName = d:declarator[true] + { + AST d2, ds2; + d2 = astFactory.dupList(#d); + ds2 = astFactory.dupList(#ds); + symbolTable.add(declName, #(null, ds2, d2)); + pushScope(declName); + } + ( declaration )* (VARARGS)? ( SEMI! )* + { popScope(); } + compoundStatement[declName] + { ## = #( #[NFunctionDef], ## );} + ; + +functionDeclSpecifiers + { int specCount = 0; } + : ( options { // this loop properly aborts when + // it finds a non-typedefName ID MBZ + warnWhenFollowAmbig = false; + } : + functionStorageClassSpecifier + | typeQualifier + | ( "struct" | "union" | "enum" | typeSpecifier[specCount] )=> + specCount = typeSpecifier[specCount] + )+ + ; + +declarationList + : ( options { // this loop properly aborts when + // it finds a non-typedefName ID MBZ + warnWhenFollowAmbig = false; + } : + ( declarationPredictor )=> declaration + )+ + ; + +declarationPredictor + : (options { //only want to look at declaration if I don't see typedef + warnWhenFollowAmbig = false; + }: + "typedef" + | declaration + ) + ; + + +compoundStatement[String scopeName] + : LCURLY! + { + pushScope(scopeName); + } + ( ( declarationPredictor)=> declarationList )? + ( statementList )? + { popScope(); } + RCURLY! + { ## = #( #[NCompoundStatement, scopeName], ##); } + ; + + +statementList + : ( statement )+ + ; +statement + : SEMI // Empty statements + + | compoundStatement[getAScopeName()] // Group of statements + + | expr SEMI! { ## = #( #[NStatementExpr], ## ); } // Expressions + +// Iteration statements: + + | "while"^ LPAREN! expr RPAREN! statement + | "do"^ statement "while"! LPAREN! expr RPAREN! SEMI! + |! "for" + LPAREN ( e1:expr )? SEMI ( e2:expr )? SEMI ( e3:expr )? RPAREN + s:statement + { + if ( #e1 == null) { #e1 = (TNode) #[ NEmptyExpression ]; } + if ( #e2 == null) { #e2 = (TNode) #[ NEmptyExpression ]; } + if ( #e3 == null) { #e3 = (TNode) #[ NEmptyExpression ]; } + ## = #( #[LITERAL_for, "for"], #e1, #e2, #e3, #s ); + } + + +// Jump statements: + + | "goto"^ ID SEMI! + | "continue" SEMI! + | "break" SEMI! + | "return"^ ( expr )? SEMI! + + +// Labeled statements: + | ID COLON! (options {warnWhenFollowAmbig=false;}:statement)? { ## = #( #[NLabel], ## ); } + | "case"^ constExpr COLON! statement + | "default"^ COLON! statement + + + +// Selection statements: + + | "if"^ + LPAREN! expr RPAREN! statement + ( //standard if-else ambiguity + options { + warnWhenFollowAmbig = false; + } : + "else" statement )? + | "switch"^ LPAREN! expr RPAREN! statement + ; + + + + + + +expr + : assignExpr (options { + /* MBZ: + COMMA is ambiguous between comma expressions and + argument lists. argExprList should get priority, + and it does by being deeper in the expr rule tree + and using (COMMA assignExpr)* + */ + warnWhenFollowAmbig = false; + } : + c:COMMA^ { #c.setType(NCommaExpr); } assignExpr + )* + ; + + +assignExpr + : conditionalExpr ( a:assignOperator! assignExpr { ## = #( #a, ## );} )? + ; + +assignOperator + : ASSIGN + | DIV_ASSIGN + | PLUS_ASSIGN + | MINUS_ASSIGN + | STAR_ASSIGN + | MOD_ASSIGN + | RSHIFT_ASSIGN + | LSHIFT_ASSIGN + | BAND_ASSIGN + | BOR_ASSIGN + | BXOR_ASSIGN + ; + + +conditionalExpr + : logicalOrExpr + ( QUESTION^ expr COLON! conditionalExpr )? + ; + + +constExpr + : conditionalExpr + ; + +logicalOrExpr + : logicalAndExpr ( LOR^ logicalAndExpr )* + ; + + +logicalAndExpr + : inclusiveOrExpr ( LAND^ inclusiveOrExpr )* + ; + +inclusiveOrExpr + : exclusiveOrExpr ( BOR^ exclusiveOrExpr )* + ; + + +exclusiveOrExpr + : bitAndExpr ( BXOR^ bitAndExpr )* + ; + + +bitAndExpr + : equalityExpr ( BAND^ equalityExpr )* + ; + + + +equalityExpr + : relationalExpr + ( ( EQUAL^ | NOT_EQUAL^ ) relationalExpr )* + ; + + +relationalExpr + : shiftExpr + ( ( LT^ | LTE^ | GT^ | GTE^ ) shiftExpr )* + ; + + + +shiftExpr + : additiveExpr + ( ( LSHIFT^ | RSHIFT^ ) additiveExpr )* + ; + + +additiveExpr + : multExpr + ( ( PLUS^ | MINUS^ ) multExpr )* + ; + + +multExpr + : castExpr + ( ( STAR^ | DIV^ | MOD^ ) castExpr )* + ; + + +castExpr + : ( LPAREN typeName RPAREN )=> + LPAREN! typeName RPAREN! ( castExpr ) + { ## = #( #[NCast, "("], ## ); } + + | unaryExpr + ; + + +typeName + : specifierQualifierList (nonemptyAbstractDeclarator)? + ; + +nonemptyAbstractDeclarator + : ( + pointerGroup + ( (LPAREN + ( nonemptyAbstractDeclarator + | parameterTypeList + )? + RPAREN) + | (LBRACKET (expr)? RBRACKET) + )* + + | ( (LPAREN + ( nonemptyAbstractDeclarator + | parameterTypeList + )? + RPAREN) + | (LBRACKET (expr)? RBRACKET) + )+ + ) + { ## = #( #[NNonemptyAbstractDeclarator], ## ); } + + ; + +/* JTC: + +LR rules: + +abstractDeclarator + : nonemptyAbstractDeclarator + | // null + ; + +nonemptyAbstractDeclarator + : LPAREN nonemptyAbstractDeclarator RPAREN + | abstractDeclarator LPAREN RPAREN + | abstractDeclarator (LBRACKET (expr)? RBRACKET) + | STAR abstractDeclarator + ; +*/ + +unaryExpr + : postfixExpr + | INC^ unaryExpr + | DEC^ unaryExpr + | u:unaryOperator castExpr { ## = #( #[NUnaryExpr], ## ); } + + | "sizeof"^ + ( ( LPAREN typeName )=> LPAREN typeName RPAREN + | unaryExpr + ) + ; + + +unaryOperator + : BAND + | STAR + | PLUS + | MINUS + | BNOT + | LNOT + ; + +postfixExpr + : primaryExpr + ( + postfixSuffix {## = #( #[NPostfixExpr], ## );} + )? + ; +postfixSuffix + : + ( PTR ID + | DOT ID + | functionCall + | LBRACKET expr RBRACKET + | INC + | DEC + )+ + ; + +functionCall + : + LPAREN^ (a:argExprList)? RPAREN + { + ##.setType( NFunctionCallArgs ); + } + ; + + +primaryExpr + : ID + | charConst + | intConst + | floatConst + | stringConst + +// JTC: +// ID should catch the enumerator +// leaving it in gives ambiguous err +// | enumerator + | LPAREN! expr RPAREN! { ## = #( #[NExpressionGroup, "("], ## ); } + ; + +argExprList + : assignExpr ( COMMA! assignExpr )* + ; + + + +protected +charConst + : CharLiteral + ; + + +protected +stringConst + : (StringLiteral)+ { ## = #(#[NStringSeq], ##); } + ; + + +protected +intConst + : IntOctalConst + | LongOctalConst + | UnsignedOctalConst + | IntIntConst + | LongIntConst + | UnsignedIntConst + | IntHexConst + | LongHexConst + | UnsignedHexConst + ; + + +protected +floatConst + : FloatDoubleConst + | DoubleDoubleConst + | LongDoubleConst + ; + + + + + + +dummy + : NTypedefName + | NInitDecl + | NDeclarator + | NStructDeclarator + | NDeclaration + | NCast + | NPointerGroup + | NExpressionGroup + | NFunctionCallArgs + | NNonemptyAbstractDeclarator + | NInitializer + | NStatementExpr + | NEmptyExpression + | NParameterTypeList + | NFunctionDef + | NCompoundStatement + | NParameterDeclaration + | NCommaExpr + | NUnaryExpr + | NLabel + | NPostfixExpr + | NRangeExpr + | NStringSeq + | NInitializerElementLabel + | NLcurlyInitializer + | NAsmAttribute + | NGnuAsmExpr + | NTypeMissing + ; + + + + + + +{ + import java.io.*; + import antlr.*; +} + +class StdCLexer extends Lexer; + +options + { + k = 3; + exportVocab = STDC; + testLiterals = false; + } + +{ + LineObject lineObject = new LineObject(); + String originalSource = ""; + PreprocessorInfoChannel preprocessorInfoChannel = new PreprocessorInfoChannel(); + int tokenNumber = 0; + boolean countingTokens = true; + int deferredLineCount = 0; + + public void setCountingTokens(boolean ct) + { + countingTokens = ct; + if ( countingTokens ) { + tokenNumber = 0; + } + else { + tokenNumber = 1; + } + } + + public void setOriginalSource(String src) + { + originalSource = src; + lineObject.setSource(src); + } + public void setSource(String src) + { + lineObject.setSource(src); + } + + public PreprocessorInfoChannel getPreprocessorInfoChannel() + { + return preprocessorInfoChannel; + } + + public void setPreprocessingDirective(String pre) + { + preprocessorInfoChannel.addLineForTokenNumber( pre, new Integer(tokenNumber) ); + } + + public void addDefine(String name, String value) + { + } + + protected Token makeToken(int t) + { + if ( t != Token.SKIP && countingTokens) { + tokenNumber++; + } + CToken tok = (CToken) super.makeToken(t); + tok.setLine(lineObject.line); + tok.setSource(lineObject.source); + tok.setTokenNumber(tokenNumber); + + lineObject.line += deferredLineCount; + deferredLineCount = 0; + return tok; + } + + public void deferredNewline() { + deferredLineCount++; + } + + public void newline() { + lineObject.newline(); + } + + + + + + +} + +protected +Vocabulary + : '\3'..'\377' + ; + + +/* Operators: */ + +ASSIGN : '=' ; +COLON : ':' ; +COMMA : ',' ; +QUESTION : '?' ; +SEMI : ';' ; +PTR : "->" ; + + +// DOT & VARARGS are commented out since they are generated as part of +// the Number rule below due to some bizarre lexical ambiguity shme. + +// DOT : '.' ; +protected +DOT:; + +// VARARGS : "..." ; +protected +VARARGS:; + + +LPAREN : '(' ; +RPAREN : ')' ; +LBRACKET : '[' ; +RBRACKET : ']' ; +LCURLY : '{' ; +RCURLY : '}' ; + +EQUAL : "==" ; +NOT_EQUAL : "!=" ; +LTE : "<=" ; +LT : "<" ; +GTE : ">=" ; +GT : ">" ; + +DIV : '/' ; +DIV_ASSIGN : "/=" ; +PLUS : '+' ; +PLUS_ASSIGN : "+=" ; +INC : "++" ; +MINUS : '-' ; +MINUS_ASSIGN : "-=" ; +DEC : "--" ; +STAR : '*' ; +STAR_ASSIGN : "*=" ; +MOD : '%' ; +MOD_ASSIGN : "%=" ; +RSHIFT : ">>" ; +RSHIFT_ASSIGN : ">>=" ; +LSHIFT : "<<" ; +LSHIFT_ASSIGN : "<<=" ; + +LAND : "&&" ; +LNOT : '!' ; +LOR : "||" ; + +BAND : '&' ; +BAND_ASSIGN : "&=" ; +BNOT : '~' ; +BOR : '|' ; +BOR_ASSIGN : "|=" ; +BXOR : '^' ; +BXOR_ASSIGN : "^=" ; + + +Whitespace + : ( ( '\003'..'\010' | '\t' | '\013' | '\f' | '\016'.. '\037' | '\177'..'\377' | ' ' ) + | "\r\n" { newline(); } + | ( '\n' | '\r' ) { newline(); } + ) { _ttype = Token.SKIP; } + ; + + +Comment + : "/*" + ( { LA(2) != '/' }? '*' + | "\r\n" { deferredNewline(); } + | ( '\r' | '\n' ) { deferredNewline(); } + | ~( '*'| '\r' | '\n' ) + )* + "*/" { _ttype = Token.SKIP; + } + ; + + +CPPComment + : + "//" ( ~('\n') )* + { + _ttype = Token.SKIP; + } + ; + +protected NonWhitespace + : (~('\r' | '\n'))* + ; + + +PREPROC_DIRECTIVE +options { + paraphrase = "a line directive"; +} + + : + '#' + ( ( "line" || (( ' ' | '\t' | '\014')+ '0'..'9')) => LineDirective + | ( (Space)* "define" (Space)* i:ID (Space)* (n:Number)? + nw:NonWhitespace + ("\r\n" | "\r" | "\n") ) { if (n != null) { + addDefine(i.getText(), n.getText()); + } else { + setPreprocessingDirective("#define " + i.getText() + " " + + nw.getText()); + } + } + | (~'\n')* { setPreprocessingDirective(getText()); } + ) + { + _ttype = Token.SKIP; + } + ; + +protected Space: + ( ' ' | '\t' | '\014') + ; + +protected LineDirective +{ + boolean oldCountingTokens = countingTokens; + countingTokens = false; +} +: + { + lineObject = new LineObject(); + deferredLineCount = 0; + } + ("line")? //this would be for if the directive started "#line", but not there for GNU directives + (Space)+ + n:Number { lineObject.setLine(Integer.parseInt(n.getText())); } + (Space)+ + ( fn:StringLiteral { try { + lineObject.setSource(fn.getText().substring(1,fn.getText().length()-1)); + } + catch (StringIndexOutOfBoundsException e) { /*not possible*/ } + } + | fi:ID { lineObject.setSource(fi.getText()); } + )? + (Space)* + ("1" { lineObject.setEnteringFile(true); } )? + (Space)* + ("2" { lineObject.setReturningToFile(true); } )? + (Space)* + ("3" { lineObject.setSystemHeader(true); } )? + (Space)* + ("4" { lineObject.setTreatAsC(true); } )? + (~('\r' | '\n'))* + ("\r\n" | "\r" | "\n") + { + preprocessorInfoChannel.addLineForTokenNumber(new LineObject(lineObject), new Integer(tokenNumber)); + countingTokens = oldCountingTokens; + } + ; + + + +/* Literals: */ + +/* + * Note that we do NOT handle tri-graphs nor multi-byte sequences. + */ + + +/* + * Note that we can't have empty character constants (even though we + * can have empty strings :-). + */ +CharLiteral + : '\'' ( Escape | ~( '\'' ) ) '\'' + ; + + +/* + * Can't have raw imbedded newlines in string constants. Strict reading of + * the standard gives odd dichotomy between newlines & carriage returns. + * Go figure. + */ +StringLiteral + : '"' + ( Escape + | ( + '\r' { deferredNewline(); } + | '\n' { + deferredNewline(); + _ttype = BadStringLiteral; + } + | '\\' '\n' { + deferredNewline(); + } + ) + | ~( '"' | '\r' | '\n' | '\\' ) + )* + '"' + ; + + +protected BadStringLiteral + : // Imaginary token. + ; + + +/* + * Handle the various escape sequences. + * + * Note carefully that these numeric escape *sequences* are *not* of the + * same form as the C language numeric *constants*. + * + * There is no such thing as a binary numeric escape sequence. + * + * Octal escape sequences are either 1, 2, or 3 octal digits exactly. + * + * There is no such thing as a decimal escape sequence. + * + * Hexadecimal escape sequences are begun with a leading \x and continue + * until a non-hexadecimal character is found. + * + * No real handling of tri-graph sequences, yet. + */ + +protected +Escape + : '\\' + ( options{warnWhenFollowAmbig=false;}: + 'a' + | 'b' + | 'f' + | 'n' + | 'r' + | 't' + | 'v' + | '"' + | '\'' + | '\\' + | '?' + | ('0'..'3') ( options{warnWhenFollowAmbig=false;}: Digit ( options{warnWhenFollowAmbig=false;}: Digit )? )? + | ('4'..'7') ( options{warnWhenFollowAmbig=false;}: Digit )? + | 'x' ( options{warnWhenFollowAmbig=false;}: Digit | 'a'..'f' | 'A'..'F' )+ + ) + ; + + +/* Numeric Constants: */ + +protected +Digit + : '0'..'9' + ; + +protected +LongSuffix + : 'l' + | 'L' + ; + +protected +UnsignedSuffix + : 'u' + | 'U' + ; + +protected +FloatSuffix + : 'f' + | 'F' + ; + +protected +Exponent + : ( 'e' | 'E' ) ( '+' | '-' )? ( Digit )+ + ; + + +protected +DoubleDoubleConst:; + +protected +FloatDoubleConst:; + +protected +LongDoubleConst:; + +protected +IntOctalConst:; + +protected +LongOctalConst:; + +protected +UnsignedOctalConst:; + +protected +IntIntConst:; + +protected +LongIntConst:; + +protected +UnsignedIntConst:; + +protected +IntHexConst:; + +protected +LongHexConst:; + +protected +UnsignedHexConst:; + + + + +Number + : ( ( Digit )+ ( '.' | 'e' | 'E' ) )=> ( Digit )+ + ( '.' ( Digit )* ( Exponent )? + | Exponent + ) { _ttype = DoubleDoubleConst; } + ( FloatSuffix { _ttype = FloatDoubleConst; } + | LongSuffix { _ttype = LongDoubleConst; } + )? + + | ( "..." )=> "..." { _ttype = VARARGS; } + + | '.' { _ttype = DOT; } + ( ( Digit )+ ( Exponent )? + { _ttype = DoubleDoubleConst; } + ( FloatSuffix { _ttype = FloatDoubleConst; } + | LongSuffix { _ttype = LongDoubleConst; } + )? + )? + + | '0' ( '0'..'7' )* { _ttype = IntOctalConst; } + ( LongSuffix { _ttype = LongOctalConst; } + | UnsignedSuffix { _ttype = UnsignedOctalConst; } + )? + + | '1'..'9' ( Digit )* { _ttype = IntIntConst; } + ( LongSuffix { _ttype = LongIntConst; } + | UnsignedSuffix { _ttype = UnsignedIntConst; } + )? + + | '0' ( 'x' | 'X' ) ( 'a'..'f' | 'A'..'F' | Digit )+ + { _ttype = IntHexConst; } + ( LongSuffix { _ttype = LongHexConst; } + | UnsignedSuffix { _ttype = UnsignedHexConst; } + )? + ; + + +ID + options + { + testLiterals = true; + } + : ( 'a'..'z' | 'A'..'Z' | '_' ) + ( 'a'..'z' | 'A'..'Z' | '_' | '0'..'9' )* + ; + + |